linfa_preprocessing::tf_idf_vectorization

Struct TfIdfVectorizer

pub struct TfIdfVectorizer { /* private fields */ }

Expand description

Similar to CountVectorizer, but instead of just counting the term frequency of each vocabulary entry in each given document, it computes the term frequency times the inverse document frequency, thus giving more importance to entries that appear many times but only in some documents. The weight function can be adjusted by setting the appropriate method. This struct provides the same string processing customizations described in CountVectorizer.

TfIdfVectorizer

Struct TfIdfVectorizer Copy item path

Implementations§

impl TfIdfVectorizer

pub fn tokenizer(self, tokenizer: Tokenizer) -> Self

pub fn max_features(self, max_features: Option<usize>) -> Self

pub fn convert_to_lowercase(self, convert_to_lowercase: bool) -> Self

pub fn n_gram_range(self, min_n: usize, max_n: usize) -> Self

pub fn normalize(self, normalize: bool) -> Self

pub fn document_frequency(self, min_freq: f32, max_freq: f32) -> Self

pub fn stopwords<T: ToString>(self, stopwords: &[T]) -> Self

pub fn fit<T: ToString + Clone, D: Data<Elem = T>>( &self, x: &ArrayBase<D, Ix1>, ) -> Result<FittedTfIdfVectorizer>

pub fn fit_vocabulary<T: ToString>( &self, words: &[T], ) -> Result<FittedTfIdfVectorizer>

pub fn fit_files<P: AsRef<Path>>( &self, input: &[P], encoding: EncodingRef, trap: DecoderTrap, ) -> Result<FittedTfIdfVectorizer>

Trait Implementations§

impl Clone for TfIdfVectorizer

fn clone(&self) -> TfIdfVectorizer

fn clone_from(&mut self, source: &Self)

impl Debug for TfIdfVectorizer

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for TfIdfVectorizer

fn default() -> Self

Auto Trait Implementations§

impl !Freeze for TfIdfVectorizer

impl !RefUnwindSafe for TfIdfVectorizer

impl Send for TfIdfVectorizer

impl !Sync for TfIdfVectorizer

impl Unpin for TfIdfVectorizer

impl UnsafeUnpin for TfIdfVectorizer

impl UnwindSafe for TfIdfVectorizer

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<B> Stagewise for B

fn chain_shrunk<C, F>( self, corrector: Shrunk<C, F>, ) -> ResidualChainParams<B, C, F> where F: Float,

fn chain<C, F, E>(self, corrector: C) -> ResidualChainParams<B, C, F> where F: Float, C: Fit<ArrayBase<OwnedRepr<F>, Dim<[usize; 2]>>, ArrayBase<OwnedRepr<F>, Dim<[usize; 1]>>, E>, E: Error + From<Error>,

fn shrink_by<F, E>(self, shrinkage: F) -> Shrunk<B, F> where F: Float, B: Fit<ArrayBase<OwnedRepr<F>, Dim<[usize; 2]>>, ArrayBase<OwnedRepr<F>, Dim<[usize; 1]>>, E>, E: Error + From<Error>,

impl<SS, SP> SupersetOf<SS> for SP where SS: SubsetOf<SP>,

fn to_subset(&self) -> Option<SS>

fn is_in_subset(&self) -> bool

unsafe fn to_subset_unchecked(&self) -> SS

fn from_subset(element: &SS) -> SP

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT where ST: ?Sized, DT: ?Sized,

impl<T> Read<Exclusive, BecauseExclusive> for T where T: ?Sized,

Struct TfIdfVectorizer

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

fn chain_shrunk<C, F>( self, corrector: Shrunk<C, F>, ) -> ResidualChainParams<B, C, F>
where F: Float,

fn chain<C, F, E>(self, corrector: C) -> ResidualChainParams<B, C, F>
where F: Float, C: Fit<ArrayBase<OwnedRepr<F>, Dim<[usize; 2]>>, ArrayBase<OwnedRepr<F>, Dim<[usize; 1]>>, E>, E: Error + From<Error>,

fn shrink_by<F, E>(self, shrinkage: F) -> Shrunk<B, F>
where F: Float, B: Fit<ArrayBase<OwnedRepr<F>, Dim<[usize; 2]>>, ArrayBase<OwnedRepr<F>, Dim<[usize; 1]>>, E>, E: Error + From<Error>,

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,