// linfa_reduction/random_projection/mod.rs

//! # Random Projections
//!
//! These algorithms build a low-distortion embedding of the input data
//! in a low-dimensional Euclidean space by projecting the data onto a random subspace.
//! The embedding is a randomly chosen matrix (either Gaussian or sparse),
//! following the [Johnson-Lindenstrauss Lemma](https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma).
//!
//! This result states that, if the dimension of the embedding is `Ω(log(n_samples)/eps^2)`,
//! then with high probability, the projection `p` has distortion less than `eps`,
//! where `eps` is a parameter, with `0 < eps < 1`.
//! "Distortion less than `eps`" means that for all vectors `u, v` in the original dataset,
//! we have `(1 - eps) d(u, v) <= d(p(u), p(v)) <= (1 + eps) d(u, v)`,
//! where `d` denotes the distance between two vectors.
//!
//! Note that the dimension of the embedding does not
//! depend on the original dimension of the data set (the number of features).
//!
//! ## Comparison with other methods
//!
//! To obtain a given accuracy on a given task, random projections will
//! often require a larger embedding dimension than other reduction methods such as PCA.
//! However, random projections have a very low computational cost,
//! since they only consist of sampling a random matrix,
//! whereas PCA requires computing the pseudoinverse of a large matrix,
//! which is computationally expensive.
mod algorithms;
mod common;
mod hyperparams;
mod methods;

pub use algorithms::RandomProjection;
pub use hyperparams::{RandomProjectionParams, RandomProjectionValidParams};

// The projection methods (random-matrix kinds) stay private; users reach
// them only through the type aliases below.
use self::methods::{Gaussian, Sparse};

/// Random projection based on a Gaussian random matrix.
pub type GaussianRandomProjection<F> = RandomProjection<Gaussian, F>;
/// Hyperparameter set for a Gaussian random projection (see [`RandomProjectionParams`]).
pub type GaussianRandomProjectionParams<R> = RandomProjectionParams<Gaussian, R>;
/// Validated hyperparameter set for a Gaussian random projection.
pub type GaussianRandomProjectionValidParams<R> = RandomProjectionValidParams<Gaussian, R>;
/// Random projection based on a sparse random matrix.
pub type SparseRandomProjection<F> = RandomProjection<Sparse, F>;
/// Hyperparameter set for a sparse random projection (see [`RandomProjectionParams`]).
pub type SparseRandomProjectionParams<R> = RandomProjectionParams<Sparse, R>;
/// Validated hyperparameter set for a sparse random projection.
pub type SparseRandomProjectionValidParams<R> = RandomProjectionValidParams<Sparse, R>;
42
#[cfg(test)]
mod tests {
    use super::*;

    // `array!` is not re-exported by `super::*`; it must be imported
    // explicitly for the fit-error tests below to compile.
    use ndarray::array;
    use rand_xoshiro::Xoshiro256Plus;

    use linfa::{traits::Fit, Dataset};

    /// The Gaussian projection types and their parameter structs must be
    /// `Send + Sync + Sized + Unpin` so they can cross thread boundaries.
    #[test]
    fn autotraits_gaussian() {
        fn has_autotraits<T: Send + Sync + Sized + Unpin>() {}
        has_autotraits::<GaussianRandomProjection<f64>>();
        has_autotraits::<GaussianRandomProjection<f32>>();
        has_autotraits::<GaussianRandomProjectionValidParams<Xoshiro256Plus>>();
        has_autotraits::<GaussianRandomProjectionParams<Xoshiro256Plus>>();
    }

    /// Same auto-trait guarantees for the sparse projection types.
    #[test]
    fn autotraits_sparse() {
        fn has_autotraits<T: Send + Sync + Sized + Unpin>() {}
        has_autotraits::<SparseRandomProjection<f64>>();
        has_autotraits::<SparseRandomProjection<f32>>();
        has_autotraits::<SparseRandomProjectionValidParams<Xoshiro256Plus>>();
        has_autotraits::<SparseRandomProjectionParams<Xoshiro256Plus>>();
    }

    /// Asking a Gaussian projection for a target dimension (10) larger than
    /// the input data (4 samples x 2 features) must fail at fit time.
    #[test]
    fn gaussian_dim_increase_error() {
        let records = array![[10., 10.], [1., 12.], [20., 30.], [-20., 30.],];
        let dataset = Dataset::from(records);
        let res = GaussianRandomProjection::<f32>::params()
            .target_dim(10)
            .fit(&dataset);
        assert!(res.is_err())
    }

    /// Same dimension-increase rejection for the sparse projection.
    #[test]
    fn sparse_dim_increase_error() {
        let records = array![[10., 10.], [1., 12.], [20., 30.], [-20., 30.],];
        let dataset = Dataset::from(records);
        let res = SparseRandomProjection::<f32>::params()
            .target_dim(10)
            .fit(&dataset);
        assert!(res.is_err())
    }
}
87}