linfa_reduction/random_projection/
methods.rs

1use linfa::Float;
2use ndarray::{Array, Array2};
3use ndarray_rand::{
4    rand_distr::{Normal, StandardNormal},
5    RandomExt,
6};
7use rand::{
8    distributions::{Bernoulli, Distribution, Standard},
9    Rng,
10};
11use sprs::{CsMat, TriMat};
12
13use crate::ReductionError;
14
15pub trait ProjectionMethod {
16    type RandomDistribution;
17    type ProjectionMatrix<F: Float>
18    where
19        Self::RandomDistribution: Distribution<F>;
20
21    fn generate_matrix<F: Float>(
22        n_features: usize,
23        n_dims: usize,
24        rng: &mut impl Rng,
25    ) -> Result<Self::ProjectionMatrix<F>, ReductionError>
26    where
27        Self::RandomDistribution: Distribution<F>;
28}
29
30pub struct Gaussian;
31
32impl ProjectionMethod for Gaussian {
33    type RandomDistribution = StandardNormal;
34    type ProjectionMatrix<F: Float>
35        = Array2<F>
36    where
37        StandardNormal: Distribution<F>;
38
39    fn generate_matrix<F: Float>(
40        n_features: usize,
41        n_dims: usize,
42        rng: &mut impl Rng,
43    ) -> Result<Self::ProjectionMatrix<F>, ReductionError>
44    where
45        StandardNormal: Distribution<F>,
46    {
47        let std_dev = F::cast(n_features).sqrt().recip();
48        let gaussian = Normal::new(F::zero(), std_dev)?;
49
50        Ok(Array::random_using((n_features, n_dims), gaussian, rng))
51    }
52}
53
54pub struct Sparse;
55
56impl ProjectionMethod for Sparse {
57    type RandomDistribution = Standard;
58    type ProjectionMatrix<F: Float>
59        = CsMat<F>
60    where
61        Standard: Distribution<F>;
62
63    fn generate_matrix<F: Float>(
64        n_features: usize,
65        n_dims: usize,
66        rng: &mut impl Rng,
67    ) -> Result<Self::ProjectionMatrix<F>, ReductionError>
68    where
69        Standard: Distribution<F>,
70    {
71        let scale = (n_features as f64).sqrt();
72        let p = 1f64 / scale;
73        let dist = SparseDistribution::new(F::cast(scale), p);
74
75        let (mut row_inds, mut col_inds, mut values) = (Vec::new(), Vec::new(), Vec::new());
76        for row in 0..n_features {
77            for col in 0..n_dims {
78                if let Some(x) = dist.sample(rng) {
79                    row_inds.push(row);
80                    col_inds.push(col);
81                    values.push(x);
82                }
83            }
84        }
85
86        // `proj` will be used as the RHS of a matrix multiplication in [`SparseRandomProjection::transform`],
87        // therefore we convert it to `csc` storage.
88        let proj = TriMat::from_triplets((n_features, n_dims), row_inds, col_inds, values).to_csc();
89
90        Ok(proj)
91    }
92}
93
94/// Random variable that has value `Some(+/- scale)` with probability `p/2` each,
95/// and [`None`] with probability `1-p`.
96struct SparseDistribution<F: Float> {
97    scale: F,
98    b: Bernoulli,
99}
100
101impl<F: Float> SparseDistribution<F> {
102    pub fn new(scale: F, p: f64) -> Self {
103        SparseDistribution {
104            scale,
105            b: Bernoulli::new(p).expect("Parmeter `p` must be between 0 and 1."),
106        }
107    }
108}
109
110impl<F: Float> Distribution<Option<F>> for SparseDistribution<F> {
111    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<F> {
112        let non_zero = self.b.sample(rng);
113        if non_zero {
114            if rng.gen::<bool>() {
115                Some(self.scale)
116            } else {
117                Some(-self.scale)
118            }
119        } else {
120            None
121        }
122    }
123}