linfa_reduction/random_projection/
methods.rs1use linfa::Float;
2use ndarray::{Array, Array2};
3use ndarray_rand::{
4 rand_distr::{Normal, StandardNormal},
5 RandomExt,
6};
7use rand::{
8 distributions::{Bernoulli, Distribution, Standard},
9 Rng,
10};
11use sprs::{CsMat, TriMat};
12
13use crate::ReductionError;
14
15pub trait ProjectionMethod {
16 type RandomDistribution;
17 type ProjectionMatrix<F: Float>
18 where
19 Self::RandomDistribution: Distribution<F>;
20
21 fn generate_matrix<F: Float>(
22 n_features: usize,
23 n_dims: usize,
24 rng: &mut impl Rng,
25 ) -> Result<Self::ProjectionMatrix<F>, ReductionError>
26 where
27 Self::RandomDistribution: Distribution<F>;
28}
29
30pub struct Gaussian;
31
32impl ProjectionMethod for Gaussian {
33 type RandomDistribution = StandardNormal;
34 type ProjectionMatrix<F: Float>
35 = Array2<F>
36 where
37 StandardNormal: Distribution<F>;
38
39 fn generate_matrix<F: Float>(
40 n_features: usize,
41 n_dims: usize,
42 rng: &mut impl Rng,
43 ) -> Result<Self::ProjectionMatrix<F>, ReductionError>
44 where
45 StandardNormal: Distribution<F>,
46 {
47 let std_dev = F::cast(n_features).sqrt().recip();
48 let gaussian = Normal::new(F::zero(), std_dev)?;
49
50 Ok(Array::random_using((n_features, n_dims), gaussian, rng))
51 }
52}
53
54pub struct Sparse;
55
56impl ProjectionMethod for Sparse {
57 type RandomDistribution = Standard;
58 type ProjectionMatrix<F: Float>
59 = CsMat<F>
60 where
61 Standard: Distribution<F>;
62
63 fn generate_matrix<F: Float>(
64 n_features: usize,
65 n_dims: usize,
66 rng: &mut impl Rng,
67 ) -> Result<Self::ProjectionMatrix<F>, ReductionError>
68 where
69 Standard: Distribution<F>,
70 {
71 let scale = (n_features as f64).sqrt();
72 let p = 1f64 / scale;
73 let dist = SparseDistribution::new(F::cast(scale), p);
74
75 let (mut row_inds, mut col_inds, mut values) = (Vec::new(), Vec::new(), Vec::new());
76 for row in 0..n_features {
77 for col in 0..n_dims {
78 if let Some(x) = dist.sample(rng) {
79 row_inds.push(row);
80 col_inds.push(col);
81 values.push(x);
82 }
83 }
84 }
85
86 let proj = TriMat::from_triplets((n_features, n_dims), row_inds, col_inds, values).to_csc();
89
90 Ok(proj)
91 }
92}
93
94struct SparseDistribution<F: Float> {
97 scale: F,
98 b: Bernoulli,
99}
100
101impl<F: Float> SparseDistribution<F> {
102 pub fn new(scale: F, p: f64) -> Self {
103 SparseDistribution {
104 scale,
105 b: Bernoulli::new(p).expect("Parmeter `p` must be between 0 and 1."),
106 }
107 }
108}
109
110impl<F: Float> Distribution<Option<F>> for SparseDistribution<F> {
111 fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<F> {
112 let non_zero = self.b.sample(rng);
113 if non_zero {
114 if rng.gen::<bool>() {
115 Some(self.scale)
116 } else {
117 Some(-self.scale)
118 }
119 } else {
120 None
121 }
122 }
123}