// linfa_linear/isotonic.rs

//! Isotonic
#![allow(non_snake_case)]
use crate::error::{LinearError, Result};
use ndarray::{s, stack, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};

use std::cmp::Ordering;

use linfa::dataset::{AsSingleTargets, DatasetBase};
use linfa::traits::{Fit, PredictInplace};
/// Floating-point element types accepted by the isotonic regression solver.
pub trait Float: linfa::Float {}
impl Float for f32 {}
impl Float for f64 {}
16
17/// An implementation of PVA algorithm from Best (1990)
18/// for solving IRC problem
19fn pva<F, D>(
20    ys: &ArrayBase<D, Ix1>,
21    weights: Option<&[f32]>,
22    index: &[usize],
23) -> (Vec<F>, Vec<usize>)
24where
25    F: Float,
26    D: Data<Elem = F>,
27{
28    let n = ys.len();
29    let mut V = Vec::<F>::new();
30    let mut W = Vec::<F>::new();
31    let mut J_index: Vec<usize> = (0..n).collect();
32    let mut i = 0;
33    let (mut AvB_zero, mut W_B_zero) = waverage(ys, weights, i, i, index);
34    while i < n {
35        // Step 1
36        let j = J_index[i];
37        let k = j + 1;
38        if k == n {
39            break;
40        }
41        let l = J_index[k];
42        let (AvB_plus, W_B_plus) = waverage(ys, weights, k, l, index);
43        if AvB_zero <= AvB_plus {
44            V.push(AvB_zero);
45            W.push(W_B_zero);
46            AvB_zero = AvB_plus;
47            W_B_zero = W_B_plus;
48            i = k;
49        } else {
50            // Step 2
51            J_index[i] = l;
52            J_index[l] = i;
53            AvB_zero = AvB_zero * W_B_zero + AvB_plus * W_B_plus;
54            W_B_zero += W_B_plus;
55            AvB_zero /= W_B_zero;
56
57            // Step 2.1
58            let mut AvB_minus = *V.last().unwrap_or(&F::neg_infinity());
59            while !V.is_empty() && AvB_zero < AvB_minus {
60                AvB_minus = V.pop().unwrap();
61                let W_B_minus = W.pop().unwrap();
62                i = J_index[J_index[l] - 1];
63                J_index[l] = i;
64                J_index[i] = l;
65                AvB_zero = AvB_zero * W_B_zero + AvB_minus * W_B_minus;
66                W_B_zero += W_B_minus;
67                AvB_zero /= W_B_zero;
68            }
69        }
70    }
71
72    // Last block average
73    let (AvB_minus, _) = waverage(ys, weights, i, J_index[i], index);
74    V.push(AvB_minus);
75
76    (V, J_index)
77}
78
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(crate = "serde_crate")
)]
/// An isotonic regression model.
///
/// IsotonicRegression solves an isotonic regression problem using the pool
/// adjacent violators algorithm.
///
/// The model carries no hyperparameters; fitting produces a
/// [`FittedIsotonicRegression`] used for prediction.
///
/// ## Examples
///
/// Here's an example on how to train an isotonic regression model on
/// the first feature from the `diabetes` dataset.
/// ```rust
/// use linfa::{traits::Fit, traits::Predict, Dataset};
/// use linfa_linear::IsotonicRegression;
/// use linfa::prelude::SingleTargetRegression;
///
/// let diabetes = linfa_datasets::diabetes();
/// let dataset = diabetes.feature_iter().next().unwrap(); // get first 1D feature
/// let model = IsotonicRegression::default().fit(&dataset).unwrap();
/// let pred = model.predict(&dataset);
/// let r2 = pred.r2(&dataset).unwrap();
/// println!("r2 from prediction: {}", r2);
/// ```
///
/// ## References
///
/// Best, M.J., Chakravarti, N. Active set algorithms for isotonic regression;
/// A unifying framework. Mathematical Programming 47, 425–439 (1990).
pub struct IsotonicRegression {}
112
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(crate = "serde_crate")
)]
/// A fitted isotonic regression model which can be used for making predictions.
pub struct FittedIsotonicRegression<F> {
    /// Breakpoints on the x axis: the maximum x value of each pooled block,
    /// in block order (as built by `fit`).
    regressor: Array1<F>,
    /// Fitted response value for each pooled block, parallel to `regressor`.
    response: Array1<F>,
}
124
125impl IsotonicRegression {
126    /// Create a default isotonic regression model.
127    pub fn new() -> IsotonicRegression {
128        IsotonicRegression {}
129    }
130}
131
132impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
133    Fit<ArrayBase<D, Ix2>, T, LinearError<F>> for IsotonicRegression
134{
135    type Object = FittedIsotonicRegression<F>;
136
137    /// Fit an isotonic regression model given a feature matrix `X` and a target
138    /// variable `y`.
139    ///
140    /// The feature matrix `X` must have shape `(n_samples, 1)`
141    ///
142    /// The target variable `y` must have shape `(n_samples)`
143    ///
144    /// Returns a `FittedIsotonicRegression` object which contains the fitted
145    /// parameters and can be used to `predict` values of the target variable
146    /// for new feature values.
147    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object, F> {
148        let X = dataset.records();
149        let (n, dim) = X.dim();
150        let y = dataset.as_single_targets();
151
152        // Check the input dimension
153        assert_eq!(dim, 1, "The input dimension must be 1.");
154
155        // Check that our inputs have compatible shapes
156        assert_eq!(y.dim(), n);
157
158        // use correlation for determining relationship between x & y
159        let x = X.column(0);
160        let rho = DatasetBase::from(stack![Axis(1), x, y]).pearson_correlation();
161        let increasing = rho.get_coeffs()[0] >= F::zero();
162
163        // sort data
164        let mut indices = argsort_by(&x, |a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater));
165        if !increasing {
166            indices.reverse();
167        };
168
169        // Construct response value using particular algorithm
170        let (V, J_index) = pva(&y, dataset.weights(), &indices);
171        let response = Array1::from_vec(V.clone());
172
173        // Construct regressor array
174        let mut W = Vec::<F>::new();
175        let mut i = 0;
176        while i < n {
177            let j = J_index[i];
178            let x = X
179                .slice(s![i..=j, -1])
180                .into_iter()
181                .max_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater))
182                .unwrap();
183            W.push(*x);
184            i = j + 1
185        }
186        let regressor = Array1::from_vec(W.clone());
187
188        Ok(FittedIsotonicRegression {
189            regressor,
190            response,
191        })
192    }
193}
194
195fn waverage<F, D>(
196    vs: &ArrayBase<D, Ix1>,
197    ws: Option<&[f32]>,
198    start: usize,
199    end: usize,
200    index: &[usize],
201) -> (F, F)
202where
203    F: Float,
204    D: Data<Elem = F>,
205{
206    let mut wsum = F::zero();
207    let mut avg = F::zero();
208    for kk in &index[start..=end] {
209        let w = if let Some(ws) = ws {
210            F::cast(ws[*kk])
211        } else {
212            F::one()
213        };
214        wsum += w;
215        avg += vs[*kk] * w;
216    }
217    avg /= wsum;
218    (avg, wsum)
219}
220
221fn argsort_by<S, F>(arr: &ArrayBase<S, Ix1>, mut compare: F) -> Vec<usize>
222where
223    S: Data,
224    F: FnMut(&S::Elem, &S::Elem) -> Ordering,
225{
226    let mut indices: Vec<usize> = (0..arr.len()).collect();
227    indices.sort_unstable_by(move |&i, &j| compare(&arr[i], &arr[j]));
228    indices
229}
230
231impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array1<F>>
232    for FittedIsotonicRegression<F>
233{
234    /// Given an input matrix `X`, with shape `(n_samples, 1)`,
235    /// `predict` returns the target variable according to linear model
236    /// learned from the training data distribution.
237    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array1<F>) {
238        let (n_samples, dim) = x.dim();
239
240        // Check the input dimension
241        assert_eq!(dim, 1, "The input dimension must be 1.");
242
243        // Check that our inputs have compatible shapes
244        assert_eq!(
245            n_samples,
246            y.len(),
247            "The number of data points must match the number of output targets."
248        );
249
250        let regressor = &self.regressor;
251        let n = regressor.len();
252        let x_min = regressor[0];
253        let x_max = regressor[n - 1];
254
255        let response = &self.response;
256        let y_min = response[0];
257        let y_max = response[n - 1];
258
259        // calculate a piecewise linear approximation of response
260        for (i, row) in x.rows().into_iter().enumerate() {
261            let val = row[0];
262            if val >= x_max {
263                y[i] = y_max;
264            } else if val <= x_min {
265                y[i] = y_min;
266            } else {
267                let res = regressor.into_iter().position(|x| x >= &val);
268                if res.is_some() {
269                    let j = res.unwrap();
270                    if val <= regressor[j] && j < n {
271                        let x_scale = (val - regressor[j - 1]) / (regressor[j] - regressor[j - 1]);
272                        y[i] = response[j - 1] + x_scale * (response[j] - response[j - 1]);
273                    } else {
274                        y[i] = y_min;
275                    }
276                }
277            }
278        }
279    }
280
281    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array1<F> {
282        Array1::zeros(x.nrows())
283    }
284}
285
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use linfa::{traits::Predict, Dataset};
    use ndarray::array;

    // Smoke test: the public types must remain Send + Sync + Unpin.
    #[test]
    fn autotraits() {
        fn has_autotraits<T: Send + Sync + Sized + Unpin>() {}
        has_autotraits::<FittedIsotonicRegression<f64>>();
        has_autotraits::<IsotonicRegression>();
        has_autotraits::<LinearError<f64>>();
    }

    // `fit` must panic when X has more than one column.
    #[test]
    #[should_panic]
    fn dimension_mismatch() {
        let reg = IsotonicRegression::new();
        let dataset = Dataset::new(array![[3.3f64, 0.], [3.3, 0.]], array![4., 5.]);
        let _res = reg.fit(&dataset);
    }

    // `fit` must panic when y's length differs from the number of samples.
    #[test]
    #[should_panic]
    fn length_mismatch() {
        let reg = IsotonicRegression::default();
        let dataset = Dataset::new(array![[3.3f64, 0.], [3.3, 0.]], array![4., 5., 6.]);
        let _res = reg.fit(&dataset);
    }

    // Worked example from Best (1990), increasing case: checks the fitted
    // breakpoints/responses, in-sample predictions, and interpolation on new x.
    #[test]
    fn best_example1() {
        let reg = IsotonicRegression::new();
        let (X, y, regr, resp, yy, V, w) = (
            array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]], // X
            array![4., 5., 1., 6., 8., 7.0],                    // y
            array![3.3, 6., 7.5],                               // regressor
            array![10.0 / 3.0, 6., 7.5],                        // response
            array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5], // predict X
            array![[2.0f64], [5.], [7.], [9.]],                 // newX
            array![10. / 3., 5.01234567901234, 7., 7.5],        // predict newX
        );

        let dataset = Dataset::new(X, y);

        let model = reg.fit(&dataset).unwrap();
        assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
        assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);

        let result = model.predict(dataset.records());
        assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);

        let result = model.predict(&V);
        assert_abs_diff_eq!(result, &w, epsilon = 1e-12);
    }

    // Same example with x reversed: exercises the decreasing (negative
    // correlation) code path.
    #[test]
    fn best_example1_decr() {
        let reg = IsotonicRegression::default();
        let (X, y, regr, resp, yy, V, w) = (
            array![[7.5], [7.5], [6.], [3.3], [3.3], [3.3]], // X
            array![4., 5., 1., 6., 8., 7.0],                 // y
            array![7.5, 3.3, 3.3],
            array![10.0 / 3.0, 7., 7.],
            array![7., 7., 7., 7., 7., 7.],     // predict X
            array![[2.], [3.], [3.3], [7.]],    // newX
            array![10. / 3., 10. / 3., 7., 7.], // predict newX
        );

        let dataset = Dataset::new(X, y);

        let model = reg.fit(&dataset).unwrap();
        assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
        assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);

        let result = model.predict(dataset.records());
        assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);

        let result = model.predict(&V);
        assert_abs_diff_eq!(result, &w, epsilon = 1e-12);
    }

    // Longer increasing example with multiple pooled blocks and
    // interpolation between breakpoints.
    #[test]
    fn example2_incr() {
        let reg = IsotonicRegression::default();
        let (X, y, regr, resp, yy) = (
            array![[1.0f64], [2.], [3.], [4.], [5.], [6.], [7.], [8.], [9.]],
            array![1., 2., 6., 2., 1., 2., 8., 2., 1.0],
            array![1., 2., 6., 9.],
            array![1., 2., 2.75, 11. / 3.],
            array![
                1.,
                2.,
                2.1875,
                2.375,
                2.5625,
                2.75,
                55. / 18.,
                121. / 36.,
                11. / 3.
            ],
        );

        let dataset = Dataset::new(X, y);

        let model = reg.fit(&dataset).unwrap();
        assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
        assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);

        let result = model.predict(dataset.records());
        assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
    }

    // Same data with x reversed: decreasing code path on the longer example.
    #[test]
    fn example2_decr() {
        let reg = IsotonicRegression::default();
        let v1 = 11. / 3.;
        let (X, y, regr, resp, yy) = (
            array![[9.0], [8.], [7.], [6.], [5.], [4.], [3.], [2.], [1.]],
            array![1., 2., 6., 2., 1., 2., 8., 2., 1.0],
            array![9., 8., 7., 3.],
            array![1., 2., 2.75, v1],
            array![v1, v1, v1, v1, v1, v1, v1, 1., 1.],
        );

        let dataset = Dataset::new(X, y);

        let model = reg.fit(&dataset).unwrap();
        assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
        assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);

        let result = model.predict(dataset.records());
        assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
    }
}
421}