criterion/stats/univariate/kde/
mod.rs

1//! Kernel density estimation
2
3pub mod kernel;
4
5use rayon::prelude::*;
6use stats::float::Float;
7
8use stats::univariate::Sample;
9
10use self::kernel::Kernel;
11
12/// Univariate kernel density estimator
13pub struct Kde<'a, A, K>
14where
15    A: 'a + Float,
16    K: Kernel<A>,
17{
18    bandwidth: A,
19    kernel: K,
20    sample: &'a Sample<A>,
21}
22
23impl<'a, A, K> Kde<'a, A, K>
24where
25    A: 'a + Float,
26    K: Kernel<A>,
27{
28    /// Creates a new kernel density estimator from the `sample`, using a kernel and estimating
29    /// the bandwidth using the method `bw`
30    pub fn new(sample: &'a Sample<A>, kernel: K, bw: Bandwidth) -> Kde<'a, A, K> {
31        Kde {
32            bandwidth: bw.estimate(sample),
33            kernel,
34            sample,
35        }
36    }
37
38    /// Returns the bandwidth used by the estimator
39    pub fn bandwidth(&self) -> A {
40        self.bandwidth
41    }
42
43    /// Maps the KDE over `xs`
44    ///
45    /// - Multihreaded
46    pub fn map(&self, xs: &[A]) -> Box<[A]> {
47        xs.par_iter()
48            .map(|&x| self.estimate(x))
49            .collect::<Vec<_>>()
50            .into_boxed_slice()
51    }
52
53    /// Estimates the probability density of `x`
54    pub fn estimate(&self, x: A) -> A {
55        let _0 = A::cast(0);
56        let slice = self.sample;
57        let h = self.bandwidth;
58        let n = A::cast(slice.len());
59
60        let sum = slice
61            .iter()
62            .fold(_0, |acc, &x_i| acc + self.kernel.evaluate((x - x_i) / h));
63
64        sum / h / n
65    }
66}
67
68/// Method to estimate the bandwidth
69pub enum Bandwidth {
70    /// Use Silverman's rule of thumb to estimate the bandwidth from the sample
71    Silverman,
72}
73
74impl Bandwidth {
75    fn estimate<A: Float>(self, sample: &Sample<A>) -> A {
76        match self {
77            Bandwidth::Silverman => {
78                let factor = A::cast(4. / 3.);
79                let exponent = A::cast(1. / 5.);
80                let n = A::cast(sample.len());
81                let sigma = sample.std_dev(None);
82
83                sigma * (factor / n).powf(exponent)
84            }
85        }
86    }
87}
88
89#[cfg(test)]
90macro_rules! test {
91    ($ty:ident) => {
92        mod $ty {
93            use quickcheck::TestResult;
94
95            use stats::univariate::kde::kernel::Gaussian;
96            use stats::univariate::kde::{Bandwidth, Kde};
97            use stats::univariate::Sample;
98
99            // The [-inf inf] integral of the estimated PDF should be one
100            quickcheck! {
101                fn integral(size: usize, start: usize) -> TestResult {
102                    const DX: $ty = 1e-3;
103
104                    if let Some(v) = ::stats::test::vec::<$ty>(size, start) {
105                        let slice = &v[start..];
106                        let data = Sample::new(slice);
107                        let kde = Kde::new(data, Gaussian, Bandwidth::Silverman);
108                        let h = kde.bandwidth();
109                        // NB Obviously a [-inf inf] integral is not feasible, but this range works
110                        // quite well
111                        let (a, b) = (data.min() - 5. * h, data.max() + 5. * h);
112
113                        let mut acc = 0.;
114                        let mut x = a;
115                        let mut y = kde.estimate(a);
116
117                        while x < b {
118                            acc += DX * y / 2.;
119
120                            x += DX;
121                            y = kde.estimate(x);
122
123                            acc += DX * y / 2.;
124                        }
125
126                        TestResult::from_bool(relative_eq!(acc, 1., epsilon = 2e-5))
127                    } else {
128                        TestResult::discard()
129                    }
130                }
131            }
132        }
133    };
134}
135
136#[cfg(test)]
137mod test {
138    test!(f32);
139    test!(f64);
140}