criterion/stats/univariate/kde/mod.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
//! Kernel density estimation
pub mod kernel;
use rayon::prelude::*;
use stats::float::Float;
use stats::univariate::Sample;
use self::kernel::Kernel;
/// Univariate kernel density estimator
pub struct Kde<'a, A, K>
where
A: 'a + Float,
K: Kernel<A>,
{
bandwidth: A,
kernel: K,
sample: &'a Sample<A>,
}
impl<'a, A, K> Kde<'a, A, K>
where
A: 'a + Float,
K: Kernel<A>,
{
/// Creates a new kernel density estimator from the `sample`, using a kernel and estimating
/// the bandwidth using the method `bw`
pub fn new(sample: &'a Sample<A>, kernel: K, bw: Bandwidth) -> Kde<'a, A, K> {
Kde {
bandwidth: bw.estimate(sample),
kernel,
sample,
}
}
/// Returns the bandwidth used by the estimator
pub fn bandwidth(&self) -> A {
self.bandwidth
}
/// Maps the KDE over `xs`
///
/// - Multihreaded
pub fn map(&self, xs: &[A]) -> Box<[A]> {
xs.par_iter()
.map(|&x| self.estimate(x))
.collect::<Vec<_>>()
.into_boxed_slice()
}
/// Estimates the probability density of `x`
pub fn estimate(&self, x: A) -> A {
let _0 = A::cast(0);
let slice = self.sample;
let h = self.bandwidth;
let n = A::cast(slice.len());
let sum = slice
.iter()
.fold(_0, |acc, &x_i| acc + self.kernel.evaluate((x - x_i) / h));
sum / h / n
}
}
/// Method to estimate the bandwidth
pub enum Bandwidth {
/// Use Silverman's rule of thumb to estimate the bandwidth from the sample
Silverman,
}
impl Bandwidth {
fn estimate<A: Float>(self, sample: &Sample<A>) -> A {
match self {
Bandwidth::Silverman => {
let factor = A::cast(4. / 3.);
let exponent = A::cast(1. / 5.);
let n = A::cast(sample.len());
let sigma = sample.std_dev(None);
sigma * (factor / n).powf(exponent)
}
}
}
}
#[cfg(test)]
macro_rules! test {
($ty:ident) => {
mod $ty {
use quickcheck::TestResult;
use stats::univariate::kde::kernel::Gaussian;
use stats::univariate::kde::{Bandwidth, Kde};
use stats::univariate::Sample;
// The [-inf inf] integral of the estimated PDF should be one
quickcheck! {
fn integral(size: usize, start: usize) -> TestResult {
const DX: $ty = 1e-3;
if let Some(v) = ::stats::test::vec::<$ty>(size, start) {
let slice = &v[start..];
let data = Sample::new(slice);
let kde = Kde::new(data, Gaussian, Bandwidth::Silverman);
let h = kde.bandwidth();
// NB Obviously a [-inf inf] integral is not feasible, but this range works
// quite well
let (a, b) = (data.min() - 5. * h, data.max() + 5. * h);
let mut acc = 0.;
let mut x = a;
let mut y = kde.estimate(a);
while x < b {
acc += DX * y / 2.;
x += DX;
y = kde.estimate(x);
acc += DX * y / 2.;
}
TestResult::from_bool(relative_eq!(acc, 1., epsilon = 2e-5))
} else {
TestResult::discard()
}
}
}
}
};
}
#[cfg(test)]
mod test {
test!(f32);
test!(f64);
}