1use std::{mem, ops};
23use cast;
4use rayon::prelude::*;
5use stats::float::Float;
67use stats::tuple::{Tuple, TupledDistributionsBuilder};
8use stats::univariate::Percentiles;
9use stats::univariate::Resamples;
1011/// A collection of data points drawn from a population
12///
13/// Invariants:
14///
15/// - The sample contains at least 2 data points
16/// - The sample contains no `NaN`s
17pub struct Sample<A>([A]);
1819// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module
20impl<A> Sample<A>
21where
22A: Float,
23{
24/// Creates a new sample from an existing slice
25 ///
26 /// # Panics
27 ///
28 /// Panics if `slice` contains any `NaN` or if `slice` has less than two elements
29#[cfg_attr(feature = "cargo-clippy", allow(clippy::new_ret_no_self))]
30pub fn new(slice: &[A]) -> &Sample<A> {
31assert!(slice.len() > 1 && slice.iter().all(|x| !x.is_nan()));
3233unsafe { mem::transmute(slice) }
34 }
3536/// Returns the biggest element in the sample
37 ///
38 /// - Time: `O(length)`
39pub fn max(&self) -> A {
40let mut elems = self.iter();
4142match elems.next() {
43Some(&head) => elems.fold(head, |a, &b| a.max(b)),
44// NB `unreachable!` because `Sample` is guaranteed to have at least one data point
45None => unreachable!(),
46 }
47 }
4849/// Returns the arithmetic average of the sample
50 ///
51 /// - Time: `O(length)`
52pub fn mean(&self) -> A {
53let n = self.len();
5455self.sum() / A::cast(n)
56 }
5758/// Returns the median absolute deviation
59 ///
60 /// The `median` can be optionally passed along to speed up (2X) the computation
61 ///
62 /// - Time: `O(length)`
63 /// - Memory: `O(length)`
64pub fn median_abs_dev(&self, median: Option<A>) -> A
65where
66usize: cast::From<A, Output = Result<usize, cast::Error>>,
67 {
68let median = median.unwrap_or_else(|| self.percentiles().median());
6970// NB Although this operation can be SIMD accelerated, the gain is negligible because the
71 // bottle neck is the sorting operation which is part of the computation of the median
72let abs_devs = self.iter().map(|&x| (x - median).abs()).collect::<Vec<_>>();
7374let abs_devs: &Self = Self::new(&abs_devs);
7576 abs_devs.percentiles().median() * A::cast(1.4826)
77 }
7879/// Returns the median absolute deviation as a percentage of the median
80 ///
81 /// - Time: `O(length)`
82 /// - Memory: `O(length)`
83pub fn median_abs_dev_pct(&self) -> A
84where
85usize: cast::From<A, Output = Result<usize, cast::Error>>,
86 {
87let _100 = A::cast(100);
88let median = self.percentiles().median();
89let mad = self.median_abs_dev(Some(median));
9091 (mad / median) * _100
92 }
9394/// Returns the smallest element in the sample
95 ///
96 /// - Time: `O(length)`
97pub fn min(&self) -> A {
98let mut elems = self.iter();
99100match elems.next() {
101Some(&elem) => elems.fold(elem, |a, &b| a.min(b)),
102// NB `unreachable!` because `Sample` is guaranteed to have at least one data point
103None => unreachable!(),
104 }
105 }
106107/// Returns a "view" into the percentiles of the sample
108 ///
109 /// This "view" makes consecutive computations of percentiles much faster (`O(1)`)
110 ///
111 /// - Time: `O(N log N) where N = length`
112 /// - Memory: `O(length)`
113pub fn percentiles(&self) -> Percentiles<A>
114where
115usize: cast::From<A, Output = Result<usize, cast::Error>>,
116 {
117use std::cmp::Ordering;
118119// NB This function assumes that there are no `NaN`s in the sample
120fn cmp<T>(a: &T, b: &T) -> Ordering
121where
122T: PartialOrd,
123 {
124if a < b {
125 Ordering::Less
126 } else if a == b {
127 Ordering::Equal
128 } else {
129 Ordering::Greater
130 }
131 }
132133let mut v = self.to_vec().into_boxed_slice();
134 v.par_sort_unstable_by(cmp);
135136// NB :-1: to intra-crate privacy rules
137unsafe { mem::transmute(v) }
138 }
139140/// Returns the standard deviation of the sample
141 ///
142 /// The `mean` can be optionally passed along to speed up (2X) the computation
143 ///
144 /// - Time: `O(length)`
145pub fn std_dev(&self, mean: Option<A>) -> A {
146self.var(mean).sqrt()
147 }
148149/// Returns the standard deviation as a percentage of the mean
150 ///
151 /// - Time: `O(length)`
152pub fn std_dev_pct(&self) -> A {
153let _100 = A::cast(100);
154let mean = self.mean();
155let std_dev = self.std_dev(Some(mean));
156157 (std_dev / mean) * _100
158 }
159160/// Returns the sum of all the elements of the sample
161 ///
162 /// - Time: `O(length)`
163pub fn sum(&self) -> A {
164 ::stats::sum(self)
165 }
166167/// Returns the t score between these two samples
168 ///
169 /// - Time: `O(length)`
170pub fn t(&self, other: &Sample<A>) -> A {
171let (x_bar, y_bar) = (self.mean(), other.mean());
172let (s2_x, s2_y) = (self.var(Some(x_bar)), other.var(Some(y_bar)));
173let n_x = A::cast(self.len());
174let n_y = A::cast(other.len());
175let num = x_bar - y_bar;
176let den = (s2_x / n_x + s2_y / n_y).sqrt();
177178 num / den
179 }
180181/// Returns the variance of the sample
182 ///
183 /// The `mean` can be optionally passed along to speed up (2X) the computation
184 ///
185 /// - Time: `O(length)`
186pub fn var(&self, mean: Option<A>) -> A {
187use std::ops::Add;
188189let mean = mean.unwrap_or_else(|| self.mean());
190let slice = self;
191192let sum = slice
193 .iter()
194 .map(|&x| (x - mean).powi(2))
195 .fold(A::cast(0), Add::add);
196197 sum / A::cast(slice.len() - 1)
198 }
199200// TODO Remove the `T` parameter in favor of `S::Output`
201/// Returns the bootstrap distributions of the parameters estimated by the 1-sample statistic
202 ///
203 /// - Multi-threaded
204 /// - Time: `O(nresamples)`
205 /// - Memory: `O(nresamples)`
206pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions
207where
208S: Fn(&Sample<A>) -> T,
209 S: Sync,
210 T: Tuple,
211 T: Send,
212 T::Distributions: Send,
213 T::Builder: Send,
214 {
215 (0..nresamples)
216 .into_par_iter()
217 .map_init(
218 || Resamples::new(self),
219 |resamples, _| statistic(resamples.next()),
220 )
221 .fold(
222 || T::Builder::new(0),
223 |mut sub_distributions, sample| {
224 sub_distributions.push(sample);
225 sub_distributions
226 },
227 )
228 .reduce(
229 || T::Builder::new(0),
230 |mut a, mut b| {
231 a.extend(&mut b);
232 a
233 },
234 )
235 .complete()
236 }
237238#[cfg(test)]
239pub fn iqr(&self) -> A
240where
241usize: cast::From<A, Output = Result<usize, cast::Error>>,
242 {
243self.percentiles().iqr()
244 }
245246#[cfg(test)]
247pub fn median(&self) -> A
248where
249usize: cast::From<A, Output = Result<usize, cast::Error>>,
250 {
251self.percentiles().median()
252 }
253}
254255impl<A> ops::Deref for Sample<A> {
256type Target = [A];
257258fn deref(&self) -> &[A] {
259&self.0
260}
261}