criterion/analysis/
mod.rs

1use std::collections::BTreeMap;
2use std::path::Path;
3
4use stats::bivariate::regression::Slope;
5use stats::bivariate::Data;
6use stats::univariate::outliers::tukey::{self, LabeledSample};
7use stats::univariate::Sample;
8use stats::{Distribution, Tails};
9
10use benchmark::BenchmarkConfig;
11use estimate::{Distributions, Estimates, Statistic};
12use report::{BenchmarkId, ReportContext};
13use routine::Routine;
14use {build_estimates, Baseline, ConfidenceInterval, Criterion, Estimate, Throughput};
15use {format, fs};
16
/// Evaluates `$block`, reports through `info!` how long the evaluation took
/// (labelled with `$msg`), and yields the value produced by `$block`.
macro_rules! elapsed {
    ($msg:expr, $block:expr) => {{
        // Only the evaluation of `$block` sits between the two clock reads.
        let timer = ::std::time::Instant::now();
        let result = $block;
        let duration = timer.elapsed();

        info!(
            "{} took {}",
            $msg,
            format::time(::DurationExt::to_nanos(&duration) as f64)
        );

        result
    }};
}
32
33mod compare;
34
35// Common analysis procedure
36pub(crate) fn common<T>(
37    id: &BenchmarkId,
38    routine: &mut Routine<T>,
39    config: &BenchmarkConfig,
40    criterion: &Criterion,
41    report_context: &ReportContext,
42    parameter: &T,
43    throughput: Option<Throughput>,
44) {
45    if criterion.list_mode {
46        println!("{}: bench", id);
47        return;
48    }
49    criterion.report.benchmark_start(id, report_context);
50
51    // In test mode, run the benchmark exactly once, then exit.
52    if criterion.test_mode {
53        routine.test(parameter);
54        criterion.report.terminated(id, report_context);
55        return;
56    }
57
58    if let Baseline::Compare = criterion.baseline {
59        if !base_dir_exists(
60            id,
61            &criterion.baseline_directory,
62            &criterion.output_directory,
63        ) {
64            panic!(format!(
65                "Baseline '{base}' must exist before comparison is allowed; try --save-baseline {base}",
66                base=criterion.baseline_directory,
67            ));
68        }
69    }
70
71    // In profiling mode, skip all of the analysis.
72    if let Some(time) = criterion.profile_time {
73        routine.profile(id, criterion, report_context, time, parameter);
74        return;
75    }
76
77    let (iters, times) = routine.sample(id, config, criterion, report_context, parameter);
78
79    criterion.report.analysis(id, report_context);
80
81    let avg_times = iters
82        .iter()
83        .zip(times.iter())
84        .map(|(&iters, &elapsed)| elapsed / iters)
85        .collect::<Vec<f64>>();
86    let avg_times = Sample::new(&avg_times);
87
88    log_if_err!(fs::mkdirp(&format!(
89        "{}/{}/new",
90        criterion.output_directory,
91        id.as_directory_name()
92    )));
93
94    let data = Data::new(&iters, &times);
95    let labeled_sample = outliers(id, &criterion.output_directory, avg_times);
96    let (distribution, slope) = regression(&data, config);
97    let (mut distributions, mut estimates) = estimates(avg_times, config);
98
99    estimates.insert(Statistic::Slope, slope);
100    distributions.insert(Statistic::Slope, distribution);
101
102    log_if_err!(fs::save(
103        &(data.x().as_ref(), data.y().as_ref()),
104        &format!(
105            "{}/{}/new/sample.json",
106            criterion.output_directory,
107            id.as_directory_name()
108        ),
109    ));
110    log_if_err!(fs::save(
111        &estimates,
112        &format!(
113            "{}/{}/new/estimates.json",
114            criterion.output_directory,
115            id.as_directory_name()
116        )
117    ));
118
119    let compare_data = if base_dir_exists(
120        id,
121        &criterion.baseline_directory,
122        &criterion.output_directory,
123    ) {
124        let result = compare::common(id, avg_times, config, criterion);
125        match result {
126            Ok((
127                t_value,
128                t_distribution,
129                relative_estimates,
130                relative_distributions,
131                base_iter_counts,
132                base_sample_times,
133                base_avg_times,
134                base_estimates,
135            )) => {
136                let p_value = t_distribution.p_value(t_value, &Tails::Two);
137                Some(::report::ComparisonData {
138                    p_value,
139                    t_distribution,
140                    t_value,
141                    relative_estimates,
142                    relative_distributions,
143                    significance_threshold: config.significance_level,
144                    noise_threshold: config.noise_threshold,
145                    base_iter_counts,
146                    base_sample_times,
147                    base_avg_times,
148                    base_estimates,
149                })
150            }
151            Err(e) => {
152                ::error::log_error(&e);
153                None
154            }
155        }
156    } else {
157        None
158    };
159
160    let measurement_data = ::report::MeasurementData {
161        data: Data::new(&*iters, &*times),
162        avg_times: labeled_sample,
163        absolute_estimates: estimates.clone(),
164        distributions,
165        comparison: compare_data,
166        throughput,
167    };
168
169    criterion
170        .report
171        .measurement_complete(id, report_context, &measurement_data);
172
173    log_if_err!(fs::save(
174        &id,
175        &format!(
176            "{}/{}/new/benchmark.json",
177            criterion.output_directory,
178            id.as_directory_name()
179        )
180    ));
181
182    if let Baseline::Save = criterion.baseline {
183        copy_new_dir_to_base(
184            id.as_directory_name(),
185            &criterion.baseline_directory,
186            &criterion.output_directory,
187        );
188    }
189}
190
191fn base_dir_exists(id: &BenchmarkId, baseline: &str, output_directory: &str) -> bool {
192    Path::new(&format!(
193        "{}/{}/{}",
194        output_directory,
195        id.as_directory_name(),
196        baseline
197    ))
198    .exists()
199}
200
201// Performs a simple linear regression on the sample
202fn regression(data: &Data<f64, f64>, config: &BenchmarkConfig) -> (Distribution<f64>, Estimate) {
203    let cl = config.confidence_level;
204
205    let distribution = elapsed!(
206        "Bootstrapped linear regression",
207        data.bootstrap(config.nresamples, |d| (Slope::fit(&d).0,))
208    )
209    .0;
210
211    let point = Slope::fit(&data);
212    let (lb, ub) = distribution.confidence_interval(config.confidence_level);
213    let se = distribution.std_dev(None);
214
215    (
216        distribution,
217        Estimate {
218            confidence_interval: ConfidenceInterval {
219                confidence_level: cl,
220                lower_bound: lb,
221                upper_bound: ub,
222            },
223            point_estimate: point.0,
224            standard_error: se,
225        },
226    )
227}
228
229// Classifies the outliers in the sample
230fn outliers<'a>(
231    id: &BenchmarkId,
232    output_directory: &str,
233    avg_times: &'a Sample<f64>,
234) -> LabeledSample<'a, f64> {
235    let sample = tukey::classify(avg_times);
236    log_if_err!(fs::save(
237        &sample.fences(),
238        &format!(
239            "{}/{}/new/tukey.json",
240            output_directory,
241            id.as_directory_name()
242        )
243    ));
244    sample
245}
246
247// Estimates the statistics of the population from the sample
248fn estimates(avg_times: &Sample<f64>, config: &BenchmarkConfig) -> (Distributions, Estimates) {
249    fn stats(sample: &Sample<f64>) -> (f64, f64, f64, f64) {
250        let mean = sample.mean();
251        let std_dev = sample.std_dev(Some(mean));
252        let median = sample.percentiles().median();
253        let mad = sample.median_abs_dev(Some(median));
254
255        (mean, std_dev, median, mad)
256    }
257
258    let cl = config.confidence_level;
259    let nresamples = config.nresamples;
260
261    let (mean, std_dev, median, mad) = stats(avg_times);
262    let mut point_estimates = BTreeMap::new();
263    point_estimates.insert(Statistic::Mean, mean);
264    point_estimates.insert(Statistic::StdDev, std_dev);
265    point_estimates.insert(Statistic::Median, median);
266    point_estimates.insert(Statistic::MedianAbsDev, mad);
267
268    let (dist_mean, dist_stddev, dist_median, dist_mad) = elapsed!(
269        "Bootstrapping the absolute statistics.",
270        avg_times.bootstrap(nresamples, stats)
271    );
272
273    let mut distributions = Distributions::new();
274    distributions.insert(Statistic::Mean, dist_mean);
275    distributions.insert(Statistic::StdDev, dist_stddev);
276    distributions.insert(Statistic::Median, dist_median);
277    distributions.insert(Statistic::MedianAbsDev, dist_mad);
278
279    let estimates = build_estimates(&distributions, &point_estimates, cl);
280
281    (distributions, estimates)
282}
283
284fn copy_new_dir_to_base(id: &str, baseline: &str, output_directory: &str) {
285    let root_dir = Path::new(output_directory).join(id);
286    let base_dir = root_dir.join(baseline);
287    let new_dir = root_dir.join("new");
288
289    if !new_dir.exists() {
290        return;
291    };
292    if !base_dir.exists() {
293        try_else_return!(fs::mkdirp(&base_dir));
294    }
295
296    // TODO: consider using walkdir or similar to generically copy.
297    try_else_return!(fs::cp(
298        &new_dir.join("estimates.json"),
299        &base_dir.join("estimates.json")
300    ));
301    try_else_return!(fs::cp(
302        &new_dir.join("sample.json"),
303        &base_dir.join("sample.json")
304    ));
305    try_else_return!(fs::cp(
306        &new_dir.join("tukey.json"),
307        &base_dir.join("tukey.json")
308    ));
309    try_else_return!(fs::cp(
310        &new_dir.join("benchmark.json"),
311        &base_dir.join("benchmark.json")
312    ));
313    try_else_return!(fs::cp(&new_dir.join("raw.csv"), &base_dir.join("raw.csv")));
314}