crash_throttling/
lib.rs

// Copyright 2026 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5use fuchsia_inspect::{Inspector, Node};
6use futures::FutureExt;
7use starnix_logging::log_info;
8use starnix_sync::Mutex;
9use std::collections::{HashMap, VecDeque};
10use std::sync::Arc;
11use zx::{self as zx};
12
/// The maximum number of crashes we allow to happen for a process within the last
/// [`CrashThrottler::crash_loop_age_out`] before we consider it to be crash looping. The limit is
/// exclusive: a process is only throttled once its recent crash count *exceeds* this value.
/// 8 within 8 minutes was chosen as a balance between "definitely a crash loop" and "still saves
/// system resources."
pub const CRASH_LOOP_LIMIT: usize = 8;
17
/// While a process is throttled, every Nth crash (N being this value) is still reported. That
/// report carries a higher "weight" so that it can represent the crashes that were dropped since
/// the previous report.
const REPORT_EVERY_X_WHILE_THROTTLED: u32 = 10;
21
22/// Decides whether to throttle crashes for a given process based on how frequently they are
23/// occurring. Records information about throttled crashes for diagnostics.
24pub struct CrashThrottler {
25    /// Diagnostics information. A mapping from process name -> number of crashes for that process
26    /// that weren't uploaded because of process throttling.
27    throttled_core_dumps: Arc<Mutex<HashMap<String, i64>>>,
28
29    /// Tracks when crashes occurred for each process name.
30    crashes_per_process: Arc<Mutex<HashMap<String, CrashInfo>>>,
31
32    /// The period before a crash is no longer considered for detecting crash loops.
33    pub crash_loop_age_out: zx::MonotonicDuration,
34
35    /// Whether excessive crash reports should be throttled.
36    enable_throttling: bool,
37}
38
/// A crash report that has been approved for filing, together with the number of crashes it
/// stands for.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PendingCrashReport {
    /// The current task's argv.
    pub argv: Vec<String>,

    /// The crashed process name.
    pub argv0: String,

    /// How many crashes this report represents. For example, a value of 10 would indicate that
    /// this report will represent 9 other throttled crashes for this process.
    pub weight: u32,
}
50
51impl CrashThrottler {
52    pub fn new(
53        inspect_node: &Node,
54        crash_loop_age_out: zx::MonotonicDuration,
55        enable_throttling: bool,
56    ) -> Self {
57        let throttler = Self {
58            throttled_core_dumps: Arc::new(Mutex::new(Default::default())),
59            crashes_per_process: Arc::new(Mutex::new(Default::default())),
60            crash_loop_age_out,
61            enable_throttling,
62        };
63
64        throttler.record_throttling_in_inspect(inspect_node);
65        throttler
66    }
67
68    /// Locally records that a crash for `process_name` occurred at `runtime` and returns a guard
69    /// for an in-flight report if few enough overall are in-flight, as well as the weight that
70    /// should be assigned to the crash report.
71    ///
72    /// Note: runtime is the total time the device has been on according to the monotonic clock, not
73    /// the amount of time the process was running.
74    pub fn should_report(
75        &self,
76        argv: Vec<String>,
77        argv0: String,
78        runtime: zx::MonotonicInstant,
79    ) -> Option<PendingCrashReport> {
80        if !self.enable_throttling {
81            return Some(PendingCrashReport { argv, argv0, weight: 1 });
82        }
83
84        // Locally record that the crash occurred.
85        let mut crashes_per_process = self.crashes_per_process.lock();
86        let crash_info = crashes_per_process.entry(argv0.clone()).or_default();
87        crash_info.crash_runtimes.push_back(runtime);
88
89        crash_info.prune_crash_runtimes(runtime, self.crash_loop_age_out);
90
91        // Even if we're not throttled, we still need to have a weight of 1 so incrementing this
92        // here will let us later use it as the weight.
93        crash_info.num_crashes_while_throttled += 1;
94
95        // Check if this particular process has been filing too many reports.
96        if crash_info.is_throttled_at(runtime, self.crash_loop_age_out)
97            && (crash_info.num_crashes_while_throttled < REPORT_EVERY_X_WHILE_THROTTLED)
98        {
99            log_info!(
100                "Process '{argv0}' is throttled due to suspected crash loop, will fold report into later crash"
101            );
102            *self.throttled_core_dumps.lock().entry(argv0).or_default() += 1;
103            return None;
104        }
105
106        let weight = crash_info.num_crashes_while_throttled;
107        crash_info.num_crashes_while_throttled = 0;
108
109        Some(PendingCrashReport { argv, argv0, weight })
110    }
111
112    fn record_throttling_in_inspect(&self, inspect_node: &Node) {
113        let throttled_core_dumps = self.throttled_core_dumps.clone();
114        let crashes_per_process = self.crashes_per_process.clone();
115        let crash_loop_age_out = self.crash_loop_age_out;
116
117        inspect_node.record_lazy_child("coredumps_throttled", move || {
118            let throttled_core_dumps = throttled_core_dumps.clone();
119            let crashes_per_process = crashes_per_process.clone();
120
121            async move {
122                let inspector = Inspector::default();
123                let mut crashes_per_process = crashes_per_process.lock();
124                let runtime = zx::MonotonicInstant::get();
125
126                for (process, count) in throttled_core_dumps.lock().iter() {
127                    let Some(crash_info) = crashes_per_process.get_mut(process) else {
128                        continue;
129                    };
130
131                    crash_info.prune_crash_runtimes(runtime, crash_loop_age_out);
132
133                    let process_node = inspector.root().create_child(process);
134                    process_node.record_bool(
135                        "currently_throttled",
136                        crash_info.is_throttled_at(runtime, crash_loop_age_out),
137                    );
138                    process_node.record_int("total_throttled_crashes", *count);
139                    if let Some(end) = crash_info.throttling_end(crash_loop_age_out) {
140                        process_node.record_int("throttling_runtime_end_millis", end.into_millis());
141                    }
142
143                    inspector.root().record(process_node);
144                }
145                Ok(inspector)
146            }
147            .boxed()
148        });
149    }
150}
151
152#[derive(Default)]
153struct CrashInfo {
154    /// How many crashes have occurred while throttled. Resets to 0 if the throttling ends or if a
155    /// representative report is uploaded every REPORT_EVERY_X_WHILE_THROTTLED.
156    num_crashes_while_throttled: u32,
157
158    /// When the crashes occurred. Crashes that occurred more than CrashReporter.crash_loop_age_out
159    /// ago may be removed.
160    crash_runtimes: VecDeque<zx::MonotonicInstant>,
161}
162
163impl CrashInfo {
164    /// Whether the process is throttled at a given instant.
165    fn is_throttled_at(
166        &self,
167        runtime: zx::MonotonicInstant,
168        crash_loop_age_out: zx::MonotonicDuration,
169    ) -> bool {
170        self.crash_runtimes.iter().filter(|&&x| (runtime - x) < crash_loop_age_out).count()
171            > CRASH_LOOP_LIMIT
172    }
173
174    /// When a process will no longer be throttled, if it currently is throttled.
175    fn throttling_end(
176        &self,
177        crash_loop_age_out: zx::MonotonicDuration,
178    ) -> Option<zx::MonotonicDuration> {
179        let throttling_end = self.crash_runtimes.iter().nth_back(CRASH_LOOP_LIMIT - 1)?;
180        Some(crash_loop_age_out + zx::Duration::from_nanos(throttling_end.into_nanos()))
181    }
182
183    // Only keeps entries that are within `crash_loop_age_out`.
184    fn prune_crash_runtimes(
185        &mut self,
186        runtime: zx::MonotonicInstant,
187        crash_loop_age_out: zx::MonotonicDuration,
188    ) {
189        self.crash_runtimes.retain(|&x| (runtime - x) < crash_loop_age_out);
190    }
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    const CRASH_LOOP_AGE_OUT: zx::MonotonicDuration = zx::Duration::from_minutes(8);
    const TIME_ZERO: zx::MonotonicInstant = zx::Instant::from_nanos(0);
    const PROCESS: &str = "test-process";

    /// Builds a throttler with throttling enabled and the given age-out period.
    fn new_throttler(age_out: zx::MonotonicDuration) -> CrashThrottler {
        CrashThrottler::new(
            &fuchsia_inspect::Node::default(),
            age_out,
            /*enable_throttling=*/ true,
        )
    }

    /// Records one crash of the test process at `runtime`.
    fn crash_at(
        throttler: &CrashThrottler,
        runtime: zx::MonotonicInstant,
    ) -> Option<PendingCrashReport> {
        throttler.should_report(vec![], PROCESS.to_string(), runtime)
    }

    /// Records `CRASH_LOOP_LIMIT` crashes at `runtime`, asserting that each one is reported.
    fn crash_up_to_limit(throttler: &CrashThrottler, runtime: zx::MonotonicInstant) {
        for _ in 0..CRASH_LOOP_LIMIT {
            assert!(crash_at(throttler, runtime).is_some());
        }
    }

    #[test]
    fn not_throttled() {
        let throttler = new_throttler(CRASH_LOOP_AGE_OUT);

        assert!(crash_at(&throttler, TIME_ZERO).is_some());
    }

    #[test]
    fn throttled() {
        let throttler = new_throttler(CRASH_LOOP_AGE_OUT);

        crash_up_to_limit(&throttler, TIME_ZERO);
        assert!(crash_at(&throttler, TIME_ZERO).is_none());
    }

    #[test]
    fn throttling_ages_out() {
        let throttler = new_throttler(CRASH_LOOP_AGE_OUT);

        crash_up_to_limit(&throttler, TIME_ZERO);
        assert!(crash_at(&throttler, TIME_ZERO).is_none());

        // Exactly one age-out period later, the earlier crashes no longer count.
        let aged_out = zx::MonotonicInstant::from_nanos(CRASH_LOOP_AGE_OUT.into_nanos());
        assert!(crash_at(&throttler, aged_out).is_some());
    }

    #[test]
    fn reports_some_crashes_while_throttled() {
        let throttler = new_throttler(CRASH_LOOP_AGE_OUT);

        crash_up_to_limit(&throttler, TIME_ZERO);

        for _ in 0..REPORT_EVERY_X_WHILE_THROTTLED - 1 {
            assert!(crash_at(&throttler, TIME_ZERO).is_none());
        }

        // The next crash is reported and stands in for all of the suppressed ones.
        assert_eq!(
            crash_at(&throttler, TIME_ZERO).unwrap().weight,
            REPORT_EVERY_X_WHILE_THROTTLED
        );
    }

    #[test]
    fn is_throttled_filters() {
        let mut crash_info = CrashInfo::default();

        crash_info.crash_runtimes.push_back(TIME_ZERO);
        for _ in 0..CRASH_LOOP_LIMIT {
            crash_info.crash_runtimes.push_back(zx::MonotonicInstant::from_nanos(50));
        }

        assert!(crash_info.is_throttled_at(TIME_ZERO, CRASH_LOOP_AGE_OUT));

        // At the age-out point the crash at time zero drops out, leaving only the limit.
        let aged_out = zx::MonotonicInstant::from_nanos(CRASH_LOOP_AGE_OUT.into_nanos());
        assert!(!crash_info.is_throttled_at(aged_out, CRASH_LOOP_AGE_OUT));
    }

    #[test]
    fn throttling_ends() {
        let age_out = zx::MonotonicDuration::from_millis(200);
        let throttler = new_throttler(age_out);

        let mut time = TIME_ZERO;

        crash_up_to_limit(&throttler, time);
        assert!(crash_at(&throttler, time).is_none());

        time += age_out + zx::MonotonicDuration::from_millis(50);

        assert!(crash_at(&throttler, time).is_some());
    }
}