1use fuchsia_inspect::{Inspector, Node};
6use futures::FutureExt;
7use starnix_logging::log_info;
8use starnix_sync::Mutex;
9use std::collections::{HashMap, VecDeque};
10use std::sync::Arc;
11use zx::{self as zx};
12
/// Number of crashes a single process may accumulate inside the age-out window
/// before it is treated as crash-looping.
///
/// A process counts as throttled once strictly more than this many of its
/// recorded crashes fall within the window.
pub const CRASH_LOOP_LIMIT: usize = 8;
17
/// While a process is throttled, its crashes are suppressed until this many
/// have accumulated since the last emitted report; the crash that reaches this
/// count is reported and carries the accumulated count as its weight.
const REPORT_EVERY_X_WHILE_THROTTLED: u32 = 10;
21
22pub struct CrashThrottler {
25 throttled_core_dumps: Arc<Mutex<HashMap<String, i64>>>,
28
29 crashes_per_process: Arc<Mutex<HashMap<String, CrashInfo>>>,
31
32 pub crash_loop_age_out: zx::MonotonicDuration,
34
35 enable_throttling: bool,
37}
38
/// A crash report that the throttler has decided should be emitted.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so that reports can be
/// logged, duplicated and compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PendingCrashReport {
    /// Full argument vector of the crashed process, as passed to
    /// `CrashThrottler::should_report`.
    pub argv: Vec<String>,

    /// Name under which the process is tracked for throttling purposes.
    pub argv0: String,

    /// Number of crashes this single report stands for: 1 when throttling is
    /// disabled, otherwise the number of crashes observed since the previous
    /// emitted report for the same process (this crash included).
    pub weight: u32,
}
50
51impl CrashThrottler {
52 pub fn new(
53 inspect_node: &Node,
54 crash_loop_age_out: zx::MonotonicDuration,
55 enable_throttling: bool,
56 ) -> Self {
57 let throttler = Self {
58 throttled_core_dumps: Arc::new(Mutex::new(Default::default())),
59 crashes_per_process: Arc::new(Mutex::new(Default::default())),
60 crash_loop_age_out,
61 enable_throttling,
62 };
63
64 throttler.record_throttling_in_inspect(inspect_node);
65 throttler
66 }
67
68 pub fn should_report(
75 &self,
76 argv: Vec<String>,
77 argv0: String,
78 runtime: zx::MonotonicInstant,
79 ) -> Option<PendingCrashReport> {
80 if !self.enable_throttling {
81 return Some(PendingCrashReport { argv, argv0, weight: 1 });
82 }
83
84 let mut crashes_per_process = self.crashes_per_process.lock();
86 let crash_info = crashes_per_process.entry(argv0.clone()).or_default();
87 crash_info.crash_runtimes.push_back(runtime);
88
89 crash_info.prune_crash_runtimes(runtime, self.crash_loop_age_out);
90
91 crash_info.num_crashes_while_throttled += 1;
94
95 if crash_info.is_throttled_at(runtime, self.crash_loop_age_out)
97 && (crash_info.num_crashes_while_throttled < REPORT_EVERY_X_WHILE_THROTTLED)
98 {
99 log_info!(
100 "Process '{argv0}' is throttled due to suspected crash loop, will fold report into later crash"
101 );
102 *self.throttled_core_dumps.lock().entry(argv0).or_default() += 1;
103 return None;
104 }
105
106 let weight = crash_info.num_crashes_while_throttled;
107 crash_info.num_crashes_while_throttled = 0;
108
109 Some(PendingCrashReport { argv, argv0, weight })
110 }
111
112 fn record_throttling_in_inspect(&self, inspect_node: &Node) {
113 let throttled_core_dumps = self.throttled_core_dumps.clone();
114 let crashes_per_process = self.crashes_per_process.clone();
115 let crash_loop_age_out = self.crash_loop_age_out;
116
117 inspect_node.record_lazy_child("coredumps_throttled", move || {
118 let throttled_core_dumps = throttled_core_dumps.clone();
119 let crashes_per_process = crashes_per_process.clone();
120
121 async move {
122 let inspector = Inspector::default();
123 let mut crashes_per_process = crashes_per_process.lock();
124 let runtime = zx::MonotonicInstant::get();
125
126 for (process, count) in throttled_core_dumps.lock().iter() {
127 let Some(crash_info) = crashes_per_process.get_mut(process) else {
128 continue;
129 };
130
131 crash_info.prune_crash_runtimes(runtime, crash_loop_age_out);
132
133 let process_node = inspector.root().create_child(process);
134 process_node.record_bool(
135 "currently_throttled",
136 crash_info.is_throttled_at(runtime, crash_loop_age_out),
137 );
138 process_node.record_int("total_throttled_crashes", *count);
139 if let Some(end) = crash_info.throttling_end(crash_loop_age_out) {
140 process_node.record_int("throttling_runtime_end_millis", end.into_millis());
141 }
142
143 inspector.root().record(process_node);
144 }
145 Ok(inspector)
146 }
147 .boxed()
148 });
149 }
150}
151
152#[derive(Default)]
153struct CrashInfo {
154 num_crashes_while_throttled: u32,
157
158 crash_runtimes: VecDeque<zx::MonotonicInstant>,
161}
162
163impl CrashInfo {
164 fn is_throttled_at(
166 &self,
167 runtime: zx::MonotonicInstant,
168 crash_loop_age_out: zx::MonotonicDuration,
169 ) -> bool {
170 self.crash_runtimes.iter().filter(|&&x| (runtime - x) < crash_loop_age_out).count()
171 > CRASH_LOOP_LIMIT
172 }
173
174 fn throttling_end(
176 &self,
177 crash_loop_age_out: zx::MonotonicDuration,
178 ) -> Option<zx::MonotonicDuration> {
179 let throttling_end = self.crash_runtimes.iter().nth_back(CRASH_LOOP_LIMIT - 1)?;
180 Some(crash_loop_age_out + zx::Duration::from_nanos(throttling_end.into_nanos()))
181 }
182
183 fn prune_crash_runtimes(
185 &mut self,
186 runtime: zx::MonotonicInstant,
187 crash_loop_age_out: zx::MonotonicDuration,
188 ) {
189 self.crash_runtimes.retain(|&x| (runtime - x) < crash_loop_age_out);
190 }
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Age-out window used by most tests.
    const CRASH_LOOP_AGE_OUT: zx::MonotonicDuration = zx::Duration::from_minutes(8);

    /// Process name under which every test crash is filed.
    const PROCESS: &str = "test-process";

    /// Builds a throttler with throttling enabled and a throwaway inspect node.
    fn throttler_with_age_out(age_out: zx::MonotonicDuration) -> CrashThrottler {
        CrashThrottler::new(&fuchsia_inspect::Node::default(), age_out, true)
    }

    /// Files one crash of `PROCESS` at instant `at`.
    fn crash_at(throttler: &CrashThrottler, at: zx::MonotonicInstant) -> Option<PendingCrashReport> {
        throttler.should_report(vec![], PROCESS.to_string(), at)
    }

    /// Files `CRASH_LOOP_LIMIT` crashes at `at`, asserting each one is reported.
    fn crash_up_to_limit(throttler: &CrashThrottler, at: zx::MonotonicInstant) {
        for _ in 0..CRASH_LOOP_LIMIT {
            assert!(crash_at(throttler, at).is_some());
        }
    }

    #[test]
    fn not_throttled() {
        let throttler = throttler_with_age_out(CRASH_LOOP_AGE_OUT);

        assert!(crash_at(&throttler, zx::Instant::from_nanos(0)).is_some());
    }

    #[test]
    fn throttled() {
        let throttler = throttler_with_age_out(CRASH_LOOP_AGE_OUT);

        crash_up_to_limit(&throttler, zx::Instant::from_nanos(0));

        // One crash past the limit is suppressed.
        assert!(crash_at(&throttler, zx::Instant::from_nanos(0)).is_none());
    }

    #[test]
    fn throttling_ages_out() {
        let throttler = throttler_with_age_out(CRASH_LOOP_AGE_OUT);

        crash_up_to_limit(&throttler, zx::Instant::from_nanos(0));
        assert!(crash_at(&throttler, zx::Instant::from_nanos(0)).is_none());

        // Exactly one window later all earlier crashes have aged out.
        let one_window_later = zx::Instant::from_nanos(CRASH_LOOP_AGE_OUT.into_nanos());
        assert!(crash_at(&throttler, one_window_later).is_some());
    }

    #[test]
    fn reports_some_crashes_while_throttled() {
        const RUNTIME: zx::MonotonicInstant = zx::Instant::from_nanos(0);
        let throttler = throttler_with_age_out(CRASH_LOOP_AGE_OUT);

        crash_up_to_limit(&throttler, RUNTIME);

        for _ in 0..REPORT_EVERY_X_WHILE_THROTTLED - 1 {
            assert!(crash_at(&throttler, RUNTIME).is_none());
        }

        // The next crash is reported and stands for all the suppressed ones.
        let report = crash_at(&throttler, RUNTIME).unwrap();
        assert_eq!(report.weight, REPORT_EVERY_X_WHILE_THROTTLED);
    }

    #[test]
    fn is_throttled_filters() {
        let mut crash_info = CrashInfo::default();

        // One crash at t=0 followed by CRASH_LOOP_LIMIT crashes at t=50ns.
        crash_info.crash_runtimes.push_back(zx::MonotonicInstant::from_nanos(0));
        crash_info.crash_runtimes.extend(
            std::iter::repeat(zx::MonotonicInstant::from_nanos(50)).take(CRASH_LOOP_LIMIT),
        );

        let start = zx::MonotonicInstant::from_nanos(0);
        let one_window_later = zx::MonotonicInstant::from_nanos(CRASH_LOOP_AGE_OUT.into_nanos());

        assert!(crash_info.is_throttled_at(start, CRASH_LOOP_AGE_OUT));
        // The t=0 crash no longer counts, leaving exactly the limit.
        assert!(!crash_info.is_throttled_at(one_window_later, CRASH_LOOP_AGE_OUT));
    }

    #[test]
    fn throttling_ends() {
        let age_out: zx::MonotonicDuration = zx::Duration::from_millis(200);
        let throttler = throttler_with_age_out(age_out);

        let mut time: zx::MonotonicInstant = zx::Instant::from_nanos(0);

        crash_up_to_limit(&throttler, time);
        assert!(crash_at(&throttler, time).is_none());

        // Move past the window; the next crash must be reported again.
        time += age_out + zx::Duration::from_millis(50);
        assert!(crash_at(&throttler, time).is_some());
    }
}