starnix_core/execution/
crash_reporter.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::task::{CurrentTask, ExitStatus};
6use crash_throttling::{CrashThrottler, PendingCrashReport};
7use fidl_fuchsia_feedback::{
8    Annotation, CrashReport, CrashReporterProxy, MAX_ANNOTATION_VALUE_LENGTH,
9    MAX_CRASH_SIGNATURE_LENGTH, NativeCrashReport, SpecificCrashReport,
10};
11use fuchsia_inspect::Node;
12use starnix_logging::{
13    CATEGORY_STARNIX, CoreDumpInfo, CoreDumpList, TraceScope, log_error, log_info, log_warn,
14    trace_instant,
15};
16
/// Records core dumps of crashed tasks and files the corresponding crash reports.
///
/// A crash is handled in two steps: `begin_crash_report` consults the throttler to decide
/// whether the crash should be reported at all, and `handle_core_dump` records the core dump
/// and files the report.
pub struct CrashReporter {
    /// Diagnostics information about crashed tasks.
    core_dumps: CoreDumpList,

    /// Throttles crash reports to avoid spamming the system.
    throttler: CrashThrottler,

    /// Connection to the feedback stack for reporting crashes. When this is `None`, crash
    /// reports are written to the log instead of being filed.
    proxy: Option<CrashReporterProxy>,
}
27
28impl CrashReporter {
29    pub fn new(
30        inspect_node: &Node,
31        proxy: Option<CrashReporterProxy>,
32        crash_loop_age_out: zx::MonotonicDuration,
33        enable_throttling: bool,
34    ) -> Self {
35        Self {
36            core_dumps: CoreDumpList::new(inspect_node.create_child("coredumps")),
37            throttler: CrashThrottler::new(inspect_node, crash_loop_age_out, enable_throttling),
38            proxy,
39        }
40    }
41
42    /// Returns a PendingCrashReport if the crash report should be reported. Otherwise, returns
43    /// None.
44    pub fn begin_crash_report(&self, current_task: &CurrentTask) -> Option<PendingCrashReport> {
45        let argv = current_task
46            .read_argv(MAX_ANNOTATION_VALUE_LENGTH as usize)
47            .unwrap_or_else(|_| vec!["<unknown>".into()])
48            .into_iter()
49            .map(|a| a.to_string())
50            .collect::<Vec<_>>();
51        let argv0 = argv.get(0).map(AsRef::as_ref).unwrap_or_else(|| "<unknown>");
52
53        // Get the filename.
54        let argv0 = argv0.rsplit_once("/").unwrap_or(("", &argv0)).1.to_string();
55
56        self.throttler.should_report(argv, argv0, zx::MonotonicInstant::get())
57    }
58
59    /// Callers should first check whether the crash should be reported via begin_crash_report.
60    pub fn handle_core_dump(
61        &self,
62        current_task: &CurrentTask,
63        exit_status: &ExitStatus,
64        pending_crash_report: PendingCrashReport,
65    ) {
66        trace_instant!(CATEGORY_STARNIX, "RecordCoreDump", TraceScope::Process);
67
68        let argv = pending_crash_report.argv;
69        let argv0 = pending_crash_report.argv0;
70        let process_koid = current_task
71            .thread_group()
72            .process
73            .koid()
74            .expect("handles for processes with crashing threads are still valid");
75        let thread_koid = current_task
76            .thread
77            .read()
78            .as_ref()
79            .expect("coredumps occur in tasks with associated threads")
80            .koid()
81            .expect("handles for crashing threads are still valid");
82        let linux_pid = current_task.thread_group().leader as i64;
83        let thread_name = current_task.command().to_string();
84        let signal = match exit_status {
85            ExitStatus::CoreDump(s) => s.signal,
86            other => {
87                log_error!(
88                    "only core dump exit statuses should be handled as core dumps, got {other:?}"
89                );
90                return;
91            }
92        };
93
94        // TODO(https://fxbug.dev/356912301) use boot time
95        let uptime = zx::MonotonicInstant::get() - current_task.thread_group().start_time;
96
97        let dump_info = CoreDumpInfo {
98            process_koid,
99            thread_koid,
100            linux_pid,
101            uptime: uptime.into_nanos(),
102            argv: argv.clone(),
103            thread_name: thread_name.clone(),
104            signal: signal.to_string(),
105        };
106        self.core_dumps.record_core_dump(dump_info);
107
108        let mut argv_joined = argv.join(" ");
109        truncate_with_ellipsis(&mut argv_joined, MAX_ANNOTATION_VALUE_LENGTH as usize);
110
111        let mut env_joined = current_task
112            .read_env(MAX_ANNOTATION_VALUE_LENGTH as usize)
113            .unwrap_or_else(|_| vec![])
114            .into_iter()
115            .map(|a| a.to_string())
116            .collect::<Vec<_>>()
117            .join(" ");
118        truncate_with_ellipsis(&mut env_joined, MAX_ANNOTATION_VALUE_LENGTH as usize);
119
120        let signal_str = signal.to_string();
121
122        // Truncate program name to fit in crash signature with a space and signal string added.
123        let max_signature_prefix_len = MAX_CRASH_SIGNATURE_LENGTH as usize - (signal_str.len() + 1);
124        let mut crash_signature = argv0.clone();
125        truncate_with_ellipsis(&mut crash_signature, max_signature_prefix_len);
126        crash_signature.push(' ');
127        crash_signature.push_str(&signal_str);
128
129        let crash_report = CrashReport {
130            crash_signature: Some(crash_signature),
131            program_name: Some(argv0.clone()),
132            program_uptime: Some(uptime.into_nanos()),
133            specific_report: Some(SpecificCrashReport::Native(NativeCrashReport {
134                process_koid: Some(process_koid.raw_koid()),
135                process_name: Some(argv0),
136                thread_koid: Some(thread_koid.raw_koid()),
137                thread_name: Some(thread_name),
138                ..Default::default()
139            })),
140            annotations: Some(vec![
141                // Note that this pid will be different from the Zircon process koid that's visible
142                // to the rest of Fuchsia. We want to include both so that this can be correlated
143                // against debugging artifacts produced by Android code.
144                Annotation { key: "linux.pid".to_string(), value: linux_pid.to_string() },
145                Annotation { key: "linux.argv".to_string(), value: argv_joined },
146                Annotation { key: "linux.env".to_string(), value: env_joined },
147                Annotation { key: "linux.signal".to_string(), value: signal_str },
148            ]),
149            is_fatal: Some(true),
150            weight: Some(pending_crash_report.weight),
151            ..Default::default()
152        };
153
154        if let Some(reporter) = &self.proxy {
155            let reporter = reporter.clone();
156            // Do the actual report in the background since they can take a while to file.
157            current_task.kernel().kthreads.spawn_future(async move || {
158                match reporter.file_report(crash_report).await {
159                    Ok(Ok(_)) => (),
160                    Ok(Err(filing_error)) => {
161                        log_error!(filing_error:?; "Couldn't file crash report.");
162                    }
163                    Err(fidl_error) => log_warn!(
164                        fidl_error:?;
165                        "Couldn't file crash report due to error on underlying channel."
166                    ),
167                };
168            });
169        } else {
170            log_info!(crash_report:?; "no crash reporter available for crash");
171        }
172    }
173}
174
/// Shortens `s` in place so that it is at most `max_len` bytes long.
///
/// A string that already fits is left untouched. Otherwise the string is cut at a character
/// boundary and `"..."` is appended, with the ellipsis counted against `max_len`. Because only
/// whole characters are removed, the result may be shorter than `max_len` when `s` contains
/// multi-byte characters.
///
/// When `max_len` is too small to hold the ellipsis (fewer than 3 bytes), the string is cut to
/// at most `max_len` bytes and no ellipsis is appended, so the result never exceeds `max_len`.
fn truncate_with_ellipsis(s: &mut String, max_len: usize) {
    const ELLIPSIS: &str = "...";

    if s.len() <= max_len {
        return;
    }

    // Reserve room for the ellipsis if the budget allows it. `checked_sub` guards the
    // subtraction, which would otherwise underflow for `max_len < 3`.
    let (content_budget, suffix) = match max_len.checked_sub(ELLIPSIS.len()) {
        Some(budget) => (budget, ELLIPSIS),
        None => (max_len, ""),
    };

    // String::truncate panics if the new max length is in the middle of a character, so back up
    // to the nearest character boundary. Offset 0 is always a boundary, so this terminates, and
    // `content_budget < s.len()` keeps the index in range.
    let mut new_len = content_budget;
    while !s.is_char_boundary(new_len) {
        new_len -= 1;
    }

    s.truncate(new_len);
    s.push_str(suffix);
}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// A string exactly as long as the limit must come back unchanged.
    #[test]
    fn truncate_noop_on_max_length_string() {
        let original = "1234567890";
        let mut text = original.to_owned();
        truncate_with_ellipsis(&mut text, 10);
        assert_eq!(text, original);
    }

    /// A string one byte over the limit is cut and ends with the ellipsis.
    #[test]
    fn truncate_adds_ellipsis() {
        let mut text = "1234567890".to_owned();
        truncate_with_ellipsis(&mut text, 9);
        assert_eq!(text.len(), 9);
        assert_eq!(text, "123456...", "truncate must add ellipsis and still fit under max len");
    }

    /// A cut that would land inside a multi-byte character backs up to a whole character.
    #[test]
    fn truncate_is_sensible_in_middle_of_multibyte_chars() {
        // Each æ occupies 2 bytes, so every odd byte offset falls inside a character. The
        // ellipsis consumes 3 bytes of the budget, which means an even max length is what puts
        // the cut point at an odd offset.
        let mut text = "æææææææææ".to_owned();
        truncate_with_ellipsis(&mut text, 8);
        assert_eq!(
            text.len(),
            7,
            "may end up shorter than provided max length w/ multi-byte chars"
        );
        assert_eq!(text, "ææ...", "truncate must remove whole characters and add ellipsis");
    }
}