1mod delay_tracker;
8mod diagnostics;
9mod snapshot;
10mod test_invoker;
11mod triage_shim;
12
13use anyhow::{bail, Context, Error};
14use argh::FromArgs;
15use delay_tracker::DelayTracker;
16use fidl_fuchsia_diagnostics_test::DetectControllerRequestStream;
17use fidl_fuchsia_feedback::MAX_CRASH_SIGNATURE_LENGTH;
18use fuchsia_async as fasync;
19use fuchsia_async::Task;
20use fuchsia_component::server::{ServiceFs, ServiceObj};
21use fuchsia_inspect::health::Reporter;
22use fuchsia_inspect::{self as inspect, NumericProperty};
23use fuchsia_inspect_contrib::nodes::BoundedListNode;
24use fuchsia_inspect_derive::{Inspect, WithInspect};
25use fuchsia_triage::{inspect_logger, SnapshotTrigger};
26use futures::lock::Mutex;
27use futures::{join, StreamExt};
28use glob::glob;
29use injectable_time::MonotonicInstant;
30use log::{error, info, warn};
31use serde_derive::Deserialize;
32use snapshot::SnapshotRequest;
33use std::collections::HashMap;
34use std::sync::Arc;
35use triage_detect_config::Config as ComponentConfig;
36
37const MINIMUM_CHECK_TIME_NANOS: i64 = 60 * 1_000_000_000;
38const CONFIG_GLOB: &str = "/config/data/*.triage";
39const PROGRAM_CONFIG_PATH: &str = "/config/data/config.json";
40const SIGNATURE_PREFIX: &str = "fuchsia-detect-";
41const MINIMUM_SIGNATURE_INTERVAL_NANOS: i64 = 3600 * 1_000_000_000;
43const WARNING_LIST_MAX_SIZE: usize = 100;
45
46pub const PROGRAM_NAME: &str = "detect";
48
49#[derive(FromArgs, Debug, PartialEq)]
51#[argh(subcommand, name = "detect")]
52pub struct CommandLine {}
53
54#[derive(PartialEq, Debug, Clone, Copy)]
57pub(crate) enum Mode {
58 IntegrationTest,
59 Production,
60}
61
62#[derive(Deserialize, Default, Debug)]
63struct ProgramConfig {
64 enable_filing: Option<bool>,
65}
66
67fn load_program_config() -> Result<ProgramConfig, Error> {
68 let config_text = match std::fs::read_to_string(PROGRAM_CONFIG_PATH) {
69 Ok(text) => text,
70 Err(_) => {
71 info!("Program config file not found; using default config.");
72 "{}".to_string()
73 }
74 };
75 match serde_json5::from_str::<ProgramConfig>(&config_text) {
76 Ok(config) => Ok(config),
77 Err(e) => bail!("Program config format error: {}", e),
78 }
79}
80
81fn load_configuration_files() -> Result<HashMap<String, String>, Error> {
82 fn file_stem(file_path: &std::path::PathBuf) -> Result<String, Error> {
83 if let Some(s) = file_path.file_stem() {
84 if let Some(s) = s.to_str() {
85 return Ok(s.to_owned());
86 }
87 }
88 bail!("Bad path {:?} - can't find file_stem", file_path)
89 }
90
91 let mut file_contents = HashMap::new();
92 for file_path in glob(CONFIG_GLOB)? {
93 let file_path = file_path?;
94 let stem = file_stem(&file_path)?;
95 let config_text = std::fs::read_to_string(file_path)?;
96 file_contents.insert(stem, config_text);
97 }
98 Ok(file_contents)
99}
100
101fn appropriate_check_interval(
107 expression: &str,
108 mode: Mode,
109) -> Result<zx::MonotonicDuration, Error> {
110 let check_every = triage_shim::evaluate_int_math(expression)
111 .or_else(|e| bail!("Check_every argument must be Minutes(n), Hours(n), etc. but: {}", e))?;
112 if check_every < MINIMUM_CHECK_TIME_NANOS && mode != Mode::IntegrationTest {
113 bail!(
114 "Minimum time to check is {} seconds; {} nanos is too small",
115 MINIMUM_CHECK_TIME_NANOS / 1_000_000_000,
116 check_every
117 );
118 }
119 info!(
120 "Checking every {} seconds from command line '{:?}'",
121 check_every / 1_000_000_000,
122 expression
123 );
124 Ok(zx::MonotonicDuration::from_nanos(check_every))
125}
126
127fn build_signature(snapshot: SnapshotTrigger, mode: Mode) -> String {
128 let sanitized: String = snapshot
131 .signature
132 .chars()
133 .map(|char| match char {
134 c if char.is_ascii_lowercase() => c,
135 c if char.is_ascii_uppercase() => c.to_ascii_lowercase(),
136 _ => '-',
137 })
138 .collect();
139 if sanitized != snapshot.signature {
140 let message = format!("Signature {} was sanitized to {}", snapshot.signature, sanitized);
141 if mode == Mode::IntegrationTest {
142 warn!("{}", message);
143 } else {
144 error!("{}", message);
145 }
146 }
147 let mut signature = format!("{}{}", SIGNATURE_PREFIX, sanitized);
148 if signature.len() > fidl_fuchsia_feedback::MAX_CRASH_SIGNATURE_LENGTH as usize {
149 let new_signature =
150 signature.chars().take(MAX_CRASH_SIGNATURE_LENGTH as usize).collect::<String>();
151 let message = format!("Signature '{}' truncated to '{}'", signature, new_signature);
152 if mode == Mode::IntegrationTest {
153 warn!("{}", message);
154 } else {
155 error!("{}", message);
156 }
157 signature = new_signature;
158 }
159 signature
160}
161
162#[derive(Inspect, Default)]
163struct Stats {
164 scan_count: inspect::UintProperty,
165 scan_test_count: inspect::UintProperty,
166 missed_deadlines: inspect::UintProperty,
167 triage_warnings: inspect::UintProperty,
168 issues_detected: inspect::UintProperty,
169 issues_throttled: inspect::UintProperty,
170 issues_send_count: inspect::UintProperty,
171 issues_send_errors: inspect::UintProperty,
172 inspect_node: fuchsia_inspect::Node,
173}
174
175enum IncomingService {
177 DetectController(DetectControllerRequestStream),
178}
179
180pub async fn main() -> Result<(), Error> {
181 let inspector = inspect::component::inspector();
182 let _inspect_server_task =
183 inspect_runtime::publish(inspector, inspect_runtime::PublishOptions::default());
184
185 let component_config = ComponentConfig::take_from_startup_handle();
186 inspector
187 .root()
188 .record_child("config", |config_node| component_config.record_inspect(config_node));
189
190 inspect_logger::set_log_list_node(BoundedListNode::new(
191 inspector.root().create_child("triage_warnings"),
192 WARNING_LIST_MAX_SIZE,
193 ));
194
195 let stats = Stats::default().with_inspect(inspector.root(), "stats")?;
196 let mode = match component_config.test_only {
197 true => Mode::IntegrationTest,
198 false => Mode::Production,
199 };
200 let check_every = appropriate_check_interval(&component_config.check_every, mode)
201 .context("Invalid command line arg for check time")?;
202 let program_config = load_program_config().context("Error loading program config")?;
203 info!("Test mode: {:?}, program config: {:?}", mode, program_config);
204 let configuration = load_configuration_files().context("Error reading configuration files")?;
205
206 let triage_engine = triage_shim::TriageLib::new(configuration)
207 .context("Failed to parse Detect configuration files")?;
208 info!("Loaded and parsed .triage files");
209
210 let selectors = triage_engine.selectors();
211 let diagnostic_source = diagnostics::DiagnosticFetcher::create(selectors)?;
212 let snapshot_service =
213 snapshot::CrashReportHandlerBuilder::new(MonotonicInstant::new()).build().await?;
214 let system_time = MonotonicInstant::new();
215 let delay_tracker = DelayTracker::new(Arc::new(system_time), mode);
216 let detection_runner = DetectionRunner {
217 stats,
218 mode,
219 diagnostic_source,
220 snapshot_service,
221 delay_tracker,
222 program_config,
223 triage_engine,
224 };
225 let detection_runner = Arc::new(Mutex::new(detection_runner));
228 let mut service_fs: ServiceFs<ServiceObj<'_, IncomingService>> = ServiceFs::new();
229 service_fs.dir("svc").add_fidl_service(IncomingService::DetectController);
230 service_fs.take_and_serve_directory_handle()?;
231 const MAX_CONCURRENT: usize = 10_000;
232 let runner_clone = detection_runner.clone();
233 let test_invoker_task = Task::local(service_fs.for_each_concurrent(
234 MAX_CONCURRENT,
235 move |IncomingService::DetectController(stream)| {
236 test_invoker::run_test_service(stream, runner_clone.clone())
237 },
238 ));
239 inspect::component::health().set_ok();
240
241 let main_loop_fut = main_loop(detection_runner, check_every);
242
243 join!(main_loop_fut, test_invoker_task);
244
245 Ok(())
246}
247
248async fn main_loop(
249 detection_runner: Arc<Mutex<impl RunsDetection>>,
250 check_every: zx::MonotonicDuration,
251) {
252 let mut next_check_time = fasync::MonotonicInstant::INFINITE_PAST;
254 loop {
255 if next_check_time < fasync::MonotonicInstant::now() {
256 if next_check_time != fasync::MonotonicInstant::INFINITE_PAST {
259 (*detection_runner.lock().await).stats().missed_deadlines.add(1);
260 warn!(
261 "Missed diagnostic check deadline {:?} by {:?} nanos",
262 next_check_time,
263 fasync::MonotonicInstant::now() - next_check_time
264 );
265 }
266 next_check_time = fasync::MonotonicInstant::now() + check_every;
267 } else {
268 fasync::Timer::new(next_check_time).await;
270 next_check_time += check_every;
274 }
275 detection_runner.lock().await.run_detection(DetectionOpts::default()).await;
276 }
277}
278
279pub(crate) trait RunsDetection {
280 fn run_detection(
282 &mut self,
283 opts: DetectionOpts,
284 ) -> impl std::future::Future<Output = ()> + std::marker::Send;
285
286 fn stats(&self) -> &Stats;
287}
288
289struct DetectionRunner {
291 stats: Stats,
292 mode: Mode,
293 diagnostic_source: diagnostics::DiagnosticFetcher,
294 snapshot_service: snapshot::CrashReportHandler,
295 delay_tracker: DelayTracker,
296 program_config: ProgramConfig,
297 triage_engine: triage_shim::TriageLib,
298}
299
300#[derive(Default)]
301pub(crate) struct DetectionOpts {
302 cpu_test: bool,
303}
304
305impl RunsDetection for DetectionRunner {
306 fn stats(&self) -> &Stats {
307 &self.stats
308 }
309
310 async fn run_detection(&mut self, opts: DetectionOpts) {
312 if opts.cpu_test {
313 self.stats().scan_test_count.add(1);
314 } else {
315 self.stats().scan_count.add(1);
316 }
317 let diagnostics = self.diagnostic_source.get_diagnostics().await;
318 let diagnostics = match diagnostics {
319 Ok(diagnostics) => diagnostics,
320 Err(e) => {
321 if self.mode != Mode::IntegrationTest {
323 error!("Fetching diagnostics failed: {}", e);
324 }
325 return;
326 }
327 };
328
329 let (snapshot_requests, warnings) = self.triage_engine.evaluate(diagnostics);
330
331 if opts.cpu_test {
332 return;
333 }
334 self.stats().triage_warnings.add(warnings.len() as u64);
335 for snapshot in snapshot_requests {
336 self.handle_snapshot(snapshot);
337 }
338 }
339}
340
341impl DetectionRunner {
342 fn handle_snapshot(&mut self, snapshot: SnapshotTrigger) {
343 self.stats().issues_detected.add(1);
344 if self.delay_tracker.ok_to_send(&snapshot) {
345 let signature = build_signature(snapshot, self.mode);
346 if self.program_config.enable_filing == Some(true) {
347 self.stats.issues_send_count.add(1);
348 if let Err(e) =
349 self.snapshot_service.request_snapshot(SnapshotRequest::new(signature))
350 {
351 self.stats.issues_send_errors.add(1);
352 error!("Snapshot request failed: {}", e);
353 }
354 } else {
355 warn!("Detect would have filed {}", signature);
356 }
357 } else {
358 self.stats().issues_throttled.add(1);
359 }
360 }
361}
362
363#[cfg(test)]
364mod test {
365 use super::*;
366
367 #[fuchsia::test]
368 fn verify_appropriate_check_interval() -> Result<(), Error> {
369 let error_a = "a".to_string();
370 let error_empty = "".to_string();
371 let raw_1 = "1".to_string();
372 let raw_1_result = zx::MonotonicDuration::from_nanos(1);
373 let short_time = format!("Nanos({})", MINIMUM_CHECK_TIME_NANOS - 1);
374 let short_time_result = zx::MonotonicDuration::from_nanos(MINIMUM_CHECK_TIME_NANOS - 1);
375 let minimum_time = format!("Nanos({})", MINIMUM_CHECK_TIME_NANOS);
376 let minimum_time_result = zx::MonotonicDuration::from_nanos(MINIMUM_CHECK_TIME_NANOS);
377 let long_time = format!("Nanos({})", MINIMUM_CHECK_TIME_NANOS + 1);
378 let long_time_result = zx::MonotonicDuration::from_nanos(MINIMUM_CHECK_TIME_NANOS + 1);
379
380 assert!(appropriate_check_interval(&error_a, Mode::IntegrationTest).is_err());
381 assert!(appropriate_check_interval(&error_empty, Mode::IntegrationTest).is_err());
382 assert_eq!(appropriate_check_interval(&raw_1, Mode::IntegrationTest)?, raw_1_result);
383 assert_eq!(
384 appropriate_check_interval(&short_time, Mode::IntegrationTest)?,
385 short_time_result
386 );
387 assert_eq!(
388 appropriate_check_interval(&minimum_time, Mode::IntegrationTest)?,
389 minimum_time_result
390 );
391 assert_eq!(
392 appropriate_check_interval(&long_time, Mode::IntegrationTest)?,
393 long_time_result
394 );
395
396 assert!(appropriate_check_interval(&error_a, Mode::Production).is_err());
397 assert!(appropriate_check_interval(&error_empty, Mode::Production).is_err());
398 assert!(appropriate_check_interval(&raw_1, Mode::Production).is_err());
399 assert!(appropriate_check_interval(&short_time, Mode::Production).is_err());
400 assert_eq!(
401 appropriate_check_interval(&minimum_time, Mode::Production)?,
402 minimum_time_result
403 );
404 assert_eq!(
405 appropriate_check_interval(&long_time, Mode::IntegrationTest)?,
406 long_time_result
407 );
408 assert_eq!(appropriate_check_interval(&long_time, Mode::Production)?, long_time_result);
409 Ok(())
410 }
411
412 #[fuchsia::test]
413 fn verify_build_signature() {
414 fn sig(signature: &str) -> fuchsia_triage::SnapshotTrigger {
415 fuchsia_triage::SnapshotTrigger { interval: 0, signature: signature.to_string() }
416 }
417
418 let long_sig_input = "A very very long string that just keeps going and going and won't \
419 quit no matter how long it goes and I don't know when it will ever stop";
420 let desired_long_output = "fuchsia-detect-a-very-very-long-string-that-just-keeps-going-and-going-and-won-t-quit-no-matter-how-long-it-goes-and-i-don-t-kno";
421 let long_sig_output = build_signature(sig(long_sig_input), Mode::Production);
422
423 assert_eq!(long_sig_output, desired_long_output.to_string());
424 assert_eq!(long_sig_output.len(), MAX_CRASH_SIGNATURE_LENGTH as usize);
425 assert_eq!(
426 build_signature(sig("Test lowercase"), Mode::Production),
427 "fuchsia-detect-test-lowercase".to_string()
428 );
429 }
430}