Skip to main content

wlan_telemetry/
lib.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4use anyhow::{Context as _, Error, format_err};
5use fidl_fuchsia_power_battery as fidl_battery;
6use fidl_fuchsia_wlan_ieee80211 as fidl_ieee80211;
7use fidl_fuchsia_wlan_internal as fidl_internal;
8use fuchsia_async as fasync;
9use fuchsia_inspect::Node as InspectNode;
10use futures::channel::mpsc;
11use futures::{Future, StreamExt, select};
12use log::error;
13use std::boxed::Box;
14use windowed_stats::experimental::inspect::TimeMatrixClient;
15use wlan_common::bss::BssDescription;
16use wlan_legacy_metrics_registry as metrics;
17
18mod convert;
19mod processors;
20pub(crate) mod util;
21pub use crate::processors::connect_disconnect::DisconnectInfo;
22pub use crate::processors::pno_scan::PnoScanDisabledReason;
23pub use crate::processors::power::{IfacePowerLevel, UnclearPowerDemand};
24pub use crate::processors::scan::ScanResult;
25pub use crate::processors::toggle_events::ClientConnectionsToggleEvent;
26pub use util::sender::TelemetrySender;
27#[cfg(test)]
28mod testing;
29
/// Events accepted by the telemetry event loop.
///
/// Events travel over the channel created in `serve_telemetry`, whose loop dispatches each
/// variant to one or more processors.
#[derive(Debug)]
pub enum TelemetryEvent {
    /// Outcome of a connection attempt; handled by the connect/disconnect processor.
    ConnectResult {
        /// Status code reported for the attempt.
        result: fidl_ieee80211::StatusCode,
        /// Description of the BSS associated with the attempt.
        bss: Box<BssDescription>,
        /// Presumably whether the attempt failed because credentials were rejected -- confirm
        /// with callers.
        is_credential_rejected: bool,
        /// Presumably whether the attempt involved an OWE transition network -- confirm with
        /// callers.
        is_owe_transition: bool,
    },
    /// A disconnect; handled by the connect/disconnect processor and, using `info.iface_id`,
    /// by the power processor.
    Disconnect {
        info: DisconnectInfo,
    },
    // We should maintain docstrings if we can see any possibility of ambiguity for an enum
    /// Client connections enabled or disabled
    ClientConnectionsToggle {
        event: ClientConnectionsToggleEvent,
    },
    /// A client interface with the given id was created.
    ClientIfaceCreated {
        iface_id: u16,
    },
    /// The client interface with the given id was destroyed.
    ClientIfaceDestroyed {
        iface_id: u16,
    },
    /// Creating an interface failed.
    IfaceCreationFailure,
    /// Destroying an interface failed.
    IfaceDestructionFailure,
    /// A scan started.
    ScanStart,
    /// Result of a scan.
    ScanResult {
        result: ScanResult,
    },
    /// Power level event for the interface identified by `iface_id`.
    IfacePowerLevelChanged {
        iface_power_level: IfacePowerLevel,
        iface_id: u16,
    },
    /// System suspension imminent
    SuspendImminent,
    /// Unclear power level requested by policy layer
    UnclearPowerDemand(UnclearPowerDemand),
    /// Battery charge status update; forwarded to the scan and toggle processors.
    BatteryChargeStatus(fidl_battery::ChargeStatus),
    /// A recovery event; `result` is passed to the recovery processor as-is (presumably
    /// `Ok` for a successful recovery and `Err` for a failed one -- confirm with callers).
    RecoveryEvent {
        result: Result<(), ()>,
    },
    /// An SME timeout occurred.
    SmeTimeout,
    /// Powering up the chip failed. The connect/disconnect processor is also told that client
    /// connections failed to start.
    ChipPowerUpFailure,
    /// Powering down the chip failed. The connect/disconnect processor is also told that client
    /// connections failed to stop.
    ChipPowerDownFailure,
    /// The TX power scenario was reset.
    ResetTxPowerScenario,
    /// The TX power scenario was set to `scenario`.
    SetTxPowerScenario {
        scenario: fidl_internal::TxPowerScenario,
    },
    /// A PNO scan failure; handled by the connect/disconnect processor.
    PnoScanFailure,
    /// PNO scanning was enabled.
    PnoScanEnabled,
    /// PNO scan results were received.
    PnoScanResultsReceived,
    /// PNO scanning was disabled for the given reason.
    PnoScanDisabled {
        reason: PnoScanDisabledReason,
    },
}
84
/// If metrics cannot be reported for extended periods of time, logging new metrics will fail and
/// the error messages tend to clutter up the logs.  This container limits the rate at which such
/// potentially noisy logs are reported.  Duplicate error messages are aggregated and periodically
/// reported.
pub struct ThrottledErrorLogger {
    /// Monotonic time at which a message was last emitted rather than suppressed.
    time_of_last_log: fasync::MonotonicInstant,
    /// Messages suppressed since the last emitted log, mapped to their occurrence counts.
    pub suppressed_errors: std::collections::HashMap<String, usize>,
    /// Number of whole minutes that must be exceeded since the last emitted log before the
    /// next message is emitted instead of suppressed.
    minutes_between_reports: i64,
}
94
95impl ThrottledErrorLogger {
96    pub fn new(minutes_between_reports: i64) -> Self {
97        Self {
98            time_of_last_log: fasync::MonotonicInstant::from_nanos(0),
99            suppressed_errors: std::collections::HashMap::new(),
100            minutes_between_reports,
101        }
102    }
103
104    pub fn throttle_log(&mut self, message: String, level: log::Level) {
105        let curr_time = fasync::MonotonicInstant::now();
106        let time_since_last_log = curr_time - self.time_of_last_log;
107
108        if time_since_last_log.into_minutes() > self.minutes_between_reports {
109            log::log!(level, "{}", message);
110            if !self.suppressed_errors.is_empty() {
111                for (log, count) in self.suppressed_errors.iter() {
112                    log::warn!("Suppressed {} instances: {}", count, log);
113                }
114                self.suppressed_errors.clear();
115            }
116            self.time_of_last_log = curr_time;
117        } else {
118            let count = self.suppressed_errors.entry(message).or_default();
119            *count += 1;
120        }
121    }
122
123    pub fn throttle_error(&mut self, result: Result<(), Error>) {
124        if let Err(e) = result {
125            self.throttle_log(e.to_string(), log::Level::Error);
126        }
127    }
128}
129
130/// Attempts to connect to the Cobalt service.
131pub async fn setup_cobalt_proxy()
132-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
133    let cobalt_svc = fuchsia_component::client::connect_to_protocol::<
134        fidl_fuchsia_metrics::MetricEventLoggerFactoryMarker,
135    >()
136    .context("failed to connect to metrics service")?;
137
138    let (cobalt_proxy, cobalt_server) =
139        fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>();
140
141    let project_spec = fidl_fuchsia_metrics::ProjectSpec {
142        customer_id: Some(metrics::CUSTOMER_ID),
143        project_id: Some(metrics::PROJECT_ID),
144        ..Default::default()
145    };
146
147    match cobalt_svc.create_metric_event_logger(&project_spec, cobalt_server).await {
148        Ok(_) => Ok(cobalt_proxy),
149        Err(err) => Err(format_err!("failed to create metrics event logger: {:?}", err)),
150    }
151}
152
153/// Attempts to create a disconnected FIDL channel with types matching the Cobalt service. This
154/// allows for a fallback with a uniform code path in case of a failure to connect to Cobalt.
155pub fn setup_disconnected_cobalt_proxy()
156-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
157    // Create a disconnected proxy
158    Ok(fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>().0)
159}
160
/// How often to refresh time series stats. Also how often to request packet counters.
///
/// This is the period of the interval timer in `serve_telemetry`; each tick runs the
/// processors' periodic telemetry handlers.
const TELEMETRY_QUERY_INTERVAL: zx::MonotonicDuration = zx::MonotonicDuration::from_seconds(10);
163
/// Builds the telemetry event loop and the sender used to feed it.
///
/// Creates the `metadata` and `time_series` children of `inspect_node` (plus the
/// `driver_specific` node with its `counters` and `gauges` children under `time_series`),
/// constructs the processors, and returns:
///
/// * a [`TelemetrySender`] wrapping the sending half of a bounded channel of
///   [`TelemetryEvent`]s, and
/// * a future that dispatches every received event to the relevant processors and, once per
///   `TELEMETRY_QUERY_INTERVAL`, runs the periodic telemetry handlers.
///
/// `inspect_path` is used to derive the path of the `metadata` node (`{inspect_path}/metadata`);
/// presumably it is the inspect path of `inspect_node` -- confirm with callers.
///
/// The returned future never completes successfully: it loops until the event stream ends, at
/// which point it logs an error and resolves to `Err`.
pub fn serve_telemetry(
    cobalt_proxy: fidl_fuchsia_metrics::MetricEventLoggerProxy,
    monitor_svc_proxy: fidl_fuchsia_wlan_device_service::DeviceMonitorProxy,
    inspect_node: InspectNode,
    inspect_path: &str,
) -> (TelemetrySender, impl Future<Output = Result<(), Error>> + use<>) {
    let (sender, mut receiver) =
        mpsc::channel::<TelemetryEvent>(util::sender::TELEMETRY_EVENT_BUFFER_SIZE);
    let sender = TelemetrySender::new(sender);

    // Inspect nodes to hold time series and metadata for other nodes
    const METADATA_NODE_NAME: &str = "metadata";
    let inspect_metadata_node = inspect_node.create_child(METADATA_NODE_NAME);
    let inspect_metadata_path = format!("{inspect_path}/{METADATA_NODE_NAME}");
    let inspect_time_series_node = inspect_node.create_child("time_series");
    let driver_specific_time_series_node = inspect_time_series_node.create_child("driver_specific");
    let driver_counters_time_series_node =
        driver_specific_time_series_node.create_child("counters");
    let driver_gauges_time_series_node = driver_specific_time_series_node.create_child("gauges");

    // The clients only receive weak clones of the nodes; the owning handles are moved into the
    // future below.
    let time_matrix_client = TimeMatrixClient::new(inspect_time_series_node.clone_weak());
    let driver_counters_time_series_client =
        TimeMatrixClient::new(driver_counters_time_series_node.clone_weak());
    let driver_gauges_time_series_client =
        TimeMatrixClient::new(driver_gauges_time_series_node.clone_weak());

    // Create and initialize modules
    let connect_disconnect = processors::connect_disconnect::ConnectDisconnectLogger::new(
        cobalt_proxy.clone(),
        &inspect_node,
        &inspect_metadata_node,
        &inspect_metadata_path,
        &time_matrix_client,
    );
    let iface_logger = processors::iface::IfaceLogger::new(cobalt_proxy.clone());
    let power_logger = processors::power::PowerLogger::new(cobalt_proxy.clone(), &inspect_node);
    let recovery_logger = processors::recovery::RecoveryLogger::new(cobalt_proxy.clone());
    let mut scan_logger =
        processors::scan::ScanLogger::new(cobalt_proxy.clone(), &time_matrix_client);
    let mut pno_scan_logger = processors::pno_scan::PnoScanLogger::new(cobalt_proxy.clone());
    let sme_timeout_logger = processors::sme_timeout::SmeTimeoutLogger::new(cobalt_proxy.clone());
    let mut toggle_logger =
        processors::toggle_events::ToggleLogger::new(cobalt_proxy.clone(), &inspect_node);
    let tx_power_scenario_logger =
        processors::tx_power_scenario::TxPowerScenarioLogger::new(cobalt_proxy.clone());

    // Last user of the Cobalt proxy, so it takes ownership instead of another clone.
    let client_iface_counters_logger =
        processors::client_iface_counters::ClientIfaceCountersLogger::new(
            cobalt_proxy,
            monitor_svc_proxy,
            &inspect_metadata_node,
            &inspect_metadata_path,
            &time_matrix_client,
            driver_counters_time_series_client,
            driver_gauges_time_series_client,
        );

    let fut = async move {
        // Prevent the inspect nodes from being dropped while the loop is running.
        let _inspect_node = inspect_node;
        let _inspect_metadata_node = inspect_metadata_node;
        let _inspect_time_series_node = inspect_time_series_node;
        let _driver_specific_time_series_node = driver_specific_time_series_node;
        let _driver_counters_time_series_node = driver_counters_time_series_node;
        let _driver_gauges_time_series_node = driver_gauges_time_series_node;

        let mut telemetry_interval = fasync::Interval::new(TELEMETRY_QUERY_INTERVAL);
        loop {
            select! {
                event = receiver.next() => {
                    // `None` means the event stream has ended; this is the only exit from
                    // the loop.
                    let Some(event) = event else {
                        error!("Telemetry event stream unexpectedly terminated.");
                        return Err(format_err!("Telemetry event stream unexpectedly terminated."));
                    };
                    use TelemetryEvent::*;
                    match event {
                        ConnectResult { result, bss, is_credential_rejected, is_owe_transition } => {
                            connect_disconnect.handle_connect_attempt(result, &bss, is_credential_rejected, is_owe_transition).await;
                        }
                        Disconnect { info } => {
                            connect_disconnect.log_disconnect(&info).await;
                            power_logger.handle_iface_disconnect(info.iface_id).await;
                        }
                        ClientConnectionsToggle { event } => {
                            connect_disconnect.handle_client_connections_toggle(&event).await;
                            toggle_logger.handle_toggle_event(event).await;
                        }
                        ClientIfaceCreated { iface_id } => {
                            client_iface_counters_logger.handle_iface_created(iface_id).await;
                        }
                        ClientIfaceDestroyed { iface_id } => {
                            connect_disconnect.handle_iface_destroyed().await;
                            client_iface_counters_logger.handle_iface_destroyed(iface_id).await;
                            power_logger.handle_iface_destroyed(iface_id).await;
                        }
                        IfaceCreationFailure => {
                            iface_logger.handle_iface_creation_failure().await;
                        }
                        IfaceDestructionFailure => {
                            iface_logger.handle_iface_destruction_failure().await;
                        }
                        ScanStart => {
                            scan_logger.handle_scan_start().await;
                        }
                        ScanResult { result } => {
                            scan_logger.handle_scan_result(result).await;
                        }
                        IfacePowerLevelChanged { iface_power_level, iface_id } => {
                            power_logger.log_iface_power_event(iface_power_level, iface_id).await;
                        }
                        // TODO(b/340921554): either watch for suspension directly in the library,
                        // or plumb this from callers once suspend mechanisms are integrated
                        SuspendImminent => {
                            power_logger.handle_suspend_imminent().await;
                            connect_disconnect.handle_suspend_imminent().await;
                        }
                        UnclearPowerDemand(demand) => {
                            power_logger.handle_unclear_power_demand(demand).await;
                        }
                        ChipPowerUpFailure => {
                            power_logger.handle_chip_power_up_failure().await;
                            connect_disconnect.handle_client_connections_failed_to_start().await;
                        }
                        ChipPowerDownFailure => {
                            power_logger.chip_power_down_failure().await;
                            connect_disconnect.handle_client_connections_failed_to_stop().await;
                        }
                        BatteryChargeStatus(charge_status) => {
                            scan_logger.handle_battery_charge_status(charge_status).await;
                            toggle_logger.handle_battery_charge_status(charge_status).await;
                        }
                        RecoveryEvent { result } => {
                            recovery_logger.handle_recovery_event(result).await;
                        }
                        SmeTimeout => {
                            sme_timeout_logger.handle_sme_timeout_event().await;
                        }
                        ResetTxPowerScenario => {
                            tx_power_scenario_logger.handle_sar_reset().await;
                        }
                        SetTxPowerScenario {scenario} => {
                            tx_power_scenario_logger.handle_set_sar(scenario).await;
                        }
                        PnoScanFailure => {
                            connect_disconnect.handle_pno_scan_failure().await;
                        }
                        PnoScanEnabled => {
                            // The PNO scan processor is told whether a connection currently
                            // exists, as tracked by the connect/disconnect processor.
                            let is_connected = connect_disconnect.is_connected();
                            pno_scan_logger.handle_pno_scan_enabled(is_connected).await;
                        }
                        PnoScanResultsReceived => {
                            pno_scan_logger.handle_pno_scan_results_received().await;
                        }
                        PnoScanDisabled { reason } => {
                            pno_scan_logger.handle_pno_scan_disabled(reason).await;
                        }
                    }
                }
                // Periodic tick, once per TELEMETRY_QUERY_INTERVAL.
                _ = telemetry_interval.next() => {
                    connect_disconnect.handle_periodic_telemetry().await;
                    client_iface_counters_logger.handle_periodic_telemetry().await;
                    pno_scan_logger.handle_periodic_telemetry().await;
                }
            }
        }
    };
    (sender, fut)
}