Skip to main content

wlan_telemetry/
lib.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4use anyhow::{Context as _, Error, format_err};
5use fidl_fuchsia_power_battery as fidl_battery;
6use fidl_fuchsia_wlan_ieee80211 as fidl_ieee80211;
7use fidl_fuchsia_wlan_internal as fidl_internal;
8use fuchsia_async as fasync;
9use fuchsia_inspect::Node as InspectNode;
10use futures::channel::mpsc;
11use futures::{Future, StreamExt, select};
12use log::error;
13use std::boxed::Box;
14use windowed_stats::experimental::inspect::TimeMatrixClient;
15use wlan_common::bss::BssDescription;
16use wlan_legacy_metrics_registry as metrics;
17
18mod processors;
19pub(crate) mod util;
20pub use crate::processors::connect_disconnect::DisconnectInfo;
21pub use crate::processors::pno_scan::PnoScanDisabledReason;
22pub use crate::processors::power::{IfacePowerLevel, UnclearPowerDemand};
23pub use crate::processors::scan::ScanResult;
24pub use crate::processors::toggle_events::ClientConnectionsToggleEvent;
25pub use util::sender::TelemetrySender;
26#[cfg(test)]
27mod testing;
28
/// Events accepted by the telemetry loop that [`serve_telemetry`] creates.
///
/// Events travel over the channel wrapped by [`TelemetrySender`]; the loop forwards each one to
/// the processor(s) responsible for it.
#[derive(Debug)]
pub enum TelemetryEvent {
    /// Result of a connection attempt.
    ConnectResult {
        /// Status code reported for the attempt.
        result: fidl_ieee80211::StatusCode,
        /// Description of the BSS associated with the attempt.
        bss: Box<BssDescription>,
        /// Whether the attempt was classified as a credential rejection.
        is_credential_rejected: bool,
    },
    /// A disconnect, described by `info`.
    Disconnect {
        info: DisconnectInfo,
    },
    // We should maintain docstrings if we can see any possibility of ambiguity for an enum
    /// Client connections enabled or disabled
    ClientConnectionsToggle {
        event: ClientConnectionsToggleEvent,
    },
    /// A client iface identified by `iface_id` was created.
    ClientIfaceCreated {
        iface_id: u16,
    },
    /// The client iface identified by `iface_id` was destroyed.
    ClientIfaceDestroyed {
        iface_id: u16,
    },
    /// Creating an iface failed.
    IfaceCreationFailure,
    /// Destroying an iface failed.
    IfaceDestructionFailure,
    /// A scan was started.
    ScanStart,
    /// Result of a scan.
    ScanResult {
        result: ScanResult,
    },
    /// The power level of the iface identified by `iface_id` changed to `iface_power_level`.
    IfacePowerLevelChanged {
        iface_power_level: IfacePowerLevel,
        iface_id: u16,
    },
    /// System suspension imminent
    SuspendImminent,
    /// Unclear power level requested by policy layer
    UnclearPowerDemand(UnclearPowerDemand),
    /// Battery charge status update; the telemetry loop forwards it to the scan and toggle
    /// processors.
    BatteryChargeStatus(fidl_battery::ChargeStatus),
    /// Outcome of a recovery event.
    RecoveryEvent {
        // NOTE(review): presumably `Ok(())` means the recovery succeeded and `Err(())` that it
        // failed -- confirm against `processors::recovery`.
        result: Result<(), ()>,
    },
    /// An SME timeout was observed.
    SmeTimeout,
    /// Powering up the chip failed.
    ChipPowerUpFailure,
    /// Powering down the chip failed.
    ChipPowerDownFailure,
    /// The TX power scenario was reset.
    ResetTxPowerScenario,
    /// The TX power scenario was set to `scenario`.
    SetTxPowerScenario {
        scenario: fidl_internal::TxPowerScenario,
    },
    /// A PNO scan failed.
    PnoScanFailure,
    /// PNO scanning was enabled.
    PnoScanEnabled,
    /// Results of a PNO scan were received.
    PnoScanResultsReceived,
    /// PNO scanning was disabled for the given `reason`.
    PnoScanDisabled {
        reason: PnoScanDisabledReason,
    },
}
82
/// If metrics cannot be reported for extended periods of time, logging new metrics will fail and
/// the error messages tend to clutter up the logs.  This container limits the rate at which such
/// potentially noisy logs are reported.  Duplicate error messages are aggregated and periodically
/// reported.
pub struct ThrottledErrorLogger {
    /// Monotonic time at which a message was last written to the log.  Starts at time zero.
    time_of_last_log: fasync::MonotonicInstant,
    /// Messages withheld since the last written log, each mapped to the number of times it was
    /// withheld.  Emptied whenever a message is written.
    pub suppressed_errors: std::collections::HashMap<String, usize>,
    /// Length of the quiet window: a message is written only once the whole minutes elapsed since
    /// the last written log exceed this value.
    minutes_between_reports: i64,
}
92
93impl ThrottledErrorLogger {
94    pub fn new(minutes_between_reports: i64) -> Self {
95        Self {
96            time_of_last_log: fasync::MonotonicInstant::from_nanos(0),
97            suppressed_errors: std::collections::HashMap::new(),
98            minutes_between_reports,
99        }
100    }
101
102    pub fn throttle_log(&mut self, message: String, level: log::Level) {
103        let curr_time = fasync::MonotonicInstant::now();
104        let time_since_last_log = curr_time - self.time_of_last_log;
105
106        if time_since_last_log.into_minutes() > self.minutes_between_reports {
107            log::log!(level, "{}", message);
108            if !self.suppressed_errors.is_empty() {
109                for (log, count) in self.suppressed_errors.iter() {
110                    log::warn!("Suppressed {} instances: {}", count, log);
111                }
112                self.suppressed_errors.clear();
113            }
114            self.time_of_last_log = curr_time;
115        } else {
116            let count = self.suppressed_errors.entry(message).or_default();
117            *count += 1;
118        }
119    }
120
121    pub fn throttle_error(&mut self, result: Result<(), Error>) {
122        if let Err(e) = result {
123            self.throttle_log(e.to_string(), log::Level::Error);
124        }
125    }
126}
127
128/// Attempts to connect to the Cobalt service.
129pub async fn setup_cobalt_proxy()
130-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
131    let cobalt_svc = fuchsia_component::client::connect_to_protocol::<
132        fidl_fuchsia_metrics::MetricEventLoggerFactoryMarker,
133    >()
134    .context("failed to connect to metrics service")?;
135
136    let (cobalt_proxy, cobalt_server) =
137        fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>();
138
139    let project_spec = fidl_fuchsia_metrics::ProjectSpec {
140        customer_id: Some(metrics::CUSTOMER_ID),
141        project_id: Some(metrics::PROJECT_ID),
142        ..Default::default()
143    };
144
145    match cobalt_svc.create_metric_event_logger(&project_spec, cobalt_server).await {
146        Ok(_) => Ok(cobalt_proxy),
147        Err(err) => Err(format_err!("failed to create metrics event logger: {:?}", err)),
148    }
149}
150
151/// Attempts to create a disconnected FIDL channel with types matching the Cobalt service. This
152/// allows for a fallback with a uniform code path in case of a failure to connect to Cobalt.
153pub fn setup_disconnected_cobalt_proxy()
154-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
155    // Create a disconnected proxy
156    Ok(fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>().0)
157}
158
/// How often to refresh time series stats. Also how often to request packet counters.
///
/// This is the period of the timer in [`serve_telemetry`] whose ticks invoke the processors'
/// `handle_periodic_telemetry` hooks.
const TELEMETRY_QUERY_INTERVAL: zx::MonotonicDuration = zx::MonotonicDuration::from_seconds(10);
161
162pub fn serve_telemetry(
163    cobalt_proxy: fidl_fuchsia_metrics::MetricEventLoggerProxy,
164    monitor_svc_proxy: fidl_fuchsia_wlan_device_service::DeviceMonitorProxy,
165    inspect_node: InspectNode,
166    inspect_path: &str,
167) -> (TelemetrySender, impl Future<Output = Result<(), Error>> + use<>) {
168    let (sender, mut receiver) =
169        mpsc::channel::<TelemetryEvent>(util::sender::TELEMETRY_EVENT_BUFFER_SIZE);
170    let sender = TelemetrySender::new(sender);
171
172    // Inspect nodes to hold time series and metadata for other nodes
173    const METADATA_NODE_NAME: &str = "metadata";
174    let inspect_metadata_node = inspect_node.create_child(METADATA_NODE_NAME);
175    let inspect_metadata_path = format!("{inspect_path}/{METADATA_NODE_NAME}");
176    let inspect_time_series_node = inspect_node.create_child("time_series");
177    let driver_specific_time_series_node = inspect_time_series_node.create_child("driver_specific");
178    let driver_counters_time_series_node =
179        driver_specific_time_series_node.create_child("counters");
180    let driver_gauges_time_series_node = driver_specific_time_series_node.create_child("gauges");
181
182    let time_matrix_client = TimeMatrixClient::new(inspect_time_series_node.clone_weak());
183    let driver_counters_time_series_client =
184        TimeMatrixClient::new(driver_counters_time_series_node.clone_weak());
185    let driver_gauges_time_series_client =
186        TimeMatrixClient::new(driver_gauges_time_series_node.clone_weak());
187
188    // Create and initialize modules
189    let connect_disconnect = processors::connect_disconnect::ConnectDisconnectLogger::new(
190        cobalt_proxy.clone(),
191        &inspect_node,
192        &inspect_metadata_node,
193        &inspect_metadata_path,
194        &time_matrix_client,
195    );
196    let iface_logger = processors::iface::IfaceLogger::new(cobalt_proxy.clone());
197    let power_logger = processors::power::PowerLogger::new(cobalt_proxy.clone(), &inspect_node);
198    let recovery_logger = processors::recovery::RecoveryLogger::new(cobalt_proxy.clone());
199    let mut scan_logger =
200        processors::scan::ScanLogger::new(cobalt_proxy.clone(), &time_matrix_client);
201    let mut pno_scan_logger = processors::pno_scan::PnoScanLogger::new(cobalt_proxy.clone());
202    let sme_timeout_logger = processors::sme_timeout::SmeTimeoutLogger::new(cobalt_proxy.clone());
203    let mut toggle_logger =
204        processors::toggle_events::ToggleLogger::new(cobalt_proxy.clone(), &inspect_node);
205    let tx_power_scenario_logger =
206        processors::tx_power_scenario::TxPowerScenarioLogger::new(cobalt_proxy.clone());
207
208    let client_iface_counters_logger =
209        processors::client_iface_counters::ClientIfaceCountersLogger::new(
210            cobalt_proxy,
211            monitor_svc_proxy,
212            &inspect_metadata_node,
213            &inspect_metadata_path,
214            &time_matrix_client,
215            driver_counters_time_series_client,
216            driver_gauges_time_series_client,
217        );
218
219    let fut = async move {
220        // Prevent the inspect nodes from being dropped while the loop is running.
221        let _inspect_node = inspect_node;
222        let _inspect_metadata_node = inspect_metadata_node;
223        let _inspect_time_series_node = inspect_time_series_node;
224        let _driver_specific_time_series_node = driver_specific_time_series_node;
225        let _driver_counters_time_series_node = driver_counters_time_series_node;
226        let _driver_gauges_time_series_node = driver_gauges_time_series_node;
227
228        let mut telemetry_interval = fasync::Interval::new(TELEMETRY_QUERY_INTERVAL);
229        loop {
230            select! {
231                event = receiver.next() => {
232                    let Some(event) = event else {
233                        error!("Telemetry event stream unexpectedly terminated.");
234                        return Err(format_err!("Telemetry event stream unexpectedly terminated."));
235                    };
236                    use TelemetryEvent::*;
237                    match event {
238                        ConnectResult { result, bss, is_credential_rejected } => {
239                            connect_disconnect.handle_connect_attempt(result, &bss, is_credential_rejected).await;
240                        }
241                        Disconnect { info } => {
242                            connect_disconnect.log_disconnect(&info).await;
243                            power_logger.handle_iface_disconnect(info.iface_id).await;
244                        }
245                        ClientConnectionsToggle { event } => {
246                            connect_disconnect.handle_client_connections_toggle(&event).await;
247                            toggle_logger.handle_toggle_event(event).await;
248                        }
249                        ClientIfaceCreated { iface_id } => {
250                            client_iface_counters_logger.handle_iface_created(iface_id).await;
251                        }
252                        ClientIfaceDestroyed { iface_id } => {
253                            connect_disconnect.handle_iface_destroyed().await;
254                            client_iface_counters_logger.handle_iface_destroyed(iface_id).await;
255                            power_logger.handle_iface_destroyed(iface_id).await;
256                        }
257                        IfaceCreationFailure => {
258                            iface_logger.handle_iface_creation_failure().await;
259                        }
260                        IfaceDestructionFailure => {
261                            iface_logger.handle_iface_destruction_failure().await;
262                        }
263                        ScanStart => {
264                            scan_logger.handle_scan_start().await;
265                        }
266                        ScanResult { result } => {
267                            scan_logger.handle_scan_result(result).await;
268                        }
269                        IfacePowerLevelChanged { iface_power_level, iface_id } => {
270                            power_logger.log_iface_power_event(iface_power_level, iface_id).await;
271                        }
272                        // TODO(b/340921554): either watch for suspension directly in the library,
273                        // or plumb this from callers once suspend mechanisms are integrated
274                        SuspendImminent => {
275                            power_logger.handle_suspend_imminent().await;
276                            connect_disconnect.handle_suspend_imminent().await;
277                        }
278                        UnclearPowerDemand(demand) => {
279                            power_logger.handle_unclear_power_demand(demand).await;
280                        }
281                        ChipPowerUpFailure => {
282                            power_logger.handle_chip_power_up_failure().await;
283                            connect_disconnect.handle_client_connections_failed_to_start().await;
284                        }
285                        ChipPowerDownFailure => {
286                            power_logger.chip_power_down_failure().await;
287                            connect_disconnect.handle_client_connections_failed_to_stop().await;
288                        }
289                        BatteryChargeStatus(charge_status) => {
290                            scan_logger.handle_battery_charge_status(charge_status).await;
291                            toggle_logger.handle_battery_charge_status(charge_status).await;
292                        }
293                        RecoveryEvent { result } => {
294                            recovery_logger.handle_recovery_event(result).await;
295                        }
296                        SmeTimeout => {
297                            sme_timeout_logger.handle_sme_timeout_event().await;
298                        }
299                        ResetTxPowerScenario => {
300                            tx_power_scenario_logger.handle_sar_reset().await;
301                        }
302                        SetTxPowerScenario {scenario} => {
303                            tx_power_scenario_logger.handle_set_sar(scenario).await;
304                        }
305                        PnoScanFailure => {
306                            connect_disconnect.handle_pno_scan_failure().await;
307                        }
308                        PnoScanEnabled => {
309                            let is_connected = connect_disconnect.is_connected();
310                            pno_scan_logger.handle_pno_scan_enabled(is_connected).await;
311                        }
312                        PnoScanResultsReceived => {
313                            pno_scan_logger.handle_pno_scan_results_received().await;
314                        }
315                        PnoScanDisabled { reason } => {
316                            pno_scan_logger.handle_pno_scan_disabled(reason).await;
317                        }
318                    }
319                }
320                _ = telemetry_interval.next() => {
321                    connect_disconnect.handle_periodic_telemetry().await;
322                    client_iface_counters_logger.handle_periodic_telemetry().await;
323                    pno_scan_logger.handle_periodic_telemetry().await;
324                }
325            }
326        }
327    };
328    (sender, fut)
329}