wlancfg_lib/mode_management/
recovery.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mode_management::{Defect, EventHistory, IfaceFailure, PhyFailure};
6use crate::telemetry;
7use fuchsia_inspect::Node as InspectNode;
8use fuchsia_inspect_contrib::inspect_insert;
9use fuchsia_inspect_contrib::log::WriteInspect;
10use futures::channel::mpsc;
11use log::warn;
12use std::borrow::Cow;
13
14// As a general note, recovery is intended to be a method of last resort.  It should be used in
15// circumstances where it is thought that WLAN firmware or the interface with the WLAN peripheral
16// are not working properly.
17
// To ensure that devices are not constantly recovering, throttle recovery interventions by
// ensuring that PHY resets are only recommended every 24 hours and interface destructions are only
// recommended every 12 hours.

// Throttle window, in hours, compared against the time since the last recorded PHY reset.
const HOURS_BETWEEN_PHY_RESETS: i64 = 24;
// Throttle window, in hours, compared against the time since the last recorded iface destruction.
const HOURS_BETWEEN_IFACE_DESTRUCTIONS: i64 = 12;

// The following constants were empirically determined by looking over aggregate fleet metrics of
// device-day counts of events that represent unexpected device behavior.  Devices are allowed to
// encounter a number of events up to these thresholds before some recovery intervention may be
// recommended.
//
// Each value is the defect count in the defect history at which the corresponding thresholded
// recovery profile starts considering a recovery action; below it, no action is recommended.
const SCAN_FAILURE_RECOVERY_THRESHOLD: usize = 5;
const EMPTY_SCAN_RECOVERY_THRESHOLD: usize = 10;
const CANCELED_SCAN_RECOVERY_THRESHOLD: usize = 9;
pub(crate) const CONNECT_FAILURE_RECOVERY_THRESHOLD: usize = 15;
pub(crate) const AP_START_FAILURE_RECOVERY_THRESHOLD: usize = 14;
// A threshold of one means the first recorded iface creation/destruction failure already
// qualifies for recovery.
const CREATE_IFACE_FAILURE_RECOVERY_THRESHOLD: usize = 1;
const DESTROY_IFACE_FAILURE_RECOVERY_THRESHOLD: usize = 1;
35
/// Recovery operations carried by `RecoveryAction::PhyRecovery`.
#[derive(Clone, Copy, Debug)]
pub enum PhyRecoveryOperation {
    /// Destroy the interface identified by `iface_id`.
    DestroyIface { iface_id: u16 },
    /// Reset the PHY identified by `phy_id`.
    ResetPhy { phy_id: u16 },
}

/// Equality only considers which variant an operation is; the embedded identifier is ignored, so
/// two `DestroyIface` operations for different interfaces compare equal.
impl PartialEq for PhyRecoveryOperation {
    fn eq(&self, other: &Self) -> bool {
        std::mem::discriminant(self) == std::mem::discriminant(other)
    }
}
54
/// Recovery operations carried by `RecoveryAction::IfaceRecovery`.
#[derive(Clone, Copy, Debug)]
pub enum IfaceRecoveryOperation {
    /// Disconnect on the interface identified by `iface_id`.
    Disconnect { iface_id: u16 },
    /// Stop the AP on the interface identified by `iface_id`.
    StopAp { iface_id: u16 },
}

/// Equality only considers which variant an operation is; the embedded `iface_id` is ignored.
impl PartialEq for IfaceRecoveryOperation {
    fn eq(&self, other: &Self) -> bool {
        std::mem::discriminant(self) == std::mem::discriminant(other)
    }
}
73
74#[derive(Clone, Copy, Debug, PartialEq)]
75pub enum RecoveryAction {
76    PhyRecovery(PhyRecoveryOperation),
77    IfaceRecovery(IfaceRecoveryOperation),
78}
79
80impl RecoveryAction {
81    fn as_phy_recovery_mechanism(self) -> Option<telemetry::PhyRecoveryMechanism> {
82        match self {
83            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy { .. }) => {
84                Some(telemetry::PhyRecoveryMechanism::PhyReset)
85            }
86            RecoveryAction::IfaceRecovery(..)
87            | RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { .. }) => None,
88        }
89    }
90
91    fn as_ap_recovery_mechanism(self) -> Option<telemetry::ApRecoveryMechanism> {
92        match self {
93            RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { .. }) => {
94                Some(telemetry::ApRecoveryMechanism::DestroyIface)
95            }
96            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy { .. }) => {
97                Some(telemetry::ApRecoveryMechanism::ResetPhy)
98            }
99            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::StopAp { .. }) => {
100                Some(telemetry::ApRecoveryMechanism::StopAp)
101            }
102            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::Disconnect { .. }) => None,
103        }
104    }
105
106    fn as_client_recovery_mechanism(self) -> Option<telemetry::ClientRecoveryMechanism> {
107        match self {
108            RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { .. }) => {
109                Some(telemetry::ClientRecoveryMechanism::DestroyIface)
110            }
111            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy { .. }) => {
112                Some(telemetry::ClientRecoveryMechanism::PhyReset)
113            }
114            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::Disconnect { .. }) => {
115                Some(telemetry::ClientRecoveryMechanism::Disconnect)
116            }
117            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::StopAp { .. }) => None,
118        }
119    }
120
121    fn as_timeout_recovery_mechanism(self) -> Option<telemetry::TimeoutRecoveryMechanism> {
122        match self {
123            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy { .. }) => {
124                Some(telemetry::TimeoutRecoveryMechanism::PhyReset)
125            }
126            RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { .. }) => {
127                Some(telemetry::TimeoutRecoveryMechanism::DestroyIface)
128            }
129            RecoveryAction::IfaceRecovery(..) => None,
130        }
131    }
132}
133
134impl WriteInspect for RecoveryAction {
135    fn write_inspect<'a>(&self, writer: &InspectNode, key: impl Into<Cow<'a, str>>) {
136        match self {
137            RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { iface_id }) => {
138                inspect_insert!(writer, var key: {DestroyIface: {iface_id: iface_id}})
139            }
140            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy { phy_id }) => {
141                inspect_insert!(writer, var key: {ResetPhy: {phy_id: phy_id}})
142            }
143            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::Disconnect { iface_id }) => {
144                inspect_insert!(writer, var key: {Disconnect: {iface_id: iface_id}})
145            }
146            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::StopAp { iface_id }) => {
147                inspect_insert!(writer, var key: {StopAp: {iface_id: iface_id}})
148            }
149        }
150    }
151}
152
153#[derive(Clone, Copy, Debug, PartialEq)]
154pub struct RecoverySummary {
155    pub defect: Defect,
156    pub action: RecoveryAction,
157}
158
159impl RecoverySummary {
160    pub fn new(defect: Defect, action: RecoveryAction) -> Self {
161        RecoverySummary { defect, action }
162    }
163
164    pub fn as_recovery_reason(self) -> Option<telemetry::RecoveryReason> {
165        // Construct the associated metric and its dimension.
166        match self.defect {
167            Defect::Phy(PhyFailure::IfaceCreationFailure { .. }) => {
168                Some(telemetry::RecoveryReason::CreateIfaceFailure(
169                    self.action.as_phy_recovery_mechanism()?,
170                ))
171            }
172            Defect::Phy(PhyFailure::IfaceDestructionFailure { .. }) => {
173                Some(telemetry::RecoveryReason::DestroyIfaceFailure(
174                    self.action.as_phy_recovery_mechanism()?,
175                ))
176            }
177            Defect::Iface(IfaceFailure::Timeout { .. }) => Some(
178                telemetry::RecoveryReason::Timeout(self.action.as_timeout_recovery_mechanism()?),
179            ),
180            Defect::Iface(IfaceFailure::ApStartFailure { .. }) => Some(
181                telemetry::RecoveryReason::StartApFailure(self.action.as_ap_recovery_mechanism()?),
182            ),
183            Defect::Iface(IfaceFailure::CanceledScan { .. }) => {
184                Some(telemetry::RecoveryReason::ScanCancellation(
185                    self.action.as_client_recovery_mechanism()?,
186                ))
187            }
188            Defect::Iface(IfaceFailure::FailedScan { .. }) => Some(
189                telemetry::RecoveryReason::ScanFailure(self.action.as_client_recovery_mechanism()?),
190            ),
191            Defect::Iface(IfaceFailure::EmptyScanResults { .. }) => {
192                Some(telemetry::RecoveryReason::ScanResultsEmpty(
193                    self.action.as_client_recovery_mechanism()?,
194                ))
195            }
196            Defect::Iface(IfaceFailure::ConnectionFailure { .. }) => {
197                Some(telemetry::RecoveryReason::ConnectFailure(
198                    self.action.as_client_recovery_mechanism()?,
199                ))
200            }
201        }
202    }
203}
204
205impl WriteInspect for RecoverySummary {
206    fn write_inspect<'a>(&self, writer: &InspectNode, key: impl Into<Cow<'a, str>>) {
207        inspect_insert!(writer, var key: {
208            defect: self.defect,
209            action: self.action,
210        })
211    }
212}
213
// NOTE(review): presumably the bound used when creating the channel that carries
// `RecoverySummary` values; the channel itself is created outside of this section -- confirm.
pub const RECOVERY_SUMMARY_CHANNEL_CAPACITY: usize = 100;
// Sending and receiving halves of an mpsc channel that carries `RecoverySummary` values.
pub(crate) type RecoveryActionSender = mpsc::Sender<RecoverySummary>;
pub(crate) type RecoveryActionReceiver = mpsc::Receiver<RecoverySummary>;

// The purpose of a RecoveryProfile function is to look at the most-recently observed defect in the
// context of past defects that have been encountered and past recovery actions that have been
// taken and suggest a possible recovery action to take to remedy the most recent defect.
//
// A profile returns `None` when it has no recovery action to recommend.
pub type RecoveryProfile = fn(
    phy_id: u16,
    defect_history: &mut EventHistory<Defect>,
    recovery_history: &mut EventHistory<RecoveryAction>,
    latest_defect: Defect,
) -> Option<RecoveryAction>;
227
// This is available so that new products' behaviors can be characterized before enforcing any
// recovery thresholds.  This will enable finding real bugs in a device's behavior.
//
// The function has the `RecoveryProfile` signature, ignores every argument, and never recommends
// a recovery action.
fn recovery_disabled(
    _phy_id: u16,
    _defect_history: &mut EventHistory<Defect>,
    _recovery_history: &mut EventHistory<RecoveryAction>,
    _latest_defect: Defect,
) -> Option<RecoveryAction> {
    None
}
238
239// This recovery profile takes into account the various defect thresholds and remediation limits to
240// recommend a recovery solution when defects are encountered.
241fn thresholded_recovery(
242    phy_id: u16,
243    defect_history: &mut EventHistory<Defect>,
244    recovery_history: &mut EventHistory<RecoveryAction>,
245    latest_defect: Defect,
246) -> Option<RecoveryAction> {
247    match latest_defect {
248        Defect::Phy(PhyFailure::IfaceCreationFailure { .. }) => {
249            thresholded_create_iface_failure_recovery_profile(
250                phy_id,
251                defect_history,
252                recovery_history,
253                latest_defect,
254            )
255        }
256        Defect::Phy(PhyFailure::IfaceDestructionFailure { .. }) => {
257            thresholded_destroy_iface_failure_recovery_profile(
258                phy_id,
259                defect_history,
260                recovery_history,
261                latest_defect,
262            )
263        }
264        Defect::Iface(IfaceFailure::CanceledScan { .. }) => {
265            thresholded_canceled_scan_recovery_profile(
266                phy_id,
267                defect_history,
268                recovery_history,
269                latest_defect,
270            )
271        }
272        Defect::Iface(IfaceFailure::FailedScan { .. }) => {
273            thresholded_scan_failure_recovery_profile(
274                phy_id,
275                defect_history,
276                recovery_history,
277                latest_defect,
278            )
279        }
280        Defect::Iface(IfaceFailure::EmptyScanResults { .. }) => {
281            thresholded_empty_scan_results_recovery_profile(
282                phy_id,
283                defect_history,
284                recovery_history,
285                latest_defect,
286            )
287        }
288        Defect::Iface(IfaceFailure::ApStartFailure { .. }) => {
289            thresholded_ap_start_failure_recovery_profile(
290                phy_id,
291                defect_history,
292                recovery_history,
293                latest_defect,
294            )
295        }
296        Defect::Iface(IfaceFailure::ConnectionFailure { .. }) => {
297            thresholded_connect_failure_recovery_profile(
298                phy_id,
299                defect_history,
300                recovery_history,
301                latest_defect,
302            )
303        }
304        Defect::Iface(IfaceFailure::Timeout { .. }) => {
305            // TODO(b/42066276): Determine thresholds and recovery options.
306            None
307        }
308    }
309}
310
311// Enable the lookup of recovery profiles by description.
312pub fn lookup_recovery_profile(profile_name: &str) -> RecoveryProfile {
313    match profile_name {
314        "" => recovery_disabled,
315        "thresholded_recovery" => thresholded_recovery,
316        other => {
317            warn!("Invalid recovery profile: {}.  Proceeding with default.", other);
318            recovery_disabled
319        }
320    }
321}
322
323fn thresholded_iface_destruction_and_phy_reset(
324    phy_id: u16,
325    iface_id: u16,
326    defect_history: &mut EventHistory<Defect>,
327    recovery_history: &mut EventHistory<RecoveryAction>,
328    most_recent_defect: Defect,
329    defect_count_threshold: usize,
330) -> Option<RecoveryAction> {
331    let proposed_iface_destruction_action =
332        RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { iface_id });
333
334    if recovery_history.event_count(proposed_iface_destruction_action) > 0
335        && let Some(phy_reset_recovery) = thresholded_phy_reset(
336            phy_id,
337            defect_history,
338            recovery_history,
339            most_recent_defect,
340            defect_count_threshold,
341        )
342    {
343        return Some(phy_reset_recovery);
344    }
345
346    thresholded_iface_destruction(
347        iface_id,
348        defect_history,
349        recovery_history,
350        most_recent_defect,
351        defect_count_threshold,
352    )
353}
354
355fn thresholded_phy_reset(
356    phy_id: u16,
357    defect_history: &mut EventHistory<Defect>,
358    recovery_history: &mut EventHistory<RecoveryAction>,
359    most_recent_defect: Defect,
360    defect_count_threshold: usize,
361) -> Option<RecoveryAction> {
362    let proposed_phy_reset_action =
363        RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy { phy_id });
364
365    if defect_history.event_count(most_recent_defect) < defect_count_threshold {
366        return None;
367    }
368
369    // If the threshold has been crossed and sufficient time has passed since the last PHY reset,
370    // recommend that the PHY be reset.
371    let recovery_allowed = match recovery_history.time_since_last_event(proposed_phy_reset_action) {
372        None => true,
373        Some(time) => time.into_hours() > HOURS_BETWEEN_PHY_RESETS,
374    };
375
376    if recovery_allowed {
377        return Some(proposed_phy_reset_action);
378    }
379
380    None
381}
382
383fn thresholded_iface_destruction(
384    iface_id: u16,
385    defect_history: &mut EventHistory<Defect>,
386    recovery_history: &mut EventHistory<RecoveryAction>,
387    most_recent_defect: Defect,
388    defect_count_threshold: usize,
389) -> Option<RecoveryAction> {
390    let proposed_iface_destruction_action =
391        RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { iface_id });
392
393    if defect_history.event_count(most_recent_defect) < defect_count_threshold {
394        return None;
395    }
396
397    // If the threshold has been crossed and sufficient time has passed since the last iface
398    // destruction, recommend that the iface be destroyed.
399    let recovery_allowed =
400        match recovery_history.time_since_last_event(proposed_iface_destruction_action) {
401            None => true,
402            Some(time) => time.into_hours() > HOURS_BETWEEN_IFACE_DESTRUCTIONS,
403        };
404
405    if recovery_allowed {
406        return Some(proposed_iface_destruction_action);
407    }
408
409    None
410}
411
412fn thresholded_scan_failure_recovery_profile(
413    phy_id: u16,
414    defect_history: &mut EventHistory<Defect>,
415    recovery_history: &mut EventHistory<RecoveryAction>,
416    scan_failure_defect: Defect,
417) -> Option<RecoveryAction> {
418    let iface_id = match scan_failure_defect {
419        Defect::Iface(IfaceFailure::FailedScan { iface_id }) => iface_id,
420        other => {
421            warn!("Assessing invalid defect type for scan failure recovery: {:?}", other);
422            return None;
423        }
424    };
425
426    thresholded_iface_destruction_and_phy_reset(
427        phy_id,
428        iface_id,
429        defect_history,
430        recovery_history,
431        scan_failure_defect,
432        SCAN_FAILURE_RECOVERY_THRESHOLD,
433    )
434}
435
436fn thresholded_empty_scan_results_recovery_profile(
437    phy_id: u16,
438    defect_history: &mut EventHistory<Defect>,
439    recovery_history: &mut EventHistory<RecoveryAction>,
440    empty_scan_defect: Defect,
441) -> Option<RecoveryAction> {
442    let iface_id = match empty_scan_defect {
443        Defect::Iface(IfaceFailure::EmptyScanResults { iface_id }) => iface_id,
444        other => {
445            warn!("Assessing invalid defect type for empty scan results recovery: {:?}", other);
446            return None;
447        }
448    };
449
450    thresholded_iface_destruction_and_phy_reset(
451        phy_id,
452        iface_id,
453        defect_history,
454        recovery_history,
455        empty_scan_defect,
456        EMPTY_SCAN_RECOVERY_THRESHOLD,
457    )
458}
459
460fn thresholded_connect_failure_recovery_profile(
461    phy_id: u16,
462    defect_history: &mut EventHistory<Defect>,
463    recovery_history: &mut EventHistory<RecoveryAction>,
464    connect_defect: Defect,
465) -> Option<RecoveryAction> {
466    let iface_id = match connect_defect {
467        Defect::Iface(IfaceFailure::ConnectionFailure { iface_id }) => iface_id,
468        other => {
469            warn!("Assessing invalid defect type for connection failure recovery: {:?}", other);
470            return None;
471        }
472    };
473
474    thresholded_iface_destruction_and_phy_reset(
475        phy_id,
476        iface_id,
477        defect_history,
478        recovery_history,
479        connect_defect,
480        CONNECT_FAILURE_RECOVERY_THRESHOLD,
481    )
482}
483
484fn thresholded_ap_start_failure_recovery_profile(
485    phy_id: u16,
486    defect_history: &mut EventHistory<Defect>,
487    recovery_history: &mut EventHistory<RecoveryAction>,
488    ap_start_defect: Defect,
489) -> Option<RecoveryAction> {
490    match ap_start_defect {
491        Defect::Iface(IfaceFailure::ApStartFailure { .. }) => thresholded_phy_reset(
492            phy_id,
493            defect_history,
494            recovery_history,
495            ap_start_defect,
496            AP_START_FAILURE_RECOVERY_THRESHOLD,
497        ),
498        other => {
499            warn!("Assessing invalid defect type for AP start failure recovery: {:?}", other);
500            None
501        }
502    }
503}
504
505fn thresholded_create_iface_failure_recovery_profile(
506    phy_id: u16,
507    defect_history: &mut EventHistory<Defect>,
508    recovery_history: &mut EventHistory<RecoveryAction>,
509    create_iface_defect: Defect,
510) -> Option<RecoveryAction> {
511    match create_iface_defect {
512        Defect::Phy(PhyFailure::IfaceCreationFailure { .. }) => thresholded_phy_reset(
513            phy_id,
514            defect_history,
515            recovery_history,
516            create_iface_defect,
517            CREATE_IFACE_FAILURE_RECOVERY_THRESHOLD,
518        ),
519        other => {
520            warn!("Assessing invalid defect type for create iface failure recovery: {:?}", other);
521            None
522        }
523    }
524}
525
526fn thresholded_destroy_iface_failure_recovery_profile(
527    phy_id: u16,
528    defect_history: &mut EventHistory<Defect>,
529    recovery_history: &mut EventHistory<RecoveryAction>,
530    destroy_iface_defect: Defect,
531) -> Option<RecoveryAction> {
532    match destroy_iface_defect {
533        Defect::Phy(PhyFailure::IfaceDestructionFailure { .. }) => thresholded_phy_reset(
534            phy_id,
535            defect_history,
536            recovery_history,
537            destroy_iface_defect,
538            DESTROY_IFACE_FAILURE_RECOVERY_THRESHOLD,
539        ),
540        other => {
541            warn!("Assessing invalid defect type for destroy iface failure recovery: {:?}", other);
542            None
543        }
544    }
545}
546
547fn thresholded_canceled_scan_recovery_profile(
548    _phy_id: u16,
549    defect_history: &mut EventHistory<Defect>,
550    recovery_history: &mut EventHistory<RecoveryAction>,
551    destroy_iface_defect: Defect,
552) -> Option<RecoveryAction> {
553    match destroy_iface_defect {
554        Defect::Iface(IfaceFailure::CanceledScan { iface_id }) => thresholded_iface_destruction(
555            iface_id,
556            defect_history,
557            recovery_history,
558            destroy_iface_defect,
559            CANCELED_SCAN_RECOVERY_THRESHOLD,
560        ),
561        other => {
562            warn!("Assessing invalid defect type for canceled scan recovery: {:?}", other);
563            None
564        }
565    }
566}
567
568#[cfg(test)]
569mod tests {
570    use super::*;
571    use fuchsia_async::{MonotonicInstant, TestExecutor};
572
573    use rand::Rng;
574    use test_case::test_case;
575
576    #[fuchsia::test]
577    fn test_recovery_action_equality() {
578        let mut rng = rand::rng();
579        assert_eq!(
580            RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface {
581                iface_id: rng.random::<u16>()
582            }),
583            RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface {
584                iface_id: rng.random::<u16>()
585            }),
586        );
587        assert_eq!(
588            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy {
589                phy_id: rng.random::<u16>()
590            }),
591            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy {
592                phy_id: rng.random::<u16>()
593            }),
594        );
595        assert_eq!(
596            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::Disconnect {
597                iface_id: rng.random::<u16>()
598            }),
599            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::Disconnect {
600                iface_id: rng.random::<u16>()
601            }),
602        );
603        assert_eq!(
604            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::StopAp {
605                iface_id: rng.random::<u16>()
606            }),
607            RecoveryAction::IfaceRecovery(IfaceRecoveryOperation::StopAp {
608                iface_id: rng.random::<u16>()
609            }),
610        );
611    }
612
    // PHY and iface identifiers shared by the tests in this module.
    const PHY_ID: u16 = 123;
    const IFACE_ID: u16 = 456;
615
616    // This test verifies that:
617    // 1. No recovery is suggested until the failure recovery threshold is crossed.
618    // 2. That interface destruction is suggested prior to PHY reset.
619    // 3. Once interface destruction recovery is performed, it is not recommended until the time
620    //    limit has elapsed.
621    // 4. Once PHY reset has been suggested, it is not recommended again until the time limit has
622    //    elapsed.
623    fn test_thresholded_iface_destruction_and_phy_reset(
624        exec: &TestExecutor,
625        recovery_fn: RecoveryProfile,
626        defect_to_log: Defect,
627        defect_threshold: usize,
628    ) {
629        // Set the test time to start at time zero.
630        let start_time = MonotonicInstant::from_nanos(0);
631        exec.set_fake_time(start_time);
632
633        // These are the potential recovery interventions that will be recommended.
634        let destroy_iface_recommendation =
635            RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { iface_id: IFACE_ID });
636        let reset_phy_recommendation =
637            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy { phy_id: PHY_ID });
638
639        // Retain defects and recovery actions for 48 hours.
640        let mut defects = EventHistory::<Defect>::new(48 * 60 * 60);
641        let mut recoveries = EventHistory::<RecoveryAction>::new(48 * 60 * 60);
642
643        // Add failures until just under the threshold.
644        for _ in 0..(defect_threshold - 1) {
645            defects.add_event(defect_to_log);
646
647            // Verify that there is no recovery recommended
648            assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
649        }
650
651        // Add one more failure and verify that a destroy iface was recommended.
652        defects.add_event(defect_to_log);
653        assert_eq!(
654            Some(destroy_iface_recommendation),
655            recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,)
656        );
657
658        // Record the recovery action and then log another failure to verify that a PHY reset
659        // is recommended.
660        recoveries.add_event(destroy_iface_recommendation);
661        defects.add_event(defect_to_log);
662        assert_eq!(
663            Some(reset_phy_recommendation),
664            recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,)
665        );
666
667        // Record the PHY reset and then advance the clock 11 hours and make sure that no recovery
668        // is recommended.
669        //
670        // This is now 11 hours past the test start time.
671        recoveries.add_event(reset_phy_recommendation);
672        exec.set_fake_time(MonotonicInstant::after(zx::MonotonicDuration::from_hours(11)));
673        defects.add_event(defect_to_log);
674        assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
675
676        // Advance the clock another 2 hours to ensure that the time between iface destruction
677        // recovery recommendations has elapsed.
678        //
679        // This is now 13 hours past the start of the test.
680        exec.set_fake_time(MonotonicInstant::after(zx::MonotonicDuration::from_hours(2)));
681        defects.add_event(defect_to_log);
682        assert_eq!(
683            Some(destroy_iface_recommendation),
684            recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,)
685        );
686
687        // Record the destroy iface recovery action and advance the clock 10 more hours and make
688        // sure that no recovery is recommended.
689        //
690        // This is now 23 hours past the start of the test.
691        recoveries.add_event(destroy_iface_recommendation);
692        exec.set_fake_time(MonotonicInstant::after(zx::MonotonicDuration::from_hours(10)));
693        defects.add_event(defect_to_log);
694        assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
695
696        // Advance the clock another 2 hours to ensure that the time between PHY resets has elapsed.
697        //
698        // This is now 25 hours past the start of the test.
699        exec.set_fake_time(MonotonicInstant::after(zx::MonotonicDuration::from_hours(2)));
700        defects.add_event(defect_to_log);
701        assert_eq!(
702            Some(reset_phy_recommendation),
703            recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,)
704        );
705    }
706
707    // This test verifies that:
708    // 1. No recovery is recommended until the threshold has been crossed.
709    // 2. PHY reset is recommended once the threshold is crossed.
710    // 3. PHY resets are only recommended once per 24-hour period.
711    fn test_thresholded_phy_reset(
712        exec: &TestExecutor,
713        recovery_fn: RecoveryProfile,
714        defect_to_log: Defect,
715        defect_threshold: usize,
716    ) {
717        // Set the test time to start at time zero.
718        let start_time = MonotonicInstant::from_nanos(0);
719        exec.set_fake_time(start_time);
720
721        // The PHY recovery intervention that is expected.
722        let reset_phy_recommendation =
723            RecoveryAction::PhyRecovery(PhyRecoveryOperation::ResetPhy { phy_id: PHY_ID });
724
725        // Retain defects and recovery actions for 48 hours.
726        let mut defects = EventHistory::<Defect>::new(48 * 60 * 60);
727        let mut recoveries = EventHistory::<RecoveryAction>::new(48 * 60 * 60);
728
729        // Add failures until just under the threshold.
730        for _ in 0..(defect_threshold - 1) {
731            defects.add_event(defect_to_log);
732
733            // Verify that there is no recovery recommended
734            assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
735        }
736
737        // Add one more failure and verify that a PHY reset was recommended.
738        defects.add_event(defect_to_log);
739        assert_eq!(
740            Some(reset_phy_recommendation),
741            recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,)
742        );
743        recoveries.add_event(reset_phy_recommendation);
744
745        // Add another defect and verify that no recovery is recommended.
746        defects.add_event(defect_to_log);
747        assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
748
749        // Advance the clock 23 hours, log another defect, and verify no recovery is recommended.
750        exec.set_fake_time(MonotonicInstant::after(zx::MonotonicDuration::from_hours(23)));
751        defects.add_event(defect_to_log);
752        assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
753
754        // Advance the clock another 2 hours to get beyond the 24 hour throttle and verify that
755        // another occurrence of the defect results in a PHY reset recovery recommendation.
756        exec.set_fake_time(MonotonicInstant::after(zx::MonotonicDuration::from_hours(23)));
757        defects.add_event(defect_to_log);
758        assert_eq!(
759            Some(reset_phy_recommendation),
760            recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,)
761        );
762    }
763
764    fn test_thresholded_destroy_iface(
765        exec: &TestExecutor,
766        recovery_fn: RecoveryProfile,
767        defect_to_log: Defect,
768        defect_threshold: usize,
769    ) {
770        // Set the test time to start at time zero.
771        let start_time = MonotonicInstant::from_nanos(0);
772        exec.set_fake_time(start_time);
773
774        // The iface destruction intervention that is expected.
775        let destroy_iface_recommendation =
776            RecoveryAction::PhyRecovery(PhyRecoveryOperation::DestroyIface { iface_id: IFACE_ID });
777
778        // Retain defects and recovery actions for 48 hours.
779        let mut defects = EventHistory::<Defect>::new(48 * 60 * 60);
780        let mut recoveries = EventHistory::<RecoveryAction>::new(48 * 60 * 60);
781
782        // Add failures until just under the threshold.
783        for _ in 0..(defect_threshold - 1) {
784            defects.add_event(defect_to_log);
785
786            // Verify that there is no recovery recommended
787            assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
788        }
789
790        // Add one more failure and verify that a destroy iface was recommended.
791        defects.add_event(defect_to_log);
792        assert_eq!(
793            Some(destroy_iface_recommendation),
794            recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,)
795        );
796        recoveries.add_event(destroy_iface_recommendation);
797
798        // Add another defect and verify that no recovery is recommended.
799        defects.add_event(defect_to_log);
800        assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
801
802        // Advance the clock 11 hours, log another defect, and verify no recovery is recommended.
803        exec.set_fake_time(MonotonicInstant::after(zx::MonotonicDuration::from_hours(11)));
804        defects.add_event(defect_to_log);
805        assert_eq!(None, recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,));
806
807        // Advance the clock another 2 hours to get beyond the 12 hour throttle and verify that
808        // another occurrence of the defect results in a destroy iface recovery recommendation.
809        exec.set_fake_time(MonotonicInstant::after(zx::MonotonicDuration::from_hours(2)));
810        defects.add_event(defect_to_log);
811        assert_eq!(
812            Some(destroy_iface_recommendation),
813            recovery_fn(PHY_ID, &mut defects, &mut recoveries, defect_to_log,)
814        );
815    }
816
817    #[fuchsia::test]
818    fn test_scan_failure_recovery() {
819        let exec = TestExecutor::new_with_fake_time();
820        let defect_to_log = Defect::Iface(IfaceFailure::FailedScan { iface_id: IFACE_ID });
821        test_thresholded_iface_destruction_and_phy_reset(
822            &exec,
823            thresholded_scan_failure_recovery_profile,
824            defect_to_log,
825            SCAN_FAILURE_RECOVERY_THRESHOLD,
826        );
827    }
828
829    #[fuchsia::test]
830    fn test_empty_scan_results_recovery() {
831        let exec = TestExecutor::new_with_fake_time();
832        let defect_to_log = Defect::Iface(IfaceFailure::EmptyScanResults { iface_id: IFACE_ID });
833        test_thresholded_iface_destruction_and_phy_reset(
834            &exec,
835            thresholded_empty_scan_results_recovery_profile,
836            defect_to_log,
837            EMPTY_SCAN_RECOVERY_THRESHOLD,
838        );
839    }
840
841    #[fuchsia::test]
842    fn test_connect_failure_recovery() {
843        let exec = TestExecutor::new_with_fake_time();
844        let defect_to_log = Defect::Iface(IfaceFailure::ConnectionFailure { iface_id: IFACE_ID });
845        test_thresholded_iface_destruction_and_phy_reset(
846            &exec,
847            thresholded_connect_failure_recovery_profile,
848            defect_to_log,
849            CONNECT_FAILURE_RECOVERY_THRESHOLD,
850        );
851    }
852
853    #[fuchsia::test]
854    fn test_ap_start_failure_recovery() {
855        let exec = TestExecutor::new_with_fake_time();
856        let defect_to_log = Defect::Iface(IfaceFailure::ApStartFailure { iface_id: IFACE_ID });
857        test_thresholded_phy_reset(
858            &exec,
859            thresholded_ap_start_failure_recovery_profile,
860            defect_to_log,
861            AP_START_FAILURE_RECOVERY_THRESHOLD,
862        )
863    }
864
865    #[fuchsia::test]
866    fn test_create_iface_failure_recovery() {
867        let exec = TestExecutor::new_with_fake_time();
868        let defect_to_log = Defect::Phy(PhyFailure::IfaceCreationFailure { phy_id: PHY_ID });
869        test_thresholded_phy_reset(
870            &exec,
871            thresholded_create_iface_failure_recovery_profile,
872            defect_to_log,
873            CREATE_IFACE_FAILURE_RECOVERY_THRESHOLD,
874        )
875    }
876
877    #[fuchsia::test]
878    fn test_destroy_iface_failure_recovery() {
879        let exec = TestExecutor::new_with_fake_time();
880        let defect_to_log = Defect::Phy(PhyFailure::IfaceDestructionFailure { phy_id: PHY_ID });
881        test_thresholded_phy_reset(
882            &exec,
883            thresholded_destroy_iface_failure_recovery_profile,
884            defect_to_log,
885            DESTROY_IFACE_FAILURE_RECOVERY_THRESHOLD,
886        )
887    }
888
889    #[fuchsia::test]
890    fn test_canceled_scan_recovery() {
891        let exec = TestExecutor::new_with_fake_time();
892        let defect_to_log = Defect::Iface(IfaceFailure::CanceledScan { iface_id: IFACE_ID });
893        test_thresholded_destroy_iface(
894            &exec,
895            thresholded_canceled_scan_recovery_profile,
896            defect_to_log,
897            CANCELED_SCAN_RECOVERY_THRESHOLD,
898        )
899    }
900
901    #[test_case(
902        Defect::Iface(IfaceFailure::FailedScan { iface_id: IFACE_ID }),
903        SCAN_FAILURE_RECOVERY_THRESHOLD ;
904        "scan failure threshold test"
905    )]
906    #[test_case(
907        Defect::Iface(IfaceFailure::EmptyScanResults { iface_id: IFACE_ID }),
908        EMPTY_SCAN_RECOVERY_THRESHOLD ;
909        "empty scan threshold test"
910    )]
911    #[test_case(
912        Defect::Iface(IfaceFailure::ConnectionFailure { iface_id: IFACE_ID }),
913        CONNECT_FAILURE_RECOVERY_THRESHOLD ;
914        "connection failure threshold test"
915    )]
916    #[test_case(
917        Defect::Iface(IfaceFailure::ApStartFailure { iface_id: IFACE_ID }),
918        AP_START_FAILURE_RECOVERY_THRESHOLD ;
919        "start ap failure threshold test"
920    )]
921    #[test_case(
922        Defect::Phy(PhyFailure::IfaceCreationFailure { phy_id: PHY_ID }),
923        CREATE_IFACE_FAILURE_RECOVERY_THRESHOLD ;
924        "create iface failure threshold test"
925    )]
926    #[test_case(
927        Defect::Phy(PhyFailure::IfaceDestructionFailure { phy_id: PHY_ID }),
928        DESTROY_IFACE_FAILURE_RECOVERY_THRESHOLD ;
929        "destroy iface failure threshold test"
930    )]
931    #[test_case(
932        Defect::Iface(IfaceFailure::CanceledScan { iface_id: IFACE_ID }),
933        CANCELED_SCAN_RECOVERY_THRESHOLD ;
934        "canceled scan threshold test"
935    )]
936    #[fuchsia::test(add_test_attr = false)]
937    fn test_thresholded_recovery(defect: Defect, threshold: usize) {
938        let exec = TestExecutor::new_with_fake_time();
939        let profile = lookup_recovery_profile("thresholded_recovery");
940
941        match defect {
942            Defect::Iface(IfaceFailure::FailedScan { .. })
943            | Defect::Iface(IfaceFailure::EmptyScanResults { .. })
944            | Defect::Iface(IfaceFailure::ConnectionFailure { .. }) => {
945                test_thresholded_iface_destruction_and_phy_reset(&exec, profile, defect, threshold)
946            }
947            Defect::Iface(IfaceFailure::ApStartFailure { .. })
948            | Defect::Phy(PhyFailure::IfaceCreationFailure { .. })
949            | Defect::Phy(PhyFailure::IfaceDestructionFailure { .. }) => {
950                test_thresholded_phy_reset(&exec, profile, defect, threshold)
951            }
952            Defect::Iface(IfaceFailure::CanceledScan { .. }) => {
953                test_thresholded_destroy_iface(&exec, profile, defect, threshold)
954            }
955            Defect::Iface(IfaceFailure::Timeout { .. }) => {
956                // TODO(b/42066276): Determine thresholds and recovery options.
957            }
958        }
959    }
960}