starnix_core/task/
hr_timer_manager.rs

// Copyright 2024 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use crate::power::{
    OnWakeOps, OwnedMessageCounterHandle, SharedMessageCounter,
    create_proxy_for_wake_events_counter_zero,
};
use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
use crate::task::{CurrentTask, Kernel, LockedAndTask, TargetTime};
use crate::vfs::timer::{TimelineChangeObserver, TimerOps};
use anyhow::{Context, Result};
use fuchsia_inspect::ArrayProperty;
use futures::channel::mpsc::{self, UnboundedReceiver, UnboundedSender};
use futures::{FutureExt, SinkExt, StreamExt, select};
use scopeguard::defer;
use starnix_logging::{log_debug, log_error, log_info, log_warn};
use starnix_sync::{Mutex, MutexGuard};
use starnix_uapi::errors::Errno;
use starnix_uapi::{errno, from_status_like_fdio};
use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, OnceLock, Weak};
use zx::{self as zx, AsHandleRef, HandleBased, HandleRef};
use {fidl_fuchsia_time_alarms as fta, fuchsia_async as fasync, fuchsia_trace as ftrace};

/// Maximum number of events retained in the inspect event history.
const INSPECT_EVENT_BUFFER_SIZE: usize = 128;

fn to_errno_with_log<T: std::fmt::Debug>(v: T) -> Errno {
    log_error!("hr_timer_manager internal error: {v:?}");
    from_status_like_fdio!(zx::Status::IO)
}

fn signal_event(
    event: &zx::Event,
    clear_mask: zx::Signals,
    set_mask: zx::Signals,
) -> Result<(), zx::Status> {
    event
        .signal(clear_mask, set_mask)
        .inspect_err(|err| log_error!(err:?, clear_mask:?, set_mask:?; "while signaling event"))
}

fn duplicate_handle<H: HandleBased>(h: &H) -> Result<H, Errno> {
    h.duplicate_handle(zx::Rights::SAME_RIGHTS).map_err(|status| from_status_like_fdio!(status))
}

const TIMEOUT_SECONDS: i64 = 40;

/// Waits forever synchronously for EVENT_SIGNALED.
///
/// There is no useful scenario in which this wait times out and we can continue operating.
fn wait_signaled_sync(event: &zx::Event) -> zx::WaitResult {
    let mut logged = false;
    loop {
        let timeout =
            zx::MonotonicInstant::after(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
        let result = event.wait_one(zx::Signals::EVENT_SIGNALED, timeout);
        if let zx::WaitResult::Ok(_) = result {
            if logged {
                log_error!(
                    "wait_signaled_sync: signal resolved. See HrTimer bug: b/454085350: result={result:?}",
                );
            }
            return result;
        }
        fuchsia_trace::instant!(
            "alarms",
            "starnix:hrtimer:wait_timeout",
            fuchsia_trace::Scope::Process
        );
        // This is bad and should never happen. If it does, it's a bug that has to be found and
        // fixed. There is no good way to proceed if these signals are not being signaled properly.
        log_error!(
            // LINT.IfChange(hrtimer_wait_signaled_sync_tefmo)
            "wait_signaled_sync: not signaled yet. See HrTimer bug: b/454085350: result={result:?}",
            // LINT.ThenChange(//tools/testing/tefmocheck/string_in_log_check.go:hrtimer_wait_signaled_sync_tefmo)
        );
        if !logged {
            #[cfg(all(target_os = "fuchsia", not(doc)))]
            ::debug::backtrace_request_all_threads();
            logged = true;
        }
    }
}

/// A macro that waits on a future, logging a warning and a stack trace if the
/// future takes longer than `TIMEOUT_SECONDS` to resolve.
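///
/// Usage sketch, mirroring the call sites in this file (the event being
/// awaited is illustrative):
///
/// ```ignore
/// let result = log_long_op!(fasync::OnSignals::new(&event, zx::Signals::EVENT_SIGNALED));
/// ```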
macro_rules! log_long_op {
    ($fut:expr) => {{
        use futures::FutureExt;
        let fut = $fut;
        futures::pin_mut!(fut);
        let mut logged = false;
        loop {
            let timeout = fasync::Timer::new(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
            futures::select! {
                res = fut.as_mut().fuse() => {
                    if logged {
                        log_warn!("unexpected blocking is now resolved: long-running async operation at {}:{}. See HrTimer bug: b/428223204", file!(), line!());
                    }
                    break res;
                }
                _ = timeout.fuse() => {
                    log_warn!("unexpected blocking: long-running async operation at {}:{}. See HrTimer bug: b/428223204",
                        file!(), line!());
                    if !logged {
                        #[cfg(all(target_os = "fuchsia", not(doc)))]
                        ::debug::backtrace_request_all_threads();
                    }
                    logged = true;
                }
            }
        }
    }};
}

/// Waits forever asynchronously for EVENT_SIGNALED.
async fn wait_signaled<H: HandleBased>(handle: &H) -> Result<()> {
    log_long_op!(fasync::OnSignals::new(handle, zx::Signals::EVENT_SIGNALED))
        .context("hr_timer_manager:wait_signaled")?;
    Ok(())
}

/// Cancels an alarm by ID.
async fn cancel_by_id(
    _message_counter: &SharedMessageCounter,
    timer_state: Option<TimerState>,
    timer_id: &zx::Koid,
    proxy: &fta::WakeAlarmsProxy,
    interval_timers_pending_reschedule: &mut HashMap<zx::Koid, SharedMessageCounter>,
    task_by_timer_id: &mut HashMap<zx::Koid, fasync::Task<()>>,
    alarm_id: &str,
) {
    if let Some(task) = task_by_timer_id.remove(timer_id) {
        // Let this task complete and get removed.
        task.detach();
    }
    if let Some(timer_state) = timer_state {
        ftrace::duration!("alarms", "starnix:hrtimer:cancel_by_id", "timer_id" => *timer_id);
        log_debug!("cancel_by_id: START canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);
        proxy.cancel(&alarm_id).expect("infallible");
        log_debug!("cancel_by_id: 1/2 canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);

        // Let the timer closure complete before continuing.
        let _ = log_long_op!(timer_state.task);

        // If this timer is an interval timer, we must remove it from the pending reschedule list.
        // This does not affect container suspend, since `_message_counter` is live. It's a no-op
        // for other timers.
        interval_timers_pending_reschedule.remove(timer_id);
        log_debug!("cancel_by_id: 2/2 DONE canceling timer: {timer_id:?}: alarm_id: {alarm_id}");
    }
}

/// Called when the underlying wake alarms manager reports a `fta::WakeAlarmsError`
/// as a result of a call to `set_and_wait`.
fn process_alarm_protocol_error(
    pending: &mut HashMap<zx::Koid, TimerState>,
    timer_id: &zx::Koid,
    error: fta::WakeAlarmsError,
) -> Option<TimerState> {
    match error {
        fta::WakeAlarmsError::Unspecified => {
            log_warn!(
                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unspecified error: {error:?}"
            );
            pending.remove(timer_id)
        }
        fta::WakeAlarmsError::Dropped => {
            log_debug!("watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: alarm dropped: {error:?}");
            // Do not remove a Dropped timer here, in contrast to other error states: a Dropped
            // timer is a result of a Stop or a Cancel ahead of a reschedule. In both cases, that
            // code takes care of removing the timer from the pending timers list.
            None
        }
        error => {
            log_warn!(
                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unexpected error: {error:?}"
            );
            pending.remove(timer_id)
        }
    }
}

// This function is swapped out for an injected proxy in tests.
fn connect_to_wake_alarms_async() -> Result<zx::Channel, Errno> {
    log_debug!("connecting to wake alarms");
    let (client, server) = zx::Channel::create();
    fuchsia_component::client::connect_channel_to_protocol::<fta::WakeAlarmsMarker>(server)
        .map(|()| client)
        .map_err(|err| {
            errno!(EINVAL, format!("Failed to connect to fuchsia.time.alarms/Wake: {err}"))
        })
}

#[derive(Debug)]
enum InspectHrTimerEvent {
    Add,
    Update,
    Remove,
    // The String inside is used in `fmt`, but the compiler does not recognize that use when
    // formatting via the derived `Debug` implementation.
    Error(#[allow(dead_code)] String),
}

impl InspectHrTimerEvent {
    fn retain_err(prev_len: usize, after_len: usize, context: &str) -> InspectHrTimerEvent {
        InspectHrTimerEvent::Error(format!(
            "pending timer count changed unexpectedly: before len: {prev_len}, after len: {after_len}, context: {context}",
        ))
    }
}

impl std::fmt::Display for InspectHrTimerEvent {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self)
    }
}

#[derive(Debug)]
struct InspectEvent {
    event_type: InspectHrTimerEvent,
    created_at: zx::BootInstant,
    deadline: Option<TargetTime>,
}

impl InspectEvent {
    fn new(event_type: InspectHrTimerEvent, deadline: Option<TargetTime>) -> Self {
        Self { event_type, created_at: zx::BootInstant::get(), deadline }
    }
}

#[derive(Debug)]
struct TimerState {
    /// The task that waits for the timer to expire.
    task: fasync::Task<()>,
    /// The desired deadline for the timer.
    deadline: TargetTime,
}

impl std::fmt::Display for TimerState {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "TimerState[deadline:{:?}]", self.deadline)
    }
}

struct HrTimerManagerState {
    /// All pending timers are stored here.
    pending_timers: HashMap<zx::Koid, TimerState>,

    /// The message counter that is registered with the runner to allow the hrtimer to wake
    /// the kernel. Optional, because we want the ability to inject a counter in tests.
    message_counter: Option<OwnedMessageCounterHandle>,

    /// For recording timer events.
    events: VecDeque<InspectEvent>,

    /// The last timestamp at which the hrtimer loop was started.
    last_loop_started_timestamp: zx::BootInstant,

    /// The last timestamp at which the hrtimer loop was completed.
    last_loop_completed_timestamp: zx::BootInstant,

    // Debug progress counter for Cmd::Start.
    // TODO: b/454085350 - remove once diagnosed.
    debug_start_stage_counter: u64,
}

impl HrTimerManagerState {
    fn new(_parent_node: &fuchsia_inspect::Node) -> Self {
        Self {
            pending_timers: HashMap::new(),
            // Initialized later in the State's lifecycle because it only becomes
            // available after making a connection to the wake proxy.
            message_counter: None,
            events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
            last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
            last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
            debug_start_stage_counter: 0,
        }
    }

    fn get_pending_timers_count(&self) -> usize {
        self.pending_timers.len()
    }

    /// Gets a new shareable instance of the message counter.
    fn share_message_counter(&self, new_pending_message: bool) -> SharedMessageCounter {
        let counter_ref =
            self.message_counter.as_ref().expect("message_counter is None, but should not be.");
        counter_ref.share(new_pending_message)
    }

    fn record_events(&self, node: &fuchsia_inspect::Node) {
        let events_node = node.create_child("events");
        for (i, event) in self.events.iter().enumerate() {
            let child = events_node.create_child(i.to_string());
            child.record_string("type", event.event_type.to_string());
            child.record_int("created_at", event.created_at.into_nanos());
            if let Some(deadline) = event.deadline {
                child.record_int("deadline", deadline.estimate_boot().unwrap().into_nanos());
            }
            events_node.record(child);
        }
        node.record(events_node);
    }
}

/// Asynchronous commands sent to `watch_new_hrtimer_loop`.
///
/// The synchronous methods on HrTimerManager use these commands to communicate
/// with the alarm manager actor loop that runs in `watch_new_hrtimer_loop`.
///
/// This avoids sharing state between the synchronous and async
/// methods of `HrTimerManager`.
#[derive(Debug)]
enum Cmd {
    /// Start the timer contained in `new_timer_node`.
    /// The processing loop will signal `done` to allow synchronous
    /// return from scheduling an async Cmd::Start.
    Start {
        new_timer_node: HrTimerNode,
        /// Signaled once the timer is started.
        done: zx::Event,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Stop the timer noted below. `done` is similar to above.
    Stop {
        /// The timer to stop.
        timer: HrTimerHandle,
        /// Signaled once the timer is stopped.
        done: zx::Event,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// A wake alarm occurred.
    Alarm {
        /// The affected timer's node.
        new_timer_node: HrTimerNode,
        /// The wake lease provided by the underlying API.
        lease: zx::EventPair,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Install a timeline change monitor.
    MonitorUtc { timer: HrTimerHandle, counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool> },
}
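
// All synchronous senders follow the same pattern: enqueue a command carrying
// a duplicated `done` event, then block until the processing loop signals it.
// A minimal sketch mirroring `add_timer` below (`sender`, `timer_node`, and
// `message_counter` are illustrative bindings):
//
//   let done = zx::Event::create();
//   sender.unbounded_send(Cmd::Start {
//       new_timer_node: timer_node,
//       done: duplicate_handle(&done)?,
//       message_counter,
//   })?;
//   wait_signaled_sync(&done); // returns once the wake alarm is scheduled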

// Increments `counter` every time the UTC timeline changes.
//
// This counter is shared with UTC timers to provide UTC timeline change notification.
//
// Use cases:
//
// 1. Counter's value counts the number of times the UTC timeline changed, which is used in timer
//    `read` calls to report the number of encountered changes, as required by `read`.
//
// 2. Counter's `COUNTER_POSITIVE` signal is used in `wait_async` calls on timers, as Starnix must
//    wake such timers whenever a timeline change happens. The counter reader must reset the
//    counter to zero after reading its value to allow for a next wake.
//
// Other primitives are not appropriate to use here: an Event does not remember how many times it
// has been signaled, so does not fulfill (1). Atomics don't generate a signal on increment, so
// don't satisfy (2). Conversely, the `wait_async` machinery on timers can already deal with
// HandleBased objects, so a Counter can be readily used there.
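//
// A consumer-side sketch of use case (2), assuming `counter` is a duplicate of
// the counter incremented below, and that `read` and `add` are the standard
// zx::Counter operations (error handling elided):
//
//   fasync::OnSignals::new(&counter, zx::Signals::COUNTER_POSITIVE).await?;
//   let changes = counter.read()?; // number of timeline changes observed
//   counter.add(-changes)?;        // subtract what was read to re-arm the signal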
async fn run_utc_timeline_monitor(counter: zx::Counter, mut recv: mpsc::UnboundedReceiver<bool>) {
    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: enter");
    let utc_handle = crate::time::utc::duplicate_real_utc_clock_handle().inspect_err(|err| {
        log_error!("run_utc_timeline_monitor: could not monitor UTC timeline: {err:?}")
    });
    if let Ok(utc_handle) = utc_handle {
        let koid = utc_handle.as_handle_ref().get_koid();
        log_debug!(
            "run_utc_timeline_monitor: monitoring UTC clock timeline: enter: UTC clock koid={koid:?}, counter={counter:?}"
        );
        let utc_handle = std::rc::Rc::new(utc_handle);
        let utc_handle_fn = || utc_handle.clone();
        let mut interested = false;
        loop {
            let utc_handle = utc_handle_fn();
            // CLOCK_UPDATED is auto-cleared.
            let mut updated_fut =
                fasync::OnSignals::new(utc_handle.as_handle_ref(), zx::Signals::CLOCK_UPDATED)
                    .fuse();
            let mut interest_fut = recv.next();
            select! {
                result = updated_fut => {
                    if result.is_err() {
                        log_warn!("run_utc_timeline_monitor: could not wait on signals: {:?}, counter={counter:?}", result);
                        break;
                    }
                    if interested {
                        log_debug!("run_utc_timeline_monitor: UTC timeline updated, counter: {counter:?}");
                        // The consumer of this `counter` should wait for COUNTER_POSITIVE, and
                        // once it observes the value of the counter, subtract the read value from
                        // counter.
                        counter
                            .add(1)
                            // Ignore the error after logging it. Should we exit the loop here?
                            .inspect_err(|err| {
                                log_error!("run_utc_timeline_monitor: could not increment counter: {err:?}")
                            })
                            .unwrap_or(());
                    }
                },
                result = interest_fut => {
                    if let Some(interest) = result {
                        log_debug!("interest change: {counter:?}, interest: {interest:?}");
                        interested = interest;
                    }
                },
            };
        }
    }
    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: exit");
}

/// The manager for high-resolution timers.
///
/// This manager is responsible for creating and managing high-resolution timers.
pub struct HrTimerManager {
    state: Mutex<HrTimerManagerState>,

    /// The channel sender that notifies the worker thread that the HrTimer driver needs to be
    /// (re)started with a new deadline.
    start_next_sender: OnceLock<UnboundedSender<Cmd>>,
}
pub type HrTimerManagerHandle = Arc<HrTimerManager>;

impl HrTimerManager {
    pub fn new(parent_node: &fuchsia_inspect::Node) -> HrTimerManagerHandle {
        let inspect_node = parent_node.create_child("hr_timer_manager");
        let new_manager = Arc::new(Self {
            state: Mutex::new(HrTimerManagerState::new(&inspect_node)),
            start_next_sender: Default::default(),
        });
        let manager_weak = Arc::downgrade(&new_manager);

        // Create a lazy inspect node to get HrTimerManager info at read-time.
        inspect_node.record_lazy_child("hr_timer_manager", move || {
            let manager_ref = manager_weak.upgrade().expect("inner HrTimerManager");
            async move {
                // This gets the clock value directly from the kernel; it is not subject
                // to the local runner's clock.
                let now = zx::BootInstant::get();

                let inspector = fuchsia_inspect::Inspector::default();
                inspector.root().record_int("now_ns", now.into_nanos());

                let (
                    timers,
                    pending_timers_count,
                    message_counter,
                    loop_started,
                    loop_completed,
                    debug_start_stage_counter,
                ) = {
                    let guard = manager_ref.lock();
                    (
                        guard
                            .pending_timers
                            .iter()
                            .map(|(k, v)| (*k, v.deadline))
                            .collect::<Vec<_>>(),
                        guard.get_pending_timers_count(),
                        guard.message_counter.as_ref().map(|c| c.to_string()).unwrap_or_default(),
                        guard.last_loop_started_timestamp,
                        guard.last_loop_completed_timestamp,
                        guard.debug_start_stage_counter,
                    )
                };
                inspector.root().record_uint("pending_timers_count", pending_timers_count as u64);
                inspector.root().record_string("message_counter", message_counter);

                // These are the deadlines we are currently waiting for. The format is:
                // `alarm koid` -> `deadline nanos` (remains: `duration until alarm nanos`)
                let deadlines = inspector.root().create_string_array("timers", timers.len());
                for (i, (k, v)) in timers.into_iter().enumerate() {
                    let remaining = v.estimate_boot().unwrap() - now;
                    deadlines.set(
                        i,
                        format!(
                            "{k:?} -> {v} ns (remains: {})",
                            time_pretty::format_duration(remaining)
                        ),
                    );
                }
                inspector.root().record(deadlines);

                inspector.root().record_int("last_loop_started_at_ns", loop_started.into_nanos());
                inspector
                    .root()
                    .record_int("last_loop_completed_at_ns", loop_completed.into_nanos());
                inspector
                    .root()
                    .record_uint("debug_start_stage_counter", debug_start_stage_counter);

                {
                    let guard = manager_ref.lock();
                    guard.record_events(inspector.root());
                }

                Ok(inspector)
            }
            .boxed()
        });
        parent_node.record(inspect_node);
        new_manager
    }

    /// Get a copy of the sender channel used for passing async commands to the
    /// event processing loop.
    fn get_sender(&self) -> UnboundedSender<Cmd> {
        self.start_next_sender.get().expect("start_next_sender is initialized").clone()
    }

    /// Returns an observer whose counter tallies the changes of the UTC timeline.
    ///
    /// # Args
    /// - `timer`: the handle of the timer that needs monitoring of timeline changes.
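    ///
    /// A usage sketch (the timer and the `manager` binding are illustrative; see
    /// `TimerOps::get_timeline_change_observer` below for the real call site):
    ///
    /// ```ignore
    /// let timer = HrTimer::new();
    /// let observer = manager.get_timeline_change_observer(&timer)?;
    /// // The observer's counter is incremented on every UTC timeline change.
    /// ```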
    pub fn get_timeline_change_observer(
        &self,
        timer: &HrTimerHandle,
    ) -> Result<TimelineChangeObserver, Errno> {
        let timer_id = timer.get_id();
        let counter = zx::Counter::create();
        let counter_clone = duplicate_handle(&counter).map_err(|err| {
            log_error!("could not duplicate handle: {err:?}");
            errno!(EINVAL, format!("could not duplicate handle: {err}, {timer_id:?}"))
        })?;
        let (send, recv) = mpsc::unbounded();
        self.get_sender()
            .unbounded_send(Cmd::MonitorUtc { timer: timer.clone(), counter, recv })
            .map_err(|err| {
            log_error!("could not send: {err:?}");
            errno!(EINVAL, format!("could not send Cmd::Monitor: {err}, {timer_id:?}"))
        })?;
        Ok(TimelineChangeObserver::new(counter_clone, send))
    }

    /// Initialize the [HrTimerManager] in the context of the current system task.
    pub fn init(self: &HrTimerManagerHandle, system_task: &CurrentTask) -> Result<(), Errno> {
        self.init_internal(
            system_task,
            /*wake_channel_for_test=*/ None,
            /*message_counter_for_test=*/ None,
        )
    }

    // Call this init for testing instead of the one above.
    fn init_internal(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        // Can be injected for testing.
        wake_channel_for_test: Option<zx::Channel>,
        // Can be injected for testing.
        message_counter_for_test: Option<zx::Counter>,
    ) -> Result<(), Errno> {
        let (start_next_sender, start_next_receiver) = mpsc::unbounded();
        self.start_next_sender.set(start_next_sender).map_err(|_| errno!(EEXIST))?;

        let self_ref = self.clone();

        // Ensure that all internal init has completed in `watch_new_hrtimer_loop`
        // before proceeding from here.
        let setup_done = zx::Event::create();
        let setup_done_clone = duplicate_handle(&setup_done)?;

        let closure = async move |locked_and_task: LockedAndTask<'_>| {
            let current_thread = std::thread::current();
            // Helps find the thread in backtraces, see wait_signaled_sync.
            log_info!(
                "hr_timer_manager thread: {:?} ({:?})",
                current_thread.name(),
                current_thread.id()
            );
            if let Err(e) = self_ref
                .watch_new_hrtimer_loop(
                    locked_and_task.current_task(),
                    start_next_receiver,
                    wake_channel_for_test,
                    message_counter_for_test,
                    Some(setup_done_clone),
                )
                .await
            {
                log_error!("while running watch_new_hrtimer_loop: {e:?}");
            }
            log_warn!("hr_timer_manager: finished kernel thread. should never happen in prod code");
        };
        let req = SpawnRequestBuilder::new()
            .with_debug_name("hr-timer-manager")
            .with_async_closure(closure)
            .build();
        system_task.kernel().kthreads.spawner().spawn_from_request(req);
        wait_signaled_sync(&setup_done)
            .to_result()
            .map_err(|status| from_status_like_fdio!(status))?;

        Ok(())
    }

    // Notifies `timer` and wake sources about a triggered alarm.
    fn notify_timer(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        timer: &HrTimerNode,
        lease: impl HandleBased,
    ) -> Result<()> {
        let timer_id = timer.hr_timer.get_id();
        log_debug!("watch_new_hrtimer_loop: Cmd::Alarm: triggered alarm: {:?}", timer_id);
        ftrace::duration!("alarms", "starnix:hrtimer:notify_timer", "timer_id" => timer_id);
        self.lock().pending_timers.remove(&timer_id).map(|s| s.task.detach());
        signal_event(&timer.hr_timer.event(), zx::Signals::NONE, zx::Signals::TIMER_SIGNALED)
            .context("notify_timer: hrtimer signal handle")?;

        // Handle wake source here.
        let wake_source = timer.wake_source.clone();
        if let Some(wake_source) = wake_source.as_ref().and_then(|f| f.upgrade()) {
            let lease_token = lease.into_handle();
            wake_source.on_wake(system_task, &lease_token);
            // Drop the baton lease after wake leases in the associated epfd
            // are activated.
            drop(lease_token);
        }
        ftrace::instant!("alarms", "starnix:hrtimer:notify_timer:drop_lease", ftrace::Scope::Process, "timer_id" => timer_id);
        Ok(())
    }

    // Sets the provided `message_counter` as the manager's counter, unless one
    // is already present (tests may have injected a fake counter earlier).
    fn inject_or_set_message_counter(
        self: &HrTimerManagerHandle,
        message_counter: OwnedMessageCounterHandle,
    ) {
        let mut guard = self.lock();
        if guard.message_counter.is_none() {
            guard.message_counter = Some(message_counter);
        }
    }

    fn record_inspect_on_stop(
        self: &HrTimerManagerHandle,
        guard: &mut MutexGuard<'_, HrTimerManagerState>,
        prev_len: usize,
    ) {
        let after_len = guard.get_pending_timers_count();
        let inspect_event_type = if after_len == prev_len {
            None
        } else if after_len == prev_len - 1 {
            Some(InspectHrTimerEvent::Remove)
        } else {
            Some(InspectHrTimerEvent::retain_err(prev_len, after_len, "removing timer"))
        };
        if let Some(inspect_event_type) = inspect_event_type {
            self.record_event(guard, inspect_event_type, None);
        }
    }

    fn record_inspect_on_start(
        self: &HrTimerManagerHandle,
        guard: &mut MutexGuard<'_, HrTimerManagerState>,
        timer_id: zx::Koid,
        task: fasync::Task<()>,
        deadline: TargetTime,
        prev_len: usize,
    ) {
        guard
            .pending_timers
            .insert(timer_id, TimerState { task, deadline })
            .map(|timer_state| {
                // This should not happen; at this point we already canceled
                // any previous instances of the same wake alarm.
                log_debug!(
                    "watch_new_hrtimer_loop: removing timer task in Cmd::Start: {:?}",
                    timer_state
                );
                timer_state
            })
            .map(|v| v.task.detach());

        // Record the inspect event.
        let after_len = guard.get_pending_timers_count();
        let inspect_event_type = if after_len == prev_len {
            InspectHrTimerEvent::Update
        } else if after_len == prev_len + 1 {
            InspectHrTimerEvent::Add
        } else {
            InspectHrTimerEvent::retain_err(prev_len, after_len, "adding timer")
        };
        self.record_event(guard, inspect_event_type, Some(deadline));
    }

    /// Timer handler loop.
    ///
    /// # Args:
    /// - `wake_channel_for_test`: a channel implementing `fuchsia.time.alarms/Wake`
    ///   injected by tests only.
    /// - `message_counter_for_test`: a zx::Counter injected only by tests, to
    ///   emulate the wake proxy message counter.
    /// - `setup_done`: signaled once the initial loop setup is complete. Allows
    ///   pausing any async callers until this loop is in a runnable state.
    async fn watch_new_hrtimer_loop(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        mut start_next_receiver: UnboundedReceiver<Cmd>,
        mut wake_channel_for_test: Option<zx::Channel>,
        message_counter_for_test: Option<zx::Counter>,
        setup_done: Option<zx::Event>,
    ) -> Result<()> {
        // The values assigned to the counter are arbitrary, but unique for each assignment.
        // They should give us hints as to where a deadlock occurred, if one ever does.
        // We also take stack traces, but stack traces do not capture async stacks presently.
        self.lock().debug_start_stage_counter = 1005;
        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init", ftrace::Scope::Process);
        defer! {
            log_warn!("watch_new_hrtimer_loop: exiting. This should only happen in tests.");
        }

        let wake_channel = wake_channel_for_test.take().unwrap_or_else(|| {
            connect_to_wake_alarms_async().expect("connection to wake alarms async proxy")
        });
        self.lock().debug_start_stage_counter = 1004;

        let counter_name = "wake-alarms";
        let (device_channel, counter) = if let Some(message_counter) = message_counter_for_test {
            // For tests only.
            (wake_channel, message_counter)
        } else {
            create_proxy_for_wake_events_counter_zero(wake_channel, counter_name.to_string())
        };
        self.lock().debug_start_stage_counter = 1003;
        let message_counter = system_task
            .kernel()
            .suspend_resume_manager
            .add_message_counter(counter_name, Some(counter));
        self.inject_or_set_message_counter(message_counter.clone());
        setup_done
            .as_ref()
            .map(|e| signal_event(e, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED));

        self.lock().debug_start_stage_counter = 1002;
        let device_async_proxy =
            fta::WakeAlarmsProxy::new(fidl::AsyncChannel::from_channel(device_channel));

        // Contains suspend locks for interval (periodic) timers that expired, but have not been
        // rescheduled yet. This allows us to defer container suspend until all such timers have
        // been rescheduled.
        // TODO: b/418813184 - Remove in favor of Fuchsia-specific interval timer support
        // once it is available.
        let mut interval_timers_pending_reschedule: HashMap<zx::Koid, SharedMessageCounter> =
            HashMap::new();

        // Per-timer tasks.
        let mut task_by_timer_id: HashMap<zx::Koid, fasync::Task<()>> = HashMap::new();

        self.lock().debug_start_stage_counter = 1001;
        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init_done", ftrace::Scope::Process);
        while let Some(cmd) = start_next_receiver.next().await {
            {
                let mut guard = self.lock();
                guard.debug_start_stage_counter = 1002;
                guard.last_loop_started_timestamp = zx::BootInstant::get();
            }
            ftrace::duration!("alarms", "start_next_receiver:loop");

            log_debug!("watch_new_hrtimer_loop: got command: {cmd:?}");
            self.lock().debug_start_stage_counter = 0;
            match cmd {
                // A new timer needs to be started. The timer node for the timer
                // is provided, and `done` must be signaled once the setup is
                // complete.
                Cmd::Start { new_timer_node, done, message_counter } => {
                    self.lock().debug_start_stage_counter = 1;
                    defer! {
                        // Allow add_timer to proceed once command processing is done.
                        signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED).map_err(|err| to_errno_with_log(err)).expect("event can be signaled");
                    }

                    let hr_timer = &new_timer_node.hr_timer;
                    let timer_id = hr_timer.get_id();
                    let wake_alarm_id = hr_timer.wake_alarm_id();
                    let trace_id = hr_timer.trace_id();
                    log_debug!(
                        "watch_new_hrtimer_loop: Cmd::Start: timer_id: {:?}, wake_alarm_id: {}",
                        timer_id,
                        wake_alarm_id
                    );
                    ftrace::duration!("alarms", "starnix:hrtimer:start", "timer_id" => timer_id);
                    ftrace::flow_begin!("alarms", "hrtimer_lifecycle", trace_id);

                    self.lock().debug_start_stage_counter = 2;
                    let maybe_cancel = self.lock().pending_timers.remove(&timer_id);
                    log_long_op!(cancel_by_id(
                        &message_counter,
                        maybe_cancel,
                        &timer_id,
                        &device_async_proxy,
                        &mut interval_timers_pending_reschedule,
                        &mut task_by_timer_id,
                        &wake_alarm_id,
                    ));
                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_pre_start", ftrace::Scope::Process, "timer_id" => timer_id);

                    // Signaled when the timer completed setup. We cannot forward `done` because
                    // we have post-schedule work as well.
                    let setup_event = zx::Event::create();
                    let deadline = new_timer_node.deadline;

                    ftrace::duration!("alarms", "starnix:hrtimer:signaled", "timer_id" => timer_id);

                    self.lock().debug_start_stage_counter = 3;
                    // Make the request here; it is moved into the closure below. Current FIDL
                    // semantics ensure that even though we do not `.await` on this future, a
                    // request to schedule a wake alarm based on this timer will be sent.
                    let request_fut = match deadline {
                        TargetTime::Monotonic(_) => {
                            // If we hit this, it's a Starnix bug.
                            panic!("can not schedule wake alarm on monotonic timeline")
                        }
                        TargetTime::BootInstant(boot_instant) => device_async_proxy.set_and_wait(
                            boot_instant,
                            fta::SetMode::NotifySetupDone(duplicate_handle(&setup_event)?),
                            &wake_alarm_id,
                        ),
                        TargetTime::RealTime(utc_instant) => device_async_proxy.set_and_wait_utc(
                            &fta::InstantUtc { timestamp_utc: utc_instant.into_nanos() },
                            fta::SetMode::NotifySetupDone(duplicate_handle(&setup_event)?),
                            &wake_alarm_id,
                        ),
                    };
                    let mut done_sender = self.get_sender();
                    let prev_len = self.lock().get_pending_timers_count();

                    self.lock().debug_start_stage_counter = 4;
                    let self_clone = self.clone();
                    let task = fasync::Task::local(async move {
                        log_debug!(
                            "wake_alarm_future: set_and_wait will block here: {wake_alarm_id:?}"
                        );
                        ftrace::instant!("alarms", "starnix:hrtimer:wait", ftrace::Scope::Process, "timer_id" => timer_id);
                        ftrace::flow_step!("alarms", "hrtimer_lifecycle", trace_id);

                        // Wait for this timer to expire. This wait can be arbitrarily long.
                        let response = request_fut.await;

                        // The counter was already incremented by the wake proxy when the alarm fired.
                        let message_counter = self_clone.lock().share_message_counter(false);
                        ftrace::instant!("alarms", "starnix:hrtimer:wake", ftrace::Scope::Process, "timer_id" => timer_id);

                        log_debug!("wake_alarm_future: set_and_wait over: {:?}", response);
                        match response {
                            // Alarm. This must be processed in the main loop because notification
                            // requires access to &CurrentTask, which is not available here. So we
                            // only forward it.
                            Ok(Ok(lease)) => {
                                log_long_op!(done_sender.send(Cmd::Alarm {
                                    new_timer_node,
                                    lease,
                                    message_counter
                                }))
                                .expect("infallible");
                            }
                            Ok(Err(error)) => {
                                ftrace::duration!("alarms", "starnix:hrtimer:wake_error", "timer_id" => timer_id);
                                log_debug!(
                                    "wake_alarm_future: protocol error: {error:?}: timer_id: {timer_id:?}"
                                );
                                let mut guard = self_clone.lock();
                                let pending = &mut guard.pending_timers;
                                process_alarm_protocol_error(pending, &timer_id, error);
                            }
                            Err(error) => {
                                ftrace::duration!("alarms", "starnix:hrtimer:fidl_error", "timer_id" => timer_id);
                                log_debug!(
                                    "wake_alarm_future: FIDL error: {error:?}: timer_id: {timer_id:?}"
                                );
                                self_clone.lock().pending_timers.remove(&timer_id);
                            }
                        }
                        log_debug!("wake_alarm_future: closure done for timer_id: {timer_id:?}");
                    });
                    self.lock().debug_start_stage_counter = 5;
                    ftrace::instant!("alarms", "starnix:hrtimer:pre_setup_event_signal", ftrace::Scope::Process, "timer_id" => timer_id);

                    // This should be almost instantaneous. Blocking for a long time here is a
                    // bug.
                    log_long_op!(wait_signaled(&setup_event)).map_err(|e| to_errno_with_log(e))?;
                    ftrace::instant!("alarms", "starnix:hrtimer:setup_event_signaled", ftrace::Scope::Process, "timer_id" => timer_id);
                    let mut guard = self.lock();
                    guard.debug_start_stage_counter = 6;
                    self.record_inspect_on_start(&mut guard, timer_id, task, deadline, prev_len);
                    log_debug!("Cmd::Start scheduled: timer_id: {:?}", timer_id);
                    guard.debug_start_stage_counter = 999;
                }
                Cmd::Alarm { new_timer_node, lease, message_counter } => {
                    self.lock().debug_start_stage_counter = 10;
                    let timer = &new_timer_node.hr_timer;
                    let timer_id = timer.get_id();
                    ftrace::duration!("alarms", "starnix:hrtimer:alarm", "timer_id" => timer_id);
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
                    self.notify_timer(system_task, &new_timer_node, lease)
                        .map_err(|e| to_errno_with_log(e))?;

                    // Interval timers currently need special handling: we must not suspend the
                    // container until the interval timer in question gets re-scheduled. To
                    // ensure that we stay awake, we store the suspend lock for a while. This
                    // prevents container suspend.
                    //
                    // This map entry and its MessageCounterHandle are removed in one of the following cases:
                    //
                    // (1) When the interval timer eventually gets rescheduled. We
                    // assume that for interval timers the reschedule will be imminent and that
                    // therefore not suspending until that re-schedule happens will not unreasonably
                    // extend the awake period.
                    //
                    // (2) When the timer is canceled.
                    if *timer.is_interval.lock() {
                        interval_timers_pending_reschedule.insert(timer_id, message_counter);
                    }
                    log_debug!("Cmd::Alarm done: timer_id: {timer_id:?}");
                    self.lock().debug_start_stage_counter = 19;
                }
                Cmd::Stop { timer, done, message_counter } => {
                    self.lock().debug_start_stage_counter = 20;
                    defer! {
                        signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED).expect("can signal");
                    }
                    let timer_id = timer.get_id();
                    log_debug!("watch_new_hrtimer_loop: Cmd::Stop: timer_id: {:?}", timer_id);
                    ftrace::duration!("alarms", "starnix:hrtimer:stop", "timer_id" => timer_id);
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());

                    let (maybe_cancel, prev_len) = {
                        let mut guard = self.lock();
                        let prev_len = guard.get_pending_timers_count();
                        (guard.pending_timers.remove(&timer_id), prev_len)
                    };

                    let wake_alarm_id = timer.wake_alarm_id();
                    log_long_op!(cancel_by_id(
                        &message_counter,
                        maybe_cancel,
                        &timer_id,
                        &device_async_proxy,
                        &mut interval_timers_pending_reschedule,
                        &mut task_by_timer_id,
                        &wake_alarm_id,
                    ));
                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_at_stop", ftrace::Scope::Process, "timer_id" => timer_id);

                    {
                        let mut guard = self.lock();
                        self.record_inspect_on_stop(&mut guard, prev_len);
                    }
                    log_debug!("Cmd::Stop done: {timer_id:?}");
                    self.lock().debug_start_stage_counter = 29;
                }
                Cmd::MonitorUtc { timer, counter, recv } => {
                    self.lock().debug_start_stage_counter = 30;
                    ftrace::duration!("alarms", "starnix:hrtimer:monitor_utc", "timer_id" => timer.get_id());
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
                    let monitor_task = fasync::Task::local(async move {
                        run_utc_timeline_monitor(counter, recv).await;
                    });
                    task_by_timer_id.insert(timer.get_id(), monitor_task);
                    self.lock().debug_start_stage_counter = 39;
                }
            }
            let mut guard = self.lock();
            guard.debug_start_stage_counter = 90;

            log_debug!(
                "watch_new_hrtimer_loop: pending timers count: {}",
                guard.pending_timers.len()
            );
            log_debug!("watch_new_hrtimer_loop: pending timers:       {:?}", guard.pending_timers);
            log_debug!(
                "watch_new_hrtimer_loop: message counter:      {:?}",
                message_counter.to_string(),
            );
            log_debug!(
                "watch_new_hrtimer_loop: interval timers:      {:?}",
                interval_timers_pending_reschedule.len(),
            );

            guard.last_loop_completed_timestamp = zx::BootInstant::get();
            guard.debug_start_stage_counter = 99;
        } // while

        Ok(())
    }

    fn lock(&self) -> MutexGuard<'_, HrTimerManagerState> {
        self.state.lock()
    }

    fn record_event(
        self: &HrTimerManagerHandle,
        guard: &mut MutexGuard<'_, HrTimerManagerState>,
        event_type: InspectHrTimerEvent,
        deadline: Option<TargetTime>,
    ) {
        if guard.events.len() >= INSPECT_EVENT_BUFFER_SIZE {
            guard.events.pop_front();
        }
        guard.events.push_back(InspectEvent::new(event_type, deadline));
    }

    /// Add a new timer.
    ///
    /// A wake alarm is scheduled for the timer.
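    ///
    /// A usage sketch; the deadline value is illustrative and mirrors the unit
    /// tests at the bottom of this file:
    ///
    /// ```ignore
    /// let timer = HrTimer::new();
    /// manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into())?;
    /// // ... later, if the alarm should no longer fire:
    /// manager.remove_timer(&timer)?;
    /// ```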
    pub fn add_timer(
        self: &HrTimerManagerHandle,
        wake_source: Option<Weak<dyn OnWakeOps>>,
        new_timer: &HrTimerHandle,
        deadline: TargetTime,
    ) -> Result<(), Errno> {
        log_debug!("add_timer: entry: {new_timer:?}, deadline: {deadline:?}");
        ftrace::duration!("alarms", "starnix:add_timer", "deadline" => deadline.estimate_boot().unwrap().into_nanos());
        ftrace::flow_step!("alarms", "hrtimer_lifecycle", new_timer.trace_id());

        // Keep system awake until timer is scheduled.
        let message_counter_until_timer_scheduled = self.lock().share_message_counter(true);

        let sender = self.get_sender();
        let new_timer_node = HrTimerNode::new(deadline, wake_source, new_timer.clone());
        let wake_alarm_scheduled = zx::Event::create();
        let wake_alarm_scheduled_clone = duplicate_handle(&wake_alarm_scheduled)?;
        let timer_id = new_timer.get_id();
        sender
            .unbounded_send(Cmd::Start {
                new_timer_node,
                message_counter: message_counter_until_timer_scheduled,
                done: wake_alarm_scheduled_clone,
            })
            .map_err(|_| errno!(EINVAL, "add_timer: could not send Cmd::Start"))?;

        // Block until the wake alarm for this timer is scheduled.
        wait_signaled_sync(&wake_alarm_scheduled)
            .map_err(|_| errno!(EINVAL, "add_timer: wait_signaled_sync failed"))?;

        log_debug!("add_timer: exit : timer_id: {timer_id:?}");
        Ok(())
    }

    /// Remove a timer.
    ///
    /// The timer is removed if scheduled; nothing changes if it is not.
    pub fn remove_timer(self: &HrTimerManagerHandle, timer: &HrTimerHandle) -> Result<(), Errno> {
        log_debug!("remove_timer: entry:  {timer:?}");
        ftrace::duration!("alarms", "starnix:remove_timer");
        // Keep system awake until timer is removed.
        let message_counter_until_removed = self.lock().share_message_counter(true);

        let sender = self.get_sender();
        let done = zx::Event::create();
        let done_clone = duplicate_handle(&done)?;
        let timer_id = timer.get_id();
        sender
            .unbounded_send(Cmd::Stop {
                timer: timer.clone(),
                message_counter: message_counter_until_removed,
                done: done_clone,
            })
            .map_err(|_| errno!(EINVAL, "remove_timer: could not send Cmd::Stop"))?;

        // Block until the alarm for this timer is canceled.
        wait_signaled_sync(&done)
            .map_err(|_| errno!(EINVAL, "remove_timer: wait_signaled_sync failed"))?;
        log_debug!("remove_timer: exit:  {timer_id:?}");
        Ok(())
    }
}

#[derive(Debug)]
pub struct HrTimer {
    event: zx::Event,

    /// True iff the timer is currently set to trigger at an interval.
    ///
    /// This is used to determine at which point the hrtimer event (not
    /// `HrTimer::event` but the one that is shared with the actual driver)
    /// should be cleared.
    ///
    /// If this is true, the timer manager will wait to clear the timer event
    /// until the next timer request has been sent to the driver. This prevents
    /// lost wake ups where the container happens to suspend between two instances
    /// of an interval timer triggering.
    pub is_interval: Mutex<bool>,
}
pub type HrTimerHandle = Arc<HrTimer>;

impl Drop for HrTimer {
    fn drop(&mut self) {
        let wake_alarm_id = self.wake_alarm_id();
        ftrace::duration!("alarms", "hrtimer::drop", "timer_id" => self.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
        ftrace::flow_end!("alarms", "hrtimer_lifecycle", self.trace_id());
    }
}

impl HrTimer {
    pub fn new() -> HrTimerHandle {
        let ret = Arc::new(Self { event: zx::Event::create(), is_interval: Mutex::new(false) });
        let wake_alarm_id = ret.wake_alarm_id();
        ftrace::duration!("alarms", "hrtimer::new", "timer_id" => ret.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
        ftrace::flow_begin!("alarms", "hrtimer_lifecycle", ret.trace_id(), "wake_alarm_id" => &wake_alarm_id[..]);
        ret
    }

    pub fn event(&self) -> zx::Event {
        self.event
            .duplicate_handle(zx::Rights::SAME_RIGHTS)
            .expect("Duplicate hrtimer event handle")
    }

    /// Returns the unique identifier of this [HrTimer].
    ///
    /// All holders of the same [HrTimerHandle] will see the same value here.
    pub fn get_id(&self) -> zx::Koid {
        self.event.as_handle_ref().get_koid().expect("infallible")
    }

    /// Returns the unique alarm ID for this [HrTimer].
    ///
    /// The naming pattern is: `starnix:Koid(NNNNN):iB`, where `NNNNN` is a koid
    /// and `B` is `1` if the timer is an interval timer, or `0` otherwise.
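    ///
    /// For example, a one-shot timer whose event koid is 12345 (an illustrative
    /// value) would be named `starnix:Koid(12345):i0`.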
    fn wake_alarm_id(&self) -> String {
        let i = if *self.is_interval.lock() { "i1" } else { "i0" };
        let koid = self.get_id();
        format!("starnix:{koid:?}:{i}")
    }

    fn trace_id(&self) -> ftrace::Id {
        self.get_id().raw_koid().into()
    }
}

impl TimerOps for HrTimerHandle {
    fn start(
        &self,
        current_task: &CurrentTask,
        source: Option<Weak<dyn OnWakeOps>>,
        deadline: TargetTime,
    ) -> Result<(), Errno> {
        // Before (re)starting the timer, ensure the signal is cleared.
        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
            .map_err(|status| from_status_like_fdio!(status))?;
        current_task.kernel().hrtimer_manager.add_timer(
            source,
            self,
            deadline.into_resolved_utc_deadline(),
        )?;
        Ok(())
    }

    fn stop(&self, kernel: &Arc<Kernel>) -> Result<(), Errno> {
        // Clear the signal when removing the hrtimer.
        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
            .map_err(|status| from_status_like_fdio!(status))?;
        Ok(kernel.hrtimer_manager.remove_timer(self)?)
    }

    fn as_handle_ref(&self) -> HandleRef<'_> {
        self.event.as_handle_ref()
    }

    fn get_timeline_change_observer(
        &self,
        current_task: &CurrentTask,
    ) -> Option<TimelineChangeObserver> {
        // Should this return errno instead?
        current_task
            .kernel()
            .hrtimer_manager
            .get_timeline_change_observer(self)
            .inspect_err(|err| {
                log_error!("hr_timer_manager: could not create timeline change counter: {err:?}")
            })
            .ok()
    }
}

/// Represents a node of `HrTimer`.
#[derive(Clone, Debug)]
struct HrTimerNode {
    /// The deadline of the associated `HrTimer`.
    deadline: TargetTime,

    /// The source that initiated this `HrTimer`.
    ///
    /// When the timer expires, the system will be woken up if necessary. The `on_wake` callback
    /// will be triggered with a baton lease to prevent further suspend while Starnix is handling
    /// the wake event.
    wake_source: Option<Weak<dyn OnWakeOps>>,

    /// The underlying HrTimer.
    hr_timer: HrTimerHandle,
}

impl HrTimerNode {
    fn new(
        deadline: TargetTime,
        wake_source: Option<Weak<dyn OnWakeOps>>,
        hr_timer: HrTimerHandle,
    ) -> Self {
        Self { deadline, wake_source, hr_timer }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::task::HrTimer;
    use crate::testing::spawn_kernel_and_run;
    use fake_wake_alarms::{MAGIC_EXPIRE_DEADLINE, Response, serve_fake_wake_alarms};
    use fuchsia_runtime::UtcInstant;
    use std::thread;
    use {fidl_fuchsia_time_alarms as fta, fuchsia_async as fasync};

    impl HrTimerManagerState {
        fn new_for_test() -> Self {
            Self {
                events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
                pending_timers: Default::default(),
                message_counter: None,
                last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
                last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
                debug_start_stage_counter: 0,
            }
        }
    }

    // Connects to a fake wake-alarms server; injected for testing.
    fn connect_factory(message_counter: zx::Counter, response_type: Response) -> zx::Channel {
        let (client, server) = zx::Channel::create();

        // A separate thread is needed to allow independent execution of the server.
        let _detached = thread::spawn(move || {
            fasync::LocalExecutor::default().run_singlethreaded(async move {
                let stream =
                    fidl::endpoints::ServerEnd::<fta::WakeAlarmsMarker>::new(server).into_stream();
                serve_fake_wake_alarms(message_counter, response_type, stream, /*once*/ false)
                    .await;
            });
        });
        client
    }

    // Initializes HrTimerManager for tests.
    //
    // # Returns
    //
    // A tuple of:
    // - `HrTimerManagerHandle`: the unit under test
    // - `zx::Counter`: a message counter to use in tests to observe suspend state
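    //   (a counter value > 0 blocks suspend; a value <= 0 allows it, matching
    //   the COUNTER_POSITIVE / COUNTER_NON_POSITIVE assertions below)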
    fn init_hr_timer_manager(
        current_task: &CurrentTask,
        response_type: Response,
    ) -> (HrTimerManagerHandle, zx::Counter) {
        let manager = Arc::new(HrTimerManager {
            state: Mutex::new(HrTimerManagerState::new_for_test()),
            start_next_sender: Default::default(),
        });
        let counter = zx::Counter::create();
        // One duplicate of the counter goes to the fake wake-alarms server.
        let counter_clone = duplicate_handle(&counter).unwrap();
        let wake_channel = connect_factory(counter_clone, response_type);
        // A second duplicate goes to the manager; the original is handed back
        // to the test for observation.
        let counter_clone = duplicate_handle(&counter).unwrap();
        manager
            .init_internal(current_task, Some(wake_channel), Some(counter_clone))
            .expect("infallible");
        (manager, counter)
    }

    #[fuchsia::test]
    async fn test_triggering() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);

            let timer1 = HrTimer::new();
            let timer2 = HrTimer::new();
            let timer3 = HrTimer::new();

            manager.add_timer(None, &timer1, zx::BootInstant::from_nanos(1).into()).unwrap();
            manager.add_timer(None, &timer2, zx::BootInstant::from_nanos(2).into()).unwrap();
            manager.add_timer(None, &timer3, zx::BootInstant::from_nanos(3).into()).unwrap();

            wait_signaled_sync(&timer1.event()).to_result().unwrap();
            wait_signaled_sync(&timer2.event()).to_result().unwrap();
            wait_signaled_sync(&timer3.event()).to_result().unwrap();

            // Once all timers have fired, nothing should hold the message
            // counter up, so suspend must be allowed again.
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn test_triggering_utc() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);

            let timer1 = HrTimer::new();
            let timer2 = HrTimer::new();
            let timer3 = HrTimer::new();

            // These deadlines are all in the past, so the timers expire as
            // soon as they are scheduled.
            manager.add_timer(None, &timer1, UtcInstant::from_nanos(1).into()).unwrap();
            manager.add_timer(None, &timer2, UtcInstant::from_nanos(2).into()).unwrap();
            manager.add_timer(None, &timer3, UtcInstant::from_nanos(3).into()).unwrap();

            wait_signaled_sync(&timer1.event()).to_result().unwrap();
            wait_signaled_sync(&timer2.event()).to_result().unwrap();
            wait_signaled_sync(&timer3.event()).to_result().unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn test_delayed_response() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            let timer = HrTimer::new();

            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn test_protocol_error_response() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Error);

            let timer = HrTimer::new();
            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
            // Even when the server responds with an error, the message counter
            // must drain so that suspend remains possible.
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn reschedule_same_timer() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            let timer = HrTimer::new();

            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(2).into()).unwrap();

            // Force alarm expiry: MAGIC_EXPIRE_DEADLINE is a sentinel deadline
            // that the fake server treats as immediately expired.
            manager
                .add_timer(None, &timer, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer.event()).to_result().unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn rescheduling_interval_timers_forbids_suspend() {
        spawn_kernel_and_run(async |_, current_task| {
            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            // Schedule an interval timer and let it expire.
            let timer1 = HrTimer::new();
            *timer1.is_interval.lock() = true;
            hrtimer_manager
                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer1.event()).to_result().unwrap();

            // Schedule a regular timer and let it expire.
            let timer2 = HrTimer::new();
            hrtimer_manager
                .add_timer(None, &timer2, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer2.event()).to_result().unwrap();

            // If an interval timer (`timer1`) has expired but has not yet been
            // rescheduled, and an intervening timer (`timer2`) is scheduled and
            // expires in the meantime, suspend must stay disallowed
            // (counter > 0) so that `timer1` can eventually be rescheduled.
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn canceling_interval_timer_allows_suspend() {
        spawn_kernel_and_run(async |_, current_task| {
            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            let timer1 = HrTimer::new();
            *timer1.is_interval.lock() = true;
            hrtimer_manager
                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer1.event()).to_result().unwrap();

            // When an interval timer expires, we should not be allowed to suspend.
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
            );

            // Schedule the same timer again. This time around we do not wait for it to expire,
            // but cancel the timer instead.
            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S);
            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();

            hrtimer_manager.remove_timer(&timer1).unwrap();

            // When we cancel an interval timer, we should be allowed to suspend.
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn canceling_interval_timer_allows_suspend_with_flake() {
        spawn_kernel_and_run(async |_, current_task| {
            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            let timer1 = HrTimer::new();
            *timer1.is_interval.lock() = true;
            hrtimer_manager
                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer1.event()).to_result().unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
            );
            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S);
            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();
            // Starting and then immediately stopping the timer, with no pause
            // in between, has led to flakes before; exercise that path.
            hrtimer_manager.remove_timer(&timer1).unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }
}