// File: starnix_core/time/hr_timer_manager.rs
1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::power::{
6    OnWakeOps, OwnedMessageCounterHandle, SharedMessageCounter,
7    create_proxy_for_wake_events_counter_zero,
8};
9use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
10use crate::task::{CurrentTask, Kernel, LockedAndTask};
11use crate::time::TargetTime;
12use crate::vfs::timer::{TimelineChangeObserver, TimerOps};
13use anyhow::{Context, Result};
14use fidl_fuchsia_time_alarms as fta;
15use fuchsia_async as fasync;
16use fuchsia_inspect::ArrayProperty;
17use fuchsia_runtime::UtcClock;
18use fuchsia_trace as ftrace;
19use futures::channel::mpsc::{self, UnboundedReceiver, UnboundedSender};
20use futures::{FutureExt, SinkExt, StreamExt, select};
21use scopeguard::defer;
22use starnix_logging::{log_debug, log_error, log_info, log_warn};
23use starnix_sync::{Mutex, MutexGuard};
24use starnix_uapi::errors::Errno;
25use starnix_uapi::{errno, from_status_like_fdio};
26use std::collections::{HashMap, VecDeque};
27use std::pin::pin;
28use std::sync::{Arc, OnceLock, Weak};
29use zx::{HandleBased, HandleRef};
30
/// Max value for inspect event history.
///
/// Sizes the `events` buffer in [HrTimerManagerState].
const INSPECT_EVENT_BUFFER_SIZE: usize = 128;
33
/// Logs `v` as an internal error and collapses it into a generic I/O `Errno`.
///
/// Used where the specific failure is not actionable by the caller but should
/// still be visible in logs for diagnosis.
fn to_errno_with_log<T: std::fmt::Debug>(v: T) -> Errno {
    log_error!("hr_timer_manager internal error: {v:?}");
    from_status_like_fdio!(zx::Status::IO)
}
38
39fn signal_event(
40    event: &zx::Event,
41    clear_mask: zx::Signals,
42    set_mask: zx::Signals,
43) -> Result<(), zx::Status> {
44    event
45        .signal(clear_mask, set_mask)
46        .inspect_err(|err| log_error!(err:?, clear_mask:?, set_mask:?; "while signaling event"))
47}
48
49fn duplicate_handle<H: HandleBased>(h: &H) -> Result<H, Errno> {
50    h.duplicate_handle(zx::Rights::SAME_RIGHTS).map_err(|status| from_status_like_fdio!(status))
51}
52
// How long a wait may run before the stall diagnostics below fire.
const TIMEOUT_SECONDS: i64 = 40;

/// Waits forever synchronously for EVENT_SIGNALED.
///
/// For us there is no useful scenario where this wait times out and we can continue operating.
/// The wait is therefore retried indefinitely; each `TIMEOUT_SECONDS` expiry is logged, and the
/// first expiry requests a backtrace of all threads so a stuck wait can be diagnosed.
fn wait_signaled_sync(event: &zx::Event) -> zx::WaitResult {
    // Set after the first timeout report, so the eventual resolution can be
    // correlated with the earlier "not signaled yet" log line.
    let mut logged = false;
    loop {
        let timeout =
            zx::MonotonicInstant::after(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
        let result = event.wait_one(zx::Signals::EVENT_SIGNALED, timeout);
        if let zx::WaitResult::Ok(_) = result {
            if logged {
                // Only log the resolution if we previously reported a stall.
                log_error!("wait_signaled_sync: signal resolved: result={result:?}",);
            }
            return result;
        }
        fuchsia_trace::instant!(
            "alarms",
            "starnix:hrtimer:wait_timeout",
            fuchsia_trace::Scope::Process
        );
        // This is bad and should never happen. If it does, it's a bug that has to be found and
        // fixed. There is no good way to proceed if these signals are not being signaled properly.
        log_error!(
            // Check logs for a `kBadState` status reported from the hrtimer driver.
            // LINT.IfChange(hrtimer_wait_signaled_sync_tefmo)
            "wait_signaled_sync: not signaled yet. Report to `componentid:1408151`: result={result:?}",
            // LINT.ThenChange(//tools/testing/tefmocheck/string_in_log_check.go:hrtimer_wait_signaled_sync_tefmo)
        );
        if !logged {
            // Capture stacks from every thread once, to aid debugging the stall.
            #[cfg(all(target_os = "fuchsia", not(doc)))]
            ::debug::backtrace_request_all_threads();
            logged = true;
        }
    }
}
90
/// A macro that waits on a future, but if the future takes longer than
/// `TIMEOUT_SECONDS`, we log a warning and a stack trace.
///
/// Expands to a block that evaluates to the future's output. The wait itself
/// never gives up: only the diagnostics fire on each timeout interval.
macro_rules! log_long_op {
    ($fut:expr) => {{
        use futures::FutureExt;
        let fut = $fut;
        futures::pin_mut!(fut);
        // True once the "blocked" warning has fired, so resolution is logged too.
        let mut logged = false;
        loop {
            let timeout = fasync::Timer::new(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
            futures::select! {
                res = fut.as_mut().fuse() => {
                    if logged {
                        log_warn!("unexpected blocking is now resolved: long-running async operation at {}:{}.",
                            file!(), line!());
                    }
                    break res;
                }
                _ = timeout.fuse() => {
                    // Check logs for a `kBadState` status reported from the hrtimer driver.
                    log_warn!("unexpected blocking: long-running async op at {}:{}. Report to `componentId:1408151`",
                        file!(), line!());
                    if !logged {
                        // Request stacks from all threads on the first timeout only.
                        #[cfg(all(target_os = "fuchsia", not(doc)))]
                        ::debug::backtrace_request_all_threads();
                    }
                    logged = true;
                }
            }
        }
    }};
}
123
/// Waits forever asynchronously for EVENT_SIGNALED.
///
/// Uses [log_long_op] so an unexpectedly long wait is logged with a backtrace.
async fn wait_signaled<H: HandleBased>(handle: &H) -> Result<()> {
    log_long_op!(fasync::OnSignals::new(handle, zx::Signals::EVENT_SIGNALED))
        .context("hr_timer_manager:wait_signaled")?;
    Ok(())
}
130
/// Cancels an alarm by ID.
///
/// # Args
/// - `_message_counter`: kept alive for the duration of the cancelation so the
///   container cannot suspend mid-cancel.
/// - `timer_state`: pending state for the timer, if any; `None` skips the
///   driver-side cancel.
/// - `timer_id`: koid identifying the timer being canceled.
/// - `proxy`: connection to the wake alarms service.
/// - `interval_timers_pending_reschedule`: interval timers awaiting reschedule;
///   the canceled timer is removed from this map.
/// - `task_by_timer_id`: per-timer bookkeeping tasks; the canceled timer's task
///   is detached and removed.
/// - `alarm_id`: the wake alarms service's string ID for this alarm.
async fn cancel_by_id(
    _message_counter: &SharedMessageCounter,
    timer_state: Option<TimerState>,
    timer_id: &zx::Koid,
    proxy: &fta::WakeAlarmsProxy,
    interval_timers_pending_reschedule: &mut HashMap<zx::Koid, SharedMessageCounter>,
    task_by_timer_id: &mut HashMap<zx::Koid, fasync::Task<()>>,
    alarm_id: &str,
) {
    if let Some(task) = task_by_timer_id.remove(timer_id) {
        // Let this task complete and get removed.
        task.detach();
    }
    if let Some(timer_state) = timer_state {
        ftrace::duration!("alarms", "starnix:hrtimer:cancel_by_id", "timer_id" => *timer_id);
        log_debug!("cancel_by_id: START canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);
        // NOTE(review): the send is treated as infallible; a closed channel here panics.
        proxy.cancel(&alarm_id).expect("infallible");
        log_debug!("cancel_by_id: 1/2 canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);

        // Let the timer closure complete before continuing.
        let _ = log_long_op!(timer_state.task);
    }

    // If this timer is an interval timer, we must remove it from the pending reschedule list.
    // This does not affect container suspend, since `_message_counter` is live. It's a no-op
    // for other timers.
    interval_timers_pending_reschedule.remove(timer_id);
    log_debug!("cancel_by_id: 2/2 DONE canceling timer: {timer_id:?}: alarm_id: {alarm_id}");
}
161
162/// Called when the underlying wake alarms manager reports a fta::WakeAlarmsError
163/// as a result of a call to set_and_wait.
164fn process_alarm_protocol_error(
165    pending: &mut HashMap<zx::Koid, TimerState>,
166    timer_id: &zx::Koid,
167    error: fta::WakeAlarmsError,
168) -> Option<TimerState> {
169    match error {
170        fta::WakeAlarmsError::Unspecified => {
171            log_warn!(
172                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unspecified error: {error:?}"
173            );
174            pending.remove(timer_id)
175        }
176        fta::WakeAlarmsError::Dropped => {
177            log_debug!("watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: alarm dropped: {error:?}");
178            // Do not remove a Dropped timer here, in contrast to other error states: a Dropped
179            // timer is a result of a Stop or a Cancel ahead of a reschedule. In both cases, that
180            // code takes care of removing the timer from the pending timers list.
181            None
182        }
183        error => {
184            log_warn!(
185                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unspecified error: {error:?}"
186            );
187            pending.remove(timer_id)
188        }
189    }
190}
191
// This function is swapped out for an injected proxy in tests.
/// Connects to the `fuchsia.time.alarms` wake alarms protocol, returning the
/// client end of the connection channel.
fn connect_to_wake_alarms_async() -> Result<zx::Channel, Errno> {
    log_debug!("connecting to wake alarms");
    let (client, server) = zx::Channel::create();
    fuchsia_component::client::connect_channel_to_protocol::<fta::WakeAlarmsMarker>(server)
        .map(|()| client)
        .map_err(|err| {
            errno!(EINVAL, format!("Failed to connect to fuchsia.time.alarms/Wake: {err}"))
        })
}
202
/// The kind of timer event recorded in the inspect history.
#[derive(Debug)]
enum InspectHrTimerEvent {
    /// A timer was added to the pending set.
    Add,
    /// An existing pending timer was replaced.
    Update,
    /// A timer was removed from the pending set.
    Remove,
    // The String inside will be used in fmt. But the compiler does not recognize the use when
    // formatting with the Debug derivative.
    /// An internal bookkeeping inconsistency, described by the payload.
    Error(#[allow(dead_code)] String),
}
212
213impl InspectHrTimerEvent {
214    fn retain_err(prev_len: usize, after_len: usize, context: &str) -> InspectHrTimerEvent {
215        InspectHrTimerEvent::Error(format!(
216            "retain the timer incorrectly, before len: {prev_len}, after len: {after_len}, context: {context}",
217        ))
218    }
219}
220
221impl std::fmt::Display for InspectHrTimerEvent {
222    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
223        write!(f, "{:?}", self)
224    }
225}
226
/// A single entry in the inspect event history buffer.
#[derive(Debug)]
struct InspectEvent {
    // What happened: add/update/remove/error.
    event_type: InspectHrTimerEvent,
    // Boot timestamp at which this event was created.
    created_at: zx::BootInstant,
    // The timer deadline associated with the event, when applicable.
    deadline: Option<TargetTime>,
}

impl InspectEvent {
    /// Creates an event stamped with the current boot time.
    fn new(event_type: InspectHrTimerEvent, deadline: Option<TargetTime>) -> Self {
        Self { event_type, created_at: zx::BootInstant::get(), deadline }
    }
}
239
/// Bookkeeping for one pending high-resolution timer.
#[derive(Debug)]
struct TimerState {
    /// The task that waits for the timer to expire.
    task: fasync::Task<()>,
    /// The desired deadline for the timer.
    deadline: TargetTime,
    /// The node that represents the current generation of this timer.
    node: HrTimerNodeHandle,
}
249
250impl std::fmt::Display for TimerState {
251    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
252        write!(f, "TimerState[deadline:{:?}]", self.deadline)
253    }
254}
255
/// Mutable state of [HrTimerManager], accessed under its mutex.
struct HrTimerManagerState {
    /// All pending timers are stored here, keyed by timer koid.
    pending_timers: HashMap<zx::Koid, TimerState>,

    /// The event that is registered with runner to allow the hrtimer to wake the kernel.
    /// Optional, because we want the ability to inject a counter in tests.
    message_counter: Option<OwnedMessageCounterHandle>,

    /// For recording timer events (bounded by INSPECT_EVENT_BUFFER_SIZE capacity).
    events: VecDeque<InspectEvent>,

    /// The last timestamp at which the hrtimer loop was started.
    last_loop_started_timestamp: zx::BootInstant,

    /// The last timestamp at which the hrtimer loop was completed.
    last_loop_completed_timestamp: zx::BootInstant,

    // Debug progress counter for Cmd::Start.
    // TODO: b/454085350 - remove once diagnosed.
    debug_start_stage_counter: u64,
}
277
278impl HrTimerManagerState {
279    fn new(_parent_node: &fuchsia_inspect::Node) -> Self {
280        Self {
281            pending_timers: HashMap::new(),
282            // Initialized later in the State's lifecycle because it only becomes
283            // available after making a connection to the wake proxy.
284            message_counter: None,
285            events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
286            last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
287            last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
288            debug_start_stage_counter: 0,
289        }
290    }
291
292    fn get_pending_timers_count(&self) -> usize {
293        self.pending_timers.len()
294    }
295
296    /// Gets a new shareable instance of the message counter.
297    fn share_message_counter(&self, new_pending_message: bool) -> SharedMessageCounter {
298        let counter_ref =
299            self.message_counter.as_ref().expect("message_counter is None, but should not be.");
300        counter_ref.share(new_pending_message)
301    }
302
303    fn record_events(&self, node: &fuchsia_inspect::Node) {
304        let events_node = node.create_child("events");
305        for (i, event) in self.events.iter().enumerate() {
306            let child = events_node.create_child(i.to_string());
307            child.record_string("type", event.event_type.to_string());
308            child.record_int("created_at", event.created_at.into_nanos());
309            if let Some(deadline) = event.deadline {
310                child.record_int("deadline", deadline.estimate_boot().unwrap().into_nanos());
311            }
312            events_node.record(child);
313        }
314        node.record(events_node);
315    }
316}
317
/// Asynchronous commands sent to `watch_new_hrtimer_loop`.
///
/// The synchronous methods on HrTimerManager use these commands to communicate
/// with the alarm manager actor that loops about in `watch_new_hrtimer_loop`.
///
/// This allows us to not have to share state between the synchronous and async
/// methods of `HrTimerManager`.
#[derive(Debug)]
enum Cmd {
    /// Start the timer contained in `new_timer_node`.
    ///
    /// The processing loop will signal `done` to allow synchronous
    /// return from scheduling an async Cmd::Start.
    Start {
        new_timer_node: HrTimerNodeHandle,
        /// Signaled once the timer is started.
        done: zx::Event,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Stop the timer noted below. `done` is similar to above.
    Stop {
        /// The timer to stop.
        timer: HrTimerHandle,
        /// Signaled once the timer is stopped.
        done: zx::Event,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Triggered by the underlying hrtimer device when an alarm expires.
    Alarm {
        /// The affected timer's node.
        new_timer_node: HrTimerNodeHandle,
        /// The wake lease provided by the underlying API.
        lease: zx::EventPair,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Install a timeline change monitor.
    ///
    /// `counter` is incremented on each UTC timeline change while interest is
    /// registered; `recv` carries interest on/off updates for `timer`.
    MonitorUtc { timer: HrTimerHandle, counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool> },
}
361
// Increments `counter` every time the UTC timeline changes.
//
// This counter is shared with UTC timers to provide UTC timeline change notification.
//
// Use cases:
//
// 1. Counter's value counts the number of times the UTC timeline changed, which is used in timer
//    `read` calls to report the number of encountered changes, as required by `read`.
//
// 2. Counter's `COUNTER_POSITIVE` signal is used in `wait_async` calls on timers, as Starnix must
//    wake such timers whenever a timeline change happens. The counter reader must reset the
//    counter to zero after reading its value to allow for a next wake.
//
// Other primitives are not appropriate to use here: an Event does not remember how many times it
// has been signaled, so does not fulfill (1). Atomics don't generate a signal on increment, so
// don't satisfy (2). Conversely, the `wait_async` machinery on timers can already deal with
// HandleBased objects, so a Counter can be readily used there.
async fn run_utc_timeline_monitor(counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool>) {
    // If the real UTC clock handle cannot be duplicated, the monitor simply
    // never runs; the failure is logged and swallowed on purpose.
    let utc_handle = crate::time::utc::duplicate_real_utc_clock_handle().inspect_err(|err| {
        log_error!("run_utc_timeline_monitor: could not monitor UTC timeline: {err:?}")
    });
    if let Ok(utc_handle) = utc_handle {
        run_utc_timeline_monitor_internal(counter, recv, utc_handle).await;
    }
}
387
// See `run_utc_timeline_monitor`.
// `utc_handle` is passed in directly so tests can inject a fake clock.
async fn run_utc_timeline_monitor_internal(
    counter: zx::Counter,
    mut recv: mpsc::UnboundedReceiver<bool>,
    utc_handle: UtcClock,
) {
    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: enter");
    let koid = utc_handle.koid();
    log_debug!(
        "run_utc_timeline_monitor: monitoring UTC clock timeline: enter: UTC clock koid={koid:?}, counter={counter:?}"
    );
    let utc_handle = std::rc::Rc::new(utc_handle);
    let utc_handle_fn = || utc_handle.clone();
    // Only bump the counter while a consumer has expressed interest via `recv`.
    let mut interested = false;
    loop {
        let utc_handle = utc_handle_fn();
        // CLOCK_UPDATED is auto-cleared.
        let mut updated_fut = pin!(
            fasync::OnSignals::new(utc_handle.as_handle_ref(), zx::Signals::CLOCK_UPDATED).fuse()
        );
        let mut interest_fut = recv.next();

        // Note: all select! branches must allow for exit when their respective futures are
        // used up.
        select! {
            result = updated_fut => {
                if result.is_err() {
                    log_warn!("run_utc_timeline_monitor: could not wait on signals: {:?}, counter={counter:?}", result);
                    break;
                }
                if interested {
                    log_debug!("run_utc_timeline_monitor: UTC timeline updated, counter: {counter:?}");
                    // The consumer of this `counter` should wait for COUNTER_POSITIVE, and
                    // once it observes the value of the counter, subtract the read value from
                    // counter.
                    counter
                        .add(1)
                        // Ignore the error after logging it. Should we exit the loop here?
                        .inspect_err(|err| {
                            log_error!("run_utc_timeline_monitor: could not increment counter: {err:?}")
                        })
                        .unwrap_or(());
                }
            },
            result = interest_fut => {
                match result {
                    Some(interest) => {
                        log_debug!("interest change: {counter:?}, interest: {interest:?}");
                        interested = interest;
                    }
                    None => {
                        // Sender dropped: no consumer remains, stop monitoring.
                        log_debug!("no longer needs counter monitoring: {counter:?}");
                        break;
                    }
                }
            },
        };
    }
    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: exit");
}
449
/// The manager for high-resolution timers.
///
/// This manager is responsible for creating and managing high-resolution timers.
pub struct HrTimerManager {
    /// Internal state, guarded by a mutex. See [HrTimerManagerState].
    state: Mutex<HrTimerManagerState>,

    /// The channel sender that notifies the worker thread that HrTimer driver needs to be
    /// (re)started with a new deadline. Set exactly once, during init.
    start_next_sender: OnceLock<UnboundedSender<Cmd>>,
}

/// Shared-ownership handle for [HrTimerManager].
pub type HrTimerManagerHandle = Arc<HrTimerManager>;
461
462impl HrTimerManager {
    /// Creates a new manager and registers a lazy inspect child under
    /// `parent_node` that snapshots the manager's state at read time.
    pub fn new(parent_node: &fuchsia_inspect::Node) -> HrTimerManagerHandle {
        let inspect_node = parent_node.create_child("hr_timer_manager");
        let new_manager = Arc::new(Self {
            state: Mutex::new(HrTimerManagerState::new(&inspect_node)),
            start_next_sender: Default::default(),
        });
        // A weak reference avoids an Arc cycle between the manager and the
        // lazy-child closure that inspect retains.
        let manager_weak = Arc::downgrade(&new_manager);

        // Create a lazy inspect node to get HrTimerManager info at read-time.
        inspect_node.record_lazy_child("hr_timer_manager", move || {
            let manager_ref = manager_weak.upgrade().expect("inner HrTimerManager");
            async move {
                // This gets the clock value directly from the kernel, it is not subject
                // to the local runner's clock.
                let now = zx::BootInstant::get();

                let inspector = fuchsia_inspect::Inspector::default();
                inspector.root().record_int("now_ns", now.into_nanos());

                // Copy everything needed out of the state in one short lock
                // scope, so the lock is not held while building inspect output.
                let (
                    timers,
                    pending_timers_count,
                    message_counter,
                    loop_started,
                    loop_completed,
                    debug_start_stage_counter,
                ) = {
                    let guard = manager_ref.lock();
                    (
                        guard
                            .pending_timers
                            .iter()
                            .map(|(k, v)| (*k, v.deadline))
                            .collect::<Vec<_>>(),
                        guard.get_pending_timers_count(),
                        guard.message_counter.as_ref().map(|c| c.to_string()).unwrap_or_default(),
                        guard.last_loop_started_timestamp,
                        guard.last_loop_completed_timestamp,
                        guard.debug_start_stage_counter,
                    )
                };
                inspector.root().record_uint("pending_timers_count", pending_timers_count as u64);
                inspector.root().record_string("message_counter", message_counter);

                // These are the deadlines we are currently waiting for. The format is:
                // `alarm koid` -> `deadline nanos` (remains: `duration until alarm nanos`)
                let deadlines = inspector.root().create_string_array("timers", timers.len());
                for (i, (k, v)) in timers.into_iter().enumerate() {
                    let remaining = v.estimate_boot().unwrap() - now;
                    deadlines.set(
                        i,
                        format!(
                            "{k:?} -> {v} ns (remains: {})",
                            time_pretty::format_duration(remaining)
                        ),
                    );
                }
                inspector.root().record(deadlines);

                inspector.root().record_int("last_loop_started_at_ns", loop_started.into_nanos());
                inspector
                    .root()
                    .record_int("last_loop_completed_at_ns", loop_completed.into_nanos());
                inspector
                    .root()
                    .record_uint("debug_start_stage_counter", debug_start_stage_counter);

                // Second, separate lock scope: event recording also needs the state.
                {
                    let guard = manager_ref.lock();
                    guard.record_events(inspector.root());
                }

                Ok(inspector)
            }
            .boxed()
        });
        parent_node.record(inspect_node);
        new_manager
    }
542
543    /// Get a copy of a sender channel used for passing async command to the
544    /// event processing loop.
545    fn get_sender(&self) -> UnboundedSender<Cmd> {
546        self.start_next_sender.get().expect("start_next_sender is initialized").clone()
547    }
548
549    /// Returns the counter that tallies the timeline changes of the UTC timeline.
550    ///
551    /// # Args
552    /// - `timer`: the handle of the timer that needs monitoring of timeline changes.
553    pub fn get_timeline_change_observer(
554        &self,
555        timer: &HrTimerHandle,
556    ) -> Result<TimelineChangeObserver, Errno> {
557        let timer_id = timer.get_id();
558        let counter = zx::Counter::create();
559        let counter_clone = duplicate_handle(&counter).map_err(|err| {
560            log_error!("could not duplicate handle: {err:?}");
561            errno!(EINVAL, format!("could not duplicate handle: {err}, {timer_id:?}"))
562        })?;
563        let (send, recv) = mpsc::unbounded();
564        self.get_sender()
565            .unbounded_send(Cmd::MonitorUtc { timer: timer.clone(), counter, recv })
566            .map_err(|err| {
567            log_error!("could not send: {err:?}");
568            errno!(EINVAL, format!("could not send Cmd::Monitor: {err}, {timer_id:?}"))
569        })?;
570        Ok(TimelineChangeObserver::new(counter_clone, send))
571    }
572
    /// Initialize the [HrTimerManager] in the context of the current system task.
    ///
    /// Spawns the timer processing loop on a kernel thread and blocks until
    /// its setup completes. Production entry point; tests use `init_internal`
    /// with injected channel/counter.
    pub fn init(self: &HrTimerManagerHandle, system_task: &CurrentTask) -> Result<(), Errno> {
        self.init_internal(
            system_task,
            /*wake_channel_for_test=*/ None,
            /*message_counter_for_test=*/ None,
        )
    }
581
    // Call this init for testing instead of the one above.
    //
    // Installs the command sender (fails with EEXIST if already installed),
    // spawns the `watch_new_hrtimer_loop` actor on a kernel thread, and blocks
    // until the loop signals that its setup is complete.
    fn init_internal(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        // Can be injected for testing.
        wake_channel_for_test: Option<zx::Channel>,
        // Can be injected for testing.
        message_counter_for_test: Option<zx::Counter>,
    ) -> Result<(), Errno> {
        let (start_next_sender, start_next_receiver) = mpsc::unbounded();
        // OnceLock::set fails if init was already called; surface that as EEXIST.
        self.start_next_sender.set(start_next_sender).map_err(|_| errno!(EEXIST))?;

        let self_ref = self.clone();

        // Ensure that all internal init has completed in `watch_new_hrtimer_loop`
        // before proceeding from here.
        let setup_done = zx::Event::create();
        let setup_done_clone = duplicate_handle(&setup_done)?;

        let closure = async move |locked_and_task: LockedAndTask<'_>| {
            let current_thread = std::thread::current();
            // Helps find the thread in backtraces, see wait_signaled_sync.
            log_info!(
                "hr_timer_manager thread: {:?} ({:?})",
                current_thread.name(),
                current_thread.id()
            );
            if let Err(e) = self_ref
                .watch_new_hrtimer_loop(
                    locked_and_task.current_task(),
                    start_next_receiver,
                    wake_channel_for_test,
                    message_counter_for_test,
                    Some(setup_done_clone),
                )
                .await
            {
                log_error!("while running watch_new_hrtimer_loop: {e:?}");
            }
            // The loop is expected to run for the lifetime of the kernel.
            log_warn!("hr_timer_manager: finished kernel thread. should never happen in prod code");
        };
        let req = SpawnRequestBuilder::new()
            .with_debug_name("hr-timer-manager")
            .with_async_closure(closure)
            .build();
        system_task.kernel().kthreads.spawner().spawn_from_request(req);
        // Block the caller until the spawned loop has finished its setup.
        log_info!("hr_timer_manager: waiting on setup done");
        wait_signaled_sync(&setup_done)
            .to_result()
            .map_err(|status| from_status_like_fdio!(status))?;
        log_info!("hr_timer_manager: setup done");

        Ok(())
    }
636
    /// Notifies `timer` and wake sources about a triggered alarm.
    ///
    /// # Returns
    /// - `Ok(true)` if the alarm was for the current generation and was processed.
    /// - `Ok(false)` if the alarm was stale. Either the timer was restarted ( via `Cmd:Start`) with
    ///    the same id before the previous generation's alarm fired, or the timer was stopped
    ///    (via `Cmd::Stop`) or already processed.
    fn notify_timer(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        triggered_node: &HrTimerNodeHandle,
        lease: impl HandleBased,
    ) -> Result<bool> {
        let timer_id = triggered_node.hr_timer.get_id();
        // First lock scope: staleness check only. The lock is released before
        // signaling/waking to keep the critical section short.
        {
            let guard = self.lock();
            if let Some(active_state) = guard.pending_timers.get(&timer_id) {
                // Wake source is deliberately not signaled here. When the triggered_node for a
                // timer_id is not pointer-identical to the one stored in pending_timers, it is a
                // trigger for some previous generation of timer_id, which has since been
                // rescheduled.
                if !Arc::ptr_eq(&active_state.node, triggered_node) {
                    log_debug!("notify_timer: ignoring stale alarm for timer_id: {:?}", timer_id);
                    return Ok(false);
                }
            } else {
                log_debug!(
                    "notify_timer: ignoring alarm for timer_id: {:?} (not in pending_timers)",
                    timer_id
                );
                return Ok(false);
            }
        }

        log_debug!("watch_new_hrtimer_loop: Cmd::Alarm: triggered alarm: {:?}", timer_id);
        ftrace::duration!("alarms", "starnix:hrtimer:notify_timer", "timer_id" => timer_id);
        // Second lock scope: remove the fired timer and let its task run out.
        self.lock().pending_timers.remove(&timer_id).map(|s| s.task.detach());
        signal_event(
            &triggered_node.hr_timer.event(),
            zx::Signals::NONE,
            zx::Signals::TIMER_SIGNALED,
        )
        .context("notify_timer: hrtimer signal handle")?;

        // Handle wake source here.
        let wake_source = triggered_node.wake_source.clone();
        if let Some(wake_source) = wake_source.as_ref().and_then(|f| f.upgrade()) {
            let lease_token = lease.into_handle();
            // `.on_wake` must not block, else the timer management loop will stall.
            // At the moment `.on_wake`s don't stall at all.
            wake_source.on_wake(system_task, &lease_token);
            // Drop the baton lease after wake leases in associated epfd
            // are activated.
            drop(lease_token);
        }
        ftrace::instant!(
            "alarms",
            "starnix:hrtimer:notify_timer:drop_lease",
            ftrace::Scope::Process,
            "timer_id" => timer_id
        );
        Ok(true)
    }
700
701    // If no counter has been injected for tests, set provided `counter` to serve as that
702    // counter. Used to inject a fake counter in tests.
703    fn inject_or_set_message_counter(
704        self: &HrTimerManagerHandle,
705        message_counter: OwnedMessageCounterHandle,
706    ) {
707        let mut guard = self.lock();
708        if guard.message_counter.is_none() {
709            guard.message_counter = Some(message_counter);
710        }
711    }
712
713    fn record_inspect_on_stop(
714        self: &HrTimerManagerHandle,
715        guard: &mut MutexGuard<'_, HrTimerManagerState>,
716        prev_len: usize,
717    ) {
718        let after_len = guard.get_pending_timers_count();
719        let inspect_event_type = if after_len == prev_len {
720            None
721        } else if after_len == prev_len - 1 {
722            Some(InspectHrTimerEvent::Remove)
723        } else {
724            Some(InspectHrTimerEvent::retain_err(prev_len, after_len, "removing timer"))
725        };
726        if let Some(inspect_event_type) = inspect_event_type {
727            self.record_event(guard, inspect_event_type, None);
728        }
729    }
730
731    fn record_inspect_on_start(
732        self: &HrTimerManagerHandle,
733        guard: &mut MutexGuard<'_, HrTimerManagerState>,
734        timer_id: zx::Koid,
735        task: fasync::Task<()>,
736        deadline: TargetTime,
737        node: HrTimerNodeHandle,
738        prev_len: usize,
739    ) {
740        guard
741            .pending_timers
742            .insert(timer_id, TimerState { task, deadline, node })
743            .map(|timer_state| {
744                // This should not happen, at this point we already canceled
745                // any previous instances of the same wake alarm.
746                log_debug!(
747                    "watch_new_hrtimer_loop: removing timer task in Cmd::Start: {:?}",
748                    timer_state
749                );
750                timer_state
751            })
752            .map(|v| v.task.detach());
753
754        // Record the inspect event
755        let after_len = guard.get_pending_timers_count();
756        let inspect_event_type = if after_len == prev_len {
757            InspectHrTimerEvent::Update
758        } else if after_len == prev_len + 1 {
759            InspectHrTimerEvent::Add
760        } else {
761            InspectHrTimerEvent::retain_err(prev_len, after_len, "adding timer")
762        };
763        self.record_event(guard, inspect_event_type, Some(deadline));
764    }
765
    /// Timer handler loop.
    ///
    /// Processes `Cmd`s one at a time: `Cmd::Start`/`Cmd::Stop` sent by
    /// `add_timer`/`remove_timer`, and `Cmd::Alarm` sent back by the
    /// per-timer wake alarm futures spawned below.
    ///
    /// # Args:
    /// - `system_task`: the system task, passed to `notify_timer` when an
    ///   alarm fires.
    /// - `start_next_receiver`: receiving end of the command channel.
    /// - `wake_channel_for_test`: a channel implementing `fuchsia.time.alarms/Wake`
    ///   injected by tests only.
    /// - `message_counter_for_test`: a zx::Counter injected only by tests, to
    ///   emulate the wake proxy message counter.
    /// - `setup_done`: signaled once the initial loop setup is complete. Allows
    ///   pausing any async callers until this loop is in a runnable state.
    async fn watch_new_hrtimer_loop(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        mut start_next_receiver: UnboundedReceiver<Cmd>,
        mut wake_channel_for_test: Option<zx::Channel>,
        message_counter_for_test: Option<zx::Counter>,
        setup_done: Option<zx::Event>,
    ) -> Result<()> {
        // The values assigned to the counter are arbitrary, but unique for each assignment.
        // This should give us hints as to where any deadlocks might occur if they do.
        // We also take stack traces, but stack traces do not capture async stacks presently.
        self.lock().debug_start_stage_counter = 1005;
        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init", ftrace::Scope::Process);
        defer! {
            log_warn!("watch_new_hrtimer_loop: exiting. This should only happen in tests.");
        }

        // One-time setup: obtain the wake alarms channel (real or test-injected)
        // and register the message counter with the suspend/resume manager.
        let (device_channel, message_counter) = {
            defer! {
                // Ensure that the setup_done event is signaled even if we fail to initialize
                // correctly. This prevents the caller from blocking forever.
                setup_done
                    .as_ref()
                    .map(|e| signal_event(e, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED));
            }
            let wake_channel = wake_channel_for_test.take().unwrap_or_else(|| {
                connect_to_wake_alarms_async().expect("connection to wake alarms async proxy")
            });
            self.lock().debug_start_stage_counter = 1004;

            let counter_name = "wake-alarms";
            let (device_channel, counter) = if let Some(message_counter) = message_counter_for_test
            {
                // For tests only.
                (wake_channel, message_counter)
            } else {
                create_proxy_for_wake_events_counter_zero(wake_channel, counter_name.to_string())
            };
            self.lock().debug_start_stage_counter = 1003;
            let message_counter = system_task
                .kernel()
                .suspend_resume_manager
                .add_message_counter(counter_name, Some(counter));
            self.inject_or_set_message_counter(message_counter.clone());
            (device_channel, message_counter)
        };

        self.lock().debug_start_stage_counter = 1002;
        let device_async_proxy =
            fta::WakeAlarmsProxy::new(fidl::AsyncChannel::from_channel(device_channel));

        // Contains suspend locks for interval (periodic) timers that expired, but have not been
        // rescheduled yet. This allows us to defer container suspend until all such timers have
        // been rescheduled.
        // TODO: b/418813184 - Remove in favor of Fuchsia-specific interval timer support
        // once it is available.
        let mut interval_timers_pending_reschedule: HashMap<zx::Koid, SharedMessageCounter> =
            HashMap::new();

        // Per timer tasks.
        let mut task_by_timer_id: HashMap<zx::Koid, fasync::Task<()>> = HashMap::new();

        self.lock().debug_start_stage_counter = 1001;
        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init_done", ftrace::Scope::Process);
        while let Some(cmd) = start_next_receiver.next().await {
            {
                let mut guard = self.lock();
                guard.debug_start_stage_counter = 1002;
                guard.last_loop_started_timestamp = zx::BootInstant::get();
            }
            ftrace::duration!("alarms", "start_next_receiver:loop");

            log_debug!("watch_new_hrtimer_loop: got command: {cmd:?}");
            self.lock().debug_start_stage_counter = 0;
            match cmd {
                // A new timer needs to be started.  The timer node for the timer
                // is provided, and `done` must be signaled once the setup is
                // complete.
                Cmd::Start { new_timer_node, done, message_counter } => {
                    self.lock().debug_start_stage_counter = 1;
                    defer! {
                        // Allow add_timer to proceed once command processing is done.
                        let _ = signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED)
                            .map_err(|err| to_errno_with_log(err));
                    }

                    let hr_timer = &new_timer_node.hr_timer;
                    let timer_id = hr_timer.get_id();
                    let wake_alarm_id = hr_timer.wake_alarm_id();
                    let trace_id = hr_timer.trace_id();
                    log_debug!(
                        "watch_new_hrtimer_loop: Cmd::Start: timer_id: {:?}, wake_alarm_id: {}",
                        timer_id,
                        wake_alarm_id
                    );
                    ftrace::duration!("alarms", "starnix:hrtimer:start", "timer_id" => timer_id);
                    ftrace::flow_begin!("alarms", "hrtimer_lifecycle", trace_id);

                    self.lock().debug_start_stage_counter = 2;
                    // Cancel any outstanding alarm for the same timer before
                    // scheduling its replacement.
                    let maybe_cancel = self.lock().pending_timers.remove(&timer_id);
                    log_long_op!(cancel_by_id(
                        &message_counter,
                        maybe_cancel,
                        &timer_id,
                        &device_async_proxy,
                        &mut interval_timers_pending_reschedule,
                        &mut task_by_timer_id,
                        &wake_alarm_id,
                    ));
                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_pre_start", ftrace::Scope::Process, "timer_id" => timer_id);

                    // Signaled when the timer completed setup. We can not forward `done` because
                    // we have post-schedule work as well.
                    let setup_event = zx::Event::create();
                    let deadline = new_timer_node.deadline;

                    ftrace::duration!("alarms", "starnix:hrtimer:signaled", "timer_id" => timer_id);

                    self.lock().debug_start_stage_counter = 3;
                    // Make a request here. Move it into the closure after. Current FIDL semantics
                    // ensure that even though we do not `.await` on this future, a request to
                    // schedule a wake alarm based on this timer will be sent.
                    let request_fut = match deadline {
                        TargetTime::Monotonic(_) => {
                            // If we hit this, it's a Starnix bug.
                            panic!("can not schedule wake alarm on monotonic timeline")
                        }
                        TargetTime::BootInstant(boot_instant) => device_async_proxy.set_and_wait(
                            boot_instant,
                            fta::SetMode::NotifySetupDone(duplicate_handle(&setup_event)?),
                            &wake_alarm_id,
                        ),
                        TargetTime::RealTime(utc_instant) => device_async_proxy.set_and_wait_utc(
                            &fta::InstantUtc { timestamp_utc: utc_instant.into_nanos() },
                            fta::SetMode::NotifySetupDone(duplicate_handle(&setup_event)?),
                            &wake_alarm_id,
                        ),
                    };
                    let mut done_sender = self.get_sender();
                    let prev_len = self.lock().get_pending_timers_count();

                    self.lock().debug_start_stage_counter = 4;
                    let self_clone = self.clone();
                    let new_timer_node_clone = new_timer_node.clone();
                    // Per-timer task: awaits the alarm and reports the outcome
                    // back to this loop via `done_sender`.
                    let task = fasync::Task::local(async move {
                        log_debug!(
                            "wake_alarm_future: set_and_wait will block here: {wake_alarm_id:?}"
                        );
                        ftrace::instant!("alarms", "starnix:hrtimer:wait", ftrace::Scope::Process, "timer_id" => timer_id);
                        ftrace::flow_step!("alarms", "hrtimer_lifecycle", trace_id);

                        // Wait for this timer to expire. This wait can be arbitrarily long.
                        let response = request_fut.await;

                        // The counter was already incremented by the wake proxy when the alarm fired.
                        let message_counter = self_clone.lock().share_message_counter(false);
                        ftrace::instant!("alarms", "starnix:hrtimer:wake", ftrace::Scope::Process, "timer_id" => timer_id);

                        log_debug!("wake_alarm_future: set_and_wait over: {:?}", response);
                        match response {
                            // Alarm.  This must be processed in the main loop because notification
                            // requires access to &CurrentTask, which is not available here. So we
                            // only forward it.
                            Ok(Ok(lease)) => {
                                log_long_op!(done_sender.send(Cmd::Alarm {
                                    new_timer_node: new_timer_node_clone,
                                    lease,
                                    message_counter
                                }))
                                .expect("infallible");
                            }
                            Ok(Err(error)) => {
                                ftrace::duration!("alarms", "starnix:hrtimer:wake_error", "timer_id" => timer_id);
                                log_debug!(
                                    "wake_alarm_future: protocol error: {error:?}: timer_id: {timer_id:?}"
                                );
                                let mut guard = self_clone.lock();
                                let pending = &mut guard.pending_timers;
                                process_alarm_protocol_error(pending, &timer_id, error);
                            }
                            Err(error) => {
                                ftrace::duration!("alarms", "starnix:hrtimer:fidl_error", "timer_id" => timer_id);
                                log_debug!(
                                    "wake_alarm_future: FIDL error: {error:?}: timer_id: {timer_id:?}"
                                );
                                self_clone.lock().pending_timers.remove(&timer_id);
                            }
                        }
                        log_debug!("wake_alarm_future: closure done for timer_id: {timer_id:?}");
                    });
                    self.lock().debug_start_stage_counter = 5;
                    ftrace::instant!("alarms", "starnix:hrtimer:pre_setup_event_signal", ftrace::Scope::Process, "timer_id" => timer_id);

                    // wait_signaled here should be almost instantaneous.  Blocking for a long time
                    // here is a bug.
                    match log_long_op!(wait_signaled(&setup_event)) {
                        Ok(_) => {
                            ftrace::instant!("alarms", "starnix:hrtimer:setup_event_signaled", ftrace::Scope::Process, "timer_id" => timer_id);
                            let mut guard = self.lock();
                            guard.debug_start_stage_counter = 6;
                            self.record_inspect_on_start(
                                &mut guard,
                                timer_id,
                                task,
                                deadline,
                                new_timer_node,
                                prev_len,
                            );
                            log_debug!("Cmd::Start scheduled: timer_id: {:?}", timer_id);
                        }
                        Err(err) => {
                            ftrace::instant!("alarms", "starnix:hrtimer:setup_event_error", ftrace::Scope::Process, "timer_id" => timer_id);
                            to_errno_with_log(err);
                        }
                    }
                    self.lock().debug_start_stage_counter = 999;
                }
                Cmd::Alarm { new_timer_node, lease, message_counter } => {
                    self.lock().debug_start_stage_counter = 10;
                    let timer = &new_timer_node.hr_timer;
                    let timer_id = timer.get_id();
                    ftrace::duration!("alarms", "starnix:hrtimer:alarm", "timer_id" => timer_id);
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
                    match self.notify_timer(system_task, &new_timer_node, lease) {
                        Ok(true) => {
                            // Alarm was for current generation, success.
                            // Interval timers currently need special handling: we must not suspend
                            // the container until the interval timer in question gets re-scheduled.
                            // To ensure that we stay awake, we store the suspend lock for a while.
                            // This prevents container suspend.
                            if *timer.is_interval.lock() {
                                interval_timers_pending_reschedule
                                    .insert(timer_id, message_counter);
                            }
                        }
                        Ok(false) => {
                            // Alarm was stale, ignored.
                        }
                        Err(e) => {
                            log_error!("watch_new_hrtimer_loop: notify_timer failed: {e:?}");
                        }
                    }
                    // Interval timers usually reschedule themselves. But if an interval timer
                    // is stopped (via Cmd::Stop) or is replaced (via Cmd::Start for the same timer
                    // ID) before it has a chance to reschedule, the reschedule lock will get
                    // dropped then.
                    log_debug!("Cmd::Alarm done: timer_id: {timer_id:?}");
                    self.lock().debug_start_stage_counter = 19;
                }
                Cmd::Stop { timer, done, message_counter } => {
                    self.lock().debug_start_stage_counter = 20;
                    defer! {
                        // Unblock `remove_timer` once this command is handled.
                        let _ = signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED)
                            .map_err(|err| to_errno_with_log(err));
                    }
                    let timer_id = timer.get_id();
                    log_debug!("watch_new_hrtimer_loop: Cmd::Stop: timer_id: {:?}", timer_id);
                    ftrace::duration!("alarms", "starnix:hrtimer:stop", "timer_id" => timer_id);
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());

                    let (maybe_cancel, prev_len) = {
                        let mut guard = self.lock();
                        let prev_len = guard.get_pending_timers_count();
                        (guard.pending_timers.remove(&timer_id), prev_len)
                    };

                    let wake_alarm_id = timer.wake_alarm_id();
                    log_long_op!(cancel_by_id(
                        &message_counter,
                        maybe_cancel,
                        &timer_id,
                        &device_async_proxy,
                        &mut interval_timers_pending_reschedule,
                        &mut task_by_timer_id,
                        &wake_alarm_id,
                    ));
                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_at_stop", ftrace::Scope::Process, "timer_id" => timer_id);

                    {
                        let mut guard = self.lock();
                        self.record_inspect_on_stop(&mut guard, prev_len);
                    }
                    log_debug!("Cmd::Stop done: {timer_id:?}");
                    self.lock().debug_start_stage_counter = 29;
                }
                Cmd::MonitorUtc { timer, counter, recv } => {
                    self.lock().debug_start_stage_counter = 30;
                    ftrace::duration!("alarms", "starnix:hrtimer:monitor_utc", "timer_id" => timer.get_id());
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
                    let monitor_task = fasync::Task::local(async move {
                        run_utc_timeline_monitor(counter, recv).await;
                    });
                    task_by_timer_id.insert(timer.get_id(), monitor_task);
                    self.lock().debug_start_stage_counter = 39;
                }
            }
            // Post-command bookkeeping and debug logging.
            let mut guard = self.lock();
            guard.debug_start_stage_counter = 90;

            log_debug!(
                "watch_new_hrtimer_loop: pending timers count: {}",
                guard.pending_timers.len()
            );
            log_debug!("watch_new_hrtimer_loop: pending timers:       {:?}", guard.pending_timers);
            log_debug!(
                "watch_new_hrtimer_loop: message counter:      {:?}",
                message_counter.to_string(),
            );
            log_debug!(
                "watch_new_hrtimer_loop: interval timers:      {:?}",
                interval_timers_pending_reschedule.len(),
            );

            guard.last_loop_completed_timestamp = zx::BootInstant::get();
            guard.debug_start_stage_counter = 99;
        } // while

        Ok(())
    }
1094
    /// Acquires the lock over the manager's mutable state.
    fn lock(&self) -> MutexGuard<'_, HrTimerManagerState> {
        self.state.lock()
    }
1098
1099    fn record_event(
1100        self: &HrTimerManagerHandle,
1101        guard: &mut MutexGuard<'_, HrTimerManagerState>,
1102        event_type: InspectHrTimerEvent,
1103        deadline: Option<TargetTime>,
1104    ) {
1105        if guard.events.len() >= INSPECT_EVENT_BUFFER_SIZE {
1106            guard.events.pop_front();
1107        }
1108        guard.events.push_back(InspectEvent::new(event_type, deadline));
1109    }
1110
1111    /// Add a new timer.
1112    ///
1113    /// A wake alarm is scheduled for the timer.
1114    pub fn add_timer(
1115        self: &HrTimerManagerHandle,
1116        wake_source: Option<Weak<dyn OnWakeOps>>,
1117        new_timer: &HrTimerHandle,
1118        deadline: TargetTime,
1119    ) -> Result<(), Errno> {
1120        log_debug!("add_timer: entry: {new_timer:?}, deadline: {deadline:?}");
1121        ftrace::duration!("alarms", "starnix:add_timer", "deadline" => deadline.estimate_boot().unwrap().into_nanos());
1122        ftrace::flow_step!("alarms", "hrtimer_lifecycle", new_timer.trace_id());
1123
1124        // Keep system awake until timer is scheduled.
1125        let message_counter_until_timer_scheduled = self.lock().share_message_counter(true);
1126
1127        let sender = self.get_sender();
1128        let new_timer_node = HrTimerNode::new(deadline, wake_source, new_timer.clone());
1129        let wake_alarm_scheduled = zx::Event::create();
1130        let wake_alarm_scheduled_clone = duplicate_handle(&wake_alarm_scheduled)?;
1131        let timer_id = new_timer.get_id();
1132        sender
1133            .unbounded_send(Cmd::Start {
1134                new_timer_node,
1135                message_counter: message_counter_until_timer_scheduled,
1136                done: wake_alarm_scheduled_clone,
1137            })
1138            .map_err(|_| errno!(EINVAL, "add_timer: could not send Cmd::Start"))?;
1139
1140        // Block until the wake alarm for this timer is scheduled.
1141        wait_signaled_sync(&wake_alarm_scheduled)
1142            .map_err(|_| errno!(EINVAL, "add_timer: wait_signaled_sync failed"))?;
1143
1144        log_debug!("add_timer: exit : timer_id: {timer_id:?}");
1145        Ok(())
1146    }
1147
1148    /// Remove a timer.
1149    ///
1150    /// The timer is removed if scheduled, nothing is changed if it is not.
1151    pub fn remove_timer(self: &HrTimerManagerHandle, timer: &HrTimerHandle) -> Result<(), Errno> {
1152        log_debug!("remove_timer: entry:  {timer:?}");
1153        ftrace::duration!("alarms", "starnix:remove_timer");
1154        // Keep system awake until timer is removed.
1155        let message_counter_until_removed = self.lock().share_message_counter(true);
1156
1157        let sender = self.get_sender();
1158        let done = zx::Event::create();
1159        let done_clone = duplicate_handle(&done)?;
1160        let timer_id = timer.get_id();
1161        sender
1162            .unbounded_send(Cmd::Stop {
1163                timer: timer.clone(),
1164                message_counter: message_counter_until_removed,
1165                done: done_clone,
1166            })
1167            .map_err(|_| errno!(EINVAL, "remove_timer: could not send Cmd::Stop"))?;
1168
1169        // Block until the alarm for this timer is scheduled.
1170        wait_signaled_sync(&done)
1171            .map_err(|_| errno!(EINVAL, "add_timer: wait_signaled_sync failed"))?;
1172        log_debug!("remove_timer: exit:  {timer_id:?}");
1173        Ok(())
1174    }
1175}
1176
/// A high-resolution timer backed by a zx::Event.
#[derive(Debug)]
pub struct HrTimer {
    // Backing event for this timer. Its koid serves as the timer's unique ID
    // (see `HrTimer::get_id`), and duplicates are handed out via
    // `HrTimer::event`. The `TimerOps` impl clears `TIMER_SIGNALED` on it when
    // the timer is (re)started or stopped.
    event: zx::Event,

    /// True iff the timer is currently set to trigger at an interval.
    ///
    /// This is used to determine at which point the hrtimer event (not
    /// `HrTimer::event` but the one that is shared with the actual driver)
    /// should be cleared.
    ///
    /// If this is true, the timer manager will wait to clear the timer event
    /// until the next timer request has been sent to the driver. This prevents
    /// lost wake ups where the container happens to suspend between two instances
    /// of an interval timer triggering.
    pub is_interval: Mutex<bool>,
}
/// Shared, reference-counted handle to an [HrTimer].
pub type HrTimerHandle = Arc<HrTimer>;
1194
1195impl Drop for HrTimer {
1196    fn drop(&mut self) {
1197        let wake_alarm_id = self.wake_alarm_id();
1198        ftrace::duration!("alarms", "hrtimer::drop", "timer_id" => self.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
1199        ftrace::flow_end!("alarms", "hrtimer_lifecycle", self.trace_id());
1200    }
1201}
1202
1203impl HrTimer {
1204    pub fn new() -> HrTimerHandle {
1205        let ret = Arc::new(Self { event: zx::Event::create(), is_interval: Mutex::new(false) });
1206        let wake_alarm_id = ret.wake_alarm_id();
1207        ftrace::duration!("alarms", "hrtimer::new", "timer_id" => ret.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
1208        ftrace::flow_begin!("alarms", "hrtimer_lifecycle", ret.trace_id(), "wake_alarm_id" => &wake_alarm_id[..]);
1209        ret
1210    }
1211
1212    pub fn event(&self) -> zx::Event {
1213        self.event
1214            .duplicate_handle(zx::Rights::SAME_RIGHTS)
1215            .expect("Duplicate hrtimer event handle")
1216    }
1217
1218    /// Returns the unique identifier of this [HrTimer].
1219    ///
1220    /// All holders of the same [HrTimerHandle] will see the same value here.
1221    pub fn get_id(&self) -> zx::Koid {
1222        self.event.koid().expect("infallible")
1223    }
1224
1225    /// Returns the unique alarm ID for this [HrTimer].
1226    ///
1227    /// The naming pattern is: `starnix:Koid(NNNNN):iB`, where `NNNNN` is a koid
1228    /// and B is `1` if the timer is an interval timer, or `0` otherwise.
1229    fn wake_alarm_id(&self) -> String {
1230        let i = if *self.is_interval.lock() { "i1" } else { "i0" };
1231        let koid = self.get_id();
1232        format!("starnix:{koid:?}:{i}")
1233    }
1234
1235    fn trace_id(&self) -> ftrace::Id {
1236        self.get_id().raw_koid().into()
1237    }
1238}
1239
1240impl TimerOps for HrTimerHandle {
1241    fn start(
1242        &self,
1243        current_task: &CurrentTask,
1244        source: Option<Weak<dyn OnWakeOps>>,
1245        deadline: TargetTime,
1246    ) -> Result<(), Errno> {
1247        // Before (re)starting the timer, ensure the signal is cleared.
1248        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
1249            .map_err(|status| from_status_like_fdio!(status))?;
1250        current_task.kernel().hrtimer_manager.add_timer(
1251            source,
1252            self,
1253            deadline.into_resolved_utc_deadline(),
1254        )?;
1255        Ok(())
1256    }
1257
1258    fn stop(&self, kernel: &Arc<Kernel>) -> Result<(), Errno> {
1259        // Clear the signal when removing the hrtimer.
1260        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
1261            .map_err(|status| from_status_like_fdio!(status))?;
1262        Ok(kernel.hrtimer_manager.remove_timer(self)?)
1263    }
1264
1265    fn as_handle_ref(&self) -> HandleRef<'_> {
1266        self.event.as_handle_ref()
1267    }
1268
1269    fn get_timeline_change_observer(
1270        &self,
1271        current_task: &CurrentTask,
1272    ) -> Option<TimelineChangeObserver> {
1273        // Should this return errno instead?
1274        current_task
1275            .kernel()
1276            .hrtimer_manager
1277            .get_timeline_change_observer(self)
1278            .inspect_err(|err| {
1279                log_error!("hr_timer_manager: could not create timeline change counter: {err:?}")
1280            })
1281            .ok()
1282    }
1283}
1284
/// Represents a node of `HrTimer`.
///
/// Bundles an [HrTimer] with the deadline and wake source it was scheduled
/// with; passed through the timer manager as an [HrTimerNodeHandle].
#[derive(Clone, Debug)]
struct HrTimerNode {
    /// The deadline of the associated `HrTimer`.
    deadline: TargetTime,

    /// The source that initiated this `HrTimer`.
    ///
    /// When the timer expires, the system will be woken up if necessary. The `on_wake` callback
    /// will be triggered with a baton lease to prevent further suspend while Starnix is handling
    /// the wake event.
    wake_source: Option<Weak<dyn OnWakeOps>>,

    /// The underlying HrTimer.
    hr_timer: HrTimerHandle,
}
/// Shared handle to an immutable [HrTimerNode].
type HrTimerNodeHandle = Arc<HrTimerNode>;
1302
1303impl HrTimerNode {
1304    fn new(
1305        deadline: TargetTime,
1306        wake_source: Option<Weak<dyn OnWakeOps>>,
1307        hr_timer: HrTimerHandle,
1308    ) -> HrTimerNodeHandle {
1309        Arc::new(Self { deadline, wake_source, hr_timer })
1310    }
1311}
1312
1313#[cfg(test)]
1314mod tests {
1315    use super::*;
1316    use crate::testing::spawn_kernel_and_run;
1317    use crate::time::HrTimer;
1318    use fake_wake_alarms::{MAGIC_EXPIRE_DEADLINE, Response, serve_fake_wake_alarms};
1319    use fidl_fuchsia_time_alarms as fta;
1320    use fuchsia_async as fasync;
1321    use fuchsia_runtime::{UtcClockUpdate, UtcInstant};
1322    use std::sync::LazyLock;
1323    use std::thread;
1324
    // Default clock options used by `create_utc_clock_for_test`.
    static CLOCK_OPTS: LazyLock<zx::ClockOpts> = LazyLock::new(zx::ClockOpts::empty);
    const BACKSTOP_TIME: UtcInstant = UtcInstant::from_nanos(/*arbitrary*/ 222222);

    // Creates a UTC clock for tests, started at the arbitrary `BACKSTOP_TIME`.
    fn create_utc_clock_for_test() -> UtcClock {
        let clock = UtcClock::create(*CLOCK_OPTS, Some(BACKSTOP_TIME)).unwrap();
        clock.update(UtcClockUpdate::builder().approximate_value(BACKSTOP_TIME)).unwrap();
        clock
    }
1333
    impl HrTimerManagerState {
        // Builds a pristine state for tests: no pending timers, no message
        // counter, and `INFINITE_PAST` sentinel timestamps.
        fn new_for_test() -> Self {
            Self {
                events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
                pending_timers: Default::default(),
                message_counter: None,
                last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
                last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
                debug_start_stage_counter: 0,
            }
        }
    }
1346
1347    // Injected for testing.
1348    fn connect_factory(message_counter: zx::Counter, response_type: Response) -> zx::Channel {
1349        let (client, server) = zx::Channel::create();
1350
1351        // A separate thread is needed to allow independent execution of the server.
1352        let _detached = thread::spawn(move || {
1353            fasync::LocalExecutor::default().run_singlethreaded(async move {
1354                let stream =
1355                    fidl::endpoints::ServerEnd::<fta::WakeAlarmsMarker>::new(server).into_stream();
1356                serve_fake_wake_alarms(message_counter, response_type, stream, /*once*/ false)
1357                    .await;
1358            });
1359        });
1360        client
1361    }
1362
1363    // Initializes HrTimerManager for tests.
1364    //
1365    // # Returns
1366    //
1367    // A tuple of:
1368    // - `HrTimerManagerHandle` the unit under test
1369    // - `zx::Counter` a message counter to use in tests to observe suspend state
1370    fn init_hr_timer_manager(
1371        current_task: &CurrentTask,
1372        response_type: Response,
1373    ) -> (HrTimerManagerHandle, zx::Counter) {
1374        let manager = Arc::new(HrTimerManager {
1375            state: Mutex::new(HrTimerManagerState::new_for_test()),
1376            start_next_sender: Default::default(),
1377        });
1378        let counter = zx::Counter::create();
1379        let counter_clone = duplicate_handle(&counter).unwrap();
1380        let wake_channel = connect_factory(counter_clone, response_type);
1381        let counter_clone = duplicate_handle(&counter).unwrap();
1382        manager
1383            .init_internal(&current_task, Some(wake_channel), Some(counter_clone))
1384            .expect("infallible");
1385        (manager, counter)
1386    }
1387
1388    #[fuchsia::test]
1389    async fn test_triggering() {
1390        spawn_kernel_and_run(async |_, current_task| {
1391            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1392
1393            let timer1 = HrTimer::new();
1394            let timer2 = HrTimer::new();
1395            let timer3 = HrTimer::new();
1396
1397            manager.add_timer(None, &timer1, zx::BootInstant::from_nanos(1).into()).unwrap();
1398            manager.add_timer(None, &timer2, zx::BootInstant::from_nanos(2).into()).unwrap();
1399            manager.add_timer(None, &timer3, zx::BootInstant::from_nanos(3).into()).unwrap();
1400
1401            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1402            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1403            wait_signaled_sync(&timer3.event()).to_result().unwrap();
1404
1405            assert_eq!(
1406                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1407                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1408            );
1409        })
1410        .await;
1411    }
1412
1413    #[fuchsia::test]
1414    async fn test_triggering_utc() {
1415        spawn_kernel_and_run(async |_, current_task| {
1416            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1417
1418            let timer1 = HrTimer::new();
1419            let timer2 = HrTimer::new();
1420            let timer3 = HrTimer::new();
1421
1422            // All these are normally already expired as scheduled.
1423            manager.add_timer(None, &timer1, UtcInstant::from_nanos(1).into()).unwrap();
1424            manager.add_timer(None, &timer2, UtcInstant::from_nanos(2).into()).unwrap();
1425            manager.add_timer(None, &timer3, UtcInstant::from_nanos(3).into()).unwrap();
1426
1427            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1428            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1429            wait_signaled_sync(&timer3.event()).to_result().unwrap();
1430
1431            assert_eq!(
1432                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1433                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1434            );
1435        })
1436        .await;
1437    }
1438
1439    #[fuchsia::test]
1440    async fn test_delayed_response() {
1441        spawn_kernel_and_run(async |_, current_task| {
1442            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1443
1444            let timer = HrTimer::new();
1445
1446            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1447
1448            assert_eq!(
1449                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1450                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1451            );
1452        })
1453        .await;
1454    }
1455
1456    #[fuchsia::test]
1457    async fn test_protocol_error_response() {
1458        spawn_kernel_and_run(async |_, current_task| {
1459            let (manager, counter) = init_hr_timer_manager(current_task, Response::Error);
1460
1461            let timer = HrTimer::new();
1462            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1463            assert_eq!(
1464                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1465                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1466            );
1467        })
1468        .await;
1469    }
1470
1471    #[fuchsia::test]
1472    async fn reschedule_same_timer() {
1473        spawn_kernel_and_run(async |_, current_task| {
1474            let (manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1475
1476            let timer = HrTimer::new();
1477
1478            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1479            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(2).into()).unwrap();
1480
1481            // Force alarm expiry.
1482            manager
1483                .add_timer(None, &timer, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1484                .unwrap();
1485            wait_signaled_sync(&timer.event()).to_result().unwrap();
1486
1487            assert_eq!(
1488                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1489                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1490            );
1491        })
1492        .await;
1493    }
1494
1495    #[fuchsia::test]
1496    async fn rescheduling_interval_timers_forbids_suspend() {
1497        spawn_kernel_and_run(async |_, current_task| {
1498            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1499
1500            // Schedule an interval timer and let it expire.
1501            let timer1 = HrTimer::new();
1502            *timer1.is_interval.lock() = true;
1503            hrtimer_manager
1504                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1505                .unwrap();
1506            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1507
1508            // Schedule a regular timer and let it expire.
1509            let timer2 = HrTimer::new();
1510            hrtimer_manager
1511                .add_timer(None, &timer2, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1512                .unwrap();
1513            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1514
1515            // When we have an expired but not rescheduled interval timer (`timer1`), and we have
1516            // an intervening timer that gets scheduled and expires (`timer2`) before `timer1` is
1517            // rescheduled, then suspend should be disallowed (counter > 0) to allow `timer1` to
1518            // be scheduled eventually.
1519            assert_eq!(
1520                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1521                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1522            );
1523        })
1524        .await;
1525    }
1526
1527    #[fuchsia::test]
1528    async fn canceling_interval_timer_allows_suspend() {
1529        spawn_kernel_and_run(async |_, current_task| {
1530            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1531
1532            let timer1 = HrTimer::new();
1533            *timer1.is_interval.lock() = true;
1534            hrtimer_manager
1535                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1536                .unwrap();
1537            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1538
1539            // When an interval timer expires, we should not be allowed to suspend.
1540            assert_eq!(
1541                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1542                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1543            );
1544
1545            // Schedule the same timer again. This time around we do not wait for it to expire,
1546            // but cancel the timer instead.
1547            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
1548            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S.into());
1549            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();
1550
1551            hrtimer_manager.remove_timer(&timer1).unwrap();
1552
1553            // When we cancel an interval timer, we should be allowed to suspend.
1554            assert_eq!(
1555                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1556                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1557            );
1558        })
1559        .await;
1560    }
1561
1562    #[fuchsia::test]
1563    async fn canceling_interval_timer_allows_suspend_with_flake() {
1564        spawn_kernel_and_run(async |_, current_task| {
1565            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1566
1567            let timer1 = HrTimer::new();
1568            *timer1.is_interval.lock() = true;
1569            hrtimer_manager
1570                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1571                .unwrap();
1572            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1573
1574            assert_eq!(
1575                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1576                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1577            );
1578            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
1579            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S.into());
1580            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();
1581            // No pause between start and stop has led to flakes before.
1582            hrtimer_manager.remove_timer(&timer1).unwrap();
1583
1584            assert_eq!(
1585                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1586                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1587            );
1588        })
1589        .await;
1590    }
1591
1592    #[fuchsia::test]
1593    async fn utc_timeline_monitor_exits_on_interest_drop() {
1594        let counter = zx::Counter::create();
1595        let utc_clock = create_utc_clock_for_test();
1596        let (tx, rx) = mpsc::unbounded();
1597
1598        drop(tx);
1599        // Expect that this call returns, when tx no longer exists. Incorrect
1600        // handling of tx closure could cause it to hang otherwise.
1601        run_utc_timeline_monitor_internal(counter, rx, utc_clock).await;
1602    }
1603}