// starnix_core/time/hr_timer_manager.rs
1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::power::{
6    OnWakeOps, OwnedMessageCounterHandle, SharedMessageCounter,
7    create_proxy_for_wake_events_counter_zero,
8};
9use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
10use crate::task::{CurrentTask, Kernel, LockedAndTask};
11use crate::time::TargetTime;
12use crate::vfs::timer::{TimelineChangeObserver, TimerOps};
13use anyhow::{Context, Result};
14use fidl_fuchsia_time_alarms as fta;
15use fuchsia_async as fasync;
16use fuchsia_inspect::ArrayProperty;
17use fuchsia_runtime::UtcClock;
18use fuchsia_trace as ftrace;
19use futures::channel::mpsc::{self, UnboundedReceiver, UnboundedSender};
20use futures::{FutureExt, SinkExt, StreamExt, select};
21use scopeguard::defer;
22use starnix_logging::{log_debug, log_error, log_info, log_warn};
23use starnix_sync::{Mutex, MutexGuard};
24use starnix_uapi::errors::Errno;
25use starnix_uapi::{errno, from_status_like_fdio};
26use std::collections::{HashMap, VecDeque};
27use std::sync::{Arc, OnceLock, Weak};
28use zx::{HandleBased, HandleRef};
29
/// Max number of entries retained in the inspect event history.
const INSPECT_EVENT_BUFFER_SIZE: usize = 128;
32
33fn to_errno_with_log<T: std::fmt::Debug>(v: T) -> Errno {
34    log_error!("hr_timer_manager internal error: {v:?}");
35    from_status_like_fdio!(zx::Status::IO)
36}
37
38fn signal_event(
39    event: &zx::Event,
40    clear_mask: zx::Signals,
41    set_mask: zx::Signals,
42) -> Result<(), zx::Status> {
43    event
44        .signal(clear_mask, set_mask)
45        .inspect_err(|err| log_error!(err:?, clear_mask:?, set_mask:?; "while signaling event"))
46}
47
48fn duplicate_handle<H: HandleBased>(h: &H) -> Result<H, Errno> {
49    h.duplicate_handle(zx::Rights::SAME_RIGHTS).map_err(|status| from_status_like_fdio!(status))
50}
51
/// Watchdog slice length for the waits below; each expiry logs a warning but
/// the waits themselves never give up.
const TIMEOUT_SECONDS: i64 = 40;
/// Waits forever synchronously for EVENT_SIGNALED.
///
/// For us there is no useful scenario where this wait times out and we can continue operating.
/// The wait is sliced into `TIMEOUT_SECONDS` chunks purely so a stuck wait can
/// be logged and traced; the function only returns once the wait succeeds.
fn wait_signaled_sync(event: &zx::Event) -> zx::WaitResult {
    // Set after the first timeout so the eventual recovery is also logged
    // (and the normal fast path stays quiet).
    let mut logged = false;
    loop {
        let timeout =
            zx::MonotonicInstant::after(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
        let result = event.wait_one(zx::Signals::EVENT_SIGNALED, timeout);
        if let zx::WaitResult::Ok(_) = result {
            if logged {
                // A previous iteration reported a stall; note that it resolved.
                log_error!("wait_signaled_sync: signal resolved: result={result:?}",);
            }
            return result;
        }
        // Emit a trace event for each timed-out wait slice.
        fuchsia_trace::instant!(
            "alarms",
            "starnix:hrtimer:wait_timeout",
            fuchsia_trace::Scope::Process
        );
        // This is bad and should never happen. If it does, it's a bug that has to be found and
        // fixed. There is no good way to proceed if these signals are not being signaled properly.
        log_error!(
            // Check logs for a `kBadState` status reported from the hrtimer driver.
            // LINT.IfChange(hrtimer_wait_signaled_sync_tefmo)
            "wait_signaled_sync: not signaled yet. Report to `componentid:1408151`: result={result:?}",
            // LINT.ThenChange(//tools/testing/tefmocheck/string_in_log_check.go:hrtimer_wait_signaled_sync_tefmo)
        );
        if !logged {
            // Capture backtraces from all threads once to aid diagnosis of the stall.
            #[cfg(all(target_os = "fuchsia", not(doc)))]
            ::debug::backtrace_request_all_threads();
            logged = true;
        }
    }
}
89
/// A macro that waits on a future, but if the future takes longer than
/// `TIMEOUT_SECONDS`, we log a warning and a stack trace.
///
/// Expands to a block that evaluates to the wrapped future's output; the
/// future is pinned locally and polled in a loop against a fresh timer.
macro_rules! log_long_op {
    ($fut:expr) => {{
        use futures::FutureExt;
        let fut = $fut;
        futures::pin_mut!(fut);
        // Set after the first timeout so the eventual resolution is logged too.
        let mut logged = false;
        loop {
            let timeout = fasync::Timer::new(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
            futures::select! {
                res = fut.as_mut().fuse() => {
                    if logged {
                        log_warn!("unexpected blocking is now resolved: long-running async operation at {}:{}.",
                            file!(), line!());
                    }
                    // The wrapped future completed; the block yields its output.
                    break res;
                }
                _ = timeout.fuse() => {
                    // Check logs for a `kBadState` status reported from the hrtimer driver.
                    log_warn!("unexpected blocking: long-running async op at {}:{}. Report to `componentId:1408151`",
                        file!(), line!());
                    if !logged {
                        // Capture backtraces from all threads once to aid diagnosis.
                        #[cfg(all(target_os = "fuchsia", not(doc)))]
                        ::debug::backtrace_request_all_threads();
                    }
                    logged = true;
                }
            }
        }
    }};
}
122
123/// Waits forever asynchronously for EVENT_SIGNALED.
124async fn wait_signaled<H: HandleBased>(handle: &H) -> Result<()> {
125    log_long_op!(fasync::OnSignals::new(handle, zx::Signals::EVENT_SIGNALED))
126        .context("hr_timer_manager:wait_signaled")?;
127    Ok(())
128}
129
/// Cancels an alarm by ID.
///
/// `timer_state` is `Some` when the timer is still pending: the proxy-side
/// alarm is then canceled and the waiter task is allowed to run to completion
/// before returning. `_message_counter` is held only to keep the container
/// from suspending while the cancelation is in flight.
async fn cancel_by_id(
    _message_counter: &SharedMessageCounter,
    timer_state: Option<TimerState>,
    timer_id: &zx::Koid,
    proxy: &fta::WakeAlarmsProxy,
    interval_timers_pending_reschedule: &mut HashMap<zx::Koid, SharedMessageCounter>,
    task_by_timer_id: &mut HashMap<zx::Koid, fasync::Task<()>>,
    alarm_id: &str,
) {
    if let Some(task) = task_by_timer_id.remove(timer_id) {
        // Let this task complete and get removed.
        task.detach();
    }
    if let Some(timer_state) = timer_state {
        ftrace::duration!("alarms", "starnix:hrtimer:cancel_by_id", "timer_id" => *timer_id);
        log_debug!("cancel_by_id: START canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);
        proxy.cancel(&alarm_id).expect("infallible");
        log_debug!("cancel_by_id: 1/2 canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);

        // Let the timer closure complete before continuing.
        let _ = log_long_op!(timer_state.task);
    }

    // If this timer is an interval timer, we must remove it from the pending reschedule list.
    // This does not affect container suspend, since `_message_counter` is live. It's a no-op
    // for other timers.
    interval_timers_pending_reschedule.remove(timer_id);
    log_debug!("cancel_by_id: 2/2 DONE canceling timer: {timer_id:?}: alarm_id: {alarm_id}");
}
160
161/// Called when the underlying wake alarms manager reports a fta::WakeAlarmsError
162/// as a result of a call to set_and_wait.
163fn process_alarm_protocol_error(
164    pending: &mut HashMap<zx::Koid, TimerState>,
165    timer_id: &zx::Koid,
166    error: fta::WakeAlarmsError,
167) -> Option<TimerState> {
168    match error {
169        fta::WakeAlarmsError::Unspecified => {
170            log_warn!(
171                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unspecified error: {error:?}"
172            );
173            pending.remove(timer_id)
174        }
175        fta::WakeAlarmsError::Dropped => {
176            log_debug!("watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: alarm dropped: {error:?}");
177            // Do not remove a Dropped timer here, in contrast to other error states: a Dropped
178            // timer is a result of a Stop or a Cancel ahead of a reschedule. In both cases, that
179            // code takes care of removing the timer from the pending timers list.
180            None
181        }
182        error => {
183            log_warn!(
184                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unspecified error: {error:?}"
185            );
186            pending.remove(timer_id)
187        }
188    }
189}
190
191// This function is swapped out for an injected proxy in tests.
192fn connect_to_wake_alarms_async() -> Result<zx::Channel, Errno> {
193    log_debug!("connecting to wake alarms");
194    let (client, server) = zx::Channel::create();
195    fuchsia_component::client::connect_channel_to_protocol::<fta::WakeAlarmsMarker>(server)
196        .map(|()| client)
197        .map_err(|err| {
198            errno!(EINVAL, format!("Failed to connect to fuchsia.time.alarms/Wake: {err}"))
199        })
200}
201
/// The kind of timer event recorded into the inspect history.
#[derive(Debug)]
enum InspectHrTimerEvent {
    /// A timer was added to the pending set.
    Add,
    /// An existing pending timer was replaced (count unchanged).
    Update,
    /// A timer was removed from the pending set.
    Remove,
    // The String inside will be used in fmt. But the compiler does not recognize the use when
    // formatting with the Debug derivative.
    /// Bookkeeping went wrong; the payload describes how.
    Error(#[allow(dead_code)] String),
}
211
212impl InspectHrTimerEvent {
213    fn retain_err(prev_len: usize, after_len: usize, context: &str) -> InspectHrTimerEvent {
214        InspectHrTimerEvent::Error(format!(
215            "retain the timer incorrectly, before len: {prev_len}, after len: {after_len}, context: {context}",
216        ))
217    }
218}
219
impl std::fmt::Display for InspectHrTimerEvent {
    // Display mirrors the derived Debug output; used when recording the
    // event's `type` string into inspect.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self)
    }
}
225
/// One entry in the inspect event history ring.
#[derive(Debug)]
struct InspectEvent {
    // What happened: add/update/remove/error.
    event_type: InspectHrTimerEvent,
    // Boot timestamp at which the event was recorded.
    created_at: zx::BootInstant,
    // The timer deadline, for events that relate to a scheduled timer.
    deadline: Option<TargetTime>,
}
232
233impl InspectEvent {
234    fn new(event_type: InspectHrTimerEvent, deadline: Option<TargetTime>) -> Self {
235        Self { event_type, created_at: zx::BootInstant::get(), deadline }
236    }
237}
238
/// Book-keeping for one scheduled wake alarm.
#[derive(Debug)]
struct TimerState {
    /// The task that waits for the timer to expire.
    task: fasync::Task<()>,
    /// The desired deadline for the timer.
    deadline: TargetTime,
    /// The node that represents the current generation of this timer.
    node: HrTimerNodeHandle,
}
248
impl std::fmt::Display for TimerState {
    // Compact log form; deliberately shows only the deadline, not the task
    // or node fields.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "TimerState[deadline:{:?}]", self.deadline)
    }
}
254
/// State shared by [HrTimerManager]'s synchronous API and its async loop,
/// guarded by the manager's mutex.
struct HrTimerManagerState {
    /// All pending timers are stored here, keyed by timer koid.
    pending_timers: HashMap<zx::Koid, TimerState>,

    /// The event that is registered with runner to allow the hrtimer to wake the kernel.
    /// Optional, because we want the ability to inject a counter in tests.
    message_counter: Option<OwnedMessageCounterHandle>,

    /// For recording timer events (bounded history, see
    /// `INSPECT_EVENT_BUFFER_SIZE`).
    events: VecDeque<InspectEvent>,

    /// The last timestamp at which the hrtimer loop was started.
    last_loop_started_timestamp: zx::BootInstant,

    /// The last timestamp at which the hrtimer loop was completed.
    last_loop_completed_timestamp: zx::BootInstant,

    // Debug progress counter for Cmd::Start.
    // TODO: b/454085350 - remove once diagnosed.
    debug_start_stage_counter: u64,
}
276
277impl HrTimerManagerState {
278    fn new(_parent_node: &fuchsia_inspect::Node) -> Self {
279        Self {
280            pending_timers: HashMap::new(),
281            // Initialized later in the State's lifecycle because it only becomes
282            // available after making a connection to the wake proxy.
283            message_counter: None,
284            events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
285            last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
286            last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
287            debug_start_stage_counter: 0,
288        }
289    }
290
291    fn get_pending_timers_count(&self) -> usize {
292        self.pending_timers.len()
293    }
294
295    /// Gets a new shareable instance of the message counter.
296    fn share_message_counter(&self, new_pending_message: bool) -> SharedMessageCounter {
297        let counter_ref =
298            self.message_counter.as_ref().expect("message_counter is None, but should not be.");
299        counter_ref.share(new_pending_message)
300    }
301
302    fn record_events(&self, node: &fuchsia_inspect::Node) {
303        let events_node = node.create_child("events");
304        for (i, event) in self.events.iter().enumerate() {
305            let child = events_node.create_child(i.to_string());
306            child.record_string("type", event.event_type.to_string());
307            child.record_int("created_at", event.created_at.into_nanos());
308            if let Some(deadline) = event.deadline {
309                child.record_int("deadline", deadline.estimate_boot().unwrap().into_nanos());
310            }
311            events_node.record(child);
312        }
313        node.record(events_node);
314    }
315}
316
/// Asynchronous commands sent to `watch_new_hrtimer_loop`.
///
/// The synchronous methods on HrTimerManager use these commands to communicate
/// with the alarm manager actor that loops about in `watch_new_hrtimer_loop`.
///
/// This allows us to not have to share state between the synchronous and async
/// methods of `HrTimerManager`.
#[derive(Debug)]
enum Cmd {
    /// Start the timer contained in `new_timer_node`.
    /// The processing loop will signal `done` to allow synchronous
    /// return from scheduling an async Cmd::Start.
    Start {
        new_timer_node: HrTimerNodeHandle,
        /// Signaled once the timer is started.
        done: zx::Event,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Stop the timer noted below. `done` is similar to above.
    Stop {
        /// The timer to stop.
        timer: HrTimerHandle,
        /// Signaled once the timer is stopped.
        done: zx::Event,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Triggered by the underlying hrtimer device when an alarm expires.
    Alarm {
        /// The affected timer's node.
        new_timer_node: HrTimerNodeHandle,
        /// The wake lease provided by the underlying API.
        lease: zx::EventPair,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Install a timeline change monitor.
    MonitorUtc { timer: HrTimerHandle, counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool> },
}
360
361// Increments `counter` every time the UTC timeline changes.
362//
363// This counter is shared with UTC timers to provide UTC timeline change notification.
364//
365// Use cases:
366//
367// 1. Counter's value counts the number of times the UTC timeline changed, which is used in timer
368//    `read` calls to report the number of encountered changes, as required by `read`.
369//
370// 2. Counter's `COUNTER_POSITIVE` signal is used in `wait_async` calls on timers, as Starnix must
371//    wake such timers whenever a timeline change happens. The counter reader must reset the
372//    counter to zero after reading its value to allow for a next wake.
373//
374// Other primitives are not appropriate to use here: an Event does not remember how many times it
375// has been signaled, so does not fulfill (1). Atomics don't generate a signal on increment, so
376// don't satisfy (2). Conversely, the `wait_async` machinery on timers can already deal with
377// HandleBased objects, so a Counter can be readily used there.
378async fn run_utc_timeline_monitor(counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool>) {
379    let utc_handle = crate::time::utc::duplicate_real_utc_clock_handle().inspect_err(|err| {
380        log_error!("run_utc_timeline_monitor: could not monitor UTC timeline: {err:?}")
381    });
382    if let Ok(utc_handle) = utc_handle {
383        run_utc_timeline_monitor_internal(counter, recv, utc_handle).await;
384    }
385}
386
387// See `run_utc_timeline_monitor`.
388// `utc_handle_fn` is useful for injecting in tests.
389async fn run_utc_timeline_monitor_internal(
390    counter: zx::Counter,
391    mut recv: mpsc::UnboundedReceiver<bool>,
392    utc_handle: UtcClock,
393) {
394    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: enter");
395    let koid = utc_handle.koid();
396    log_debug!(
397        "run_utc_timeline_monitor: monitoring UTC clock timeline: enter: UTC clock koid={koid:?}, counter={counter:?}"
398    );
399    let utc_handle = std::rc::Rc::new(utc_handle);
400    let utc_handle_fn = || utc_handle.clone();
401    let mut interested = false;
402    loop {
403        let utc_handle = utc_handle_fn();
404        // CLOCK_UPDATED is auto-cleared.
405        let mut updated_fut =
406            fasync::OnSignals::new(utc_handle.as_handle_ref(), zx::Signals::CLOCK_UPDATED).fuse();
407        let mut interest_fut = recv.next();
408
409        // Note: all select! branches must allow for exit when their respective futures are
410        // used up.
411        select! {
412            result = updated_fut => {
413                if result.is_err() {
414                    log_warn!("run_utc_timeline_monitor: could not wait on signals: {:?}, counter={counter:?}", result);
415                    break;
416                }
417                if interested {
418                    log_debug!("run_utc_timeline_monitor: UTC timeline updated, counter: {counter:?}");
419                    // The consumer of this `counter` should wait for COUNTER_POSITIVE, and
420                    // once it observes the value of the counter, subtract the read value from
421                    // counter.
422                    counter
423                        .add(1)
424                        // Ignore the error after logging it. Should we exit the loop here?
425                        .inspect_err(|err| {
426                            log_error!("run_utc_timeline_monitor: could not increment counter: {err:?}")
427                        })
428                        .unwrap_or(());
429                }
430            },
431            result = interest_fut => {
432                match result {
433                    Some(interest) => {
434                        log_debug!("interest change: {counter:?}, interest: {interest:?}");
435                        interested = interest;
436                    }
437                    None => {
438                        log_debug!("no longer needs counter monitoring: {counter:?}");
439                        break;
440                    }
441                }
442            },
443        };
444    }
445    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: exit");
446}
447
/// The manager for high-resolution timers.
///
/// This manager is responsible for creating and managing high-resolution timers.
pub struct HrTimerManager {
    // Guarded mutable state shared with the async processing loop.
    state: Mutex<HrTimerManagerState>,

    /// The channel sender that notifies the worker thread that HrTimer driver needs to be
    /// (re)started with a new deadline. Set exactly once during `init`.
    start_next_sender: OnceLock<UnboundedSender<Cmd>>,
}
458pub type HrTimerManagerHandle = Arc<HrTimerManager>;
459
460impl HrTimerManager {
    /// Creates the manager and publishes a lazy `hr_timer_manager` inspect
    /// node under `parent_node` that snapshots manager state at read time.
    pub fn new(parent_node: &fuchsia_inspect::Node) -> HrTimerManagerHandle {
        let inspect_node = parent_node.create_child("hr_timer_manager");
        let new_manager = Arc::new(Self {
            state: Mutex::new(HrTimerManagerState::new(&inspect_node)),
            start_next_sender: Default::default(),
        });
        // A weak reference avoids a reference cycle between the manager and
        // its own inspect closure.
        let manager_weak = Arc::downgrade(&new_manager);

        // Create a lazy inspect node to get HrTimerManager info at read-time.
        inspect_node.record_lazy_child("hr_timer_manager", move || {
            let manager_ref = manager_weak.upgrade().expect("inner HrTimerManager");
            async move {
                // This gets the clock value directly from the kernel, it is not subject
                // to the local runner's clock.
                let now = zx::BootInstant::get();

                let inspector = fuchsia_inspect::Inspector::default();
                inspector.root().record_int("now_ns", now.into_nanos());

                // Snapshot all needed values in one short lock scope; the
                // formatting below happens with the lock released.
                let (
                    timers,
                    pending_timers_count,
                    message_counter,
                    loop_started,
                    loop_completed,
                    debug_start_stage_counter,
                ) = {
                    let guard = manager_ref.lock();
                    (
                        guard
                            .pending_timers
                            .iter()
                            .map(|(k, v)| (*k, v.deadline))
                            .collect::<Vec<_>>(),
                        guard.get_pending_timers_count(),
                        guard.message_counter.as_ref().map(|c| c.to_string()).unwrap_or_default(),
                        guard.last_loop_started_timestamp,
                        guard.last_loop_completed_timestamp,
                        guard.debug_start_stage_counter,
                    )
                };
                inspector.root().record_uint("pending_timers_count", pending_timers_count as u64);
                inspector.root().record_string("message_counter", message_counter);

                // These are the deadlines we are currently waiting for. The format is:
                // `alarm koid` -> `deadline nanos` (remains: `duration until alarm nanos`)
                let deadlines = inspector.root().create_string_array("timers", timers.len());
                for (i, (k, v)) in timers.into_iter().enumerate() {
                    let remaining = v.estimate_boot().unwrap() - now;
                    deadlines.set(
                        i,
                        format!(
                            "{k:?} -> {v} ns (remains: {})",
                            time_pretty::format_duration(remaining)
                        ),
                    );
                }
                inspector.root().record(deadlines);

                inspector.root().record_int("last_loop_started_at_ns", loop_started.into_nanos());
                inspector
                    .root()
                    .record_int("last_loop_completed_at_ns", loop_completed.into_nanos());
                inspector
                    .root()
                    .record_uint("debug_start_stage_counter", debug_start_stage_counter);

                // NOTE(review): the event history is read under a second,
                // separate lock acquisition, so it may not match the snapshot
                // above exactly — presumably acceptable for diagnostics.
                {
                    let guard = manager_ref.lock();
                    guard.record_events(inspector.root());
                }

                Ok(inspector)
            }
            .boxed()
        });
        parent_node.record(inspect_node);
        new_manager
    }
540
541    /// Get a copy of a sender channel used for passing async command to the
542    /// event processing loop.
543    fn get_sender(&self) -> UnboundedSender<Cmd> {
544        self.start_next_sender.get().expect("start_next_sender is initialized").clone()
545    }
546
547    /// Returns the counter that tallies the timeline changes of the UTC timeline.
548    ///
549    /// # Args
550    /// - `timer`: the handle of the timer that needs monitoring of timeline changes.
551    pub fn get_timeline_change_observer(
552        &self,
553        timer: &HrTimerHandle,
554    ) -> Result<TimelineChangeObserver, Errno> {
555        let timer_id = timer.get_id();
556        let counter = zx::Counter::create();
557        let counter_clone = duplicate_handle(&counter).map_err(|err| {
558            log_error!("could not duplicate handle: {err:?}");
559            errno!(EINVAL, format!("could not duplicate handle: {err}, {timer_id:?}"))
560        })?;
561        let (send, recv) = mpsc::unbounded();
562        self.get_sender()
563            .unbounded_send(Cmd::MonitorUtc { timer: timer.clone(), counter, recv })
564            .map_err(|err| {
565            log_error!("could not send: {err:?}");
566            errno!(EINVAL, format!("could not send Cmd::Monitor: {err}, {timer_id:?}"))
567        })?;
568        Ok(TimelineChangeObserver::new(counter_clone, send))
569    }
570
571    /// Initialize the [HrTimerManager] in the context of the current system task.
572    pub fn init(self: &HrTimerManagerHandle, system_task: &CurrentTask) -> Result<(), Errno> {
573        self.init_internal(
574            system_task,
575            /*wake_channel_for_test=*/ None,
576            /*message_counter_for_test=*/ None,
577        )
578    }
579
    // Call this init for testing instead of the one above.
    //
    // Spawns the async timer-processing loop on a kernel thread and blocks
    // until the loop reports that its internal setup is complete.
    fn init_internal(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        // Can be injected for testing.
        wake_channel_for_test: Option<zx::Channel>,
        // Can be injected for testing.
        message_counter_for_test: Option<zx::Counter>,
    ) -> Result<(), Errno> {
        let (start_next_sender, start_next_receiver) = mpsc::unbounded();
        // `set` fails if init already ran once; surface that as EEXIST.
        self.start_next_sender.set(start_next_sender).map_err(|_| errno!(EEXIST))?;

        let self_ref = self.clone();

        // Ensure that all internal init has completed in `watch_new_hrtimer_loop`
        // before proceeding from here.
        let setup_done = zx::Event::create();
        let setup_done_clone = duplicate_handle(&setup_done)?;

        let closure = async move |locked_and_task: LockedAndTask<'_>| {
            let current_thread = std::thread::current();
            // Helps find the thread in backtraces, see wait_signaled_sync.
            log_info!(
                "hr_timer_manager thread: {:?} ({:?})",
                current_thread.name(),
                current_thread.id()
            );
            if let Err(e) = self_ref
                .watch_new_hrtimer_loop(
                    locked_and_task.current_task(),
                    start_next_receiver,
                    wake_channel_for_test,
                    message_counter_for_test,
                    Some(setup_done_clone),
                )
                .await
            {
                log_error!("while running watch_new_hrtimer_loop: {e:?}");
            }
            log_warn!("hr_timer_manager: finished kernel thread. should never happen in prod code");
        };
        let req = SpawnRequestBuilder::new()
            .with_debug_name("hr-timer-manager")
            .with_async_closure(closure)
            .build();
        system_task.kernel().kthreads.spawner().spawn_from_request(req);
        // Block synchronously until the loop signals `setup_done`; see
        // wait_signaled_sync for the watchdog logging behavior.
        log_info!("hr_timer_manager: waiting on setup done");
        wait_signaled_sync(&setup_done)
            .to_result()
            .map_err(|status| from_status_like_fdio!(status))?;
        log_info!("hr_timer_manager: setup done");

        Ok(())
    }
634
    /// Notifies `timer` and wake sources about a triggered alarm.
    ///
    /// # Returns
    /// - `Ok(true)` if the alarm was for the current generation and was processed.
    /// - `Ok(false)` if the alarm was stale. Either the timer was restarted ( via `Cmd:Start`) with
    ///    the same id before the previous generation's alarm fired, or the timer was stopped
    ///    (via `Cmd::Stop`) or already processed.
    fn notify_timer(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        triggered_node: &HrTimerNodeHandle,
        lease: impl HandleBased,
    ) -> Result<bool> {
        let timer_id = triggered_node.hr_timer.get_id();
        {
            // Generation check: the alarm only counts if the node that fired
            // is pointer-identical to the node currently registered under
            // this timer id.
            let guard = self.lock();
            if let Some(active_state) = guard.pending_timers.get(&timer_id) {
                if !Arc::ptr_eq(&active_state.node, triggered_node) {
                    log_debug!("notify_timer: ignoring stale alarm for timer_id: {:?}", timer_id);
                    return Ok(false);
                }
            } else {
                log_debug!(
                    "notify_timer: ignoring alarm for timer_id: {:?} (not in pending_timers)",
                    timer_id
                );
                return Ok(false);
            }
        }

        log_debug!("watch_new_hrtimer_loop: Cmd::Alarm: triggered alarm: {:?}", timer_id);
        ftrace::duration!("alarms", "starnix:hrtimer:notify_timer", "timer_id" => timer_id);
        // NOTE(review): the generation check above and this removal take the
        // lock separately; presumably safe because alarms are processed on a
        // single loop — confirm.
        self.lock().pending_timers.remove(&timer_id).map(|s| s.task.detach());
        // Mark the timer's event as fired so waiters observe the expiration.
        signal_event(
            &triggered_node.hr_timer.event(),
            zx::Signals::NONE,
            zx::Signals::TIMER_SIGNALED,
        )
        .context("notify_timer: hrtimer signal handle")?;

        // Handle wake source here.
        let wake_source = triggered_node.wake_source.clone();
        if let Some(wake_source) = wake_source.as_ref().and_then(|f| f.upgrade()) {
            let lease_token = lease.into_handle();
            wake_source.on_wake(system_task, &lease_token);
            // Drop the baton lease after wake leases in associated epfd
            // are activated.
            drop(lease_token);
        }
        ftrace::instant!(
            "alarms",
            "starnix:hrtimer:notify_timer:drop_lease",
            ftrace::Scope::Process,
            "timer_id" => timer_id
        );
        Ok(true)
    }
692
693    // If no counter has been injected for tests, set provided `counter` to serve as that
694    // counter. Used to inject a fake counter in tests.
695    fn inject_or_set_message_counter(
696        self: &HrTimerManagerHandle,
697        message_counter: OwnedMessageCounterHandle,
698    ) {
699        let mut guard = self.lock();
700        if guard.message_counter.is_none() {
701            guard.message_counter = Some(message_counter);
702        }
703    }
704
705    fn record_inspect_on_stop(
706        self: &HrTimerManagerHandle,
707        guard: &mut MutexGuard<'_, HrTimerManagerState>,
708        prev_len: usize,
709    ) {
710        let after_len = guard.get_pending_timers_count();
711        let inspect_event_type = if after_len == prev_len {
712            None
713        } else if after_len == prev_len - 1 {
714            Some(InspectHrTimerEvent::Remove)
715        } else {
716            Some(InspectHrTimerEvent::retain_err(prev_len, after_len, "removing timer"))
717        };
718        if let Some(inspect_event_type) = inspect_event_type {
719            self.record_event(guard, inspect_event_type, None);
720        }
721    }
722
723    fn record_inspect_on_start(
724        self: &HrTimerManagerHandle,
725        guard: &mut MutexGuard<'_, HrTimerManagerState>,
726        timer_id: zx::Koid,
727        task: fasync::Task<()>,
728        deadline: TargetTime,
729        node: HrTimerNodeHandle,
730        prev_len: usize,
731    ) {
732        guard
733            .pending_timers
734            .insert(timer_id, TimerState { task, deadline, node })
735            .map(|timer_state| {
736                // This should not happen, at this point we already canceled
737                // any previous instances of the same wake alarm.
738                log_debug!(
739                    "watch_new_hrtimer_loop: removing timer task in Cmd::Start: {:?}",
740                    timer_state
741                );
742                timer_state
743            })
744            .map(|v| v.task.detach());
745
746        // Record the inspect event
747        let after_len = guard.get_pending_timers_count();
748        let inspect_event_type = if after_len == prev_len {
749            InspectHrTimerEvent::Update
750        } else if after_len == prev_len + 1 {
751            InspectHrTimerEvent::Add
752        } else {
753            InspectHrTimerEvent::retain_err(prev_len, after_len, "adding timer")
754        };
755        self.record_event(guard, inspect_event_type, Some(deadline));
756    }
757
    /// Timer handler loop.
    ///
    /// # Args:
    /// - `wake_channel_for_test`: a channel implementing `fuchsia.time.alarms/Wake`
    ///   injected by tests only.
    /// - `message_counter_for_test`: a zx::Counter injected only by tests, to
    ///   emulate the wake proxy message counter.
    /// - `setup_done`: signaled once the initial loop setup is complete. Allows
    ///   pausing any async callers until this loop is in a runnable state.
    async fn watch_new_hrtimer_loop(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        mut start_next_receiver: UnboundedReceiver<Cmd>,
        mut wake_channel_for_test: Option<zx::Channel>,
        message_counter_for_test: Option<zx::Counter>,
        setup_done: Option<zx::Event>,
    ) -> Result<()> {
        // The values assigned to the counter are arbitrary, but unique for each assignment.
        // This should give us hints as to where any deadlocks might occur if they do.
        // We also take stack traces, but stack traces do not capture async stacks presently.
        self.lock().debug_start_stage_counter = 1005;
        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init", ftrace::Scope::Process);
        defer! {
            log_warn!("watch_new_hrtimer_loop: exiting. This should only happen in tests.");
        }

        // Use the channel injected by tests when present; otherwise connect to
        // the real wake alarms service.
        let wake_channel = wake_channel_for_test.take().unwrap_or_else(|| {
            connect_to_wake_alarms_async().expect("connection to wake alarms async proxy")
        });
        self.lock().debug_start_stage_counter = 1004;

        let counter_name = "wake-alarms";
        let (device_channel, counter) = if let Some(message_counter) = message_counter_for_test {
            // For tests only.
            (wake_channel, message_counter)
        } else {
            create_proxy_for_wake_events_counter_zero(wake_channel, counter_name.to_string())
        };
        self.lock().debug_start_stage_counter = 1003;
        // Register the wake proxy's message counter with the suspend/resume
        // manager, and publish it on our own state for sharing.
        let message_counter = system_task
            .kernel()
            .suspend_resume_manager
            .add_message_counter(counter_name, Some(counter));
        self.inject_or_set_message_counter(message_counter.clone());
        // Unblock any caller waiting for this loop to become runnable.
        setup_done
            .as_ref()
            .map(|e| signal_event(e, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED));

        self.lock().debug_start_stage_counter = 1002;
        let device_async_proxy =
            fta::WakeAlarmsProxy::new(fidl::AsyncChannel::from_channel(device_channel));

        // Contains suspend locks for interval (periodic) timers that expired, but have not been
        // rescheduled yet. This allows us to defer container suspend until all such timers have
        // been rescheduled.
        // TODO: b/418813184 - Remove in favor of Fuchsia-specific interval timer support
        // once it is available.
        let mut interval_timers_pending_reschedule: HashMap<zx::Koid, SharedMessageCounter> =
            HashMap::new();

        // Per timer tasks.
        let mut task_by_timer_id: HashMap<zx::Koid, fasync::Task<()>> = HashMap::new();

        self.lock().debug_start_stage_counter = 1001;
        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init_done", ftrace::Scope::Process);
        // Main command loop: commands arrive from `add_timer`/`remove_timer`
        // (Cmd::Start/Cmd::Stop) and from per-timer alarm tasks (Cmd::Alarm).
        while let Some(cmd) = start_next_receiver.next().await {
            {
                let mut guard = self.lock();
                guard.debug_start_stage_counter = 1002;
                guard.last_loop_started_timestamp = zx::BootInstant::get();
            }
            ftrace::duration!("alarms", "start_next_receiver:loop");

            log_debug!("watch_new_hrtimer_loop: got command: {cmd:?}");
            self.lock().debug_start_stage_counter = 0;
            match cmd {
                // A new timer needs to be started.  The timer node for the timer
                // is provided, and `done` must be signaled once the setup is
                // complete.
                Cmd::Start { new_timer_node, done, message_counter } => {
                    self.lock().debug_start_stage_counter = 1;
                    defer! {
                        // Allow add_timer to proceed once command processing is done.
                        signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED).map_err(|err| to_errno_with_log(err)).expect("event can be signaled");
                    }

                    let hr_timer = &new_timer_node.hr_timer;
                    let timer_id = hr_timer.get_id();
                    let wake_alarm_id = hr_timer.wake_alarm_id();
                    let trace_id = hr_timer.trace_id();
                    log_debug!(
                        "watch_new_hrtimer_loop: Cmd::Start: timer_id: {:?}, wake_alarm_id: {}",
                        timer_id,
                        wake_alarm_id
                    );
                    ftrace::duration!("alarms", "starnix:hrtimer:start", "timer_id" => timer_id);
                    ftrace::flow_begin!("alarms", "hrtimer_lifecycle", trace_id);

                    self.lock().debug_start_stage_counter = 2;
                    // Cancel any prior alarm for the same timer before re-scheduling.
                    let maybe_cancel = self.lock().pending_timers.remove(&timer_id);
                    log_long_op!(cancel_by_id(
                        &message_counter,
                        maybe_cancel,
                        &timer_id,
                        &device_async_proxy,
                        &mut interval_timers_pending_reschedule,
                        &mut task_by_timer_id,
                        &wake_alarm_id,
                    ));
                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_pre_start", ftrace::Scope::Process, "timer_id" => timer_id);

                    // Signaled when the timer completed setup. We can not forward `done` because
                    // we have post-schedule work as well.
                    let setup_event = zx::Event::create();
                    let deadline = new_timer_node.deadline;

                    ftrace::duration!("alarms", "starnix:hrtimer:signaled", "timer_id" => timer_id);

                    self.lock().debug_start_stage_counter = 3;
                    // Make a request here. Move it into the closure after. Current FIDL semantics
                    // ensure that even though we do not `.await` on this future, a request to
                    // schedule a wake alarm based on this timer will be sent.
                    let request_fut = match deadline {
                        TargetTime::Monotonic(_) => {
                            // If we hit this, it's a Starnix bug.
                            panic!("can not schedule wake alarm on monotonic timeline")
                        }
                        TargetTime::BootInstant(boot_instant) => device_async_proxy.set_and_wait(
                            boot_instant,
                            fta::SetMode::NotifySetupDone(duplicate_handle(&setup_event)?),
                            &wake_alarm_id,
                        ),
                        TargetTime::RealTime(utc_instant) => device_async_proxy.set_and_wait_utc(
                            &fta::InstantUtc { timestamp_utc: utc_instant.into_nanos() },
                            fta::SetMode::NotifySetupDone(duplicate_handle(&setup_event)?),
                            &wake_alarm_id,
                        ),
                    };
                    let mut done_sender = self.get_sender();
                    let prev_len = self.lock().get_pending_timers_count();

                    self.lock().debug_start_stage_counter = 4;
                    let self_clone = self.clone();
                    let new_timer_node_clone = new_timer_node.clone();
                    // Per-timer task: blocks on the wake alarm and reports the
                    // outcome back to this loop (as Cmd::Alarm on success).
                    let task = fasync::Task::local(async move {
                        log_debug!(
                            "wake_alarm_future: set_and_wait will block here: {wake_alarm_id:?}"
                        );
                        ftrace::instant!("alarms", "starnix:hrtimer:wait", ftrace::Scope::Process, "timer_id" => timer_id);
                        ftrace::flow_step!("alarms", "hrtimer_lifecycle", trace_id);

                        // Wait for this timer to expire. This wait can be arbitrarily long.
                        let response = request_fut.await;

                        // The counter was already incremented by the wake proxy when the alarm fired.
                        let message_counter = self_clone.lock().share_message_counter(false);
                        ftrace::instant!("alarms", "starnix:hrtimer:wake", ftrace::Scope::Process, "timer_id" => timer_id);

                        log_debug!("wake_alarm_future: set_and_wait over: {:?}", response);
                        match response {
                            // Alarm.  This must be processed in the main loop because notification
                            // requires access to &CurrentTask, which is not available here. So we
                            // only forward it.
                            Ok(Ok(lease)) => {
                                log_long_op!(done_sender.send(Cmd::Alarm {
                                    new_timer_node: new_timer_node_clone,
                                    lease,
                                    message_counter
                                }))
                                .expect("infallible");
                            }
                            Ok(Err(error)) => {
                                ftrace::duration!("alarms", "starnix:hrtimer:wake_error", "timer_id" => timer_id);
                                log_debug!(
                                    "wake_alarm_future: protocol error: {error:?}: timer_id: {timer_id:?}"
                                );
                                let mut guard = self_clone.lock();
                                let pending = &mut guard.pending_timers;
                                process_alarm_protocol_error(pending, &timer_id, error);
                            }
                            Err(error) => {
                                ftrace::duration!("alarms", "starnix:hrtimer:fidl_error", "timer_id" => timer_id);
                                log_debug!(
                                    "wake_alarm_future: FIDL error: {error:?}: timer_id: {timer_id:?}"
                                );
                                // Transport failed; drop the pending entry.
                                self_clone.lock().pending_timers.remove(&timer_id);
                            }
                        }
                        log_debug!("wake_alarm_future: closure done for timer_id: {timer_id:?}");
                    });
                    self.lock().debug_start_stage_counter = 5;
                    ftrace::instant!("alarms", "starnix:hrtimer:pre_setup_event_signal", ftrace::Scope::Process, "timer_id" => timer_id);

                    // This should be almost instantaneous.  Blocking for a long time here is a
                    // bug.
                    log_long_op!(wait_signaled(&setup_event)).map_err(|e| to_errno_with_log(e))?;
                    ftrace::instant!("alarms", "starnix:hrtimer:setup_event_signaled", ftrace::Scope::Process, "timer_id" => timer_id);
                    let mut guard = self.lock();
                    guard.debug_start_stage_counter = 6;
                    self.record_inspect_on_start(
                        &mut guard,
                        timer_id,
                        task,
                        deadline,
                        new_timer_node,
                        prev_len,
                    );
                    log_debug!("Cmd::Start scheduled: timer_id: {:?}", timer_id);
                    guard.debug_start_stage_counter = 999;
                }
                Cmd::Alarm { new_timer_node, lease, message_counter } => {
                    self.lock().debug_start_stage_counter = 10;
                    let timer = &new_timer_node.hr_timer;
                    let timer_id = timer.get_id();
                    ftrace::duration!("alarms", "starnix:hrtimer:alarm", "timer_id" => timer_id);
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
                    match self.notify_timer(system_task, &new_timer_node, lease) {
                        Ok(true) => {
                            // Alarm was for current generation, success.
                            // Interval timers currently need special handling: we must not suspend
                            // the container until the interval timer in question gets re-scheduled.
                            // To ensure that we stay awake, we store the suspend lock for a while.
                            // This prevents container suspend.
                            if *timer.is_interval.lock() {
                                interval_timers_pending_reschedule
                                    .insert(timer_id, message_counter);
                            }
                        }
                        Ok(false) => {
                            // Alarm was stale, ignored.
                        }
                        Err(e) => {
                            log_error!("watch_new_hrtimer_loop: notify_timer failed: {e:?}");
                        }
                    }
                    // Interval timers usually reschedule themselves. But if an interval timer
                    // is stopped (via Cmd::Stop) or is replaced (via Cmd::Start for the same timer
                    // ID) before it has a chance to reschedule, the reschedule lock will get
                    // dropped then.
                    log_debug!("Cmd::Alarm done: timer_id: {timer_id:?}");
                    self.lock().debug_start_stage_counter = 19;
                }
                Cmd::Stop { timer, done, message_counter } => {
                    self.lock().debug_start_stage_counter = 20;
                    defer! {
                        signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED).expect("can signal");
                    }
                    let timer_id = timer.get_id();
                    log_debug!("watch_new_hrtimer_loop: Cmd::Stop: timer_id: {:?}", timer_id);
                    ftrace::duration!("alarms", "starnix:hrtimer:stop", "timer_id" => timer_id);
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());

                    // Remove the pending entry (if any), sampling the count
                    // beforehand for inspect bookkeeping.
                    let (maybe_cancel, prev_len) = {
                        let mut guard = self.lock();
                        let prev_len = guard.get_pending_timers_count();
                        (guard.pending_timers.remove(&timer_id), prev_len)
                    };

                    let wake_alarm_id = timer.wake_alarm_id();
                    log_long_op!(cancel_by_id(
                        &message_counter,
                        maybe_cancel,
                        &timer_id,
                        &device_async_proxy,
                        &mut interval_timers_pending_reschedule,
                        &mut task_by_timer_id,
                        &wake_alarm_id,
                    ));
                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_at_stop", ftrace::Scope::Process, "timer_id" => timer_id);

                    {
                        let mut guard = self.lock();
                        self.record_inspect_on_stop(&mut guard, prev_len);
                    }
                    log_debug!("Cmd::Stop done: {timer_id:?}");
                    self.lock().debug_start_stage_counter = 29;
                }
                Cmd::MonitorUtc { timer, counter, recv } => {
                    self.lock().debug_start_stage_counter = 30;
                    ftrace::duration!("alarms", "starnix:hrtimer:monitor_utc", "timer_id" => timer.get_id());
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
                    // Spawn a per-timer task that runs the UTC timeline monitor
                    // (see `run_utc_timeline_monitor`).
                    let monitor_task = fasync::Task::local(async move {
                        run_utc_timeline_monitor(counter, recv).await;
                    });
                    task_by_timer_id.insert(timer.get_id(), monitor_task);
                    self.lock().debug_start_stage_counter = 39;
                }
            }
            let mut guard = self.lock();
            guard.debug_start_stage_counter = 90;

            log_debug!(
                "watch_new_hrtimer_loop: pending timers count: {}",
                guard.pending_timers.len()
            );
            log_debug!("watch_new_hrtimer_loop: pending timers:       {:?}", guard.pending_timers);
            log_debug!(
                "watch_new_hrtimer_loop: message counter:      {:?}",
                message_counter.to_string(),
            );
            log_debug!(
                "watch_new_hrtimer_loop: interval timers:      {:?}",
                interval_timers_pending_reschedule.len(),
            );

            guard.last_loop_completed_timestamp = zx::BootInstant::get();
            guard.debug_start_stage_counter = 99;
        } // while

        Ok(())
    }
1069
    /// Acquires the manager's internal state lock.
    fn lock(&self) -> MutexGuard<'_, HrTimerManagerState> {
        self.state.lock()
    }
1073
1074    fn record_event(
1075        self: &HrTimerManagerHandle,
1076        guard: &mut MutexGuard<'_, HrTimerManagerState>,
1077        event_type: InspectHrTimerEvent,
1078        deadline: Option<TargetTime>,
1079    ) {
1080        if guard.events.len() >= INSPECT_EVENT_BUFFER_SIZE {
1081            guard.events.pop_front();
1082        }
1083        guard.events.push_back(InspectEvent::new(event_type, deadline));
1084    }
1085
1086    /// Add a new timer.
1087    ///
1088    /// A wake alarm is scheduled for the timer.
1089    pub fn add_timer(
1090        self: &HrTimerManagerHandle,
1091        wake_source: Option<Weak<dyn OnWakeOps>>,
1092        new_timer: &HrTimerHandle,
1093        deadline: TargetTime,
1094    ) -> Result<(), Errno> {
1095        log_debug!("add_timer: entry: {new_timer:?}, deadline: {deadline:?}");
1096        ftrace::duration!("alarms", "starnix:add_timer", "deadline" => deadline.estimate_boot().unwrap().into_nanos());
1097        ftrace::flow_step!("alarms", "hrtimer_lifecycle", new_timer.trace_id());
1098
1099        // Keep system awake until timer is scheduled.
1100        let message_counter_until_timer_scheduled = self.lock().share_message_counter(true);
1101
1102        let sender = self.get_sender();
1103        let new_timer_node = HrTimerNode::new(deadline, wake_source, new_timer.clone());
1104        let wake_alarm_scheduled = zx::Event::create();
1105        let wake_alarm_scheduled_clone = duplicate_handle(&wake_alarm_scheduled)?;
1106        let timer_id = new_timer.get_id();
1107        sender
1108            .unbounded_send(Cmd::Start {
1109                new_timer_node,
1110                message_counter: message_counter_until_timer_scheduled,
1111                done: wake_alarm_scheduled_clone,
1112            })
1113            .map_err(|_| errno!(EINVAL, "add_timer: could not send Cmd::Start"))?;
1114
1115        // Block until the wake alarm for this timer is scheduled.
1116        wait_signaled_sync(&wake_alarm_scheduled)
1117            .map_err(|_| errno!(EINVAL, "add_timer: wait_signaled_sync failed"))?;
1118
1119        log_debug!("add_timer: exit : timer_id: {timer_id:?}");
1120        Ok(())
1121    }
1122
1123    /// Remove a timer.
1124    ///
1125    /// The timer is removed if scheduled, nothing is changed if it is not.
1126    pub fn remove_timer(self: &HrTimerManagerHandle, timer: &HrTimerHandle) -> Result<(), Errno> {
1127        log_debug!("remove_timer: entry:  {timer:?}");
1128        ftrace::duration!("alarms", "starnix:remove_timer");
1129        // Keep system awake until timer is removed.
1130        let message_counter_until_removed = self.lock().share_message_counter(true);
1131
1132        let sender = self.get_sender();
1133        let done = zx::Event::create();
1134        let done_clone = duplicate_handle(&done)?;
1135        let timer_id = timer.get_id();
1136        sender
1137            .unbounded_send(Cmd::Stop {
1138                timer: timer.clone(),
1139                message_counter: message_counter_until_removed,
1140                done: done_clone,
1141            })
1142            .map_err(|_| errno!(EINVAL, "remove_timer: could not send Cmd::Stop"))?;
1143
1144        // Block until the alarm for this timer is scheduled.
1145        wait_signaled_sync(&done)
1146            .map_err(|_| errno!(EINVAL, "add_timer: wait_signaled_sync failed"))?;
1147        log_debug!("remove_timer: exit:  {timer_id:?}");
1148        Ok(())
1149    }
1150}
1151
/// A high-resolution timer backed by a `zx::Event`.
///
/// The koid of `event` doubles as the timer's unique identifier (see
/// `HrTimer::get_id`).
#[derive(Debug)]
pub struct HrTimer {
    // Backing event; duplicated out via `event()`, and its koid serves as the
    // timer id. `TIMER_SIGNALED` is cleared on start/stop (see `TimerOps`).
    event: zx::Event,

    /// True iff the timer is currently set to trigger at an interval.
    ///
    /// This is used to determine at which point the hrtimer event (not
    /// `HrTimer::event` but the one that is shared with the actual driver)
    /// should be cleared.
    ///
    /// If this is true, the timer manager will wait to clear the timer event
    /// until the next timer request has been sent to the driver. This prevents
    /// lost wake ups where the container happens to suspend between two instances
    /// of an interval timer triggering.
    pub is_interval: Mutex<bool>,
}
/// Shared, reference-counted handle to an [HrTimer].
pub type HrTimerHandle = Arc<HrTimer>;
1169
// Dropping an `HrTimer` closes out its trace flow, so the full lifecycle
// (flow begins in `HrTimer::new`, ends here) is visible in traces.
impl Drop for HrTimer {
    fn drop(&mut self) {
        let wake_alarm_id = self.wake_alarm_id();
        ftrace::duration!("alarms", "hrtimer::drop", "timer_id" => self.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
        ftrace::flow_end!("alarms", "hrtimer_lifecycle", self.trace_id());
    }
}
1177
1178impl HrTimer {
1179    pub fn new() -> HrTimerHandle {
1180        let ret = Arc::new(Self { event: zx::Event::create(), is_interval: Mutex::new(false) });
1181        let wake_alarm_id = ret.wake_alarm_id();
1182        ftrace::duration!("alarms", "hrtimer::new", "timer_id" => ret.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
1183        ftrace::flow_begin!("alarms", "hrtimer_lifecycle", ret.trace_id(), "wake_alarm_id" => &wake_alarm_id[..]);
1184        ret
1185    }
1186
1187    pub fn event(&self) -> zx::Event {
1188        self.event
1189            .duplicate_handle(zx::Rights::SAME_RIGHTS)
1190            .expect("Duplicate hrtimer event handle")
1191    }
1192
1193    /// Returns the unique identifier of this [HrTimer].
1194    ///
1195    /// All holders of the same [HrTimerHandle] will see the same value here.
1196    pub fn get_id(&self) -> zx::Koid {
1197        self.event.koid().expect("infallible")
1198    }
1199
1200    /// Returns the unique alarm ID for this [HrTimer].
1201    ///
1202    /// The naming pattern is: `starnix:Koid(NNNNN):iB`, where `NNNNN` is a koid
1203    /// and B is `1` if the timer is an interval timer, or `0` otherwise.
1204    fn wake_alarm_id(&self) -> String {
1205        let i = if *self.is_interval.lock() { "i1" } else { "i0" };
1206        let koid = self.get_id();
1207        format!("starnix:{koid:?}:{i}")
1208    }
1209
1210    fn trace_id(&self) -> ftrace::Id {
1211        self.get_id().raw_koid().into()
1212    }
1213}
1214
1215impl TimerOps for HrTimerHandle {
1216    fn start(
1217        &self,
1218        current_task: &CurrentTask,
1219        source: Option<Weak<dyn OnWakeOps>>,
1220        deadline: TargetTime,
1221    ) -> Result<(), Errno> {
1222        // Before (re)starting the timer, ensure the signal is cleared.
1223        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
1224            .map_err(|status| from_status_like_fdio!(status))?;
1225        current_task.kernel().hrtimer_manager.add_timer(
1226            source,
1227            self,
1228            deadline.into_resolved_utc_deadline(),
1229        )?;
1230        Ok(())
1231    }
1232
1233    fn stop(&self, kernel: &Arc<Kernel>) -> Result<(), Errno> {
1234        // Clear the signal when removing the hrtimer.
1235        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
1236            .map_err(|status| from_status_like_fdio!(status))?;
1237        Ok(kernel.hrtimer_manager.remove_timer(self)?)
1238    }
1239
1240    fn as_handle_ref(&self) -> HandleRef<'_> {
1241        self.event.as_handle_ref()
1242    }
1243
1244    fn get_timeline_change_observer(
1245        &self,
1246        current_task: &CurrentTask,
1247    ) -> Option<TimelineChangeObserver> {
1248        // Should this return errno instead?
1249        current_task
1250            .kernel()
1251            .hrtimer_manager
1252            .get_timeline_change_observer(self)
1253            .inspect_err(|err| {
1254                log_error!("hr_timer_manager: could not create timeline change counter: {err:?}")
1255            })
1256            .ok()
1257    }
1258}
1259
/// Represents a node of `HrTimer`.
#[derive(Clone, Debug)]
struct HrTimerNode {
    /// The deadline of the associated `HrTimer`.
    deadline: TargetTime,

    /// The source that initiated this `HrTimer`.
    ///
    /// When the timer expires, the system will be woken up if necessary. The `on_wake` callback
    /// will be triggered with a baton lease to prevent further suspend while Starnix is handling
    /// the wake event.
    wake_source: Option<Weak<dyn OnWakeOps>>,

    /// The underlying HrTimer.
    hr_timer: HrTimerHandle,
}
/// Shared, reference-counted handle to an [HrTimerNode].
type HrTimerNodeHandle = Arc<HrTimerNode>;
1277
impl HrTimerNode {
    /// Creates a new reference-counted node binding `hr_timer` to its
    /// `deadline` and optional `wake_source`.
    fn new(
        deadline: TargetTime,
        wake_source: Option<Weak<dyn OnWakeOps>>,
        hr_timer: HrTimerHandle,
    ) -> HrTimerNodeHandle {
        Arc::new(Self { deadline, wake_source, hr_timer })
    }
}
1287
1288#[cfg(test)]
1289mod tests {
1290    use super::*;
1291    use crate::testing::spawn_kernel_and_run;
1292    use crate::time::HrTimer;
1293    use fake_wake_alarms::{MAGIC_EXPIRE_DEADLINE, Response, serve_fake_wake_alarms};
1294    use fidl_fuchsia_time_alarms as fta;
1295    use fuchsia_async as fasync;
1296    use fuchsia_runtime::{UtcClockUpdate, UtcInstant};
1297    use std::sync::LazyLock;
1298    use std::thread;
1299
    // Default (empty) clock options for the test UTC clock.
    static CLOCK_OPTS: LazyLock<zx::ClockOpts> = LazyLock::new(zx::ClockOpts::empty);
    const BACKSTOP_TIME: UtcInstant = UtcInstant::from_nanos(/*arbitrary*/ 222222);

    // Creates a UTC clock for tests, started at the (arbitrary) backstop time.
    fn create_utc_clock_for_test() -> UtcClock {
        let clock = UtcClock::create(*CLOCK_OPTS, Some(BACKSTOP_TIME)).unwrap();
        clock.update(UtcClockUpdate::builder().approximate_value(BACKSTOP_TIME)).unwrap();
        clock
    }
1308
    impl HrTimerManagerState {
        // Constructs a pristine state for tests: no pending timers, no message
        // counter, and loop timestamps set to the infinite past.
        fn new_for_test() -> Self {
            Self {
                events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
                pending_timers: Default::default(),
                message_counter: None,
                last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
                last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
                debug_start_stage_counter: 0,
            }
        }
    }
1321
    // Injected for testing.
    //
    // Returns the client end of a channel whose server end is a fake
    // `fuchsia.time.alarms/WakeAlarms` implementation responding per
    // `response_type`.
    fn connect_factory(message_counter: zx::Counter, response_type: Response) -> zx::Channel {
        let (client, server) = zx::Channel::create();

        // A separate thread is needed to allow independent execution of the server.
        let _detached = thread::spawn(move || {
            fasync::LocalExecutor::default().run_singlethreaded(async move {
                let stream =
                    fidl::endpoints::ServerEnd::<fta::WakeAlarmsMarker>::new(server).into_stream();
                serve_fake_wake_alarms(message_counter, response_type, stream, /*once*/ false)
                    .await;
            });
        });
        client
    }
1337
    // Initializes HrTimerManager for tests.
    //
    // The manager is wired to a fake wake alarms server (see `connect_factory`)
    // responding per `response_type`, with the returned counter standing in for
    // the wake proxy message counter.
    //
    // # Returns
    //
    // A tuple of:
    // - `HrTimerManagerHandle` the unit under test
    // - `zx::Counter` a message counter to use in tests to observe suspend state
    fn init_hr_timer_manager(
        current_task: &CurrentTask,
        response_type: Response,
    ) -> (HrTimerManagerHandle, zx::Counter) {
        let manager = Arc::new(HrTimerManager {
            state: Mutex::new(HrTimerManagerState::new_for_test()),
            start_next_sender: Default::default(),
        });
        let counter = zx::Counter::create();
        // One duplicate goes to the fake server, another is injected as the
        // manager's message counter; the original is handed to the test.
        let counter_clone = duplicate_handle(&counter).unwrap();
        let wake_channel = connect_factory(counter_clone, response_type);
        let counter_clone = duplicate_handle(&counter).unwrap();
        manager
            .init_internal(&current_task, Some(wake_channel), Some(counter_clone))
            .expect("infallible");
        (manager, counter)
    }
1362
1363    #[fuchsia::test]
1364    async fn test_triggering() {
1365        spawn_kernel_and_run(async |_, current_task| {
1366            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1367
1368            let timer1 = HrTimer::new();
1369            let timer2 = HrTimer::new();
1370            let timer3 = HrTimer::new();
1371
1372            manager.add_timer(None, &timer1, zx::BootInstant::from_nanos(1).into()).unwrap();
1373            manager.add_timer(None, &timer2, zx::BootInstant::from_nanos(2).into()).unwrap();
1374            manager.add_timer(None, &timer3, zx::BootInstant::from_nanos(3).into()).unwrap();
1375
1376            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1377            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1378            wait_signaled_sync(&timer3.event()).to_result().unwrap();
1379
1380            assert_eq!(
1381                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1382                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1383            );
1384        })
1385        .await;
1386    }
1387
1388    #[fuchsia::test]
1389    async fn test_triggering_utc() {
1390        spawn_kernel_and_run(async |_, current_task| {
1391            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1392
1393            let timer1 = HrTimer::new();
1394            let timer2 = HrTimer::new();
1395            let timer3 = HrTimer::new();
1396
1397            // All these are normally already expired as scheduled.
1398            manager.add_timer(None, &timer1, UtcInstant::from_nanos(1).into()).unwrap();
1399            manager.add_timer(None, &timer2, UtcInstant::from_nanos(2).into()).unwrap();
1400            manager.add_timer(None, &timer3, UtcInstant::from_nanos(3).into()).unwrap();
1401
1402            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1403            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1404            wait_signaled_sync(&timer3.event()).to_result().unwrap();
1405
1406            assert_eq!(
1407                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1408                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1409            );
1410        })
1411        .await;
1412    }
1413
1414    #[fuchsia::test]
1415    async fn test_delayed_response() {
1416        spawn_kernel_and_run(async |_, current_task| {
1417            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1418
1419            let timer = HrTimer::new();
1420
1421            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1422
1423            assert_eq!(
1424                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1425                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1426            );
1427        })
1428        .await;
1429    }
1430
1431    #[fuchsia::test]
1432    async fn test_protocol_error_response() {
1433        spawn_kernel_and_run(async |_, current_task| {
1434            let (manager, counter) = init_hr_timer_manager(current_task, Response::Error);
1435
1436            let timer = HrTimer::new();
1437            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1438            assert_eq!(
1439                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1440                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1441            );
1442        })
1443        .await;
1444    }
1445
1446    #[fuchsia::test]
1447    async fn reschedule_same_timer() {
1448        spawn_kernel_and_run(async |_, current_task| {
1449            let (manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1450
1451            let timer = HrTimer::new();
1452
1453            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1454            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(2).into()).unwrap();
1455
1456            // Force alarm expiry.
1457            manager
1458                .add_timer(None, &timer, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1459                .unwrap();
1460            wait_signaled_sync(&timer.event()).to_result().unwrap();
1461
1462            assert_eq!(
1463                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1464                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1465            );
1466        })
1467        .await;
1468    }
1469
1470    #[fuchsia::test]
1471    async fn rescheduling_interval_timers_forbids_suspend() {
1472        spawn_kernel_and_run(async |_, current_task| {
1473            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1474
1475            // Schedule an interval timer and let it expire.
1476            let timer1 = HrTimer::new();
1477            *timer1.is_interval.lock() = true;
1478            hrtimer_manager
1479                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1480                .unwrap();
1481            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1482
1483            // Schedule a regular timer and let it expire.
1484            let timer2 = HrTimer::new();
1485            hrtimer_manager
1486                .add_timer(None, &timer2, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1487                .unwrap();
1488            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1489
1490            // When we have an expired but not rescheduled interval timer (`timer1`), and we have
1491            // an intervening timer that gets scheduled and expires (`timer2`) before `timer1` is
1492            // rescheduled, then suspend should be disallowed (counter > 0) to allow `timer1` to
1493            // be scheduled eventually.
1494            assert_eq!(
1495                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1496                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1497            );
1498        })
1499        .await;
1500    }
1501
1502    #[fuchsia::test]
1503    async fn canceling_interval_timer_allows_suspend() {
1504        spawn_kernel_and_run(async |_, current_task| {
1505            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1506
1507            let timer1 = HrTimer::new();
1508            *timer1.is_interval.lock() = true;
1509            hrtimer_manager
1510                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1511                .unwrap();
1512            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1513
1514            // When an interval timer expires, we should not be allowed to suspend.
1515            assert_eq!(
1516                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1517                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1518            );
1519
1520            // Schedule the same timer again. This time around we do not wait for it to expire,
1521            // but cancel the timer instead.
1522            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
1523            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S.into());
1524            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();
1525
1526            hrtimer_manager.remove_timer(&timer1).unwrap();
1527
1528            // When we cancel an interval timer, we should be allowed to suspend.
1529            assert_eq!(
1530                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1531                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1532            );
1533        })
1534        .await;
1535    }
1536
1537    #[fuchsia::test]
1538    async fn canceling_interval_timer_allows_suspend_with_flake() {
1539        spawn_kernel_and_run(async |_, current_task| {
1540            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1541
1542            let timer1 = HrTimer::new();
1543            *timer1.is_interval.lock() = true;
1544            hrtimer_manager
1545                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1546                .unwrap();
1547            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1548
1549            assert_eq!(
1550                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1551                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1552            );
1553            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
1554            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S.into());
1555            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();
1556            // No pause between start and stop has led to flakes before.
1557            hrtimer_manager.remove_timer(&timer1).unwrap();
1558
1559            assert_eq!(
1560                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1561                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1562            );
1563        })
1564        .await;
1565    }
1566
1567    #[fuchsia::test]
1568    async fn utc_timeline_monitor_exits_on_interest_drop() {
1569        let counter = zx::Counter::create();
1570        let utc_clock = create_utc_clock_for_test();
1571        let (tx, rx) = mpsc::unbounded();
1572
1573        drop(tx);
1574        // Expect that this call returns, when tx no longer exists. Incorrect
1575        // handling of tx closure could cause it to hang otherwise.
1576        run_utc_timeline_monitor_internal(counter, rx, utc_clock).await;
1577    }
1578}