starnix_core/time/hr_timer_manager.rs

// Copyright 2024 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use crate::power::{
    OnWakeOps, OwnedMessageCounterHandle, SharedMessageCounter,
    create_proxy_for_wake_events_counter_zero,
};
use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
use crate::task::{CurrentTask, Kernel, LockedAndTask};
use crate::time::TargetTime;
use crate::vfs::timer::{TimelineChangeObserver, TimerOps};
use anyhow::{Context, Result};
use fuchsia_inspect::ArrayProperty;
use fuchsia_runtime::UtcClock;
use futures::channel::mpsc::{self, UnboundedReceiver, UnboundedSender};
use futures::{FutureExt, SinkExt, StreamExt, select};
use scopeguard::defer;
use starnix_logging::{log_debug, log_error, log_info, log_warn};
use starnix_sync::{Mutex, MutexGuard};
use starnix_uapi::errors::Errno;
use starnix_uapi::{errno, from_status_like_fdio};
use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, OnceLock, Weak};
use zx::{HandleBased, HandleRef};
use {fidl_fuchsia_time_alarms as fta, fuchsia_async as fasync, fuchsia_trace as ftrace};

/// Maximum number of entries retained in the inspect event history.
const INSPECT_EVENT_BUFFER_SIZE: usize = 128;

fn to_errno_with_log<T: std::fmt::Debug>(v: T) -> Errno {
    log_error!("hr_timer_manager internal error: {v:?}");
    from_status_like_fdio!(zx::Status::IO)
}

fn signal_event(
    event: &zx::Event,
    clear_mask: zx::Signals,
    set_mask: zx::Signals,
) -> Result<(), zx::Status> {
    event
        .signal(clear_mask, set_mask)
        .inspect_err(|err| log_error!(err:?, clear_mask:?, set_mask:?; "while signaling event"))
}

fn duplicate_handle<H: HandleBased>(h: &H) -> Result<H, Errno> {
    h.duplicate_handle(zx::Rights::SAME_RIGHTS).map_err(|status| from_status_like_fdio!(status))
}

const TIMEOUT_SECONDS: i64 = 40;

/// Waits forever synchronously for EVENT_SIGNALED.
///
/// For us there is no useful scenario where this wait times out and we can continue operating.
fn wait_signaled_sync(event: &zx::Event) -> zx::WaitResult {
    let mut logged = false;
    loop {
        let timeout =
            zx::MonotonicInstant::after(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
        let result = event.wait_one(zx::Signals::EVENT_SIGNALED, timeout);
        if let zx::WaitResult::Ok(_) = result {
            if logged {
                log_error!("wait_signaled_sync: signal resolved: result={result:?}");
            }
            return result;
        }
        fuchsia_trace::instant!(
            "alarms",
            "starnix:hrtimer:wait_timeout",
            fuchsia_trace::Scope::Process
        );
        // This is bad and should never happen. If it does, it's a bug that has to be found and
        // fixed. There is no good way to proceed if these signals are not being signaled properly.
        log_error!(
            // Check logs for a `kBadState` status reported from the hrtimer driver.
            // LINT.IfChange(hrtimer_wait_signaled_sync_tefmo)
            "wait_signaled_sync: not signaled yet. Report to `componentid:1408151`: result={result:?}",
            // LINT.ThenChange(//tools/testing/tefmocheck/string_in_log_check.go:hrtimer_wait_signaled_sync_tefmo)
        );
        if !logged {
            #[cfg(all(target_os = "fuchsia", not(doc)))]
            ::debug::backtrace_request_all_threads();
            logged = true;
        }
    }
}

/// A macro that waits on a future, but if the future takes longer than
/// `TIMEOUT_SECONDS`, we log a warning and a stack trace.
macro_rules! log_long_op {
    ($fut:expr) => {{
        use futures::FutureExt;
        let fut = $fut;
        futures::pin_mut!(fut);
        let mut logged = false;
        loop {
            let timeout = fasync::Timer::new(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
            futures::select! {
                res = fut.as_mut().fuse() => {
                    if logged {
                        log_warn!("unexpected blocking is now resolved: long-running async operation at {}:{}.",
                            file!(), line!());
                    }
                    break res;
                }
                _ = timeout.fuse() => {
                    // Check logs for a `kBadState` status reported from the hrtimer driver.
                    log_warn!("unexpected blocking: long-running async op at {}:{}. Report to `componentId:1408151`",
                        file!(), line!());
                    if !logged {
                        #[cfg(all(target_os = "fuchsia", not(doc)))]
                        ::debug::backtrace_request_all_threads();
                    }
                    logged = true;
                }
            }
        }
    }};
}

/// Waits forever asynchronously for EVENT_SIGNALED.
async fn wait_signaled<H: HandleBased>(handle: &H) -> Result<()> {
    log_long_op!(fasync::OnSignals::new(handle, zx::Signals::EVENT_SIGNALED))
        .context("hr_timer_manager:wait_signaled")?;
    Ok(())
}

/// Cancels an alarm by ID.
async fn cancel_by_id(
    _message_counter: &SharedMessageCounter,
    timer_state: Option<TimerState>,
    timer_id: &zx::Koid,
    proxy: &fta::WakeAlarmsProxy,
    interval_timers_pending_reschedule: &mut HashMap<zx::Koid, SharedMessageCounter>,
    task_by_timer_id: &mut HashMap<zx::Koid, fasync::Task<()>>,
    alarm_id: &str,
) {
    if let Some(task) = task_by_timer_id.remove(timer_id) {
        // Let this task complete and get removed.
        task.detach();
    }
    if let Some(timer_state) = timer_state {
        ftrace::duration!("alarms", "starnix:hrtimer:cancel_by_id", "timer_id" => *timer_id);
        log_debug!("cancel_by_id: START canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);
        proxy.cancel(&alarm_id).expect("infallible");
        log_debug!("cancel_by_id: 1/2 canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);

        // Let the timer closure complete before continuing.
        let _ = log_long_op!(timer_state.task);

        // If this timer is an interval timer, we must remove it from the pending reschedule list.
        // This does not affect container suspend, since `_message_counter` is live. It's a no-op
        // for other timers.
        interval_timers_pending_reschedule.remove(timer_id);
        log_debug!("cancel_by_id: 2/2 DONE canceling timer: {timer_id:?}: alarm_id: {alarm_id}");
    }
}

/// Called when the underlying wake alarms manager reports a `fta::WakeAlarmsError`
/// as a result of a call to `set_and_wait`.
fn process_alarm_protocol_error(
    pending: &mut HashMap<zx::Koid, TimerState>,
    timer_id: &zx::Koid,
    error: fta::WakeAlarmsError,
) -> Option<TimerState> {
    match error {
        fta::WakeAlarmsError::Unspecified => {
            log_warn!(
                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unspecified error: {error:?}"
            );
            pending.remove(timer_id)
        }
        fta::WakeAlarmsError::Dropped => {
            log_debug!("watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: alarm dropped: {error:?}");
            // Do not remove a Dropped timer here, in contrast to other error states: a Dropped
            // timer is a result of a Stop or a Cancel ahead of a reschedule. In both cases, that
            // code takes care of removing the timer from the pending timers list.
            None
        }
        error => {
            log_warn!(
                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unexpected error: {error:?}"
            );
            pending.remove(timer_id)
        }
    }
}

// This function is swapped out for an injected proxy in tests.
fn connect_to_wake_alarms_async() -> Result<zx::Channel, Errno> {
    log_debug!("connecting to wake alarms");
    let (client, server) = zx::Channel::create();
    fuchsia_component::client::connect_channel_to_protocol::<fta::WakeAlarmsMarker>(server)
        .map(|()| client)
        .map_err(|err| {
            errno!(EINVAL, format!("Failed to connect to fuchsia.time.alarms/Wake: {err}"))
        })
}

#[derive(Debug)]
enum InspectHrTimerEvent {
    Add,
    Update,
    Remove,
    // The String inside will be used in fmt. But the compiler does not recognize the use when
    // formatting with the Debug derive.
    Error(#[allow(dead_code)] String),
}

impl InspectHrTimerEvent {
    fn retain_err(prev_len: usize, after_len: usize, context: &str) -> InspectHrTimerEvent {
        InspectHrTimerEvent::Error(format!(
            "retained the timer incorrectly, before len: {prev_len}, after len: {after_len}, context: {context}",
        ))
    }
}

impl std::fmt::Display for InspectHrTimerEvent {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self)
    }
}

#[derive(Debug)]
struct InspectEvent {
    event_type: InspectHrTimerEvent,
    created_at: zx::BootInstant,
    deadline: Option<TargetTime>,
}

impl InspectEvent {
    fn new(event_type: InspectHrTimerEvent, deadline: Option<TargetTime>) -> Self {
        Self { event_type, created_at: zx::BootInstant::get(), deadline }
    }
}

#[derive(Debug)]
struct TimerState {
    /// The task that waits for the timer to expire.
    task: fasync::Task<()>,
    /// The desired deadline for the timer.
    deadline: TargetTime,
}

impl std::fmt::Display for TimerState {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "TimerState[deadline:{:?}]", self.deadline)
    }
}

struct HrTimerManagerState {
    /// All pending timers are stored here.
    pending_timers: HashMap<zx::Koid, TimerState>,

    /// The message counter that is registered with the runner to allow the hrtimer to wake
    /// the kernel. Optional, because we want the ability to inject a counter in tests.
    message_counter: Option<OwnedMessageCounterHandle>,

    /// For recording timer events.
    events: VecDeque<InspectEvent>,

    /// The last timestamp at which the hrtimer loop was started.
    last_loop_started_timestamp: zx::BootInstant,

    /// The last timestamp at which the hrtimer loop was completed.
    last_loop_completed_timestamp: zx::BootInstant,

    // Debug progress counter for Cmd::Start.
    // TODO: b/454085350 - remove once diagnosed.
    debug_start_stage_counter: u64,
}

impl HrTimerManagerState {
    fn new(_parent_node: &fuchsia_inspect::Node) -> Self {
        Self {
            pending_timers: HashMap::new(),
            // Initialized later in the State's lifecycle because it only becomes
            // available after making a connection to the wake proxy.
            message_counter: None,
            events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
            last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
            last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
            debug_start_stage_counter: 0,
        }
    }

    fn get_pending_timers_count(&self) -> usize {
        self.pending_timers.len()
    }

    /// Gets a new shareable instance of the message counter.
    fn share_message_counter(&self, new_pending_message: bool) -> SharedMessageCounter {
        let counter_ref =
            self.message_counter.as_ref().expect("message_counter is None, but should not be.");
        counter_ref.share(new_pending_message)
    }

    fn record_events(&self, node: &fuchsia_inspect::Node) {
        let events_node = node.create_child("events");
        for (i, event) in self.events.iter().enumerate() {
            let child = events_node.create_child(i.to_string());
            child.record_string("type", event.event_type.to_string());
            child.record_int("created_at", event.created_at.into_nanos());
            if let Some(deadline) = event.deadline {
                child.record_int("deadline", deadline.estimate_boot().unwrap().into_nanos());
            }
            events_node.record(child);
        }
        node.record(events_node);
    }
}

/// Asynchronous commands sent to `watch_new_hrtimer_loop`.
///
/// The synchronous methods on HrTimerManager use these commands to communicate
/// with the alarm manager actor that loops in `watch_new_hrtimer_loop`.
///
/// This allows us to not have to share state between the synchronous and async
/// methods of `HrTimerManager`.
#[derive(Debug)]
enum Cmd {
    /// Start the timer contained in `new_timer_node`.
    /// The processing loop will signal `done` to allow synchronous
    /// return from scheduling an async Cmd::Start. See the sketch after
    /// this enum.
    Start {
        new_timer_node: HrTimerNode,
        /// Signaled once the timer is started.
        done: zx::Event,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Stop the timer noted below. `done` is similar to above.
    Stop {
        /// The timer to stop.
        timer: HrTimerHandle,
        /// Signaled once the timer is stopped.
        done: zx::Event,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// A wake alarm occurred.
    Alarm {
        /// The affected timer's node.
        new_timer_node: HrTimerNode,
        /// The wake lease provided by the underlying API.
        lease: zx::EventPair,
        /// The Starnix container suspend lock. Keep it alive until no more
        /// work is necessary.
        message_counter: SharedMessageCounter,
    },
    /// Install a timeline change monitor.
    MonitorUtc { timer: HrTimerHandle, counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool> },
}

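// A caller-side sketch of the command/ack pattern described above, mirroring
// what `remove_timer` below does; illustrative only, and using only names
// defined in this file.
#[allow(dead_code)]
fn example_send_stop(manager: &HrTimerManagerHandle, timer: &HrTimerHandle) -> Result<(), Errno> {
    // Keep the container awake until the loop acknowledges the stop.
    let message_counter = manager.lock().share_message_counter(true);
    // The loop signals `done` once it finishes processing the command.
    let done = zx::Event::create();
    let done_clone = duplicate_handle(&done)?;
    manager
        .get_sender()
        .unbounded_send(Cmd::Stop { timer: timer.clone(), done: done_clone, message_counter })
        .map_err(|_| errno!(EINVAL, "example: could not send Cmd::Stop"))?;
    // Block synchronously until the asynchronous loop has acknowledged.
    wait_signaled_sync(&done).to_result().map_err(|status| from_status_like_fdio!(status))?;
    Ok(())
}
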
// Increments `counter` every time the UTC timeline changes.
//
// This counter is shared with UTC timers to provide UTC timeline change notification.
//
// Use cases:
//
// 1. Counter's value counts the number of times the UTC timeline changed, which is used in timer
//    `read` calls to report the number of encountered changes, as required by `read`.
//
// 2. Counter's `COUNTER_POSITIVE` signal is used in `wait_async` calls on timers, as Starnix must
//    wake such timers whenever a timeline change happens. The counter reader must reset the
//    counter to zero after reading its value to allow for a next wake.
//
// Other primitives are not appropriate to use here: an Event does not remember how many times it
// has been signaled, so does not fulfill (1). Atomics don't generate a signal on increment, so
// don't satisfy (2). Conversely, the `wait_async` machinery on timers can already deal with
// HandleBased objects, so a Counter can be readily used there.
async fn run_utc_timeline_monitor(counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool>) {
    let utc_handle = crate::time::utc::duplicate_real_utc_clock_handle().inspect_err(|err| {
        log_error!("run_utc_timeline_monitor: could not monitor UTC timeline: {err:?}")
    });
    if let Ok(utc_handle) = utc_handle {
        run_utc_timeline_monitor_internal(counter, recv, utc_handle).await;
    }
}

// See `run_utc_timeline_monitor`.
// `utc_handle` is useful for injecting in tests.
async fn run_utc_timeline_monitor_internal(
    counter: zx::Counter,
    mut recv: mpsc::UnboundedReceiver<bool>,
    utc_handle: UtcClock,
) {
    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: enter");
    let koid = utc_handle.koid();
    log_debug!(
        "run_utc_timeline_monitor: monitoring UTC clock timeline: enter: UTC clock koid={koid:?}, counter={counter:?}"
    );
    let utc_handle = std::rc::Rc::new(utc_handle);
    let utc_handle_fn = || utc_handle.clone();
    let mut interested = false;
    loop {
        let utc_handle = utc_handle_fn();
        // CLOCK_UPDATED is auto-cleared.
        let mut updated_fut =
            fasync::OnSignals::new(utc_handle.as_handle_ref(), zx::Signals::CLOCK_UPDATED).fuse();
        let mut interest_fut = recv.next();

        // Note: all select! branches must allow for exit when their respective futures are
        // used up.
        select! {
            result = updated_fut => {
                if result.is_err() {
                    log_warn!("run_utc_timeline_monitor: could not wait on signals: {:?}, counter={counter:?}", result);
                    break;
                }
                if interested {
                    log_debug!("run_utc_timeline_monitor: UTC timeline updated, counter: {counter:?}");
                    // The consumer of this `counter` should wait for COUNTER_POSITIVE, and
                    // once it observes the value of the counter, subtract the read value from
                    // the counter.
                    counter
                        .add(1)
                        // Ignore the error after logging it. Should we exit the loop here?
                        .inspect_err(|err| {
                            log_error!("run_utc_timeline_monitor: could not increment counter: {err:?}")
                        })
                        .unwrap_or(());
                }
            },
            result = interest_fut => {
                match result {
                    Some(interest) => {
                        log_debug!("interest change: {counter:?}, interest: {interest:?}");
                        interested = interest;
                    }
                    None => {
                        log_debug!("no longer needs counter monitoring: {counter:?}");
                        break;
                    }
                }
            },
        };
    }
    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: exit");
}

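// A consumer-side sketch of the counter contract described above; illustrative
// only and not used by the manager: wait for `COUNTER_POSITIVE`, read the
// tally of UTC timeline changes, then subtract the value that was read, so a
// change racing with the read still leaves the counter positive for the next
// wake.
#[allow(dead_code)]
async fn example_read_timeline_changes(counter: &zx::Counter) -> Result<i64, zx::Status> {
    // Block until at least one timeline change has been recorded.
    fasync::OnSignals::new(counter, zx::Signals::COUNTER_POSITIVE).await?;
    let observed = counter.read()?;
    // Reset by subtraction rather than writing zero, so concurrent increments
    // are not lost.
    counter.add(-observed)?;
    Ok(observed)
}
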
/// The manager for high-resolution timers.
///
/// This manager is responsible for creating and managing high-resolution timers.
pub struct HrTimerManager {
    state: Mutex<HrTimerManagerState>,

    /// The channel sender that notifies the worker thread that the HrTimer driver needs to be
    /// (re)started with a new deadline.
    start_next_sender: OnceLock<UnboundedSender<Cmd>>,
}
pub type HrTimerManagerHandle = Arc<HrTimerManager>;

impl HrTimerManager {
    pub fn new(parent_node: &fuchsia_inspect::Node) -> HrTimerManagerHandle {
        let inspect_node = parent_node.create_child("hr_timer_manager");
        let new_manager = Arc::new(Self {
            state: Mutex::new(HrTimerManagerState::new(&inspect_node)),
            start_next_sender: Default::default(),
        });
        let manager_weak = Arc::downgrade(&new_manager);

        // Create a lazy inspect node to get HrTimerManager info at read-time.
        inspect_node.record_lazy_child("hr_timer_manager", move || {
            let manager_ref = manager_weak.upgrade().expect("inner HrTimerManager");
            async move {
                // This gets the clock value directly from the kernel; it is not subject
                // to the local runner's clock.
                let now = zx::BootInstant::get();

                let inspector = fuchsia_inspect::Inspector::default();
                inspector.root().record_int("now_ns", now.into_nanos());

                let (
                    timers,
                    pending_timers_count,
                    message_counter,
                    loop_started,
                    loop_completed,
                    debug_start_stage_counter,
                ) = {
                    let guard = manager_ref.lock();
                    (
                        guard
                            .pending_timers
                            .iter()
                            .map(|(k, v)| (*k, v.deadline))
                            .collect::<Vec<_>>(),
                        guard.get_pending_timers_count(),
                        guard.message_counter.as_ref().map(|c| c.to_string()).unwrap_or_default(),
                        guard.last_loop_started_timestamp,
                        guard.last_loop_completed_timestamp,
                        guard.debug_start_stage_counter,
                    )
                };
                inspector.root().record_uint("pending_timers_count", pending_timers_count as u64);
                inspector.root().record_string("message_counter", message_counter);

                // These are the deadlines we are currently waiting for. The format is:
                // `alarm koid` -> `deadline nanos` (remains: `duration until alarm nanos`)
                let deadlines = inspector.root().create_string_array("timers", timers.len());
                for (i, (k, v)) in timers.into_iter().enumerate() {
                    let remaining = v.estimate_boot().unwrap() - now;
                    deadlines.set(
                        i,
                        format!(
                            "{k:?} -> {v} ns (remains: {})",
                            time_pretty::format_duration(remaining)
                        ),
                    );
                }
                inspector.root().record(deadlines);

                inspector.root().record_int("last_loop_started_at_ns", loop_started.into_nanos());
                inspector
                    .root()
                    .record_int("last_loop_completed_at_ns", loop_completed.into_nanos());
                inspector
                    .root()
                    .record_uint("debug_start_stage_counter", debug_start_stage_counter);

                {
                    let guard = manager_ref.lock();
                    guard.record_events(inspector.root());
                }

                Ok(inspector)
            }
            .boxed()
        });
        parent_node.record(inspect_node);
        new_manager
    }

    /// Get a copy of the sender channel used for passing async commands to the
    /// event processing loop.
    fn get_sender(&self) -> UnboundedSender<Cmd> {
        self.start_next_sender.get().expect("start_next_sender is initialized").clone()
    }

    /// Returns the counter that tallies changes to the UTC timeline.
    ///
    /// # Args
    /// - `timer`: the handle of the timer that needs monitoring of timeline changes.
    pub fn get_timeline_change_observer(
        &self,
        timer: &HrTimerHandle,
    ) -> Result<TimelineChangeObserver, Errno> {
        let timer_id = timer.get_id();
        let counter = zx::Counter::create();
        let counter_clone = duplicate_handle(&counter).map_err(|err| {
            log_error!("could not duplicate handle: {err:?}");
            errno!(EINVAL, format!("could not duplicate handle: {err}, {timer_id:?}"))
        })?;
        let (send, recv) = mpsc::unbounded();
        self.get_sender()
            .unbounded_send(Cmd::MonitorUtc { timer: timer.clone(), counter, recv })
            .map_err(|err| {
                log_error!("could not send: {err:?}");
                errno!(EINVAL, format!("could not send Cmd::Monitor: {err}, {timer_id:?}"))
            })?;
        Ok(TimelineChangeObserver::new(counter_clone, send))
    }

    /// Initialize the [HrTimerManager] in the context of the current system task.
    pub fn init(self: &HrTimerManagerHandle, system_task: &CurrentTask) -> Result<(), Errno> {
        self.init_internal(
            system_task,
            /*wake_channel_for_test=*/ None,
            /*message_counter_for_test=*/ None,
        )
    }

    // Call this init for testing instead of the one above.
    fn init_internal(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        // Can be injected for testing.
        wake_channel_for_test: Option<zx::Channel>,
        // Can be injected for testing.
        message_counter_for_test: Option<zx::Counter>,
    ) -> Result<(), Errno> {
        let (start_next_sender, start_next_receiver) = mpsc::unbounded();
        self.start_next_sender.set(start_next_sender).map_err(|_| errno!(EEXIST))?;

        let self_ref = self.clone();

        // Ensure that all internal init has completed in `watch_new_hrtimer_loop`
        // before proceeding from here.
        let setup_done = zx::Event::create();
        let setup_done_clone = duplicate_handle(&setup_done)?;

        let closure = async move |locked_and_task: LockedAndTask<'_>| {
            let current_thread = std::thread::current();
            // Helps find the thread in backtraces, see wait_signaled_sync.
            log_info!(
                "hr_timer_manager thread: {:?} ({:?})",
                current_thread.name(),
                current_thread.id()
            );
            if let Err(e) = self_ref
                .watch_new_hrtimer_loop(
                    locked_and_task.current_task(),
                    start_next_receiver,
                    wake_channel_for_test,
                    message_counter_for_test,
                    Some(setup_done_clone),
                )
                .await
            {
                log_error!("while running watch_new_hrtimer_loop: {e:?}");
            }
            log_warn!("hr_timer_manager: finished kernel thread. should never happen in prod code");
        };
        let req = SpawnRequestBuilder::new()
            .with_debug_name("hr-timer-manager")
            .with_async_closure(closure)
            .build();
        system_task.kernel().kthreads.spawner().spawn_from_request(req);
        log_info!("hr_timer_manager: waiting on setup done");
        wait_signaled_sync(&setup_done)
            .to_result()
            .map_err(|status| from_status_like_fdio!(status))?;
        log_info!("hr_timer_manager: setup done");

        Ok(())
    }

    // Notifies `timer` and wake sources about a triggered alarm.
    fn notify_timer(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        timer: &HrTimerNode,
        lease: impl HandleBased,
    ) -> Result<()> {
        let timer_id = timer.hr_timer.get_id();
        log_debug!("watch_new_hrtimer_loop: Cmd::Alarm: triggered alarm: {:?}", timer_id);
        ftrace::duration!("alarms", "starnix:hrtimer:notify_timer", "timer_id" => timer_id);
        self.lock().pending_timers.remove(&timer_id).map(|s| s.task.detach());
        signal_event(&timer.hr_timer.event(), zx::Signals::NONE, zx::Signals::TIMER_SIGNALED)
            .context("notify_timer: hrtimer signal handle")?;

        // Handle wake source here.
        let wake_source = timer.wake_source.clone();
        if let Some(wake_source) = wake_source.as_ref().and_then(|f| f.upgrade()) {
            let lease_token = lease.into_handle();
            wake_source.on_wake(system_task, &lease_token);
            // Drop the baton lease after wake leases in associated epfd
            // are activated.
            drop(lease_token);
        }
        ftrace::instant!("alarms", "starnix:hrtimer:notify_timer:drop_lease", ftrace::Scope::Process, "timer_id" => timer_id);
        Ok(())
    }

    // Sets `message_counter` as this manager's message counter, unless one is already
    // present (tests may have injected a fake counter beforehand).
    fn inject_or_set_message_counter(
        self: &HrTimerManagerHandle,
        message_counter: OwnedMessageCounterHandle,
    ) {
        let mut guard = self.lock();
        if guard.message_counter.is_none() {
            guard.message_counter = Some(message_counter);
        }
    }

    fn record_inspect_on_stop(
        self: &HrTimerManagerHandle,
        guard: &mut MutexGuard<'_, HrTimerManagerState>,
        prev_len: usize,
    ) {
        let after_len = guard.get_pending_timers_count();
        let inspect_event_type = if after_len == prev_len {
            None
        } else if after_len == prev_len - 1 {
            Some(InspectHrTimerEvent::Remove)
        } else {
            Some(InspectHrTimerEvent::retain_err(prev_len, after_len, "removing timer"))
        };
        if let Some(inspect_event_type) = inspect_event_type {
            self.record_event(guard, inspect_event_type, None);
        }
    }

    fn record_inspect_on_start(
        self: &HrTimerManagerHandle,
        guard: &mut MutexGuard<'_, HrTimerManagerState>,
        timer_id: zx::Koid,
        task: fasync::Task<()>,
        deadline: TargetTime,
        prev_len: usize,
    ) {
        guard
            .pending_timers
            .insert(timer_id, TimerState { task, deadline })
            .map(|timer_state| {
                // This should not happen; at this point we have already canceled
                // any previous instances of the same wake alarm.
                log_debug!(
                    "watch_new_hrtimer_loop: removing timer task in Cmd::Start: {:?}",
                    timer_state
                );
                timer_state
            })
            .map(|v| v.task.detach());

        // Record the inspect event.
        let after_len = guard.get_pending_timers_count();
        let inspect_event_type = if after_len == prev_len {
            InspectHrTimerEvent::Update
        } else if after_len == prev_len + 1 {
            InspectHrTimerEvent::Add
        } else {
            InspectHrTimerEvent::retain_err(prev_len, after_len, "adding timer")
        };
        self.record_event(guard, inspect_event_type, Some(deadline));
    }

    /// Timer handler loop.
    ///
    /// # Args:
    /// - `wake_channel_for_test`: a channel implementing `fuchsia.time.alarms/Wake`
    ///   injected by tests only.
    /// - `message_counter_for_test`: a zx::Counter injected only by tests, to
    ///   emulate the wake proxy message counter.
    /// - `setup_done`: signaled once the initial loop setup is complete. Allows
    ///   pausing any async callers until this loop is in a runnable state.
    async fn watch_new_hrtimer_loop(
        self: &HrTimerManagerHandle,
        system_task: &CurrentTask,
        mut start_next_receiver: UnboundedReceiver<Cmd>,
        mut wake_channel_for_test: Option<zx::Channel>,
        message_counter_for_test: Option<zx::Counter>,
        setup_done: Option<zx::Event>,
    ) -> Result<()> {
        // The values assigned to the counter are arbitrary, but unique for each assignment.
        // This should give us hints as to where any deadlocks might occur if they do.
        // We also take stack traces, but stack traces do not capture async stacks presently.
        self.lock().debug_start_stage_counter = 1005;
        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init", ftrace::Scope::Process);
        defer! {
            log_warn!("watch_new_hrtimer_loop: exiting. This should only happen in tests.");
        }

        let wake_channel = wake_channel_for_test.take().unwrap_or_else(|| {
            connect_to_wake_alarms_async().expect("connection to wake alarms async proxy")
        });
        self.lock().debug_start_stage_counter = 1004;

        let counter_name = "wake-alarms";
        let (device_channel, counter) = if let Some(message_counter) = message_counter_for_test {
            // For tests only.
            (wake_channel, message_counter)
        } else {
            create_proxy_for_wake_events_counter_zero(wake_channel, counter_name.to_string())
        };
        self.lock().debug_start_stage_counter = 1003;
        let message_counter = system_task
            .kernel()
            .suspend_resume_manager
            .add_message_counter(counter_name, Some(counter));
        self.inject_or_set_message_counter(message_counter.clone());
        setup_done
            .as_ref()
            .map(|e| signal_event(e, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED));

        self.lock().debug_start_stage_counter = 1002;
        let device_async_proxy =
            fta::WakeAlarmsProxy::new(fidl::AsyncChannel::from_channel(device_channel));

        // Contains suspend locks for interval (periodic) timers that expired, but have not been
        // rescheduled yet. This allows us to defer container suspend until all such timers have
        // been rescheduled.
        // TODO: b/418813184 - Remove in favor of Fuchsia-specific interval timer support
        // once it is available.
        let mut interval_timers_pending_reschedule: HashMap<zx::Koid, SharedMessageCounter> =
            HashMap::new();

        // Per timer tasks.
        let mut task_by_timer_id: HashMap<zx::Koid, fasync::Task<()>> = HashMap::new();

        self.lock().debug_start_stage_counter = 1001;
        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init_done", ftrace::Scope::Process);
        while let Some(cmd) = start_next_receiver.next().await {
            {
                let mut guard = self.lock();
                guard.debug_start_stage_counter = 1002;
                guard.last_loop_started_timestamp = zx::BootInstant::get();
            }
            ftrace::duration!("alarms", "start_next_receiver:loop");

            log_debug!("watch_new_hrtimer_loop: got command: {cmd:?}");
            self.lock().debug_start_stage_counter = 0;
            match cmd {
                // A new timer needs to be started.  The timer node for the timer
                // is provided, and `done` must be signaled once the setup is
                // complete.
                Cmd::Start { new_timer_node, done, message_counter } => {
                    self.lock().debug_start_stage_counter = 1;
                    defer! {
                        // Allow add_timer to proceed once command processing is done.
                        signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED).map_err(|err| to_errno_with_log(err)).expect("event can be signaled");
                    }

                    let hr_timer = &new_timer_node.hr_timer;
                    let timer_id = hr_timer.get_id();
                    let wake_alarm_id = hr_timer.wake_alarm_id();
                    let trace_id = hr_timer.trace_id();
                    log_debug!(
                        "watch_new_hrtimer_loop: Cmd::Start: timer_id: {:?}, wake_alarm_id: {}",
                        timer_id,
                        wake_alarm_id
                    );
                    ftrace::duration!("alarms", "starnix:hrtimer:start", "timer_id" => timer_id);
                    ftrace::flow_begin!("alarms", "hrtimer_lifecycle", trace_id);

                    self.lock().debug_start_stage_counter = 2;
                    let maybe_cancel = self.lock().pending_timers.remove(&timer_id);
                    log_long_op!(cancel_by_id(
                        &message_counter,
                        maybe_cancel,
                        &timer_id,
                        &device_async_proxy,
                        &mut interval_timers_pending_reschedule,
                        &mut task_by_timer_id,
                        &wake_alarm_id,
                    ));
                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_pre_start", ftrace::Scope::Process, "timer_id" => timer_id);

                    // Signaled when the timer completed setup. We cannot forward `done` because
                    // we have post-schedule work as well.
                    let setup_event = zx::Event::create();
                    let deadline = new_timer_node.deadline;

                    ftrace::duration!("alarms", "starnix:hrtimer:signaled", "timer_id" => timer_id);

                    self.lock().debug_start_stage_counter = 3;
                    // Make the request here; move it into the closure below. Current FIDL semantics
                    // ensure that even though we do not `.await` on this future, a request to
                    // schedule a wake alarm based on this timer will be sent.
                    let request_fut = match deadline {
                        TargetTime::Monotonic(_) => {
                            // If we hit this, it's a Starnix bug.
                            panic!("can not schedule wake alarm on monotonic timeline")
                        }
                        TargetTime::BootInstant(boot_instant) => device_async_proxy.set_and_wait(
                            boot_instant,
                            fta::SetMode::NotifySetupDone(duplicate_handle(&setup_event)?),
                            &wake_alarm_id,
                        ),
                        TargetTime::RealTime(utc_instant) => device_async_proxy.set_and_wait_utc(
                            &fta::InstantUtc { timestamp_utc: utc_instant.into_nanos() },
                            fta::SetMode::NotifySetupDone(duplicate_handle(&setup_event)?),
                            &wake_alarm_id,
                        ),
                    };
                    let mut done_sender = self.get_sender();
                    let prev_len = self.lock().get_pending_timers_count();

                    self.lock().debug_start_stage_counter = 4;
                    let self_clone = self.clone();
                    let task = fasync::Task::local(async move {
                        log_debug!(
                            "wake_alarm_future: set_and_wait will block here: {wake_alarm_id:?}"
                        );
                        ftrace::instant!("alarms", "starnix:hrtimer:wait", ftrace::Scope::Process, "timer_id" => timer_id);
                        ftrace::flow_step!("alarms", "hrtimer_lifecycle", trace_id);

                        // Wait for this timer to expire. This wait can be arbitrarily long.
                        let response = request_fut.await;

                        // The counter was already incremented by the wake proxy when the alarm fired.
                        let message_counter = self_clone.lock().share_message_counter(false);
                        ftrace::instant!("alarms", "starnix:hrtimer:wake", ftrace::Scope::Process, "timer_id" => timer_id);

                        log_debug!("wake_alarm_future: set_and_wait over: {:?}", response);
                        match response {
                            // Alarm.  This must be processed in the main loop because notification
                            // requires access to &CurrentTask, which is not available here. So we
                            // only forward it.
                            Ok(Ok(lease)) => {
                                log_long_op!(done_sender.send(Cmd::Alarm {
                                    new_timer_node,
                                    lease,
                                    message_counter
                                }))
                                .expect("infallible");
                            }
                            Ok(Err(error)) => {
                                ftrace::duration!("alarms", "starnix:hrtimer:wake_error", "timer_id" => timer_id);
                                log_debug!(
                                    "wake_alarm_future: protocol error: {error:?}: timer_id: {timer_id:?}"
                                );
                                let mut guard = self_clone.lock();
                                let pending = &mut guard.pending_timers;
                                process_alarm_protocol_error(pending, &timer_id, error);
                            }
                            Err(error) => {
                                ftrace::duration!("alarms", "starnix:hrtimer:fidl_error", "timer_id" => timer_id);
                                log_debug!(
                                    "wake_alarm_future: FIDL error: {error:?}: timer_id: {timer_id:?}"
                                );
                                self_clone.lock().pending_timers.remove(&timer_id);
                            }
                        }
                        log_debug!("wake_alarm_future: closure done for timer_id: {timer_id:?}");
                    });
                    self.lock().debug_start_stage_counter = 5;
                    ftrace::instant!("alarms", "starnix:hrtimer:pre_setup_event_signal", ftrace::Scope::Process, "timer_id" => timer_id);

                    // This should be almost instantaneous.  Blocking for a long time here is a
                    // bug.
                    log_long_op!(wait_signaled(&setup_event)).map_err(|e| to_errno_with_log(e))?;
                    ftrace::instant!("alarms", "starnix:hrtimer:setup_event_signaled", ftrace::Scope::Process, "timer_id" => timer_id);
                    let mut guard = self.lock();
                    guard.debug_start_stage_counter = 6;
                    self.record_inspect_on_start(&mut guard, timer_id, task, deadline, prev_len);
                    log_debug!("Cmd::Start scheduled: timer_id: {:?}", timer_id);
                    guard.debug_start_stage_counter = 999;
                }
                Cmd::Alarm { new_timer_node, lease, message_counter } => {
                    self.lock().debug_start_stage_counter = 10;
                    let timer = &new_timer_node.hr_timer;
                    let timer_id = timer.get_id();
                    ftrace::duration!("alarms", "starnix:hrtimer:alarm", "timer_id" => timer_id);
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
                    self.notify_timer(system_task, &new_timer_node, lease)
                        .map_err(|e| to_errno_with_log(e))?;

                    // Interval timers currently need special handling: we must not suspend the
                    // container until the interval timer in question gets re-scheduled. To
                    // ensure that we stay awake, we store the suspend lock for a while. This
                    // prevents container suspend.
                    //
                    // This map entry and its message counter are removed in one of the following cases:
                    //
                    // (1) When the interval timer eventually gets rescheduled. We
                    // assume that for interval timers the reschedule will be imminent and that
                    // therefore not suspending until that re-schedule happens will not unreasonably
                    // extend the awake period.
                    //
                    // (2) When the timer is canceled.
                    if *timer.is_interval.lock() {
                        interval_timers_pending_reschedule.insert(timer_id, message_counter);
                    }
                    log_debug!("Cmd::Alarm done: timer_id: {timer_id:?}");
                    self.lock().debug_start_stage_counter = 19;
                }
                Cmd::Stop { timer, done, message_counter } => {
                    self.lock().debug_start_stage_counter = 20;
                    defer! {
                        signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED).expect("can signal");
                    }
                    let timer_id = timer.get_id();
                    log_debug!("watch_new_hrtimer_loop: Cmd::Stop: timer_id: {:?}", timer_id);
                    ftrace::duration!("alarms", "starnix:hrtimer:stop", "timer_id" => timer_id);
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());

                    let (maybe_cancel, prev_len) = {
                        let mut guard = self.lock();
                        let prev_len = guard.get_pending_timers_count();
                        (guard.pending_timers.remove(&timer_id), prev_len)
                    };

                    let wake_alarm_id = timer.wake_alarm_id();
                    log_long_op!(cancel_by_id(
                        &message_counter,
                        maybe_cancel,
                        &timer_id,
                        &device_async_proxy,
                        &mut interval_timers_pending_reschedule,
                        &mut task_by_timer_id,
                        &wake_alarm_id,
                    ));
                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_at_stop", ftrace::Scope::Process, "timer_id" => timer_id);

                    {
                        let mut guard = self.lock();
                        self.record_inspect_on_stop(&mut guard, prev_len);
                    }
                    log_debug!("Cmd::Stop done: {timer_id:?}");
                    self.lock().debug_start_stage_counter = 29;
                }
                Cmd::MonitorUtc { timer, counter, recv } => {
                    self.lock().debug_start_stage_counter = 30;
                    ftrace::duration!("alarms", "starnix:hrtimer:monitor_utc", "timer_id" => timer.get_id());
                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
                    let monitor_task = fasync::Task::local(async move {
                        run_utc_timeline_monitor(counter, recv).await;
                    });
                    task_by_timer_id.insert(timer.get_id(), monitor_task);
                    self.lock().debug_start_stage_counter = 39;
                }
            }
            let mut guard = self.lock();
            guard.debug_start_stage_counter = 90;

            log_debug!(
                "watch_new_hrtimer_loop: pending timers count: {}",
                guard.pending_timers.len()
            );
            log_debug!("watch_new_hrtimer_loop: pending timers:       {:?}", guard.pending_timers);
            log_debug!(
                "watch_new_hrtimer_loop: message counter:      {:?}",
                message_counter.to_string(),
            );
            log_debug!(
                "watch_new_hrtimer_loop: interval timers:      {:?}",
                interval_timers_pending_reschedule.len(),
            );

            guard.last_loop_completed_timestamp = zx::BootInstant::get();
            guard.debug_start_stage_counter = 99;
        } // while

        Ok(())
    }

    fn lock(&self) -> MutexGuard<'_, HrTimerManagerState> {
        self.state.lock()
    }

    fn record_event(
        self: &HrTimerManagerHandle,
        guard: &mut MutexGuard<'_, HrTimerManagerState>,
        event_type: InspectHrTimerEvent,
        deadline: Option<TargetTime>,
    ) {
        if guard.events.len() >= INSPECT_EVENT_BUFFER_SIZE {
            guard.events.pop_front();
        }
        guard.events.push_back(InspectEvent::new(event_type, deadline));
    }

    /// Add a new timer.
    ///
    /// A wake alarm is scheduled for the timer.
    pub fn add_timer(
        self: &HrTimerManagerHandle,
        wake_source: Option<Weak<dyn OnWakeOps>>,
        new_timer: &HrTimerHandle,
        deadline: TargetTime,
    ) -> Result<(), Errno> {
        log_debug!("add_timer: entry: {new_timer:?}, deadline: {deadline:?}");
        ftrace::duration!("alarms", "starnix:add_timer", "deadline" => deadline.estimate_boot().unwrap().into_nanos());
        ftrace::flow_step!("alarms", "hrtimer_lifecycle", new_timer.trace_id());

        // Keep the system awake until the timer is scheduled.
        let message_counter_until_timer_scheduled = self.lock().share_message_counter(true);

        let sender = self.get_sender();
        let new_timer_node = HrTimerNode::new(deadline, wake_source, new_timer.clone());
        let wake_alarm_scheduled = zx::Event::create();
        let wake_alarm_scheduled_clone = duplicate_handle(&wake_alarm_scheduled)?;
        let timer_id = new_timer.get_id();
        sender
            .unbounded_send(Cmd::Start {
                new_timer_node,
                message_counter: message_counter_until_timer_scheduled,
                done: wake_alarm_scheduled_clone,
            })
            .map_err(|_| errno!(EINVAL, "add_timer: could not send Cmd::Start"))?;

        // Block until the wake alarm for this timer is scheduled.
        wait_signaled_sync(&wake_alarm_scheduled)
            .to_result()
            .map_err(|_| errno!(EINVAL, "add_timer: wait_signaled_sync failed"))?;

        log_debug!("add_timer: exit : timer_id: {timer_id:?}");
        Ok(())
    }

    /// Remove a timer.
    ///
    /// The timer is removed if scheduled; nothing is changed if it is not.
    pub fn remove_timer(self: &HrTimerManagerHandle, timer: &HrTimerHandle) -> Result<(), Errno> {
        log_debug!("remove_timer: entry:  {timer:?}");
        ftrace::duration!("alarms", "starnix:remove_timer");
        // Keep the system awake until the timer is removed.
        let message_counter_until_removed = self.lock().share_message_counter(true);

        let sender = self.get_sender();
        let done = zx::Event::create();
        let done_clone = duplicate_handle(&done)?;
        let timer_id = timer.get_id();
        sender
            .unbounded_send(Cmd::Stop {
                timer: timer.clone(),
                message_counter: message_counter_until_removed,
                done: done_clone,
            })
            .map_err(|_| errno!(EINVAL, "remove_timer: could not send Cmd::Stop"))?;

        // Block until the wake alarm for this timer is stopped.
        wait_signaled_sync(&done)
            .to_result()
            .map_err(|_| errno!(EINVAL, "remove_timer: wait_signaled_sync failed"))?;
        log_debug!("remove_timer: exit:  {timer_id:?}");
        Ok(())
    }
}

#[derive(Debug)]
pub struct HrTimer {
    event: zx::Event,

    /// True iff the timer is currently set to trigger at an interval.
    ///
    /// This is used to determine at which point the hrtimer event (not
    /// `HrTimer::event` but the one that is shared with the actual driver)
    /// should be cleared.
    ///
    /// If this is true, the timer manager will wait to clear the timer event
    /// until the next timer request has been sent to the driver. This prevents
    /// lost wake ups where the container happens to suspend between two instances
    /// of an interval timer triggering.
    pub is_interval: Mutex<bool>,
}
pub type HrTimerHandle = Arc<HrTimer>;

impl Drop for HrTimer {
    fn drop(&mut self) {
        let wake_alarm_id = self.wake_alarm_id();
        ftrace::duration!("alarms", "hrtimer::drop", "timer_id" => self.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
        ftrace::flow_end!("alarms", "hrtimer_lifecycle", self.trace_id());
    }
}

impl HrTimer {
    pub fn new() -> HrTimerHandle {
        let ret = Arc::new(Self { event: zx::Event::create(), is_interval: Mutex::new(false) });
        let wake_alarm_id = ret.wake_alarm_id();
        ftrace::duration!("alarms", "hrtimer::new", "timer_id" => ret.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
        ftrace::flow_begin!("alarms", "hrtimer_lifecycle", ret.trace_id(), "wake_alarm_id" => &wake_alarm_id[..]);
        ret
    }

    pub fn event(&self) -> zx::Event {
        self.event
            .duplicate_handle(zx::Rights::SAME_RIGHTS)
            .expect("Duplicate hrtimer event handle")
    }

    /// Returns the unique identifier of this [HrTimer].
    ///
    /// All holders of the same [HrTimerHandle] will see the same value here.
    pub fn get_id(&self) -> zx::Koid {
        self.event.koid().expect("infallible")
    }

    /// Returns the unique alarm ID for this [HrTimer].
    ///
    /// The naming pattern is: `starnix:Koid(NNNNN):iB`, where `NNNNN` is a koid
    /// and `B` is `1` if the timer is an interval timer, or `0` otherwise.
    fn wake_alarm_id(&self) -> String {
        let i = if *self.is_interval.lock() { "i1" } else { "i0" };
        let koid = self.get_id();
        format!("starnix:{koid:?}:{i}")
    }

    fn trace_id(&self) -> ftrace::Id {
        self.get_id().raw_koid().into()
    }
}

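// A minimal sketch of a test for the `starnix:Koid(NNNNN):iB` naming pattern
// documented on `wake_alarm_id` above. The koid value is assigned by the
// kernel, so only the shape of the identifier is checked.
#[cfg(test)]
mod wake_alarm_id_format {
    use super::*;

    #[test]
    fn follows_documented_pattern() {
        let timer = HrTimer::new();
        let id = timer.wake_alarm_id();
        assert!(id.starts_with("starnix:Koid("), "unexpected id: {id}");
        assert!(id.ends_with(":i0"), "unexpected id: {id}");
        *timer.is_interval.lock() = true;
        assert!(timer.wake_alarm_id().ends_with(":i1"));
    }
}
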
impl TimerOps for HrTimerHandle {
    fn start(
        &self,
        current_task: &CurrentTask,
        source: Option<Weak<dyn OnWakeOps>>,
        deadline: TargetTime,
    ) -> Result<(), Errno> {
        // Before (re)starting the timer, ensure the signal is cleared.
        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
            .map_err(|status| from_status_like_fdio!(status))?;
        current_task.kernel().hrtimer_manager.add_timer(
            source,
            self,
            deadline.into_resolved_utc_deadline(),
        )?;
        Ok(())
    }

    fn stop(&self, kernel: &Arc<Kernel>) -> Result<(), Errno> {
        // Clear the signal when removing the hrtimer.
        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
            .map_err(|status| from_status_like_fdio!(status))?;
        Ok(kernel.hrtimer_manager.remove_timer(self)?)
    }

    fn as_handle_ref(&self) -> HandleRef<'_> {
        self.event.as_handle_ref()
    }

    fn get_timeline_change_observer(
        &self,
        current_task: &CurrentTask,
    ) -> Option<TimelineChangeObserver> {
        // Should this return errno instead?
        current_task
            .kernel()
            .hrtimer_manager
            .get_timeline_change_observer(self)
            .inspect_err(|err| {
                log_error!("hr_timer_manager: could not create timeline change counter: {err:?}")
            })
            .ok()
    }
}

1212/// Represents a node of `HrTimer`.
1213#[derive(Clone, Debug)]
1214struct HrTimerNode {
1215    /// The deadline of the associated `HrTimer`.
1216    deadline: TargetTime,
1217
1218    /// The source where initiated this `HrTimer`.
1219    ///
1220    /// When the timer expires, the system will be woken up if necessary. The `on_wake` callback
1221    /// will be triggered with a baton lease to prevent further suspend while Starnix handling the
1222    /// wake event.
1223    wake_source: Option<Weak<dyn OnWakeOps>>,
1224
1225    /// The underlying HrTimer.
1226    hr_timer: HrTimerHandle,
1227}

impl HrTimerNode {
    fn new(
        deadline: TargetTime,
        wake_source: Option<Weak<dyn OnWakeOps>>,
        hr_timer: HrTimerHandle,
    ) -> Self {
        Self { deadline, wake_source, hr_timer }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::testing::spawn_kernel_and_run;
    use crate::time::HrTimer;
    use fake_wake_alarms::{MAGIC_EXPIRE_DEADLINE, Response, serve_fake_wake_alarms};
    use fuchsia_runtime::{UtcClockUpdate, UtcInstant};
    use std::sync::LazyLock;
    use std::thread;
    use {fidl_fuchsia_time_alarms as fta, fuchsia_async as fasync};

    static CLOCK_OPTS: LazyLock<zx::ClockOpts> = LazyLock::new(zx::ClockOpts::empty);
    const BACKSTOP_TIME: UtcInstant = UtcInstant::from_nanos(/*arbitrary*/ 222222);

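    // Creates a UTC clock for tests and starts it running at the backstop
    // time.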
    fn create_utc_clock_for_test() -> UtcClock {
        let clock = UtcClock::create(*CLOCK_OPTS, Some(BACKSTOP_TIME)).unwrap();
        clock.update(UtcClockUpdate::builder().approximate_value(BACKSTOP_TIME)).unwrap();
        clock
    }

    impl HrTimerManagerState {
        fn new_for_test() -> Self {
            Self {
                events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
                pending_timers: Default::default(),
                message_counter: None,
                last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
                last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
                debug_start_stage_counter: 0,
            }
        }
    }

    // Injected for testing: serves the fake wake alarms protocol from a
    // separate thread so the server runs independently of the test body.
    fn connect_factory(message_counter: zx::Counter, response_type: Response) -> zx::Channel {
        let (client, server) = zx::Channel::create();

        // A separate thread is needed to allow independent execution of the server.
        let _detached = thread::spawn(move || {
            fasync::LocalExecutor::default().run_singlethreaded(async move {
                let stream =
                    fidl::endpoints::ServerEnd::<fta::WakeAlarmsMarker>::new(server).into_stream();
                serve_fake_wake_alarms(message_counter, response_type, stream, /*once*/ false)
                    .await;
            });
        });
        client
    }

    // Initializes HrTimerManager for tests.
    //
    // # Returns
    //
    // A tuple of:
    // - `HrTimerManagerHandle`: the unit under test
    // - `zx::Counter`: a message counter used by tests to observe suspend state
    fn init_hr_timer_manager(
        current_task: &CurrentTask,
        response_type: Response,
    ) -> (HrTimerManagerHandle, zx::Counter) {
        let manager = Arc::new(HrTimerManager {
            state: Mutex::new(HrTimerManagerState::new_for_test()),
            start_next_sender: Default::default(),
        });
        let counter = zx::Counter::create();
        let counter_clone = duplicate_handle(&counter).unwrap();
        let wake_channel = connect_factory(counter_clone, response_type);
        let counter_clone = duplicate_handle(&counter).unwrap();
        manager
            .init_internal(current_task, Some(wake_channel), Some(counter_clone))
            .expect("infallible");
        (manager, counter)
    }

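    // Verifies that several already-expired boot-timeline timers all signal
    // their events, after which the manager permits suspend (the message
    // counter drops to zero or below).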
    #[fuchsia::test]
    async fn test_triggering() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);

            let timer1 = HrTimer::new();
            let timer2 = HrTimer::new();
            let timer3 = HrTimer::new();

            manager.add_timer(None, &timer1, zx::BootInstant::from_nanos(1).into()).unwrap();
            manager.add_timer(None, &timer2, zx::BootInstant::from_nanos(2).into()).unwrap();
            manager.add_timer(None, &timer3, zx::BootInstant::from_nanos(3).into()).unwrap();

            wait_signaled_sync(&timer1.event()).to_result().unwrap();
            wait_signaled_sync(&timer2.event()).to_result().unwrap();
            wait_signaled_sync(&timer3.event()).to_result().unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

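    // Same as `test_triggering`, but with deadlines on the UTC timeline.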
    #[fuchsia::test]
    async fn test_triggering_utc() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);

            let timer1 = HrTimer::new();
            let timer2 = HrTimer::new();
            let timer3 = HrTimer::new();

            // These deadlines are all in the past, so the timers expire as
            // soon as they are scheduled.
            manager.add_timer(None, &timer1, UtcInstant::from_nanos(1).into()).unwrap();
            manager.add_timer(None, &timer2, UtcInstant::from_nanos(2).into()).unwrap();
            manager.add_timer(None, &timer3, UtcInstant::from_nanos(3).into()).unwrap();

            wait_signaled_sync(&timer1.event()).to_result().unwrap();
            wait_signaled_sync(&timer2.event()).to_result().unwrap();
            wait_signaled_sync(&timer3.event()).to_result().unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

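    // Verifies that suspend is eventually permitted even when the fake wake
    // alarms server delays its responses.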
    #[fuchsia::test]
    async fn test_delayed_response() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            let timer = HrTimer::new();

            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

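    // Verifies that suspend is eventually permitted even when the fake wake
    // alarms server responds with a protocol error.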
    #[fuchsia::test]
    async fn test_protocol_error_response() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Error);

            let timer = HrTimer::new();
            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

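    // Verifies that rescheduling the same timer replaces its previous
    // deadline, and that suspend is permitted once the timer finally expires.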
    #[fuchsia::test]
    async fn reschedule_same_timer() {
        spawn_kernel_and_run(async |_, current_task| {
            let (manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            let timer = HrTimer::new();

            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(2).into()).unwrap();

            // Force alarm expiry.
            manager
                .add_timer(None, &timer, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer.event()).to_result().unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn rescheduling_interval_timers_forbids_suspend() {
        spawn_kernel_and_run(async |_, current_task| {
            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            // Schedule an interval timer and let it expire.
            let timer1 = HrTimer::new();
            *timer1.is_interval.lock() = true;
            hrtimer_manager
                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer1.event()).to_result().unwrap();

            // Schedule a regular timer and let it expire.
            let timer2 = HrTimer::new();
            hrtimer_manager
                .add_timer(None, &timer2, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer2.event()).to_result().unwrap();

            // When an interval timer (`timer1`) has expired but has not yet been rescheduled,
            // and an intervening timer (`timer2`) is scheduled and expires before `timer1` is
            // rescheduled, suspend must remain disallowed (counter > 0) so that `timer1` can
            // eventually be rescheduled.
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn canceling_interval_timer_allows_suspend() {
        spawn_kernel_and_run(async |_, current_task| {
            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            let timer1 = HrTimer::new();
            *timer1.is_interval.lock() = true;
            hrtimer_manager
                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer1.event()).to_result().unwrap();

            // When an interval timer expires, we should not be allowed to suspend.
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
            );

            // Schedule the same timer again. This time around we do not wait for it to expire,
            // but cancel the timer instead.
            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S);
            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();

            hrtimer_manager.remove_timer(&timer1).unwrap();

            // When we cancel an interval timer, we should be allowed to suspend.
            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

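    // Like `canceling_interval_timer_allows_suspend`, but cancels immediately
    // after rescheduling; this no-pause sequence has been a source of flakes.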
    #[fuchsia::test]
    async fn canceling_interval_timer_allows_suspend_with_flake() {
        spawn_kernel_and_run(async |_, current_task| {
            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);

            let timer1 = HrTimer::new();
            *timer1.is_interval.lock() = true;
            hrtimer_manager
                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
                .unwrap();
            wait_signaled_sync(&timer1.event()).to_result().unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
            );
            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S);
            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();
            // Canceling immediately after scheduling, with no pause in between,
            // has led to flakes before.
            hrtimer_manager.remove_timer(&timer1).unwrap();

            assert_eq!(
                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
            );
        })
        .await;
    }

    #[fuchsia::test]
    async fn utc_timeline_monitor_exits_on_interest_drop() {
        let counter = zx::Counter::create();
        let utc_clock = create_utc_clock_for_test();
        let (tx, rx) = mpsc::unbounded();

        drop(tx);
        // Expect this call to return once `tx` no longer exists; incorrect
        // handling of the closed `tx` could otherwise cause it to hang.
        run_utc_timeline_monitor_internal(counter, rx, utc_clock).await;
    }
}