Skip to main content

starnix_core/time/
hr_timer_manager.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::power::{
6    OnWakeOps, OwnedMessageCounterHandle, SharedMessageCounter,
7    create_proxy_for_wake_events_counter_zero,
8};
9use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
10use crate::task::{CurrentTask, Kernel, LockedAndTask};
11use crate::time::TargetTime;
12use crate::vfs::timer::{TimelineChangeObserver, TimerOps};
13use anyhow::{Context, Result};
14use fidl_fuchsia_time_alarms as fta;
15use fuchsia_async as fasync;
16use fuchsia_inspect::ArrayProperty;
17use fuchsia_runtime::UtcClock;
18use fuchsia_trace as ftrace;
19use futures::channel::mpsc::{self, UnboundedReceiver, UnboundedSender};
20use futures::{FutureExt, SinkExt, StreamExt, select};
21use scopeguard::defer;
22use starnix_logging::{log_debug, log_error, log_info, log_warn};
23use starnix_sync::{Mutex, MutexGuard};
24use starnix_uapi::errors::Errno;
25use starnix_uapi::{errno, from_status_like_fdio};
26use std::collections::{HashMap, VecDeque};
27use std::pin::pin;
28use std::sync::{Arc, OnceLock, Weak};
29use zx::HandleRef;
30
31/// Max value for inspect event history.
32const INSPECT_EVENT_BUFFER_SIZE: usize = 128;
33
34fn to_errno_with_log<T: std::fmt::Debug>(v: T) -> Errno {
35    log_error!("hr_timer_manager internal error: {v:?}");
36    from_status_like_fdio!(zx::Status::IO)
37}
38
39fn signal_event(
40    event: &zx::Event,
41    clear_mask: zx::Signals,
42    set_mask: zx::Signals,
43) -> Result<(), zx::Status> {
44    event
45        .signal(clear_mask, set_mask)
46        .inspect_err(|err| log_error!(err:?, clear_mask:?, set_mask:?; "while signaling event"))
47}
48
49const TIMEOUT_SECONDS: i64 = 40;
50//
51/// Waits forever synchronously for EVENT_SIGNALED.
52///
53/// For us there is no useful scenario where this wait times out and we can continue operating.
54fn wait_signaled_sync(event: &zx::Event) -> zx::WaitResult {
55    let mut logged = false;
56    loop {
57        let timeout =
58            zx::MonotonicInstant::after(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
59        let result = event.wait_one(zx::Signals::EVENT_SIGNALED, timeout);
60        if let zx::WaitResult::Ok(_) = result {
61            if logged {
62                log_error!("wait_signaled_sync: signal resolved: result={result:?}",);
63            }
64            return result;
65        }
66        fuchsia_trace::instant!(
67            "alarms",
68            "starnix:hrtimer:wait_timeout",
69            fuchsia_trace::Scope::Process
70        );
71        // This is bad and should never happen. If it does, it's a bug that has to be found and
72        // fixed. There is no good way to proceed if these signals are not being signaled properly.
73        log_error!(
74            // Check logs for a `kBadState` status reported from the hrtimer driver.
75            // LINT.IfChange(hrtimer_wait_signaled_sync_tefmo)
76            "wait_signaled_sync: not signaled yet. Report to `componentid:1408151`: result={result:?}",
77            // LINT.ThenChange(//tools/testing/tefmocheck/string_in_log_check.go:hrtimer_wait_signaled_sync_tefmo)
78        );
79        if !logged {
80            #[cfg(all(target_os = "fuchsia", not(doc)))]
81            ::debug::backtrace_request_all_threads();
82            logged = true;
83        }
84    }
85}
86
87/// A macro that waits on a future, but if the future takes longer than
88/// `TIMEOUT_SECONDS`, we log a warning and a stack trace.
89macro_rules! log_long_op {
90    ($fut:expr) => {{
91        use futures::FutureExt;
92        let fut = $fut;
93        futures::pin_mut!(fut);
94        let mut logged = false;
95        loop {
96            let timeout = fasync::Timer::new(zx::MonotonicDuration::from_seconds(TIMEOUT_SECONDS));
97            futures::select! {
98                res = fut.as_mut().fuse() => {
99                    if logged {
100                        log_warn!("unexpected blocking is now resolved: long-running async operation at {}:{}.",
101                            file!(), line!());
102                    }
103                    break res;
104                }
105                _ = timeout.fuse() => {
106                    // Check logs for a `kBadState` status reported from the hrtimer driver.
107                    log_warn!("unexpected blocking: long-running async op at {}:{}. Report to `componentId:1408151`",
108                        file!(), line!());
109                    if !logged {
110                        #[cfg(all(target_os = "fuchsia", not(doc)))]
111                        ::debug::backtrace_request_all_threads();
112                    }
113                    logged = true;
114                }
115            }
116        }
117    }};
118}
119
120/// Waits forever asynchronously for EVENT_SIGNALED.
121async fn wait_signaled<H: zx::AsHandleRef>(handle: &H) -> Result<()> {
122    log_long_op!(fasync::OnSignals::new(handle, zx::Signals::EVENT_SIGNALED))
123        .context("hr_timer_manager:wait_signaled")?;
124    Ok(())
125}
126
127/// Cancels an alarm by ID.
128async fn cancel_by_id(
129    _message_counter: &SharedMessageCounter,
130    timer_state: Option<TimerState>,
131    timer_id: &zx::Koid,
132    proxy: &fta::WakeAlarmsProxy,
133    interval_timers_pending_reschedule: &mut HashMap<zx::Koid, SharedMessageCounter>,
134    task_by_timer_id: &mut HashMap<zx::Koid, fasync::Task<()>>,
135    alarm_id: &str,
136) {
137    if let Some(task) = task_by_timer_id.remove(timer_id) {
138        // Let this task complete and get removed.
139        task.detach();
140    }
141    if let Some(timer_state) = timer_state {
142        ftrace::duration!("alarms", "starnix:hrtimer:cancel_by_id", "timer_id" => *timer_id);
143        log_debug!("cancel_by_id: START canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);
144        proxy.cancel(&alarm_id).expect("infallible");
145        log_debug!("cancel_by_id: 1/2 canceling timer: {:?}: alarm_id: {}", timer_id, alarm_id);
146
147        // Let the timer closure complete before continuing.
148        let _ = log_long_op!(timer_state.task);
149    }
150
151    // If this timer is an interval timer, we must remove it from the pending reschedule list.
152    // This does not affect container suspend, since `_message_counter` is live. It's a no-op
153    // for other timers.
154    interval_timers_pending_reschedule.remove(timer_id);
155    log_debug!("cancel_by_id: 2/2 DONE canceling timer: {timer_id:?}: alarm_id: {alarm_id}");
156}
157
158/// Called when the underlying wake alarms manager reports a fta::WakeAlarmsError
159/// as a result of a call to set_and_wait.
160fn process_alarm_protocol_error(
161    pending: &mut HashMap<zx::Koid, TimerState>,
162    timer_id: &zx::Koid,
163    error: fta::WakeAlarmsError,
164) -> Option<TimerState> {
165    match error {
166        fta::WakeAlarmsError::Unspecified => {
167            log_warn!(
168                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unspecified error: {error:?}"
169            );
170            pending.remove(timer_id)
171        }
172        fta::WakeAlarmsError::Dropped => {
173            log_debug!("watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: alarm dropped: {error:?}");
174            // Do not remove a Dropped timer here, in contrast to other error states: a Dropped
175            // timer is a result of a Stop or a Cancel ahead of a reschedule. In both cases, that
176            // code takes care of removing the timer from the pending timers list.
177            None
178        }
179        error => {
180            log_warn!(
181                "watch_new_hrtimer_loop: Cmd::AlarmProtocolFail: unspecified error: {error:?}"
182            );
183            pending.remove(timer_id)
184        }
185    }
186}
187
188// This function is swapped out for an injected proxy in tests.
189fn connect_to_wake_alarms_async() -> Result<zx::Channel, Errno> {
190    log_debug!("connecting to wake alarms");
191    let (client, server) = zx::Channel::create();
192    fuchsia_component::client::connect_channel_to_protocol::<fta::WakeAlarmsMarker>(server)
193        .map(|()| client)
194        .map_err(|err| {
195            errno!(EINVAL, format!("Failed to connect to fuchsia.time.alarms/Wake: {err}"))
196        })
197}
198
199#[derive(Debug)]
200enum InspectHrTimerEvent {
201    Add,
202    Update,
203    Remove,
204    // The String inside will be used in fmt. But the compiler does not recognize the use when
205    // formatting with the Debug derivative.
206    Error(#[allow(dead_code)] String),
207}
208
209impl InspectHrTimerEvent {
210    fn retain_err(prev_len: usize, after_len: usize, context: &str) -> InspectHrTimerEvent {
211        InspectHrTimerEvent::Error(format!(
212            "retain the timer incorrectly, before len: {prev_len}, after len: {after_len}, context: {context}",
213        ))
214    }
215}
216
217impl std::fmt::Display for InspectHrTimerEvent {
218    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
219        write!(f, "{:?}", self)
220    }
221}
222
223#[derive(Debug)]
224struct InspectEvent {
225    event_type: InspectHrTimerEvent,
226    created_at: zx::BootInstant,
227    deadline: Option<TargetTime>,
228}
229
230impl InspectEvent {
231    fn new(event_type: InspectHrTimerEvent, deadline: Option<TargetTime>) -> Self {
232        Self { event_type, created_at: zx::BootInstant::get(), deadline }
233    }
234}
235
236#[derive(Debug)]
237struct TimerState {
238    /// The task that waits for the timer to expire.
239    task: fasync::Task<()>,
240    /// The desired deadline for the timer.
241    deadline: TargetTime,
242    /// The node that represents the current generation of this timer.
243    node: HrTimerNodeHandle,
244}
245
246impl std::fmt::Display for TimerState {
247    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
248        write!(f, "TimerState[deadline:{:?}]", self.deadline)
249    }
250}
251
252struct HrTimerManagerState {
253    /// All pending timers are stored here.
254    pending_timers: HashMap<zx::Koid, TimerState>,
255
256    /// The event that is registered with runner to allow the hrtimer to wake the kernel.
257    /// Optional, because we want the ability to inject a counter in tests.
258    message_counter: Option<OwnedMessageCounterHandle>,
259
260    /// For recording timer events.
261    events: VecDeque<InspectEvent>,
262
263    /// The last timestamp at which the hrtimer loop was started.
264    last_loop_started_timestamp: zx::BootInstant,
265
266    /// The last timestamp at which the hrtimer loop was completed.
267    last_loop_completed_timestamp: zx::BootInstant,
268
269    // Debug progress counter for Cmd::Start.
270    // TODO: b/454085350 - remove once diagnosed.
271    debug_start_stage_counter: u64,
272}
273
274impl HrTimerManagerState {
275    fn new(_parent_node: &fuchsia_inspect::Node) -> Self {
276        Self {
277            pending_timers: HashMap::new(),
278            // Initialized later in the State's lifecycle because it only becomes
279            // available after making a connection to the wake proxy.
280            message_counter: None,
281            events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
282            last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
283            last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
284            debug_start_stage_counter: 0,
285        }
286    }
287
288    fn get_pending_timers_count(&self) -> usize {
289        self.pending_timers.len()
290    }
291
292    /// Gets a new shareable instance of the message counter.
293    fn share_message_counter(&self, new_pending_message: bool) -> SharedMessageCounter {
294        let counter_ref =
295            self.message_counter.as_ref().expect("message_counter is None, but should not be.");
296        counter_ref.share(new_pending_message)
297    }
298
299    fn record_events(&self, node: &fuchsia_inspect::Node) {
300        let events_node = node.create_child("events");
301        for (i, event) in self.events.iter().enumerate() {
302            let child = events_node.create_child(i.to_string());
303            child.record_string("type", event.event_type.to_string());
304            child.record_int("created_at", event.created_at.into_nanos());
305            if let Some(deadline) = event.deadline {
306                child.record_int("deadline", deadline.estimate_boot().unwrap().into_nanos());
307            }
308            events_node.record(child);
309        }
310        node.record(events_node);
311    }
312}
313
314/// Asynchronous commands sent to `watch_new_hrtimer_loop`.
315///
316/// The synchronous methods on HrTimerManager use these commands to communicate
317/// with the alarm manager actor that loops about in `watch_new_hrtimer_loop`.
318///
319/// This allows us to not have to share state between the synchronous and async
320/// methods of `HrTimerManager`.
321#[derive(Debug)]
322enum Cmd {
323    // Start the timer contained in `new_timer_node`.
324    // The processing loop will signal `done` to allow synchronous
325    // return from scheduling an async Cmd::Start.
326    Start {
327        new_timer_node: HrTimerNodeHandle,
328        /// Signaled once the timer is started.
329        done: zx::Event,
330        /// The Starnix container suspend lock. Keep it alive until no more
331        /// work is necessary.
332        message_counter: SharedMessageCounter,
333    },
334    /// Stop the timer noted below. `done` is similar to above.
335    Stop {
336        /// The timer to stop.
337        timer: HrTimerHandle,
338        /// Signaled once the timer is stopped.
339        done: zx::Event,
340        /// The Starnix container suspend lock. Keep it alive until no more
341        /// work is necessary.
342        message_counter: SharedMessageCounter,
343    },
344    /// Triggered by the underlying hrtimer device when an alarm expires.
345    Alarm {
346        /// The affected timer's node.
347        new_timer_node: HrTimerNodeHandle,
348        /// The wake lease provided by the underlying API.
349        lease: zx::EventPair,
350        /// The Starnix container suspend lock. Keep it alive until no more
351        /// work is necessary.
352        message_counter: SharedMessageCounter,
353    },
354    /// Install a timeline change monitor
355    MonitorUtc { timer: HrTimerHandle, counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool> },
356}
357
358// Increments `counter` every time the UTC timeline changes.
359//
360// This counter is shared with UTC timers to provide UTC timeline change notification.
361//
362// Use cases:
363//
364// 1. Counter's value counts the number of times the UTC timeline changed, which is used in timer
365//    `read` calls to report the number of encountered changes, as required by `read`.
366//
367// 2. Counter's `COUNTER_POSITIVE` signal is used in `wait_async` calls on timers, as Starnix must
368//    wake such timers whenever a timeline change happens. The counter reader must reset the
369//    counter to zero after reading its value to allow for a next wake.
370//
371// Other primitives are not appropriate to use here: an Event does not remember how many times it
372// has been signaled, so does not fulfill (1). Atomics don't generate a signal on increment, so
373// don't satisfy (2). Conversely, the `wait_async` machinery on timers can already deal with
374// HandleBased objects, so a Counter can be readily used there.
375async fn run_utc_timeline_monitor(counter: zx::Counter, recv: mpsc::UnboundedReceiver<bool>) {
376    let utc_handle = crate::time::utc::duplicate_real_utc_clock_handle().inspect_err(|err| {
377        log_error!("run_utc_timeline_monitor: could not monitor UTC timeline: {err:?}")
378    });
379    if let Ok(utc_handle) = utc_handle {
380        run_utc_timeline_monitor_internal(counter, recv, utc_handle).await;
381    }
382}
383
384// See `run_utc_timeline_monitor`.
385// `utc_handle_fn` is useful for injecting in tests.
386async fn run_utc_timeline_monitor_internal(
387    counter: zx::Counter,
388    mut recv: mpsc::UnboundedReceiver<bool>,
389    utc_handle: UtcClock,
390) {
391    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: enter");
392    let koid = utc_handle.koid();
393    log_debug!(
394        "run_utc_timeline_monitor: monitoring UTC clock timeline: enter: UTC clock koid={koid:?}, counter={counter:?}"
395    );
396    let utc_handle = std::rc::Rc::new(utc_handle);
397    let utc_handle_fn = || utc_handle.clone();
398    let mut interested = false;
399    loop {
400        let utc_handle = utc_handle_fn();
401        // CLOCK_UPDATED is auto-cleared.
402        let mut updated_fut = pin!(
403            fasync::OnSignals::new(utc_handle.as_handle_ref(), zx::Signals::CLOCK_UPDATED).fuse()
404        );
405        let mut interest_fut = recv.next();
406
407        // Note: all select! branches must allow for exit when their respective futures are
408        // used up.
409        select! {
410            result = updated_fut => {
411                if result.is_err() {
412                    log_warn!("run_utc_timeline_monitor: could not wait on signals: {:?}, counter={counter:?}", result);
413                    break;
414                }
415                if interested {
416                    log_debug!("run_utc_timeline_monitor: UTC timeline updated, counter: {counter:?}");
417                    // The consumer of this `counter` should wait for COUNTER_POSITIVE, and
418                    // once it observes the value of the counter, subtract the read value from
419                    // counter.
420                    counter
421                        .add(1)
422                        // Ignore the error after logging it. Should we exit the loop here?
423                        .inspect_err(|err| {
424                            log_error!("run_utc_timeline_monitor: could not increment counter: {err:?}")
425                        })
426                        .unwrap_or(());
427                }
428            },
429            result = interest_fut => {
430                match result {
431                    Some(interest) => {
432                        log_debug!("interest change: {counter:?}, interest: {interest:?}");
433                        interested = interest;
434                    }
435                    None => {
436                        log_debug!("no longer needs counter monitoring: {counter:?}");
437                        break;
438                    }
439                }
440            },
441        };
442    }
443    log_debug!("run_utc_timeline_monitor: monitoring UTC clock timeline changes: exit");
444}
445
446/// The manager for high-resolution timers.
447///
448/// This manager is responsible for creating and managing high-resolution timers.
449pub struct HrTimerManager {
450    state: Mutex<HrTimerManagerState>,
451
452    /// The channel sender that notifies the worker thread that HrTimer driver needs to be
453    /// (re)started with a new deadline.
454    start_next_sender: OnceLock<UnboundedSender<Cmd>>,
455}
456pub type HrTimerManagerHandle = Arc<HrTimerManager>;
457
458impl HrTimerManager {
459    pub fn new(parent_node: &fuchsia_inspect::Node) -> HrTimerManagerHandle {
460        let inspect_node = parent_node.create_child("hr_timer_manager");
461        let new_manager = Arc::new(Self {
462            state: Mutex::new(HrTimerManagerState::new(&inspect_node)),
463            start_next_sender: Default::default(),
464        });
465        let manager_weak = Arc::downgrade(&new_manager);
466
467        // Create a lazy inspect node to get HrTimerManager info at read-time.
468        inspect_node.record_lazy_child("hr_timer_manager", move || {
469            let manager_ref = manager_weak.upgrade().expect("inner HrTimerManager");
470            async move {
471                // This gets the clock value directly from the kernel, it is not subject
472                // to the local runner's clock.
473                let now = zx::BootInstant::get();
474
475                let inspector = fuchsia_inspect::Inspector::default();
476                inspector.root().record_int("now_ns", now.into_nanos());
477
478                let (
479                    timers,
480                    pending_timers_count,
481                    message_counter,
482                    loop_started,
483                    loop_completed,
484                    debug_start_stage_counter,
485                ) = {
486                    let guard = manager_ref.lock();
487                    (
488                        guard
489                            .pending_timers
490                            .iter()
491                            .map(|(k, v)| (*k, v.deadline))
492                            .collect::<Vec<_>>(),
493                        guard.get_pending_timers_count(),
494                        guard.message_counter.as_ref().map(|c| c.to_string()).unwrap_or_default(),
495                        guard.last_loop_started_timestamp,
496                        guard.last_loop_completed_timestamp,
497                        guard.debug_start_stage_counter,
498                    )
499                };
500                inspector.root().record_uint("pending_timers_count", pending_timers_count as u64);
501                inspector.root().record_string("message_counter", message_counter);
502
503                // These are the deadlines we are currently waiting for. The format is:
504                // `alarm koid` -> `deadline nanos` (remains: `duration until alarm nanos`)
505                let deadlines = inspector.root().create_string_array("timers", timers.len());
506                for (i, (k, v)) in timers.into_iter().enumerate() {
507                    let remaining = v.estimate_boot().unwrap() - now;
508                    deadlines.set(
509                        i,
510                        format!(
511                            "{k:?} -> {v} ns (remains: {})",
512                            time_pretty::format_duration(remaining)
513                        ),
514                    );
515                }
516                inspector.root().record(deadlines);
517
518                inspector.root().record_int("last_loop_started_at_ns", loop_started.into_nanos());
519                inspector
520                    .root()
521                    .record_int("last_loop_completed_at_ns", loop_completed.into_nanos());
522                inspector
523                    .root()
524                    .record_uint("debug_start_stage_counter", debug_start_stage_counter);
525
526                {
527                    let guard = manager_ref.lock();
528                    guard.record_events(inspector.root());
529                }
530
531                Ok(inspector)
532            }
533            .boxed()
534        });
535        parent_node.record(inspect_node);
536        new_manager
537    }
538
539    /// Get a copy of a sender channel used for passing async command to the
540    /// event processing loop.
541    fn get_sender(&self) -> UnboundedSender<Cmd> {
542        self.start_next_sender.get().expect("start_next_sender is initialized").clone()
543    }
544
545    /// Returns the counter that tallies the timeline changes of the UTC timeline.
546    ///
547    /// # Args
548    /// - `timer`: the handle of the timer that needs monitoring of timeline changes.
549    pub fn get_timeline_change_observer(
550        &self,
551        timer: &HrTimerHandle,
552    ) -> Result<TimelineChangeObserver, Errno> {
553        let timer_id = timer.get_id();
554        let counter = zx::Counter::create();
555        let counter_clone = counter
556            .duplicate_handle(zx::Rights::SAME_RIGHTS)
557            .map_err(|status| from_status_like_fdio!(status))
558            .map_err(|err| {
559                log_error!("could not duplicate handle: {err:?}");
560                errno!(EINVAL, format!("could not duplicate handle: {err}, {timer_id:?}"))
561            })?;
562        let (send, recv) = mpsc::unbounded();
563        self.get_sender()
564            .unbounded_send(Cmd::MonitorUtc { timer: timer.clone(), counter, recv })
565            .map_err(|err| {
566            log_error!("could not send: {err:?}");
567            errno!(EINVAL, format!("could not send Cmd::Monitor: {err}, {timer_id:?}"))
568        })?;
569        Ok(TimelineChangeObserver::new(counter_clone, send))
570    }
571
572    /// Initialize the [HrTimerManager] in the context of the current system task.
573    pub fn init(self: &HrTimerManagerHandle, system_task: &CurrentTask) -> Result<(), Errno> {
574        self.init_internal(
575            system_task,
576            /*wake_channel_for_test=*/ None,
577            /*message_counter_for_test=*/ None,
578        )
579    }
580
581    // Call this init for testing instead of the one above.
582    fn init_internal(
583        self: &HrTimerManagerHandle,
584        system_task: &CurrentTask,
585        // Can be injected for testing.
586        wake_channel_for_test: Option<zx::Channel>,
587        // Can be injected for testing.
588        message_counter_for_test: Option<zx::Counter>,
589    ) -> Result<(), Errno> {
590        let (start_next_sender, start_next_receiver) = mpsc::unbounded();
591        self.start_next_sender.set(start_next_sender).map_err(|_| errno!(EEXIST))?;
592
593        let self_ref = self.clone();
594
595        // Ensure that all internal init has completed in `watch_new_hrtimer_loop`
596        // before proceeding from here.
597        let setup_done = zx::Event::create();
598        let setup_done_clone = setup_done
599            .duplicate_handle(zx::Rights::SAME_RIGHTS)
600            .map_err(|status| from_status_like_fdio!(status))?;
601
602        let closure = async move |locked_and_task: LockedAndTask<'_>| {
603            let current_thread = std::thread::current();
604            // Helps find the thread in backtraces, see wait_signaled_sync.
605            log_info!(
606                "hr_timer_manager thread: {:?} ({:?})",
607                current_thread.name(),
608                current_thread.id()
609            );
610            if let Err(e) = self_ref
611                .watch_new_hrtimer_loop(
612                    locked_and_task.current_task(),
613                    start_next_receiver,
614                    wake_channel_for_test,
615                    message_counter_for_test,
616                    Some(setup_done_clone),
617                )
618                .await
619            {
620                log_error!("while running watch_new_hrtimer_loop: {e:?}");
621            }
622            log_warn!("hr_timer_manager: finished kernel thread. should never happen in prod code");
623        };
624        let req = SpawnRequestBuilder::new()
625            .with_debug_name("hr-timer-manager")
626            .with_async_closure(closure)
627            .build();
628        system_task.kernel().kthreads.spawner().spawn_from_request(req);
629        log_info!("hr_timer_manager: waiting on setup done");
630        wait_signaled_sync(&setup_done)
631            .to_result()
632            .map_err(|status| from_status_like_fdio!(status))?;
633        log_info!("hr_timer_manager: setup done");
634
635        Ok(())
636    }
637
638    /// Notifies `timer` and wake sources about a triggered alarm.
639    ///
640    /// # Returns
641    /// - `Ok(true)` if the alarm was for the current generation and was processed.
642    /// - `Ok(false)` if the alarm was stale. Either the timer was restarted ( via `Cmd:Start`) with
643    ///    the same id before the previous generation's alarm fired, or the timer was stopped
644    ///    (via `Cmd::Stop`) or already processed.
645    fn notify_timer(
646        self: &HrTimerManagerHandle,
647        system_task: &CurrentTask,
648        triggered_node: &HrTimerNodeHandle,
649        lease: zx::EventPair,
650    ) -> Result<bool> {
651        let timer_id = triggered_node.hr_timer.get_id();
652        {
653            let guard = self.lock();
654            if let Some(active_state) = guard.pending_timers.get(&timer_id) {
655                // Wake source is deliberately not signaled here. When the triggered_node for a
656                // timer_id is not pointer-identical to the one stored in pending_timers, it is a
657                // trigger for some previous generation of timer_id, which has since been
658                // rescheduled.
659                if !Arc::ptr_eq(&active_state.node, triggered_node) {
660                    log_debug!("notify_timer: ignoring stale alarm for timer_id: {:?}", timer_id);
661                    return Ok(false);
662                }
663            } else {
664                log_debug!(
665                    "notify_timer: ignoring alarm for timer_id: {:?} (not in pending_timers)",
666                    timer_id
667                );
668                return Ok(false);
669            }
670        }
671
672        log_debug!("watch_new_hrtimer_loop: Cmd::Alarm: triggered alarm: {:?}", timer_id);
673        ftrace::duration!("alarms", "starnix:hrtimer:notify_timer", "timer_id" => timer_id);
674        self.lock().pending_timers.remove(&timer_id).map(|s| s.task.detach());
675        signal_event(
676            &triggered_node.hr_timer.event(),
677            zx::Signals::NONE,
678            zx::Signals::TIMER_SIGNALED,
679        )
680        .context("notify_timer: hrtimer signal handle")?;
681
682        // Handle wake source here.
683        let wake_source = triggered_node.wake_source.clone();
684        if let Some(wake_source) = wake_source.as_ref().and_then(|f| f.upgrade()) {
685            let lease_token = lease.into_handle();
686            // `.on_wake` must not block, else the timer management loop will stall.
687            // At the moment `.on_wake`s don't stall at all.
688            wake_source.on_wake(system_task, &lease_token);
689            // Drop the baton lease after wake leases in associated epfd
690            // are activated.
691            drop(lease_token);
692        }
693        ftrace::instant!(
694            "alarms",
695            "starnix:hrtimer:notify_timer:drop_lease",
696            ftrace::Scope::Process,
697            "timer_id" => timer_id
698        );
699        Ok(true)
700    }
701
702    // If no counter has been injected for tests, set provided `counter` to serve as that
703    // counter. Used to inject a fake counter in tests.
704    fn inject_or_set_message_counter(
705        self: &HrTimerManagerHandle,
706        message_counter: OwnedMessageCounterHandle,
707    ) {
708        let mut guard = self.lock();
709        if guard.message_counter.is_none() {
710            guard.message_counter = Some(message_counter);
711        }
712    }
713
714    fn record_inspect_on_stop(
715        self: &HrTimerManagerHandle,
716        guard: &mut MutexGuard<'_, HrTimerManagerState>,
717        prev_len: usize,
718    ) {
719        let after_len = guard.get_pending_timers_count();
720        let inspect_event_type = if after_len == prev_len {
721            None
722        } else if after_len == prev_len - 1 {
723            Some(InspectHrTimerEvent::Remove)
724        } else {
725            Some(InspectHrTimerEvent::retain_err(prev_len, after_len, "removing timer"))
726        };
727        if let Some(inspect_event_type) = inspect_event_type {
728            self.record_event(guard, inspect_event_type, None);
729        }
730    }
731
732    fn record_inspect_on_start(
733        self: &HrTimerManagerHandle,
734        guard: &mut MutexGuard<'_, HrTimerManagerState>,
735        timer_id: zx::Koid,
736        task: fasync::Task<()>,
737        deadline: TargetTime,
738        node: HrTimerNodeHandle,
739        prev_len: usize,
740    ) {
741        guard
742            .pending_timers
743            .insert(timer_id, TimerState { task, deadline, node })
744            .map(|timer_state| {
745                // This should not happen, at this point we already canceled
746                // any previous instances of the same wake alarm.
747                log_debug!(
748                    "watch_new_hrtimer_loop: removing timer task in Cmd::Start: {:?}",
749                    timer_state
750                );
751                timer_state
752            })
753            .map(|v| v.task.detach());
754
755        // Record the inspect event
756        let after_len = guard.get_pending_timers_count();
757        let inspect_event_type = if after_len == prev_len {
758            InspectHrTimerEvent::Update
759        } else if after_len == prev_len + 1 {
760            InspectHrTimerEvent::Add
761        } else {
762            InspectHrTimerEvent::retain_err(prev_len, after_len, "adding timer")
763        };
764        self.record_event(guard, inspect_event_type, Some(deadline));
765    }
766
767    /// Timer handler loop.
768    ///
769    /// # Args:
770    /// - `wake_channel_for_test`: a channel implementing `fuchsia.time.alarms/Wake`
771    ///   injected by tests only.
772    /// - `message_counter_for_test`: a zx::Counter injected only by tests, to
773    ///   emulate the wake proxy message counter.
774    /// - `setup_done`: signaled once the initial loop setup is complete. Allows
775    ///   pausing any async callers until this loop is in a runnable state.
776    async fn watch_new_hrtimer_loop(
777        self: &HrTimerManagerHandle,
778        system_task: &CurrentTask,
779        mut start_next_receiver: UnboundedReceiver<Cmd>,
780        mut wake_channel_for_test: Option<zx::Channel>,
781        message_counter_for_test: Option<zx::Counter>,
782        setup_done: Option<zx::Event>,
783    ) -> Result<()> {
784        // The values assigned to the counter are arbitrary, but unique for each assignment.
785        // This should give us hints as to where any deadlocks might occur if they do.
786        // We also take stack traces, but stack traces do not capture async stacks presently.
787        self.lock().debug_start_stage_counter = 1005;
788        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init", ftrace::Scope::Process);
789        defer! {
790            log_warn!("watch_new_hrtimer_loop: exiting. This should only happen in tests.");
791        }
792
793        let (device_channel, message_counter) = {
794            defer! {
795                // Ensure that the setup_done event is signaled even if we fail to initialize
796                // correctly. This prevents the caller from blocking forever.
797                setup_done
798                    .as_ref()
799                    .map(|e| signal_event(e, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED));
800            }
801            let wake_channel = wake_channel_for_test.take().unwrap_or_else(|| {
802                connect_to_wake_alarms_async().expect("connection to wake alarms async proxy")
803            });
804            self.lock().debug_start_stage_counter = 1004;
805
806            let counter_name = "wake-alarms";
807            let (device_channel, counter) = if let Some(message_counter) = message_counter_for_test
808            {
809                // For tests only.
810                (wake_channel, message_counter)
811            } else {
812                create_proxy_for_wake_events_counter_zero(wake_channel, counter_name.to_string())
813            };
814            self.lock().debug_start_stage_counter = 1003;
815            let message_counter = system_task
816                .kernel()
817                .suspend_resume_manager
818                .add_message_counter(counter_name, Some(counter));
819            self.inject_or_set_message_counter(message_counter.clone());
820            (device_channel, message_counter)
821        };
822
823        self.lock().debug_start_stage_counter = 1002;
824        let device_async_proxy =
825            fta::WakeAlarmsProxy::new(fidl::AsyncChannel::from_channel(device_channel));
826
827        // Contains suspend locks for interval (periodic) timers that expired, but have not been
828        // rescheduled yet. This allows us to defer container suspend until all such timers have
829        // been rescheduled.
830        // TODO: b/418813184 - Remove in favor of Fuchsia-specific interval timer support
831        // once it is available.
832        let mut interval_timers_pending_reschedule: HashMap<zx::Koid, SharedMessageCounter> =
833            HashMap::new();
834
835        // Per timer tasks.
836        let mut task_by_timer_id: HashMap<zx::Koid, fasync::Task<()>> = HashMap::new();
837
838        self.lock().debug_start_stage_counter = 1001;
839        ftrace::instant!("alarms", "watch_new_hrtimer_loop:init_done", ftrace::Scope::Process);
840        while let Some(cmd) = start_next_receiver.next().await {
841            {
842                let mut guard = self.lock();
843                guard.debug_start_stage_counter = 1002;
844                guard.last_loop_started_timestamp = zx::BootInstant::get();
845            }
846            ftrace::duration!("alarms", "start_next_receiver:loop");
847
848            log_debug!("watch_new_hrtimer_loop: got command: {cmd:?}");
849            self.lock().debug_start_stage_counter = 0;
850            match cmd {
851                // A new timer needs to be started.  The timer node for the timer
852                // is provided, and `done` must be signaled once the setup is
853                // complete.
854                Cmd::Start { new_timer_node, done, message_counter } => {
855                    self.lock().debug_start_stage_counter = 1;
856                    defer! {
857                        // Allow add_timer to proceed once command processing is done.
858                        let _ = signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED)
859                            .map_err(|err| to_errno_with_log(err));
860                    }
861
862                    let hr_timer = &new_timer_node.hr_timer;
863                    let timer_id = hr_timer.get_id();
864                    let wake_alarm_id = hr_timer.wake_alarm_id();
865                    let trace_id = hr_timer.trace_id();
866                    log_debug!(
867                        "watch_new_hrtimer_loop: Cmd::Start: timer_id: {:?}, wake_alarm_id: {}",
868                        timer_id,
869                        wake_alarm_id
870                    );
871                    ftrace::duration!("alarms", "starnix:hrtimer:start", "timer_id" => timer_id);
872                    ftrace::flow_begin!("alarms", "hrtimer_lifecycle", trace_id);
873
874                    self.lock().debug_start_stage_counter = 2;
875                    let maybe_cancel = self.lock().pending_timers.remove(&timer_id);
876                    log_long_op!(cancel_by_id(
877                        &message_counter,
878                        maybe_cancel,
879                        &timer_id,
880                        &device_async_proxy,
881                        &mut interval_timers_pending_reschedule,
882                        &mut task_by_timer_id,
883                        &wake_alarm_id,
884                    ));
885                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_pre_start", ftrace::Scope::Process, "timer_id" => timer_id);
886
887                    // Signaled when the timer completed setup. We can not forward `done` because
888                    // we have post-schedule work as well.
889                    let setup_event = zx::Event::create();
890                    let deadline = new_timer_node.deadline;
891
892                    ftrace::duration!("alarms", "starnix:hrtimer:signaled", "timer_id" => timer_id);
893
894                    self.lock().debug_start_stage_counter = 3;
895                    // Make a request here. Move it into the closure after. Current FIDL semantics
896                    // ensure that even though we do not `.await` on this future, a request to
897                    // schedule a wake alarm based on this timer will be sent.
898                    let request_fut = match deadline {
899                        TargetTime::Monotonic(_) => {
900                            // If we hit this, it's a Starnix bug.
901                            panic!("can not schedule wake alarm on monotonic timeline")
902                        }
903                        TargetTime::BootInstant(boot_instant) => device_async_proxy.set_and_wait(
904                            boot_instant,
905                            fta::SetMode::NotifySetupDone(
906                                setup_event
907                                    .duplicate_handle(zx::Rights::SAME_RIGHTS)
908                                    .map_err(|status| from_status_like_fdio!(status))?,
909                            ),
910                            &wake_alarm_id,
911                        ),
912                        TargetTime::RealTime(utc_instant) => device_async_proxy.set_and_wait_utc(
913                            &fta::InstantUtc { timestamp_utc: utc_instant.into_nanos() },
914                            fta::SetMode::NotifySetupDone(
915                                setup_event
916                                    .duplicate_handle(zx::Rights::SAME_RIGHTS)
917                                    .map_err(|status| from_status_like_fdio!(status))?,
918                            ),
919                            &wake_alarm_id,
920                        ),
921                    };
922                    let mut done_sender = self.get_sender();
923                    let prev_len = self.lock().get_pending_timers_count();
924
925                    self.lock().debug_start_stage_counter = 4;
926                    let self_clone = self.clone();
927                    let new_timer_node_clone = new_timer_node.clone();
928                    let task = fasync::Task::local(async move {
929                        log_debug!(
930                            "wake_alarm_future: set_and_wait will block here: {wake_alarm_id:?}"
931                        );
932                        ftrace::instant!("alarms", "starnix:hrtimer:wait", ftrace::Scope::Process, "timer_id" => timer_id);
933                        ftrace::flow_step!("alarms", "hrtimer_lifecycle", trace_id);
934
935                        // Wait for this timer to expire. This wait can be arbitrarily long.
936                        let response = request_fut.await;
937
938                        // The counter was already incremented by the wake proxy when the alarm fired.
939                        let message_counter = self_clone.lock().share_message_counter(false);
940                        ftrace::instant!("alarms", "starnix:hrtimer:wake", ftrace::Scope::Process, "timer_id" => timer_id);
941
942                        log_debug!("wake_alarm_future: set_and_wait over: {:?}", response);
943                        match response {
944                            // Alarm.  This must be processed in the main loop because notification
945                            // requires access to &CurrentTask, which is not available here. So we
946                            // only forward it.
947                            Ok(Ok(lease)) => {
948                                log_long_op!(done_sender.send(Cmd::Alarm {
949                                    new_timer_node: new_timer_node_clone,
950                                    lease,
951                                    message_counter
952                                }))
953                                .expect("infallible");
954                            }
955                            Ok(Err(error)) => {
956                                ftrace::duration!("alarms", "starnix:hrtimer:wake_error", "timer_id" => timer_id);
957                                log_debug!(
958                                    "wake_alarm_future: protocol error: {error:?}: timer_id: {timer_id:?}"
959                                );
960                                let mut guard = self_clone.lock();
961                                let pending = &mut guard.pending_timers;
962                                process_alarm_protocol_error(pending, &timer_id, error);
963                            }
964                            Err(error) => {
965                                ftrace::duration!("alarms", "starnix:hrtimer:fidl_error", "timer_id" => timer_id);
966                                log_debug!(
967                                    "wake_alarm_future: FIDL error: {error:?}: timer_id: {timer_id:?}"
968                                );
969                                self_clone.lock().pending_timers.remove(&timer_id);
970                            }
971                        }
972                        log_debug!("wake_alarm_future: closure done for timer_id: {timer_id:?}");
973                    });
974                    self.lock().debug_start_stage_counter = 5;
975                    ftrace::instant!("alarms", "starnix:hrtimer:pre_setup_event_signal", ftrace::Scope::Process, "timer_id" => timer_id);
976
977                    // wait_signaled here should be almost instantaneous.  Blocking for a long time
978                    // here is a bug.
979                    match log_long_op!(wait_signaled(&setup_event)) {
980                        Ok(_) => {
981                            ftrace::instant!("alarms", "starnix:hrtimer:setup_event_signaled", ftrace::Scope::Process, "timer_id" => timer_id);
982                            let mut guard = self.lock();
983                            guard.debug_start_stage_counter = 6;
984                            self.record_inspect_on_start(
985                                &mut guard,
986                                timer_id,
987                                task,
988                                deadline,
989                                new_timer_node,
990                                prev_len,
991                            );
992                            log_debug!("Cmd::Start scheduled: timer_id: {:?}", timer_id);
993                        }
994                        Err(err) => {
995                            ftrace::instant!("alarms", "starnix:hrtimer:setup_event_error", ftrace::Scope::Process, "timer_id" => timer_id);
996                            to_errno_with_log(err);
997                        }
998                    }
999                    self.lock().debug_start_stage_counter = 999;
1000                }
1001                Cmd::Alarm { new_timer_node, lease, message_counter } => {
1002                    self.lock().debug_start_stage_counter = 10;
1003                    let timer = &new_timer_node.hr_timer;
1004                    let timer_id = timer.get_id();
1005                    ftrace::duration!("alarms", "starnix:hrtimer:alarm", "timer_id" => timer_id);
1006                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
1007                    match self.notify_timer(system_task, &new_timer_node, lease) {
1008                        Ok(true) => {
1009                            // Alarm was for current generation, success.
1010                            // Interval timers currently need special handling: we must not suspend
1011                            // the container until the interval timer in question gets re-scheduled.
1012                            // To ensure that we stay awake, we store the suspend lock for a while.
1013                            // This prevents container suspend.
1014                            if *timer.is_interval.lock() {
1015                                interval_timers_pending_reschedule
1016                                    .insert(timer_id, message_counter);
1017                            }
1018                        }
1019                        Ok(false) => {
1020                            // Alarm was stale, ignored.
1021                        }
1022                        Err(e) => {
1023                            log_error!("watch_new_hrtimer_loop: notify_timer failed: {e:?}");
1024                        }
1025                    }
1026                    // Interval timers usually reschedule themselves. But if an interval timer
1027                    // is stopped (via Cmd::Stop) or is replaced (via Cmd::Start for the same timer
1028                    // ID) before it has a chance to reschedule, the reschedule lock will get
1029                    // dropped then.
1030                    log_debug!("Cmd::Alarm done: timer_id: {timer_id:?}");
1031                    self.lock().debug_start_stage_counter = 19;
1032                }
1033                Cmd::Stop { timer, done, message_counter } => {
1034                    self.lock().debug_start_stage_counter = 20;
1035                    defer! {
1036                        let _ = signal_event(&done, zx::Signals::NONE, zx::Signals::EVENT_SIGNALED)
1037                            .map_err(|err| to_errno_with_log(err));
1038                    }
1039                    let timer_id = timer.get_id();
1040                    log_debug!("watch_new_hrtimer_loop: Cmd::Stop: timer_id: {:?}", timer_id);
1041                    ftrace::duration!("alarms", "starnix:hrtimer:stop", "timer_id" => timer_id);
1042                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
1043
1044                    let (maybe_cancel, prev_len) = {
1045                        let mut guard = self.lock();
1046                        let prev_len = guard.get_pending_timers_count();
1047                        (guard.pending_timers.remove(&timer_id), prev_len)
1048                    };
1049
1050                    let wake_alarm_id = timer.wake_alarm_id();
1051                    log_long_op!(cancel_by_id(
1052                        &message_counter,
1053                        maybe_cancel,
1054                        &timer_id,
1055                        &device_async_proxy,
1056                        &mut interval_timers_pending_reschedule,
1057                        &mut task_by_timer_id,
1058                        &wake_alarm_id,
1059                    ));
1060                    ftrace::instant!("alarms", "starnix:hrtimer:cancel_at_stop", ftrace::Scope::Process, "timer_id" => timer_id);
1061
1062                    {
1063                        let mut guard = self.lock();
1064                        self.record_inspect_on_stop(&mut guard, prev_len);
1065                    }
1066                    log_debug!("Cmd::Stop done: {timer_id:?}");
1067                    self.lock().debug_start_stage_counter = 29;
1068                }
1069                Cmd::MonitorUtc { timer, counter, recv } => {
1070                    self.lock().debug_start_stage_counter = 30;
1071                    ftrace::duration!("alarms", "starnix:hrtimer:monitor_utc", "timer_id" => timer.get_id());
1072                    ftrace::flow_step!("alarms", "hrtimer_lifecycle", timer.trace_id());
1073                    let monitor_task = fasync::Task::local(async move {
1074                        run_utc_timeline_monitor(counter, recv).await;
1075                    });
1076                    task_by_timer_id.insert(timer.get_id(), monitor_task);
1077                    self.lock().debug_start_stage_counter = 39;
1078                }
1079            }
1080            let mut guard = self.lock();
1081            guard.debug_start_stage_counter = 90;
1082
1083            log_debug!(
1084                "watch_new_hrtimer_loop: pending timers count: {}",
1085                guard.pending_timers.len()
1086            );
1087            log_debug!("watch_new_hrtimer_loop: pending timers:       {:?}", guard.pending_timers);
1088            log_debug!(
1089                "watch_new_hrtimer_loop: message counter:      {:?}",
1090                message_counter.to_string(),
1091            );
1092            log_debug!(
1093                "watch_new_hrtimer_loop: interval timers:      {:?}",
1094                interval_timers_pending_reschedule.len(),
1095            );
1096
1097            guard.last_loop_completed_timestamp = zx::BootInstant::get();
1098            guard.debug_start_stage_counter = 99;
1099        } // while
1100
1101        Ok(())
1102    }
1103
1104    fn lock(&self) -> MutexGuard<'_, HrTimerManagerState> {
1105        self.state.lock()
1106    }
1107
1108    fn record_event(
1109        self: &HrTimerManagerHandle,
1110        guard: &mut MutexGuard<'_, HrTimerManagerState>,
1111        event_type: InspectHrTimerEvent,
1112        deadline: Option<TargetTime>,
1113    ) {
1114        if guard.events.len() >= INSPECT_EVENT_BUFFER_SIZE {
1115            guard.events.pop_front();
1116        }
1117        guard.events.push_back(InspectEvent::new(event_type, deadline));
1118    }
1119
1120    /// Add a new timer.
1121    ///
1122    /// A wake alarm is scheduled for the timer.
1123    pub fn add_timer(
1124        self: &HrTimerManagerHandle,
1125        wake_source: Option<Weak<dyn OnWakeOps>>,
1126        new_timer: &HrTimerHandle,
1127        deadline: TargetTime,
1128    ) -> Result<(), Errno> {
1129        log_debug!("add_timer: entry: {new_timer:?}, deadline: {deadline:?}");
1130        ftrace::duration!("alarms", "starnix:add_timer", "deadline" => deadline.estimate_boot().unwrap().into_nanos());
1131        ftrace::flow_step!("alarms", "hrtimer_lifecycle", new_timer.trace_id());
1132
1133        // Keep system awake until timer is scheduled.
1134        let message_counter_until_timer_scheduled = self.lock().share_message_counter(true);
1135
1136        let sender = self.get_sender();
1137        let new_timer_node = HrTimerNode::new(deadline, wake_source, new_timer.clone());
1138        let wake_alarm_scheduled = zx::Event::create();
1139        let wake_alarm_scheduled_clone = wake_alarm_scheduled
1140            .duplicate_handle(zx::Rights::SAME_RIGHTS)
1141            .map_err(|status| from_status_like_fdio!(status))?;
1142        let timer_id = new_timer.get_id();
1143        sender
1144            .unbounded_send(Cmd::Start {
1145                new_timer_node,
1146                message_counter: message_counter_until_timer_scheduled,
1147                done: wake_alarm_scheduled_clone,
1148            })
1149            .map_err(|_| errno!(EINVAL, "add_timer: could not send Cmd::Start"))?;
1150
1151        // Block until the wake alarm for this timer is scheduled.
1152        wait_signaled_sync(&wake_alarm_scheduled)
1153            .map_err(|_| errno!(EINVAL, "add_timer: wait_signaled_sync failed"))?;
1154
1155        log_debug!("add_timer: exit : timer_id: {timer_id:?}");
1156        Ok(())
1157    }
1158
1159    /// Remove a timer.
1160    ///
1161    /// The timer is removed if scheduled, nothing is changed if it is not.
1162    pub fn remove_timer(self: &HrTimerManagerHandle, timer: &HrTimerHandle) -> Result<(), Errno> {
1163        log_debug!("remove_timer: entry:  {timer:?}");
1164        ftrace::duration!("alarms", "starnix:remove_timer");
1165        // Keep system awake until timer is removed.
1166        let message_counter_until_removed = self.lock().share_message_counter(true);
1167
1168        let sender = self.get_sender();
1169        let done = zx::Event::create();
1170        let done_clone = done
1171            .duplicate_handle(zx::Rights::SAME_RIGHTS)
1172            .map_err(|status| from_status_like_fdio!(status))?;
1173        let timer_id = timer.get_id();
1174        sender
1175            .unbounded_send(Cmd::Stop {
1176                timer: timer.clone(),
1177                message_counter: message_counter_until_removed,
1178                done: done_clone,
1179            })
1180            .map_err(|_| errno!(EINVAL, "remove_timer: could not send Cmd::Stop"))?;
1181
1182        // Block until the alarm for this timer is scheduled.
1183        wait_signaled_sync(&done)
1184            .map_err(|_| errno!(EINVAL, "add_timer: wait_signaled_sync failed"))?;
1185        log_debug!("remove_timer: exit:  {timer_id:?}");
1186        Ok(())
1187    }
1188}
1189
1190#[derive(Debug)]
1191pub struct HrTimer {
1192    event: zx::Event,
1193
1194    /// True iff the timer is currently set to trigger at an interval.
1195    ///
1196    /// This is used to determine at which point the hrtimer event (not
1197    /// `HrTimer::event` but the one that is shared with the actual driver)
1198    /// should be cleared.
1199    ///
1200    /// If this is true, the timer manager will wait to clear the timer event
1201    /// until the next timer request has been sent to the driver. This prevents
1202    /// lost wake ups where the container happens to suspend between two instances
1203    /// of an interval timer triggering.
1204    pub is_interval: Mutex<bool>,
1205}
1206pub type HrTimerHandle = Arc<HrTimer>;
1207
1208impl Drop for HrTimer {
1209    fn drop(&mut self) {
1210        let wake_alarm_id = self.wake_alarm_id();
1211        ftrace::duration!("alarms", "hrtimer::drop", "timer_id" => self.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
1212        ftrace::flow_end!("alarms", "hrtimer_lifecycle", self.trace_id());
1213    }
1214}
1215
1216impl HrTimer {
1217    pub fn new() -> HrTimerHandle {
1218        let ret = Arc::new(Self { event: zx::Event::create(), is_interval: Mutex::new(false) });
1219        let wake_alarm_id = ret.wake_alarm_id();
1220        ftrace::duration!("alarms", "hrtimer::new", "timer_id" => ret.get_id(), "wake_alarm_id" => &wake_alarm_id[..]);
1221        ftrace::flow_begin!("alarms", "hrtimer_lifecycle", ret.trace_id(), "wake_alarm_id" => &wake_alarm_id[..]);
1222        ret
1223    }
1224
1225    pub fn event(&self) -> zx::Event {
1226        self.event
1227            .duplicate_handle(zx::Rights::SAME_RIGHTS)
1228            .expect("Duplicate hrtimer event handle")
1229    }
1230
1231    /// Returns the unique identifier of this [HrTimer].
1232    ///
1233    /// All holders of the same [HrTimerHandle] will see the same value here.
1234    pub fn get_id(&self) -> zx::Koid {
1235        self.event.koid().expect("infallible")
1236    }
1237
1238    /// Returns the unique alarm ID for this [HrTimer].
1239    ///
1240    /// The naming pattern is: `starnix:Koid(NNNNN):iB`, where `NNNNN` is a koid
1241    /// and B is `1` if the timer is an interval timer, or `0` otherwise.
1242    fn wake_alarm_id(&self) -> String {
1243        let i = if *self.is_interval.lock() { "i1" } else { "i0" };
1244        let koid = self.get_id();
1245        format!("starnix:{koid:?}:{i}")
1246    }
1247
1248    fn trace_id(&self) -> ftrace::Id {
1249        self.get_id().raw_koid().into()
1250    }
1251}
1252
1253impl TimerOps for HrTimerHandle {
1254    fn start(
1255        &self,
1256        current_task: &CurrentTask,
1257        source: Option<Weak<dyn OnWakeOps>>,
1258        deadline: TargetTime,
1259    ) -> Result<(), Errno> {
1260        // Before (re)starting the timer, ensure the signal is cleared.
1261        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
1262            .map_err(|status| from_status_like_fdio!(status))?;
1263        current_task.kernel().hrtimer_manager.add_timer(
1264            source,
1265            self,
1266            deadline.into_resolved_utc_deadline(),
1267        )?;
1268        Ok(())
1269    }
1270
1271    fn stop(&self, kernel: &Arc<Kernel>) -> Result<(), Errno> {
1272        // Clear the signal when removing the hrtimer.
1273        signal_event(&self.event, zx::Signals::TIMER_SIGNALED, zx::Signals::NONE)
1274            .map_err(|status| from_status_like_fdio!(status))?;
1275        Ok(kernel.hrtimer_manager.remove_timer(self)?)
1276    }
1277
1278    fn as_handle_ref(&self) -> HandleRef<'_> {
1279        self.event.as_handle_ref()
1280    }
1281
1282    fn get_timeline_change_observer(
1283        &self,
1284        current_task: &CurrentTask,
1285    ) -> Option<TimelineChangeObserver> {
1286        // Should this return errno instead?
1287        current_task
1288            .kernel()
1289            .hrtimer_manager
1290            .get_timeline_change_observer(self)
1291            .inspect_err(|err| {
1292                log_error!("hr_timer_manager: could not create timeline change counter: {err:?}")
1293            })
1294            .ok()
1295    }
1296}
1297
1298/// Represents a node of `HrTimer`.
1299#[derive(Clone, Debug)]
1300struct HrTimerNode {
1301    /// The deadline of the associated `HrTimer`.
1302    deadline: TargetTime,
1303
1304    /// The source where initiated this `HrTimer`.
1305    ///
1306    /// When the timer expires, the system will be woken up if necessary. The `on_wake` callback
1307    /// will be triggered with a baton lease to prevent further suspend while Starnix handling the
1308    /// wake event.
1309    wake_source: Option<Weak<dyn OnWakeOps>>,
1310
1311    /// The underlying HrTimer.
1312    hr_timer: HrTimerHandle,
1313}
1314type HrTimerNodeHandle = Arc<HrTimerNode>;
1315
1316impl HrTimerNode {
1317    fn new(
1318        deadline: TargetTime,
1319        wake_source: Option<Weak<dyn OnWakeOps>>,
1320        hr_timer: HrTimerHandle,
1321    ) -> HrTimerNodeHandle {
1322        Arc::new(Self { deadline, wake_source, hr_timer })
1323    }
1324}
1325
1326#[cfg(test)]
1327mod tests {
1328    use super::*;
1329    use crate::testing::spawn_kernel_and_run;
1330    use crate::time::HrTimer;
1331    use fake_wake_alarms::{MAGIC_EXPIRE_DEADLINE, Response, serve_fake_wake_alarms};
1332    use fidl_fuchsia_time_alarms as fta;
1333    use fuchsia_async as fasync;
1334    use fuchsia_runtime::{UtcClockUpdate, UtcInstant};
1335    use std::sync::LazyLock;
1336    use std::thread;
1337
1338    static CLOCK_OPTS: LazyLock<zx::ClockOpts> = LazyLock::new(zx::ClockOpts::empty);
1339    const BACKSTOP_TIME: UtcInstant = UtcInstant::from_nanos(/*arbitrary*/ 222222);
1340
1341    fn create_utc_clock_for_test() -> UtcClock {
1342        let clock = UtcClock::create(*CLOCK_OPTS, Some(BACKSTOP_TIME)).unwrap();
1343        clock.update(UtcClockUpdate::builder().approximate_value(BACKSTOP_TIME)).unwrap();
1344        clock
1345    }
1346
1347    impl HrTimerManagerState {
1348        fn new_for_test() -> Self {
1349            Self {
1350                events: VecDeque::with_capacity(INSPECT_EVENT_BUFFER_SIZE),
1351                pending_timers: Default::default(),
1352                message_counter: None,
1353                last_loop_started_timestamp: zx::BootInstant::INFINITE_PAST,
1354                last_loop_completed_timestamp: zx::BootInstant::INFINITE_PAST,
1355                debug_start_stage_counter: 0,
1356            }
1357        }
1358    }
1359
1360    // Injected for testing.
1361    fn connect_factory(message_counter: zx::Counter, response_type: Response) -> zx::Channel {
1362        let (client, server) = zx::Channel::create();
1363
1364        // A separate thread is needed to allow independent execution of the server.
1365        let _detached = thread::spawn(move || {
1366            fasync::LocalExecutor::default().run_singlethreaded(async move {
1367                let stream =
1368                    fidl::endpoints::ServerEnd::<fta::WakeAlarmsMarker>::new(server).into_stream();
1369                serve_fake_wake_alarms(message_counter, response_type, stream, /*once*/ false)
1370                    .await;
1371            });
1372        });
1373        client
1374    }
1375
1376    // Initializes HrTimerManager for tests.
1377    //
1378    // # Returns
1379    //
1380    // A tuple of:
1381    // - `HrTimerManagerHandle` the unit under test
1382    // - `zx::Counter` a message counter to use in tests to observe suspend state
1383    fn init_hr_timer_manager(
1384        current_task: &CurrentTask,
1385        response_type: Response,
1386    ) -> (HrTimerManagerHandle, zx::Counter) {
1387        let manager = Arc::new(HrTimerManager {
1388            state: Mutex::new(HrTimerManagerState::new_for_test()),
1389            start_next_sender: Default::default(),
1390        });
1391        let counter = zx::Counter::create();
1392        let counter_clone = counter.duplicate_handle(zx::Rights::SAME_RIGHTS).unwrap();
1393        let wake_channel = connect_factory(counter_clone, response_type);
1394        let counter_clone = counter.duplicate_handle(zx::Rights::SAME_RIGHTS).unwrap();
1395        manager
1396            .init_internal(&current_task, Some(wake_channel), Some(counter_clone))
1397            .expect("infallible");
1398        (manager, counter)
1399    }
1400
1401    #[fuchsia::test]
1402    async fn test_triggering() {
1403        spawn_kernel_and_run(async |_, current_task| {
1404            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1405
1406            let timer1 = HrTimer::new();
1407            let timer2 = HrTimer::new();
1408            let timer3 = HrTimer::new();
1409
1410            manager.add_timer(None, &timer1, zx::BootInstant::from_nanos(1).into()).unwrap();
1411            manager.add_timer(None, &timer2, zx::BootInstant::from_nanos(2).into()).unwrap();
1412            manager.add_timer(None, &timer3, zx::BootInstant::from_nanos(3).into()).unwrap();
1413
1414            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1415            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1416            wait_signaled_sync(&timer3.event()).to_result().unwrap();
1417
1418            assert_eq!(
1419                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1420                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1421            );
1422        })
1423        .await;
1424    }
1425
1426    #[fuchsia::test]
1427    async fn test_triggering_utc() {
1428        spawn_kernel_and_run(async |_, current_task| {
1429            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1430
1431            let timer1 = HrTimer::new();
1432            let timer2 = HrTimer::new();
1433            let timer3 = HrTimer::new();
1434
1435            // All these are normally already expired as scheduled.
1436            manager.add_timer(None, &timer1, UtcInstant::from_nanos(1).into()).unwrap();
1437            manager.add_timer(None, &timer2, UtcInstant::from_nanos(2).into()).unwrap();
1438            manager.add_timer(None, &timer3, UtcInstant::from_nanos(3).into()).unwrap();
1439
1440            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1441            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1442            wait_signaled_sync(&timer3.event()).to_result().unwrap();
1443
1444            assert_eq!(
1445                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1446                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1447            );
1448        })
1449        .await;
1450    }
1451
1452    #[fuchsia::test]
1453    async fn test_delayed_response() {
1454        spawn_kernel_and_run(async |_, current_task| {
1455            let (manager, counter) = init_hr_timer_manager(current_task, Response::Immediate);
1456
1457            let timer = HrTimer::new();
1458
1459            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1460
1461            assert_eq!(
1462                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1463                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1464            );
1465        })
1466        .await;
1467    }
1468
1469    #[fuchsia::test]
1470    async fn test_protocol_error_response() {
1471        spawn_kernel_and_run(async |_, current_task| {
1472            let (manager, counter) = init_hr_timer_manager(current_task, Response::Error);
1473
1474            let timer = HrTimer::new();
1475            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1476            assert_eq!(
1477                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1478                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1479            );
1480        })
1481        .await;
1482    }
1483
1484    #[fuchsia::test]
1485    async fn reschedule_same_timer() {
1486        spawn_kernel_and_run(async |_, current_task| {
1487            let (manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1488
1489            let timer = HrTimer::new();
1490
1491            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(1).into()).unwrap();
1492            manager.add_timer(None, &timer, zx::BootInstant::from_nanos(2).into()).unwrap();
1493
1494            // Force alarm expiry.
1495            manager
1496                .add_timer(None, &timer, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1497                .unwrap();
1498            wait_signaled_sync(&timer.event()).to_result().unwrap();
1499
1500            assert_eq!(
1501                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1502                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1503            );
1504        })
1505        .await;
1506    }
1507
1508    #[fuchsia::test]
1509    async fn rescheduling_interval_timers_forbids_suspend() {
1510        spawn_kernel_and_run(async |_, current_task| {
1511            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1512
1513            // Schedule an interval timer and let it expire.
1514            let timer1 = HrTimer::new();
1515            *timer1.is_interval.lock() = true;
1516            hrtimer_manager
1517                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1518                .unwrap();
1519            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1520
1521            // Schedule a regular timer and let it expire.
1522            let timer2 = HrTimer::new();
1523            hrtimer_manager
1524                .add_timer(None, &timer2, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1525                .unwrap();
1526            wait_signaled_sync(&timer2.event()).to_result().unwrap();
1527
1528            // When we have an expired but not rescheduled interval timer (`timer1`), and we have
1529            // an intervening timer that gets scheduled and expires (`timer2`) before `timer1` is
1530            // rescheduled, then suspend should be disallowed (counter > 0) to allow `timer1` to
1531            // be scheduled eventually.
1532            assert_eq!(
1533                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1534                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1535            );
1536        })
1537        .await;
1538    }
1539
1540    #[fuchsia::test]
1541    async fn canceling_interval_timer_allows_suspend() {
1542        spawn_kernel_and_run(async |_, current_task| {
1543            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1544
1545            let timer1 = HrTimer::new();
1546            *timer1.is_interval.lock() = true;
1547            hrtimer_manager
1548                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1549                .unwrap();
1550            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1551
1552            // When an interval timer expires, we should not be allowed to suspend.
1553            assert_eq!(
1554                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1555                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1556            );
1557
1558            // Schedule the same timer again. This time around we do not wait for it to expire,
1559            // but cancel the timer instead.
1560            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
1561            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S.into());
1562            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();
1563
1564            hrtimer_manager.remove_timer(&timer1).unwrap();
1565
1566            // When we cancel an interval timer, we should be allowed to suspend.
1567            assert_eq!(
1568                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1569                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1570            );
1571        })
1572        .await;
1573    }
1574
1575    #[fuchsia::test]
1576    async fn canceling_interval_timer_allows_suspend_with_flake() {
1577        spawn_kernel_and_run(async |_, current_task| {
1578            let (hrtimer_manager, counter) = init_hr_timer_manager(current_task, Response::Delayed);
1579
1580            let timer1 = HrTimer::new();
1581            *timer1.is_interval.lock() = true;
1582            hrtimer_manager
1583                .add_timer(None, &timer1, zx::BootInstant::from_nanos(MAGIC_EXPIRE_DEADLINE).into())
1584                .unwrap();
1585            wait_signaled_sync(&timer1.event()).to_result().unwrap();
1586
1587            assert_eq!(
1588                counter.wait_one(zx::Signals::COUNTER_POSITIVE, zx::MonotonicInstant::INFINITE),
1589                zx::WaitResult::Ok(zx::Signals::COUNTER_POSITIVE)
1590            );
1591            const DURATION_100S: zx::BootDuration = zx::BootDuration::from_seconds(100);
1592            let deadline2: zx::BootInstant = zx::BootInstant::after(DURATION_100S.into());
1593            hrtimer_manager.add_timer(None, &timer1, deadline2.into()).unwrap();
1594            // No pause between start and stop has led to flakes before.
1595            hrtimer_manager.remove_timer(&timer1).unwrap();
1596
1597            assert_eq!(
1598                counter.wait_one(zx::Signals::COUNTER_NON_POSITIVE, zx::MonotonicInstant::INFINITE),
1599                zx::WaitResult::Ok(zx::Signals::COUNTER_NON_POSITIVE)
1600            );
1601        })
1602        .await;
1603    }
1604
1605    #[fuchsia::test]
1606    async fn utc_timeline_monitor_exits_on_interest_drop() {
1607        let counter = zx::Counter::create();
1608        let utc_clock = create_utc_clock_for_test();
1609        let (tx, rx) = mpsc::unbounded();
1610
1611        drop(tx);
1612        // Expect that this call returns, when tx no longer exists. Incorrect
1613        // handling of tx closure could cause it to hang otherwise.
1614        run_utc_timeline_monitor_internal(counter, rx, utc_clock).await;
1615    }
1616}