Skip to main content

kernel_manager/
suspend.rs

1// Copyright 2025 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::Kernels;
6use anyhow::Error;
7use fidl::Peered;
8use fidl_fuchsia_starnix_runner as fstarnixrunner;
9use fuchsia_inspect::{self as inspect, UintExponentialHistogramProperty, UintProperty};
10use log::warn;
11use starnix_sync::{LockDepMutex, TerminalLock};
12use std::sync::Arc;
13use zx::Task;
14
15/// The signal that the kernel raises to indicate that it's awake.
16pub const AWAKE_SIGNAL: zx::Signals = zx::Signals::USER_0;
17
18/// The signal that the kernel raises to indicate that it's suspended.
19pub const ASLEEP_SIGNAL: zx::Signals = zx::Signals::USER_1;
20
21pub struct WakeSource {
22    handle: zx::NullableHandle,
23    name: String,
24    signals: zx::Signals,
25}
26
27impl WakeSource {
28    pub fn from_counter(counter: zx::Counter, name: String) -> Self {
29        Self { handle: counter.into_handle(), name, signals: zx::Signals::COUNTER_POSITIVE }
30    }
31
32    pub fn from_handle(handle: zx::NullableHandle, name: String, signals: zx::Signals) -> Self {
33        Self { handle, name, signals }
34    }
35
36    fn as_wait_item(&self) -> zx::WaitItem<'_> {
37        self.handle.wait_item(self.signals)
38    }
39}
40
41pub type WakeSources = std::collections::HashMap<zx::Koid, WakeSource>;
42
43pub struct SuspendContext {
44    pub wake_sources: Arc<LockDepMutex<WakeSources, TerminalLock>>,
45    pub wake_watchers: Arc<LockDepMutex<Vec<zx::EventPair>, TerminalLock>>,
46
47    /// Inspect node for suspend-related metrics.
48    pub node: inspect::Node,
49    /// Histogram recording the boot timeline duration (in nanoseconds) of successful container
50    /// suspensions.
51    pub suspend_duration_histogram: UintExponentialHistogramProperty,
52    /// The total number of times the container has attempted to suspend.
53    pub suspend_attempts_count: UintProperty,
54    /// The total number of times the container has successfully suspended.
55    pub suspend_successes_count: UintProperty,
56    /// The total number of times the container has failed to suspend.
57    pub suspend_failures_count: UintProperty,
58}
59
60impl Default for SuspendContext {
61    fn default() -> Self {
62        let inspector = inspect::component::inspector();
63        let node = inspector.root().create_child("suspend");
64        let suspend_duration_histogram = node.create_uint_exponential_histogram(
65            "suspend_duration_boot_ns",
66            inspect::ExponentialHistogramParams {
67                floor: 100_000,
68                initial_step: 100_000,
69                step_multiplier: 2,
70                buckets: 32,
71            },
72        );
73        let suspend_attempts_count = node.create_uint("suspend_attempts_count", 0);
74        let suspend_successes_count = node.create_uint("suspend_successes_count", 0);
75        let suspend_failures_count = node.create_uint("suspend_failures_count", 0);
76        Self {
77            wake_sources: Default::default(),
78            wake_watchers: Default::default(),
79            node,
80            suspend_duration_histogram,
81            suspend_attempts_count,
82            suspend_successes_count,
83            suspend_failures_count,
84        }
85    }
86}
87
88/// Suspends the container specified by the `payload`.
89pub async fn suspend_container(
90    payload: fstarnixrunner::ManagerSuspendContainerRequest,
91    suspend_context: &Arc<SuspendContext>,
92    kernels: &Kernels,
93) -> Result<
94    Result<fstarnixrunner::ManagerSuspendContainerResponse, fstarnixrunner::SuspendError>,
95    Error,
96> {
97    fuchsia_trace::duration!("power", "starnix-runner:suspending-container");
98    let Some(container_job) = payload.container_job else {
99        warn!(
100            "error suspending container: could not find container job {:?}",
101            payload.container_job
102        );
103        return Ok(Err(fstarnixrunner::SuspendError::SuspendFailure));
104    };
105
106    // These handles need to kept alive until the end of the block, as they will
107    // resume the kernel when dropped.
108    log::info!("Suspending all container processes.");
109    let _suspend_handles = match suspend_job(&container_job).await {
110        Ok(handles) => handles,
111        Err(e) => {
112            warn!("error suspending container {:?}", e);
113            fuchsia_trace::instant!(
114                "power",
115                "starnix-runner:suspend-failed-actual",
116                fuchsia_trace::Scope::Process
117            );
118            return Ok(Err(fstarnixrunner::SuspendError::SuspendFailure));
119        }
120    };
121    log::info!("Finished suspending all container processes.");
122
123    let suspend_start = zx::BootInstant::get();
124    let resume_reason = {
125        // Take locks in a scope that will be closed before awaiting to ensure no deadlock.
126        if let Some(wake_locks) = payload.wake_locks {
127            match wake_locks
128                .wait_one(zx::Signals::EVENT_SIGNALED, zx::MonotonicInstant::ZERO)
129                .to_result()
130            {
131                Ok(_) => {
132                    // There were wake locks active after suspending all processes, resume
133                    // and fail the suspend call.
134                    warn!("error suspending container: Linux wake locks exist");
135                    fuchsia_trace::instant!(
136                        "power",
137                        "starnix-runner:suspend-failed-with-wake-locks",
138                        fuchsia_trace::Scope::Process
139                    );
140                    return Ok(Err(fstarnixrunner::SuspendError::WakeLocksExist));
141                }
142                Err(_) => {}
143            };
144        }
145
146        {
147            log::info!("Notifying wake watchers of container suspend.");
148            let mut watchers = suspend_context.wake_watchers.lock();
149            let (clear_mask, set_mask) = (AWAKE_SIGNAL, ASLEEP_SIGNAL);
150            watchers.retain(|event| match event.signal_peer(clear_mask, set_mask) {
151                Err(zx::Status::PEER_CLOSED) => false,
152                Ok(()) => true,
153                Err(e) => {
154                    log::warn!("Failed to signal wake watcher of suspension: {e:?}");
155                    true
156                }
157            });
158        }
159        log::info!("Pre-drop wake lease");
160        kernels.drop_wake_lease(&container_job)?;
161        log::info!("Post-drop wake lease");
162
163        let wake_sources = suspend_context.wake_sources.lock();
164        let mut wait_items: Vec<zx::WaitItem<'_>> =
165            wake_sources.values().map(|w| w.as_wait_item()).collect();
166
167        // TODO: We will likely have to handle a larger number of wake sources in the
168        // future, at which point we may want to consider a Port-based approach. This
169        // would also allow us to unblock this thread.
170        let wait_result = {
171            fuchsia_trace::duration!("power", "starnix-runner:waiting-on-container-wake");
172            if wait_items.len() > 0 {
173                log::info!("Waiting on container to receive incoming message on wake proxies");
174                zx::object_wait_many(
175                    &mut wait_items,
176                    zx::MonotonicInstant::after(zx::Duration::from_seconds(9)),
177                )
178                .inspect_err(|e| {
179                    warn!("error waiting for wake event {:?}", e);
180                })
181                .map(|_| ())
182            } else {
183                Ok(())
184            }
185        };
186        log::info!("Finished waiting on container wake proxies.");
187
188        let mut resume_reasons: Vec<String> = Vec::new();
189        for (wake_source, wait_item) in wake_sources.values().zip(&wait_items) {
190            if (wait_item.pending() & wait_item.waiting_for()) != zx::Signals::NONE {
191                log::info!("Woke container from sleep for: {}", wake_source.name,);
192                resume_reasons.push(wake_source.name.clone());
193            }
194        }
195
196        if resume_reasons.is_empty() {
197            match wait_result {
198                // Expose the suspend timeout injected by Starnix.
199                Err(zx::Status::TIMED_OUT) => Some("starnix-container-timeout".into()),
200                // An error was already printed earlier. Ok(_) was always silent.
201                _ => None,
202            }
203        } else {
204            Some(resume_reasons.join(","))
205        }
206    };
207
208    log::info!("Pre-acquire wake lease");
209    kernels.acquire_wake_lease(&container_job).await?;
210    log::info!("Post-acquire wake lease");
211
212    log::info!("Notifying wake watchers of container wakeup.");
213    let mut watchers = suspend_context.wake_watchers.lock();
214    let (clear_mask, set_mask) = (ASLEEP_SIGNAL, AWAKE_SIGNAL);
215    watchers.retain(|event| match event.signal_peer(clear_mask, set_mask) {
216        Err(zx::Status::PEER_CLOSED) => false,
217        Ok(()) => true,
218        Err(e) => {
219            log::warn!("Failed to signal wake watcher of wakeup: {e:?}");
220            true
221        }
222    });
223
224    log::info!("Returning successfully from suspend container");
225    Ok(Ok(fstarnixrunner::ManagerSuspendContainerResponse {
226        suspend_time: Some((zx::BootInstant::get() - suspend_start).into_nanos()),
227        resume_reason,
228        ..Default::default()
229    }))
230}
231
232/// Suspends the provided `zx::Job` by suspending each process in the job individually.
233///
234/// Returns the suspend handles for all the suspended processes.
235///
236/// Returns an error if any individual suspend failed. Any suspend handles will be dropped before
237/// the error is returned.
238async fn suspend_job(kernel_job: &zx::Job) -> Result<Vec<zx::NullableHandle>, Error> {
239    let mut handles = std::collections::HashMap::<zx::Koid, zx::NullableHandle>::new();
240    loop {
241        let process_koids = kernel_job.processes().expect("failed to get processes");
242        let mut found_new_process = false;
243        let mut processes = vec![];
244
245        for process_koid in process_koids {
246            if handles.get(&process_koid).is_some() {
247                continue;
248            }
249
250            found_new_process = true;
251
252            if let Ok(process_handle) = kernel_job.get_child(&process_koid, zx::Rights::SAME_RIGHTS)
253            {
254                let process = zx::Process::from(process_handle);
255                match process.suspend() {
256                    Ok(suspend_handle) => {
257                        handles.insert(process_koid, suspend_handle);
258                    }
259                    Err(zx::Status::BAD_STATE) => {
260                        // The process was already dead or dying, and thus can't be suspended.
261                        continue;
262                    }
263                    Err(e) => {
264                        log::warn!("Failed process suspension: {:?}", e);
265                        return Err(e.into());
266                    }
267                };
268                processes.push(process);
269            }
270        }
271
272        for process in processes {
273            let threads = process.threads().expect("failed to get threads");
274            for thread_koid in &threads {
275                fuchsia_trace::duration!("power", "starnix-runner:suspend_kernel", "thread_koid" => *thread_koid);
276                if let Ok(thread_handle) = process.get_child(&thread_koid, zx::Rights::SAME_RIGHTS)
277                {
278                    let thread_obj = zx::Thread::from(thread_handle);
279                    let mut watchdog_count = 0;
280                    loop {
281                        if let Ok(info) = thread_obj.info() {
282                            if let zx::ThreadState::Blocked(zx::ThreadBlockType::Exception(_)) =
283                                info.state
284                            {
285                                let thread_name = thread_obj
286                                    .get_name()
287                                    .map(|n| n.to_string())
288                                    .unwrap_or_else(|_| "unknown".to_string());
289                                log::warn!(
290                                    "Thread {} (Koid: {:?}) is blocked on exception, skipping suspend wait.",
291                                    thread_name,
292                                    thread_koid
293                                );
294                                break;
295                            }
296                        }
297
298                        match thread_obj
299                            .wait_one(
300                                zx::Signals::THREAD_SUSPENDED | zx::Signals::THREAD_TERMINATED,
301                                zx::MonotonicInstant::after(zx::Duration::from_millis(100)),
302                            )
303                            .to_result()
304                        {
305                            Err(zx::Status::TIMED_OUT) => {
306                                watchdog_count += 1;
307                                if watchdog_count == 100 || watchdog_count % 600 == 0 {
308                                    let process_name = process
309                                        .get_name()
310                                        .map(|n| n.to_string())
311                                        .unwrap_or_else(|_| "unknown".to_string());
312                                    let thread_name = thread_obj
313                                        .get_name()
314                                        .map(|n| n.to_string())
315                                        .unwrap_or_else(|_| "unknown".to_string());
316                                    let thread_state = thread_obj
317                                        .info()
318                                        .map(|info| format!("{:?}", info.state))
319                                        .unwrap_or_else(|_| "unknown".to_string());
320                                    log::warn!(
321                                        "[SUSPEND_WATCHDOG] Timeout waiting for task suspension. Thread Koid: {:?} Name: '{}', Process: '{}', State: {}, continuing to wait...",
322                                        thread_koid,
323                                        thread_name,
324                                        process_name,
325                                        thread_state
326                                    );
327                                }
328                            }
329                            Err(e) => {
330                                log::warn!("Error waiting for task suspension: {:?}", e);
331                                return Err(e.into());
332                            }
333                            _ => break,
334                        }
335                    }
336                }
337            }
338        }
339
340        if !found_new_process {
341            break;
342        }
343    }
344
345    Ok(handles.into_values().collect())
346}