// kernel_manager/suspend.rs

// Copyright 2025 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use crate::Kernels;
use anyhow::Error;
use fidl::{HandleBased, Peered};
use fidl_fuchsia_starnix_runner as fstarnixrunner;
use fuchsia_sync::Mutex;
use log::warn;
use std::sync::Arc;
use zx::Task;

14/// The signal that the kernel raises to indicate that it's awake.
15pub const AWAKE_SIGNAL: zx::Signals = zx::Signals::USER_0;
16
17/// The signal that the kernel raises to indicate that it's suspended.
18pub const ASLEEP_SIGNAL: zx::Signals = zx::Signals::USER_1;
19
20pub struct WakeSource {
21    handle: zx::NullableHandle,
22    name: String,
23    signals: zx::Signals,
24}
25
26impl WakeSource {
27    pub fn from_counter(counter: zx::Counter, name: String) -> Self {
28        Self { handle: counter.into_handle(), name, signals: zx::Signals::COUNTER_POSITIVE }
29    }
30
31    pub fn from_handle(handle: zx::NullableHandle, name: String, signals: zx::Signals) -> Self {
32        Self { handle, name, signals }
33    }
34
35    fn as_wait_item(&self) -> zx::WaitItem<'_> {
36        zx::WaitItem {
37            handle: self.handle.as_handle_ref(),
38            waitfor: self.signals,
39            pending: zx::Signals::empty(),
40        }
41    }
42}
43
44pub type WakeSources = std::collections::HashMap<zx::Koid, WakeSource>;
45
46#[derive(Default)]
47pub struct SuspendContext {
48    pub wake_sources: Arc<Mutex<WakeSources>>,
49    pub wake_watchers: Arc<Mutex<Vec<zx::EventPair>>>,
50}
51
52/// Suspends the container specified by the `payload`.
53pub async fn suspend_container(
54    payload: fstarnixrunner::ManagerSuspendContainerRequest,
55    suspend_context: &Arc<SuspendContext>,
56    kernels: &Kernels,
57) -> Result<
58    Result<fstarnixrunner::ManagerSuspendContainerResponse, fstarnixrunner::SuspendError>,
59    Error,
60> {
61    fuchsia_trace::duration!("power", "starnix-runner:suspending-container");
62    let Some(container_job) = payload.container_job else {
63        warn!(
64            "error suspending container: could not find container job {:?}",
65            payload.container_job
66        );
67        return Ok(Err(fstarnixrunner::SuspendError::SuspendFailure));
68    };
69
70    // These handles need to kept alive until the end of the block, as they will
71    // resume the kernel when dropped.
72    log::info!("Suspending all container processes.");
73    let _suspend_handles = match suspend_job(&container_job).await {
74        Ok(handles) => handles,
75        Err(e) => {
76            warn!("error suspending container {:?}", e);
77            fuchsia_trace::instant!(
78                "power",
79                "starnix-runner:suspend-failed-actual",
80                fuchsia_trace::Scope::Process
81            );
82            return Ok(Err(fstarnixrunner::SuspendError::SuspendFailure));
83        }
84    };
85    log::info!("Finished suspending all container processes.");
86
87    let suspend_start = zx::BootInstant::get();
88    let resume_reason = {
89        // Take locks in a scope that will be closed before awaiting to ensure no deadlock.
90        if let Some(wake_locks) = payload.wake_locks {
91            match wake_locks
92                .wait_one(zx::Signals::EVENT_SIGNALED, zx::MonotonicInstant::ZERO)
93                .to_result()
94            {
95                Ok(_) => {
96                    // There were wake locks active after suspending all processes, resume
97                    // and fail the suspend call.
98                    warn!("error suspending container: Linux wake locks exist");
99                    fuchsia_trace::instant!(
100                        "power",
101                        "starnix-runner:suspend-failed-with-wake-locks",
102                        fuchsia_trace::Scope::Process
103                    );
104                    return Ok(Err(fstarnixrunner::SuspendError::WakeLocksExist));
105                }
106                Err(_) => {}
107            };
108        }
109
110        {
111            log::info!("Notifying wake watchers of container suspend.");
112            let mut watchers = suspend_context.wake_watchers.lock();
113            let (clear_mask, set_mask) = (AWAKE_SIGNAL, ASLEEP_SIGNAL);
114            watchers.retain(|event| match event.signal_peer(clear_mask, set_mask) {
115                Err(zx::Status::PEER_CLOSED) => false,
116                Ok(()) => true,
117                Err(e) => {
118                    log::warn!("Failed to signal wake watcher of suspension: {e:?}");
119                    true
120                }
121            });
122        }
123        log::info!("Pre-drop wake lease");
124        kernels.drop_wake_lease(&container_job)?;
125        log::info!("Post-drop wake lease");
126
127        let wake_sources = suspend_context.wake_sources.lock();
128        let mut wait_items: Vec<zx::WaitItem<'_>> =
129            wake_sources.iter().map(|(_, w)| w.as_wait_item()).collect();
130
131        // TODO: We will likely have to handle a larger number of wake sources in the
132        // future, at which point we may want to consider a Port-based approach. This
133        // would also allow us to unblock this thread.
134        let wait_result = {
135            fuchsia_trace::duration!("power", "starnix-runner:waiting-on-container-wake");
136            if wait_items.len() > 0 {
137                log::info!("Waiting on container to receive incoming message on wake proxies");
138                zx::object_wait_many(
139                    &mut wait_items,
140                    zx::MonotonicInstant::after(zx::Duration::from_seconds(9)),
141                )
142                .inspect_err(|e| {
143                    warn!("error waiting for wake event {:?}", e);
144                })
145                .map(|_| ())
146            } else {
147                Ok(())
148            }
149        };
150        log::info!("Finished waiting on container wake proxies.");
151
152        let mut resume_reasons: Vec<String> = Vec::new();
153        for wait_item in &wait_items {
154            if (wait_item.pending & wait_item.waitfor) != zx::Signals::NONE {
155                let koid = wait_item.handle.koid().unwrap();
156                if let Some(event) = wake_sources.get(&koid) {
157                    log::info!("Woke container from sleep for: {}", event.name,);
158                    resume_reasons.push(event.name.clone());
159                }
160            }
161        }
162
163        if resume_reasons.is_empty() {
164            match wait_result {
165                // Expose the suspend timeout injected by Starnix.
166                Err(zx::Status::TIMED_OUT) => Some("starnix-container-timeout".into()),
167                // An error was already printed earlier. Ok(_) was always silent.
168                _ => None,
169            }
170        } else {
171            Some(resume_reasons.join(","))
172        }
173    };
174
175    log::info!("Pre-acquire wake lease");
176    kernels.acquire_wake_lease(&container_job).await?;
177    log::info!("Post-acquire wake lease");
178
179    log::info!("Notifying wake watchers of container wakeup.");
180    let mut watchers = suspend_context.wake_watchers.lock();
181    let (clear_mask, set_mask) = (ASLEEP_SIGNAL, AWAKE_SIGNAL);
182    watchers.retain(|event| match event.signal_peer(clear_mask, set_mask) {
183        Err(zx::Status::PEER_CLOSED) => false,
184        Ok(()) => true,
185        Err(e) => {
186            log::warn!("Failed to signal wake watcher of wakeup: {e:?}");
187            true
188        }
189    });
190
191    log::info!("Returning successfully from suspend container");
192    Ok(Ok(fstarnixrunner::ManagerSuspendContainerResponse {
193        suspend_time: Some((zx::BootInstant::get() - suspend_start).into_nanos()),
194        resume_reason,
195        ..Default::default()
196    }))
197}
198
199/// Suspends the provided `zx::Job` by suspending each process in the job individually.
200///
201/// Returns the suspend handles for all the suspended processes.
202///
203/// Returns an error if any individual suspend failed. Any suspend handles will be dropped before
204/// the error is returned.
205async fn suspend_job(kernel_job: &zx::Job) -> Result<Vec<zx::NullableHandle>, Error> {
206    let mut handles = std::collections::HashMap::<zx::Koid, zx::NullableHandle>::new();
207    loop {
208        let process_koids = kernel_job.processes().expect("failed to get processes");
209        let mut found_new_process = false;
210        let mut processes = vec![];
211
212        for process_koid in process_koids {
213            if handles.get(&process_koid).is_some() {
214                continue;
215            }
216
217            found_new_process = true;
218
219            if let Ok(process_handle) = kernel_job.get_child(&process_koid, zx::Rights::SAME_RIGHTS)
220            {
221                let process = zx::Process::from(process_handle);
222                match process.suspend() {
223                    Ok(suspend_handle) => {
224                        handles.insert(process_koid, suspend_handle);
225                    }
226                    Err(zx::Status::BAD_STATE) => {
227                        // The process was already dead or dying, and thus can't be suspended.
228                        continue;
229                    }
230                    Err(e) => {
231                        log::warn!("Failed process suspension: {:?}", e);
232                        return Err(e.into());
233                    }
234                };
235                processes.push(process);
236            }
237        }
238
239        for process in processes {
240            let threads = process.threads().expect("failed to get threads");
241            for thread_koid in &threads {
242                fuchsia_trace::duration!("power", "starnix-runner:suspend_kernel", "thread_koid" => *thread_koid);
243                if let Ok(thread) = process.get_child(&thread_koid, zx::Rights::SAME_RIGHTS) {
244                    match thread
245                        .wait_one(
246                            zx::Signals::THREAD_SUSPENDED | zx::Signals::THREAD_TERMINATED,
247                            zx::MonotonicInstant::after(zx::MonotonicDuration::INFINITE),
248                        )
249                        .to_result()
250                    {
251                        Err(e) => {
252                            log::warn!("Error waiting for task suspension: {:?}", e);
253                            return Err(e.into());
254                        }
255                        _ => {}
256                    }
257                }
258            }
259        }
260
261        if !found_new_process {
262            break;
263        }
264    }
265
266    Ok(handles.into_values().collect())
267}