kernel_manager/suspend.rs

// Copyright 2025 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use crate::Kernels;
use anyhow::Error;
use fidl::{HandleBased, Peered};
use fidl_fuchsia_starnix_runner as fstarnixrunner;
use fuchsia_sync::Mutex;
use log::warn;
use std::sync::Arc;
use zx::Task;

/// The signal that the kernel raises to indicate that it's awake.
pub const AWAKE_SIGNAL: zx::Signals = zx::Signals::USER_0;

/// The signal that the kernel raises to indicate that it's suspended.
pub const ASLEEP_SIGNAL: zx::Signals = zx::Signals::USER_1;

pub struct WakeSource {
    handle: zx::NullableHandle,
    name: String,
    signals: zx::Signals,
}

impl WakeSource {
    pub fn from_counter(counter: zx::Counter, name: String) -> Self {
        Self { handle: counter.into_handle(), name, signals: zx::Signals::COUNTER_POSITIVE }
    }

    pub fn from_handle(handle: zx::NullableHandle, name: String, signals: zx::Signals) -> Self {
        Self { handle, name, signals }
    }

    fn as_wait_item(&self) -> zx::WaitItem<'_> {
        zx::WaitItem {
            handle: self.handle.as_handle_ref(),
            waitfor: self.signals,
            pending: zx::Signals::empty(),
        }
    }
}

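/// Map from the koid of a wake source's underlying handle to the `WakeSource`
/// itself; `suspend_container` uses the koid to map a pending wait result back
/// to the source that woke the container.
///
/// A hypothetical registration sketch (`counter` and `suspend_context` are
/// illustrative, not defined here; marked `ignore` since it needs a running
/// Fuchsia environment):
///
/// ```ignore
/// // Key the source by the counter's koid so a pending wait result can be
/// // mapped back to it.
/// let koid = counter.get_koid().unwrap();
/// suspend_context.wake_sources.lock().insert(
///     koid,
///     WakeSource::from_counter(counter, "input-proxy".to_string()),
/// );
/// ```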
pub type WakeSources = std::collections::HashMap<zx::Koid, WakeSource>;

#[derive(Default)]
pub struct SuspendContext {
    pub wake_sources: Arc<Mutex<WakeSources>>,
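    /// Event pairs whose peers are signaled with `ASLEEP_SIGNAL` and
    /// `AWAKE_SIGNAL` as the container suspends and resumes. A hypothetical
    /// watcher-side sketch (`watcher_event` stands in for the peer endpoint
    /// held by the watcher):
    ///
    /// ```ignore
    /// let signals = watcher_event
    ///     .wait_one(ASLEEP_SIGNAL, zx::MonotonicInstant::INFINITE)
    ///     .to_result()?;
    /// assert!(signals.contains(ASLEEP_SIGNAL));
    /// ```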
    pub wake_watchers: Arc<Mutex<Vec<zx::EventPair>>>,
}

/// Suspends the container specified by the `payload`.
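///
/// Returns `Ok(Err(_))` for suspend failures that are reported back to the
/// client, and `Err(_)` only for errors internal to the runner.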
pub async fn suspend_container(
    payload: fstarnixrunner::ManagerSuspendContainerRequest,
    suspend_context: &Arc<SuspendContext>,
    kernels: &Kernels,
) -> Result<
    Result<fstarnixrunner::ManagerSuspendContainerResponse, fstarnixrunner::SuspendError>,
    Error,
> {
    fuchsia_trace::duration!("power", "starnix-runner:suspending-container");
    let Some(container_job) = payload.container_job else {
        warn!("error suspending container: no container job was provided in the request");
        return Ok(Err(fstarnixrunner::SuspendError::SuspendFailure));
    };

    // These handles need to be kept alive until the end of the block, as they
    // will resume the kernel when dropped.
    log::info!("Suspending all container processes.");
    let _suspend_handles = match suspend_job(&container_job).await {
        Ok(handles) => handles,
        Err(e) => {
            warn!("error suspending container: {:?}", e);
            fuchsia_trace::instant!(
                "power",
                "starnix-runner:suspend-failed-actual",
                fuchsia_trace::Scope::Process
            );
            return Ok(Err(fstarnixrunner::SuspendError::SuspendFailure));
        }
    };
    log::info!("Finished suspending all container processes.");

    let suspend_start = zx::BootInstant::get();
    let resume_reason = {
        // Take locks in a scope that will be closed before awaiting to ensure no deadlock.
        if let Some(wake_locks) = payload.wake_locks {
            if wake_locks
                .wait_one(zx::Signals::EVENT_SIGNALED, zx::MonotonicInstant::ZERO)
                .to_result()
                .is_ok()
            {
                // There were wake locks active after suspending all processes; resume
                // and fail the suspend call.
                warn!("error suspending container: Linux wake locks exist");
                fuchsia_trace::instant!(
                    "power",
                    "starnix-runner:suspend-failed-with-wake-locks",
                    fuchsia_trace::Scope::Process
                );
                return Ok(Err(fstarnixrunner::SuspendError::WakeLocksExist));
            }
        }

        {
            log::info!("Notifying wake watchers of container suspend.");
            let mut watchers = suspend_context.wake_watchers.lock();
            let (clear_mask, set_mask) = (AWAKE_SIGNAL, ASLEEP_SIGNAL);
            watchers.retain(|event| match event.signal_peer(clear_mask, set_mask) {
                Err(zx::Status::PEER_CLOSED) => false,
                Ok(()) => true,
                Err(e) => {
                    log::warn!("Failed to signal wake watcher of suspension: {e:?}");
                    true
                }
            });
        }
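        // With the container's processes suspended and watchers notified, drop
        // the wake lease so the platform is free to enter a lower power state.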
        kernels.drop_wake_lease(&container_job)?;

        let wake_sources = suspend_context.wake_sources.lock();
        let mut wait_items: Vec<zx::WaitItem<'_>> =
            wake_sources.iter().map(|(_, w)| w.as_wait_item()).collect();

        // TODO: We will likely have to handle a larger number of wake sources in the
        // future, at which point we may want to consider a Port-based approach. This
        // would also allow us to unblock this thread.
        {
            fuchsia_trace::duration!("power", "starnix-runner:waiting-on-container-wake");
            if !wait_items.is_empty() {
                log::info!("Waiting on container to receive incoming message on wake proxies");
                if let Err(e) = zx::object_wait_many(
                    &mut wait_items,
                    zx::MonotonicInstant::after(zx::MonotonicDuration::from_seconds(9)),
                ) {
                    warn!("error waiting for wake event {:?}", e);
                }
            }
        }
        log::info!("Finished waiting on container wake proxies.");

        let mut resume_reasons: Vec<String> = Vec::new();
        for wait_item in &wait_items {
            if (wait_item.pending & wait_item.waitfor) != zx::Signals::NONE {
                let koid = wait_item.handle.koid().unwrap();
                if let Some(event) = wake_sources.get(&koid) {
                    log::info!("Woke container from sleep for: {}", event.name);
                    resume_reasons.push(event.name.clone());
                }
            }
        }

        if resume_reasons.is_empty() { None } else { Some(resume_reasons.join(",")) }
    };

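    // Re-acquire a wake lease for the container before signaling watchers that
    // it is awake again.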
    kernels.acquire_wake_lease(&container_job).await?;

    log::info!("Notifying wake watchers of container wakeup.");
    let mut watchers = suspend_context.wake_watchers.lock();
    let (clear_mask, set_mask) = (ASLEEP_SIGNAL, AWAKE_SIGNAL);
    watchers.retain(|event| match event.signal_peer(clear_mask, set_mask) {
        Err(zx::Status::PEER_CLOSED) => false,
        Ok(()) => true,
        Err(e) => {
            log::warn!("Failed to signal wake watcher of wakeup: {e:?}");
            true
        }
    });

    log::info!("Returning successfully from suspend container");
    Ok(Ok(fstarnixrunner::ManagerSuspendContainerResponse {
        suspend_time: Some((zx::BootInstant::get() - suspend_start).into_nanos()),
        resume_reason,
        ..Default::default()
    }))
}

/// Suspends the provided `zx::Job` by suspending each process in the job individually.
///
/// Returns the suspend handles for all the suspended processes.
///
/// Returns an error if any individual suspend failed. Any suspend handles will be dropped before
/// the error is returned.
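///
/// A hypothetical call-site sketch (`container_job` is assumed to be the
/// container's `zx::Job`; marked `ignore` because it needs a running Fuchsia
/// environment):
///
/// ```ignore
/// let _suspend_handles = suspend_job(&container_job).await?;
/// // Every process in the job is now suspended; dropping the handles
/// // resumes them.
/// ```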
async fn suspend_job(kernel_job: &zx::Job) -> Result<Vec<zx::NullableHandle>, Error> {
    let mut handles = std::collections::HashMap::<zx::Koid, zx::NullableHandle>::new();
    loop {
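        // Processes can spawn new processes while this pass is underway, so
        // keep rescanning the job until a pass finds no process that hasn't
        // already been suspended.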
        let process_koids = kernel_job.processes().expect("failed to get processes");
        let mut found_new_process = false;
        let mut processes = vec![];

        for process_koid in process_koids {
            if handles.contains_key(&process_koid) {
                continue;
            }

            found_new_process = true;

            if let Ok(process_handle) = kernel_job.get_child(&process_koid, zx::Rights::SAME_RIGHTS)
            {
                let process = zx::Process::from_handle(process_handle);
                match process.suspend() {
                    Ok(suspend_handle) => {
                        handles.insert(process_koid, suspend_handle);
                    }
                    Err(zx::Status::BAD_STATE) => {
                        // The process was already dead or dying, and thus can't be suspended.
                        continue;
                    }
                    Err(e) => {
                        log::warn!("Failed process suspension: {:?}", e);
                        return Err(e.into());
                    }
                };
                processes.push(process);
            }
        }

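        // Suspension is asynchronous: `Process::suspend` returns a handle
        // immediately, so wait for every thread to reach THREAD_SUSPENDED
        // before deciding whether another scan of the job is needed.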
        for process in processes {
            let threads = process.threads().expect("failed to get threads");
            for thread_koid in &threads {
                fuchsia_trace::duration!("power", "starnix-runner:suspend_kernel", "thread_koid" => *thread_koid);
                if let Ok(thread) = process.get_child(thread_koid, zx::Rights::SAME_RIGHTS) {
                    if let Err(e) = thread
                        .wait_one(
                            zx::Signals::THREAD_SUSPENDED,
                            zx::MonotonicInstant::after(zx::MonotonicDuration::INFINITE),
                        )
                        .to_result()
                    {
                        log::warn!("Error waiting for task suspension: {:?}", e);
                        return Err(e.into());
                    }
                }
            }
        }

        if !found_new_process {
            break;
        }
    }

    Ok(handles.into_values().collect())
}