1use crate::Kernels;
6use anyhow::Error;
7use fidl::Peered;
8use fidl_fuchsia_starnix_runner as fstarnixrunner;
9use fuchsia_inspect::{self as inspect, UintExponentialHistogramProperty, UintProperty};
10use log::warn;
11use starnix_sync::{LockDepMutex, TerminalLock};
12use std::sync::Arc;
13use zx::Task;
14
/// Signal raised on the peer of every wake watcher's event pair once the container has been
/// woken; it is cleared again when the container is about to be suspended.
pub const AWAKE_SIGNAL: zx::Signals = zx::Signals::USER_0;

/// Signal raised on the peer of every wake watcher's event pair when the container is about to
/// be suspended; it is cleared again once the container has been woken.
pub const ASLEEP_SIGNAL: zx::Signals = zx::Signals::USER_1;
20
/// A kernel object, together with the signals on it, that can wake a suspended container.
pub struct WakeSource {
    /// Handle that is waited on while the container is suspended.
    handle: zx::NullableHandle,
    /// Name that is logged, and reported as the resume reason, when this source fires.
    name: String,
    /// Signals on `handle` that count as a wake event.
    signals: zx::Signals,
}
26
27impl WakeSource {
28 pub fn from_counter(counter: zx::Counter, name: String) -> Self {
29 Self { handle: counter.into_handle(), name, signals: zx::Signals::COUNTER_POSITIVE }
30 }
31
32 pub fn from_handle(handle: zx::NullableHandle, name: String, signals: zx::Signals) -> Self {
33 Self { handle, name, signals }
34 }
35
36 fn as_wait_item(&self) -> zx::WaitItem<'_> {
37 self.handle.wait_item(self.signals)
38 }
39}
40
/// Registered wake sources, keyed by koid.
// NOTE(review): presumably the koid of the object each source wraps; the insertion site is not
// visible here, so confirm against the code that registers wake sources.
pub type WakeSources = std::collections::HashMap<zx::Koid, WakeSource>;
42
/// State shared by container suspend operations: the registered wake sources and wake
/// watchers, plus the Inspect properties that publish suspend statistics.
pub struct SuspendContext {
    /// Wake sources that `suspend_container` waits on while the container is suspended.
    pub wake_sources: Arc<LockDepMutex<WakeSources, TerminalLock>>,
    /// Event pairs whose peers are signaled with `ASLEEP_SIGNAL` / `AWAKE_SIGNAL` as the
    /// container suspends and resumes. Entries whose peer has closed are pruned when signaled.
    pub wake_watchers: Arc<LockDepMutex<Vec<zx::EventPair>, TerminalLock>>,

    /// Inspect node under which the properties below are created (`suspend`, a child of the
    /// component inspector's root, when built through `Default`).
    pub node: inspect::Node,
    /// Exponential histogram published as `suspend_duration_boot_ns`.
    // NOTE(review): nothing in the visible code records into this or the counters below;
    // presumably the caller of `suspend_container` updates them. Confirm.
    pub suspend_duration_histogram: UintExponentialHistogramProperty,
    /// Counter published as `suspend_attempts_count`.
    pub suspend_attempts_count: UintProperty,
    /// Counter published as `suspend_successes_count`.
    pub suspend_successes_count: UintProperty,
    /// Counter published as `suspend_failures_count`.
    pub suspend_failures_count: UintProperty,
}
59
60impl Default for SuspendContext {
61 fn default() -> Self {
62 let inspector = inspect::component::inspector();
63 let node = inspector.root().create_child("suspend");
64 let suspend_duration_histogram = node.create_uint_exponential_histogram(
65 "suspend_duration_boot_ns",
66 inspect::ExponentialHistogramParams {
67 floor: 100_000,
68 initial_step: 100_000,
69 step_multiplier: 2,
70 buckets: 32,
71 },
72 );
73 let suspend_attempts_count = node.create_uint("suspend_attempts_count", 0);
74 let suspend_successes_count = node.create_uint("suspend_successes_count", 0);
75 let suspend_failures_count = node.create_uint("suspend_failures_count", 0);
76 Self {
77 wake_sources: Default::default(),
78 wake_watchers: Default::default(),
79 node,
80 suspend_duration_histogram,
81 suspend_attempts_count,
82 suspend_successes_count,
83 suspend_failures_count,
84 }
85 }
86}
87
88pub async fn suspend_container(
90 payload: fstarnixrunner::ManagerSuspendContainerRequest,
91 suspend_context: &Arc<SuspendContext>,
92 kernels: &Kernels,
93) -> Result<
94 Result<fstarnixrunner::ManagerSuspendContainerResponse, fstarnixrunner::SuspendError>,
95 Error,
96> {
97 fuchsia_trace::duration!("power", "starnix-runner:suspending-container");
98 let Some(container_job) = payload.container_job else {
99 warn!(
100 "error suspending container: could not find container job {:?}",
101 payload.container_job
102 );
103 return Ok(Err(fstarnixrunner::SuspendError::SuspendFailure));
104 };
105
106 log::info!("Suspending all container processes.");
109 let _suspend_handles = match suspend_job(&container_job).await {
110 Ok(handles) => handles,
111 Err(e) => {
112 warn!("error suspending container {:?}", e);
113 fuchsia_trace::instant!(
114 "power",
115 "starnix-runner:suspend-failed-actual",
116 fuchsia_trace::Scope::Process
117 );
118 return Ok(Err(fstarnixrunner::SuspendError::SuspendFailure));
119 }
120 };
121 log::info!("Finished suspending all container processes.");
122
123 let suspend_start = zx::BootInstant::get();
124 let resume_reason = {
125 if let Some(wake_locks) = payload.wake_locks {
127 match wake_locks
128 .wait_one(zx::Signals::EVENT_SIGNALED, zx::MonotonicInstant::ZERO)
129 .to_result()
130 {
131 Ok(_) => {
132 warn!("error suspending container: Linux wake locks exist");
135 fuchsia_trace::instant!(
136 "power",
137 "starnix-runner:suspend-failed-with-wake-locks",
138 fuchsia_trace::Scope::Process
139 );
140 return Ok(Err(fstarnixrunner::SuspendError::WakeLocksExist));
141 }
142 Err(_) => {}
143 };
144 }
145
146 {
147 log::info!("Notifying wake watchers of container suspend.");
148 let mut watchers = suspend_context.wake_watchers.lock();
149 let (clear_mask, set_mask) = (AWAKE_SIGNAL, ASLEEP_SIGNAL);
150 watchers.retain(|event| match event.signal_peer(clear_mask, set_mask) {
151 Err(zx::Status::PEER_CLOSED) => false,
152 Ok(()) => true,
153 Err(e) => {
154 log::warn!("Failed to signal wake watcher of suspension: {e:?}");
155 true
156 }
157 });
158 }
159 log::info!("Pre-drop wake lease");
160 kernels.drop_wake_lease(&container_job)?;
161 log::info!("Post-drop wake lease");
162
163 let wake_sources = suspend_context.wake_sources.lock();
164 let mut wait_items: Vec<zx::WaitItem<'_>> =
165 wake_sources.values().map(|w| w.as_wait_item()).collect();
166
167 let wait_result = {
171 fuchsia_trace::duration!("power", "starnix-runner:waiting-on-container-wake");
172 if wait_items.len() > 0 {
173 log::info!("Waiting on container to receive incoming message on wake proxies");
174 zx::object_wait_many(
175 &mut wait_items,
176 zx::MonotonicInstant::after(zx::Duration::from_seconds(9)),
177 )
178 .inspect_err(|e| {
179 warn!("error waiting for wake event {:?}", e);
180 })
181 .map(|_| ())
182 } else {
183 Ok(())
184 }
185 };
186 log::info!("Finished waiting on container wake proxies.");
187
188 let mut resume_reasons: Vec<String> = Vec::new();
189 for (wake_source, wait_item) in wake_sources.values().zip(&wait_items) {
190 if (wait_item.pending() & wait_item.waiting_for()) != zx::Signals::NONE {
191 log::info!("Woke container from sleep for: {}", wake_source.name,);
192 resume_reasons.push(wake_source.name.clone());
193 }
194 }
195
196 if resume_reasons.is_empty() {
197 match wait_result {
198 Err(zx::Status::TIMED_OUT) => Some("starnix-container-timeout".into()),
200 _ => None,
202 }
203 } else {
204 Some(resume_reasons.join(","))
205 }
206 };
207
208 log::info!("Pre-acquire wake lease");
209 kernels.acquire_wake_lease(&container_job).await?;
210 log::info!("Post-acquire wake lease");
211
212 log::info!("Notifying wake watchers of container wakeup.");
213 let mut watchers = suspend_context.wake_watchers.lock();
214 let (clear_mask, set_mask) = (ASLEEP_SIGNAL, AWAKE_SIGNAL);
215 watchers.retain(|event| match event.signal_peer(clear_mask, set_mask) {
216 Err(zx::Status::PEER_CLOSED) => false,
217 Ok(()) => true,
218 Err(e) => {
219 log::warn!("Failed to signal wake watcher of wakeup: {e:?}");
220 true
221 }
222 });
223
224 log::info!("Returning successfully from suspend container");
225 Ok(Ok(fstarnixrunner::ManagerSuspendContainerResponse {
226 suspend_time: Some((zx::BootInstant::get() - suspend_start).into_nanos()),
227 resume_reason,
228 ..Default::default()
229 }))
230}
231
232async fn suspend_job(kernel_job: &zx::Job) -> Result<Vec<zx::NullableHandle>, Error> {
239 let mut handles = std::collections::HashMap::<zx::Koid, zx::NullableHandle>::new();
240 loop {
241 let process_koids = kernel_job.processes().expect("failed to get processes");
242 let mut found_new_process = false;
243 let mut processes = vec![];
244
245 for process_koid in process_koids {
246 if handles.get(&process_koid).is_some() {
247 continue;
248 }
249
250 found_new_process = true;
251
252 if let Ok(process_handle) = kernel_job.get_child(&process_koid, zx::Rights::SAME_RIGHTS)
253 {
254 let process = zx::Process::from(process_handle);
255 match process.suspend() {
256 Ok(suspend_handle) => {
257 handles.insert(process_koid, suspend_handle);
258 }
259 Err(zx::Status::BAD_STATE) => {
260 continue;
262 }
263 Err(e) => {
264 log::warn!("Failed process suspension: {:?}", e);
265 return Err(e.into());
266 }
267 };
268 processes.push(process);
269 }
270 }
271
272 for process in processes {
273 let threads = process.threads().expect("failed to get threads");
274 for thread_koid in &threads {
275 fuchsia_trace::duration!("power", "starnix-runner:suspend_kernel", "thread_koid" => *thread_koid);
276 if let Ok(thread_handle) = process.get_child(&thread_koid, zx::Rights::SAME_RIGHTS)
277 {
278 let thread_obj = zx::Thread::from(thread_handle);
279 let mut watchdog_count = 0;
280 loop {
281 if let Ok(info) = thread_obj.info() {
282 if let zx::ThreadState::Blocked(zx::ThreadBlockType::Exception(_)) =
283 info.state
284 {
285 let thread_name = thread_obj
286 .get_name()
287 .map(|n| n.to_string())
288 .unwrap_or_else(|_| "unknown".to_string());
289 log::warn!(
290 "Thread {} (Koid: {:?}) is blocked on exception, skipping suspend wait.",
291 thread_name,
292 thread_koid
293 );
294 break;
295 }
296 }
297
298 match thread_obj
299 .wait_one(
300 zx::Signals::THREAD_SUSPENDED | zx::Signals::THREAD_TERMINATED,
301 zx::MonotonicInstant::after(zx::Duration::from_millis(100)),
302 )
303 .to_result()
304 {
305 Err(zx::Status::TIMED_OUT) => {
306 watchdog_count += 1;
307 if watchdog_count == 100 || watchdog_count % 600 == 0 {
308 let process_name = process
309 .get_name()
310 .map(|n| n.to_string())
311 .unwrap_or_else(|_| "unknown".to_string());
312 let thread_name = thread_obj
313 .get_name()
314 .map(|n| n.to_string())
315 .unwrap_or_else(|_| "unknown".to_string());
316 let thread_state = thread_obj
317 .info()
318 .map(|info| format!("{:?}", info.state))
319 .unwrap_or_else(|_| "unknown".to_string());
320 log::warn!(
321 "[SUSPEND_WATCHDOG] Timeout waiting for task suspension. Thread Koid: {:?} Name: '{}', Process: '{}', State: {}, continuing to wait...",
322 thread_koid,
323 thread_name,
324 process_name,
325 thread_state
326 );
327 }
328 }
329 Err(e) => {
330 log::warn!("Error waiting for task suspension: {:?}", e);
331 return Err(e.into());
332 }
333 _ => break,
334 }
335 }
336 }
337 }
338 }
339
340 if !found_new_process {
341 break;
342 }
343 }
344
345 Ok(handles.into_values().collect())
346}