shutdown_shim/
lib.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4mod reboot_reasons;
5mod shutdown_watcher;
6
7use crate::reboot_reasons::RebootReasons;
8use crate::shutdown_watcher::ShutdownWatcher;
9use anyhow::{format_err, Context};
10use fidl::endpoints::{DiscoverableProtocolMarker, ServerEnd};
11use fidl::HandleBased;
12use fidl_fuchsia_hardware_power_statecontrol::{
13    AdminMexecRequest, AdminRequest, AdminRequestStream, RebootMethodsWatcherRegisterRequestStream,
14    RebootOptions, RebootReason, RebootReason2,
15};
16use fidl_fuchsia_power::CollaborativeRebootInitiatorRequestStream;
17use fidl_fuchsia_power_internal::{
18    CollaborativeRebootReason, CollaborativeRebootSchedulerRequestStream,
19};
20use fidl_fuchsia_sys2::SystemControllerMarker;
21use fidl_fuchsia_system_state::{
22    SystemPowerState, SystemStateTransitionRequest, SystemStateTransitionRequestStream,
23};
24use fuchsia_component::client;
25use fuchsia_component::directory::{AsRefDirectory, Directory};
26use fuchsia_component::server::ServiceFs;
27use fuchsia_sync::Mutex;
28use futures::channel::mpsc;
29use futures::lock::Mutex as AMutex;
30use futures::prelude::*;
31use futures::select;
32use shutdown_shim_config::Config;
33use std::pin::pin;
34use std::sync::{Arc, LazyLock};
35use std::time::Duration;
36use {fidl_fuchsia_io as fio, fidl_fuchsia_power_system as fsystem, fuchsia_async as fasync};
37
38mod collaborative_reboot;
39
// The amount of time that the shim will spend waiting for a manually triggered
// system shutdown to finish before forcefully restarting the system.
const MANUAL_SYSTEM_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(60 * 60);
43
/// The set of FIDL protocol connections accepted on the outgoing `svc`
/// directory (see the `add_fidl_service` registrations in `main`).
enum IncomingRequest {
    // fuchsia.system.state/SystemStateTransition
    SystemStateTransition(SystemStateTransitionRequestStream),
    // fuchsia.hardware.power.statecontrol/Admin
    Admin(AdminRequestStream),
    // fuchsia.power/CollaborativeRebootInitiator
    CollaborativeRebootInitiator(CollaborativeRebootInitiatorRequestStream),
    // fuchsia.power.internal/CollaborativeRebootScheduler
    CollaborativeRebootScheduler(CollaborativeRebootSchedulerRequestStream),
    // fuchsia.hardware.power.statecontrol/RebootMethodsWatcherRegister
    RebootMethodsWatcherRegister(RebootMethodsWatcherRegisterRequestStream),
}
51
/// Entry point for the shutdown-shim.
///
/// * `svc` - incoming service directory, used to reach `InspectSink`,
///   `SystemController`, and `ActivityGovernor`.
/// * `directory_request` - server end on which the shim's outgoing `svc`
///   directory is served.
/// * `config_vmo` - structured config; must be `Some`, the shim panics
///   otherwise.
///
/// This function is not expected to return under normal operation: it serves
/// requests until either the service loop ends or an abort is signalled, and
/// then returns an error so the caller can treat the exit as unexpected.
pub async fn main(
    svc: impl Directory + AsRefDirectory + 'static,
    directory_request: ServerEnd<fio::DirectoryMarker>,
    config_vmo: Option<zx::Vmo>,
) -> Result<(), anyhow::Error> {
    // Check the config
    let config = Config::from_vmo(&config_vmo.expect("Config VMO handle must be present."))?;
    println!("[shutdown-shim]: started with config: {:?}", config);

    // Initialize the inspect framework.
    //
    // Note that shutdown-shim is a builtin component of ComponentManager, which
    // means we must setup the inspector in a non-conventional way:
    // * We must initialize the connection to the inspect sink relative to the
    // `svc` directory.
    // * We must not use the global `fuchsia_inspect::component::inspector()`;
    //   this instance is a singleton and would be shared with
    //   ComponentManager. Instead we declare our own local inspector.
    let inspector = fuchsia_inspect::Inspector::new(fuchsia_inspect::InspectorConfig::default());
    let (client, server) =
        fidl::endpoints::create_endpoints::<fidl_fuchsia_inspect::InspectSinkMarker>();
    // Note: The inspect server is detached, so we need not poll it.
    let _inspect_server_task = inspect_runtime::publish(
        &inspector,
        inspect_runtime::PublishOptions::default().on_inspect_sink_client(client),
    )
    .ok_or_else(|| format_err!("failed to initialize inspect framework"))?;
    // Hand the server end of the InspectSink connection to the incoming `svc`
    // directory (see the note above about the non-conventional setup).
    svc.as_ref_directory()
        .open(
            fidl_fuchsia_inspect::InspectSinkMarker::PROTOCOL_NAME,
            fio::Flags::PROTOCOL_SERVICE,
            server.into_channel().into(),
        )
        .context("failed to connect to InspectSink")?;

    // Register every protocol this shim serves under `svc/` (one enum variant
    // per protocol; see `IncomingRequest`).
    let mut service_fs = ServiceFs::new();
    service_fs.dir("svc").add_fidl_service(IncomingRequest::Admin);
    service_fs.dir("svc").add_fidl_service(IncomingRequest::RebootMethodsWatcherRegister);
    service_fs.dir("svc").add_fidl_service(IncomingRequest::SystemStateTransition);
    service_fs.dir("svc").add_fidl_service(IncomingRequest::CollaborativeRebootInitiator);
    service_fs.dir("svc").add_fidl_service(IncomingRequest::CollaborativeRebootScheduler);
    service_fs.serve_connection(directory_request).context("failed to serve outgoing namespace")?;

    // `abort_tx` lets any handler force this function to exit (and thus the
    // process to terminate) via `ProgramContext::abort`.
    let (abort_tx, mut abort_rx) = mpsc::unbounded::<()>();
    let (cr_state, cr_cancellations) = collaborative_reboot::new(&inspector);
    let ctx = ProgramContext {
        svc,
        abort_tx,
        collaborative_reboot: cr_state,
        shutdown_pending: Arc::new(AMutex::new(false)),
        shutdown_watcher: ShutdownWatcher::new_with_inspector(&inspector),
        config,
    };

    let shutdown_watcher = ctx.shutdown_watcher.clone();
    // Serve all incoming connections concurrently, dispatching each stream to
    // its handler on `ctx` (or on the shutdown watcher).
    let mut service_fut = service_fs
        .for_each_concurrent(None, |request: IncomingRequest| async {
            match request {
                IncomingRequest::Admin(stream) => ctx.handle_admin_request(stream).await,
                IncomingRequest::RebootMethodsWatcherRegister(stream) => {
                    shutdown_watcher.clone().handle_reboot_register_request(stream).await;
                }
                IncomingRequest::SystemStateTransition(stream) => {
                    ctx.handle_system_state_transition(stream).await
                }
                IncomingRequest::CollaborativeRebootInitiator(stream) => {
                    ctx.collaborative_reboot.handle_initiator_requests(stream, &ctx).await
                }
                IncomingRequest::CollaborativeRebootScheduler(stream) => {
                    ctx.collaborative_reboot.handle_scheduler_requests(stream).await
                }
            }
        })
        .fuse();
    let collaborative_reboot_cancellation_fut = pin!(cr_cancellations.run());
    let mut collaborative_reboot_cancellation_fut = collaborative_reboot_cancellation_fut.fuse();
    let mut abort_fut = abort_rx.next().fuse();

    // Run until the service loop finishes or an abort is requested. The
    // cancellation future is expected to run forever.
    select! {
        () = service_fut => {},
        () = collaborative_reboot_cancellation_fut => unreachable!(),
        _ = abort_fut => {},
    };

    // Reaching this point means we stopped serving; surface that as an error.
    Err(format_err!("exited unexpectedly"))
}
138
/// Shared state and capabilities used by all of the shim's request handlers.
struct ProgramContext<D: Directory + AsRefDirectory> {
    /// The incoming service directory, used to connect to other protocols
    /// (component_manager's `SystemController`, the activity governor).
    svc: D,
    /// Sending on this channel makes `main`'s select loop exit, terminating
    /// the program (see `ProgramContext::abort`).
    abort_tx: mpsc::UnboundedSender<()>,
    /// State backing the collaborative-reboot initiator/scheduler protocols.
    collaborative_reboot: collaborative_reboot::State,

    /// Tracks the current shutdown request state. Used to ignore shutdown requests while a current
    /// request is being processed.
    shutdown_pending: Arc<AMutex<bool>>,

    /// Registers reboot watchers and delivers shutdown reasons to them.
    shutdown_watcher: Arc<ShutdownWatcher>,
    /// Structured configuration (e.g. `suspend_enabled`).
    config: Config,
}
151
152impl<D: Directory + AsRefDirectory> ProgramContext<D> {
153    async fn handle_admin_request(&self, mut stream: AdminRequestStream) {
154        while let Ok(Some(request)) = stream.try_next().await {
155            match request {
156                AdminRequest::PowerFullyOn { responder, .. } => {
157                    let _ = responder.send(Err(zx::Status::NOT_SUPPORTED.into_raw()));
158                }
159                // TODO(https://fxbug.dev/385742868): Delete this method once
160                // it's removed from the API.
161                AdminRequest::Reboot { reason, responder } => {
162                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
163                    let target_state = if reason == RebootReason::OutOfMemory {
164                        SystemPowerState::RebootKernelInitiated
165                    } else {
166                        SystemPowerState::Reboot
167                    };
168                    set_system_power_state(target_state);
169                    let res = self
170                        .forward_command(
171                            target_state,
172                            Some(RebootReasons::from_deprecated(&reason)),
173                            None,
174                        )
175                        .await;
176                    let _ = responder.send(res.map_err(|s| s.into_raw()));
177                }
178                AdminRequest::PerformReboot { options, responder } => {
179                    let res = self.perform_reboot(options).await;
180                    let _ = responder.send(res.map_err(|s| s.into_raw()));
181                }
182                AdminRequest::RebootToBootloader { responder } => {
183                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
184                    let target_state = SystemPowerState::RebootBootloader;
185                    set_system_power_state(target_state);
186                    let res = self.forward_command(target_state, None, None).await;
187                    let _ = responder.send(res.map_err(|s| s.into_raw()));
188                }
189                AdminRequest::RebootToRecovery { responder } => {
190                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
191                    let target_state = SystemPowerState::RebootRecovery;
192                    set_system_power_state(target_state);
193                    let res = self.forward_command(target_state, None, None).await;
194                    let _ = responder.send(res.map_err(|s| s.into_raw()));
195                }
196                AdminRequest::Poweroff { responder } => {
197                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
198                    let target_state = SystemPowerState::Poweroff;
199                    set_system_power_state(target_state);
200                    let res = self.forward_command(target_state, None, None).await;
201                    let _ = responder.send(res.map_err(|s| s.into_raw()));
202                }
203                AdminRequest::SuspendToRam { responder } => {
204                    let target_state = SystemPowerState::SuspendRam;
205                    set_system_power_state(target_state);
206                    let res = self.forward_command(target_state, None, None).await;
207                    let _ = responder.send(res.map_err(|s| s.into_raw()));
208                }
209                AdminRequest::Mexec { responder, kernel_zbi, data_zbi } => {
210                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
211                    let res = async move {
212                        let target_state = SystemPowerState::Mexec;
213                        {
214                            // Duplicate the VMOs now, as forwarding the mexec request to power-manager
215                            // will consume them.
216                            let kernel_zbi =
217                                kernel_zbi.duplicate_handle(zx::Rights::SAME_RIGHTS)?;
218                            let data_zbi = data_zbi.duplicate_handle(zx::Rights::SAME_RIGHTS)?;
219
220                            set_system_power_state(SystemPowerState::Mexec);
221                            set_mexec_kernel_zbi(kernel_zbi);
222                            set_mexec_data_zbi(data_zbi);
223                        }
224
225                        self.forward_command(
226                            target_state,
227                            None,
228                            Some(AdminMexecRequest { kernel_zbi, data_zbi }),
229                        )
230                        .await
231                    }
232                    .await;
233                    let _ = responder.send(res.map_err(|s| s.into_raw()));
234                }
235            }
236        }
237    }
238
239    async fn handle_system_state_transition(&self, mut stream: SystemStateTransitionRequestStream) {
240        while let Ok(Some(request)) = stream.try_next().await {
241            match request {
242                SystemStateTransitionRequest::GetTerminationSystemState { responder } => {
243                    let state = (*SYSTEM_STATE).lock();
244                    let _ = responder.send(state.power_state);
245                }
246                SystemStateTransitionRequest::GetMexecZbis { responder } => {
247                    let mut state = (*SYSTEM_STATE).lock();
248                    if state.power_state != SystemPowerState::Mexec {
249                        let _ = responder.send(Err(zx::Status::BAD_STATE.into_raw()));
250                        continue;
251                    }
252                    let kernel_zbi = std::mem::replace(
253                        &mut state.mexec_kernel_zbi,
254                        zx::Handle::invalid().into(),
255                    );
256                    let data_zbi =
257                        std::mem::replace(&mut state.mexec_data_zbi, zx::Handle::invalid().into());
258                    let _ = responder.send(Ok((kernel_zbi, data_zbi)));
259                }
260            }
261        }
262    }
263
264    // A handler for the `Admin.PerformReboot` method.
265    async fn perform_reboot(&self, options: RebootOptions) -> Result<(), zx::Status> {
266        println!("[shutdown-shim] rebooting with reasons [{:?}]", options.reasons);
267        let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
268        let target_state = if options
269            .reasons
270            .as_ref()
271            .is_some_and(|reasons| reasons.contains(&RebootReason2::OutOfMemory))
272        {
273            SystemPowerState::RebootKernelInitiated
274        } else {
275            SystemPowerState::Reboot
276        };
277        set_system_power_state(target_state);
278        let reasons = match options.reasons {
279            Some(reasons) => Some(RebootReasons(reasons)),
280            None => None,
281        };
282        self.forward_command(target_state, reasons, None).await
283    }
284
285    async fn forward_command(
286        &self,
287        fallback_state: SystemPowerState,
288        reboot_reasons: Option<RebootReasons>,
289        _mexec_request: Option<AdminMexecRequest>,
290    ) -> Result<(), zx::Status> {
291        println!("[shutdown-shim] entering {:?} state", fallback_state);
292        // Return if shutdown is already pending
293        {
294            let mut shutdown_pending = self.shutdown_pending.lock().await;
295            if *shutdown_pending {
296                return Err(zx::Status::ALREADY_EXISTS);
297            }
298            *shutdown_pending = true;
299        }
300
301        if let Some(reasons) = reboot_reasons {
302            self.shutdown_watcher.handle_system_shutdown_message(reasons).await;
303        }
304
305        self.drive_shutdown_manually().await;
306
307        // We should block on fuchsia.sys.SystemController forever on this task, if
308        // it returns something has gone wrong.
309        eprintln!("[shutdown-shim]: we shouldn't still be running, crashing the system");
310        Self::abort(self.abort_tx.clone()).await
311    }
312
313    async fn drive_shutdown_manually(&self) {
314        let abort_tx = self.abort_tx.clone();
315        fasync::Task::spawn(async {
316            fasync::Timer::new(MANUAL_SYSTEM_SHUTDOWN_TIMEOUT).await;
317            // We shouldn't still be running at this point
318            Self::abort(abort_tx).await;
319        })
320        .detach();
321
322        if let Err(e) = self.initiate_component_shutdown().await {
323            eprintln!(
324                "[shutdown-shim]: error initiating component shutdown, system shutdown impossible: {e}"
325            );
326            // Recovery from this state is impossible. Exit with a non-zero exit code,
327            // so our critical marking causes the system to forcefully restart.
328            Self::abort(self.abort_tx.clone()).await;
329        }
330    }
331
332    async fn initiate_component_shutdown(&self) -> Result<(), anyhow::Error> {
333        println!("[shutdown-shim] shutting down components");
334        let system_controller_client = self
335            .connect_to_protocol::<SystemControllerMarker>()
336            .context("error connecting to component_manager")?;
337
338        system_controller_client.shutdown().await.context("failed to initiate shutdown")
339    }
340
341    async fn acquire_shutdown_control_lease(&self) -> Option<zx::EventPair> {
342        if !self.config.suspend_enabled {
343            return None;
344        }
345        let res = async {
346            let activity_governor = self
347                .connect_to_protocol::<fsystem::ActivityGovernorMarker>()
348                .context("error connecting to system_activity_governor")?;
349            activity_governor
350                .take_wake_lease("shutdown_control")
351                .await
352                .context("failed to take wake lease")
353        }
354        .await;
355        res.map_err(|e| {
356            eprintln!("[shutdown-shim]: {e}");
357            ()
358        })
359        .ok()
360    }
361
362    /// Cause the program to terminate.
363    async fn abort(mut abort_tx: mpsc::UnboundedSender<()>) -> ! {
364        let _ = abort_tx.send(()).await;
365        std::future::pending::<()>().await;
366        unreachable!();
367    }
368
369    fn connect_to_protocol<P: DiscoverableProtocolMarker>(
370        &self,
371    ) -> Result<P::Proxy, anyhow::Error> {
372        client::connect_to_protocol_at_dir_root::<P>(&self.svc)
373    }
374}
375
376impl<D: Directory + AsRefDirectory> collaborative_reboot::RebootActuator for ProgramContext<D> {
377    async fn perform_reboot(
378        &self,
379        reasons: Vec<CollaborativeRebootReason>,
380    ) -> Result<(), zx::Status> {
381        // Transform the reasons, and dispatch the request along the standard
382        // reboot pipeline.
383        let reasons = reasons
384            .into_iter()
385            .map(|reason| match reason {
386                CollaborativeRebootReason::NetstackMigration => RebootReason2::NetstackMigration,
387                CollaborativeRebootReason::SystemUpdate => RebootReason2::SystemUpdate,
388            })
389            .collect();
390        self.perform_reboot(RebootOptions { reasons: Some(reasons), ..Default::default() }).await
391    }
392}
393
/// The data guarded by the `SYSTEM_STATE` global.
struct SystemState {
    // The termination state reported by `GetTerminationSystemState`; written
    // by the Admin handlers via `set_system_power_state`.
    power_state: SystemPowerState,
    // VMOs staged by `AdminRequest::Mexec` and handed out (once) by
    // `GetMexecZbis`; invalid handles when no mexec is staged.
    mexec_kernel_zbi: zx::Vmo,
    mexec_data_zbi: zx::Vmo,
}
399
400impl SystemState {
401    fn new() -> Self {
402        Self {
403            power_state: SystemPowerState::FullyOn,
404            mexec_kernel_zbi: zx::Handle::invalid().into(),
405            mexec_data_zbi: zx::Handle::invalid().into(),
406        }
407    }
408}
409
// Process-global record of the requested termination state plus any staged
// mexec VMOs. Written by the Admin handlers (through the `set_*` helpers
// below) and read back by the SystemStateTransition handlers.
static SYSTEM_STATE: LazyLock<Mutex<SystemState>> =
    LazyLock::new(|| Mutex::new(SystemState::new()));
412
413fn set_system_power_state(new: SystemPowerState) {
414    let mut s = (*SYSTEM_STATE).lock();
415    s.power_state = new;
416}
417
418fn set_mexec_kernel_zbi(new: zx::Vmo) {
419    let mut s = (*SYSTEM_STATE).lock();
420    s.mexec_kernel_zbi = new;
421}
422
423fn set_mexec_data_zbi(new: zx::Vmo) {
424    let mut s = (*SYSTEM_STATE).lock();
425    s.mexec_data_zbi = new;
426}