shutdown_shim/
lib.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4mod reboot_reasons;
5mod shutdown_watcher;
6
7use crate::reboot_reasons::RebootReasons;
8use crate::shutdown_watcher::ShutdownWatcher;
9use anyhow::{format_err, Context};
10use fidl::endpoints::{DiscoverableProtocolMarker, ServerEnd};
11use fidl::HandleBased;
12use fidl_fuchsia_hardware_power_statecontrol::{
13    AdminMexecRequest, AdminRequest, AdminRequestStream, RebootMethodsWatcherRegisterRequestStream,
14    RebootOptions, RebootReason, RebootReason2,
15};
16use fidl_fuchsia_power::CollaborativeRebootInitiatorRequestStream;
17use fidl_fuchsia_power_internal::{
18    CollaborativeRebootReason, CollaborativeRebootSchedulerRequestStream,
19};
20use fidl_fuchsia_sys2::SystemControllerMarker;
21use fidl_fuchsia_system_state::{
22    SystemPowerState, SystemStateTransitionRequest, SystemStateTransitionRequestStream,
23};
24use fuchsia_component::client;
25use fuchsia_component::directory::{AsRefDirectory, Directory};
26use fuchsia_component::server::ServiceFs;
27use fuchsia_sync::Mutex;
28use futures::channel::mpsc;
29use futures::lock::Mutex as AMutex;
30use futures::prelude::*;
31use futures::select;
32use std::pin::pin;
33use std::sync::{Arc, LazyLock};
34use std::time::Duration;
35use {fidl_fuchsia_io as fio, fidl_fuchsia_power_system as fsystem, fuchsia_async as fasync};
36
37mod collaborative_reboot;
38
// The amount of time that the shim will spend waiting for a manually triggered
// system shutdown to finish before forcefully restarting the system. The
// value below is one hour (60 minutes of 60 seconds each).
const MANUAL_SYSTEM_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(60 * 60);
42
43enum IncomingRequest {
44    SystemStateTransition(SystemStateTransitionRequestStream),
45    Admin(AdminRequestStream),
46    CollaborativeRebootInitiator(CollaborativeRebootInitiatorRequestStream),
47    CollaborativeRebootScheduler(CollaborativeRebootSchedulerRequestStream),
48    RebootMethodsWatcherRegister(RebootMethodsWatcherRegisterRequestStream),
49}
50
51pub async fn main(
52    svc: impl Directory + AsRefDirectory + 'static,
53    directory_request: ServerEnd<fio::DirectoryMarker>,
54) -> Result<(), anyhow::Error> {
55    println!("[shutdown-shim]: started");
56
57    // Initialize the inspect framework.
58    //
59    // Note that shutdown-shim is a builtin component of ComponentManager, which
60    // means we must setup the inspector in a non-conventional way:
61    // * We must initialize the connection to the inspect sink relative to the
62    // `svc` directory.
63    // * We must not use the global `fuchsia_inspect::component::inspector()`;
64    //   this instance instance is a singleton and would be shared with
65    //   ComponentManager. Instead we declare our own local inspector.
66    let inspector = fuchsia_inspect::Inspector::new(fuchsia_inspect::InspectorConfig::default());
67    let (client, server) =
68        fidl::endpoints::create_endpoints::<fidl_fuchsia_inspect::InspectSinkMarker>();
69    // Note: The inspect server is detached, so we need not poll it.
70    let _inspect_server_task = inspect_runtime::publish(
71        &inspector,
72        inspect_runtime::PublishOptions::default().on_inspect_sink_client(client),
73    )
74    .ok_or_else(|| format_err!("failed to initialize inspect framework"))?;
75    svc.as_ref_directory()
76        .open(
77            fidl_fuchsia_inspect::InspectSinkMarker::PROTOCOL_NAME,
78            fio::Flags::PROTOCOL_SERVICE,
79            server.into_channel().into(),
80        )
81        .context("failed to connect to InspectSink")?;
82
83    let mut service_fs = ServiceFs::new();
84    service_fs.dir("svc").add_fidl_service(IncomingRequest::Admin);
85    service_fs.dir("svc").add_fidl_service(IncomingRequest::RebootMethodsWatcherRegister);
86    service_fs.dir("svc").add_fidl_service(IncomingRequest::SystemStateTransition);
87    service_fs.dir("svc").add_fidl_service(IncomingRequest::CollaborativeRebootInitiator);
88    service_fs.dir("svc").add_fidl_service(IncomingRequest::CollaborativeRebootScheduler);
89    service_fs.serve_connection(directory_request).context("failed to serve outgoing namespace")?;
90
91    let (abort_tx, mut abort_rx) = mpsc::unbounded::<()>();
92    let (cr_state, cr_cancellations) = collaborative_reboot::new(&inspector);
93    let ctx = ProgramContext {
94        svc,
95        abort_tx,
96        collaborative_reboot: cr_state,
97        shutdown_pending: Arc::new(AMutex::new(false)),
98        shutdown_watcher: ShutdownWatcher::new_with_inspector(&inspector),
99    };
100
101    let shutdown_watcher = ctx.shutdown_watcher.clone();
102    let mut service_fut = service_fs
103        .for_each_concurrent(None, |request: IncomingRequest| async {
104            match request {
105                IncomingRequest::Admin(stream) => ctx.handle_admin_request(stream).await,
106                IncomingRequest::RebootMethodsWatcherRegister(stream) => {
107                    shutdown_watcher.clone().handle_reboot_register_request(stream).await;
108                }
109                IncomingRequest::SystemStateTransition(stream) => {
110                    ctx.handle_system_state_transition(stream).await
111                }
112                IncomingRequest::CollaborativeRebootInitiator(stream) => {
113                    ctx.collaborative_reboot.handle_initiator_requests(stream, &ctx).await
114                }
115                IncomingRequest::CollaborativeRebootScheduler(stream) => {
116                    ctx.collaborative_reboot.handle_scheduler_requests(stream).await
117                }
118            }
119        })
120        .fuse();
121    let collaborative_reboot_cancellation_fut = pin!(cr_cancellations.run());
122    let mut collaborative_reboot_cancellation_fut = collaborative_reboot_cancellation_fut.fuse();
123    let mut abort_fut = abort_rx.next().fuse();
124
125    select! {
126        () = service_fut => {},
127        () = collaborative_reboot_cancellation_fut => unreachable!(),
128        _ = abort_fut => {},
129    };
130
131    Err(format_err!("exited unexpectedly"))
132}
133
134struct ProgramContext<D: Directory + AsRefDirectory> {
135    svc: D,
136    abort_tx: mpsc::UnboundedSender<()>,
137    collaborative_reboot: collaborative_reboot::State,
138
139    /// Tracks the current shutdown request state. Used to ignore shutdown requests while a current
140    /// request is being processed.
141    shutdown_pending: Arc<AMutex<bool>>,
142
143    shutdown_watcher: Arc<ShutdownWatcher>,
144}
145
146impl<D: Directory + AsRefDirectory> ProgramContext<D> {
147    async fn handle_admin_request(&self, mut stream: AdminRequestStream) {
148        while let Ok(Some(request)) = stream.try_next().await {
149            match request {
150                AdminRequest::PowerFullyOn { responder, .. } => {
151                    let _ = responder.send(Err(zx::Status::NOT_SUPPORTED.into_raw()));
152                }
153                // TODO(https://fxbug.dev/385742868): Delete this method once
154                // it's removed from the API.
155                AdminRequest::Reboot { reason, responder } => {
156                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
157                    let target_state = if reason == RebootReason::OutOfMemory {
158                        SystemPowerState::RebootKernelInitiated
159                    } else {
160                        SystemPowerState::Reboot
161                    };
162                    set_system_power_state(target_state);
163                    let res = self
164                        .forward_command(
165                            target_state,
166                            Some(RebootReasons::from_deprecated(&reason)),
167                            None,
168                        )
169                        .await;
170                    let _ = responder.send(res.map_err(|s| s.into_raw()));
171                }
172                AdminRequest::PerformReboot { options, responder } => {
173                    let res = self.perform_reboot(options).await;
174                    let _ = responder.send(res.map_err(|s| s.into_raw()));
175                }
176                AdminRequest::RebootToBootloader { responder } => {
177                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
178                    let target_state = SystemPowerState::RebootBootloader;
179                    set_system_power_state(target_state);
180                    let res = self.forward_command(target_state, None, None).await;
181                    let _ = responder.send(res.map_err(|s| s.into_raw()));
182                }
183                AdminRequest::RebootToRecovery { responder } => {
184                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
185                    let target_state = SystemPowerState::RebootRecovery;
186                    set_system_power_state(target_state);
187                    let res = self.forward_command(target_state, None, None).await;
188                    let _ = responder.send(res.map_err(|s| s.into_raw()));
189                }
190                AdminRequest::Poweroff { responder } => {
191                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
192                    let target_state = SystemPowerState::Poweroff;
193                    set_system_power_state(target_state);
194                    let res = self.forward_command(target_state, None, None).await;
195                    let _ = responder.send(res.map_err(|s| s.into_raw()));
196                }
197                AdminRequest::SuspendToRam { responder } => {
198                    let target_state = SystemPowerState::SuspendRam;
199                    set_system_power_state(target_state);
200                    let res = self.forward_command(target_state, None, None).await;
201                    let _ = responder.send(res.map_err(|s| s.into_raw()));
202                }
203                AdminRequest::Mexec { responder, kernel_zbi, data_zbi } => {
204                    let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
205                    let res = async move {
206                        let target_state = SystemPowerState::Mexec;
207                        {
208                            // Duplicate the VMOs now, as forwarding the mexec request to power-manager
209                            // will consume them.
210                            let kernel_zbi =
211                                kernel_zbi.duplicate_handle(zx::Rights::SAME_RIGHTS)?;
212                            let data_zbi = data_zbi.duplicate_handle(zx::Rights::SAME_RIGHTS)?;
213
214                            set_system_power_state(SystemPowerState::Mexec);
215                            set_mexec_kernel_zbi(kernel_zbi);
216                            set_mexec_data_zbi(data_zbi);
217                        }
218
219                        self.forward_command(
220                            target_state,
221                            None,
222                            Some(AdminMexecRequest { kernel_zbi, data_zbi }),
223                        )
224                        .await
225                    }
226                    .await;
227                    let _ = responder.send(res.map_err(|s| s.into_raw()));
228                }
229            }
230        }
231    }
232
233    async fn handle_system_state_transition(&self, mut stream: SystemStateTransitionRequestStream) {
234        while let Ok(Some(request)) = stream.try_next().await {
235            match request {
236                SystemStateTransitionRequest::GetTerminationSystemState { responder } => {
237                    let state = (*SYSTEM_STATE).lock();
238                    let _ = responder.send(state.power_state);
239                }
240                SystemStateTransitionRequest::GetMexecZbis { responder } => {
241                    let mut state = (*SYSTEM_STATE).lock();
242                    if state.power_state != SystemPowerState::Mexec {
243                        let _ = responder.send(Err(zx::Status::BAD_STATE.into_raw()));
244                        continue;
245                    }
246                    let kernel_zbi = std::mem::replace(
247                        &mut state.mexec_kernel_zbi,
248                        zx::Handle::invalid().into(),
249                    );
250                    let data_zbi =
251                        std::mem::replace(&mut state.mexec_data_zbi, zx::Handle::invalid().into());
252                    let _ = responder.send(Ok((kernel_zbi, data_zbi)));
253                }
254            }
255        }
256    }
257
258    // A handler for the `Admin.PerformReboot` method.
259    async fn perform_reboot(&self, options: RebootOptions) -> Result<(), zx::Status> {
260        println!("[shutdown-shim] rebooting with reasons [{:?}]", options.reasons);
261        let _reboot_control_lease = self.acquire_shutdown_control_lease().await;
262        let target_state = if options
263            .reasons
264            .as_ref()
265            .is_some_and(|reasons| reasons.contains(&RebootReason2::OutOfMemory))
266        {
267            SystemPowerState::RebootKernelInitiated
268        } else {
269            SystemPowerState::Reboot
270        };
271        set_system_power_state(target_state);
272        let reasons = match options.reasons {
273            Some(reasons) => Some(RebootReasons(reasons)),
274            None => None,
275        };
276        self.forward_command(target_state, reasons, None).await
277    }
278
279    async fn forward_command(
280        &self,
281        fallback_state: SystemPowerState,
282        reboot_reasons: Option<RebootReasons>,
283        _mexec_request: Option<AdminMexecRequest>,
284    ) -> Result<(), zx::Status> {
285        println!("[shutdown-shim] entering {:?} state", fallback_state);
286        // Return if shutdown is already pending
287        {
288            let mut shutdown_pending = self.shutdown_pending.lock().await;
289            if *shutdown_pending {
290                return Err(zx::Status::ALREADY_EXISTS);
291            }
292            *shutdown_pending = true;
293        }
294
295        if let Some(reasons) = reboot_reasons {
296            self.shutdown_watcher.handle_system_shutdown_message(reasons).await;
297        }
298
299        self.drive_shutdown_manually().await;
300
301        // We should block on fuchsia.sys.SystemController forever on this task, if
302        // it returns something has gone wrong.
303        eprintln!("[shutdown-shim]: we shouldn't still be running, crashing the system");
304        Self::abort(self.abort_tx.clone()).await
305    }
306
307    async fn drive_shutdown_manually(&self) {
308        let abort_tx = self.abort_tx.clone();
309        fasync::Task::spawn(async {
310            fasync::Timer::new(MANUAL_SYSTEM_SHUTDOWN_TIMEOUT).await;
311            // We shouldn't still be running at this point
312            Self::abort(abort_tx).await;
313        })
314        .detach();
315
316        if let Err(e) = self.initiate_component_shutdown().await {
317            eprintln!(
318                "[shutdown-shim]: error initiating component shutdown, system shutdown impossible: {e}"
319            );
320            // Recovery from this state is impossible. Exit with a non-zero exit code,
321            // so our critical marking causes the system to forcefully restart.
322            Self::abort(self.abort_tx.clone()).await;
323        }
324    }
325
326    async fn initiate_component_shutdown(&self) -> Result<(), anyhow::Error> {
327        println!("[shutdown-shim] shutting down components");
328        let system_controller_client = self
329            .connect_to_protocol::<SystemControllerMarker>()
330            .context("error connecting to component_manager")?;
331
332        system_controller_client.shutdown().await.context("failed to initiate shutdown")
333    }
334
335    async fn acquire_shutdown_control_lease(&self) -> Option<zx::EventPair> {
336        let res = async {
337            let activity_governor = self
338                .connect_to_protocol::<fsystem::ActivityGovernorMarker>()
339                .context("error connecting to system_activity_governor")?;
340            activity_governor
341                .take_wake_lease("shutdown_control")
342                .await
343                .context("failed to take wake lease")
344        }
345        .await;
346        res.map_err(|e| {
347            eprintln!("[shutdown-shim]: {e}");
348            ()
349        })
350        .ok()
351    }
352
353    /// Cause the program to terminate.
354    async fn abort(mut abort_tx: mpsc::UnboundedSender<()>) -> ! {
355        let _ = abort_tx.send(()).await;
356        std::future::pending::<()>().await;
357        unreachable!();
358    }
359
360    fn connect_to_protocol<P: DiscoverableProtocolMarker>(
361        &self,
362    ) -> Result<P::Proxy, anyhow::Error> {
363        client::connect_to_protocol_at_dir_root::<P>(&self.svc)
364    }
365}
366
367impl<D: Directory + AsRefDirectory> collaborative_reboot::RebootActuator for ProgramContext<D> {
368    async fn perform_reboot(
369        &self,
370        reasons: Vec<CollaborativeRebootReason>,
371    ) -> Result<(), zx::Status> {
372        // Transform the reasons, and dispatch the request along the standard
373        // reboot pipeline.
374        let reasons = reasons
375            .into_iter()
376            .map(|reason| match reason {
377                CollaborativeRebootReason::NetstackMigration => RebootReason2::NetstackMigration,
378                CollaborativeRebootReason::SystemUpdate => RebootReason2::SystemUpdate,
379            })
380            .collect();
381        self.perform_reboot(RebootOptions { reasons: Some(reasons), ..Default::default() }).await
382    }
383}
384
385struct SystemState {
386    power_state: SystemPowerState,
387    mexec_kernel_zbi: zx::Vmo,
388    mexec_data_zbi: zx::Vmo,
389}
390
391impl SystemState {
392    fn new() -> Self {
393        Self {
394            power_state: SystemPowerState::FullyOn,
395            mexec_kernel_zbi: zx::Handle::invalid().into(),
396            mexec_data_zbi: zx::Handle::invalid().into(),
397        }
398    }
399}
400
401static SYSTEM_STATE: LazyLock<Mutex<SystemState>> =
402    LazyLock::new(|| Mutex::new(SystemState::new()));
403
404fn set_system_power_state(new: SystemPowerState) {
405    let mut s = (*SYSTEM_STATE).lock();
406    s.power_state = new;
407}
408
409fn set_mexec_kernel_zbi(new: zx::Vmo) {
410    let mut s = (*SYSTEM_STATE).lock();
411    s.mexec_kernel_zbi = new;
412}
413
414fn set_mexec_data_zbi(new: zx::Vmo) {
415    let mut s = (*SYSTEM_STATE).lock();
416    s.mexec_data_zbi = new;
417}