// kernel_manager/lib.rs
1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5pub mod kernels;
6pub mod pager;
7pub mod proxy;
8pub mod suspend;
9
10use crate::pager::{Pager, run_pager};
11use anyhow::{Error, anyhow};
12use fidl::endpoints::{DiscoverableProtocolMarker, Proxy, ServerEnd};
13use fidl::{HandleBased, Peered};
14use fidl_fuchsia_component as fcomponent;
15use fidl_fuchsia_component_decl as fdecl;
16use fidl_fuchsia_component_runner as frunner;
17use fidl_fuchsia_io as fio;
18use fidl_fuchsia_starnix_container as fstarnix;
19use fidl_fuchsia_starnix_runner as fstarnixrunner;
20use fuchsia_async as fasync;
21use fuchsia_component::client as fclient;
22use fuchsia_sync::Mutex;
23use futures::TryStreamExt;
24use kernels::Kernels;
25use log::warn;
26use proxy::ChannelProxy;
27use rand::Rng;
28use std::future::Future;
29use std::sync::Arc;
30use suspend::{
31    ASLEEP_SIGNAL, AWAKE_SIGNAL, SuspendContext, WakeSource, WakeSources, suspend_container,
32};
33
/// The name of the collection that the starnix_kernel is run in.
const KERNEL_COLLECTION: &str = "kernels";

/// The name of the protocol the kernel exposes for running containers.
///
/// This protocol is actually fuchsia.component.runner.ComponentRunner. We
/// expose the implementation using this name to avoid confusion with the
/// copy of the fuchsia.component.runner.ComponentRunner protocol used for
/// running components inside the container.
const CONTAINER_RUNNER_PROTOCOL: &str = "fuchsia.starnix.container.Runner";
44
/// A handle to a running `starnix_kernel` component instance and the
/// container it hosts.
#[allow(dead_code)]
pub struct StarnixKernel {
    /// The name of the kernel instance in the kernels collection.
    name: String,

    /// The directory exposed by the Starnix kernel.
    ///
    /// This directory can be used to connect to services offered by the kernel.
    exposed_dir: fio::DirectoryProxy,

    /// An opaque token representing the container component.
    component_instance: zx::Event,

    /// The job the kernel lives under.
    job: Arc<zx::Job>,

    /// The currently active wake lease for the container.
    wake_lease: Mutex<Option<zx::EventPair>>,
}
64
65impl StarnixKernel {
66    /// Creates a new instance of `starnix_kernel`.
67    ///
68    /// This is done by creating a new child in the `kernels` collection.
69    ///
70    /// Returns the kernel and a future that will resolve when the kernel has stopped.
71    pub async fn create(
72        realm: fcomponent::RealmProxy,
73        kernel_url: &str,
74        start_info: frunner::ComponentStartInfo,
75        controller: ServerEnd<frunner::ComponentControllerMarker>,
76    ) -> Result<(Self, impl Future<Output = ()>), Error> {
77        let kernel_name = generate_kernel_name(&start_info)?;
78        let component_instance = start_info
79            .component_instance
80            .as_ref()
81            .ok_or_else(|| {
82                anyhow::anyhow!("expected to find component_instance in ComponentStartInfo")
83            })?
84            .duplicate_handle(zx::Rights::SAME_RIGHTS)?;
85
86        // Create the `starnix_kernel`.
87        let (controller_proxy, controller_server_end) = fidl::endpoints::create_proxy();
88        realm
89            .create_child(
90                &fdecl::CollectionRef { name: KERNEL_COLLECTION.into() },
91                &fdecl::Child {
92                    name: Some(kernel_name.clone()),
93                    url: Some(kernel_url.to_string()),
94                    startup: Some(fdecl::StartupMode::Lazy),
95                    ..Default::default()
96                },
97                fcomponent::CreateChildArgs {
98                    controller: Some(controller_server_end),
99                    ..Default::default()
100                },
101            )
102            .await?
103            .map_err(|e| anyhow::anyhow!("failed to create kernel: {:?}", e))?;
104
105        let exposed_dir = open_exposed_directory(&realm, &kernel_name, KERNEL_COLLECTION).await?;
106        let container_runner = fclient::connect_to_named_protocol_at_dir_root::<
107            frunner::ComponentRunnerMarker,
108        >(&exposed_dir, CONTAINER_RUNNER_PROTOCOL)?;
109
110        // Actually start the container.
111        container_runner.start(start_info, controller)?;
112
113        // Ask the kernel for its job.
114        let container_controller =
115            fclient::connect_to_protocol_at_dir_root::<fstarnix::ControllerMarker>(&exposed_dir)?;
116        let fstarnix::ControllerGetJobHandleResponse { job, .. } =
117            container_controller.get_job_handle().await?;
118        let Some(job) = job else {
119            anyhow::bail!("expected to find job in ControllerGetJobHandleResponse");
120        };
121
122        let kernel = Self {
123            name: kernel_name,
124            exposed_dir,
125            component_instance,
126            job: Arc::new(job),
127            wake_lease: Default::default(),
128        };
129        let on_stop = async move {
130            _ = controller_proxy.into_channel().unwrap().on_closed().await;
131        };
132        Ok((kernel, on_stop))
133    }
134
135    /// Gets the opaque token representing the container component.
136    pub fn component_instance(&self) -> &zx::Event {
137        &self.component_instance
138    }
139
140    /// Gets the job the kernel lives under.
141    pub fn job(&self) -> &Arc<zx::Job> {
142        &self.job
143    }
144
145    /// Connect to the specified protocol exposed by the kernel.
146    pub fn connect_to_protocol<P: DiscoverableProtocolMarker>(&self) -> Result<P::Proxy, Error> {
147        fclient::connect_to_protocol_at_dir_root::<P>(&self.exposed_dir)
148    }
149}
150
151/// Generate a random name for the kernel.
152///
153/// We used to include some human-readable parts in the name, but people were
154/// tempted to make them load-bearing. We now just generate 7 random alphanumeric
155/// characters.
156fn generate_kernel_name(_start_info: &frunner::ComponentStartInfo) -> Result<String, Error> {
157    let random_id: String =
158        rand::rng().sample_iter(&rand::distr::Alphanumeric).take(7).map(char::from).collect();
159    Ok(random_id)
160}
161
162async fn open_exposed_directory(
163    realm: &fcomponent::RealmProxy,
164    child_name: &str,
165    collection_name: &str,
166) -> Result<fio::DirectoryProxy, Error> {
167    let (directory_proxy, server_end) = fidl::endpoints::create_proxy::<fio::DirectoryMarker>();
168    realm
169        .open_exposed_dir(
170            &fdecl::ChildRef { name: child_name.into(), collection: Some(collection_name.into()) },
171            server_end,
172        )
173        .await?
174        .map_err(|e| {
175            anyhow!(
176                "failed to bind to child {} in collection {:?}: {:?}",
177                child_name,
178                collection_name,
179                e
180            )
181        })?;
182    Ok(directory_proxy)
183}
184
/// A container-suspend request queued for the suspend worker, paired with the
/// FIDL responder used to reply to the original caller.
pub struct SuspendMessage {
    /// The original FIDL request payload.
    pub payload: fstarnixrunner::ManagerSuspendContainerRequest,
    /// Responder used to send the suspend result back to the client.
    pub responder: fstarnixrunner::ManagerSuspendContainerResponder,
}
189
190pub async fn run_suspend_worker(
191    receiver: async_channel::Receiver<SuspendMessage>,
192    suspend_context: Arc<SuspendContext>,
193    kernels: &Kernels,
194) {
195    while let Ok(msg) = receiver.recv().await {
196        match suspend_container(msg.payload, &suspend_context, &kernels).await {
197            Ok(response) => {
198                if let Err(e) = match response {
199                    Ok(o) => msg.responder.send(Ok(&o)),
200                    Err(e) => msg.responder.send(Err(e)),
201                } {
202                    warn!("error replying to suspend request: {e}");
203                }
204            }
205            Err(e) => {
206                warn!("error executing suspend: {e}");
207                let _ = msg.responder.send(Err(fstarnixrunner::SuspendError::SuspendFailure));
208            }
209        }
210    }
211}
212
/// Serves one connection of the `fuchsia.starnix.runner/Manager` protocol.
///
/// Suspend requests are forwarded to the suspend worker via `suspend_sender`;
/// wake channels are registered as wake sources and handed to the proxy
/// worker via `sender`; pager requests spawn a detached pager task.
///
/// Returns when the stream closes, or with an error if reading the stream or
/// duplicating/signaling a handle fails.
pub async fn serve_starnix_manager(
    mut stream: fstarnixrunner::ManagerRequestStream,
    suspend_context: Arc<SuspendContext>,
    sender: &async_channel::Sender<(ChannelProxy, Arc<Mutex<WakeSources>>)>,
    pager: Arc<Pager>,
    suspend_sender: &async_channel::Sender<SuspendMessage>,
) -> Result<(), Error> {
    while let Some(event) = stream.try_next().await? {
        match event {
            fstarnixrunner::ManagerRequest::SuspendContainer { payload, responder, .. } => {
                // Offload to the suspend worker so this dispatch loop never
                // blocks on a suspend; try_send drops the request (with a
                // warning) if the worker's queue is full or closed.
                if let Err(e) = suspend_sender.try_send(SuspendMessage { payload, responder }) {
                    warn!("failed to send suspend request to worker: {e}");
                }
            }
            fstarnixrunner::ManagerRequest::ProxyWakeChannel { payload, .. } => {
                // Requests missing any required field are silently ignored.
                let fstarnixrunner::ManagerProxyWakeChannelRequest {
                    // TODO: Handle more than one container.
                    container_job: Some(_container_job),
                    remote_channel: Some(remote_channel),
                    container_channel: Some(container_channel),
                    name: Some(name),
                    counter: Some(message_counter),
                    ..
                } = payload
                else {
                    continue;
                };

                // Register the counter as a wake source, keyed by its koid, so
                // suspension can observe in-flight messages on this channel.
                suspend_context.wake_sources.lock().insert(
                    message_counter.koid().unwrap(),
                    WakeSource::from_counter(
                        message_counter.duplicate_handle(zx::Rights::SAME_RIGHTS)?,
                        name.clone(),
                    ),
                );

                let proxy =
                    ChannelProxy { container_channel, remote_channel, message_counter, name };

                // NOTE(review): unwrap here panics if the proxy worker's
                // channel is full or closed — confirm that is intended.
                sender.try_send((proxy, suspend_context.wake_sources.clone())).unwrap();
            }
            fstarnixrunner::ManagerRequest::RegisterWakeWatcher { payload, responder } => {
                if let Some(watcher) = payload.watcher {
                    // New watchers are signaled as awake immediately.
                    let (clear_mask, set_mask) = (ASLEEP_SIGNAL, AWAKE_SIGNAL);
                    // NOTE(review): the `?` tears down this whole Manager
                    // connection if the watcher's peer is already closed —
                    // confirm that is intended.
                    watcher.signal_peer(clear_mask, set_mask)?;

                    suspend_context.wake_watchers.lock().push(watcher);
                }
                if let Err(e) = responder.send() {
                    warn!("error registering power watcher: {e}");
                }
            }
            fstarnixrunner::ManagerRequest::AddWakeSource { payload, .. } => {
                // Requests missing any required field are silently ignored.
                let fstarnixrunner::ManagerAddWakeSourceRequest {
                    // TODO: Handle more than one container.
                    container_job: Some(_container_job),
                    name: Some(name),
                    handle: Some(handle),
                    signals: Some(signals),
                    ..
                } = payload
                else {
                    continue;
                };
                // Track the handle as a wake source, keyed by its koid;
                // unknown signal bits are dropped by from_bits_truncate.
                suspend_context.wake_sources.lock().insert(
                    handle.koid().unwrap(),
                    WakeSource::from_handle(
                        handle,
                        name.clone(),
                        zx::Signals::from_bits_truncate(signals),
                    ),
                );
            }
            fstarnixrunner::ManagerRequest::RemoveWakeSource { payload, .. } => {
                let fstarnixrunner::ManagerRemoveWakeSourceRequest {
                    // TODO: Handle more than one container.
                    container_job: Some(_container_job),
                    handle: Some(handle),
                    ..
                } = payload
                else {
                    continue;
                };

                // Removal is keyed by koid, matching the insertions above.
                suspend_context.wake_sources.lock().remove(&handle.koid().unwrap());
            }
            fstarnixrunner::ManagerRequest::CreatePager { payload, .. } => {
                // Pager runs as an independent detached task.
                fasync::Task::spawn(run_pager(payload, pager.clone())).detach();
            }
            _ => {}
        }
    }
    Ok(())
}