1use crate::{
6 Features, MountAction, expose_root, parse_features, parse_numbered_handles,
7 run_container_features, serve_component_runner, serve_container_controller,
8 serve_graphical_presenter, serve_lutex_controller,
9};
10use anyhow::{Context, Error, anyhow, bail};
11use bootreason::get_or_init_android_bootreason;
12use bstr::{BString, ByteSlice};
13use devicetree::parser::parse_devicetree;
14use devicetree::types::Devicetree;
15use fidl::endpoints::{ControlHandle, RequestStream, ServerEnd};
16use fidl_fuchsia_boot as fboot;
17use fidl_fuchsia_component as fcomponent;
18use fidl_fuchsia_component_runner as frunner;
19use fidl_fuchsia_component_runner::{TaskProviderRequest, TaskProviderRequestStream};
20use fidl_fuchsia_element as felement;
21use fidl_fuchsia_feedback::CrashReporterMarker;
22use fidl_fuchsia_io as fio;
23use fidl_fuchsia_mem as fmem;
24use fidl_fuchsia_memory_attribution as fattribution;
25use fidl_fuchsia_starnix_binder as fbinder;
26use fidl_fuchsia_starnix_container as fstarcontainer;
27use fidl_fuchsia_time_external::AdjustMarker;
28use fuchsia_async as fasync;
29use fuchsia_async::DurationExt;
30use fuchsia_component::client::{connect_to_protocol, connect_to_protocol_sync};
31use fuchsia_component::server::ServiceFs;
32use fuchsia_inspect as inspect;
33use fuchsia_runtime as fruntime;
34use fuchsia_zbi as zbi;
35use futures::channel::oneshot;
36use futures::{FutureExt, StreamExt, TryStreamExt};
37use serde::Deserialize;
38use starnix_container_structured_config::Config as ContainerStructuredConfig;
39use starnix_core::device::remote_block_device::remote_block_device_init;
40use starnix_core::execution::{
41 create_init_process, create_system_task, execute_task_with_prerun_result,
42};
43use starnix_core::fs::fuchsia::new_remotefs_in_root;
44use starnix_core::fs::tmpfs::TmpFs;
45use starnix_core::security;
46use starnix_core::task::container_namespace::ContainerNamespace;
47use starnix_core::task::{
48 CurrentTask, ExitStatus, Kernel, RoleOverrides, SchedulerManager, parse_cmdline,
49};
50use starnix_core::vfs::{FileSystemOptions, FsContext, LookupContext, Namespace, WhatToMount};
51use starnix_logging::{
52 CATEGORY_STARNIX, NAME_CREATE_CONTAINER, log_debug, log_error, log_info, log_warn,
53 trace_duration,
54};
55use starnix_modules::{init_common_devices, register_common_file_systems};
56use starnix_modules_layeredfs::{LayeredFsBuilder, LayeredFsMounts};
57use starnix_modules_magma::get_magma_params;
58use starnix_modules_overlayfs::OverlayStack;
59use starnix_modules_rtc::rtc_device_init;
60use starnix_sync::{Locked, Unlocked};
61use starnix_task_command::TaskCommand;
62use starnix_uapi::errors::{ENOENT, SourceContext};
63use starnix_uapi::open_flags::OpenFlags;
64use starnix_uapi::resource_limits::Resource;
65use starnix_uapi::{errno, tid_t};
66use std::ffi::CString;
67use std::ops::DerefMut;
68use std::sync::Arc;
69use zx::Task as _;
70
71use std::sync::Weak;
72
73use crate::serve_memory_attribution_provider_container;
74use attribution_server::{AttributionServer, AttributionServerHandle};
75use fidl::HandleBased;
76
/// Manages memory attribution reporting for a Starnix container.
///
/// Wraps an [`AttributionServerHandle`] whose update callback snapshots the
/// kernel's attribution info (see `attribution_info_for_kernel`).
struct ContainerMemoryAttributionManager {
    memory_attribution_server: AttributionServerHandle,
}
82
83impl ContainerMemoryAttributionManager {
84 pub fn new(kernel: Weak<Kernel>, component_instance: zx::Event) -> Self {
87 let memory_attribution_server = AttributionServer::new(Box::new(move || {
88 let kernel_ref = match kernel.upgrade() {
89 None => return vec![],
90 Some(k) => k,
91 };
92 attribution_info_for_kernel(kernel_ref.as_ref(), &component_instance)
93 }));
94
95 ContainerMemoryAttributionManager { memory_attribution_server }
96 }
97
98 pub fn new_observer(
100 &self,
101 control_handle: fattribution::ProviderControlHandle,
102 ) -> attribution_server::Observer {
103 self.memory_attribution_server.new_observer(control_handle)
104 }
105}
106
107fn attribution_info_for_kernel(
111 kernel: &Kernel,
112 component_instance: &zx::Event,
113) -> Vec<fattribution::AttributionUpdate> {
114 let (client_end, server_end) =
118 fidl::endpoints::create_request_stream::<fattribution::ProviderMarker>();
119 fuchsia_async::Task::spawn(serve_memory_attribution_provider_container(server_end, kernel))
120 .detach();
121
122 let starnix_kernel_id = Some(1);
123 let starnix_kernel_principal = fattribution::NewPrincipal {
124 identifier: starnix_kernel_id,
125 description: Some(fattribution::Description::Part("starnix_kernel".to_string())),
126 principal_type: Some(fattribution::PrincipalType::Part),
127 detailed_attribution: None,
131 ..Default::default()
132 };
133
134 let starnix_kernel_attribution = fattribution::UpdatedPrincipal {
135 identifier: starnix_kernel_id, resources: Some(fattribution::Resources::Data(fattribution::Data {
137 resources: vec![fattribution::Resource::ProcessMapped(fattribution::ProcessMapped {
138 process: fuchsia_runtime::process_self().koid().unwrap().raw_koid(),
139 base: 0, len: u64::max_value(),
141 hint_skip_handle_table: false,
142 })],
143 })),
144 ..Default::default()
145 };
146
147 let container_id = Some(2);
148 let new_principal = fattribution::NewPrincipal {
149 identifier: container_id,
150 description: Some(fattribution::Description::Component(
151 component_instance.duplicate_handle(zx::Rights::SAME_RIGHTS).unwrap(),
152 )),
153 principal_type: Some(fattribution::PrincipalType::Runnable),
154 detailed_attribution: Some(client_end),
155 ..Default::default()
156 };
157 let attribution = fattribution::UpdatedPrincipal {
158 identifier: container_id,
159 resources: Some(fattribution::Resources::Data(fattribution::Data {
160 resources: vec![fattribution::Resource::KernelObject(
161 fuchsia_runtime::job_default().koid().unwrap().raw_koid(),
162 )],
163 })),
164 ..Default::default()
165 };
166
167 vec![
168 fattribution::AttributionUpdate::Add(new_principal),
169 fattribution::AttributionUpdate::Add(starnix_kernel_principal),
170 fattribution::AttributionUpdate::Update(attribution),
171 fattribution::AttributionUpdate::Update(starnix_kernel_attribution),
172 ]
173}
174
/// Validated, unpacked form of the `ComponentStartInfo` for a container.
#[derive(Debug)]
pub struct ContainerStartInfo {
    /// Parsed `program` block from the container's component manifest.
    pub program: ContainerProgram,

    /// Structured configuration declared by the container component.
    pub config: ContainerStructuredConfig,

    /// Server end of the container component's outgoing directory, if any.
    outgoing_dir: Option<zx::Channel>,

    /// The container component's incoming namespace (e.g. `/pkg`).
    pub container_namespace: ContainerNamespace,

    /// Server end of the component's runtime directory, if provided.
    runtime_dir: Option<ServerEnd<fio::DirectoryMarker>>,

    /// When present, init is not spawned until PEER_CLOSED is observed on
    /// this eventpair (debugger attach handshake; see `create_container`).
    break_on_start: Option<zx::EventPair>,

    /// Identifies this component instance for memory attribution reporting.
    component_instance: Option<zx::Event>,
}
201
/// Context message attached when the container's structured config is
/// absent from the component start info.
const MISSING_CONFIG_VMO_CONTEXT: &str = concat!(
    "Retrieving container config VMO. ",
    "If this fails, make sure your container CML includes ",
    "//src/starnix/containers/container.shard.cml.",
);
207
208impl ContainerStartInfo {
209 fn new(mut start_info: frunner::ComponentStartInfo) -> Result<Self, Error> {
210 let program = start_info.program.as_ref().context("retrieving program block")?;
211 let program: ContainerProgram =
212 runner::serde::deserialize_program(&program).context("parsing program block")?;
213
214 let encoded_config =
215 start_info.encoded_config.as_ref().context(MISSING_CONFIG_VMO_CONTEXT)?;
216 let config = match encoded_config {
217 fmem::Data::Bytes(b) => ContainerStructuredConfig::from_bytes(b),
218 fmem::Data::Buffer(b) => ContainerStructuredConfig::from_vmo(&b.vmo),
219 other => anyhow::bail!("unknown Data variant {other:?}"),
220 }
221 .context("parsing container structured config")?;
222
223 let ns = start_info.ns.take().context("retrieving container namespace")?;
224 let container_namespace = ContainerNamespace::from(ns);
225
226 let outgoing_dir = start_info.outgoing_dir.take().map(|dir| dir.into_channel());
227 let component_instance = start_info.component_instance;
228
229 Ok(Self {
230 program,
231 config,
232 outgoing_dir,
233 container_namespace,
234 component_instance,
235 break_on_start: start_info.break_on_start,
236 runtime_dir: start_info.runtime_dir,
237 })
238 }
239}
240
/// Deserialized `program` block of a container component manifest.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ContainerProgram {
    /// Name of the container, used in diagnostics and error contexts.
    name: String,

    /// argv for the init process; `/container/init` is used when empty
    /// (see `DEFAULT_INIT` in `create_container`).
    init: Vec<String>,

    /// Base kernel command line; devicetree bootargs and feature-derived
    /// arguments are appended to it in `create_container`.
    #[serde(default)]
    kernel_cmdline: String,

    /// Mount specs; the first entry must be the root (`/`) mount
    /// (enforced by `create_fs_context`).
    #[serde(default)]
    mounts: Vec<String>,

    /// Feature strings enabling optional kernel/container functionality.
    #[serde(default)]
    pub features: Vec<String>,

    /// `NAME=VALUE` rlimit specs applied to init; see `parse_rlimits`.
    #[serde(default)]
    rlimits: Vec<String>,

    /// When non-empty, container startup blocks until this path exists
    /// (see `wait_for_init_file`).
    #[serde(default)]
    startup_file_path: String,

    /// Default security label, if any. NOTE(review): consumed outside this
    /// file (presumably by feature/security setup) — confirm at the caller.
    #[serde(default)]
    pub default_seclabel: Option<String>,

    /// Default UID for container tasks (stored as a string in the manifest).
    #[serde(default = "default_uid")]
    pub default_uid: runner::serde::StoreAsString<u32>,

    /// Mount options for namespace-backed filesystems. NOTE(review): read
    /// via `kernel.features.ns_mount_options(...)` — confirm the exact
    /// format at that call site.
    pub default_ns_mount_options: Option<Vec<String>>,

    /// Process/thread name patterns mapped to scheduler roles; fed into
    /// `RoleOverrides` in `create_container`.
    #[serde(default)]
    task_role_overrides: Vec<TaskSchedulerMapping>,
}
300
/// Maps tasks (matched by process and thread name) to a scheduler role.
#[derive(Default, Deserialize)]
struct TaskSchedulerMapping {
    /// Scheduler role name to apply.
    role: String,
    /// Process name to match.
    process: String,
    /// Thread name to match.
    thread: String,
}
312
313impl std::fmt::Debug for TaskSchedulerMapping {
314 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
315 write!(f, "process `{}` thread `{}` role `{}`", self.process, self.thread, self.role)
316 }
317}
318
/// Default UID for container tasks when the program block does not set
/// `default_uid`.
fn default_uid() -> runner::serde::StoreAsString<u32> {
    runner::serde::StoreAsString(42)
}
322
/// Converts `str` to an owned `CString`.
///
/// # Panics
///
/// Panics if `str` contains an interior NUL byte.
fn to_cstr(str: &str) -> CString {
    // `CString::new` accepts `&str` directly; the previous intermediate
    // `String` allocation was redundant.
    CString::new(str).unwrap()
}
327
/// Everything `Container::serve` needs to run the container's services:
/// the start info (for the outgoing directory), the component controller
/// request stream, and the receiver that resolves when init exits.
#[must_use = "The container must run serve on this config"]
pub struct ContainerServiceConfig {
    start_info: ContainerStartInfo,
    request_stream: frunner::ComponentControllerRequestStream,
    receiver: oneshot::Receiver<Result<ExitStatus, Error>>,
}
334
/// A running Starnix container: the kernel plus its supporting services.
pub struct Container {
    /// The kernel that backs this container.
    pub kernel: Arc<Kernel>,

    /// Reports memory attribution for the container and kernel.
    memory_attribution_manager: ContainerMemoryAttributionManager,

    /// Inspect node for container diagnostics; kept alive for the
    /// container's lifetime.
    _node: inspect::Node,

    /// `*mut u8` is neither `Send` nor `Sync`, so this marker makes the
    /// whole type `!Send + !Sync`, pinning it to its creating thread.
    _thread_bound: std::marker::PhantomData<*mut u8>,
}
348
impl Container {
    /// Returns the kernel's system task (the task kthreads run as).
    pub fn system_task(&self) -> &CurrentTask {
        self.kernel.kthreads.system_task()
    }

    /// Serves the container component's outgoing directory, if one was
    /// provided: the `svc/` protocols enumerated in `ExposedServices`, plus
    /// an `fs_root` remote exposing the container's root filesystem.
    ///
    /// Runs until the outgoing-directory connection closes; a `None`
    /// `outgoing_dir` returns `Ok(())` immediately.
    async fn serve_outgoing_directory(
        &self,
        outgoing_dir: Option<zx::Channel>,
    ) -> Result<(), Error> {
        if let Some(outgoing_dir) = outgoing_dir {
            let mut fs = ServiceFs::new_local();
            fs.dir("svc")
                .add_fidl_service(ExposedServices::ComponentRunner)
                .add_fidl_service(ExposedServices::ContainerController)
                .add_fidl_service(ExposedServices::GraphicalPresenter)
                .add_fidl_service(ExposedServices::LutexController);

            // Expose the container's root filesystem as `fs_root`.
            let (fs_root, fs_root_server_end) = fidl::endpoints::create_proxy();
            fs.add_remote("fs_root", fs_root);
            expose_root(
                self.kernel.kthreads.unlocked_for_async().deref_mut(),
                self.system_task(),
                fs_root_server_end,
            )?;

            fs.serve_connection(outgoing_dir.into()).map_err(|_| errno!(EINVAL))?;

            // Dispatch each incoming connection to its protocol server,
            // handling all connections concurrently.
            fs.for_each_concurrent(None, |request_stream| async {
                match request_stream {
                    ExposedServices::ComponentRunner(request_stream) => {
                        match serve_component_runner(request_stream, self.system_task()).await {
                            Ok(_) => {}
                            Err(e) => {
                                log_error!("Error serving component runner: {:?}", e);
                            }
                        }
                    }
                    ExposedServices::ContainerController(request_stream) => {
                        serve_container_controller(request_stream, self.system_task())
                            .await
                            .expect("failed to start container.")
                    }
                    ExposedServices::GraphicalPresenter(request_stream) => {
                        serve_graphical_presenter(request_stream, &self.kernel)
                            .await
                            .expect("failed to start GraphicalPresenter.")
                    }
                    ExposedServices::LutexController(request_stream) => {
                        serve_lutex_controller(request_stream, self.system_task())
                            .await
                            .expect("failed to start LutexController.")
                    }
                }
            })
            .await
        }
        Ok(())
    }

    /// Serves the container until it terminates: runs the outgoing
    /// directory and the component controller concurrently, and returns the
    /// outgoing-directory result once both complete.
    pub async fn serve(&self, service_config: ContainerServiceConfig) -> Result<(), Error> {
        let (r, _) = futures::join!(
            self.serve_outgoing_directory(service_config.start_info.outgoing_dir),
            server_component_controller(
                self.kernel.clone(),
                service_config.request_stream,
                service_config.receiver
            )
        );
        r
    }

    /// Registers a memory-attribution observer on `control_handle`.
    pub fn new_memory_attribution_observer(
        &self,
        control_handle: fattribution::ProviderControlHandle,
    ) -> attribution_server::Observer {
        self.memory_attribution_manager.new_observer(control_handle)
    }
}
430
/// Protocols multiplexed on the container's outgoing `svc/` directory.
enum ExposedServices {
    ComponentRunner(frunner::ComponentRunnerRequestStream),
    ContainerController(fstarcontainer::ControllerRequestStream),
    GraphicalPresenter(felement::GraphicalPresenterRequestStream),
    LutexController(fbinder::LutexControllerRequestStream),
}
438
/// Outcome of the container's init task: its exit status, or the error that
/// prevented it from running.
type TaskResult = Result<ExitStatus, Error>;
440
/// Serves the container component's `ComponentController` protocol until a
/// terminating event occurs, then shuts the kernel down.
///
/// Terminating events are: a `Stop` or `Kill` request, a controller-channel
/// error, or completion of the init task (via `task_complete`).
async fn server_component_controller(
    kernel: Arc<Kernel>,
    request_stream: frunner::ComponentControllerRequestStream,
    task_complete: oneshot::Receiver<TaskResult>,
) {
    // Publish the control handle so other parts of the kernel can signal
    // the component framework during shutdown.
    *kernel.container_control_handle.lock() = Some(request_stream.control_handle());

    // Tags events from the two sources we select over below.
    enum Event<T, U> {
        Controller(T),
        Completion(U),
    }

    let mut stream = futures::stream::select(
        request_stream.map(Event::Controller),
        task_complete.into_stream().map(Event::Completion),
    );

    while let Some(event) = stream.next().await {
        match event {
            Event::Controller(Ok(frunner::ComponentControllerRequest::Stop { .. })) => {
                log_info!("Stopping the container.");
            }
            Event::Controller(Ok(frunner::ComponentControllerRequest::Kill { control_handle })) => {
                log_info!("Killing the container's job.");
                // Report the instance as dead before tearing down the job.
                control_handle.shutdown_with_epitaph(zx::Status::from_raw(
                    fcomponent::Error::InstanceDied.into_primitive() as i32,
                ));
                fruntime::job_default().kill().expect("Failed to kill job");
            }
            Event::Controller(Ok(frunner::ComponentControllerRequest::_UnknownMethod {
                ordinal,
                method_type,
                ..
            })) => {
                log_error!(ordinal, method_type:?; "Unknown component controller request received.");
            }
            Event::Controller(Err(e)) => {
                log_warn!(e:?; "Container component controller channel encountered an error.");
            }
            Event::Completion(result) => {
                log_info!(result:?; "init process exited.");
            }
        }

        // NOTE(review): shutdown is initiated after *any* event, including
        // unknown methods and channel errors — confirm this is intentional.
        if !kernel.is_shutting_down() {
            kernel.shut_down();
        }
    }

    log_debug!("done listening for container-terminating events");

    // The stream can end without yielding an event (e.g. both sources close
    // immediately); make sure shutdown still happens in that case.
    if !kernel.is_shutting_down() {
        kernel.shut_down();
    }
}
498
499pub async fn create_component_from_stream(
500 mut request_stream: frunner::ComponentRunnerRequestStream,
501 kernel_extra_features: Vec<String>,
502) -> Result<(Container, ContainerServiceConfig), Error> {
503 if let Some(event) = request_stream.try_next().await? {
504 match event {
505 frunner::ComponentRunnerRequest::Start { start_info, controller, .. } => {
506 let request_stream = controller.into_stream();
507 let mut start_info = ContainerStartInfo::new(start_info)?;
508 let (sender, receiver) = oneshot::channel::<TaskResult>();
509 let container = create_container(&mut start_info, &kernel_extra_features, sender)
510 .await
511 .with_source_context(|| {
512 format!("creating container \"{}\"", start_info.program.name)
513 })?;
514 let service_config =
515 ContainerServiceConfig { start_info, request_stream, receiver };
516 return Ok((container, service_config));
517 }
518 frunner::ComponentRunnerRequest::_UnknownMethod { ordinal, .. } => {
519 log_warn!("Unknown ComponentRunner request: {ordinal}");
520 }
521 }
522 }
523 bail!("did not receive Start request");
524}
525
526async fn get_bootargs(device_tree: &Devicetree) -> Result<String, Error> {
527 device_tree
528 .root_node
529 .find("chosen")
530 .and_then(|n| {
531 n.get_property("bootargs").map(|p| {
532 let end =
533 if p.value.last() == Some(&0) { p.value.len() - 1 } else { p.value.len() };
534 match std::str::from_utf8(&p.value[..end]) {
535 Ok(s) => Ok(s.to_owned()),
536 Err(e) => {
537 log_warn!("Bootargs are not valid UTF-8: {e}");
538 Err(anyhow!("Bootargs are not valid UTF-8"))
539 }
540 }
541 })
542 })
543 .context("Couldn't find bootargs")?
544}
545
546async fn get_bootitems() -> Result<std::vec::Vec<u8>, Error> {
547 let items =
548 connect_to_protocol::<fboot::ItemsMarker>().context("Failed to connect to boot items")?;
549
550 let items_response = items
551 .get2(zbi::ZbiType::DeviceTree.into_raw(), None)
552 .await
553 .context("FIDL: Failed to get devicetree item")?
554 .map_err(|e| anyhow!("Failed to get devicetree item {:?}", e))?;
555
556 let Some(item) = items_response.last() else {
557 return Err(anyhow!("Failed to get items"));
558 };
559
560 let devicetree_vmo = &item.payload;
561 let bytes = devicetree_vmo
562 .read_to_vec(0, item.length as u64)
563 .context("Failed to read devicetree vmo")?;
564
565 Ok(bytes)
566}
567
/// Creates and boots a container from `start_info`.
///
/// Bring-up happens in a strict order: read the (optional) devicetree boot
/// item, assemble the kernel command line (program cmdline, devicetree
/// bootargs, feature-derived arguments), construct the `Kernel`, build the
/// mount tree, initialize devices and subsystems, and finally spawn the
/// init process. `task_complete` resolves with init's exit status (or the
/// error that prevented it from running).
async fn create_container(
    start_info: &mut ContainerStartInfo,
    kernel_extra_features: &[String],
    task_complete: oneshot::Sender<TaskResult>,
) -> Result<Container, Error> {
    trace_duration!(CATEGORY_STARNIX, NAME_CREATE_CONTAINER);
    // Fallback init binary when the program block's `init` is empty.
    const DEFAULT_INIT: &str = "/container/init";

    let pkg_channel = start_info.container_namespace.get_namespace_channel("/pkg").unwrap();
    let pkg_dir_proxy = fio::DirectorySynchronousProxy::new(pkg_channel);

    // The devicetree is optional: failures here only cost us its bootargs.
    let device_tree: Option<Devicetree> = match get_bootitems().await {
        Ok(items) => match parse_devicetree(&items) {
            Ok(device_tree) => Some(device_tree),
            Err(e) => {
                log_warn!("Failed to parse devicetree: {e:?}");
                None
            }
        },
        Err(e) => {
            log_warn!("Failed to get boot items for devicetree: {e:?}");
            None
        }
    };
    let mut features = parse_features(&start_info, kernel_extra_features)?;

    log_debug!("Creating container with {:#?}", features);
    let mut kernel_cmdline = BString::from(start_info.program.kernel_cmdline.as_bytes());
    let mut android_provided_bootreason = None;

    if features.android_serialno {
        if let Some(device_tree) = &device_tree {
            match get_bootargs(device_tree).await {
                Ok(args) => {
                    for item in parse_cmdline(&args) {
                        // Never forward force_normal_boot from the host.
                        if item.starts_with("androidboot.force_normal_boot") {
                            continue;
                        }
                        // Capture the bootreason instead of forwarding it;
                        // it is re-emitted below by the bootreason feature.
                        if item.starts_with("androidboot.bootreason") && features.android_bootreason
                        {
                            log_info!("Original devicetree bootarg {:?}", item);
                            if let Some((_, v)) = item.split_once('=') {
                                android_provided_bootreason = Some(v.to_string());
                            }
                            continue;
                        }
                        kernel_cmdline.extend(b" ");
                        kernel_cmdline.extend(item.bytes());
                    }
                }
                Err(err) => log_warn!("could not get bootargs: {err:?}"),
            }
        } else {
            log_warn!("No devicetree available to get bootargs for android.serialno");
        }
    }
    if features.android_bootreason {
        kernel_cmdline.extend(b" androidboot.bootreason=");

        // /tmp_lifecycle is optional; without it the bootreason helper is
        // called with no directory.
        let tmp_channel = start_info.container_namespace.get_namespace_channel("/tmp_lifecycle");
        let tmp_proxy = match tmp_channel {
            Ok(channel) => {
                Some(fio::DirectoryProxy::new(fidl::AsyncChannel::from_channel(channel)))
            }
            _ => None,
        };

        match get_or_init_android_bootreason(tmp_proxy, android_provided_bootreason).await {
            Ok(reason) => {
                kernel_cmdline.extend(reason.bytes());
            }
            Err(err) => {
                log_warn!("could not get android bootreason: {err:?}. falling back to 'unknown'");
                kernel_cmdline.extend(b"unknown");
            }
        }
    }
    if let Some(supported_vendors) = &features.magma_supported_vendors {
        kernel_cmdline.extend(b" ");
        let params = get_magma_params(supported_vendors);
        kernel_cmdline.extend(&*params);
    }

    // Scheduler-role overrides declared in the program block.
    let mut task_mappings = RoleOverrides::new();
    for m in &start_info.program.task_role_overrides {
        task_mappings.add(m.process.clone(), m.thread.clone(), m.role.clone());
    }
    let task_mappings = task_mappings.build().context("adding custom task role")?;
    let scheduler_manager = SchedulerManager::new(task_mappings);

    let crash_reporter = connect_to_protocol::<CrashReporterMarker>().unwrap();

    // Inspect hierarchy: container node owns a kernel child node.
    let node = inspect::component::inspector().root().create_child("container");
    let kernel_node = node.create_child("kernel");
    kernel_node.record_int("created_at", zx::MonotonicInstant::get().into_nanos());
    features.record_inspect(&kernel_node);

    let security_state = security::kernel_init_security(
        features.selinux.enabled,
        features.selinux.options.clone(),
        features.selinux.exceptions.clone(),
        &kernel_node,
    );

    // UTC adjustment is only wired up when the feature explicitly allows it.
    let time_adjustment_proxy = if features.enable_utc_time_adjustment {
        connect_to_protocol_sync::<AdjustMarker>()
            .map_err(|e| log_error!("could not connect to fuchsia.time.external/Adjust: {:?}", e))
            .ok()
    } else {
        log_info!("UTC adjustment is forbidden.");
        None
    };

    log_info!("final kernel cmdline: {kernel_cmdline:?}");
    kernel_node.record_string("cmdline", kernel_cmdline.to_str_lossy());

    let kernel = Kernel::new(
        kernel_cmdline,
        features.kernel.clone(),
        std::mem::take(&mut features.system_limits),
        start_info.container_namespace.try_clone()?,
        scheduler_manager,
        Some(crash_reporter),
        kernel_node,
        security_state,
        time_adjustment_proxy,
        device_tree,
    )
    .with_source_context(|| format!("creating Kernel: {}", &start_info.program.name))?;
    let (fs_context, feature_mounts) = create_fs_context(
        kernel.kthreads.unlocked_for_async().deref_mut(),
        &kernel,
        &features,
        start_info,
        &pkg_dir_proxy,
    )
    .source_context("creating FsContext")?;
    // Reserve pid 1 for init before any other task is created.
    let init_pid = kernel.pids.write().allocate_pid();
    debug_assert_eq!(init_pid, 1);

    let system_task = create_system_task(
        kernel.kthreads.unlocked_for_async().deref_mut(),
        &kernel,
        Arc::clone(&fs_context),
    )
    .source_context("create system task")?;
    // The system task gets tid 2, immediately after init's pid 1.
    debug_assert_eq!(system_task.tid, 2);

    feature_mounts(kernel.kthreads.unlocked_for_async().deref_mut(), &system_task)
        .source_context("mounting feature filesystems")?;

    kernel.kthreads.init(system_task).source_context("initializing kthreads")?;
    let system_task = kernel.kthreads.system_task();

    kernel.syslog.init(&system_task).source_context("initializing syslog")?;

    kernel.hrtimer_manager.init(system_task).source_context("initializing HrTimer manager")?;

    log_info!("Initializing suspend resume manager.");
    // Suspend/resume support is best-effort: log and continue on failure.
    if let Err(e) = kernel.suspend_resume_manager.init(&system_task) {
        log_warn!("Suspend/Resume manager initialization failed: ({e:?})");
    }

    log_info!("Initializing RTC device.");
    rtc_device_init(kernel.kthreads.unlocked_for_async().deref_mut(), &system_task)
        .context("in starnix_kernel_runner, while initializing RTC")?;

    log_info!("Registering devices and filesystems.");
    init_common_devices(kernel.kthreads.unlocked_for_async().deref_mut(), &kernel)?;
    register_common_file_systems(kernel.kthreads.unlocked_for_async().deref_mut(), &kernel);

    log_info!("Mounting filesystems.");
    mount_filesystems(
        kernel.kthreads.unlocked_for_async().deref_mut(),
        &system_task,
        start_info,
        &pkg_dir_proxy,
    )
    .source_context("mounting filesystems")?;

    {
        log_info!("Running container features.");
        run_container_features(
            kernel.kthreads.unlocked_for_async().deref_mut(),
            &system_task,
            &features,
        )?;
    }

    log_info!("Initializing remote block devices.");
    init_remote_block_devices(kernel.kthreads.unlocked_for_async().deref_mut(), &system_task)
        .source_context("initalizing remote block devices")?;

    // Build init's argv, falling back to DEFAULT_INIT when none is given.
    let argv = if start_info.program.init.is_empty() {
        vec![DEFAULT_INIT.to_string()]
    } else {
        start_info.program.init.clone()
    }
    .iter()
    .map(|s| to_cstr(s))
    .collect::<Vec<_>>();

    log_info!("Opening start_info file.");
    let executable = system_task
        .open_file(
            kernel.kthreads.unlocked_for_async().deref_mut(),
            argv[0].as_bytes().into(),
            OpenFlags::RDONLY,
        )
        .with_source_context(|| format!("opening init: {:?}", &argv[0]))?;

    let initial_name = if start_info.program.init.is_empty() {
        TaskCommand::default()
    } else {
        TaskCommand::new(start_info.program.init[0].as_bytes())
    };

    let rlimits = parse_rlimits(&start_info.program.rlimits)?;

    log_info!("Starting runtime directory.");
    if let Some(runtime_dir) = start_info.runtime_dir.take() {
        kernel.kthreads.spawn_future(
            move || async move { serve_runtime_dir(runtime_dir).await },
            "serve_runtime_dir",
        );
    }

    // Debugger handshake: hold off spawning init until the peer closes.
    if let Some(break_on_start) = start_info.break_on_start.take() {
        log_info!("Waiting for signal from debugger before spawning init process...");
        if let Err(e) =
            fuchsia_async::OnSignals::new(break_on_start, zx::Signals::EVENTPAIR_PEER_CLOSED).await
        {
            log_warn!(e:%; "Received break_on_start eventpair but couldn't wait for PEER_CLOSED.");
        }
    }

    log_info!("Creating init process.");
    let init_task = create_init_process(
        kernel.kthreads.unlocked_for_async().deref_mut(),
        &kernel,
        init_pid,
        initial_name,
        Arc::clone(&fs_context),
        &rlimits,
    )
    .with_source_context(|| format!("creating init task: {:?}", &start_info.program.init))?;

    execute_task_with_prerun_result(
        kernel.kthreads.unlocked_for_async().deref_mut(),
        init_task,
        move |locked, init_task| {
            // NOTE(review): the empty expect() message yields an unhelpful
            // panic; consider describing the failure here.
            parse_numbered_handles(locked, init_task, None, &init_task.live().files).expect("");
            init_task.exec(locked, executable, argv[0].clone(), argv.clone(), vec![])
        },
        move |result| {
            log_info!("Finished running init process: {:?}", result);
            let _ = task_complete.send(result);
        },
        None,
    )?;

    // Optionally block until init signals readiness by creating a file.
    if !start_info.program.startup_file_path.is_empty() {
        wait_for_init_file(&start_info.program.startup_file_path, &system_task, init_pid).await?;
    };

    let memory_attribution_manager = ContainerMemoryAttributionManager::new(
        Arc::downgrade(&kernel),
        start_info.component_instance.take().ok_or_else(|| Error::msg("No component instance"))?,
    );

    Ok(Container {
        kernel,
        memory_attribution_manager,
        _node: node,
        _thread_bound: Default::default(),
    })
}
867
/// Builds the container's root `FsContext` from the mount list.
///
/// The first mount spec (program mounts chained with structured-config
/// `additional_mounts`) must be the root (`/`) mount. Feature-specific
/// filesystems (`/container`, `/custom_artifacts`, `/test_data`) are layered
/// via `LayeredFsBuilder`; the returned `LayeredFsMounts` is invoked later
/// (see `create_container`) to perform the actual feature mounts once a
/// system task exists.
fn create_fs_context(
    locked: &mut Locked<Unlocked>,
    kernel: &Kernel,
    features: &Features,
    start_info: &ContainerStartInfo,
    pkg_dir_proxy: &fio::DirectorySynchronousProxy,
) -> Result<(Arc<FsContext>, LayeredFsMounts), Error> {
    let mut mounts_iter =
        start_info.program.mounts.iter().chain(start_info.config.additional_mounts.iter());
    let root = MountAction::new_for_root(
        locked,
        kernel,
        pkg_dir_proxy,
        mounts_iter.next().ok_or_else(|| anyhow!("Mounts list is empty"))?,
    )?;
    if root.path != "/" {
        anyhow::bail!("First mount in mounts list is not the root");
    }

    let mut builder = LayeredFsBuilder::new(root.fs);
    if features.container {
        // Per-component tmpfs, mounted inside the container remotefs below.
        let component_tmpfs_options = FileSystemOptions {
            params: kernel
                .features
                .ns_mount_options("#component_tmpfs")
                .context("#component_tmpfs options")?,
            ..Default::default()
        };
        let component_tmpfs = TmpFs::new_fs_with_options(locked, kernel, component_tmpfs_options)?;

        // The container's data, served from the package over remotefs.
        let container_remotefs_options = FileSystemOptions {
            source: "data".into(),
            params: kernel.features.ns_mount_options("#container").context("#container options")?,
            ..Default::default()
        };
        let container_remotefs = new_remotefs_in_root(
            locked,
            kernel,
            pkg_dir_proxy,
            container_remotefs_options,
            fio::PERM_READABLE | fio::PERM_EXECUTABLE,
        )?;

        // Order matters: /container must exist before /container/component.
        builder.add("/container", container_remotefs);
        builder.add("/container/component", component_tmpfs);
    }
    if features.custom_artifacts {
        let mount_options = FileSystemOptions {
            params: kernel
                .features
                .ns_mount_options("#custom_artifacts")
                .context("#custom_artifacts options")?,
            ..Default::default()
        };
        let fs = TmpFs::new_fs_with_options(locked, kernel, mount_options)?;
        builder.add("/custom_artifacts", fs);
    }
    if features.test_data {
        let mount_options = FileSystemOptions {
            params: kernel.features.ns_mount_options("#test_data").context("#test_data options")?,
            ..Default::default()
        };
        let fs = TmpFs::new_fs_with_options(locked, kernel, mount_options)?;
        builder.add("/test_data", fs);
    }

    let (mut root_fs, feature_mounts) = builder.build(locked, kernel);
    // When requested, make the (otherwise fixed) root writable via overlayfs.
    if features.rootfs_rw {
        root_fs = OverlayStack::wrap_fs_in_writable_layer(locked, kernel, root_fs)?;
    }

    Ok((FsContext::new(Namespace::new_with_flags(root_fs, root.flags)), feature_mounts))
}
947
948fn parse_rlimits(rlimits: &[String]) -> Result<Vec<(Resource, u64)>, Error> {
949 let mut res = Vec::new();
950
951 for rlimit in rlimits {
952 let (key, value) =
953 rlimit.split_once('=').ok_or_else(|| anyhow!("Invalid rlimit: {rlimit}"))?;
954 let value = value.parse::<u64>()?;
955 let kv = match key {
956 "RLIMIT_NOFILE" => (Resource::NOFILE, value),
957 "RLIMIT_RTPRIO" => (Resource::RTPRIO, value),
958 _ => bail!("Unknown rlimit: {key}"),
959 };
960 res.push(kv);
961 }
962
963 Ok(res)
964}
965
966fn mount_filesystems(
967 locked: &mut Locked<Unlocked>,
968 system_task: &CurrentTask,
969 start_info: &ContainerStartInfo,
970 pkg_dir_proxy: &fio::DirectorySynchronousProxy,
971) -> Result<(), Error> {
972 let mut mounts_iter =
974 start_info.program.mounts.iter().chain(start_info.config.additional_mounts.iter());
975 let _ = mounts_iter.next();
976 for mount_spec in mounts_iter {
977 let action = MountAction::from_spec(locked, system_task, pkg_dir_proxy, mount_spec)
978 .with_source_context(|| format!("creating filesystem from spec: {}", &mount_spec))?;
979 let mount_point = system_task
980 .lookup_path_from_root(locked, action.path.as_ref())
981 .with_source_context(|| format!("lookup path from root: {}", action.path))?;
982 mount_point.mount(WhatToMount::Fs(action.fs), action.flags)?;
983 }
984 Ok(())
985}
986
987fn init_remote_block_devices(
988 locked: &mut Locked<Unlocked>,
989 system_task: &CurrentTask,
990) -> Result<(), Error> {
991 remote_block_device_init(locked, system_task);
992 let entries = match std::fs::read_dir("/block") {
993 Ok(entries) => entries,
994 Err(e) => {
995 log_warn!("Failed to read block directory: {}", e);
996 return Ok(());
997 }
998 };
999 for entry in entries {
1000 let entry = entry?;
1001 let path_buf = entry.path();
1002 let path = path_buf.to_str().ok_or_else(|| anyhow!("Invalid block device path"))?;
1003 let (client_end, server_end) = fidl::endpoints::create_endpoints();
1004 match fdio::service_connect(
1005 &format!("{}/fuchsia.storage.block.Block", path),
1006 server_end.into(),
1007 ) {
1008 Ok(()) => (),
1009 Err(e) => {
1010 log_warn!("Failed to connect to block device at {}: {}", path, e);
1011 continue;
1012 }
1013 }
1014 system_task.kernel().remote_block_device_registry.create_remote_block_device(
1015 locked,
1016 system_task,
1017 entry.file_name().to_str().unwrap(),
1018 client_end,
1019 )?;
1020 }
1021 Ok(())
1022}
1023
/// Polls every 100ms until `startup_file_path` exists in `current_task`'s
/// root namespace, or errors if the init task (`init_tid`) exits first.
async fn wait_for_init_file(
    startup_file_path: &str,
    current_task: &CurrentTask,
    init_tid: tid_t,
) -> Result<(), Error> {
    loop {
        // Sleep first so we never busy-spin on lookups.
        fasync::Timer::new(fasync::MonotonicDuration::from_millis(100).after_now()).await;

        // Run the lookup under internal-operation credentials — presumably
        // so security checks treat this as a kernel-internal lookup rather
        // than one made with the caller's credentials; confirm against
        // `security::creds_start_internal_operation`.
        let creds = security::creds_start_internal_operation(current_task);
        if let Some(result) = current_task.override_creds(creds, || {
            let root = current_task.fs().root();
            let mut context = LookupContext::default();

            match current_task.lookup_path(
                current_task.kernel().kthreads.unlocked_for_async().deref_mut(),
                &mut context,
                root,
                startup_file_path.into(),
            ) {
                // The file appeared: startup is complete.
                Ok(_) => return Some(Ok(())),
                // Not there yet; fall through and poll again.
                Err(error) if error == ENOENT => {}
                Err(error) => return Some(Err(anyhow::Error::from(error))),
            };

            // Give up if init died before creating the file.
            if current_task.get_task(init_tid).upgrade().is_none() {
                return Some(Err(anyhow!(
                    "Init task terminated before startup_file_path was ready"
                )));
            }

            None
        }) {
            return result;
        }
    }
}
1061
1062async fn serve_runtime_dir(runtime_dir: ServerEnd<fio::DirectoryMarker>) {
1063 let mut fs = fuchsia_component::server::ServiceFs::new();
1064 match create_job_id_vmo() {
1065 Ok(vmo) => {
1066 fs.dir("elf").add_vmo_file_at("job_id", vmo);
1067 }
1068 Err(e) => log_error!(e:%; "failed to create vmo with job id for debuggers"),
1069 }
1070 match fs.serve_connection(runtime_dir) {
1071 Ok(_) => {
1072 fs.add_fidl_service(|job_requests: TaskProviderRequestStream| {
1073 fuchsia_async::Task::local(async move {
1074 if let Err(e) = serve_task_provider(job_requests).await {
1075 log_warn!(e:?; "Error serving TaskProvider");
1076 }
1077 })
1078 .detach();
1079 });
1080 fs.collect::<()>().await;
1081 }
1082 Err(e) => log_error!("Couldn't serve runtime directory: {e:?}"),
1083 }
1084}
1085
1086fn create_job_id_vmo() -> Result<zx::Vmo, Error> {
1087 let job_id = fuchsia_runtime::job_default().koid().context("reading own job koid")?;
1088 let job_id_str = job_id.raw_koid().to_string();
1089 let job_id_vmo = zx::Vmo::create(job_id_str.len() as u64).context("creating job id vmo")?;
1090 job_id_vmo.write(job_id_str.as_bytes(), 0).context("write job id to vmo")?;
1091 Ok(job_id_vmo)
1092}
1093
1094async fn serve_task_provider(mut job_requests: TaskProviderRequestStream) -> Result<(), Error> {
1095 while let Some(request) = job_requests.next().await {
1096 match request.context("getting next TaskProvider request")? {
1097 TaskProviderRequest::GetJob { responder } => {
1098 responder
1099 .send(
1100 fuchsia_runtime::job_default()
1101 .duplicate_handle(zx::Rights::SAME_RIGHTS)
1102 .map_err(|s| s.into_raw()),
1103 )
1104 .context("sending job for runtime dir")?;
1105 }
1106 unknown => bail!("Unknown TaskProvider method {unknown:?}"),
1107 }
1108 }
1109 Ok(())
1110}
1111
#[cfg(test)]
mod test {
    use super::wait_for_init_file;
    use fuchsia_async as fasync;
    use futures::{SinkExt, StreamExt};
    #[allow(deprecated, reason = "pre-existing usage")]
    use starnix_core::testing::create_kernel_task_and_unlocked;
    use starnix_core::vfs::FdNumber;
    use starnix_uapi::CLONE_FS;
    use starnix_uapi::file_mode::{AccessCheck, FileMode};
    use starnix_uapi::open_flags::OpenFlags;
    use starnix_uapi::signals::SIGCHLD;
    use starnix_uapi::vfs::ResolveFlags;

    /// If the startup file already exists, `wait_for_init_file` resolves
    /// without needing any further filesystem activity.
    #[fuchsia::test]
    async fn test_init_file_already_exists() {
        #[allow(deprecated, reason = "pre-existing usage")]
        let (_kernel, current_task, locked) = create_kernel_task_and_unlocked();
        let (mut sender, mut receiver) = futures::channel::mpsc::unbounded();

        let path = "/path";
        current_task
            .open_file_at(
                locked,
                FdNumber::AT_FDCWD,
                path.into(),
                OpenFlags::CREAT,
                FileMode::default(),
                ResolveFlags::empty(),
                AccessCheck::default(),
            )
            .expect("Failed to create file");

        fasync::Task::local(async move {
            // Fixed mojibake: `¤t_task` was a mangled `&current_task`.
            wait_for_init_file(path, &current_task, current_task.get_tid())
                .await
                .expect("failed to wait for file");
            sender.send(()).await.expect("failed to send message");
        })
        .detach();

        assert!(receiver.next().await.is_some());
    }

    /// `wait_for_init_file` keeps polling until the file is created, then
    /// resolves successfully.
    #[fuchsia::test]
    async fn test_init_file_wait_required() {
        #[allow(deprecated, reason = "pre-existing usage")]
        let (_kernel, current_task, locked) = create_kernel_task_and_unlocked();
        let (mut sender, mut receiver) = futures::channel::mpsc::unbounded();

        let init_task = current_task.clone_task_for_test(locked, CLONE_FS as u64, Some(SIGCHLD));
        let path = "/path";

        // Use the (still-alive) current task's tid as init so the waiter
        // doesn't bail out while we delay creating the file.
        let test_init_tid = current_task.get_tid();
        fasync::Task::local(async move {
            sender.send(()).await.expect("failed to send message");
            wait_for_init_file(path, &init_task, test_init_tid)
                .await
                .expect("failed to wait for file");
            sender.send(()).await.expect("failed to send message");
        })
        .detach();

        // Wait for the task to start waiting before creating the file.
        assert!(receiver.next().await.is_some());

        current_task
            .open_file_at(
                locked,
                FdNumber::AT_FDCWD,
                path.into(),
                OpenFlags::CREAT,
                FileMode::default(),
                ResolveFlags::empty(),
                AccessCheck::default(),
            )
            .expect("Failed to create file");

        assert!(receiver.next().await.is_some());
    }

    /// If the init task exits before the file appears, `wait_for_init_file`
    /// returns an error instead of polling forever.
    #[fuchsia::test]
    async fn test_init_exits_before_file_exists() {
        #[allow(deprecated, reason = "pre-existing usage")]
        let (_kernel, current_task, locked) = create_kernel_task_and_unlocked();
        let (mut sender, mut receiver) = futures::channel::mpsc::unbounded();

        let init_task = current_task.clone_task_for_test(locked, CLONE_FS as u64, Some(SIGCHLD));
        const STARTUP_FILE_PATH: &str = "/path";

        let test_init_tid = init_task.get_tid();
        fasync::Task::local(async move {
            sender.send(()).await.expect("failed to send message");
            // Fixed mojibake: `¤t_task` was a mangled `&current_task`.
            wait_for_init_file(STARTUP_FILE_PATH, &current_task, test_init_tid)
                .await
                .expect_err("Did not detect init exit");
            sender.send(()).await.expect("failed to send message");
        })
        .detach();

        // Wait for the task to start waiting, then terminate "init".
        assert!(receiver.next().await.is_some());

        std::mem::drop(init_task);

        assert!(receiver.next().await.is_some());
    }
}