1use crate::{
6 Features, MountAction, expose_root, parse_features, parse_numbered_handles,
7 run_container_features, serve_component_runner, serve_container_controller,
8 serve_graphical_presenter, serve_lutex_controller,
9};
10use anyhow::{Context, Error, anyhow, bail};
11use bootreason::get_or_init_android_bootreason;
12use bstr::{BString, ByteSlice};
13use devicetree::parser::parse_devicetree;
14use devicetree::types::Devicetree;
15use fidl::endpoints::{ControlHandle, RequestStream, ServerEnd};
16use fidl_fuchsia_boot as fboot;
17use fidl_fuchsia_component as fcomponent;
18use fidl_fuchsia_component_runner as frunner;
19use fidl_fuchsia_component_runner::{TaskProviderRequest, TaskProviderRequestStream};
20use fidl_fuchsia_element as felement;
21use fidl_fuchsia_feedback::CrashReporterMarker;
22use fidl_fuchsia_io as fio;
23use fidl_fuchsia_mem as fmem;
24use fidl_fuchsia_memory_attribution as fattribution;
25use fidl_fuchsia_starnix_binder as fbinder;
26use fidl_fuchsia_starnix_container as fstarcontainer;
27use fidl_fuchsia_time_external::AdjustMarker;
28use fuchsia_async as fasync;
29use fuchsia_async::DurationExt;
30use fuchsia_component::client::{connect_to_protocol, connect_to_protocol_sync};
31use fuchsia_component::server::ServiceFs;
32use fuchsia_inspect as inspect;
33use fuchsia_runtime as fruntime;
34use fuchsia_zbi as zbi;
35use futures::channel::oneshot;
36use futures::{FutureExt, StreamExt, TryStreamExt};
37use serde::Deserialize;
38use starnix_container_structured_config::Config as ContainerStructuredConfig;
39use starnix_core::device::remote_block_device::remote_block_device_init;
40use starnix_core::execution::{
41 create_init_process, create_system_task, execute_task_with_prerun_result,
42};
43use starnix_core::fs::fuchsia::new_remotefs_in_root;
44use starnix_core::fs::tmpfs::TmpFs;
45use starnix_core::security;
46use starnix_core::task::container_namespace::ContainerNamespace;
47use starnix_core::task::{
48 CurrentTask, ExitStatus, Kernel, RoleOverrides, SchedulerManager, parse_cmdline,
49};
50use starnix_core::vfs::{FileSystemOptions, FsContext, LookupContext, Namespace, WhatToMount};
51use starnix_logging::{
52 CATEGORY_STARNIX, NAME_CREATE_CONTAINER, log_debug, log_error, log_info, log_warn,
53};
54use starnix_modules::{init_common_devices, register_common_file_systems};
55use starnix_modules_layeredfs::{LayeredFsBuilder, LayeredFsMounts};
56use starnix_modules_magma::get_magma_params;
57use starnix_modules_overlayfs::OverlayStack;
58use starnix_modules_rtc::rtc_device_init;
59use starnix_sync::{Locked, Unlocked};
60use starnix_task_command::TaskCommand;
61use starnix_uapi::errors::{ENOENT, SourceContext};
62use starnix_uapi::open_flags::OpenFlags;
63use starnix_uapi::resource_limits::Resource;
64use starnix_uapi::{errno, tid_t};
65use std::ffi::CString;
66use std::ops::DerefMut;
67use std::sync::Arc;
68use zx::Task as _;
69
70use std::sync::Weak;
71
72use crate::serve_memory_attribution_provider_container;
73use attribution_server::{AttributionServer, AttributionServerHandle};
74
75struct ContainerMemoryAttributionManager {
77 memory_attribution_server: AttributionServerHandle,
79}
80
81impl ContainerMemoryAttributionManager {
82 pub fn new(kernel: Weak<Kernel>, component_instance: zx::Event) -> Self {
85 let memory_attribution_server = AttributionServer::new(Box::new(move || {
86 let kernel_ref = match kernel.upgrade() {
87 None => return vec![],
88 Some(k) => k,
89 };
90 attribution_info_for_kernel(kernel_ref.as_ref(), &component_instance)
91 }));
92
93 ContainerMemoryAttributionManager { memory_attribution_server }
94 }
95
96 pub fn new_observer(
98 &self,
99 control_handle: fattribution::ProviderControlHandle,
100 ) -> attribution_server::Observer {
101 self.memory_attribution_server.new_observer(control_handle)
102 }
103}
104
105fn attribution_info_for_kernel(
109 kernel: &Kernel,
110 component_instance: &zx::Event,
111) -> Vec<fattribution::AttributionUpdate> {
112 let (client_end, server_end) =
116 fidl::endpoints::create_request_stream::<fattribution::ProviderMarker>();
117 fuchsia_async::Task::spawn(serve_memory_attribution_provider_container(server_end, kernel))
118 .detach();
119
120 let starnix_kernel_id = Some(1);
121 let starnix_kernel_principal = fattribution::NewPrincipal {
122 identifier: starnix_kernel_id,
123 description: Some(fattribution::Description::Part("starnix_kernel".to_string())),
124 principal_type: Some(fattribution::PrincipalType::Part),
125 detailed_attribution: None,
129 ..Default::default()
130 };
131
132 let starnix_kernel_attribution = fattribution::UpdatedPrincipal {
133 identifier: starnix_kernel_id, resources: Some(fattribution::Resources::Data(fattribution::Data {
135 resources: vec![fattribution::Resource::ProcessMapped(fattribution::ProcessMapped {
136 process: fuchsia_runtime::process_self().koid().unwrap().raw_koid(),
137 base: 0, len: u64::max_value(),
139 hint_skip_handle_table: false,
140 })],
141 })),
142 ..Default::default()
143 };
144
145 let container_id = Some(2);
146 let new_principal = fattribution::NewPrincipal {
147 identifier: container_id,
148 description: Some(fattribution::Description::Component(
149 component_instance.duplicate_handle(zx::Rights::SAME_RIGHTS).unwrap(),
150 )),
151 principal_type: Some(fattribution::PrincipalType::Runnable),
152 detailed_attribution: Some(client_end),
153 ..Default::default()
154 };
155 let attribution = fattribution::UpdatedPrincipal {
156 identifier: container_id,
157 resources: Some(fattribution::Resources::Data(fattribution::Data {
158 resources: vec![fattribution::Resource::KernelObject(
159 fuchsia_runtime::job_default().koid().unwrap().raw_koid(),
160 )],
161 })),
162 ..Default::default()
163 };
164
165 vec![
166 fattribution::AttributionUpdate::Add(new_principal),
167 fattribution::AttributionUpdate::Add(starnix_kernel_principal),
168 fattribution::AttributionUpdate::Update(attribution),
169 fattribution::AttributionUpdate::Update(starnix_kernel_attribution),
170 ]
171}
172
173#[derive(Debug)]
174pub struct ContainerStartInfo {
175 pub program: ContainerProgram,
177
178 pub config: ContainerStructuredConfig,
179
180 outgoing_dir: Option<zx::Channel>,
184
185 pub container_namespace: ContainerNamespace,
188
189 runtime_dir: Option<ServerEnd<fio::DirectoryMarker>>,
191
192 break_on_start: Option<zx::EventPair>,
194
195 component_instance: Option<zx::Event>,
198}
199
/// Error context attached when the component start info carries no encoded config.
const MISSING_CONFIG_VMO_CONTEXT: &str = "Retrieving container config VMO. \
    If this fails, make sure your container CML includes \
    //src/starnix/containers/container.shard.cml.";
205
206impl ContainerStartInfo {
207 fn new(mut start_info: frunner::ComponentStartInfo) -> Result<Self, Error> {
208 let program = start_info.program.as_ref().context("retrieving program block")?;
209 let program: ContainerProgram =
210 runner::serde::deserialize_program(&program).context("parsing program block")?;
211
212 let encoded_config =
213 start_info.encoded_config.as_ref().context(MISSING_CONFIG_VMO_CONTEXT)?;
214 let config = match encoded_config {
215 fmem::Data::Bytes(b) => ContainerStructuredConfig::from_bytes(b),
216 fmem::Data::Buffer(b) => ContainerStructuredConfig::from_vmo(&b.vmo),
217 other => anyhow::bail!("unknown Data variant {other:?}"),
218 }
219 .context("parsing container structured config")?;
220
221 let ns = start_info.ns.take().context("retrieving container namespace")?;
222 let container_namespace = ContainerNamespace::from(ns);
223
224 let outgoing_dir = start_info.outgoing_dir.take().map(|dir| dir.into_channel());
225 let component_instance = start_info.component_instance;
226
227 Ok(Self {
228 program,
229 config,
230 outgoing_dir,
231 container_namespace,
232 component_instance,
233 break_on_start: start_info.break_on_start,
234 runtime_dir: start_info.runtime_dir,
235 })
236 }
237}
238
239#[derive(Debug, Default, Deserialize)]
240#[serde(deny_unknown_fields)]
241pub struct ContainerProgram {
242 name: String,
244
245 init: Vec<String>,
247
248 #[serde(default)]
250 kernel_cmdline: String,
251
252 #[serde(default)]
254 mounts: Vec<String>,
255
256 #[serde(default)]
258 pub features: Vec<String>,
259
260 #[serde(default)]
262 rlimits: Vec<String>,
263
264 #[serde(default)]
266 startup_file_path: String,
267
268 #[serde(default)]
272 pub default_seclabel: Option<String>,
273
274 #[serde(default = "default_uid")]
278 pub default_uid: runner::serde::StoreAsString<u32>,
279
280 pub default_ns_mount_options: Option<Vec<String>>,
284
285 #[serde(default)]
296 task_role_overrides: Vec<TaskSchedulerMapping>,
297}
298
299#[derive(Default, Deserialize)]
302struct TaskSchedulerMapping {
303 role: String,
305 process: String,
307 thread: String,
309}
310
311impl std::fmt::Debug for TaskSchedulerMapping {
312 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
313 write!(f, "process `{}` thread `{}` role `{}`", self.process, self.thread, self.role)
314 }
315}
316
317fn default_uid() -> runner::serde::StoreAsString<u32> {
318 runner::serde::StoreAsString(42)
319}
320
/// Converts `str` into a `CString`.
///
/// # Panics
///
/// Panics if `str` contains an interior NUL byte, since such a string cannot be
/// represented as a C string.
fn to_cstr(str: &str) -> CString {
    // `CString::new` accepts `&str` directly; no intermediate `String` is needed.
    CString::new(str)
        .unwrap_or_else(|e| panic!("string {str:?} cannot be converted to a CString: {e}"))
}
325
326#[must_use = "The container must run serve on this config"]
327pub struct ContainerServiceConfig {
328 start_info: ContainerStartInfo,
329 request_stream: frunner::ComponentControllerRequestStream,
330 receiver: oneshot::Receiver<Result<ExitStatus, Error>>,
331}
332
333pub struct Container {
334 pub kernel: Arc<Kernel>,
336
337 memory_attribution_manager: ContainerMemoryAttributionManager,
338
339 _node: inspect::Node,
341
342 _thread_bound: std::marker::PhantomData<*mut u8>,
345}
346
347impl Container {
348 pub fn system_task(&self) -> &CurrentTask {
349 self.kernel.kthreads.system_task()
350 }
351
352 async fn serve_outgoing_directory(
353 &self,
354 outgoing_dir: Option<zx::Channel>,
355 ) -> Result<(), Error> {
356 if let Some(outgoing_dir) = outgoing_dir {
357 let mut fs = ServiceFs::new_local();
360 fs.dir("svc")
361 .add_fidl_service(ExposedServices::ComponentRunner)
362 .add_fidl_service(ExposedServices::ContainerController)
363 .add_fidl_service(ExposedServices::GraphicalPresenter)
364 .add_fidl_service(ExposedServices::LutexController);
365
366 let (fs_root, fs_root_server_end) = fidl::endpoints::create_proxy();
368 fs.add_remote("fs_root", fs_root);
369 expose_root(
370 self.kernel.kthreads.unlocked_for_async().deref_mut(),
371 self.system_task(),
372 fs_root_server_end,
373 )?;
374
375 fs.serve_connection(outgoing_dir.into()).map_err(|_| errno!(EINVAL))?;
376
377 fs.for_each_concurrent(None, |request_stream| async {
378 match request_stream {
379 ExposedServices::ComponentRunner(request_stream) => {
380 match serve_component_runner(request_stream, self.system_task()).await {
381 Ok(_) => {}
382 Err(e) => {
383 log_error!("Error serving component runner: {:?}", e);
384 }
385 }
386 }
387 ExposedServices::ContainerController(request_stream) => {
388 serve_container_controller(request_stream, self.system_task())
389 .await
390 .expect("failed to start container.")
391 }
392 ExposedServices::GraphicalPresenter(request_stream) => {
393 serve_graphical_presenter(request_stream, &self.kernel)
394 .await
395 .expect("failed to start GraphicalPresenter.")
396 }
397 ExposedServices::LutexController(request_stream) => {
398 serve_lutex_controller(request_stream, self.system_task())
399 .await
400 .expect("failed to start LutexController.")
401 }
402 }
403 })
404 .await
405 }
406 Ok(())
407 }
408
409 pub async fn serve(&self, service_config: ContainerServiceConfig) -> Result<(), Error> {
410 let (r, _) = futures::join!(
411 self.serve_outgoing_directory(service_config.start_info.outgoing_dir),
412 server_component_controller(
413 self.kernel.clone(),
414 service_config.request_stream,
415 service_config.receiver
416 )
417 );
418 r
419 }
420
421 pub fn new_memory_attribution_observer(
422 &self,
423 control_handle: fattribution::ProviderControlHandle,
424 ) -> attribution_server::Observer {
425 self.memory_attribution_manager.new_observer(control_handle)
426 }
427}
428
429enum ExposedServices {
431 ComponentRunner(frunner::ComponentRunnerRequestStream),
432 ContainerController(fstarcontainer::ControllerRequestStream),
433 GraphicalPresenter(felement::GraphicalPresenterRequestStream),
434 LutexController(fbinder::LutexControllerRequestStream),
435}
436
437type TaskResult = Result<ExitStatus, Error>;
438
439async fn server_component_controller(
440 kernel: Arc<Kernel>,
441 request_stream: frunner::ComponentControllerRequestStream,
442 task_complete: oneshot::Receiver<TaskResult>,
443) {
444 *kernel.container_control_handle.lock() = Some(request_stream.control_handle());
445
446 enum Event<T, U> {
447 Controller(T),
448 Completion(U),
449 }
450
451 let mut stream = futures::stream::select(
452 request_stream.map(Event::Controller),
453 task_complete.into_stream().map(Event::Completion),
454 );
455
456 while let Some(event) = stream.next().await {
457 match event {
458 Event::Controller(Ok(frunner::ComponentControllerRequest::Stop { .. })) => {
459 log_info!("Stopping the container.");
460 }
461 Event::Controller(Ok(frunner::ComponentControllerRequest::Kill { control_handle })) => {
462 log_info!("Killing the container's job.");
463 control_handle.shutdown_with_epitaph(zx::Status::from_raw(
464 fcomponent::Error::InstanceDied.into_primitive() as i32,
465 ));
466 fruntime::job_default().kill().expect("Failed to kill job");
467 }
468 Event::Controller(Ok(frunner::ComponentControllerRequest::_UnknownMethod {
469 ordinal,
470 method_type,
471 ..
472 })) => {
473 log_error!(ordinal, method_type:?; "Unknown component controller request received.");
474 }
475 Event::Controller(Err(e)) => {
476 log_warn!(e:?; "Container component controller channel encountered an error.");
477 }
478 Event::Completion(result) => {
479 log_info!(result:?; "init process exited.");
480 }
481 }
482
483 if !kernel.is_shutting_down() {
485 kernel.shut_down();
486 }
487 }
488
489 log_debug!("done listening for container-terminating events");
490
491 if !kernel.is_shutting_down() {
493 kernel.shut_down();
494 }
495}
496
497pub async fn create_component_from_stream(
498 mut request_stream: frunner::ComponentRunnerRequestStream,
499 kernel_extra_features: Vec<String>,
500) -> Result<(Container, ContainerServiceConfig), Error> {
501 if let Some(event) = request_stream.try_next().await? {
502 match event {
503 frunner::ComponentRunnerRequest::Start { start_info, controller, .. } => {
504 let request_stream = controller.into_stream();
505 let mut start_info = ContainerStartInfo::new(start_info)?;
506 let (sender, receiver) = oneshot::channel::<TaskResult>();
507 let container = create_container(&mut start_info, &kernel_extra_features, sender)
508 .await
509 .with_source_context(|| {
510 format!("creating container \"{}\"", start_info.program.name)
511 })?;
512 let service_config =
513 ContainerServiceConfig { start_info, request_stream, receiver };
514 return Ok((container, service_config));
515 }
516 frunner::ComponentRunnerRequest::_UnknownMethod { ordinal, .. } => {
517 log_warn!("Unknown ComponentRunner request: {ordinal}");
518 }
519 }
520 }
521 bail!("did not receive Start request");
522}
523
524async fn get_bootargs(device_tree: &Devicetree) -> Result<String, Error> {
525 device_tree
526 .root_node
527 .find("chosen")
528 .and_then(|n| {
529 n.get_property("bootargs").map(|p| {
530 let end =
531 if p.value.last() == Some(&0) { p.value.len() - 1 } else { p.value.len() };
532 match std::str::from_utf8(&p.value[..end]) {
533 Ok(s) => Ok(s.to_owned()),
534 Err(e) => {
535 log_warn!("Bootargs are not valid UTF-8: {e}");
536 Err(anyhow!("Bootargs are not valid UTF-8"))
537 }
538 }
539 })
540 })
541 .context("Couldn't find bootargs")?
542}
543
544async fn get_bootitems() -> Result<std::vec::Vec<u8>, Error> {
545 let items =
546 connect_to_protocol::<fboot::ItemsMarker>().context("Failed to connect to boot items")?;
547
548 let items_response = items
549 .get2(zbi::ZbiType::DeviceTree.into_raw(), None)
550 .await
551 .context("FIDL: Failed to get devicetree item")?
552 .map_err(|e| anyhow!("Failed to get devicetree item {:?}", e))?;
553
554 let Some(item) = items_response.last() else {
555 return Err(anyhow!("Failed to get items"));
556 };
557
558 let devicetree_vmo = &item.payload;
559 let bytes = devicetree_vmo
560 .read_to_vec(0, item.length as u64)
561 .context("Failed to read devicetree vmo")?;
562
563 Ok(bytes)
564}
565
566async fn create_container(
567 start_info: &mut ContainerStartInfo,
568 kernel_extra_features: &[String],
569 task_complete: oneshot::Sender<TaskResult>,
570) -> Result<Container, Error> {
571 fuchsia_trace::duration!(CATEGORY_STARNIX, NAME_CREATE_CONTAINER);
572 const DEFAULT_INIT: &str = "/container/init";
573
574 let pkg_channel = start_info.container_namespace.get_namespace_channel("/pkg").unwrap();
575 let pkg_dir_proxy = fio::DirectorySynchronousProxy::new(pkg_channel);
576
577 let device_tree: Option<Devicetree> = match get_bootitems().await {
578 Ok(items) => match parse_devicetree(&items) {
579 Ok(device_tree) => Some(device_tree),
580 Err(e) => {
581 log_warn!("Failed to parse devicetree: {e:?}");
582 None
583 }
584 },
585 Err(e) => {
586 log_warn!("Failed to get boot items for devicetree: {e:?}");
587 None
588 }
589 };
590 let mut features = parse_features(&start_info, kernel_extra_features)?;
591
592 log_debug!("Creating container with {:#?}", features);
593 let mut kernel_cmdline = BString::from(start_info.program.kernel_cmdline.as_bytes());
594 let mut android_provided_bootreason = None;
595
596 if features.android_serialno {
597 if let Some(device_tree) = &device_tree {
598 match get_bootargs(device_tree).await {
599 Ok(args) => {
600 for item in parse_cmdline(&args) {
601 if item.starts_with("androidboot.force_normal_boot") {
602 continue;
604 }
605 if item.starts_with("androidboot.bootreason") && features.android_bootreason
606 {
607 log_info!("Original devicetree bootarg {:?}", item);
611 if let Some((_, v)) = item.split_once('=') {
612 android_provided_bootreason = Some(v.to_string());
613 }
614 continue;
615 }
616 kernel_cmdline.extend(b" ");
617 kernel_cmdline.extend(item.bytes());
618 }
619 }
620 Err(err) => log_warn!("could not get bootargs: {err:?}"),
621 }
622 } else {
623 log_warn!("No devicetree available to get bootargs for android.serialno");
624 }
625 }
626 if features.android_bootreason {
627 kernel_cmdline.extend(b" androidboot.bootreason=");
628
629 let tmp_channel = start_info.container_namespace.get_namespace_channel("/tmp_lifecycle");
630 let tmp_proxy = match tmp_channel {
631 Ok(channel) => {
632 Some(fio::DirectoryProxy::new(fidl::AsyncChannel::from_channel(channel)))
633 }
634 _ => None,
635 };
636
637 match get_or_init_android_bootreason(tmp_proxy, android_provided_bootreason).await {
638 Ok(reason) => {
639 kernel_cmdline.extend(reason.bytes());
640 }
641 Err(err) => {
642 log_warn!("could not get android bootreason: {err:?}. falling back to 'unknown'");
643 kernel_cmdline.extend(b"unknown");
644 }
645 }
646 }
647 if let Some(supported_vendors) = &features.magma_supported_vendors {
648 kernel_cmdline.extend(b" ");
649 let params = get_magma_params(supported_vendors);
650 kernel_cmdline.extend(&*params);
651 }
652
653 let mut task_mappings = RoleOverrides::new();
656 for m in &start_info.program.task_role_overrides {
657 task_mappings.add(m.process.clone(), m.thread.clone(), m.role.clone());
658 }
659 let task_mappings = task_mappings.build().context("adding custom task role")?;
660 let scheduler_manager = SchedulerManager::new(task_mappings);
661
662 let crash_reporter = connect_to_protocol::<CrashReporterMarker>().unwrap();
663
664 let node = inspect::component::inspector().root().create_child("container");
665 let kernel_node = node.create_child("kernel");
666 kernel_node.record_int("created_at", zx::MonotonicInstant::get().into_nanos());
667 features.record_inspect(&kernel_node);
668
669 let security_state = security::kernel_init_security(
670 features.selinux.enabled,
671 features.selinux.options.clone(),
672 features.selinux.exceptions.clone(),
673 &kernel_node,
674 );
675
676 let time_adjustment_proxy = if features.enable_utc_time_adjustment {
679 connect_to_protocol_sync::<AdjustMarker>()
680 .map_err(|e| log_error!("could not connect to fuchsia.time.external/Adjust: {:?}", e))
681 .ok()
682 } else {
683 log_info!("UTC adjustment is forbidden.");
685 None
686 };
687
688 log_info!("final kernel cmdline: {kernel_cmdline:?}");
689 kernel_node.record_string("cmdline", kernel_cmdline.to_str_lossy());
690
691 let kernel = Kernel::new(
692 kernel_cmdline,
693 features.kernel.clone(),
694 std::mem::take(&mut features.system_limits),
695 start_info.container_namespace.try_clone()?,
696 scheduler_manager,
697 Some(crash_reporter),
698 kernel_node,
699 security_state,
700 time_adjustment_proxy,
701 device_tree,
702 )
703 .with_source_context(|| format!("creating Kernel: {}", start_info.program.name))?;
704 let (fs_context, feature_mounts) = create_fs_context(
705 kernel.kthreads.unlocked_for_async().deref_mut(),
706 &kernel,
707 &features,
708 start_info,
709 &pkg_dir_proxy,
710 )
711 .source_context("creating FsContext")?;
712 let init_pid = kernel.pids.write().allocate_pid();
713 debug_assert_eq!(init_pid, 1);
715
716 let system_task = create_system_task(
717 kernel.kthreads.unlocked_for_async().deref_mut(),
718 &kernel,
719 Arc::clone(&fs_context),
720 )
721 .source_context("create system task")?;
722 debug_assert_eq!(system_task.tid, 2);
725
726 feature_mounts(kernel.kthreads.unlocked_for_async().deref_mut(), &system_task)
727 .source_context("mounting feature filesystems")?;
728
729 kernel.kthreads.init(system_task).source_context("initializing kthreads")?;
730 let system_task = kernel.kthreads.system_task();
731
732 kernel.syslog.init(&system_task).source_context("initializing syslog")?;
733
734 kernel.hrtimer_manager.init(system_task).source_context("initializing HrTimer manager")?;
735
736 log_info!("Initializing suspend resume manager.");
737 if let Err(e) = kernel.suspend_resume_manager.init(&system_task) {
738 log_warn!("Suspend/Resume manager initialization failed: ({e:?})");
739 }
740
741 log_info!("Initializing RTC device.");
743 rtc_device_init(kernel.kthreads.unlocked_for_async().deref_mut(), &system_task)
744 .context("in starnix_kernel_runner, while initializing RTC")?;
745
746 log_info!("Registering devices and filesystems.");
748 init_common_devices(kernel.kthreads.unlocked_for_async().deref_mut(), &kernel)?;
749 register_common_file_systems(kernel.kthreads.unlocked_for_async().deref_mut(), &kernel);
750
751 log_info!("Mounting filesystems.");
752 mount_filesystems(
753 kernel.kthreads.unlocked_for_async().deref_mut(),
754 &system_task,
755 start_info,
756 &pkg_dir_proxy,
757 )
758 .source_context("mounting filesystems")?;
759
760 {
762 log_info!("Running container features.");
763 run_container_features(
764 kernel.kthreads.unlocked_for_async().deref_mut(),
765 &system_task,
766 &features,
767 )?;
768 }
769
770 log_info!("Initializing remote block devices.");
771 init_remote_block_devices(kernel.kthreads.unlocked_for_async().deref_mut(), &system_task)
772 .source_context("initalizing remote block devices")?;
773
774 let argv = if start_info.program.init.is_empty() {
779 vec![DEFAULT_INIT.to_string()]
780 } else {
781 start_info.program.init.clone()
782 }
783 .iter()
784 .map(|s| to_cstr(s))
785 .collect::<Vec<_>>();
786
787 log_info!("Opening start_info file.");
788 let executable = system_task
789 .open_file(
790 kernel.kthreads.unlocked_for_async().deref_mut(),
791 argv[0].as_bytes().into(),
792 OpenFlags::RDONLY,
793 )
794 .with_source_context(|| format!("opening init: {:?}", argv[0]))?;
795
796 let initial_name = if start_info.program.init.is_empty() {
797 TaskCommand::default()
798 } else {
799 TaskCommand::new(start_info.program.init[0].as_bytes())
800 };
801
802 let rlimits = parse_rlimits(&start_info.program.rlimits)?;
803
804 log_info!("Starting runtime directory.");
806 if let Some(runtime_dir) = start_info.runtime_dir.take() {
807 kernel.kthreads.spawn_future(
808 move || async move { serve_runtime_dir(runtime_dir).await },
809 "serve_runtime_dir",
810 );
811 }
812
813 if let Some(break_on_start) = start_info.break_on_start.take() {
816 log_info!("Waiting for signal from debugger before spawning init process...");
817 if let Err(e) =
818 fuchsia_async::OnSignals::new(break_on_start, zx::Signals::EVENTPAIR_PEER_CLOSED).await
819 {
820 log_warn!(e:%; "Received break_on_start eventpair but couldn't wait for PEER_CLOSED.");
821 }
822 }
823
824 log_info!("Creating init process.");
825 let init_task = create_init_process(
826 kernel.kthreads.unlocked_for_async().deref_mut(),
827 &kernel,
828 init_pid,
829 initial_name,
830 Arc::clone(&fs_context),
831 &rlimits,
832 )
833 .with_source_context(|| format!("creating init task: {:?}", start_info.program.init))?;
834
835 execute_task_with_prerun_result(
836 kernel.kthreads.unlocked_for_async().deref_mut(),
837 init_task,
838 move |locked, init_task| {
839 parse_numbered_handles(locked, init_task, None, &init_task.running_state().files)
840 .expect("");
841 init_task.exec(locked, executable, argv[0].clone(), argv.clone(), vec![])
842 },
843 move |result| {
844 log_info!("Finished running init process: {:?}", result);
845 let _ = task_complete.send(result);
846 },
847 None,
848 )?;
849
850 if !start_info.program.startup_file_path.is_empty() {
851 wait_for_init_file(&start_info.program.startup_file_path, &system_task, init_pid).await?;
852 };
853
854 let memory_attribution_manager = ContainerMemoryAttributionManager::new(
855 Arc::downgrade(&kernel),
856 start_info.component_instance.take().ok_or_else(|| Error::msg("No component instance"))?,
857 );
858
859 Ok(Container {
860 kernel,
861 memory_attribution_manager,
862 _node: node,
863 _thread_bound: Default::default(),
864 })
865}
866
867fn create_fs_context(
868 locked: &mut Locked<Unlocked>,
869 kernel: &Kernel,
870 features: &Features,
871 start_info: &ContainerStartInfo,
872 pkg_dir_proxy: &fio::DirectorySynchronousProxy,
873) -> Result<(Arc<FsContext>, LayeredFsMounts), Error> {
874 let mut mounts_iter =
878 start_info.program.mounts.iter().chain(start_info.config.additional_mounts.iter());
879 let root = MountAction::new_for_root(
880 locked,
881 kernel,
882 pkg_dir_proxy,
883 mounts_iter.next().ok_or_else(|| anyhow!("Mounts list is empty"))?,
884 )?;
885 if root.path != "/" {
886 anyhow::bail!("First mount in mounts list is not the root");
887 }
888
889 let mut builder = LayeredFsBuilder::new(root.fs);
890 if features.container {
891 let component_tmpfs_options = FileSystemOptions {
894 params: kernel
895 .features
896 .ns_mount_options("#component_tmpfs")
897 .context("#component_tmpfs options")?,
898 ..Default::default()
899 };
900 let component_tmpfs = TmpFs::new_fs_with_options(locked, kernel, component_tmpfs_options)?;
901
902 let container_remotefs_options = FileSystemOptions {
904 source: "data".into(),
905 params: kernel.features.ns_mount_options("#container").context("#container options")?,
906 ..Default::default()
907 };
908 let container_remotefs = new_remotefs_in_root(
909 locked,
910 kernel,
911 pkg_dir_proxy,
912 container_remotefs_options,
913 fio::PERM_READABLE | fio::PERM_EXECUTABLE,
914 )?;
915
916 builder.add("/container", container_remotefs);
917 builder.add("/container/component", component_tmpfs);
918 }
919 if features.custom_artifacts {
920 let mount_options = FileSystemOptions {
921 params: kernel
922 .features
923 .ns_mount_options("#custom_artifacts")
924 .context("#custom_artifacts options")?,
925 ..Default::default()
926 };
927 let fs = TmpFs::new_fs_with_options(locked, kernel, mount_options)?;
928 builder.add("/custom_artifacts", fs);
929 }
930 if features.test_data {
931 let mount_options = FileSystemOptions {
932 params: kernel.features.ns_mount_options("#test_data").context("#test_data options")?,
933 ..Default::default()
934 };
935 let fs = TmpFs::new_fs_with_options(locked, kernel, mount_options)?;
936 builder.add("/test_data", fs);
937 }
938
939 let (mut root_fs, feature_mounts) = builder.build(locked, kernel);
940 if features.rootfs_rw {
941 root_fs = OverlayStack::wrap_fs_in_writable_layer(locked, kernel, root_fs)?;
942 }
943
944 Ok((FsContext::new(Namespace::new_with_flags(root_fs, root.flags)), feature_mounts))
945}
946
947fn parse_rlimits(rlimits: &[String]) -> Result<Vec<(Resource, u64)>, Error> {
948 let mut res = Vec::new();
949
950 for rlimit in rlimits {
951 let (key, value) =
952 rlimit.split_once('=').ok_or_else(|| anyhow!("Invalid rlimit: {rlimit}"))?;
953 let value = value.parse::<u64>()?;
954 let kv = match key {
955 "RLIMIT_NOFILE" => (Resource::NOFILE, value),
956 "RLIMIT_RTPRIO" => (Resource::RTPRIO, value),
957 _ => bail!("Unknown rlimit: {key}"),
958 };
959 res.push(kv);
960 }
961
962 Ok(res)
963}
964
965fn mount_filesystems(
966 locked: &mut Locked<Unlocked>,
967 system_task: &CurrentTask,
968 start_info: &ContainerStartInfo,
969 pkg_dir_proxy: &fio::DirectorySynchronousProxy,
970) -> Result<(), Error> {
971 let mut mounts_iter =
973 start_info.program.mounts.iter().chain(start_info.config.additional_mounts.iter());
974 let _ = mounts_iter.next();
975 for mount_spec in mounts_iter {
976 let action = MountAction::from_spec(locked, system_task, pkg_dir_proxy, mount_spec)
977 .with_source_context(|| format!("creating filesystem from spec: {}", mount_spec))?;
978 let mount_point = system_task
979 .lookup_path_from_root(locked, action.path.as_ref())
980 .with_source_context(|| format!("lookup path from root: {}", action.path))?;
981 mount_point.mount(WhatToMount::Fs(action.fs), action.flags)?;
982 }
983 Ok(())
984}
985
986fn init_remote_block_devices(
987 locked: &mut Locked<Unlocked>,
988 system_task: &CurrentTask,
989) -> Result<(), Error> {
990 remote_block_device_init(locked, system_task);
991 let entries = match std::fs::read_dir("/block") {
992 Ok(entries) => entries,
993 Err(e) => {
994 log_warn!("Failed to read block directory: {}", e);
995 return Ok(());
996 }
997 };
998 for entry in entries {
999 let entry = entry?;
1000 let path_buf = entry.path();
1001 let path = path_buf.to_str().ok_or_else(|| anyhow!("Invalid block device path"))?;
1002 let (client_end, server_end) = fidl::endpoints::create_endpoints();
1003 match fdio::service_connect(
1004 &format!("{}/fuchsia.storage.block.Block", path),
1005 server_end.into(),
1006 ) {
1007 Ok(()) => (),
1008 Err(e) => {
1009 log_warn!("Failed to connect to block device at {}: {}", path, e);
1010 continue;
1011 }
1012 }
1013 system_task.kernel().remote_block_device_registry.create_remote_block_device(
1014 locked,
1015 system_task,
1016 entry.file_name().to_str().unwrap(),
1017 client_end,
1018 )?;
1019 }
1020 Ok(())
1021}
1022
1023async fn wait_for_init_file(
1024 startup_file_path: &str,
1025 current_task: &CurrentTask,
1026 init_tid: tid_t,
1027) -> Result<(), Error> {
1028 loop {
1030 fasync::Timer::new(fasync::MonotonicDuration::from_millis(100).after_now()).await;
1031
1032 let creds = security::creds_start_internal_operation(current_task);
1033 if let Some(result) = current_task.override_creds(creds, || {
1034 let root = current_task.fs().root();
1035 let mut context = LookupContext::default();
1036
1037 match current_task.lookup_path(
1038 current_task.kernel().kthreads.unlocked_for_async().deref_mut(),
1039 &mut context,
1040 root,
1041 startup_file_path.into(),
1042 ) {
1043 Ok(_) => return Some(Ok(())),
1044 Err(error) if error == ENOENT => {}
1045 Err(error) => return Some(Err(anyhow::Error::from(error))),
1046 };
1047
1048 if current_task.get_task(init_tid).is_err() {
1049 return Some(Err(anyhow!(
1050 "Init task terminated before startup_file_path was ready"
1051 )));
1052 }
1053
1054 None
1055 }) {
1056 return result;
1057 }
1058 }
1059}
1060
1061async fn serve_runtime_dir(runtime_dir: ServerEnd<fio::DirectoryMarker>) {
1062 let mut fs = fuchsia_component::server::ServiceFs::new();
1063 match create_job_id_vmo() {
1064 Ok(vmo) => {
1065 fs.dir("elf").add_vmo_file_at("job_id", vmo);
1066 }
1067 Err(e) => log_error!(e:%; "failed to create vmo with job id for debuggers"),
1068 }
1069 match fs.serve_connection(runtime_dir) {
1070 Ok(_) => {
1071 fs.add_fidl_service(|job_requests: TaskProviderRequestStream| {
1072 fuchsia_async::Task::local(async move {
1073 if let Err(e) = serve_task_provider(job_requests).await {
1074 log_warn!(e:?; "Error serving TaskProvider");
1075 }
1076 })
1077 .detach();
1078 });
1079 fs.collect::<()>().await;
1080 }
1081 Err(e) => log_error!("Couldn't serve runtime directory: {e:?}"),
1082 }
1083}
1084
1085fn create_job_id_vmo() -> Result<zx::Vmo, Error> {
1086 let job_id = fuchsia_runtime::job_default().koid().context("reading own job koid")?;
1087 let job_id_str = job_id.raw_koid().to_string();
1088 let job_id_vmo = zx::Vmo::create(job_id_str.len() as u64).context("creating job id vmo")?;
1089 job_id_vmo.write(job_id_str.as_bytes(), 0).context("write job id to vmo")?;
1090 Ok(job_id_vmo)
1091}
1092
1093async fn serve_task_provider(mut job_requests: TaskProviderRequestStream) -> Result<(), Error> {
1094 while let Some(request) = job_requests.next().await {
1095 match request.context("getting next TaskProvider request")? {
1096 TaskProviderRequest::GetJob { responder } => {
1097 responder
1098 .send(
1099 fuchsia_runtime::job_default()
1100 .duplicate_handle(zx::Rights::SAME_RIGHTS)
1101 .map_err(|s| s.into_raw()),
1102 )
1103 .context("sending job for runtime dir")?;
1104 }
1105 unknown => bail!("Unknown TaskProvider method {unknown:?}"),
1106 }
1107 }
1108 Ok(())
1109}
1110
/// Tests for `wait_for_init_file`.
///
/// Each test runs the wait on a detached local task and uses an unbounded channel to report
/// progress back to the test body.
#[cfg(test)]
mod test {
    use super::wait_for_init_file;
    use fuchsia_async as fasync;
    use futures::{SinkExt, StreamExt};
    #[allow(deprecated, reason = "pre-existing usage")]
    use starnix_core::testing::create_kernel_task_and_unlocked;
    use starnix_core::vfs::FdNumber;
    use starnix_uapi::CLONE_FS;
    use starnix_uapi::file_mode::{AccessCheck, FileMode};
    use starnix_uapi::open_flags::OpenFlags;
    use starnix_uapi::signals::SIGCHLD;
    use starnix_uapi::vfs::ResolveFlags;

    /// The wait completes successfully when the file was created before the wait started.
    #[fuchsia::test]
    async fn test_init_file_already_exists() {
        #[allow(deprecated, reason = "pre-existing usage")]
        let (_kernel, current_task, locked) = create_kernel_task_and_unlocked();
        let (mut sender, mut receiver) = futures::channel::mpsc::unbounded();

        // Create the file up front so the very first lookup can succeed.
        let path = "/path";
        current_task
            .open_file_at(
                locked,
                FdNumber::AT_FDCWD,
                path.into(),
                OpenFlags::CREAT,
                FileMode::default(),
                ResolveFlags::empty(),
                AccessCheck::default(),
            )
            .expect("Failed to create file");

        fasync::Task::local(async move {
            wait_for_init_file(path, &current_task, current_task.get_tid())
                .await
                .expect("failed to wait for file");
            sender.send(()).await.expect("failed to send message");
        })
        .detach();

        // Wait for the message sent once the file has been detected.
        assert!(receiver.next().await.is_some());
    }

    /// The wait completes successfully when the file is created only after the wait started.
    #[fuchsia::test]
    async fn test_init_file_wait_required() {
        #[allow(deprecated, reason = "pre-existing usage")]
        let (_kernel, current_task, locked) = create_kernel_task_and_unlocked();
        let (mut sender, mut receiver) = futures::channel::mpsc::unbounded();

        let init_task = current_task.clone_task_for_test(locked, CLONE_FS as u64, Some(SIGCHLD));
        let path = "/path";

        let test_init_tid = current_task.get_tid();
        fasync::Task::local(async move {
            // First message: the wait is about to start.
            sender.send(()).await.expect("failed to send message");
            wait_for_init_file(path, &init_task, test_init_tid)
                .await
                .expect("failed to wait for file");
            // Second message: the wait has finished.
            sender.send(()).await.expect("failed to send message");
        })
        .detach();

        // Wait for the message that the file check is starting.
        assert!(receiver.next().await.is_some());

        // Create the file that is being waited on.
        current_task
            .open_file_at(
                locked,
                FdNumber::AT_FDCWD,
                path.into(),
                OpenFlags::CREAT,
                FileMode::default(),
                ResolveFlags::empty(),
                AccessCheck::default(),
            )
            .expect("Failed to create file");

        // Wait for the message sent once the file has been detected.
        assert!(receiver.next().await.is_some());
    }

    /// The wait fails when the init task goes away while the file is still missing.
    #[fuchsia::test]
    async fn test_init_exits_before_file_exists() {
        #[allow(deprecated, reason = "pre-existing usage")]
        let (_kernel, current_task, locked) = create_kernel_task_and_unlocked();
        let (mut sender, mut receiver) = futures::channel::mpsc::unbounded();

        let init_task = current_task.clone_task_for_test(locked, CLONE_FS as u64, Some(SIGCHLD));
        const STARTUP_FILE_PATH: &str = "/path";

        let test_init_tid = init_task.get_tid();
        fasync::Task::local(async move {
            // First message: the wait is about to start.
            sender.send(()).await.expect("failed to send message");
            wait_for_init_file(STARTUP_FILE_PATH, &current_task, test_init_tid)
                .await
                .expect_err("Did not detect init exit");
            // Second message: the wait returned an error as expected.
            sender.send(()).await.expect("failed to send message");
        })
        .detach();

        // Wait for the message that the file check is starting.
        assert!(receiver.next().await.is_some());

        // Drop the cloned task; the wait is expected to then report that init is gone.
        std::mem::drop(init_task);

        // Wait for the message sent once the failure has been detected.
        assert!(receiver.next().await.is_some());
    }
}