1use crate::signals::{SignalInfo, send_freeze_signal};
11use crate::task::{Kernel, ThreadGroup, ThreadGroupKey, WaitQueue, Waiter};
12use crate::vfs::{FsStr, FsString, PathBuilder};
13use starnix_logging::{CATEGORY_STARNIX, log_warn, trace_duration, track_stub};
14use starnix_sync::{FileOpsCore, LockBefore, Locked, Mutex, MutexGuard, ThreadGroupLimits};
15use starnix_types::ownership::TempRef;
16use starnix_uapi::errors::Errno;
17use starnix_uapi::signals::SIGKILL;
18use starnix_uapi::{errno, error, pid_t};
19use std::collections::{BTreeMap, HashMap, HashSet, btree_map, hash_map};
20use std::ops::{Deref, DerefMut};
21use std::sync::atomic::{AtomicU64, Ordering};
22use std::sync::{Arc, Weak};
23
24use crate::signals::KernelSignal;
25
26#[derive(Debug)]
30pub struct KernelCgroups {
31 pub cgroup2: Arc<CgroupRoot>,
32}
33
34impl KernelCgroups {
35 pub fn lock_cgroup2_pid_table(&self) -> MutexGuard<'_, CgroupPidTable> {
38 self.cgroup2.pid_table.lock()
39 }
40}
41
42impl Default for KernelCgroups {
43 fn default() -> Self {
44 Self { cgroup2: CgroupRoot::new() }
45 }
46}
47
/// Freezer state of a cgroup.
///
/// The variant order is significant: the derived `Ord` makes `Thawed < Frozen`, so taking the
/// maximum of two states yields `Frozen` as soon as either of them is frozen.
///
/// `Thawed` is the default state.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub enum FreezerState {
    /// Not frozen.
    #[default]
    Thawed,
    /// Frozen.
    Frozen,
}
59
60impl std::fmt::Display for FreezerState {
61 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62 match self {
63 FreezerState::Frozen => write!(f, "1"),
64 FreezerState::Thawed => write!(f, "0"),
65 }
66 }
67}
68
69#[derive(Default)]
70pub struct CgroupFreezerState {
71 pub self_freezer_state: FreezerState,
73 pub effective_freezer_state: FreezerState,
77}
78
79pub trait CgroupOps: Send + Sync + 'static {
81 fn id(&self) -> u64;
83
84 fn add_process(
86 &self,
87 locked: &mut Locked<FileOpsCore>,
88 thread_group: &ThreadGroup,
89 ) -> Result<(), Errno>;
90
91 fn new_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno>;
94
95 fn get_children(&self) -> Result<Vec<CgroupHandle>, Errno>;
97
98 fn get_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno>;
100
101 fn remove_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno>;
104
105 fn get_pids(&self, kernel: &Kernel) -> Vec<pid_t>;
107
108 fn kill(&self);
110
111 fn is_populated(&self) -> bool;
113
114 fn get_freezer_state(&self) -> CgroupFreezerState;
116
117 fn freeze(&self, locked: &mut Locked<FileOpsCore>);
119
120 fn thaw(&self);
122}
123
124#[derive(Debug, Default)]
127pub struct CgroupPidTable(HashMap<ThreadGroupKey, Weak<Cgroup>>);
128impl Deref for CgroupPidTable {
129 type Target = HashMap<ThreadGroupKey, Weak<Cgroup>>;
130
131 fn deref(&self) -> &Self::Target {
132 &self.0
133 }
134}
135impl DerefMut for CgroupPidTable {
136 fn deref_mut(&mut self) -> &mut Self::Target {
137 &mut self.0
138 }
139}
140
141impl CgroupPidTable {
142 pub fn inherit_cgroup(&mut self, parent: &ThreadGroup, child: &ThreadGroup) {
145 assert!(child.read().tasks_count() == 0, "threadgroup must be newly created");
146 if let Some(weak_cgroup) = self.0.get(&parent.into()).cloned() {
147 let Some(cgroup) = weak_cgroup.upgrade() else {
148 log_warn!("ignored attempt to inherit a non-existant cgroup");
149 return;
150 };
151 assert!(
152 self.0.insert(child.into(), weak_cgroup).map(|c| c.strong_count() == 0).is_none(),
153 "child pid should not exist when inheriting"
154 );
155 cgroup.state.lock().processes.insert(child.into());
157 }
158 }
159
160 pub fn maybe_create_freeze_signal<TG: Copy + Into<ThreadGroupKey>>(
162 &self,
163 tg: TG,
164 ) -> Option<KernelSignal> {
165 let Some(weak_cgroup) = self.0.get(&tg.into()) else {
166 return None;
167 };
168 let Some(cgroup) = weak_cgroup.upgrade() else {
169 return None;
170 };
171 let state = cgroup.state.lock();
172 if state.get_effective_freezer_state() != FreezerState::Frozen {
173 return None;
174 }
175 Some(KernelSignal::Freeze(state.create_freeze_waiter()))
176 }
177}
178
179#[derive(Debug)]
191pub struct CgroupRoot {
192 pid_table: Mutex<CgroupPidTable>,
194
195 children: Mutex<CgroupChildren>,
197
198 weak_self: Weak<CgroupRoot>,
200
201 next_id: AtomicU64,
203}
204
205impl CgroupRoot {
206 pub fn new() -> Arc<CgroupRoot> {
207 Arc::new_cyclic(|weak_self| Self {
208 pid_table: Default::default(),
209 children: Default::default(),
210 weak_self: weak_self.clone(),
211 next_id: AtomicU64::new(1),
212 })
213 }
214
215 fn get_next_id(&self) -> u64 {
216 self.next_id.fetch_add(1, Ordering::Relaxed)
217 }
218
219 pub fn get_cgroup<TG: Copy + Into<ThreadGroupKey>>(&self, tg: TG) -> Option<Weak<Cgroup>> {
220 self.pid_table.lock().get(&tg.into()).cloned()
221 }
222}
223
224impl CgroupOps for CgroupRoot {
225 fn id(&self) -> u64 {
226 0
227 }
228
229 fn add_process(
230 &self,
231 locked: &mut Locked<FileOpsCore>,
232 thread_group: &ThreadGroup,
233 ) -> Result<(), Errno> {
234 let mut pid_table = self.pid_table.lock();
235 if let Some(entry) = pid_table.remove(&thread_group.into()) {
236 if let Some(cgroup) = entry.upgrade() {
238 cgroup.state.lock().remove_process(locked, thread_group)?;
239 }
240 }
241 Ok(())
245 }
246
247 fn new_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno> {
248 let id = self.get_next_id();
249 let new_child = Cgroup::new(id, name, &self.weak_self, None);
250 let mut children = self.children.lock();
251 children.insert_child(name.into(), new_child)
252 }
253
254 fn get_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno> {
255 let children = self.children.lock();
256 children.get_child(name).ok_or_else(|| errno!(ENOENT))
257 }
258
259 fn remove_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno> {
260 let mut children = self.children.lock();
261 children.remove_child(name)
262 }
263
264 fn get_children(&self) -> Result<Vec<CgroupHandle>, Errno> {
265 let children = self.children.lock();
266 Ok(children.get_children())
267 }
268
269 fn get_pids(&self, kernel: &Kernel) -> Vec<pid_t> {
270 let controlled_pids: HashSet<pid_t> =
271 self.pid_table.lock().keys().filter_map(|v| v.upgrade().map(|tg| tg.leader)).collect();
272 let kernel_pids = kernel.pids.read().process_ids();
273 kernel_pids.into_iter().filter(|pid| !controlled_pids.contains(pid)).collect()
274 }
275
276 fn kill(&self) {
277 unreachable!("Root cgroup cannot kill its processes.");
278 }
279
280 fn is_populated(&self) -> bool {
281 false
282 }
283
284 fn get_freezer_state(&self) -> CgroupFreezerState {
285 Default::default()
286 }
287
288 fn freeze(&self, _locked: &mut Locked<FileOpsCore>) {
289 unreachable!("Root cgroup cannot freeze any processes.");
290 }
291
292 fn thaw(&self) {
293 unreachable!("Root cgroup cannot thaw any processes.");
294 }
295}
296
297#[derive(Debug, Default)]
298struct CgroupChildren(BTreeMap<FsString, CgroupHandle>);
299impl CgroupChildren {
300 fn insert_child(&mut self, name: FsString, child: CgroupHandle) -> Result<CgroupHandle, Errno> {
301 let btree_map::Entry::Vacant(child_entry) = self.0.entry(name) else {
302 return error!(EEXIST);
303 };
304 Ok(child_entry.insert(child).clone())
305 }
306
307 fn remove_child(&mut self, name: &FsStr) -> Result<CgroupHandle, Errno> {
308 let btree_map::Entry::Occupied(child_entry) = self.0.entry(name.into()) else {
309 return error!(ENOENT);
310 };
311 let child = child_entry.get();
312
313 let mut child_state = child.state.lock();
314 assert!(!child_state.deleted, "child cannot be deleted");
315
316 child_state.update_processes();
317 if !child_state.processes.is_empty() {
318 return error!(EBUSY);
319 }
320 if !child_state.children.is_empty() {
321 return error!(EBUSY);
322 }
323
324 child_state.deleted = true;
325 drop(child_state);
326
327 Ok(child_entry.remove())
328 }
329
330 fn get_child(&self, name: &FsStr) -> Option<CgroupHandle> {
331 self.0.get(name).cloned()
332 }
333
334 fn get_children(&self) -> Vec<CgroupHandle> {
335 self.0.values().cloned().collect()
336 }
337}
338
339impl Deref for CgroupChildren {
340 type Target = BTreeMap<FsString, CgroupHandle>;
341
342 fn deref(&self) -> &Self::Target {
343 &self.0
344 }
345}
346
347#[derive(Debug, Default)]
348struct CgroupState {
349 children: CgroupChildren,
351
352 processes: HashSet<ThreadGroupKey>,
354
355 deleted: bool,
357
358 wait_queue: WaitQueue,
360
361 self_freezer_state: FreezerState,
363
364 inherited_freezer_state: FreezerState,
366}
367
368impl CgroupState {
369 fn create_freeze_waiter(&self) -> Waiter {
372 let waiter = Waiter::new_ignoring_signals();
373 self.wait_queue.wait_async(&waiter);
374 waiter
375 }
376
377 fn update_processes(&mut self) {
379 self.processes.retain(|thread_group| {
380 let Some(thread_group) = thread_group.upgrade() else {
381 return false;
382 };
383 let terminating = thread_group.read().is_terminating();
384 !terminating
385 });
386 }
387
388 fn freeze_thread_group<L>(&self, locked: &mut Locked<L>, thread_group: &ThreadGroup)
389 where
390 L: LockBefore<ThreadGroupLimits>,
391 {
392 let tasks = thread_group.read().tasks().map(TempRef::into_static).collect::<Vec<_>>();
396 for task in tasks {
397 send_freeze_signal(locked, &task, self.create_freeze_waiter())
398 .expect("sending freeze signal should not fail");
399 }
400 }
401
402 fn thaw_thread_group<L>(&self, _locked: &mut Locked<L>, thread_group: &ThreadGroup)
403 where
404 L: LockBefore<ThreadGroupLimits>,
405 {
406 let tasks = thread_group.read().tasks().map(TempRef::into_static).collect::<Vec<_>>();
410 for task in tasks {
411 task.write().thaw();
412 task.interrupt();
413 }
414 }
415
416 fn get_effective_freezer_state(&self) -> FreezerState {
417 std::cmp::max(self.self_freezer_state, self.inherited_freezer_state)
418 }
419
420 fn add_process<L>(
421 &mut self,
422 locked: &mut Locked<L>,
423 thread_group: &ThreadGroup,
424 ) -> Result<(), Errno>
425 where
426 L: LockBefore<ThreadGroupLimits>,
427 {
428 if self.deleted {
429 return error!(ENOENT);
430 }
431 self.processes.insert(thread_group.into());
432
433 if self.get_effective_freezer_state() == FreezerState::Frozen {
434 self.freeze_thread_group(locked, &thread_group);
435 }
436 Ok(())
437 }
438
439 fn remove_process<L>(
440 &mut self,
441 locked: &mut Locked<L>,
442 thread_group: &ThreadGroup,
443 ) -> Result<(), Errno>
444 where
445 L: LockBefore<ThreadGroupLimits>,
446 {
447 if self.deleted {
448 return error!(ENOENT);
449 }
450 self.processes.remove(&thread_group.into());
451
452 if self.get_effective_freezer_state() == FreezerState::Frozen {
453 self.thaw_thread_group(locked, thread_group);
454 }
455 Ok(())
456 }
457
458 fn propagate_freeze<L>(&mut self, locked: &mut Locked<L>, inherited_freezer_state: FreezerState)
459 where
460 L: LockBefore<ThreadGroupLimits>,
461 {
462 let prev_effective_freezer_state = self.get_effective_freezer_state();
463 self.inherited_freezer_state = inherited_freezer_state;
464 if prev_effective_freezer_state == FreezerState::Frozen {
465 return;
466 }
467
468 for thread_group in self.processes.iter() {
469 let Some(thread_group) = thread_group.upgrade() else {
470 continue;
471 };
472 self.freeze_thread_group(locked, &thread_group);
473 }
474
475 for child in self.children.get_children() {
477 child.state.lock().propagate_freeze(locked, FreezerState::Frozen);
478 }
479 }
480
481 fn propagate_thaw(&mut self, inherited_freezer_state: FreezerState) {
482 self.inherited_freezer_state = inherited_freezer_state;
483 if self.get_effective_freezer_state() == FreezerState::Thawed {
484 self.wait_queue.notify_all();
485 for child in self.children.get_children() {
486 child.state.lock().propagate_thaw(FreezerState::Thawed);
487 }
488 }
489 }
490
491 fn propagate_kill(&self) {
492 for thread_group in self.processes.iter() {
493 let Some(thread_group) = thread_group.upgrade() else {
494 continue;
495 };
496 thread_group.write().send_signal(SignalInfo::default(SIGKILL));
497 }
498
499 for child in self.children.get_children() {
501 child.state.lock().propagate_kill();
502 }
503 }
504}
505
506#[derive(Debug)]
508pub struct Cgroup {
509 root: Weak<CgroupRoot>,
510
511 id: u64,
513
514 name: FsString,
516
517 parent: Option<Weak<Cgroup>>,
520
521 state: Mutex<CgroupState>,
523
524 weak_self: Weak<Cgroup>,
525}
526pub type CgroupHandle = Arc<Cgroup>;
527
528pub fn path_from_root(weak_cgroup: Option<Weak<Cgroup>>) -> Result<FsString, Errno> {
530 let cgroup = match weak_cgroup {
531 Some(weak_cgroup) => Weak::upgrade(&weak_cgroup).ok_or_else(|| errno!(ENODEV))?,
532 None => return Ok("/".into()),
533 };
534 let mut path = PathBuilder::new();
535 let mut current = Some(cgroup);
536 while let Some(cgroup) = current {
537 path.prepend_element(cgroup.name());
538 current = cgroup.parent()?;
539 }
540 Ok(path.build_absolute())
541}
542
543impl Cgroup {
544 pub fn new(
545 id: u64,
546 name: &FsStr,
547 root: &Weak<CgroupRoot>,
548 parent: Option<Weak<Cgroup>>,
549 ) -> CgroupHandle {
550 Arc::new_cyclic(|weak| Self {
551 id,
552 root: root.clone(),
553 name: name.to_owned(),
554 parent,
555 state: Default::default(),
556 weak_self: weak.clone(),
557 })
558 }
559
560 pub fn name(&self) -> &FsStr {
561 self.name.as_ref()
562 }
563
564 fn root(&self) -> Result<Arc<CgroupRoot>, Errno> {
565 self.root.upgrade().ok_or_else(|| errno!(ENODEV))
566 }
567
568 fn parent(&self) -> Result<Option<CgroupHandle>, Errno> {
571 self.parent.as_ref().map(|weak| weak.upgrade().ok_or_else(|| errno!(ENODEV))).transpose()
572 }
573}
574
575impl CgroupOps for Cgroup {
576 fn id(&self) -> u64 {
577 self.id
578 }
579
580 fn add_process(
581 &self,
582 locked: &mut Locked<FileOpsCore>,
583 thread_group: &ThreadGroup,
584 ) -> Result<(), Errno> {
585 let root = self.root()?;
586 let mut pid_table = root.pid_table.lock();
587 match pid_table.entry(thread_group.into()) {
588 hash_map::Entry::Occupied(mut entry) => {
589 if std::ptr::eq(self, entry.get().as_ptr()) {
592 return Ok(());
593 }
594
595 track_stub!(TODO("https://fxbug.dev/383374687"), "check permissions");
597 if let Some(other_cgroup) = entry.get().upgrade() {
598 other_cgroup.state.lock().remove_process(locked, thread_group)?;
599 }
600
601 self.state.lock().add_process(locked, thread_group)?;
602 entry.insert(self.weak_self.clone());
603 }
604 hash_map::Entry::Vacant(entry) => {
605 self.state.lock().add_process(locked, thread_group)?;
606 entry.insert(self.weak_self.clone());
607 }
608 }
609
610 Ok(())
611 }
612
613 fn new_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno> {
614 let id = self.root()?.get_next_id();
615 let new_child = Cgroup::new(id, name, &self.root, Some(self.weak_self.clone()));
616 let mut state = self.state.lock();
617 if state.deleted {
618 return error!(ENOENT);
619 }
620 new_child.state.lock().inherited_freezer_state = state.get_effective_freezer_state();
622 state.children.insert_child(name.into(), new_child)
623 }
624
625 fn get_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno> {
626 let state = self.state.lock();
627 state.children.get_child(name).ok_or_else(|| errno!(ENOENT))
628 }
629
630 fn remove_child(&self, name: &FsStr) -> Result<CgroupHandle, Errno> {
631 let mut state = self.state.lock();
632 if state.deleted {
633 return error!(ENOENT);
634 }
635 state.children.remove_child(name)
636 }
637
638 fn get_children(&self) -> Result<Vec<CgroupHandle>, Errno> {
639 let state = self.state.lock();
640 if state.deleted {
641 return error!(ENOENT);
642 }
643 Ok(state.children.get_children())
644 }
645
646 fn get_pids(&self, _kernel: &Kernel) -> Vec<pid_t> {
647 let mut state = self.state.lock();
648 state.update_processes();
649 state.processes.iter().filter_map(|v| v.upgrade().map(|tg| tg.leader)).collect()
650 }
651
652 fn kill(&self) {
653 trace_duration!(CATEGORY_STARNIX, "CgroupKill");
654 let state = self.state.lock();
655 state.propagate_kill();
656 }
657
658 fn is_populated(&self) -> bool {
659 let mut state = self.state.lock();
660 if state.deleted {
661 return false;
662 }
663 state.update_processes();
664 if !state.processes.is_empty() {
665 return true;
666 }
667
668 state.children.get_children().into_iter().any(|child| child.is_populated())
669 }
670
671 fn get_freezer_state(&self) -> CgroupFreezerState {
672 let state = self.state.lock();
673 CgroupFreezerState {
674 self_freezer_state: state.self_freezer_state,
675 effective_freezer_state: state.get_effective_freezer_state(),
676 }
677 }
678
679 fn freeze(&self, locked: &mut Locked<FileOpsCore>) {
680 trace_duration!(CATEGORY_STARNIX, "CgroupFreeze");
681 let mut state = self.state.lock();
682 let inherited_freezer_state = state.inherited_freezer_state;
683 state.propagate_freeze(locked, inherited_freezer_state);
684 state.self_freezer_state = FreezerState::Frozen;
685 }
686
687 fn thaw(&self) {
688 trace_duration!(CATEGORY_STARNIX, "CgroupThaw");
689 let mut state = self.state.lock();
690 state.self_freezer_state = FreezerState::Thawed;
691 let inherited_freezer_state = state.inherited_freezer_state;
692 state.propagate_thaw(inherited_freezer_state);
693 }
694}
695
#[cfg(test)]
mod test {
    use super::*;
    use crate::testing::spawn_kernel_and_run;
    use assert_matches::assert_matches;
    use starnix_uapi::signals::SIGCHLD;
    use starnix_uapi::{CLONE_SIGHAND, CLONE_THREAD, CLONE_VM};

    /// Paths are reported from the hierarchy root, one component per nesting level.
    #[::fuchsia::test]
    async fn cgroup_path_from_root() {
        spawn_kernel_and_run(async |_, _| {
            let root = CgroupRoot::new();

            let parent =
                root.new_child("test".into()).expect("new_child on root cgroup succeeds");
            let nested =
                parent.new_child("child".into()).expect("new_child on non-root cgroup succeeds");

            let parent_path = path_from_root(Some(Arc::downgrade(&parent)));
            assert_eq!(parent_path, Ok("/test".into()));

            let nested_path = path_from_root(Some(Arc::downgrade(&nested)));
            assert_eq!(nested_path, Ok("/test/child".into()));
        })
        .await;
    }

    /// A thread cloned inside a frozen cgroup starts out with a pending freeze signal.
    #[::fuchsia::test]
    async fn cgroup_clone_task_in_frozen_cgroup() {
        spawn_kernel_and_run(async |locked, current_task| {
            let kernel = current_task.kernel();
            let root = &kernel.cgroups.cgroup2;
            let cgroup = root.new_child("test".into()).expect("new_child on root cgroup succeeds");

            // Put a fresh process into the cgroup, then freeze the cgroup.
            let process = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            cgroup
                .add_process(locked.cast_locked(), process.thread_group())
                .expect("add process to cgroup");
            cgroup.freeze(locked.cast_locked());

            // The process is listed by the cgroup and the pid table points back at the cgroup.
            let member_pids = cgroup.get_pids(&kernel);
            assert_eq!(member_pids.first(), Some(&process.get_pid()));
            let owning_cgroup = root.get_cgroup(process.thread_group()).unwrap();
            assert_eq!(owning_cgroup.as_ptr(), Arc::as_ptr(&cgroup));

            // Add a second thread to the already frozen process.
            let clone_flags = (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM) as u64;
            let sibling_thread = process.clone_task_for_test(locked, clone_flags, Some(SIGCHLD));

            let sibling_state = sibling_thread.read();
            let pending_signals = sibling_state.kernel_signals_for_test();
            assert_matches!(pending_signals.front(), Some(KernelSignal::Freeze(_)));
        })
        .await;
    }
}