starnix_core/task/
thread_group.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::terminal::{Terminal, TerminalController};
6use crate::mutable_state::{state_accessor, state_implementation};
7use crate::security;
8use crate::signals::syscalls::{WaitingOptions, read_siginfo};
9use crate::signals::{
10    DeliveryAction, QueuedSignals, SignalActions, SignalDetail, SignalInfo, action_for_signal,
11    send_standard_signal,
12};
13use crate::task::interval_timer::IntervalTimerHandle;
14use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
15use crate::task::{
16    AtomicStopState, ControllingTerminal, CurrentTask, ExitStatus, Kernel, PidTable, ProcessGroup,
17    PtraceAllowedPtracers, PtraceEvent, PtraceOptions, PtraceStatus, Session, StopState, Task,
18    TaskFlags, TaskMutableState, TaskPersistentInfo, TimerTable, TypedWaitQueue, ZombiePtracees,
19    ptrace_detach,
20};
21use itertools::Itertools;
22use macro_rules_attribute::apply;
23use starnix_lifecycle::{AtomicU64Counter, DropNotifier};
24use starnix_logging::{log_debug, log_error, log_warn, track_stub};
25use starnix_sync::{
26    LockBefore, Locked, Mutex, OrderedMutex, ProcessGroupState, RwLock, ThreadGroupLimits, Unlocked,
27};
28use starnix_task_command::TaskCommand;
29use starnix_types::ownership::{OwnedRef, Releasable, TempRef, WeakRef};
30use starnix_types::stats::TaskTimeStats;
31use starnix_types::time::{itimerspec_from_itimerval, timeval_from_duration};
32use starnix_uapi::arc_key::WeakKey;
33use starnix_uapi::auth::{CAP_SYS_ADMIN, CAP_SYS_RESOURCE, Credentials};
34use starnix_uapi::errors::Errno;
35use starnix_uapi::personality::PersonalityFlags;
36use starnix_uapi::resource_limits::{Resource, ResourceLimits};
37use starnix_uapi::signals::{
38    SIGCHLD, SIGCONT, SIGHUP, SIGKILL, SIGTERM, SIGTTOU, Signal, UncheckedSignal,
39};
40use starnix_uapi::user_address::UserAddress;
41use starnix_uapi::{
42    ITIMER_PROF, ITIMER_REAL, ITIMER_VIRTUAL, SI_TKILL, SI_USER, SIG_IGN, errno, error, itimerval,
43    pid_t, rlimit, tid_t, uid_t,
44};
45use std::collections::BTreeMap;
46use std::fmt;
47use std::sync::atomic::Ordering;
48use std::sync::{Arc, Weak};
49use zx::{AsHandleRef, Koid, Status};
50
/// A weak reference to a thread group that can be used as a key in sets and maps.
///
/// Equality, ordering and hashing are derived from both fields.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadGroupKey {
    /// The pid of the thread group leader, captured when the key is created.
    pid: pid_t,
    /// Weak handle to the thread group itself.
    thread_group: WeakKey<ThreadGroup>,
}
57
impl ThreadGroupKey {
    /// The pid of the thread group keyed by this object.
    ///
    /// As the key is weak (and pids are not unique because of pid namespaces), this must not be
    /// used as a unique identifier of the thread group.
    pub fn pid(&self) -> pid_t {
        self.pid
    }
}
67
68impl std::ops::Deref for ThreadGroupKey {
69    type Target = Weak<ThreadGroup>;
70    fn deref(&self) -> &Self::Target {
71        &self.thread_group.0
72    }
73}
74
75impl From<&ThreadGroup> for ThreadGroupKey {
76    fn from(tg: &ThreadGroup) -> Self {
77        Self { pid: tg.leader, thread_group: WeakKey::from(&tg.weak_self.upgrade().unwrap()) }
78    }
79}
80
81impl<T: AsRef<ThreadGroup>> From<T> for ThreadGroupKey {
82    fn from(tg: T) -> Self {
83        tg.as_ref().into()
84    }
85}
86
/// Values used for waiting on the [ThreadGroup] lifecycle wait queue.
///
/// The enum is `#[repr(u64)]` and converts to `u64` as its discriminant
/// (`ChildStatus` is 0, `Stopped` is 1).
#[repr(u64)]
pub enum ThreadGroupLifecycleWaitValue {
    /// Wait for updates to the WaitResults of tasks in the group.
    ChildStatus,
    /// Wait for updates to `stopped`.
    Stopped,
}

/// Conversion to the raw `u64` wait value.
///
/// Implemented as `From` rather than `Into`: the standard blanket impl still provides
/// `Into<u64>` for existing callers, and `u64::from(value)` becomes available as well.
impl From<ThreadGroupLifecycleWaitValue> for u64 {
    fn from(value: ThreadGroupLifecycleWaitValue) -> Self {
        value as u64
    }
}
101
/// A child process that has exited, but whose ptrace zombie needs to be consumed
/// before it can be waited for.
#[derive(Clone, Debug)]
pub struct DeferredZombiePTracer {
    /// Key of the thread group of the original tracer.
    pub tracer_thread_group_key: ThreadGroupKey,
    /// Tid of the tracee.
    pub tracee_tid: tid_t,
    /// Process group id of the tracee.
    pub tracee_pgid: pid_t,
    /// Key of the thread group of the tracee.
    pub tracee_thread_group_key: ThreadGroupKey,
}
115
116impl DeferredZombiePTracer {
117    fn new(tracer: &ThreadGroup, tracee: &Task) -> Self {
118        Self {
119            tracer_thread_group_key: tracer.into(),
120            tracee_tid: tracee.tid,
121            tracee_pgid: tracee.thread_group().read().process_group.leader,
122            tracee_thread_group_key: tracee.thread_group_key.clone(),
123        }
124    }
125}
126
/// The mutable state of the ThreadGroup.
pub struct ThreadGroupMutableState {
    /// The parent thread group.
    ///
    /// The value needs to be writable so that the thread group can be re-parented to the correct
    /// subreaper if the parent ends before the child.
    pub parent: Option<ThreadGroupParent>,

    /// The signal this process generates on exit.
    pub exit_signal: Option<Signal>,

    /// The tasks in the thread group.
    ///
    /// The references to the tasks are weak to prevent cycles, as each Task has an Arc reference
    /// to its thread group.
    /// It is still expected that these weak references are always valid, as tasks must unregister
    /// themselves before they are deleted.
    tasks: BTreeMap<tid_t, TaskContainer>,

    /// The children of this thread group.
    ///
    /// The references to the ThreadGroups are weak to prevent cycles, as each ThreadGroup has an
    /// Arc reference to its parent.
    /// It is still expected that these weak references are always valid, as thread groups must
    /// unregister themselves before they are deleted.
    pub children: BTreeMap<pid_t, Weak<ThreadGroup>>,

    /// Child tasks that have exited, but not yet been waited for.
    pub zombie_children: Vec<OwnedRef<ZombieProcess>>,

    /// ptracees of this process that have exited, but not yet been waited for.
    pub zombie_ptracees: ZombiePtracees,

    /// Child processes that have exited, but whose ptrace zombie needs to be consumed
    /// before they can be waited for.
    pub deferred_zombie_ptracers: Vec<DeferredZombiePTracer>,

    /// Unified [WaitQueue] for all waited ThreadGroup events.
    pub lifecycle_waiters: TypedWaitQueue<ThreadGroupLifecycleWaitValue>,

    /// Whether this thread group will inherit the children of dying processes in its descendant
    /// tree.
    pub is_child_subreaper: bool,

    /// The IDs used to perform shell job control.
    pub process_group: Arc<ProcessGroup>,

    // NOTE(review): presumably set once the thread group has called exec — confirm at the
    // place where this flag is written.
    pub did_exec: bool,

    /// A signal that indicates whether the process is going to become waitable
    /// via waitid and waitpid for either WSTOPPED or WCONTINUED, depending on
    /// the value of `stopped`. If not None, contains the SignalInfo to return.
    pub last_signal: Option<SignalInfo>,

    /// Whether the thread_group is terminating or not, and if it is, the exit info of the thread
    /// group.
    run_state: ThreadGroupRunState,

    /// Time statistics accumulated from the children.
    pub children_time_stats: TaskTimeStats,

    /// Personality flags set with `sys_personality()`.
    pub personality: PersonalityFlags,

    /// Thread groups allowed to trace tasks in this thread group.
    pub allowed_ptracers: PtraceAllowedPtracers,

    /// Channel to message when this thread group exits.
    exit_notifier: Option<futures::channel::oneshot::Sender<()>>,

    /// Notifier for name changes.
    pub notifier: Option<std::sync::mpsc::Sender<MemoryAttributionLifecycleEvent>>,
}
200
/// A collection of `Task` objects that roughly correspond to a "process".
///
/// Userspace programmers often think about "threads" and "processes", but those concepts have no
/// clear analogs inside the kernel because tasks are typically created using `clone(2)`, which
/// takes a complex set of flags that describes how much state is shared between the original task
/// and the new task.
///
/// If a new task is created with the `CLONE_THREAD` flag, the new task will be placed in the same
/// `ThreadGroup` as the original task. Userspace typically uses this flag in conjunction with the
/// `CLONE_FILES`, `CLONE_VM`, and `CLONE_FS` flags, which corresponds to the userspace notion of a
/// "thread". For example, that's how `pthread_create` behaves. In that sense, a `ThreadGroup`
/// normally corresponds to the set of "threads" in a "process". However, this pattern is purely a
/// userspace convention, and nothing stops userspace from using `CLONE_THREAD` without
/// `CLONE_FILES`, for example.
///
/// In Starnix, a `ThreadGroup` corresponds to a Zircon process, which means we do not support the
/// `CLONE_THREAD` flag without the `CLONE_VM` flag. If we run into problems with this limitation,
/// we might need to revise this correspondence.
///
/// Each `Task` in a `ThreadGroup` has the same thread group ID (`tgid`). The task with the same
/// `pid` as the `tgid` is called the thread group leader.
///
/// Thread groups are destroyed when the last task in the group exits.
pub struct ThreadGroup {
    /// Weak reference to the `Arc` holding this `ThreadGroup`. This allows retrieving a strong
    /// reference from a plain `&ThreadGroup`.
    pub weak_self: Weak<ThreadGroup>,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// A handle to the underlying Zircon process object.
    ///
    /// Currently, we have a 1-to-1 mapping between thread groups and zx::process
    /// objects. This approach might break down if/when we implement CLONE_VM
    /// without CLONE_THREAD because that creates a situation where two thread
    /// groups share an address space. To implement that situation, we might
    /// need to break the 1-to-1 mapping between thread groups and zx::process
    /// or teach zx::process to share address spaces.
    pub process: zx::Process,

    /// The lead task of this thread group.
    ///
    /// The lead task is typically the initial thread created in the thread group.
    pub leader: pid_t,

    /// The signal actions that are registered for this process.
    pub signal_actions: Arc<SignalActions>,

    /// The timers for this thread group (from timer_create(), etc.).
    pub timers: TimerTable,

    /// A mechanism to be notified when this `ThreadGroup` is destroyed.
    pub drop_notifier: DropNotifier,

    /// Whether the process is currently stopped.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    stop_state: AtomicStopState,

    /// The mutable state of the ThreadGroup.
    mutable_state: RwLock<ThreadGroupMutableState>,

    /// The resource limits for this thread group.  This is outside mutable_state
    /// to avoid deadlocks where the thread_group lock is held when acquiring
    /// the task lock, and vice versa.
    pub limits: OrderedMutex<ResourceLimits, ThreadGroupLimits>,

    /// The next unique identifier for a seccomp filter.  These are required to be
    /// able to distinguish identical seccomp filters, which are treated differently
    /// for the purposes of SECCOMP_FILTER_FLAG_TSYNC.  Inherited across clone because
    /// seccomp filters are also inherited across clone.
    pub next_seccomp_filter_id: AtomicU64Counter,

    /// Tasks ptraced by this process, keyed by tid.
    pub ptracees: Mutex<BTreeMap<tid_t, TaskContainer>>,

    /// The signals that are currently pending for this thread group.
    pub pending_signals: Mutex<QueuedSignals>,

    /// The monotonic time at which the thread group started.
    pub start_time: zx::MonotonicInstant,
}
284
285impl fmt::Debug for ThreadGroup {
286    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
287        write!(
288            f,
289            "{}({})",
290            self.process.get_name().unwrap_or(zx::Name::new_lossy("<unknown>")),
291            self.leader
292        )
293    }
294}
295
/// Two thread groups compare equal when they have the same leader pid.
// NOTE(review): only `leader` is compared; `ThreadGroupKey::pid` documents that pids are not
// unique across pid namespaces, so confirm this is never used to compare thread groups from
// different namespaces.
impl PartialEq for ThreadGroup {
    fn eq(&self, other: &Self) -> bool {
        self.leader == other.leader
    }
}
301
302#[cfg(any(test, debug_assertions))]
303impl Drop for ThreadGroup {
304    fn drop(&mut self) {
305        let state = self.mutable_state.get_mut();
306        assert!(state.tasks.is_empty());
307        assert!(state.children.is_empty());
308        assert!(state.zombie_children.is_empty());
309        assert!(state.zombie_ptracees.is_empty());
310        assert!(
311            state
312                .parent
313                .as_ref()
314                .and_then(|p| p.0.upgrade().as_ref().map(|p| p
315                    .read()
316                    .children
317                    .get(&self.leader)
318                    .is_none()))
319                .unwrap_or(true)
320        );
321    }
322}
323
/// A wrapper around a `Weak<ThreadGroup>` that expects the underlying `Weak` to always be
/// valid. The wrapper checks this at runtime: with a `debug_assert!` on creation, and by
/// panicking on a failed upgrade.
pub struct ThreadGroupParent(Weak<ThreadGroup>);
327
328impl ThreadGroupParent {
329    pub fn new(t: Weak<ThreadGroup>) -> Self {
330        debug_assert!(t.upgrade().is_some());
331        Self(t)
332    }
333
334    pub fn upgrade(&self) -> Arc<ThreadGroup> {
335        self.0.upgrade().expect("ThreadGroupParent references must always be valid")
336    }
337}
338
339impl Clone for ThreadGroupParent {
340    fn clone(&self) -> Self {
341        Self(self.0.clone())
342    }
343}
344
/// A selector that can match a process. Works as a representation of the pid argument to syscalls
/// like wait and kill.
#[derive(Debug, Clone)]
pub enum ProcessSelector {
    /// Matches any process at all.
    Any,
    /// Matches only the process with the specified pid.
    Pid(pid_t),
    /// Matches all the processes in the given process group.
    Pgid(pid_t),
    /// Matches the thread group with the given key.
    Process(ThreadGroupKey),
}
358
359impl ProcessSelector {
360    pub fn match_tid(&self, tid: tid_t, pid_table: &PidTable) -> bool {
361        match *self {
362            ProcessSelector::Pid(p) => {
363                if p == tid {
364                    true
365                } else {
366                    if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
367                        task_ref.get_pid() == p
368                    } else {
369                        false
370                    }
371                }
372            }
373            ProcessSelector::Any => true,
374            ProcessSelector::Pgid(pgid) => {
375                if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
376                    pid_table.get_process_group(pgid).as_ref()
377                        == Some(&task_ref.thread_group().read().process_group)
378                } else {
379                    false
380                }
381            }
382            ProcessSelector::Process(ref key) => {
383                if let Some(tg) = key.upgrade() {
384                    tg.read().tasks.contains_key(&tid)
385                } else {
386                    false
387                }
388            }
389        }
390    }
391}
392
/// How a process exited, as reported to whoever waits on it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProcessExitInfo {
    /// The exit status of the process.
    pub status: ExitStatus,
    /// The exit signal of the process, if any (copied from the thread group's `exit_signal`
    /// when the zombie is created).
    pub exit_signal: Option<Signal>,
}
398
/// Whether a thread group is still running or has started terminating.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
enum ThreadGroupRunState {
    /// The thread group has not started terminating. This is the default state.
    #[default]
    Running,
    /// The thread group is terminating with the given exit status.
    Terminating(ExitStatus),
}
405
/// The information reported for a waited-for process.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WaitResult {
    /// The pid of the process.
    pub pid: pid_t,
    /// The uid reported for the process.
    pub uid: uid_t,

    /// The exit status and exit signal of the process.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,
}
416
417impl WaitResult {
418    // According to wait(2) man page, SignalInfo.signal needs to always be set to SIGCHLD
419    pub fn as_signal_info(&self) -> SignalInfo {
420        SignalInfo::new(
421            SIGCHLD,
422            self.exit_info.status.signal_info_code(),
423            SignalDetail::SIGCHLD {
424                pid: self.pid,
425                uid: self.uid,
426                status: self.exit_info.status.signal_info_status(),
427            },
428        )
429    }
430}
431
/// The data kept about a process that has exited but has not yet been waited for.
#[derive(Debug)]
pub struct ZombieProcess {
    /// Key of the thread group this zombie stands for; also provides the zombie's pid.
    pub thread_group_key: ThreadGroupKey,
    /// The process group id of the process.
    pub pgid: pid_t,
    /// The uid reported for the process.
    pub uid: uid_t,

    /// The exit status and exit signal of the process.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,

    /// Whether dropping this ZombieProcess should imply removing the pid from
    /// the PidTable
    pub is_canonical: bool,
}
447
448impl PartialEq for ZombieProcess {
449    fn eq(&self, other: &Self) -> bool {
450        // We assume only one set of ZombieProcess data per process, so this should cover it.
451        self.thread_group_key == other.thread_group_key
452            && self.pgid == other.pgid
453            && self.uid == other.uid
454            && self.is_canonical == other.is_canonical
455    }
456}
457
458impl Eq for ZombieProcess {}
459
460impl PartialOrd for ZombieProcess {
461    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
462        Some(self.cmp(other))
463    }
464}
465
466impl Ord for ZombieProcess {
467    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
468        self.thread_group_key.cmp(&other.thread_group_key)
469    }
470}
471
472impl ZombieProcess {
473    pub fn new(
474        thread_group: ThreadGroupStateRef<'_>,
475        credentials: &Credentials,
476        exit_info: ProcessExitInfo,
477    ) -> OwnedRef<Self> {
478        let time_stats = thread_group.base.time_stats() + thread_group.children_time_stats;
479        OwnedRef::new(ZombieProcess {
480            thread_group_key: thread_group.base.into(),
481            pgid: thread_group.process_group.leader,
482            uid: credentials.uid,
483            exit_info,
484            time_stats,
485            is_canonical: true,
486        })
487    }
488
489    pub fn pid(&self) -> pid_t {
490        self.thread_group_key.pid()
491    }
492
493    pub fn to_wait_result(&self) -> WaitResult {
494        WaitResult {
495            pid: self.pid(),
496            uid: self.uid,
497            exit_info: self.exit_info.clone(),
498            time_stats: self.time_stats,
499        }
500    }
501
502    pub fn as_artificial(&self) -> Self {
503        ZombieProcess {
504            thread_group_key: self.thread_group_key.clone(),
505            pgid: self.pgid,
506            uid: self.uid,
507            exit_info: self.exit_info.clone(),
508            time_stats: self.time_stats,
509            is_canonical: false,
510        }
511    }
512
513    pub fn matches_selector(&self, selector: &ProcessSelector) -> bool {
514        match *selector {
515            ProcessSelector::Any => true,
516            ProcessSelector::Pid(pid) => self.pid() == pid,
517            ProcessSelector::Pgid(pgid) => self.pgid == pgid,
518            ProcessSelector::Process(ref key) => self.thread_group_key == *key,
519        }
520    }
521
522    pub fn matches_selector_and_waiting_option(
523        &self,
524        selector: &ProcessSelector,
525        options: &WaitingOptions,
526    ) -> bool {
527        if !self.matches_selector(selector) {
528            return false;
529        }
530
531        if options.wait_for_all {
532            true
533        } else {
534            // A "clone" zombie is one which has delivered no signal, or a
535            // signal other than SIGCHLD to its parent upon termination.
536            options.wait_for_clone == (self.exit_info.exit_signal != Some(SIGCHLD))
537        }
538    }
539}
540
541impl Releasable for ZombieProcess {
542    type Context<'a> = &'a mut PidTable;
543
544    fn release<'a>(self, pids: &'a mut PidTable) {
545        if self.is_canonical {
546            pids.remove_zombie(self.pid());
547        }
548    }
549}
550
551impl ThreadGroup {
552    pub fn new<L>(
553        locked: &mut Locked<L>,
554        kernel: Arc<Kernel>,
555        process: zx::Process,
556        parent: Option<ThreadGroupWriteGuard<'_>>,
557        leader: pid_t,
558        exit_signal: Option<Signal>,
559        process_group: Arc<ProcessGroup>,
560        signal_actions: Arc<SignalActions>,
561    ) -> Arc<ThreadGroup>
562    where
563        L: LockBefore<ProcessGroupState>,
564    {
565        Arc::new_cyclic(|weak_self| {
566            let mut thread_group = ThreadGroup {
567                weak_self: weak_self.clone(),
568                kernel,
569                process,
570                leader,
571                signal_actions,
572                timers: Default::default(),
573                drop_notifier: Default::default(),
574                // A child process created via fork(2) inherits its parent's
575                // resource limits.  Resource limits are preserved across execve(2).
576                limits: OrderedMutex::new(
577                    parent
578                        .as_ref()
579                        .map(|p| p.base.limits.lock(locked.cast_locked()).clone())
580                        .unwrap_or(Default::default()),
581                ),
582                next_seccomp_filter_id: Default::default(),
583                ptracees: Default::default(),
584                stop_state: AtomicStopState::new(StopState::Awake),
585                pending_signals: Default::default(),
586                start_time: zx::MonotonicInstant::get(),
587                mutable_state: RwLock::new(ThreadGroupMutableState {
588                    parent: parent
589                        .as_ref()
590                        .map(|p| ThreadGroupParent::new(p.base.weak_self.clone())),
591                    exit_signal,
592                    tasks: BTreeMap::new(),
593                    children: BTreeMap::new(),
594                    zombie_children: vec![],
595                    zombie_ptracees: ZombiePtracees::new(),
596                    deferred_zombie_ptracers: vec![],
597                    lifecycle_waiters: TypedWaitQueue::<ThreadGroupLifecycleWaitValue>::default(),
598                    is_child_subreaper: false,
599                    process_group: Arc::clone(&process_group),
600                    did_exec: false,
601                    last_signal: None,
602                    run_state: Default::default(),
603                    children_time_stats: Default::default(),
604                    personality: parent
605                        .as_ref()
606                        .map(|p| p.personality)
607                        .unwrap_or(Default::default()),
608                    allowed_ptracers: PtraceAllowedPtracers::None,
609                    exit_notifier: None,
610                    notifier: None,
611                }),
612            };
613
614            if let Some(mut parent) = parent {
615                thread_group.next_seccomp_filter_id.reset(parent.base.next_seccomp_filter_id.get());
616                parent.children.insert(leader, weak_self.clone());
617                process_group.insert(locked, &thread_group);
618            };
619            thread_group
620        })
621    }
622
    // Expands the `state_accessor!` macro for the `mutable_state` field.
    // NOTE(review): presumably this is what provides the `read()`/`write()` lock accessors used
    // throughout this impl — confirm against `crate::mutable_state`.
    state_accessor!(ThreadGroup, mutable_state);
624
    /// Returns the current stop state of the thread group.
    ///
    /// The value is read with `Ordering::Relaxed` and without taking the `mutable_state` lock.
    pub fn load_stopped(&self) -> StopState {
        self.stop_state.load(Ordering::Relaxed)
    }
628
629    // Causes the thread group to exit.  If this is being called from a task
630    // that is part of the current thread group, the caller should pass
631    // `current_task`.  If ownership issues prevent passing `current_task`, then
632    // callers should use CurrentTask::thread_group_exit instead.
633    pub fn exit(
634        &self,
635        locked: &mut Locked<Unlocked>,
636        exit_status: ExitStatus,
637        mut current_task: Option<&mut CurrentTask>,
638    ) {
639        if let Some(ref mut current_task) = current_task {
640            current_task.ptrace_event(
641                locked,
642                PtraceOptions::TRACEEXIT,
643                exit_status.signal_info_status() as u64,
644            );
645        }
646        let mut pids = self.kernel.pids.write();
647        let mut state = self.write();
648        if state.is_terminating() {
649            // The thread group is already terminating and all threads in the thread group have
650            // already been interrupted.
651            return;
652        }
653
654        state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());
655
656        // Drop ptrace zombies
657        state.zombie_ptracees.release(&mut pids);
658
659        // Interrupt each task. Unlock the group because send_signal will lock the group in order
660        // to call set_stopped.
661        // SAFETY: tasks is kept on the stack. The static is required to ensure the lock on
662        // ThreadGroup can be dropped.
663        let tasks = state.tasks().map(TempRef::into_static).collect::<Vec<_>>();
664        drop(state);
665
666        // Detach from any ptraced tasks, killing the ones that set PTRACE_O_EXITKILL.
667        let tracees = self.ptracees.lock().keys().cloned().collect::<Vec<_>>();
668        for tracee in tracees {
669            if let Some(task_ref) = pids.get_task(tracee).clone().upgrade() {
670                let mut should_send_sigkill = false;
671                if let Some(ptrace) = &task_ref.read().ptrace {
672                    should_send_sigkill = ptrace.has_option(PtraceOptions::EXITKILL);
673                }
674                if should_send_sigkill {
675                    send_standard_signal(locked, task_ref.as_ref(), SignalInfo::default(SIGKILL));
676                    continue;
677                }
678
679                let _ =
680                    ptrace_detach(locked, &mut pids, self, task_ref.as_ref(), &UserAddress::NULL);
681            }
682        }
683
684        for task in tasks {
685            task.write().set_exit_status(exit_status.clone());
686            send_standard_signal(locked, &task, SignalInfo::default(SIGKILL));
687        }
688    }
689
690    pub fn add(&self, task: &TempRef<'_, Task>) -> Result<(), Errno> {
691        let mut state = self.write();
692        if state.is_terminating() {
693            if state.tasks_count() == 0 {
694                log_warn!(
695                    "Task {} with leader {} terminating while adding its first task, \
696                not sending creation notification",
697                    task.tid,
698                    self.leader
699                );
700            }
701            return error!(EINVAL);
702        }
703        state.tasks.insert(task.tid, task.into());
704
705        Ok(())
706    }
707
    /// Remove the task from the tasks of this ThreadGroup.
    ///
    /// When the removed task was the last one of the group, this also performs the exit of the
    /// whole thread group: the pid table entry is replaced by a zombie, the group leaves its
    /// process group, its children and zombies are handed over to the reaper (or released if
    /// there is none), and the parent (or the tracer) is notified.
    ///
    /// It is important that the task is taken as an `OwnedRef`. It ensures the tasks of the
    /// ThreadGroup are always valid as they are still valid when removed.
    pub fn remove<L>(&self, locked: &mut Locked<L>, pids: &mut PidTable, task: &OwnedRef<Task>)
    where
        L: LockBefore<ProcessGroupState>,
    {
        task.set_ptrace_zombie(pids);
        pids.remove_task(task.tid);

        let mut state = self.write();

        let persistent_info: TaskPersistentInfo =
            if let Some(container) = state.tasks.remove(&task.tid) {
                container.into()
            } else {
                // The task has never been added. The only expected case is that this thread was
                // already terminating.
                debug_assert!(state.is_terminating());
                return;
            };

        // Everything below only runs when the last task of the group has just been removed.
        if state.tasks.is_empty() {
            // Use the exit status recorded when termination started; otherwise take the one of
            // the exiting task and mark the group as terminating now.
            let exit_status =
                if let ThreadGroupRunState::Terminating(exit_status) = &state.run_state {
                    exit_status.clone()
                } else {
                    let exit_status = task.exit_status().unwrap_or_else(|| {
                        log_error!("Exiting without an exit code.");
                        ExitStatus::Exit(u8::MAX)
                    });
                    state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());
                    exit_status
                };

            // Replace PID table entry with a zombie.
            let exit_info =
                ProcessExitInfo { status: exit_status, exit_signal: state.exit_signal.clone() };
            let zombie =
                ZombieProcess::new(state.as_ref(), &persistent_info.real_creds(), exit_info);
            pids.kill_process(self.leader, OwnedRef::downgrade(&zombie));

            state.leave_process_group(locked, pids);

            // I have no idea if dropping the lock here is correct, and I don't want to think about
            // it. If problems do turn up with another thread observing an intermediate state of
            // this exit operation, the solution is to unify locks. It should be sensible and
            // possible for there to be a single lock that protects all (or nearly all) of the
            // data accessed by both exit and wait. In gvisor and linux this is the lock on the
            // equivalent of the PidTable. This is made more difficult by rust locks being
            // containers that only lock the data they contain, but see
            // https://docs.google.com/document/d/1YHrhBqNhU1WcrsYgGAu3JwwlVmFXPlwWHTJLAbwRebY/edit
            // for an idea.
            std::mem::drop(state);

            // We will need the immediate parent and the reaper. Once we have them, we can make
            // sure to take the locks in the right order: parent before child.
            let parent = self.read().parent.clone();
            let reaper = self.find_reaper();

            {
                // Reparent the children.
                if let Some(reaper) = reaper {
                    let reaper = reaper.upgrade();
                    {
                        // Lock order: the reaper first, then this thread group, then each child.
                        let mut reaper_state = reaper.write();
                        let mut state = self.write();
                        for (_pid, weak_child) in std::mem::take(&mut state.children) {
                            if let Some(child) = weak_child.upgrade() {
                                let mut child_state = child.write();

                                // A reparented child always reports its exit with SIGCHLD.
                                child_state.exit_signal = Some(SIGCHLD);
                                child_state.parent =
                                    Some(ThreadGroupParent::new(Arc::downgrade(&reaper)));
                                reaper_state.children.insert(child.leader, weak_child.clone());
                            }
                        }
                        // The zombies that were not waited for become the reaper's.
                        reaper_state.zombie_children.append(&mut state.zombie_children);
                    }
                    ZombiePtracees::reparent(self, &reaper);
                } else {
                    // If we don't have a reaper then just drop the zombies.
                    let mut state = self.write();
                    for zombie in state.zombie_children.drain(..) {
                        zombie.release(pids);
                    }
                    state.zombie_ptracees.release(pids);
                }
            }

            // In test and debug builds, check that no zombie is left attached to this group.
            #[cfg(any(test, debug_assertions))]
            {
                let state = self.read();
                assert!(state.zombie_children.is_empty());
                assert!(state.zombie_ptracees.is_empty());
            }

            if let Some(ref parent) = parent {
                let parent = parent.upgrade();
                let mut tracer_pid = None;
                if let Some(ptrace) = &task.read().ptrace {
                    tracer_pid = Some(ptrace.get_pid());
                }

                // If the task is traced and its tracer can still be found, the tracer's thread
                // group decides whether the zombie is handed back for the parent notification.
                let maybe_zombie = 'compute_zombie: {
                    if let Some(tracer_pid) = tracer_pid {
                        if let Some(ref tracer) = pids.get_task(tracer_pid).upgrade() {
                            break 'compute_zombie tracer
                                .thread_group()
                                .maybe_notify_tracer(task, pids, &parent, zombie);
                        }
                    }
                    Some(zombie)
                };
                if let Some(zombie) = maybe_zombie {
                    parent.do_zombie_notifications(zombie);
                }
            } else {
                // Without a parent nobody can wait for the zombie: release it right away.
                zombie.release(pids);
            }

            // TODO: Set the error_code on the Zircon process object. Currently missing a way
            // to do this in Zircon. Might be easier in the new execution model.

            // Once the last zircon thread stops, the zircon process will also stop executing.

            if let Some(parent) = parent {
                let parent = parent.upgrade();
                parent.check_orphans(locked, pids);
            }
        }
    }
841
842    pub fn do_zombie_notifications(&self, zombie: OwnedRef<ZombieProcess>) {
843        let mut state = self.write();
844
845        state.children.remove(&zombie.pid());
846        state
847            .deferred_zombie_ptracers
848            .retain(|dzp| dzp.tracee_thread_group_key != zombie.thread_group_key);
849
850        let exit_signal = zombie.exit_info.exit_signal;
851        let mut signal_info = zombie.to_wait_result().as_signal_info();
852
853        state.zombie_children.push(zombie);
854        state.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
855
856        // Send signals
857        if let Some(exit_signal) = exit_signal {
858            signal_info.signal = exit_signal;
859            state.send_signal(signal_info);
860        }
861    }
862
863    /// Notifies the tracer if appropriate.  Returns Some(zombie) if caller
864    /// needs to notify the parent, None otherwise.  The caller should probably
865    /// invoke parent.do_zombie_notifications(zombie) on the result.
866    fn maybe_notify_tracer(
867        &self,
868        tracee: &Task,
869        mut pids: &mut PidTable,
870        parent: &ThreadGroup,
871        zombie: OwnedRef<ZombieProcess>,
872    ) -> Option<OwnedRef<ZombieProcess>> {
873        if self.read().zombie_ptracees.has_tracee(tracee.tid) {
874            if self == parent {
875                // The tracer is the parent and has not consumed the
876                // notification.  Don't bother with the ptracee stuff, and just
877                // notify the parent.
878                self.write().zombie_ptracees.remove(pids, tracee.tid);
879                return Some(zombie);
880            } else {
881                // The tracer is not the parent and the tracer has not consumed
882                // the notification.
883                {
884                    // Tell the parent to expect a notification later.
885                    let mut parent_state = parent.write();
886                    parent_state
887                        .deferred_zombie_ptracers
888                        .push(DeferredZombiePTracer::new(self, tracee));
889                    parent_state.children.remove(&tracee.get_pid());
890                }
891                // Tell the tracer that there is a notification pending.
892                let mut state = self.write();
893                state.zombie_ptracees.set_parent_of(tracee.tid, Some(zombie), parent);
894                tracee.write().notify_ptracers();
895                return None;
896            }
897        } else if self == parent {
898            // The tracer is the parent and has already consumed the parent
899            // notification.  No further action required.
900            parent.write().children.remove(&tracee.tid);
901            zombie.release(&mut pids);
902            return None;
903        }
904        // The tracer is not the parent and has already consumed the parent
905        // notification.  Notify the parent.
906        Some(zombie)
907    }
908
909    /// Find the task which will adopt our children after we die.
910    fn find_reaper(&self) -> Option<ThreadGroupParent> {
911        let mut weak_parent = self.read().parent.clone()?;
912        loop {
913            weak_parent = {
914                let parent = weak_parent.upgrade();
915                let parent_state = parent.read();
916                if parent_state.is_child_subreaper {
917                    break;
918                }
919                match parent_state.parent {
920                    Some(ref next_parent) => next_parent.clone(),
921                    None => break,
922                }
923            };
924        }
925        Some(weak_parent)
926    }
927
928    pub fn setsid<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
929    where
930        L: LockBefore<ProcessGroupState>,
931    {
932        let pids = self.kernel.pids.read();
933        if pids.get_process_group(self.leader).is_some() {
934            return error!(EPERM);
935        }
936        let process_group = ProcessGroup::new(self.leader, None);
937        pids.add_process_group(process_group.clone());
938        self.write().set_process_group(locked, process_group, &pids);
939        self.check_orphans(locked, &pids);
940
941        Ok(())
942    }
943
944    pub fn setpgid<L>(
945        &self,
946        locked: &mut Locked<L>,
947        current_task: &CurrentTask,
948        target: &Task,
949        pgid: pid_t,
950    ) -> Result<(), Errno>
951    where
952        L: LockBefore<ProcessGroupState>,
953    {
954        let pids = self.kernel.pids.read();
955
956        {
957            let current_process_group = Arc::clone(&self.read().process_group);
958
959            // The target process must be either the current process of a child of the current process
960            let mut target_thread_group = target.thread_group().write();
961            let is_target_current_process_child =
962                target_thread_group.parent.as_ref().map(|tg| tg.upgrade().leader)
963                    == Some(self.leader);
964            if target_thread_group.leader() != self.leader && !is_target_current_process_child {
965                return error!(ESRCH);
966            }
967
968            // If the target process is a child of the current task, it must not have executed one of the exec
969            // function.
970            if is_target_current_process_child && target_thread_group.did_exec {
971                return error!(EACCES);
972            }
973
974            let new_process_group;
975            {
976                let target_process_group = &target_thread_group.process_group;
977
978                // The target process must not be a session leader and must be in the same session as the current process.
979                if target_thread_group.leader() == target_process_group.session.leader
980                    || current_process_group.session != target_process_group.session
981                {
982                    return error!(EPERM);
983                }
984
985                let target_pgid = if pgid == 0 { target_thread_group.leader() } else { pgid };
986                if target_pgid < 0 {
987                    return error!(EINVAL);
988                }
989
990                if target_pgid == target_process_group.leader {
991                    return Ok(());
992                }
993
994                // If pgid is not equal to the target process id, the associated process group must exist
995                // and be in the same session as the target process.
996                if target_pgid != target_thread_group.leader() {
997                    new_process_group =
998                        pids.get_process_group(target_pgid).ok_or_else(|| errno!(EPERM))?;
999                    if new_process_group.session != target_process_group.session {
1000                        return error!(EPERM);
1001                    }
1002                    security::check_setpgid_access(current_task, target)?;
1003                } else {
1004                    security::check_setpgid_access(current_task, target)?;
1005                    // Create a new process group
1006                    new_process_group =
1007                        ProcessGroup::new(target_pgid, Some(target_process_group.session.clone()));
1008                    pids.add_process_group(new_process_group.clone());
1009                }
1010            }
1011
1012            target_thread_group.set_process_group(locked, new_process_group, &pids);
1013        }
1014
1015        target.thread_group().check_orphans(locked, &pids);
1016
1017        Ok(())
1018    }
1019
1020    fn itimer_real(&self) -> IntervalTimerHandle {
1021        self.timers.itimer_real()
1022    }
1023
1024    pub fn set_itimer(
1025        &self,
1026        current_task: &CurrentTask,
1027        which: u32,
1028        value: itimerval,
1029    ) -> Result<itimerval, Errno> {
1030        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1031            // We don't support setting these timers.
1032            // The gvisor test suite clears ITIMER_PROF as part of its test setup logic, so we support
1033            // clearing these values.
1034            if value.it_value.tv_sec == 0 && value.it_value.tv_usec == 0 {
1035                return Ok(itimerval::default());
1036            }
1037            track_stub!(TODO("https://fxbug.dev/322874521"), "Unsupported itimer type", which);
1038            return error!(ENOTSUP);
1039        }
1040
1041        if which != ITIMER_REAL {
1042            return error!(EINVAL);
1043        }
1044        let itimer_real = self.itimer_real();
1045        let prev_remaining = itimer_real.time_remaining();
1046        if value.it_value.tv_sec != 0 || value.it_value.tv_usec != 0 {
1047            itimer_real.arm(current_task, itimerspec_from_itimerval(value), false)?;
1048        } else {
1049            itimer_real.disarm(current_task)?;
1050        }
1051        Ok(itimerval {
1052            it_value: timeval_from_duration(prev_remaining.remainder),
1053            it_interval: timeval_from_duration(prev_remaining.interval),
1054        })
1055    }
1056
1057    pub fn get_itimer(&self, which: u32) -> Result<itimerval, Errno> {
1058        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1059            // We don't support setting these timers, so we can accurately report that these are not set.
1060            return Ok(itimerval::default());
1061        }
1062        if which != ITIMER_REAL {
1063            return error!(EINVAL);
1064        }
1065        let remaining = self.itimer_real().time_remaining();
1066        Ok(itimerval {
1067            it_value: timeval_from_duration(remaining.remainder),
1068            it_interval: timeval_from_duration(remaining.interval),
1069        })
1070    }
1071
1072    /// Check whether the stop state is compatible with `new_stopped`. If it is return it,
1073    /// otherwise, return None.
1074    fn check_stopped_state(
1075        &self,
1076        new_stopped: StopState,
1077        finalize_only: bool,
1078    ) -> Option<StopState> {
1079        let stopped = self.load_stopped();
1080        if finalize_only && !stopped.is_stopping_or_stopped() {
1081            return Some(stopped);
1082        }
1083
1084        if stopped.is_illegal_transition(new_stopped) {
1085            return Some(stopped);
1086        }
1087
1088        return None;
1089    }
1090
1091    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
1092    /// does not update the signal.  If |finalize_only| is set, will check that
1093    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
1094    /// before executing it.
1095    ///
1096    /// Returns the latest stop state after any changes.
1097    pub fn set_stopped(
1098        &self,
1099        new_stopped: StopState,
1100        siginfo: Option<SignalInfo>,
1101        finalize_only: bool,
1102    ) -> StopState {
1103        // Perform an early return check to see if we can avoid taking the lock.
1104        if let Some(stopped) = self.check_stopped_state(new_stopped, finalize_only) {
1105            return stopped;
1106        }
1107
1108        self.write().set_stopped(new_stopped, siginfo, finalize_only)
1109    }
1110
1111    /// Ensures |session| is the controlling session inside of |terminal_controller|, and returns a
1112    /// reference to the |TerminalController|.
1113    fn check_terminal_controller(
1114        session: &Arc<Session>,
1115        terminal_controller: &Option<TerminalController>,
1116    ) -> Result<(), Errno> {
1117        if let Some(terminal_controller) = terminal_controller {
1118            if let Some(terminal_session) = terminal_controller.session.upgrade() {
1119                if Arc::ptr_eq(session, &terminal_session) {
1120                    return Ok(());
1121                }
1122            }
1123        }
1124        error!(ENOTTY)
1125    }
1126
1127    pub fn get_foreground_process_group(&self, terminal: &Terminal) -> Result<pid_t, Errno> {
1128        let state = self.read();
1129        let process_group = &state.process_group;
1130        let terminal_state = terminal.read();
1131
1132        // "When fd does not refer to the controlling terminal of the calling
1133        // process, -1 is returned" - tcgetpgrp(3)
1134        Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1135        let pid = process_group.session.read().get_foreground_process_group_leader();
1136        Ok(pid)
1137    }
1138
1139    pub fn set_foreground_process_group<L>(
1140        &self,
1141        locked: &mut Locked<L>,
1142        current_task: &CurrentTask,
1143        terminal: &Terminal,
1144        pgid: pid_t,
1145    ) -> Result<(), Errno>
1146    where
1147        L: LockBefore<ProcessGroupState>,
1148    {
1149        let process_group;
1150        let send_ttou;
1151        {
1152            // Keep locks to ensure atomicity.
1153            let pids = self.kernel.pids.read();
1154            let state = self.read();
1155            process_group = Arc::clone(&state.process_group);
1156            let terminal_state = terminal.read();
1157            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1158
1159            // pgid must be positive.
1160            if pgid < 0 {
1161                return error!(EINVAL);
1162            }
1163
1164            let new_process_group = pids.get_process_group(pgid).ok_or_else(|| errno!(ESRCH))?;
1165            if new_process_group.session != process_group.session {
1166                return error!(EPERM);
1167            }
1168
1169            let mut session_state = process_group.session.write();
1170            // If the calling process is a member of a background group and not ignoring SIGTTOU, a
1171            // SIGTTOU signal is sent to all members of this background process group.
1172            send_ttou = process_group.leader != session_state.get_foreground_process_group_leader()
1173                && !current_task.read().signal_mask().has_signal(SIGTTOU)
1174                && self.signal_actions.get(SIGTTOU).sa_handler != SIG_IGN;
1175
1176            if !send_ttou {
1177                session_state.set_foreground_process_group(&new_process_group);
1178            }
1179        }
1180
1181        // Locks must not be held when sending signals.
1182        if send_ttou {
1183            process_group.send_signals(locked, &[SIGTTOU]);
1184            return error!(EINTR);
1185        }
1186
1187        Ok(())
1188    }
1189
1190    pub fn set_controlling_terminal(
1191        &self,
1192        current_task: &CurrentTask,
1193        terminal: &Terminal,
1194        is_main: bool,
1195        steal: bool,
1196        is_readable: bool,
1197    ) -> Result<(), Errno> {
1198        // Keep locks to ensure atomicity.
1199        let state = self.read();
1200        let process_group = &state.process_group;
1201        let mut terminal_state = terminal.write();
1202        let mut session_writer = process_group.session.write();
1203
1204        // "The calling process must be a session leader and not have a
1205        // controlling terminal already." - tty_ioctl(4)
1206        if process_group.session.leader != self.leader
1207            || session_writer.controlling_terminal.is_some()
1208        {
1209            return error!(EINVAL);
1210        }
1211
1212        let mut has_admin_capability_determined = false;
1213
1214        // "If this terminal is already the controlling terminal of a different
1215        // session group, then the ioctl fails with EPERM, unless the caller
1216        // has the CAP_SYS_ADMIN capability and arg equals 1, in which case the
1217        // terminal is stolen, and all processes that had it as controlling
1218        // terminal lose it." - tty_ioctl(4)
1219        if let Some(other_session) =
1220            terminal_state.controller.as_ref().and_then(|cs| cs.session.upgrade())
1221        {
1222            if other_session != process_group.session {
1223                if !steal {
1224                    return error!(EPERM);
1225                }
1226                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1227                has_admin_capability_determined = true;
1228
1229                // Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
1230                other_session.write().controlling_terminal = None;
1231            }
1232        }
1233
1234        if !is_readable && !has_admin_capability_determined {
1235            security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1236        }
1237
1238        session_writer.controlling_terminal = Some(ControllingTerminal::new(terminal, is_main));
1239        terminal_state.controller = TerminalController::new(&process_group.session);
1240        Ok(())
1241    }
1242
1243    pub fn release_controlling_terminal<L>(
1244        &self,
1245        locked: &mut Locked<L>,
1246        _current_task: &CurrentTask,
1247        terminal: &Terminal,
1248        is_main: bool,
1249    ) -> Result<(), Errno>
1250    where
1251        L: LockBefore<ProcessGroupState>,
1252    {
1253        let process_group;
1254        {
1255            // Keep locks to ensure atomicity.
1256            let state = self.read();
1257            process_group = Arc::clone(&state.process_group);
1258            let mut terminal_state = terminal.write();
1259            let mut session_writer = process_group.session.write();
1260
1261            // tty must be the controlling terminal.
1262            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1263            if !session_writer
1264                .controlling_terminal
1265                .as_ref()
1266                .map_or(false, |ct| ct.matches(terminal, is_main))
1267            {
1268                return error!(ENOTTY);
1269            }
1270
1271            // "If the process was session leader, then send SIGHUP and SIGCONT to the foreground
1272            // process group and all processes in the current session lose their controlling terminal."
1273            // - tty_ioctl(4)
1274
1275            // Remove tty as the controlling tty for each process in the session, then
1276            // send them SIGHUP and SIGCONT.
1277
1278            session_writer.controlling_terminal = None;
1279            terminal_state.controller = None;
1280        }
1281
1282        if process_group.session.leader == self.leader {
1283            process_group.send_signals(locked, &[SIGHUP, SIGCONT]);
1284        }
1285
1286        Ok(())
1287    }
1288
1289    fn check_orphans<L>(&self, locked: &mut Locked<L>, pids: &PidTable)
1290    where
1291        L: LockBefore<ProcessGroupState>,
1292    {
1293        let mut thread_groups = self.read().children().collect::<Vec<_>>();
1294        let this = self.weak_self.upgrade().unwrap();
1295        thread_groups.push(this);
1296        let process_groups =
1297            thread_groups.iter().map(|tg| Arc::clone(&tg.read().process_group)).unique();
1298        for pg in process_groups {
1299            pg.check_orphaned(locked, pids);
1300        }
1301    }
1302
1303    pub fn get_rlimit<L>(&self, locked: &mut Locked<L>, resource: Resource) -> u64
1304    where
1305        L: LockBefore<ThreadGroupLimits>,
1306    {
1307        self.limits.lock(locked).get(resource).rlim_cur
1308    }
1309
1310    /// Adjusts the rlimits of the ThreadGroup to which `target_task` belongs to.
1311    pub fn adjust_rlimits<L>(
1312        locked: &mut Locked<L>,
1313        current_task: &CurrentTask,
1314        target_task: &Task,
1315        resource: Resource,
1316        maybe_new_limit: Option<rlimit>,
1317    ) -> Result<rlimit, Errno>
1318    where
1319        L: LockBefore<ThreadGroupLimits>,
1320    {
1321        let thread_group = target_task.thread_group();
1322        let can_increase_rlimit = security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE);
1323        let mut limit_state = thread_group.limits.lock(locked);
1324        let old_limit = limit_state.get(resource);
1325        if let Some(new_limit) = maybe_new_limit {
1326            if new_limit.rlim_max > old_limit.rlim_max && !can_increase_rlimit {
1327                return error!(EPERM);
1328            }
1329            security::task_setrlimit(current_task, &target_task, old_limit, new_limit)?;
1330            limit_state.set(resource, new_limit)
1331        }
1332        Ok(old_limit)
1333    }
1334
1335    pub fn time_stats(&self) -> TaskTimeStats {
1336        let process: &zx::Process = if zx::AsHandleRef::as_handle_ref(&self.process).is_invalid() {
1337            // `process` must be valid for all tasks, except `kthreads`. In that case get the
1338            // stats from starnix process.
1339            assert_eq!(
1340                self as *const ThreadGroup,
1341                Arc::as_ptr(&self.kernel.kthreads.system_thread_group())
1342            );
1343            &self.kernel.kthreads.starnix_process
1344        } else {
1345            &self.process
1346        };
1347
1348        let info =
1349            zx::Task::get_runtime_info(process).expect("Failed to get starnix process stats");
1350        TaskTimeStats {
1351            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
1352            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
1353            system_time: zx::MonotonicDuration::default(),
1354        }
1355    }
1356
1357    /// For each task traced by this thread_group that matches the given
1358    /// selector, acquire its TaskMutableState and ptracees lock and execute the
1359    /// given function.
1360    pub fn get_ptracees_and(
1361        &self,
1362        selector: &ProcessSelector,
1363        pids: &PidTable,
1364        f: &mut dyn FnMut(&Task, &TaskMutableState),
1365    ) {
1366        for tracee in self
1367            .ptracees
1368            .lock()
1369            .keys()
1370            .filter(|tracee_tid| selector.match_tid(**tracee_tid, &pids))
1371            .map(|tracee_tid| pids.get_task(*tracee_tid))
1372        {
1373            if let Some(task_ref) = tracee.clone().upgrade() {
1374                let task_state = task_ref.write();
1375                if task_state.ptrace.is_some() {
1376                    f(&task_ref, &task_state);
1377                }
1378            }
1379        }
1380    }
1381
    /// Returns a tracee whose state has changed, so that waitpid can report on
    /// it. If this returns a value, and the pid is being traced, the tracer
    /// thread is deemed to have seen the tracee ptrace-stop for the purposes of
    /// PTRACE_LISTEN.
    ///
    /// Zombie ptracees recorded on this thread group are consulted first. Only
    /// when none is waitable are the live ptracees matching `selector`
    /// examined, in the order produced by `get_ptracees_and`. For a live
    /// tracee the thread-group level status is tried first and the per-task
    /// ptrace status is used when that produced nothing.
    ///
    /// Unless `options.keep_waitable_state` is set, the thread group's
    /// `last_signal` is taken (cleared) when it is reported. `pids` is used to
    /// look up tracees and to release a zombie that needs no further
    /// notification. Returns `None` when nothing is waitable.
    pub fn get_waitable_ptracee(
        &self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &mut PidTable,
    ) -> Option<WaitResult> {
        // This checks to see if the target is a zombie ptracee.
        // The write lock is released at the end of this statement, before the
        // match below takes other locks.
        let waitable_entry = self.write().zombie_ptracees.get_waitable_entry(selector, options);
        match waitable_entry {
            None => (),
            // No second zombie is attached to this entry: just report it.
            Some((zombie, None)) => return Some(zombie.to_wait_result()),
            // The entry also carries a thread group `tg` and a zombie `z` —
            // presumably the tracee's real parent and the notification that
            // `maybe_notify_tracer` deferred via `set_parent_of`; confirm.
            Some((zombie, Some((tg, z)))) => {
                if let Some(tg) = tg.upgrade() {
                    if Arc::as_ptr(&tg) != self as *const Self {
                        // A different thread group: hand it the zombie through
                        // the regular notification path.
                        tg.do_zombie_notifications(z);
                    } else {
                        // `tg` is this thread group: drop the bookkeeping for
                        // the child and release the zombie instead of queueing
                        // it in `zombie_children`.
                        {
                            let mut state = tg.write();
                            state.children.remove(&z.pid());
                            state
                                .deferred_zombie_ptracers
                                .retain(|dzp| dzp.tracee_thread_group_key != z.thread_group_key);
                        }

                        z.release(pids);
                    };
                }
                return Some(zombie.to_wait_result());
            }
        }

        // Gather weak references first; `get_ptracees_and` holds the ptracees
        // lock and each task's state lock while the closure runs, and the loop
        // below takes the thread-group lock before the task lock.
        let mut tasks = vec![];

        // This checks to see if the target is a living ptracee
        self.get_ptracees_and(selector, pids, &mut |task: &Task, _| {
            tasks.push(task.weak_self.clone());
        });
        for task in tasks {
            // Skip tasks that went away since they were collected.
            let Some(task_ref) = task.upgrade() else {
                continue;
            };

            // Lock order: the tracee's thread group first, then the task.
            let process_state = &mut task_ref.thread_group().write();
            let mut task_state = task_ref.write();
            if task_state
                .ptrace
                .as_ref()
                .is_some_and(|ptrace| ptrace.is_waitable(task_ref.load_stopped(), options))
            {
                // We've identified a potential target.  Need to return either
                // the process's information (if we are in group-stop) or the
                // thread's information (if we are in a different stop).

                // The shared information:
                // `pid` is a placeholder here; it is overwritten with the
                // leader pid or the tid on every path that produces a result.
                let mut pid: i32 = 0;
                // The uid comes from the first task in the group's task map;
                // the unwrap assumes the group still has at least one task.
                let info = process_state.tasks.values().next().unwrap().info().clone();
                let uid = info.real_creds().uid;
                let mut exit_status = None;
                let exit_signal = process_state.exit_signal.clone();
                let time_stats =
                    process_state.base.time_stats() + process_state.children_time_stats;
                let task_stopped = task_ref.load_stopped();

                // Which kind of wait status to build, if any.
                #[derive(PartialEq)]
                enum ExitType {
                    None,
                    Cont,
                    Stop,
                    Kill,
                }
                if process_state.is_waitable() {
                    let ptrace = &mut task_state.ptrace;
                    // The information for processes, if we were in group stop.
                    let process_stopped = process_state.base.load_stopped();
                    let mut fn_type = ExitType::None;
                    if process_stopped == StopState::Awake && options.wait_for_continued {
                        fn_type = ExitType::Cont;
                    }
                    // The ptrace event recorded on the task, or `None` when
                    // there is no ptrace state or no event data.
                    let mut event = ptrace
                        .as_ref()
                        .map_or(PtraceEvent::None, |ptrace| {
                            ptrace.event_data.as_ref().map_or(PtraceEvent::None, |data| data.event)
                        })
                        .clone();
                    // Tasks that are ptrace'd always get stop notifications.
                    if process_stopped == StopState::GroupStopped
                        && (options.wait_for_stopped || ptrace.is_some())
                    {
                        fn_type = ExitType::Stop;
                    }
                    if fn_type != ExitType::None {
                        // Peek at the group's last signal when the waitable
                        // state must be kept; otherwise consume it.
                        let siginfo = if options.keep_waitable_state {
                            process_state.last_signal.clone()
                        } else {
                            process_state.last_signal.take()
                        };
                        if let Some(mut siginfo) = siginfo {
                            // A seized tracee in group-stop is reported as a
                            // ptrace stop event, encoded above the low 8 bits
                            // of the signal code.
                            if task_ref.thread_group().load_stopped() == StopState::GroupStopped
                                && ptrace.as_ref().is_some_and(|ptrace| ptrace.is_seized())
                            {
                                if event == PtraceEvent::None {
                                    event = PtraceEvent::Stop;
                                }
                                siginfo.code |= (PtraceEvent::Stop as i32) << 8;
                            }
                            // SIGKILL overrides the stop/continue classification.
                            if siginfo.signal == SIGKILL {
                                fn_type = ExitType::Kill;
                            }
                            exit_status = match fn_type {
                                ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
                                ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
                                ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
                                _ => None,
                            };
                        }
                        // Clear the wait status of the ptrace, because we're
                        // using the tg status instead.
                        ptrace
                            .as_mut()
                            .map(|ptrace| ptrace.get_last_signal(options.keep_waitable_state));
                    }
                    pid = process_state.base.leader;
                }
                // No group-level status was produced: fall back to the task's
                // own ptrace status.
                if exit_status == None {
                    if let Some(ptrace) = task_state.ptrace.as_mut() {
                        // The information for the task, if we were in a non-group stop.
                        let mut fn_type = ExitType::None;
                        let event = ptrace
                            .event_data
                            .as_ref()
                            .map_or(PtraceEvent::None, |event| event.event);
                        if task_stopped == StopState::Awake {
                            fn_type = ExitType::Cont;
                        }
                        // A stopping/stopped task, or one in the Listening
                        // ptrace status, is reported as a stop.
                        if task_stopped.is_stopping_or_stopped()
                            || ptrace.stop_status == PtraceStatus::Listening
                        {
                            fn_type = ExitType::Stop;
                        }
                        if fn_type != ExitType::None {
                            if let Some(siginfo) =
                                ptrace.get_last_signal(options.keep_waitable_state)
                            {
                                if siginfo.signal == SIGKILL {
                                    fn_type = ExitType::Kill;
                                }
                                exit_status = match fn_type {
                                    ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
                                    ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
                                    ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
                                    _ => None,
                                };
                            }
                        }
                        // Task-level results are reported under the tid.
                        pid = task_ref.get_tid();
                    }
                }
                // The first tracee that yields a status wins; otherwise keep
                // looking at the remaining tracees.
                if let Some(exit_status) = exit_status {
                    return Some(WaitResult {
                        pid,
                        uid,
                        exit_info: ProcessExitInfo { status: exit_status, exit_signal },
                        time_stats,
                    });
                }
            }
        }
        None
    }
1555
1556    /// Attempts to send an unchecked signal to this thread group.
1557    ///
1558    /// - `current_task`: The task that is sending the signal.
1559    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1560    /// where rights are to be checked but no signal is actually sent.
1561    ///
1562    /// # Returns
1563    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1564    /// the error that was encountered.
1565    pub fn send_signal_unchecked(
1566        &self,
1567        current_task: &CurrentTask,
1568        unchecked_signal: UncheckedSignal,
1569    ) -> Result<(), Errno> {
1570        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1571            let signal_info = SignalInfo {
1572                code: SI_USER as i32,
1573                detail: SignalDetail::Kill {
1574                    pid: current_task.thread_group().leader,
1575                    uid: current_task.with_current_creds(|creds| creds.uid),
1576                },
1577                ..SignalInfo::default(signal)
1578            };
1579
1580            self.write().send_signal(signal_info);
1581        }
1582
1583        Ok(())
1584    }
1585
1586    /// Sends a signal to this thread_group without performing any access checks.
1587    ///
1588    /// # Safety
1589    /// This is unsafe, because it should only be called by tools and tests.
1590    pub unsafe fn send_signal_unchecked_debug(
1591        &self,
1592        current_task: &CurrentTask,
1593        unchecked_signal: UncheckedSignal,
1594    ) -> Result<(), Errno> {
1595        let signal = Signal::try_from(unchecked_signal)?;
1596        let signal_info = SignalInfo {
1597            code: SI_USER as i32,
1598            detail: SignalDetail::Kill {
1599                pid: current_task.thread_group().leader,
1600                uid: current_task.with_current_creds(|creds| creds.uid),
1601            },
1602            ..SignalInfo::default(signal)
1603        };
1604
1605        self.write().send_signal(signal_info);
1606        Ok(())
1607    }
1608
1609    /// Attempts to send an unchecked signal to this thread group, with info read from
1610    /// `siginfo_ref`.
1611    ///
1612    /// - `current_task`: The task that is sending the signal.
1613    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1614    /// where rights are to be checked but no signal is actually sent.
1615    /// - `siginfo_ref`: The siginfo that will be enqueued.
1616    ///
1617    /// # Returns
1618    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1619    /// the error that was encountered.
1620    #[track_caller]
1621    pub fn send_signal_unchecked_with_info(
1622        &self,
1623        current_task: &CurrentTask,
1624        unchecked_signal: UncheckedSignal,
1625        siginfo_ref: UserAddress,
1626    ) -> Result<(), Errno> {
1627        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1628            let signal_info = read_siginfo(current_task, signal, siginfo_ref)?;
1629            if self.leader != current_task.get_pid()
1630                && (signal_info.code >= 0 || signal_info.code == SI_TKILL)
1631            {
1632                return error!(EPERM);
1633            }
1634
1635            self.write().send_signal(signal_info);
1636        }
1637
1638        Ok(())
1639    }
1640
1641    /// Checks whether or not `current_task` can signal this thread group with `unchecked_signal`.
1642    ///
1643    /// Returns:
1644    ///   - `Ok(Some(Signal))` if the signal passed checks and should be sent.
1645    ///   - `Ok(None)` if the signal passed checks, but should not be sent. This is used by
1646    ///   userspace for permission checks.
1647    ///   - `Err(_)` if the permission checks failed.
1648    fn check_signal_access(
1649        &self,
1650        current_task: &CurrentTask,
1651        unchecked_signal: UncheckedSignal,
1652    ) -> Result<Option<Signal>, Errno> {
1653        // Pick an arbitrary task in thread_group to check permissions.
1654        //
1655        // Tasks can technically have different credentials, but in practice they are kept in sync.
1656        let state = self.read();
1657        let target_task = state.get_live_task()?;
1658        current_task.can_signal(&target_task, unchecked_signal)?;
1659
1660        // 0 is a sentinel value used to do permission checks.
1661        if unchecked_signal.is_zero() {
1662            return Ok(None);
1663        }
1664
1665        let signal = Signal::try_from(unchecked_signal)?;
1666        security::check_signal_access(current_task, &target_task, signal)?;
1667
1668        Ok(Some(signal))
1669    }
1670
1671    /// Drive this `ThreadGroup` to exit, allowing it time to handle SIGTERM before sending SIGKILL.
1672    ///
1673    /// Returns once `ThreadGroup::exit()` has completed.
1674    ///
1675    /// Must be called from the system task.
1676    pub async fn shut_down(this: Weak<Self>) {
1677        const SHUTDOWN_SIGNAL_HANDLING_TIMEOUT: zx::MonotonicDuration =
1678            zx::MonotonicDuration::from_seconds(1);
1679
1680        // Prepare for shutting down the thread group.
1681        let (tg_name, mut on_exited) = {
1682            // Nest this upgraded access so TempRefs aren't held across await-points.
1683            let Some(this) = this.upgrade() else {
1684                return;
1685            };
1686
1687            // Register a channel to be notified when exit() is complete.
1688            let (on_exited_send, on_exited) = futures::channel::oneshot::channel();
1689            this.write().exit_notifier = Some(on_exited_send);
1690
1691            // We want to be able to log about this thread group without upgrading the WeakRef.
1692            let tg_name = format!("{this:?}");
1693
1694            (tg_name, on_exited)
1695        };
1696
1697        log_debug!(tg:% = tg_name; "shutting down thread group, sending SIGTERM");
1698        this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::default(SIGTERM)));
1699
1700        // Give thread groups some time to handle SIGTERM, proceeding early if they exit
1701        let timeout = fuchsia_async::Timer::new(SHUTDOWN_SIGNAL_HANDLING_TIMEOUT);
1702        futures::pin_mut!(timeout);
1703
1704        // Use select_biased instead of on_timeout() so that we can await on on_exited later
1705        futures::select_biased! {
1706            _ = &mut on_exited => (),
1707            _ = timeout => {
1708                log_debug!(tg:% = tg_name; "sending SIGKILL");
1709                this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::default(SIGKILL)));
1710            },
1711        };
1712
1713        log_debug!(tg:% = tg_name; "waiting for exit");
1714        // It doesn't matter whether ThreadGroup::exit() was called or the process exited with
1715        // a return code and dropped the sender end of the channel.
1716        on_exited.await.ok();
1717        log_debug!(tg:% = tg_name; "thread group shutdown complete");
1718    }
1719
    /// Returns the KOID of the process for this thread group.
    ///
    /// This method should be used when mapping 32-bit Linux process ids to KOIDs,
    /// to avoid breaking the encapsulation of the zx::process within the ThreadGroup.
    /// This encapsulation is important since the relationship between the ThreadGroup
    /// and the Process may change over time. See [ThreadGroup::process] for more details.
    ///
    /// # Errors
    /// Propagates the `Status` returned by `get_koid()` on the underlying process.
    pub fn get_process_koid(&self) -> Result<Koid, Status> {
        self.process.get_koid()
    }
1728}
1729
/// Outcome of looking for a child that a wait operation can report on.
#[cfg_attr(
    feature = "debug_and_trace_logs_enabled",
    allow(clippy::large_enum_variant, reason = "no need to optimize enum size in debug builds")
)]
pub enum WaitableChildResult {
    /// A matching child is waitable right now; carries the result to report to the waiter.
    ReadyNow(WaitResult),
    /// Something matches the query (a child, or a deferred zombie ptracer entry), but nothing
    /// is waitable yet.
    ShouldWait,
    /// Nothing matches the query at all.
    NoneFound,
}
1739
1740#[apply(state_implementation!)]
1741impl ThreadGroupMutableState<Base = ThreadGroup> {
    /// Returns the `leader` pid stored on the underlying `ThreadGroup`.
    pub fn leader(&self) -> pid_t {
        self.base.leader
    }
1745
1746    pub fn leader_command(&self) -> TaskCommand {
1747        self.get_task(self.leader())
1748            .map(|l| l.command())
1749            .unwrap_or_else(|| TaskCommand::new(b"<leader exited>"))
1750    }
1751
    /// Returns `true` as soon as `run_state` is anything other than
    /// `ThreadGroupRunState::Running`.
    pub fn is_terminating(&self) -> bool {
        !matches!(self.run_state, ThreadGroupRunState::Running)
    }
1755
    /// Iterates over strong references to the child thread groups.
    ///
    /// # Panics
    /// Panics if a child's weak reference can no longer be upgraded; entries in `children` are
    /// expected to always be valid.
    pub fn children(&self) -> impl Iterator<Item = Arc<ThreadGroup>> + '_ {
        self.children.values().map(|v| {
            v.upgrade().expect("Weak references to processes in ThreadGroup must always be valid")
        })
    }
1761
1762    pub fn tasks(&self) -> impl Iterator<Item = TempRef<'_, Task>> + '_ {
1763        self.tasks.values().flat_map(|t| t.upgrade())
1764    }
1765
    /// Iterates over the tids registered in this thread group (the keys of `tasks`), whether
    /// or not the corresponding task can still be upgraded.
    pub fn task_ids(&self) -> impl Iterator<Item = &tid_t> {
        self.tasks.keys()
    }
1769
    /// Returns whether `tid` is registered in this thread group.
    pub fn contains_task(&self, tid: tid_t) -> bool {
        self.tasks.contains_key(&tid)
    }
1773
1774    pub fn get_task(&self, tid: tid_t) -> Option<TempRef<'_, Task>> {
1775        self.tasks.get(&tid).and_then(|t| t.upgrade())
1776    }
1777
    /// Returns the number of entries in `tasks`. Unlike `tasks()`, this also counts entries
    /// whose task can no longer be upgraded.
    pub fn tasks_count(&self) -> usize {
        self.tasks.len()
    }
1781
1782    pub fn get_ppid(&self) -> pid_t {
1783        match &self.parent {
1784            Some(parent) => parent.upgrade().leader,
1785            None => 0,
1786        }
1787    }
1788
1789    fn set_process_group<L>(
1790        &mut self,
1791        locked: &mut Locked<L>,
1792        process_group: Arc<ProcessGroup>,
1793        pids: &PidTable,
1794    ) where
1795        L: LockBefore<ProcessGroupState>,
1796    {
1797        if self.process_group == process_group {
1798            return;
1799        }
1800        self.leave_process_group(locked, pids);
1801        self.process_group = process_group;
1802        self.process_group.insert(locked, self.base);
1803    }
1804
1805    fn leave_process_group<L>(&mut self, locked: &mut Locked<L>, pids: &PidTable)
1806    where
1807        L: LockBefore<ProcessGroupState>,
1808    {
1809        if self.process_group.remove(locked, self.base) {
1810            self.process_group.session.write().remove(self.process_group.leader);
1811            pids.remove_process_group(self.process_group.leader);
1812        }
1813    }
1814
1815    /// Indicates whether the thread group is waitable via waitid and waitpid for
1816    /// either WSTOPPED or WCONTINUED.
1817    pub fn is_waitable(&self) -> bool {
1818        return self.last_signal.is_some() && !self.base.load_stopped().is_in_progress();
1819    }
1820
1821    pub fn get_waitable_zombie(
1822        &mut self,
1823        zombie_list: &dyn Fn(&mut ThreadGroupMutableState) -> &mut Vec<OwnedRef<ZombieProcess>>,
1824        selector: &ProcessSelector,
1825        options: &WaitingOptions,
1826        pids: &mut PidTable,
1827    ) -> Option<WaitResult> {
1828        // We look for the last zombie in the vector that matches pid selector and waiting options
1829        let selected_zombie_position = zombie_list(self)
1830            .iter()
1831            .rev()
1832            .position(|zombie| zombie.matches_selector_and_waiting_option(selector, options))
1833            .map(|position_starting_from_the_back| {
1834                zombie_list(self).len() - 1 - position_starting_from_the_back
1835            });
1836
1837        selected_zombie_position.map(|position| {
1838            if options.keep_waitable_state {
1839                zombie_list(self)[position].to_wait_result()
1840            } else {
1841                let zombie = zombie_list(self).remove(position);
1842                self.children_time_stats += zombie.time_stats;
1843                let result = zombie.to_wait_result();
1844                zombie.release(pids);
1845                result
1846            }
1847        })
1848    }
1849
1850    pub fn is_correct_exit_signal(for_clone: bool, exit_code: Option<Signal>) -> bool {
1851        for_clone == (exit_code != Some(SIGCHLD))
1852    }
1853
1854    fn get_waitable_running_children(
1855        &self,
1856        selector: &ProcessSelector,
1857        options: &WaitingOptions,
1858        pids: &PidTable,
1859    ) -> WaitableChildResult {
1860        // The children whose pid matches the pid selector queried.
1861        let filter_children_by_pid_selector = |child: &ThreadGroup| match *selector {
1862            ProcessSelector::Any => true,
1863            ProcessSelector::Pid(pid) => child.leader == pid,
1864            ProcessSelector::Pgid(pgid) => {
1865                pids.get_process_group(pgid).as_ref() == Some(&child.read().process_group)
1866            }
1867            ProcessSelector::Process(ref key) => *key == ThreadGroupKey::from(child),
1868        };
1869
1870        // The children whose exit signal matches the waiting options queried.
1871        let filter_children_by_waiting_options = |child: &ThreadGroup| {
1872            if options.wait_for_all {
1873                return true;
1874            }
1875            Self::is_correct_exit_signal(options.wait_for_clone, child.read().exit_signal)
1876        };
1877
1878        // If wait_for_exited flag is disabled or no terminated children were found we look for living children.
1879        let mut selected_children = self
1880            .children
1881            .values()
1882            .map(|t| t.upgrade().unwrap())
1883            .filter(|tg| filter_children_by_pid_selector(&tg))
1884            .filter(|tg| filter_children_by_waiting_options(&tg))
1885            .peekable();
1886        if selected_children.peek().is_none() {
1887            // There still might be a process that ptrace hasn't looked at yet.
1888            if self.deferred_zombie_ptracers.iter().any(|dzp| match *selector {
1889                ProcessSelector::Any => true,
1890                ProcessSelector::Pid(pid) => dzp.tracee_thread_group_key.pid() == pid,
1891                ProcessSelector::Pgid(pgid) => pgid == dzp.tracee_pgid,
1892                ProcessSelector::Process(ref key) => *key == dzp.tracee_thread_group_key,
1893            }) {
1894                return WaitableChildResult::ShouldWait;
1895            }
1896
1897            return WaitableChildResult::NoneFound;
1898        }
1899        for child in selected_children {
1900            let child = child.write();
1901            if child.last_signal.is_some() {
1902                let build_wait_result = |mut child: ThreadGroupWriteGuard<'_>,
1903                                         exit_status: &dyn Fn(SignalInfo) -> ExitStatus|
1904                 -> WaitResult {
1905                    let siginfo = if options.keep_waitable_state {
1906                        child.last_signal.clone().unwrap()
1907                    } else {
1908                        child.last_signal.take().unwrap()
1909                    };
1910                    let exit_status = if siginfo.signal == SIGKILL {
1911                        // This overrides the stop/continue choice.
1912                        ExitStatus::Kill(siginfo)
1913                    } else {
1914                        exit_status(siginfo)
1915                    };
1916                    let info = child.tasks.values().next().unwrap().info();
1917                    let uid = info.real_creds().uid;
1918                    WaitResult {
1919                        pid: child.base.leader,
1920                        uid,
1921                        exit_info: ProcessExitInfo {
1922                            status: exit_status,
1923                            exit_signal: child.exit_signal,
1924                        },
1925                        time_stats: child.base.time_stats() + child.children_time_stats,
1926                    }
1927                };
1928                let child_stopped = child.base.load_stopped();
1929                if child_stopped == StopState::Awake && options.wait_for_continued {
1930                    return WaitableChildResult::ReadyNow(build_wait_result(child, &|siginfo| {
1931                        ExitStatus::Continue(siginfo, PtraceEvent::None)
1932                    }));
1933                }
1934                if child_stopped == StopState::GroupStopped && options.wait_for_stopped {
1935                    return WaitableChildResult::ReadyNow(build_wait_result(child, &|siginfo| {
1936                        ExitStatus::Stop(siginfo, PtraceEvent::None)
1937                    }));
1938                }
1939            }
1940        }
1941
1942        WaitableChildResult::ShouldWait
1943    }
1944
1945    /// Returns any waitable child matching the given `selector` and `options`. Returns None if no
1946    /// child matching the selector is waitable. Returns ECHILD if no child matches the selector at
1947    /// all.
1948    ///
1949    /// Will remove the waitable status from the child depending on `options`.
1950    pub fn get_waitable_child(
1951        &mut self,
1952        selector: &ProcessSelector,
1953        options: &WaitingOptions,
1954        pids: &mut PidTable,
1955    ) -> WaitableChildResult {
1956        if options.wait_for_exited {
1957            if let Some(waitable_zombie) = self.get_waitable_zombie(
1958                &|state: &mut ThreadGroupMutableState| &mut state.zombie_children,
1959                selector,
1960                options,
1961                pids,
1962            ) {
1963                return WaitableChildResult::ReadyNow(waitable_zombie);
1964            }
1965        }
1966
1967        self.get_waitable_running_children(selector, options, pids)
1968    }
1969
1970    /// Returns a task in the current thread group.
1971    pub fn get_live_task(&self) -> Result<TempRef<'_, Task>, Errno> {
1972        self.tasks
1973            .get(&self.leader())
1974            .and_then(|t| t.upgrade())
1975            .or_else(|| self.tasks().next())
1976            .ok_or_else(|| errno!(ESRCH))
1977    }
1978
1979    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
1980    /// does not update the signal.  If |finalize_only| is set, will check that
1981    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
1982    /// before executing it.
1983    ///
1984    /// Returns the latest stop state after any changes.
1985    pub fn set_stopped(
1986        mut self,
1987        new_stopped: StopState,
1988        siginfo: Option<SignalInfo>,
1989        finalize_only: bool,
1990    ) -> StopState {
1991        if let Some(stopped) = self.base.check_stopped_state(new_stopped, finalize_only) {
1992            return stopped;
1993        }
1994
1995        // Thread groups don't transition to group stop if they are waking, because waking
1996        // means something told it to wake up (like a SIGCONT) but hasn't finished yet.
1997        if self.base.load_stopped() == StopState::Waking
1998            && (new_stopped == StopState::GroupStopping || new_stopped == StopState::GroupStopped)
1999        {
2000            return self.base.load_stopped();
2001        }
2002
2003        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When thread
2004        // group can be stopped inside user code, tasks/thread groups will
2005        // need to be either restarted or stopped here.
2006        self.store_stopped(new_stopped);
2007        if let Some(signal) = &siginfo {
2008            // We don't want waiters to think the process was unstopped
2009            // because of a sigkill.  They will get woken when the
2010            // process dies.
2011            if signal.signal != SIGKILL {
2012                self.last_signal = siginfo;
2013            }
2014        }
2015        if new_stopped == StopState::Waking || new_stopped == StopState::ForceWaking {
2016            self.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::Stopped);
2017        };
2018
2019        let parent = (!new_stopped.is_in_progress()).then(|| self.parent.clone()).flatten();
2020
2021        // Drop the lock before locking the parent.
2022        std::mem::drop(self);
2023        if let Some(parent) = parent {
2024            let parent = parent.upgrade();
2025            parent
2026                .write()
2027                .lifecycle_waiters
2028                .notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
2029        }
2030
2031        new_stopped
2032    }
2033
    /// Stores `state` as the thread group's stop state, using relaxed ordering.
    fn store_stopped(&mut self, state: StopState) {
        // The mutable state itself is not modified here. `&mut self` is only required to
        // enforce that the caller holds the thread group's mutable state lock (identified by
        // mutable access to the thread group's mutable state).

        self.base.stop_state.store(state, Ordering::Relaxed)
    }
2041
2042    /// Sends the signal `signal_info` to this thread group.
2043    #[allow(unused_mut, reason = "needed for some but not all macro outputs")]
2044    pub fn send_signal(mut self, signal_info: SignalInfo) {
2045        let sigaction = self.base.signal_actions.get(signal_info.signal);
2046        let action = action_for_signal(&signal_info, sigaction);
2047
2048        self.base.pending_signals.lock().enqueue(signal_info.clone());
2049        let tasks: Vec<WeakRef<Task>> = self.tasks.values().map(|t| t.weak_clone()).collect();
2050
2051        // Set state to waking before interrupting any tasks.
2052        if signal_info.signal == SIGKILL {
2053            self.set_stopped(StopState::ForceWaking, Some(signal_info.clone()), false);
2054        } else if signal_info.signal == SIGCONT {
2055            self.set_stopped(StopState::Waking, Some(signal_info.clone()), false);
2056        }
2057
2058        let mut has_interrupted_task = false;
2059        for task in tasks.iter().flat_map(|t| t.upgrade()) {
2060            let mut task_state = task.write();
2061
2062            if signal_info.signal == SIGKILL {
2063                task_state.thaw();
2064                task_state.set_stopped(StopState::ForceWaking, None, None, None);
2065            } else if signal_info.signal == SIGCONT {
2066                task_state.set_stopped(StopState::Waking, None, None, None);
2067            }
2068
2069            let is_masked = task_state.is_signal_masked(signal_info.signal);
2070            let was_masked = task_state.is_signal_masked_by_saved_mask(signal_info.signal);
2071
2072            let is_queued = action != DeliveryAction::Ignore
2073                || is_masked
2074                || was_masked
2075                || task_state.is_ptraced();
2076
2077            if is_queued {
2078                task_state.notify_signal_waiters(&signal_info.signal);
2079                task_state.set_flags(TaskFlags::SIGNALS_AVAILABLE, true);
2080
2081                if !is_masked && action.must_interrupt(Some(sigaction)) && !has_interrupted_task {
2082                    // Only interrupt one task, and only interrupt if the signal was actually queued
2083                    // and the action must interrupt.
2084                    drop(task_state);
2085                    task.interrupt();
2086                    has_interrupted_task = true;
2087                }
2088            }
2089        }
2090    }
2091}
2092
/// Container pairing a weak reference to a task with a strong `TaskPersistentInfo`.
///
/// It is needed to keep the information even when the task is not upgradable, because when the
/// task is dropped, there is a moment where the task is not yet released, yet the weak pointer
/// is not upgradeable anymore. During this time, it is still necessary to access the persistent
/// info to compute the state of the thread for the different wait syscalls.
pub struct TaskContainer(WeakRef<Task>, TaskPersistentInfo);
2099
2100impl From<&TempRef<'_, Task>> for TaskContainer {
2101    fn from(task: &TempRef<'_, Task>) -> Self {
2102        Self(WeakRef::from(task), task.persistent_info.clone())
2103    }
2104}
2105
2106impl From<TaskContainer> for TaskPersistentInfo {
2107    fn from(container: TaskContainer) -> TaskPersistentInfo {
2108        container.1
2109    }
2110}
2111
impl TaskContainer {
    /// Attempts to upgrade the weak task reference; returns `None` when the task is no longer
    /// upgradable.
    fn upgrade(&self) -> Option<TempRef<'_, Task>> {
        self.0.upgrade()
    }

    /// Returns a clone of the weak task reference.
    fn weak_clone(&self) -> WeakRef<Task> {
        self.0.clone()
    }

    /// Returns the persistent info, which stays available even when the task cannot be
    /// upgraded anymore.
    fn info(&self) -> &TaskPersistentInfo {
        &self.1
    }
}
2125
2126#[cfg(test)]
2127mod test {
2128    use super::*;
2129    use crate::testing::*;
2130
2131    #[::fuchsia::test]
2132    async fn test_setsid() {
2133        spawn_kernel_and_run(async |locked, current_task| {
2134            fn get_process_group(task: &Task) -> Arc<ProcessGroup> {
2135                Arc::clone(&task.thread_group().read().process_group)
2136            }
2137            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));
2138
2139            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2140            assert_eq!(get_process_group(&current_task), get_process_group(&child_task));
2141
2142            let old_process_group = child_task.thread_group().read().process_group.clone();
2143            assert_eq!(child_task.thread_group().setsid(locked), Ok(()));
2144            assert_eq!(
2145                child_task.thread_group().read().process_group.session.leader,
2146                child_task.get_pid()
2147            );
2148            assert!(
2149                !old_process_group.read(locked).thread_groups().contains(child_task.thread_group())
2150            );
2151        })
2152        .await;
2153    }
2154
2155    #[::fuchsia::test]
2156    async fn test_exit_status() {
2157        spawn_kernel_and_run(async |locked, current_task| {
2158            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2159            child.thread_group().exit(locked, ExitStatus::Exit(42), None);
2160            std::mem::drop(child);
2161            assert_eq!(
2162                current_task.thread_group().read().zombie_children[0].exit_info.status,
2163                ExitStatus::Exit(42)
2164            );
2165        })
2166        .await;
2167    }
2168
    /// Exercises the error cases and the success cases of `setpgid`.
    // NOTE(review): the test name looks like a typo for `test_setpgid` -- confirm before renaming.
    #[::fuchsia::test]
    async fn test_setgpid() {
        spawn_kernel_and_run(async |locked, current_task| {
            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));

            // Fixtures: two plain children, one child marked as having exec'd, and one child
            // that moved into its own session.
            let child_task1 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            let child_task2 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            let execd_child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            execd_child_task.thread_group().write().did_exec = true;
            let other_session_child_task =
                current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert_eq!(other_session_child_task.thread_group().setsid(locked), Ok(()));

            // Called on the child's thread group with `current_task` as the target: ESRCH.
            assert_eq!(
                child_task1.thread_group().setpgid(locked, &current_task, &current_task, 0),
                error!(ESRCH)
            );
            // The target child has already exec'd: EACCES.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &execd_child_task, 0),
                error!(EACCES)
            );
            // The target is `current_task` itself: EPERM.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &current_task, 0),
                error!(EPERM)
            );
            // The target child lives in another session: EPERM.
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &other_session_child_task,
                    0
                ),
                error!(EPERM)
            );
            // A negative pgid is invalid: EINVAL.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &child_task1, -1),
                error!(EINVAL)
            );
            // Pgid 255 is rejected with EPERM.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &child_task1, 255),
                error!(EPERM)
            );
            // The requested process group belongs to the child in the other session: EPERM.
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &child_task1,
                    other_session_child_task.tid
                ),
                error!(EPERM)
            );

            // Success: pgid 0 puts the child into a new process group led by itself, still in
            // the session led by `current_task`.
            assert_eq!(
                child_task1.thread_group().setpgid(locked, &current_task, &child_task1, 0),
                Ok(())
            );
            assert_eq!(
                child_task1.thread_group().read().process_group.session.leader,
                current_task.tid
            );
            assert_eq!(child_task1.thread_group().read().process_group.leader, child_task1.tid);

            // Success: moving the second child into the first child's process group removes it
            // from its previous process group.
            let old_process_group = child_task2.thread_group().read().process_group.clone();
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &child_task2,
                    child_task1.tid
                ),
                Ok(())
            );
            assert_eq!(child_task2.thread_group().read().process_group.leader, child_task1.tid);
            assert!(
                !old_process_group
                    .read(locked)
                    .thread_groups()
                    .contains(child_task2.thread_group())
            );
        })
        .await;
    }
2251
2252    #[::fuchsia::test]
2253    async fn test_adopt_children() {
2254        spawn_kernel_and_run(async |locked, current_task| {
2255            let task1 = current_task.clone_task_for_test(locked, 0, None);
2256            let task2 = task1.clone_task_for_test(locked, 0, None);
2257            let task3 = task2.clone_task_for_test(locked, 0, None);
2258
2259            assert_eq!(task3.thread_group().read().get_ppid(), task2.tid);
2260
2261            task2.thread_group().exit(locked, ExitStatus::Exit(0), None);
2262            std::mem::drop(task2);
2263
2264            // Task3 parent should be current_task.
2265            assert_eq!(task3.thread_group().read().get_ppid(), current_task.tid);
2266        })
2267        .await;
2268    }
2269}