// starnix_core/task/thread_group.rs
1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::terminal::{Terminal, TerminalController};
6use crate::mutable_state::{state_accessor, state_implementation};
7use crate::ptrace::{
8    AtomicStopState, PtraceAllowedPtracers, PtraceEvent, PtraceOptions, PtraceStatus, StopState,
9    ZombiePtracees, ptrace_detach,
10};
11use crate::security;
12use crate::signals::syscalls::WaitingOptions;
13use crate::signals::{
14    DeliveryAction, IntoSignalInfoOptions, QueuedSignals, SignalActions, SignalDetail, SignalInfo,
15    UncheckedSignalInfo, action_for_signal, send_standard_signal,
16};
17use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
18use crate::task::{
19    ControllingTerminal, CurrentTask, ExitStatus, Kernel, PidTable, ProcessGroup, Session, Task,
20    TaskMutableState, TaskPersistentInfo, TypedWaitQueue,
21};
22use crate::time::{IntervalTimerHandle, TimerTable};
23use itertools::Itertools;
24use macro_rules_attribute::apply;
25use starnix_lifecycle::{AtomicU64Counter, DropNotifier};
26use starnix_logging::{log_debug, log_error, log_info, log_warn, track_stub};
27use starnix_sync::{
28    LockBefore, Locked, Mutex, OrderedMutex, ProcessGroupState, RwLock, ThreadGroupLimits, Unlocked,
29};
30use starnix_task_command::TaskCommand;
31use starnix_types::ownership::{OwnedRef, Releasable, TempRef, WeakRef};
32use starnix_types::stats::TaskTimeStats;
33use starnix_types::time::{itimerspec_from_itimerval, timeval_from_duration};
34use starnix_uapi::arc_key::WeakKey;
35use starnix_uapi::auth::{CAP_SYS_ADMIN, CAP_SYS_RESOURCE, Credentials};
36use starnix_uapi::errors::Errno;
37use starnix_uapi::personality::PersonalityFlags;
38use starnix_uapi::resource_limits::{Resource, ResourceLimits};
39use starnix_uapi::signals::{
40    SIGCHLD, SIGCONT, SIGHUP, SIGKILL, SIGTERM, SIGTTOU, SigSet, Signal, UncheckedSignal,
41};
42use starnix_uapi::user_address::UserAddress;
43use starnix_uapi::{
44    ITIMER_PROF, ITIMER_REAL, ITIMER_VIRTUAL, SI_TKILL, SI_USER, SIG_IGN, errno, error, itimerval,
45    pid_t, rlimit, tid_t, uid_t,
46};
47use std::collections::BTreeMap;
48use std::fmt;
49use std::sync::atomic::{AtomicBool, Ordering};
50use std::sync::{Arc, Weak};
51use zx::{Koid, Status};
52
/// A weak reference to a thread group that can be used in sets and maps.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadGroupKey {
    // Leader pid captured when the key is created; remains readable even after the
    // weak reference can no longer be upgraded.
    pid: pid_t,
    // Weak handle providing the key's identity (equality/ordering/hash).
    thread_group: WeakKey<ThreadGroup>,
}

impl ThreadGroupKey {
    /// The pid of the thread group keyed by this object.
    ///
    /// As the key is weak (and pids are not unique due to pid namespaces), this should not be used
    /// as a unique identifier of the thread group.
    pub fn pid(&self) -> pid_t {
        self.pid
    }
}

/// Derefs to the underlying `Weak<ThreadGroup>` so `upgrade()` can be called directly
/// on the key.
impl std::ops::Deref for ThreadGroupKey {
    type Target = Weak<ThreadGroup>;
    fn deref(&self) -> &Self::Target {
        &self.thread_group.0
    }
}
76
77impl From<&ThreadGroup> for ThreadGroupKey {
78    fn from(tg: &ThreadGroup) -> Self {
79        Self { pid: tg.leader, thread_group: WeakKey::from(&tg.weak_self.upgrade().unwrap()) }
80    }
81}
82
83impl<T: AsRef<ThreadGroup>> From<T> for ThreadGroupKey {
84    fn from(tg: T) -> Self {
85        tg.as_ref().into()
86    }
87}
88
/// Values used for waiting on the [ThreadGroup] lifecycle wait queue.
///
/// The discriminant is used as the `u64` wait-queue value (see the `u64` conversion
/// below).
#[repr(u64)]
pub enum ThreadGroupLifecycleWaitValue {
    /// Wait for updates to the WaitResults of tasks in the group.
    ChildStatus,
    /// Wait for updates to `stopped`.
    Stopped,
}
97
98impl Into<u64> for ThreadGroupLifecycleWaitValue {
99    fn into(self) -> u64 {
100        self as u64
101    }
102}
103
/// Child process that has exited, but whose zombie-ptrace notification needs to be
/// consumed before it can be waited for.
#[derive(Clone, Debug)]
pub struct DeferredZombiePTracer {
    /// Original tracer
    pub tracer_thread_group_key: ThreadGroupKey,
    /// Tracee tid
    pub tracee_tid: tid_t,
    /// Tracee pgid
    pub tracee_pgid: pid_t,
    /// Tracee thread group
    pub tracee_thread_group_key: ThreadGroupKey,
}
117
118impl DeferredZombiePTracer {
119    fn new(tracer: &ThreadGroup, tracee: &Task) -> Self {
120        Self {
121            tracer_thread_group_key: tracer.into(),
122            tracee_tid: tracee.tid,
123            tracee_pgid: tracee.thread_group().read().process_group.leader,
124            tracee_thread_group_key: tracee.thread_group_key.clone(),
125        }
126    }
127}
128
/// The mutable state of the ThreadGroup.
pub struct ThreadGroupMutableState {
    /// The parent thread group.
    ///
    /// The value needs to be writable so that it can be re-parented to the correct subreaper if
    /// the parent ends before the child.
    pub parent: Option<ThreadGroupParent>,

    /// The signal this process generates on exit.
    pub exit_signal: Option<Signal>,

    /// The tasks in the thread group.
    ///
    /// The references to Task are weak to prevent cycles, as Tasks have an Arc reference to their
    /// thread group.
    /// It is still expected that these weak references are always valid, as tasks must unregister
    /// themselves before they are deleted.
    tasks: BTreeMap<tid_t, TaskContainer>,

    /// The children of this thread group.
    ///
    /// The references to ThreadGroup are weak to prevent cycles, as ThreadGroups have an Arc
    /// reference to their parent.
    /// It is still expected that these weak references are always valid, as thread groups must
    /// unregister themselves before they are deleted.
    pub children: BTreeMap<pid_t, Weak<ThreadGroup>>,

    /// Child tasks that have exited, but not yet been waited for.
    pub zombie_children: Vec<OwnedRef<ZombieProcess>>,

    /// ptracees of this process that have exited, but not yet been waited for.
    pub zombie_ptracees: ZombiePtracees,

    /// Child processes that have exited, but the zombie ptrace needs to be consumed
    /// before they can be waited for.
    pub deferred_zombie_ptracers: Vec<DeferredZombiePTracer>,

    /// Unified [WaitQueue] for all waited ThreadGroup events.
    pub lifecycle_waiters: TypedWaitQueue<ThreadGroupLifecycleWaitValue>,

    /// Whether this thread group will inherit from children of dying processes in its descendant
    /// tree.
    pub is_child_subreaper: bool,

    /// The IDs used to perform shell job control.
    pub process_group: Arc<ProcessGroup>,

    // NOTE(review): presumably set once the group has performed execve(); confirm at
    // the call sites that toggle it.
    pub did_exec: bool,

    /// A signal that indicates whether the process is going to become waitable
    /// via waitid and waitpid for either WSTOPPED or WCONTINUED, depending on
    /// the value of `stopped`. If not None, contains the SignalInfo to return.
    pub last_signal: Option<SignalInfo>,

    /// Whether the thread_group is terminating or not, and if it is, the exit info of the thread
    /// group.
    run_state: ThreadGroupRunState,

    /// Time statistics accumulated from the children.
    pub children_time_stats: TaskTimeStats,

    /// Personality flags set with `sys_personality()`.
    pub personality: PersonalityFlags,

    /// Thread groups allowed to trace tasks in this thread group.
    pub allowed_ptracers: PtraceAllowedPtracers,

    /// Channel to message when this thread group exits.
    exit_notifier: Option<futures::channel::oneshot::Sender<()>>,

    /// Notifier for name changes.
    pub notifier: Option<std::sync::mpsc::Sender<MemoryAttributionLifecycleEvent>>,
}
202
/// A collection of `Task` objects that roughly correspond to a "process".
///
/// Userspace programmers often think about "threads" and "process", but those concepts have no
/// clear analogs inside the kernel because tasks are typically created using `clone(2)`, which
/// takes a complex set of flags that describes how much state is shared between the original task
/// and the new task.
///
/// If a new task is created with the `CLONE_THREAD` flag, the new task will be placed in the same
/// `ThreadGroup` as the original task. Userspace typically uses this flag in conjunction with the
/// `CLONE_FILES`, `CLONE_VM`, and `CLONE_FS`, which corresponds to the userspace notion of a
/// "thread". For example, that's how `pthread_create` behaves. In that sense, a `ThreadGroup`
/// normally corresponds to the set of "threads" in a "process". However, this pattern is purely a
/// userspace convention, and nothing stops userspace from using `CLONE_THREAD` without
/// `CLONE_FILES`, for example.
///
/// In Starnix, a `ThreadGroup` corresponds to a Zircon process, which means we do not support the
/// `CLONE_THREAD` flag without the `CLONE_VM` flag. If we run into problems with this limitation,
/// we might need to revise this correspondence.
///
/// Each `Task` in a `ThreadGroup` has the same thread group ID (`tgid`). The task with the same
/// `pid` as the `tgid` is called the thread group leader.
///
/// Thread groups are destroyed when the last task in the group exits.
pub struct ThreadGroup {
    /// Weak reference to the `OwnedRef` of this `ThreadGroup`. This allows to retrieve the
    /// `TempRef` from a raw `ThreadGroup`.
    pub weak_self: Weak<ThreadGroup>,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// A handle to the underlying Zircon process object.
    ///
    /// Currently, we have a 1-to-1 mapping between thread groups and zx::process
    /// objects. This approach might break down if/when we implement CLONE_VM
    /// without CLONE_THREAD because that creates a situation where two thread
    /// groups share an address space. To implement that situation, we might
    /// need to break the 1-to-1 mapping between thread groups and zx::process
    /// or teach zx::process to share address spaces.
    pub process: zx::Process,

    /// The lead task of this thread group.
    ///
    /// The lead task is typically the initial thread created in the thread group.
    pub leader: pid_t,

    /// The signal actions that are registered for this process.
    pub signal_actions: Arc<SignalActions>,

    /// The timers for this thread group (from timer_create(), etc.).
    pub timers: TimerTable,

    /// A mechanism to be notified when this `ThreadGroup` is destroyed.
    pub drop_notifier: DropNotifier,

    /// Whether the process is currently stopped.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    stop_state: AtomicStopState,

    /// The mutable state of the ThreadGroup.
    mutable_state: RwLock<ThreadGroupMutableState>,

    /// The resource limits for this thread group.  This is outside mutable_state
    /// to avoid deadlocks where the thread_group lock is held when acquiring
    /// the task lock, and vice versa.
    pub limits: OrderedMutex<ResourceLimits, ThreadGroupLimits>,

    /// The next unique identifier for a seccomp filter.  These are required to be
    /// able to distinguish identical seccomp filters, which are treated differently
    /// for the purposes of SECCOMP_FILTER_FLAG_TSYNC.  Inherited across clone because
    /// seccomp filters are also inherited across clone.
    pub next_seccomp_filter_id: AtomicU64Counter,

    /// Tasks ptraced by this process
    pub ptracees: Mutex<BTreeMap<tid_t, TaskContainer>>,

    /// The signals that are currently pending for this thread group.
    pub pending_signals: Mutex<QueuedSignals>,

    /// Whether or not there are any pending signals available for tasks in this thread group.
    /// Used to avoid having to acquire the signal state lock in hot paths.
    pub has_pending_signals: AtomicBool,

    /// The monotonic time at which the thread group started.
    pub start_time: zx::MonotonicInstant,

    /// Whether to log syscalls at INFO level for this thread group.
    /// Kept in sync with the kernel's syscall log filters by `sync_syscall_log_level`.
    log_syscalls_as_info: AtomicBool,
}
293
294impl fmt::Debug for ThreadGroup {
295    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
296        write!(
297            f,
298            "{}({})",
299            self.process.get_name().unwrap_or(zx::Name::new_lossy("<unknown>")),
300            self.leader
301        )
302    }
303}
304
305impl ThreadGroup {
306    pub fn sync_syscall_log_level(&self) {
307        let command = self.read().leader_command();
308        let filters = self.kernel.syscall_log_filters.lock();
309        let should_log = filters.iter().any(|f| f.matches(&command));
310        let prev_should_log = self.log_syscalls_as_info.swap(should_log, Ordering::Relaxed);
311        let change_str = match (should_log, prev_should_log) {
312            (true, false) => Some("Enabled"),
313            (false, true) => Some("Disabled"),
314            _ => None,
315        };
316        if let Some(change_str) = change_str {
317            log_info!(
318                "{change_str} info syscall logs for thread group {} (command: {command})",
319                self.leader
320            );
321        }
322    }
323
324    #[inline]
325    pub fn syscall_log_level(&self) -> starnix_logging::Level {
326        if self.log_syscalls_as_info.load(Ordering::Relaxed) {
327            starnix_logging::Level::Info
328        } else {
329            starnix_logging::Level::Trace
330        }
331    }
332}
333
/// Thread groups compare equal when they share the same leader pid.
///
/// NOTE(review): pids can in principle be reused after a group is reaped, so this is
/// only a reliable identity among concurrently-live groups — confirm callers never
/// compare against stale groups.
impl PartialEq for ThreadGroup {
    fn eq(&self, other: &Self) -> bool {
        self.leader == other.leader
    }
}
339
impl Drop for ThreadGroup {
    fn drop(&mut self) {
        // By the time the last reference goes away, all task/child/zombie bookkeeping
        // must already have been emptied; anything left here would indicate a leaked
        // registration.
        let state = self.mutable_state.get_mut();
        assert!(state.tasks.is_empty());
        assert!(state.children.is_empty());
        assert!(state.zombie_children.is_empty());
        assert!(state.zombie_ptracees.is_empty());
        // In test/debug builds, additionally verify that the (still-live) parent no
        // longer lists this group among its children, i.e. we were unregistered before
        // being dropped. Vacuously true when there is no parent or it is already gone.
        #[cfg(any(test, debug_assertions))]
        assert!(
            state
                .parent
                .as_ref()
                .and_then(|p| p.0.upgrade().as_ref().map(|p| p
                    .read()
                    .children
                    .get(&self.leader)
                    .is_none()))
                .unwrap_or(true)
        );
    }
}
361
/// A wrapper around a `Weak<ThreadGroup>` that expects the underlying `Weak` to always be
/// valid. The wrapper will check this at runtime during creation and upgrade.
pub struct ThreadGroupParent(Weak<ThreadGroup>);

impl ThreadGroupParent {
    /// Wraps `t`, checking (in debug builds) that it is currently upgradable.
    pub fn new(t: Weak<ThreadGroup>) -> Self {
        debug_assert!(t.upgrade().is_some());
        Self(t)
    }

    /// Returns the strong reference.
    ///
    /// Panics if the parent has been dropped, which callers treat as an invariant
    /// violation rather than a recoverable condition.
    pub fn upgrade(&self) -> Arc<ThreadGroup> {
        self.0.upgrade().expect("ThreadGroupParent references must always be valid")
    }
}
376
377impl Clone for ThreadGroupParent {
378    fn clone(&self) -> Self {
379        Self(self.0.clone())
380    }
381}
382
/// A selector that can match a process. Works as a representation of the pid argument to syscalls
/// like wait and kill.
#[derive(Debug, Clone)]
pub enum ProcessSelector {
    /// Matches any process at all.
    Any,
    /// Matches only the process with the specified pid
    Pid(pid_t),
    /// Matches all the processes in the given process group
    Pgid(pid_t),
    /// Match the thread group with the given key
    Process(ThreadGroupKey),
}
396
397impl ProcessSelector {
398    pub fn match_tid(&self, tid: tid_t, pid_table: &PidTable) -> bool {
399        match *self {
400            ProcessSelector::Pid(p) => {
401                if p == tid {
402                    true
403                } else {
404                    if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
405                        task_ref.get_pid() == p
406                    } else {
407                        false
408                    }
409                }
410            }
411            ProcessSelector::Any => true,
412            ProcessSelector::Pgid(pgid) => {
413                if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
414                    pid_table.get_process_group(pgid).as_ref()
415                        == Some(&task_ref.thread_group().read().process_group)
416                } else {
417                    false
418                }
419            }
420            ProcessSelector::Process(ref key) => {
421                if let Some(tg) = key.upgrade() {
422                    tg.read().tasks.contains_key(&tid)
423                } else {
424                    false
425                }
426            }
427        }
428    }
429}
430
/// Exit information reported to a waiter about a terminated process.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProcessExitInfo {
    /// How the process terminated.
    pub status: ExitStatus,
    /// The signal the process delivers to its parent on exit, if any.
    pub exit_signal: Option<Signal>,
}
436
/// Whether the thread group is running or already on its way out.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
enum ThreadGroupRunState {
    /// The group is running normally.
    #[default]
    Running,
    /// The group is terminating with the given exit status.
    Terminating(ExitStatus),
}
443
/// The result returned to a waiter by the wait* family of syscalls.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WaitResult {
    /// Pid of the waited-on process.
    pub pid: pid_t,
    /// Uid of the waited-on process.
    pub uid: uid_t,

    /// How the process exited and the signal it delivered to its parent.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,
}
454
455impl WaitResult {
456    // According to wait(2) man page, SignalInfo.signal needs to always be set to SIGCHLD
457    pub fn as_signal_info(&self) -> SignalInfo {
458        SignalInfo::with_detail(
459            SIGCHLD,
460            self.exit_info.status.signal_info_code(),
461            SignalDetail::SIGCHLD {
462                pid: self.pid,
463                uid: self.uid,
464                status: self.exit_info.status.signal_info_status(),
465            },
466        )
467    }
468}
469
/// The record kept for a process that has exited but has not yet been waited for.
#[derive(Debug)]
pub struct ZombieProcess {
    /// Key of the thread group that exited.
    pub thread_group_key: ThreadGroupKey,
    /// Process group id at exit time.
    pub pgid: pid_t,
    /// Uid of the exited process.
    pub uid: uid_t,

    /// How the process exited and the signal delivered to its parent.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,

    /// Whether dropping this ZombieProcess should imply removing the pid from
    /// the PidTable
    pub is_canonical: bool,
}
485
impl PartialEq for ZombieProcess {
    fn eq(&self, other: &Self) -> bool {
        // We assume only one set of ZombieProcess data per process, so this should cover it.
        self.thread_group_key == other.thread_group_key
            && self.pgid == other.pgid
            && self.uid == other.uid
            && self.is_canonical == other.is_canonical
    }
}

impl Eq for ZombieProcess {}

impl PartialOrd for ZombieProcess {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

/// Orders zombies by their thread group key only.
///
/// NOTE(review): `cmp` can return `Equal` for zombies that `eq` distinguishes (same
/// key but different `pgid`/`uid`/`is_canonical`), which is inconsistent with the
/// `Ord`/`Eq` contract — confirm ordered collections never hold two such values for
/// the same process.
impl Ord for ZombieProcess {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.thread_group_key.cmp(&other.thread_group_key)
    }
}
509
510impl ZombieProcess {
511    pub fn new(
512        thread_group: ThreadGroupStateRef<'_>,
513        credentials: &Credentials,
514        exit_info: ProcessExitInfo,
515    ) -> OwnedRef<Self> {
516        let time_stats = thread_group.base.time_stats() + thread_group.children_time_stats;
517        OwnedRef::new(ZombieProcess {
518            thread_group_key: thread_group.base.into(),
519            pgid: thread_group.process_group.leader,
520            uid: credentials.uid,
521            exit_info,
522            time_stats,
523            is_canonical: true,
524        })
525    }
526
527    pub fn pid(&self) -> pid_t {
528        self.thread_group_key.pid()
529    }
530
531    pub fn to_wait_result(&self) -> WaitResult {
532        WaitResult {
533            pid: self.pid(),
534            uid: self.uid,
535            exit_info: self.exit_info.clone(),
536            time_stats: self.time_stats,
537        }
538    }
539
540    pub fn as_artificial(&self) -> Self {
541        ZombieProcess {
542            thread_group_key: self.thread_group_key.clone(),
543            pgid: self.pgid,
544            uid: self.uid,
545            exit_info: self.exit_info.clone(),
546            time_stats: self.time_stats,
547            is_canonical: false,
548        }
549    }
550
551    pub fn matches_selector(&self, selector: &ProcessSelector) -> bool {
552        match *selector {
553            ProcessSelector::Any => true,
554            ProcessSelector::Pid(pid) => self.pid() == pid,
555            ProcessSelector::Pgid(pgid) => self.pgid == pgid,
556            ProcessSelector::Process(ref key) => self.thread_group_key == *key,
557        }
558    }
559
560    pub fn matches_selector_and_waiting_option(
561        &self,
562        selector: &ProcessSelector,
563        options: &WaitingOptions,
564    ) -> bool {
565        if !self.matches_selector(selector) {
566            return false;
567        }
568
569        if options.wait_for_all {
570            true
571        } else {
572            // A "clone" zombie is one which has delivered no signal, or a
573            // signal other than SIGCHLD to its parent upon termination.
574            options.wait_for_clone == (self.exit_info.exit_signal != Some(SIGCHLD))
575        }
576    }
577}
578
impl Releasable for ZombieProcess {
    type Context<'a> = &'a mut PidTable;

    /// Releases this zombie; only the canonical zombie retires the pid from the table
    /// (artificial copies from `as_artificial` must not).
    fn release<'a>(self, pids: &'a mut PidTable) {
        if self.is_canonical {
            pids.remove_zombie(self.pid());
        }
    }
}
588
589impl ThreadGroup {
    /// Creates a new `ThreadGroup` backed by the Zircon `process`.
    ///
    /// When `parent` is provided, the new group inherits the parent's resource
    /// limits, personality flags, and seccomp filter id counter, and is registered in
    /// the parent's `children` map as well as in `process_group`.
    pub fn new<L>(
        locked: &mut Locked<L>,
        kernel: Arc<Kernel>,
        process: zx::Process,
        parent: Option<ThreadGroupWriteGuard<'_>>,
        leader: pid_t,
        exit_signal: Option<Signal>,
        process_group: Arc<ProcessGroup>,
        signal_actions: Arc<SignalActions>,
    ) -> Arc<ThreadGroup>
    where
        L: LockBefore<ProcessGroupState>,
    {
        // `new_cyclic` lets the group hold a weak reference to itself (`weak_self`).
        Arc::new_cyclic(|weak_self| {
            let mut thread_group = ThreadGroup {
                weak_self: weak_self.clone(),
                kernel,
                process,
                leader,
                signal_actions,
                timers: Default::default(),
                drop_notifier: Default::default(),
                // A child process created via fork(2) inherits its parent's
                // resource limits.  Resource limits are preserved across execve(2).
                limits: OrderedMutex::new(
                    parent
                        .as_ref()
                        .map(|p| p.base.limits.lock(locked.cast_locked()).clone())
                        .unwrap_or(Default::default()),
                ),
                next_seccomp_filter_id: Default::default(),
                ptracees: Default::default(),
                stop_state: AtomicStopState::new(StopState::Awake),
                pending_signals: Default::default(),
                has_pending_signals: Default::default(),
                start_time: zx::MonotonicInstant::get(),
                mutable_state: RwLock::new(ThreadGroupMutableState {
                    parent: parent
                        .as_ref()
                        .map(|p| ThreadGroupParent::new(p.base.weak_self.clone())),
                    exit_signal,
                    tasks: BTreeMap::new(),
                    children: BTreeMap::new(),
                    zombie_children: vec![],
                    zombie_ptracees: ZombiePtracees::new(),
                    deferred_zombie_ptracers: vec![],
                    lifecycle_waiters: TypedWaitQueue::<ThreadGroupLifecycleWaitValue>::default(),
                    is_child_subreaper: false,
                    process_group: Arc::clone(&process_group),
                    did_exec: false,
                    last_signal: None,
                    run_state: Default::default(),
                    children_time_stats: Default::default(),
                    // Personality flags are inherited from the parent when present.
                    personality: parent
                        .as_ref()
                        .map(|p| p.personality)
                        .unwrap_or(Default::default()),
                    allowed_ptracers: PtraceAllowedPtracers::None,
                    exit_notifier: None,
                    notifier: None,
                }),
                log_syscalls_as_info: AtomicBool::new(false),
            };

            if let Some(mut parent) = parent {
                // Seccomp filters (and their id sequence) are inherited across clone,
                // so continue numbering from the parent's counter.
                thread_group.next_seccomp_filter_id.reset(parent.base.next_seccomp_filter_id.get());
                parent.children.insert(leader, weak_self.clone());
                process_group.insert(locked, &thread_group);
            };
            thread_group
        })
    }
662
    // Generates `read()`/`write()` accessors returning guards over `mutable_state`.
    state_accessor!(ThreadGroup, mutable_state);

    /// Reads the current stop state without taking the mutable-state lock.
    pub fn load_stopped(&self) -> StopState {
        self.stop_state.load(Ordering::Relaxed)
    }
668
    /// Causes the thread group to exit.  If this is being called from a task
    /// that is part of the current thread group, the caller should pass
    /// `current_task`.  If ownership issues prevent passing `current_task`, then
    /// callers should use CurrentTask::thread_group_exit instead.
    pub fn exit(
        &self,
        locked: &mut Locked<Unlocked>,
        exit_status: ExitStatus,
        mut current_task: Option<&mut CurrentTask>,
    ) {
        // Report the ptrace exit event for the calling task before taking any locks.
        if let Some(ref mut current_task) = current_task {
            current_task.ptrace_event(
                locked,
                PtraceOptions::TRACEEXIT,
                exit_status.signal_info_status() as u64,
            );
        }
        // Lock order: PidTable before the thread group's mutable state.
        let mut pids = self.kernel.pids.write();
        let mut state = self.write();
        if state.is_terminating() {
            // The thread group is already terminating and all threads in the thread group have
            // already been interrupted.
            return;
        }

        state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());

        // Drop ptrace zombies
        state.zombie_ptracees.release(&mut pids);

        // Interrupt each task. Unlock the group because send_signal will lock the group in order
        // to call set_stopped.
        // SAFETY: tasks is kept on the stack. The static is required to ensure the lock on
        // ThreadGroup can be dropped.
        let tasks = state.tasks().map(TempRef::into_static).collect::<Vec<_>>();
        drop(state);

        // Detach from any ptraced tasks, killing the ones that set PTRACE_O_EXITKILL.
        let tracees = self.ptracees.lock().keys().cloned().collect::<Vec<_>>();
        for tracee in tracees {
            if let Some(task_ref) = pids.get_task(tracee).clone().upgrade() {
                let mut should_send_sigkill = false;
                if let Some(ptrace) = &task_ref.read().ptrace {
                    should_send_sigkill = ptrace.has_option(PtraceOptions::EXITKILL);
                }
                if should_send_sigkill {
                    send_standard_signal(locked, task_ref.as_ref(), SignalInfo::kernel(SIGKILL));
                    continue;
                }

                let _ =
                    ptrace_detach(locked, &mut pids, self, task_ref.as_ref(), &UserAddress::NULL);
            }
        }

        // Record the exit status on every member and deliver SIGKILL so each task
        // unwinds.
        for task in tasks {
            task.write().set_exit_status(exit_status.clone());
            send_standard_signal(locked, &task, SignalInfo::kernel(SIGKILL));
        }
    }
729
730    pub fn add(&self, task: &TempRef<'_, Task>) -> Result<(), Errno> {
731        let mut state = self.write();
732        if state.is_terminating() {
733            if state.tasks_count() == 0 {
734                log_warn!(
735                    "Task {} with leader {} terminating while adding its first task, \
736                not sending creation notification",
737                    task.tid,
738                    self.leader
739                );
740            }
741            return error!(EINVAL);
742        }
743        state.tasks.insert(task.tid, task.into());
744
745        Ok(())
746    }
747
748    /// Remove the task from the children of this ThreadGroup.
749    ///
750    /// It is important that the task is taken as an `OwnedRef`. It ensures the tasks of the
751    /// ThreadGroup are always valid as they are still valid when removed.
752    pub fn remove<L>(&self, locked: &mut Locked<L>, pids: &mut PidTable, task: &OwnedRef<Task>)
753    where
754        L: LockBefore<ProcessGroupState>,
755    {
756        task.set_ptrace_zombie(pids);
757        pids.remove_task(task.tid);
758
759        let mut state = self.write();
760
761        let persistent_info: TaskPersistentInfo =
762            if let Some(container) = state.tasks.remove(&task.tid) {
763                container.into()
764            } else {
765                // The task has never been added. The only expected case is that this thread was
766                // already terminating.
767                debug_assert!(state.is_terminating());
768                return;
769            };
770
771        if state.tasks.is_empty() {
772            let exit_status =
773                if let ThreadGroupRunState::Terminating(exit_status) = &state.run_state {
774                    exit_status.clone()
775                } else {
776                    let exit_status = task.exit_status().unwrap_or_else(|| {
777                        log_error!("Exiting without an exit code.");
778                        ExitStatus::Exit(u8::MAX)
779                    });
780                    state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());
781                    exit_status
782                };
783
784            // Replace PID table entry with a zombie.
785            let exit_info =
786                ProcessExitInfo { status: exit_status, exit_signal: state.exit_signal.clone() };
787            let zombie =
788                ZombieProcess::new(state.as_ref(), &persistent_info.real_creds(), exit_info);
789            pids.kill_process(self.leader, OwnedRef::downgrade(&zombie));
790
791            state.leave_process_group(locked, pids);
792
793            // I have no idea if dropping the lock here is correct, and I don't want to think about
794            // it. If problems do turn up with another thread observing an intermediate state of
795            // this exit operation, the solution is to unify locks. It should be sensible and
796            // possible for there to be a single lock that protects all (or nearly all) of the
797            // data accessed by both exit and wait. In gvisor and linux this is the lock on the
798            // equivalent of the PidTable. This is made more difficult by rust locks being
799            // containers that only lock the data they contain, but see
800            // https://docs.google.com/document/d/1YHrhBqNhU1WcrsYgGAu3JwwlVmFXPlwWHTJLAbwRebY/edit
801            // for an idea.
802            std::mem::drop(state);
803
804            // Remove the process from the cgroup2 pid table after TG lock is dropped.
805            // This function will hold the CgroupState lock which should be before the TG lock. See
806            // more in lock_cgroup2_pid_table comments.
807            self.kernel.cgroups.lock_cgroup2_pid_table().remove_process(self.into());
808
809            // We will need the immediate parent and the reaper. Once we have them, we can make
810            // sure to take the locks in the right order: parent before child.
811            let parent = self.read().parent.clone();
812            let reaper = self.find_reaper();
813
814            {
815                // Reparent the children.
816                if let Some(reaper) = reaper {
817                    let reaper = reaper.upgrade();
818                    {
819                        let mut reaper_state = reaper.write();
820                        let mut state = self.write();
821                        for (_pid, weak_child) in std::mem::take(&mut state.children) {
822                            if let Some(child) = weak_child.upgrade() {
823                                let mut child_state = child.write();
824
825                                child_state.exit_signal = Some(SIGCHLD);
826                                child_state.parent =
827                                    Some(ThreadGroupParent::new(Arc::downgrade(&reaper)));
828                                reaper_state.children.insert(child.leader, weak_child.clone());
829                            }
830                        }
831                        reaper_state.zombie_children.append(&mut state.zombie_children);
832                    }
833                    ZombiePtracees::reparent(self, &reaper);
834                } else {
835                    // If we don't have a reaper then just drop the zombies.
836                    let mut state = self.write();
837                    for zombie in state.zombie_children.drain(..) {
838                        zombie.release(pids);
839                    }
840                    state.zombie_ptracees.release(pids);
841                }
842            }
843
844            // Clear the `parent` reference now that children have been re-`parent`ed.
845            self.write().parent = None;
846
847            #[cfg(any(test, debug_assertions))]
848            {
849                let state = self.read();
850                assert!(state.zombie_children.is_empty());
851                assert!(state.zombie_ptracees.is_empty());
852            }
853
854            if let Some(ref parent) = parent {
855                let parent = parent.upgrade();
856                let mut tracer_pid = None;
857                if let Some(ptrace) = &task.read().ptrace {
858                    tracer_pid = Some(ptrace.get_pid());
859                }
860
861                let maybe_zombie = 'compute_zombie: {
862                    if let Some(tracer_pid) = tracer_pid {
863                        if let Some(ref tracer) = pids.get_task(tracer_pid).upgrade() {
864                            break 'compute_zombie tracer
865                                .thread_group()
866                                .maybe_notify_tracer(task, pids, &parent, zombie);
867                        }
868                    }
869                    Some(zombie)
870                };
871                if let Some(zombie) = maybe_zombie {
872                    parent.do_zombie_notifications(zombie);
873                }
874            } else {
875                zombie.release(pids);
876            }
877
878            // TODO: Set the error_code on the Zircon process object. Currently missing a way
879            // to do this in Zircon. Might be easier in the new execution model.
880
881            // Once the last zircon thread stops, the zircon process will also stop executing.
882
883            if let Some(parent) = parent {
884                let parent = parent.upgrade();
885                parent.check_orphans(locked, pids);
886            }
887        }
888    }
889
    /// Registers `zombie` as a dead child of this thread group and notifies waiters.
    ///
    /// Removes the child's live entry and any deferred ptrace bookkeeping tied to it,
    /// queues the zombie so `wait` can collect it, wakes waiters blocked on child
    /// status, and finally sends the child's exit signal (e.g. SIGCHLD) if one was
    /// configured. Everything happens under this thread group's write lock.
    pub fn do_zombie_notifications(&self, zombie: OwnedRef<ZombieProcess>) {
        let mut state = self.write();

        // The child is dead: drop it from the live-children map…
        state.children.remove(&zombie.pid());
        // …and drop any deferred ptrace notification bookkeeping for the same child.
        state
            .deferred_zombie_ptracers
            .retain(|dzp| dzp.tracee_thread_group_key != zombie.thread_group_key);

        // Capture what we need before `zombie` is moved into the zombie list below.
        let exit_signal = zombie.exit_info.exit_signal;
        let mut signal_info = zombie.to_wait_result().as_signal_info();

        state.zombie_children.push(zombie);
        // Wake anyone blocked in wait() on a child-status change.
        state.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);

        // Send signals
        if let Some(exit_signal) = exit_signal {
            signal_info.signal = exit_signal;
            state.send_signal(signal_info);
        }
    }
910
911    /// Notifies the tracer if appropriate.  Returns Some(zombie) if caller
912    /// needs to notify the parent, None otherwise.  The caller should probably
913    /// invoke parent.do_zombie_notifications(zombie) on the result.
914    fn maybe_notify_tracer(
915        &self,
916        tracee: &Task,
917        mut pids: &mut PidTable,
918        parent: &ThreadGroup,
919        zombie: OwnedRef<ZombieProcess>,
920    ) -> Option<OwnedRef<ZombieProcess>> {
921        if self.read().zombie_ptracees.has_tracee(tracee.tid) {
922            if self == parent {
923                // The tracer is the parent and has not consumed the
924                // notification.  Don't bother with the ptracee stuff, and just
925                // notify the parent.
926                self.write().zombie_ptracees.remove(pids, tracee.tid);
927                return Some(zombie);
928            } else {
929                // The tracer is not the parent and the tracer has not consumed
930                // the notification.
931                {
932                    // Tell the parent to expect a notification later.
933                    let mut parent_state = parent.write();
934                    parent_state
935                        .deferred_zombie_ptracers
936                        .push(DeferredZombiePTracer::new(self, tracee));
937                    parent_state.children.remove(&tracee.get_pid());
938                }
939                // Tell the tracer that there is a notification pending.
940                let mut state = self.write();
941                state.zombie_ptracees.set_parent_of(tracee.tid, Some(zombie), parent);
942                tracee.write().notify_ptracers();
943                return None;
944            }
945        } else if self == parent {
946            // The tracer is the parent and has already consumed the parent
947            // notification.  No further action required.
948            parent.write().children.remove(&tracee.tid);
949            zombie.release(&mut pids);
950            return None;
951        }
952        // The tracer is not the parent and has already consumed the parent
953        // notification.  Notify the parent.
954        Some(zombie)
955    }
956
957    /// Find the task which will adopt our children after we die.
958    fn find_reaper(&self) -> Option<ThreadGroupParent> {
959        let mut weak_parent = self.read().parent.clone()?;
960        loop {
961            weak_parent = {
962                let parent = weak_parent.upgrade();
963                let parent_state = parent.read();
964                if parent_state.is_child_subreaper {
965                    break;
966                }
967                match parent_state.parent {
968                    Some(ref next_parent) => next_parent.clone(),
969                    None => break,
970                }
971            };
972        }
973        Some(weak_parent)
974    }
975
976    pub fn setsid<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
977    where
978        L: LockBefore<ProcessGroupState>,
979    {
980        let pids = self.kernel.pids.read();
981        if pids.get_process_group(self.leader).is_some() {
982            return error!(EPERM);
983        }
984        let process_group = ProcessGroup::new(self.leader, None);
985        pids.add_process_group(process_group.clone());
986        self.write().set_process_group(locked, process_group, &pids);
987        self.check_orphans(locked, &pids);
988
989        Ok(())
990    }
991
    /// Moves `target` into the process group `pgid`, or into a new group led by
    /// `target` when `pgid` is 0 (setpgid(2) semantics).
    ///
    /// Errors:
    /// - `ESRCH`: `target` is neither the calling process nor one of its children.
    /// - `EACCES`: `target` is a child that has already exec'd.
    /// - `EPERM`: `target` is a session leader, the sessions don't match, or the
    ///   named process group doesn't exist in the same session.
    /// - `EINVAL`: the resulting pgid would be negative.
    pub fn setpgid<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        target: &Task,
        pgid: pid_t,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let pids = self.kernel.pids.read();

        {
            let current_process_group = Arc::clone(&self.read().process_group);

            // The target process must be either the current process or a child of the current
            // process.
            let mut target_thread_group = target.thread_group().write();
            let is_target_current_process_child =
                target_thread_group.parent.as_ref().map(|tg| tg.upgrade().leader)
                    == Some(self.leader);
            if target_thread_group.leader() != self.leader && !is_target_current_process_child {
                return error!(ESRCH);
            }

            // If the target process is a child of the current task, it must not have executed one of the exec
            // function.
            if is_target_current_process_child && target_thread_group.did_exec {
                return error!(EACCES);
            }

            let new_process_group;
            {
                let target_process_group = &target_thread_group.process_group;

                // The target process must not be a session leader and must be in the same session as the current process.
                if target_thread_group.leader() == target_process_group.session.leader
                    || current_process_group.session != target_process_group.session
                {
                    return error!(EPERM);
                }

                // pgid 0 means "a group led by the target itself".
                let target_pgid = if pgid == 0 { target_thread_group.leader() } else { pgid };
                if target_pgid < 0 {
                    return error!(EINVAL);
                }

                // Already a member of the requested group: nothing to do.
                if target_pgid == target_process_group.leader {
                    return Ok(());
                }

                // If pgid is not equal to the target process id, the associated process group must exist
                // and be in the same session as the target process.
                if target_pgid != target_thread_group.leader() {
                    new_process_group =
                        pids.get_process_group(target_pgid).ok_or_else(|| errno!(EPERM))?;
                    if new_process_group.session != target_process_group.session {
                        return error!(EPERM);
                    }
                    security::check_setpgid_access(current_task, target)?;
                } else {
                    security::check_setpgid_access(current_task, target)?;
                    // Create a new process group
                    new_process_group =
                        ProcessGroup::new(target_pgid, Some(target_process_group.session.clone()));
                    pids.add_process_group(new_process_group.clone());
                }
            }

            target_thread_group.set_process_group(locked, new_process_group, &pids);
        }

        // The group change may have orphaned a process group; check after the target's
        // thread-group lock has been released.
        target.thread_group().check_orphans(locked, &pids);

        Ok(())
    }
1067
    /// Returns the handle of this thread group's ITIMER_REAL interval timer.
    fn itimer_real(&self) -> IntervalTimerHandle {
        self.timers.itimer_real()
    }
1071
1072    pub fn set_itimer(
1073        &self,
1074        current_task: &CurrentTask,
1075        which: u32,
1076        value: itimerval,
1077    ) -> Result<itimerval, Errno> {
1078        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1079            // We don't support setting these timers.
1080            // The gvisor test suite clears ITIMER_PROF as part of its test setup logic, so we support
1081            // clearing these values.
1082            if value.it_value.tv_sec == 0 && value.it_value.tv_usec == 0 {
1083                return Ok(itimerval::default());
1084            }
1085            track_stub!(TODO("https://fxbug.dev/322874521"), "Unsupported itimer type", which);
1086            return error!(ENOTSUP);
1087        }
1088
1089        if which != ITIMER_REAL {
1090            return error!(EINVAL);
1091        }
1092        let itimer_real = self.itimer_real();
1093        let prev_remaining = itimer_real.time_remaining();
1094        if value.it_value.tv_sec != 0 || value.it_value.tv_usec != 0 {
1095            itimer_real.arm(current_task, itimerspec_from_itimerval(value), false)?;
1096        } else {
1097            itimer_real.disarm(current_task)?;
1098        }
1099        Ok(itimerval {
1100            it_value: timeval_from_duration(prev_remaining.remainder),
1101            it_interval: timeval_from_duration(prev_remaining.interval),
1102        })
1103    }
1104
1105    pub fn get_itimer(&self, which: u32) -> Result<itimerval, Errno> {
1106        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1107            // We don't support setting these timers, so we can accurately report that these are not set.
1108            return Ok(itimerval::default());
1109        }
1110        if which != ITIMER_REAL {
1111            return error!(EINVAL);
1112        }
1113        let remaining = self.itimer_real().time_remaining();
1114        Ok(itimerval {
1115            it_value: timeval_from_duration(remaining.remainder),
1116            it_interval: timeval_from_duration(remaining.interval),
1117        })
1118    }
1119
1120    /// Check whether the stop state is compatible with `new_stopped`. If it is return it,
1121    /// otherwise, return None.
1122    fn check_stopped_state(
1123        &self,
1124        new_stopped: StopState,
1125        finalize_only: bool,
1126    ) -> Option<StopState> {
1127        let stopped = self.load_stopped();
1128        if finalize_only && !stopped.is_stopping_or_stopped() {
1129            return Some(stopped);
1130        }
1131
1132        if stopped.is_illegal_transition(new_stopped) {
1133            return Some(stopped);
1134        }
1135
1136        return None;
1137    }
1138
1139    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
1140    /// does not update the signal.  If |finalize_only| is set, will check that
1141    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
1142    /// before executing it.
1143    ///
1144    /// Returns the latest stop state after any changes.
1145    pub fn set_stopped(
1146        &self,
1147        new_stopped: StopState,
1148        siginfo: Option<SignalInfo>,
1149        finalize_only: bool,
1150    ) -> StopState {
1151        // Perform an early return check to see if we can avoid taking the lock.
1152        if let Some(stopped) = self.check_stopped_state(new_stopped, finalize_only) {
1153            return stopped;
1154        }
1155
1156        self.write().set_stopped(new_stopped, siginfo, finalize_only)
1157    }
1158
1159    /// Ensures |session| is the controlling session inside of |terminal_controller|, and returns a
1160    /// reference to the |TerminalController|.
1161    fn check_terminal_controller(
1162        session: &Arc<Session>,
1163        terminal_controller: &Option<TerminalController>,
1164    ) -> Result<(), Errno> {
1165        if let Some(terminal_controller) = terminal_controller {
1166            if let Some(terminal_session) = terminal_controller.session.upgrade() {
1167                if Arc::ptr_eq(session, &terminal_session) {
1168                    return Ok(());
1169                }
1170            }
1171        }
1172        error!(ENOTTY)
1173    }
1174
1175    pub fn get_foreground_process_group(&self, terminal: &Terminal) -> Result<pid_t, Errno> {
1176        let state = self.read();
1177        let process_group = &state.process_group;
1178        let terminal_state = terminal.read();
1179
1180        // "When fd does not refer to the controlling terminal of the calling
1181        // process, -1 is returned" - tcgetpgrp(3)
1182        Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1183        let pid = process_group.session.read().get_foreground_process_group_leader();
1184        Ok(pid)
1185    }
1186
    /// Sets the foreground process group of `terminal` to `pgid` (tcsetpgrp(3)).
    ///
    /// Errors:
    /// - `ENOTTY`: `terminal` is not the controlling terminal of this session.
    /// - `EINVAL`: `pgid` is negative.
    /// - `ESRCH`: no process group with `pgid` exists.
    /// - `EPERM`: the process group belongs to a different session.
    /// - `EINTR`: the caller is in a background group and neither blocks nor
    ///   ignores SIGTTOU, so SIGTTOU was sent to its process group instead.
    pub fn set_foreground_process_group<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        terminal: &Terminal,
        pgid: pid_t,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let process_group;
        let send_ttou;
        {
            // Keep locks to ensure atomicity.
            let pids = self.kernel.pids.read();
            let state = self.read();
            process_group = Arc::clone(&state.process_group);
            let terminal_state = terminal.read();
            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;

            // pgid must be positive.
            if pgid < 0 {
                return error!(EINVAL);
            }

            let new_process_group = pids.get_process_group(pgid).ok_or_else(|| errno!(ESRCH))?;
            if new_process_group.session != process_group.session {
                return error!(EPERM);
            }

            let mut session_state = process_group.session.write();
            // If the calling process is a member of a background group and not ignoring SIGTTOU, a
            // SIGTTOU signal is sent to all members of this background process group.
            send_ttou = process_group.leader != session_state.get_foreground_process_group_leader()
                && !current_task.read().signal_mask().has_signal(SIGTTOU)
                && self.signal_actions.get(SIGTTOU).sa_handler != SIG_IGN;

            if !send_ttou {
                session_state.set_foreground_process_group(&new_process_group);
            }
        }

        // Locks must not be held when sending signals.
        if send_ttou {
            process_group.send_signals(locked, &[SIGTTOU]);
            return error!(EINTR);
        }

        Ok(())
    }
1237
    /// Makes `terminal` the controlling terminal of the calling session
    /// (TIOCSCTTY semantics).
    ///
    /// The caller must be the session leader and must not already have a
    /// controlling terminal. If the terminal currently controls a different
    /// session it can only be stolen when `steal` is set and the caller holds
    /// CAP_SYS_ADMIN. A non-readable terminal (`is_readable == false`) also
    /// requires CAP_SYS_ADMIN unless the capability check already happened for
    /// the steal.
    pub fn set_controlling_terminal(
        &self,
        current_task: &CurrentTask,
        terminal: &Terminal,
        is_main: bool,
        steal: bool,
        is_readable: bool,
    ) -> Result<(), Errno> {
        // Keep locks to ensure atomicity.
        let state = self.read();
        let process_group = &state.process_group;
        let mut terminal_state = terminal.write();
        let mut session_writer = process_group.session.write();

        // "The calling process must be a session leader and not have a
        // controlling terminal already." - tty_ioctl(4)
        if process_group.session.leader != self.leader
            || session_writer.controlling_terminal.is_some()
        {
            return error!(EINVAL);
        }

        // Tracks whether CAP_SYS_ADMIN has already been verified, to avoid
        // checking (and auditing) it a second time below.
        let mut has_admin_capability_determined = false;

        // "If this terminal is already the controlling terminal of a different
        // session group, then the ioctl fails with EPERM, unless the caller
        // has the CAP_SYS_ADMIN capability and arg equals 1, in which case the
        // terminal is stolen, and all processes that had it as controlling
        // terminal lose it." - tty_ioctl(4)
        if let Some(other_session) =
            terminal_state.controller.as_ref().and_then(|cs| cs.session.upgrade())
        {
            if other_session != process_group.session {
                if !steal {
                    return error!(EPERM);
                }
                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
                has_admin_capability_determined = true;

                // Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
                other_session.write().controlling_terminal = None;
            }
        }

        if !is_readable && !has_admin_capability_determined {
            security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
        }

        // Link the session and the terminal in both directions.
        session_writer.controlling_terminal = Some(ControllingTerminal::new(terminal, is_main));
        terminal_state.controller = TerminalController::new(&process_group.session);
        Ok(())
    }
1290
1291    pub fn release_controlling_terminal<L>(
1292        &self,
1293        locked: &mut Locked<L>,
1294        _current_task: &CurrentTask,
1295        terminal: &Terminal,
1296        is_main: bool,
1297    ) -> Result<(), Errno>
1298    where
1299        L: LockBefore<ProcessGroupState>,
1300    {
1301        let process_group;
1302        {
1303            // Keep locks to ensure atomicity.
1304            let state = self.read();
1305            process_group = Arc::clone(&state.process_group);
1306            let mut terminal_state = terminal.write();
1307            let mut session_writer = process_group.session.write();
1308
1309            // tty must be the controlling terminal.
1310            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1311            if !session_writer
1312                .controlling_terminal
1313                .as_ref()
1314                .map_or(false, |ct| ct.matches(terminal, is_main))
1315            {
1316                return error!(ENOTTY);
1317            }
1318
1319            // "If the process was session leader, then send SIGHUP and SIGCONT to the foreground
1320            // process group and all processes in the current session lose their controlling terminal."
1321            // - tty_ioctl(4)
1322
1323            // Remove tty as the controlling tty for each process in the session, then
1324            // send them SIGHUP and SIGCONT.
1325
1326            session_writer.controlling_terminal = None;
1327            terminal_state.controller = None;
1328        }
1329
1330        if process_group.session.leader == self.leader {
1331            process_group.send_signals(locked, &[SIGHUP, SIGCONT]);
1332        }
1333
1334        Ok(())
1335    }
1336
1337    fn check_orphans<L>(&self, locked: &mut Locked<L>, pids: &PidTable)
1338    where
1339        L: LockBefore<ProcessGroupState>,
1340    {
1341        let mut thread_groups = self.read().children().collect::<Vec<_>>();
1342        let this = self.weak_self.upgrade().unwrap();
1343        thread_groups.push(this);
1344        let process_groups =
1345            thread_groups.iter().map(|tg| Arc::clone(&tg.read().process_group)).unique();
1346        for pg in process_groups {
1347            pg.check_orphaned(locked, pids);
1348        }
1349    }
1350
    /// Returns the current soft limit (`rlim_cur`) for `resource`.
    pub fn get_rlimit<L>(&self, locked: &mut Locked<L>, resource: Resource) -> u64
    where
        L: LockBefore<ThreadGroupLimits>,
    {
        // Only the soft limit is reported here; `adjust_rlimits` exposes the full rlimit.
        self.limits.lock(locked).get(resource).rlim_cur
    }
1357
1358    /// Adjusts the rlimits of the ThreadGroup to which `target_task` belongs to.
1359    pub fn adjust_rlimits<L>(
1360        locked: &mut Locked<L>,
1361        current_task: &CurrentTask,
1362        target_task: &Task,
1363        resource: Resource,
1364        maybe_new_limit: Option<rlimit>,
1365    ) -> Result<rlimit, Errno>
1366    where
1367        L: LockBefore<ThreadGroupLimits>,
1368    {
1369        let thread_group = target_task.thread_group();
1370        let can_increase_rlimit = security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE);
1371        let mut limit_state = thread_group.limits.lock(locked);
1372        let old_limit = limit_state.get(resource);
1373        if let Some(new_limit) = maybe_new_limit {
1374            if new_limit.rlim_max > old_limit.rlim_max && !can_increase_rlimit {
1375                return error!(EPERM);
1376            }
1377            security::task_setrlimit(current_task, &target_task, old_limit, new_limit)?;
1378            limit_state.set(resource, new_limit)
1379        }
1380        Ok(old_limit)
1381    }
1382
    /// Returns CPU time statistics for this thread group, read from the backing
    /// Zircon process.
    pub fn time_stats(&self) -> TaskTimeStats {
        let process: &zx::Process = if self.process.as_handle_ref().is_invalid() {
            // `process` must be valid for all tasks, except `kthreads`. In that case get the
            // stats from starnix process.
            assert_eq!(
                self as *const ThreadGroup,
                Arc::as_ptr(&self.kernel.kthreads.system_thread_group())
            );
            &self.kernel.kthreads.starnix_process
        } else {
            &self.process
        };

        let info =
            zx::Task::get_runtime_info(process).expect("Failed to get starnix process stats");
        TaskTimeStats {
            // Zircon reports a single cpu_time value; it is surfaced here as user time.
            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
            system_time: zx::MonotonicDuration::default(),
        }
    }
1404
1405    /// For each task traced by this thread_group that matches the given
1406    /// selector, acquire its TaskMutableState and ptracees lock and execute the
1407    /// given function.
1408    pub fn get_ptracees_and(
1409        &self,
1410        selector: &ProcessSelector,
1411        pids: &PidTable,
1412        f: &mut dyn FnMut(&Task, &TaskMutableState),
1413    ) {
1414        for tracee in self
1415            .ptracees
1416            .lock()
1417            .keys()
1418            .filter(|tracee_tid| selector.match_tid(**tracee_tid, &pids))
1419            .map(|tracee_tid| pids.get_task(*tracee_tid))
1420        {
1421            if let Some(task_ref) = tracee.clone().upgrade() {
1422                let task_state = task_ref.write();
1423                if task_state.ptrace.is_some() {
1424                    f(&task_ref, &task_state);
1425                }
1426            }
1427        }
1428    }
1429
1430    /// Returns a tracee whose state has changed, so that waitpid can report on
1431    /// it. If this returns a value, and the pid is being traced, the tracer
1432    /// thread is deemed to have seen the tracee ptrace-stop for the purposes of
1433    /// PTRACE_LISTEN.
1434    pub fn get_waitable_ptracee(
1435        &self,
1436        selector: &ProcessSelector,
1437        options: &WaitingOptions,
1438        pids: &mut PidTable,
1439    ) -> Option<WaitResult> {
1440        // This checks to see if the target is a zombie ptracee.
1441        let waitable_entry = self.write().zombie_ptracees.get_waitable_entry(selector, options);
1442        match waitable_entry {
1443            None => (),
1444            Some((zombie, None)) => return Some(zombie.to_wait_result()),
1445            Some((zombie, Some((tg, z)))) => {
1446                if let Some(tg) = tg.upgrade() {
1447                    if Arc::as_ptr(&tg) != self as *const Self {
1448                        tg.do_zombie_notifications(z);
1449                    } else {
1450                        {
1451                            let mut state = tg.write();
1452                            state.children.remove(&z.pid());
1453                            state
1454                                .deferred_zombie_ptracers
1455                                .retain(|dzp| dzp.tracee_thread_group_key != z.thread_group_key);
1456                        }
1457
1458                        z.release(pids);
1459                    };
1460                }
1461                return Some(zombie.to_wait_result());
1462            }
1463        }
1464
1465        let mut tasks = vec![];
1466
1467        // This checks to see if the target is a living ptracee
1468        self.get_ptracees_and(selector, pids, &mut |task: &Task, _| {
1469            tasks.push(task.weak_self.clone());
1470        });
1471        for task in tasks {
1472            let Some(task_ref) = task.upgrade() else {
1473                continue;
1474            };
1475
1476            let process_state = &mut task_ref.thread_group().write();
1477            let mut task_state = task_ref.write();
1478            if task_state
1479                .ptrace
1480                .as_ref()
1481                .is_some_and(|ptrace| ptrace.is_waitable(task_ref.load_stopped(), options))
1482            {
1483                // We've identified a potential target.  Need to return either
1484                // the process's information (if we are in group-stop) or the
1485                // thread's information (if we are in a different stop).
1486
1487                // The shared information:
1488                let mut pid: i32 = 0;
1489                let info = process_state.tasks.values().next().unwrap().info().clone();
1490                let uid = info.real_creds().uid;
1491                let mut exit_status = None;
1492                let exit_signal = process_state.exit_signal.clone();
1493                let time_stats =
1494                    process_state.base.time_stats() + process_state.children_time_stats;
1495                let task_stopped = task_ref.load_stopped();
1496
1497                #[derive(PartialEq)]
1498                enum ExitType {
1499                    None,
1500                    Cont,
1501                    Stop,
1502                    Kill,
1503                }
1504                if process_state.is_waitable() {
1505                    let ptrace = &mut task_state.ptrace;
1506                    // The information for processes, if we were in group stop.
1507                    let process_stopped = process_state.base.load_stopped();
1508                    let mut fn_type = ExitType::None;
1509                    if process_stopped == StopState::Awake && options.wait_for_continued {
1510                        fn_type = ExitType::Cont;
1511                    }
1512                    let mut event = ptrace
1513                        .as_ref()
1514                        .map_or(PtraceEvent::None, |ptrace| {
1515                            ptrace.event_data.as_ref().map_or(PtraceEvent::None, |data| data.event)
1516                        })
1517                        .clone();
1518                    // Tasks that are ptrace'd always get stop notifications.
1519                    if process_stopped == StopState::GroupStopped
1520                        && (options.wait_for_stopped || ptrace.is_some())
1521                    {
1522                        fn_type = ExitType::Stop;
1523                    }
1524                    if fn_type != ExitType::None {
1525                        let siginfo = if options.keep_waitable_state {
1526                            process_state.last_signal.clone()
1527                        } else {
1528                            process_state.last_signal.take()
1529                        };
1530                        if let Some(mut siginfo) = siginfo {
1531                            if task_ref.thread_group().load_stopped() == StopState::GroupStopped
1532                                && ptrace.as_ref().is_some_and(|ptrace| ptrace.is_seized())
1533                            {
1534                                if event == PtraceEvent::None {
1535                                    event = PtraceEvent::Stop;
1536                                }
1537                                siginfo.code |= (PtraceEvent::Stop as i32) << 8;
1538                            }
1539                            if siginfo.signal == SIGKILL {
1540                                fn_type = ExitType::Kill;
1541                            }
1542                            exit_status = match fn_type {
1543                                ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1544                                ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1545                                ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1546                                _ => None,
1547                            };
1548                        }
1549                        // Clear the wait status of the ptrace, because we're
1550                        // using the tg status instead.
1551                        ptrace
1552                            .as_mut()
1553                            .map(|ptrace| ptrace.get_last_signal(options.keep_waitable_state));
1554                    }
1555                    pid = process_state.base.leader;
1556                }
1557                if exit_status == None {
1558                    if let Some(ptrace) = task_state.ptrace.as_mut() {
1559                        // The information for the task, if we were in a non-group stop.
1560                        let mut fn_type = ExitType::None;
1561                        let event = ptrace
1562                            .event_data
1563                            .as_ref()
1564                            .map_or(PtraceEvent::None, |event| event.event);
1565                        if task_stopped == StopState::Awake {
1566                            fn_type = ExitType::Cont;
1567                        }
1568                        if task_stopped.is_stopping_or_stopped()
1569                            || ptrace.stop_status == PtraceStatus::Listening
1570                        {
1571                            fn_type = ExitType::Stop;
1572                        }
1573                        if fn_type != ExitType::None {
1574                            if let Some(siginfo) =
1575                                ptrace.get_last_signal(options.keep_waitable_state)
1576                            {
1577                                if siginfo.signal == SIGKILL {
1578                                    fn_type = ExitType::Kill;
1579                                }
1580                                exit_status = match fn_type {
1581                                    ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1582                                    ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1583                                    ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1584                                    _ => None,
1585                                };
1586                            }
1587                        }
1588                        pid = task_ref.get_tid();
1589                    }
1590                }
1591                if let Some(exit_status) = exit_status {
1592                    return Some(WaitResult {
1593                        pid,
1594                        uid,
1595                        exit_info: ProcessExitInfo { status: exit_status, exit_signal },
1596                        time_stats,
1597                    });
1598                }
1599            }
1600        }
1601        None
1602    }
1603
1604    /// Attempts to send an unchecked signal to this thread group.
1605    ///
1606    /// - `current_task`: The task that is sending the signal.
1607    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1608    /// where rights are to be checked but no signal is actually sent.
1609    ///
1610    /// # Returns
1611    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1612    /// the error that was encountered.
1613    pub fn send_signal_unchecked(
1614        &self,
1615        current_task: &CurrentTask,
1616        unchecked_signal: UncheckedSignal,
1617    ) -> Result<(), Errno> {
1618        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1619            let signal_info = SignalInfo::with_detail(
1620                signal,
1621                SI_USER as i32,
1622                SignalDetail::Kill {
1623                    pid: current_task.thread_group().leader,
1624                    uid: current_task.current_creds().uid,
1625                },
1626            );
1627
1628            self.write().send_signal(signal_info);
1629        }
1630
1631        Ok(())
1632    }
1633
1634    /// Sends a signal to this thread_group without performing any access checks.
1635    ///
1636    /// # Safety
1637    /// This is unsafe, because it should only be called by tools and tests.
1638    pub unsafe fn send_signal_unchecked_debug(
1639        &self,
1640        current_task: &CurrentTask,
1641        unchecked_signal: UncheckedSignal,
1642    ) -> Result<(), Errno> {
1643        let signal = Signal::try_from(unchecked_signal)?;
1644        let signal_info = SignalInfo::with_detail(
1645            signal,
1646            SI_USER as i32,
1647            SignalDetail::Kill {
1648                pid: current_task.thread_group().leader,
1649                uid: current_task.current_creds().uid,
1650            },
1651        );
1652
1653        self.write().send_signal(signal_info);
1654        Ok(())
1655    }
1656
1657    /// Attempts to send an unchecked signal to this thread group, with info read from
1658    /// `siginfo_ref`.
1659    ///
1660    /// - `current_task`: The task that is sending the signal.
1661    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1662    /// where rights are to be checked but no signal is actually sent.
1663    /// - `siginfo_ref`: The siginfo that will be enqueued.
1664    /// - `options`: Options for how to convert the siginfo into a signal info.
1665    ///
1666    /// # Returns
1667    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1668    /// the error that was encountered.
1669    #[track_caller]
1670    pub fn send_signal_unchecked_with_info(
1671        &self,
1672        current_task: &CurrentTask,
1673        unchecked_signal: UncheckedSignal,
1674        siginfo_ref: UserAddress,
1675        options: IntoSignalInfoOptions,
1676    ) -> Result<(), Errno> {
1677        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1678            let siginfo = UncheckedSignalInfo::read_from_siginfo(current_task, siginfo_ref)?;
1679            if self.leader != current_task.get_pid()
1680                && (siginfo.code() >= 0 || siginfo.code() == SI_TKILL)
1681            {
1682                return error!(EPERM);
1683            }
1684
1685            self.write().send_signal(siginfo.into_signal_info(signal, options)?);
1686        }
1687
1688        Ok(())
1689    }
1690
1691    /// Checks whether or not `current_task` can signal this thread group with `unchecked_signal`.
1692    ///
1693    /// Returns:
1694    ///   - `Ok(Some(Signal))` if the signal passed checks and should be sent.
1695    ///   - `Ok(None)` if the signal passed checks, but should not be sent. This is used by
1696    ///   userspace for permission checks.
1697    ///   - `Err(_)` if the permission checks failed.
1698    fn check_signal_access(
1699        &self,
1700        current_task: &CurrentTask,
1701        unchecked_signal: UncheckedSignal,
1702    ) -> Result<Option<Signal>, Errno> {
1703        // Pick an arbitrary task in thread_group to check permissions.
1704        //
1705        // Tasks can technically have different credentials, but in practice they are kept in sync.
1706        let state = self.read();
1707        let target_task = state.get_live_task()?;
1708        current_task.can_signal(&target_task, unchecked_signal)?;
1709
1710        // 0 is a sentinel value used to do permission checks.
1711        if unchecked_signal.is_zero() {
1712            return Ok(None);
1713        }
1714
1715        let signal = Signal::try_from(unchecked_signal)?;
1716        security::check_signal_access(current_task, &target_task, signal)?;
1717
1718        Ok(Some(signal))
1719    }
1720
1721    pub fn has_signal_queued(&self, signal: Signal) -> bool {
1722        self.pending_signals.lock().has_queued(signal)
1723    }
1724
1725    pub fn num_signals_queued(&self) -> usize {
1726        self.pending_signals.lock().num_queued()
1727    }
1728
1729    pub fn get_pending_signals(&self) -> SigSet {
1730        self.pending_signals.lock().pending()
1731    }
1732
1733    pub fn is_any_signal_allowed_by_mask(&self, mask: SigSet) -> bool {
1734        self.pending_signals.lock().is_any_allowed_by_mask(mask)
1735    }
1736
1737    pub fn take_next_signal_where<F>(&self, predicate: F) -> Option<SignalInfo>
1738    where
1739        F: Fn(&SignalInfo) -> bool,
1740    {
1741        let mut signals = self.pending_signals.lock();
1742        let r = signals.take_next_where(predicate);
1743        self.has_pending_signals.store(!signals.is_empty(), Ordering::Relaxed);
1744        r
1745    }
1746
1747    /// Drive this `ThreadGroup` to exit, allowing it time to handle SIGTERM before sending SIGKILL.
1748    ///
1749    /// Returns once `ThreadGroup::exit()` has completed.
1750    ///
1751    /// Must be called from the system task.
1752    pub async fn shut_down(this: Weak<Self>) {
1753        const SHUTDOWN_SIGNAL_HANDLING_TIMEOUT: zx::MonotonicDuration =
1754            zx::MonotonicDuration::from_seconds(1);
1755
1756        // Prepare for shutting down the thread group.
1757        let (tg_name, mut on_exited) = {
1758            // Nest this upgraded access so TempRefs aren't held across await-points.
1759            let Some(this) = this.upgrade() else {
1760                return;
1761            };
1762
1763            // Register a channel to be notified when exit() is complete.
1764            let (on_exited_send, on_exited) = futures::channel::oneshot::channel();
1765            this.write().exit_notifier = Some(on_exited_send);
1766
1767            // We want to be able to log about this thread group without upgrading the WeakRef.
1768            let tg_name = format!("{this:?}");
1769
1770            (tg_name, on_exited)
1771        };
1772
1773        log_debug!(tg:% = tg_name; "shutting down thread group, sending SIGTERM");
1774        this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGTERM)));
1775
1776        // Give thread groups some time to handle SIGTERM, proceeding early if they exit
1777        let timeout = fuchsia_async::Timer::new(SHUTDOWN_SIGNAL_HANDLING_TIMEOUT);
1778        futures::pin_mut!(timeout);
1779
1780        // Use select_biased instead of on_timeout() so that we can await on on_exited later
1781        futures::select_biased! {
1782            _ = &mut on_exited => (),
1783            _ = timeout => {
1784                log_debug!(tg:% = tg_name; "sending SIGKILL");
1785                this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGKILL)));
1786            },
1787        };
1788
1789        log_debug!(tg:% = tg_name; "waiting for exit");
1790        // It doesn't matter whether ThreadGroup::exit() was called or the process exited with
1791        // a return code and dropped the sender end of the channel.
1792        on_exited.await.ok();
1793        log_debug!(tg:% = tg_name; "thread group shutdown complete");
1794    }
1795
    /// Returns the KOID of the process for this thread group.
    /// This method should be used to when mapping 32 bit linux process ids to KOIDs
    /// to avoid breaking the encapsulation of the zx::process within the ThreadGroup.
    /// This encapsulation is important since the relationship between the ThreadGroup
    /// and the Process may change over time. See [ThreadGroup::process] for more details.
    ///
    /// # Errors
    /// Propagates the kernel `Status` if the KOID cannot be queried from the process handle.
    pub fn get_process_koid(&self) -> Result<Koid, Status> {
        self.process.koid()
    }
1804}
1805
/// Outcome of scanning for a waitable child in waitid/waitpid-style calls.
pub enum WaitableChildResult {
    /// A matching child had waitable state; the result is ready to report now.
    ReadyNow(Box<WaitResult>),
    /// Matching children exist but none are waitable yet; the caller should block.
    ShouldWait,
    /// No child matches the selector at all (reported as ECHILD by the wait syscalls).
    NoneFound,
}
1811
1812#[apply(state_implementation!)]
1813impl ThreadGroupMutableState<Base = ThreadGroup> {
    /// The pid of the thread group leader, i.e. the process id of this thread group.
    pub fn leader(&self) -> pid_t {
        self.base.leader
    }
1817
1818    pub fn leader_command(&self) -> TaskCommand {
1819        self.get_task(self.leader())
1820            .map(|l| l.command())
1821            .unwrap_or_else(|| TaskCommand::new(b"<leader exited>"))
1822    }
1823
1824    pub fn is_terminating(&self) -> bool {
1825        !matches!(self.run_state, ThreadGroupRunState::Running)
1826    }
1827
1828    pub fn children(&self) -> impl Iterator<Item = Arc<ThreadGroup>> + '_ {
1829        self.children.values().map(|v| {
1830            v.upgrade().expect("Weak references to processes in ThreadGroup must always be valid")
1831        })
1832    }
1833
1834    pub fn tasks(&self) -> impl Iterator<Item = TempRef<'_, Task>> + '_ {
1835        self.tasks.values().flat_map(|t| t.upgrade())
1836    }
1837
    /// Iterator over the ids of all tasks currently registered in this thread group.
    pub fn task_ids(&self) -> impl Iterator<Item = &tid_t> {
        self.tasks.keys()
    }
1841
    /// Whether a task with `tid` is registered in this thread group.
    pub fn contains_task(&self, tid: tid_t) -> bool {
        self.tasks.contains_key(&tid)
    }
1845
    /// Looks up the task with `tid`, returning `None` if it is not registered
    /// or can no longer be upgraded.
    pub fn get_task(&self, tid: tid_t) -> Option<TempRef<'_, Task>> {
        self.tasks.get(&tid).and_then(|t| t.upgrade())
    }
1849
    /// Number of tasks currently registered in this thread group.
    pub fn tasks_count(&self) -> usize {
        self.tasks.len()
    }
1853
1854    pub fn get_ppid(&self) -> pid_t {
1855        match &self.parent {
1856            Some(parent) => parent.upgrade().leader,
1857            None => 0,
1858        }
1859    }
1860
    /// Moves this thread group into `process_group`, leaving its current process
    /// group first. No-op when the group is unchanged.
    fn set_process_group<L>(
        &mut self,
        locked: &mut Locked<L>,
        process_group: Arc<ProcessGroup>,
        pids: &PidTable,
    ) where
        L: LockBefore<ProcessGroupState>,
    {
        if self.process_group == process_group {
            return;
        }
        self.leave_process_group(locked, pids);
        self.process_group = process_group;
        // Register this thread group as a member of its new process group.
        self.process_group.insert(locked, self.base);
    }
1876
    /// Removes this thread group from its current process group, and cleans the
    /// process group out of its session and the pid table when appropriate.
    fn leave_process_group<L>(&mut self, locked: &mut Locked<L>, pids: &PidTable)
    where
        L: LockBefore<ProcessGroupState>,
    {
        // NOTE(review): `remove` appears to return true when the process group
        // should be torn down (no remaining members) — confirm against
        // ProcessGroup::remove before relying on this.
        if self.process_group.remove(locked, self.base) {
            self.process_group.session.write().remove(self.process_group.leader);
            pids.remove_process_group(self.process_group.leader);
        }
    }
1886
1887    /// Indicates whether the thread group is waitable via waitid and waitpid for
1888    /// either WSTOPPED or WCONTINUED.
1889    pub fn is_waitable(&self) -> bool {
1890        return self.last_signal.is_some() && !self.base.load_stopped().is_in_progress();
1891    }
1892
1893    pub fn get_waitable_zombie(
1894        &mut self,
1895        zombie_list: &dyn Fn(&mut ThreadGroupMutableState) -> &mut Vec<OwnedRef<ZombieProcess>>,
1896        selector: &ProcessSelector,
1897        options: &WaitingOptions,
1898        pids: &mut PidTable,
1899    ) -> Option<WaitResult> {
1900        // We look for the last zombie in the vector that matches pid selector and waiting options
1901        let selected_zombie_position = zombie_list(self)
1902            .iter()
1903            .rev()
1904            .position(|zombie| zombie.matches_selector_and_waiting_option(selector, options))
1905            .map(|position_starting_from_the_back| {
1906                zombie_list(self).len() - 1 - position_starting_from_the_back
1907            });
1908
1909        selected_zombie_position.map(|position| {
1910            if options.keep_waitable_state {
1911                zombie_list(self)[position].to_wait_result()
1912            } else {
1913                let zombie = zombie_list(self).remove(position);
1914                self.children_time_stats += zombie.time_stats;
1915                let result = zombie.to_wait_result();
1916                zombie.release(pids);
1917                result
1918            }
1919        })
1920    }
1921
1922    pub fn is_correct_exit_signal(for_clone: bool, exit_code: Option<Signal>) -> bool {
1923        for_clone == (exit_code != Some(SIGCHLD))
1924    }
1925
    /// Scans live (non-zombie) children for one that is waitable for a stop or
    /// continue event, honoring `selector` and `options`.
    ///
    /// Returns `ReadyNow` with the wait result, `ShouldWait` when matching
    /// children exist but none are currently waitable, or `NoneFound` when no
    /// child matches the selector at all.
    fn get_waitable_running_children(
        &self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &PidTable,
    ) -> WaitableChildResult {
        // The children whose pid matches the pid selector queried.
        let filter_children_by_pid_selector = |child: &ThreadGroup| match *selector {
            ProcessSelector::Any => true,
            ProcessSelector::Pid(pid) => child.leader == pid,
            ProcessSelector::Pgid(pgid) => {
                pids.get_process_group(pgid).as_ref() == Some(&child.read().process_group)
            }
            ProcessSelector::Process(ref key) => *key == ThreadGroupKey::from(child),
        };

        // The children whose exit signal matches the waiting options queried.
        let filter_children_by_waiting_options = |child: &ThreadGroup| {
            if options.wait_for_all {
                return true;
            }
            Self::is_correct_exit_signal(options.wait_for_clone, child.read().exit_signal)
        };

        // If wait_for_exited flag is disabled or no terminated children were found we look for living children.
        let mut selected_children = self
            .children
            .values()
            .map(|t| t.upgrade().unwrap())
            .filter(|tg| filter_children_by_pid_selector(&tg))
            .filter(|tg| filter_children_by_waiting_options(&tg))
            .peekable();
        if selected_children.peek().is_none() {
            // There still might be a process that ptrace hasn't looked at yet.
            if self.deferred_zombie_ptracers.iter().any(|dzp| match *selector {
                ProcessSelector::Any => true,
                ProcessSelector::Pid(pid) => dzp.tracee_thread_group_key.pid() == pid,
                ProcessSelector::Pgid(pgid) => pgid == dzp.tracee_pgid,
                ProcessSelector::Process(ref key) => *key == dzp.tracee_thread_group_key,
            }) {
                return WaitableChildResult::ShouldWait;
            }

            return WaitableChildResult::NoneFound;
        }
        for child in selected_children {
            let child = child.write();
            // A recorded last_signal marks the child as having a reportable
            // stop/continue event (see `is_waitable`).
            if child.last_signal.is_some() {
                // Builds the WaitResult for this child, consuming (or cloning,
                // when keep_waitable_state is set) its last_signal.
                let build_wait_result = |mut child: ThreadGroupWriteGuard<'_>,
                                         exit_status: &dyn Fn(SignalInfo) -> ExitStatus|
                 -> WaitResult {
                    let siginfo = if options.keep_waitable_state {
                        child.last_signal.clone().unwrap()
                    } else {
                        child.last_signal.take().unwrap()
                    };
                    let exit_status = if siginfo.signal == SIGKILL {
                        // This overrides the stop/continue choice.
                        ExitStatus::Kill(siginfo)
                    } else {
                        exit_status(siginfo)
                    };
                    // Credentials are taken from an arbitrary task of the child.
                    let info = child.tasks.values().next().unwrap().info();
                    let uid = info.real_creds().uid;
                    WaitResult {
                        pid: child.base.leader,
                        uid,
                        exit_info: ProcessExitInfo {
                            status: exit_status,
                            exit_signal: child.exit_signal,
                        },
                        time_stats: child.base.time_stats() + child.children_time_stats,
                    }
                };
                let child_stopped = child.base.load_stopped();
                if child_stopped == StopState::Awake && options.wait_for_continued {
                    return WaitableChildResult::ReadyNow(Box::new(build_wait_result(
                        child,
                        &|siginfo| ExitStatus::Continue(siginfo, PtraceEvent::None),
                    )));
                }
                if child_stopped == StopState::GroupStopped && options.wait_for_stopped {
                    return WaitableChildResult::ReadyNow(Box::new(build_wait_result(
                        child,
                        &|siginfo| ExitStatus::Stop(siginfo, PtraceEvent::None),
                    )));
                }
            }
        }

        WaitableChildResult::ShouldWait
    }
2018
2019    /// Returns any waitable child matching the given `selector` and `options`. Returns None if no
2020    /// child matching the selector is waitable. Returns ECHILD if no child matches the selector at
2021    /// all.
2022    ///
2023    /// Will remove the waitable status from the child depending on `options`.
2024    pub fn get_waitable_child(
2025        &mut self,
2026        selector: &ProcessSelector,
2027        options: &WaitingOptions,
2028        pids: &mut PidTable,
2029    ) -> WaitableChildResult {
2030        if options.wait_for_exited {
2031            if let Some(waitable_zombie) = self.get_waitable_zombie(
2032                &|state: &mut ThreadGroupMutableState| &mut state.zombie_children,
2033                selector,
2034                options,
2035                pids,
2036            ) {
2037                return WaitableChildResult::ReadyNow(Box::new(waitable_zombie));
2038            }
2039        }
2040
2041        self.get_waitable_running_children(selector, options, pids)
2042    }
2043
2044    /// Returns a task in the current thread group.
2045    pub fn get_live_task(&self) -> Result<TempRef<'_, Task>, Errno> {
2046        self.tasks
2047            .get(&self.leader())
2048            .and_then(|t| t.upgrade())
2049            .or_else(|| self.tasks().next())
2050            .ok_or_else(|| errno!(ESRCH))
2051    }
2052
    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
    /// does not update the signal.  If |finalize_only| is set, will check that
    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
    /// before executing it.
    ///
    /// Returns the latest stop state after any changes.
    ///
    /// Note: consumes the write guard (`mut self`) so the lock can be dropped
    /// before the parent's lock is taken at the end.
    pub fn set_stopped(
        mut self,
        new_stopped: StopState,
        siginfo: Option<SignalInfo>,
        finalize_only: bool,
    ) -> StopState {
        // Disallowed transitions are rejected up front; the current state is
        // returned unchanged.
        if let Some(stopped) = self.base.check_stopped_state(new_stopped, finalize_only) {
            return stopped;
        }

        // Thread groups don't transition to group stop if they are waking, because waking
        // means something told it to wake up (like a SIGCONT) but hasn't finished yet.
        if self.base.load_stopped() == StopState::Waking
            && (new_stopped == StopState::GroupStopping || new_stopped == StopState::GroupStopped)
        {
            return self.base.load_stopped();
        }

        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When thread
        // group can be stopped inside user code, tasks/thread groups will
        // need to be either restarted or stopped here.
        self.store_stopped(new_stopped);
        if let Some(signal) = &siginfo {
            // We don't want waiters to think the process was unstopped
            // because of a sigkill.  They will get woken when the
            // process dies.
            if signal.signal != SIGKILL {
                self.last_signal = siginfo;
            }
        }
        if new_stopped == StopState::Waking || new_stopped == StopState::ForceWaking {
            self.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::Stopped);
        };

        // Only notify the parent once a transition has completed (i.e. not for
        // in-progress *-ing states).
        let parent = (!new_stopped.is_in_progress()).then(|| self.parent.clone()).flatten();

        // Drop the lock before locking the parent.
        std::mem::drop(self);
        if let Some(parent) = parent {
            let parent = parent.upgrade();
            parent
                .write()
                .lifecycle_waiters
                .notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
        }

        new_stopped
    }
2107
    /// Stores `state` as the thread group's atomic stop state.
    fn store_stopped(&mut self, state: StopState) {
        // We don't actually use the guard but we require it to enforce that the
        // caller holds the thread group's mutable state lock (identified by
        // mutable access to the thread group's mutable state).

        self.base.stop_state.store(state, Ordering::Relaxed)
    }
2115
    /// Sends the signal `signal_info` to this thread group.
    ///
    /// Enqueues the signal on the group-wide pending queue, updates the stop
    /// state for SIGKILL/SIGCONT, and interrupts at most one task that would
    /// actually deliver the signal.
    #[allow(unused_mut, reason = "needed for some but not all macro outputs")]
    pub fn send_signal(mut self, signal_info: SignalInfo) {
        let sigaction = self.base.signal_actions.get(signal_info.signal);
        let action = action_for_signal(&signal_info, sigaction);

        {
            // Scope the queue lock: enqueue and publish the flag together.
            let mut pending_signals = self.base.pending_signals.lock();
            pending_signals.enqueue(signal_info.clone());
            self.base.has_pending_signals.store(true, Ordering::Relaxed);
        }
        // Snapshot the tasks as weak refs now; `set_stopped` below consumes the
        // guard (`self`), so the task list cannot be read afterwards.
        let tasks: Vec<WeakRef<Task>> = self.tasks.values().map(|t| t.weak_clone()).collect();

        // Set state to waking before interrupting any tasks.
        if signal_info.signal == SIGKILL {
            self.set_stopped(StopState::ForceWaking, Some(signal_info.clone()), false);
        } else if signal_info.signal == SIGCONT {
            self.set_stopped(StopState::Waking, Some(signal_info.clone()), false);
        }

        let mut has_interrupted_task = false;
        for task in tasks.iter().flat_map(|t| t.upgrade()) {
            let mut task_state = task.write();

            // Mirror the group-level stop-state change on each task.
            if signal_info.signal == SIGKILL {
                task_state.thaw();
                task_state.set_stopped(StopState::ForceWaking, None, None, None);
            } else if signal_info.signal == SIGCONT {
                task_state.set_stopped(StopState::Waking, None, None, None);
            }

            let is_masked = task_state.is_signal_masked(signal_info.signal);
            let was_masked = task_state.is_signal_masked_by_saved_mask(signal_info.signal);

            // The signal counts as queued unless it would be outright ignored;
            // masked or ptraced tasks keep it queued for later delivery.
            let is_queued = action != DeliveryAction::Ignore
                || is_masked
                || was_masked
                || task_state.is_ptraced();

            if is_queued {
                task_state.notify_signal_waiters(&signal_info.signal);

                if !is_masked && action.must_interrupt(Some(sigaction)) && !has_interrupted_task {
                    // Only interrupt one task, and only interrupt if the signal was actually queued
                    // and the action must interrupt.
                    drop(task_state);
                    task.interrupt();
                    has_interrupted_task = true;
                }
            }
        }
    }
2168}
2169
/// Container around a weak task and a strong `TaskPersistentInfo`. It is needed to keep the
/// information even when the task is not upgradable, because when the task is dropped, there is a
/// moment where the task is not yet released, yet the weak pointer is not upgradeable anymore.
/// During this time, it is still necessary to access the persistent info to compute the state of
/// the thread for the different wait syscalls.
///
/// Field 0 is the weak task reference; field 1 is the always-available persistent info.
pub struct TaskContainer(WeakRef<Task>, TaskPersistentInfo);
2176
/// Builds a container from a live task reference, capturing both the weak
/// reference and a clone of the persistent info.
impl From<&TempRef<'_, Task>> for TaskContainer {
    fn from(task: &TempRef<'_, Task>) -> Self {
        Self(WeakRef::from(task), task.persistent_info.clone())
    }
}
2182
/// Extracts the persistent info, discarding the weak task reference.
impl From<TaskContainer> for TaskPersistentInfo {
    fn from(container: TaskContainer) -> TaskPersistentInfo {
        container.1
    }
}
2188
impl TaskContainer {
    /// Attempts to upgrade the weak task reference; `None` once the task is gone.
    fn upgrade(&self) -> Option<TempRef<'_, Task>> {
        self.0.upgrade()
    }

    /// Clones the weak reference without upgrading it.
    fn weak_clone(&self) -> WeakRef<Task> {
        self.0.clone()
    }

    /// The persistent info, available even when the task is no longer upgradable.
    fn info(&self) -> &TaskPersistentInfo {
        &self.1
    }
}
2202
2203#[cfg(test)]
2204mod test {
2205    use super::*;
2206    use crate::testing::*;
2207
    // Verifies setsid semantics: a process-group leader may not create a new
    // session, while a non-leader child can, becoming the leader of a fresh
    // session and leaving its old process group.
    #[::fuchsia::test]
    async fn test_setsid() {
        spawn_kernel_and_run(async |locked, current_task| {
            fn get_process_group(task: &Task) -> Arc<ProcessGroup> {
                Arc::clone(&task.thread_group().read().process_group)
            }
            // setsid(2) fails with EPERM for a process group leader.
            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));

            // A freshly cloned child starts in its parent's process group.
            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert_eq!(get_process_group(&current_task), get_process_group(&child_task));

            let old_process_group = child_task.thread_group().read().process_group.clone();
            assert_eq!(child_task.thread_group().setsid(locked), Ok(()));
            // The child now leads its own session...
            assert_eq!(
                child_task.thread_group().read().process_group.session.leader,
                child_task.get_pid()
            );
            // ...and has been removed from its previous process group.
            assert!(
                !old_process_group.read(locked).thread_groups().contains(child_task.thread_group())
            );
        })
        .await;
    }
2231
    // Verifies that a child's exit status is recorded on the parent's zombie
    // list once the child exits and is dropped.
    #[::fuchsia::test]
    async fn test_exit_status() {
        spawn_kernel_and_run(async |locked, current_task| {
            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            child.thread_group().exit(locked, ExitStatus::Exit(42), None);
            // Dropping the last reference lets the child become a zombie.
            std::mem::drop(child);
            assert_eq!(
                current_task.thread_group().read().zombie_children[0].exit_info.status,
                ExitStatus::Exit(42)
            );
        })
        .await;
    }
2245
2246    #[::fuchsia::test]
2247    async fn test_setgpid() {
2248        spawn_kernel_and_run(async |locked, current_task| {
2249            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));
2250
2251            let child_task1 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2252            let child_task2 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2253            let execd_child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2254            execd_child_task.thread_group().write().did_exec = true;
2255            let other_session_child_task =
2256                current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2257            assert_eq!(other_session_child_task.thread_group().setsid(locked), Ok(()));
2258
2259            assert_eq!(
2260                child_task1.thread_group().setpgid(locked, &current_task, &current_task, 0),
2261                error!(ESRCH)
2262            );
2263            assert_eq!(
2264                current_task.thread_group().setpgid(locked, &current_task, &execd_child_task, 0),
2265                error!(EACCES)
2266            );
2267            assert_eq!(
2268                current_task.thread_group().setpgid(locked, &current_task, &current_task, 0),
2269                error!(EPERM)
2270            );
2271            assert_eq!(
2272                current_task.thread_group().setpgid(
2273                    locked,
2274                    &current_task,
2275                    &other_session_child_task,
2276                    0
2277                ),
2278                error!(EPERM)
2279            );
2280            assert_eq!(
2281                current_task.thread_group().setpgid(locked, &current_task, &child_task1, -1),
2282                error!(EINVAL)
2283            );
2284            assert_eq!(
2285                current_task.thread_group().setpgid(locked, &current_task, &child_task1, 255),
2286                error!(EPERM)
2287            );
2288            assert_eq!(
2289                current_task.thread_group().setpgid(
2290                    locked,
2291                    &current_task,
2292                    &child_task1,
2293                    other_session_child_task.tid
2294                ),
2295                error!(EPERM)
2296            );
2297
2298            assert_eq!(
2299                child_task1.thread_group().setpgid(locked, &current_task, &child_task1, 0),
2300                Ok(())
2301            );
2302            assert_eq!(
2303                child_task1.thread_group().read().process_group.session.leader,
2304                current_task.tid
2305            );
2306            assert_eq!(child_task1.thread_group().read().process_group.leader, child_task1.tid);
2307
2308            let old_process_group = child_task2.thread_group().read().process_group.clone();
2309            assert_eq!(
2310                current_task.thread_group().setpgid(
2311                    locked,
2312                    &current_task,
2313                    &child_task2,
2314                    child_task1.tid
2315                ),
2316                Ok(())
2317            );
2318            assert_eq!(child_task2.thread_group().read().process_group.leader, child_task1.tid);
2319            assert!(
2320                !old_process_group
2321                    .read(locked)
2322                    .thread_groups()
2323                    .contains(child_task2.thread_group())
2324            );
2325        })
2326        .await;
2327    }
2328
2329    #[::fuchsia::test]
2330    async fn test_adopt_children() {
2331        spawn_kernel_and_run(async |locked, current_task| {
2332            let task1 = current_task.clone_task_for_test(locked, 0, None);
2333            let task2 = task1.clone_task_for_test(locked, 0, None);
2334            let task3 = task2.clone_task_for_test(locked, 0, None);
2335
2336            assert_eq!(task3.thread_group().read().get_ppid(), task2.tid);
2337
2338            task2.thread_group().exit(locked, ExitStatus::Exit(0), None);
2339            std::mem::drop(task2);
2340
2341            // Task3 parent should be current_task.
2342            assert_eq!(task3.thread_group().read().get_ppid(), current_task.tid);
2343        })
2344        .await;
2345    }
2346
2347    #[::fuchsia::test]
2348    async fn test_getppid_after_self_and_parent_exit() {
2349        spawn_kernel_and_run(async |locked, current_task| {
2350            let task1 = current_task.clone_task_for_test(locked, 0, None);
2351            let task2 = task1.clone_task_for_test(locked, 0, None);
2352
2353            // Take strong references to the ThreadGroups.
2354            let tg1 = task1.thread_group().clone();
2355            let tg2 = task2.thread_group().clone();
2356
2357            assert_eq!(tg1.read().get_ppid(), current_task.tid);
2358            assert_eq!(tg2.read().get_ppid(), task1.tid);
2359
2360            // Exit `task2` first, so that when `task1` exits, it will not be reparented to init.
2361            tg2.exit(locked, ExitStatus::Exit(0), None);
2362            std::mem::drop(task2);
2363
2364            // Exit `task1`, and drop the task and ThreadGroup.
2365            tg1.exit(locked, ExitStatus::Exit(0), None);
2366            std::mem::drop(task1);
2367            std::mem::drop(tg1);
2368
2369            // It should still be valid to call `get_ppid()` on `tg2`, though is parent ThreadGroup
2370            // no longer exists.
2371            let _ = tg2.read().get_ppid();
2372        })
2373        .await;
2374    }
2375}