starnix_core/task/
thread_group.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::terminal::{Terminal, TerminalController};
6use crate::mutable_state::{state_accessor, state_implementation};
7use crate::ptrace::{
8    AtomicStopState, PtraceAllowedPtracers, PtraceEvent, PtraceOptions, PtraceStatus, StopState,
9    ZombiePtracees, ptrace_detach,
10};
11use crate::security;
12use crate::signals::syscalls::WaitingOptions;
13use crate::signals::{
14    DeliveryAction, IntoSignalInfoOptions, QueuedSignals, SignalActions, SignalDetail, SignalInfo,
15    UncheckedSignalInfo, action_for_signal, send_standard_signal,
16};
17use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
18use crate::task::{
19    ControllingTerminal, CurrentTask, ExitStatus, Kernel, PidTable, ProcessGroup, Session, Task,
20    TaskMutableState, TaskPersistentInfo, TypedWaitQueue,
21};
22use crate::time::{IntervalTimerHandle, TimerTable};
23use itertools::Itertools;
24use macro_rules_attribute::apply;
25use starnix_lifecycle::{AtomicU64Counter, DropNotifier};
26use starnix_logging::{log_debug, log_error, log_info, log_warn, track_stub};
27use starnix_sync::{
28    LockBefore, Locked, Mutex, OrderedMutex, ProcessGroupState, RwLock, ThreadGroupLimits, Unlocked,
29};
30use starnix_task_command::TaskCommand;
31use starnix_types::ownership::{OwnedRef, Releasable, TempRef, WeakRef};
32use starnix_types::stats::TaskTimeStats;
33use starnix_types::time::{itimerspec_from_itimerval, timeval_from_duration};
34use starnix_uapi::arc_key::WeakKey;
35use starnix_uapi::auth::{CAP_SYS_ADMIN, CAP_SYS_RESOURCE, Credentials};
36use starnix_uapi::errors::Errno;
37use starnix_uapi::personality::PersonalityFlags;
38use starnix_uapi::resource_limits::{Resource, ResourceLimits};
39use starnix_uapi::signals::{
40    SIGCHLD, SIGCONT, SIGHUP, SIGKILL, SIGTERM, SIGTTOU, SigSet, Signal, UncheckedSignal,
41};
42use starnix_uapi::user_address::UserAddress;
43use starnix_uapi::{
44    ITIMER_PROF, ITIMER_REAL, ITIMER_VIRTUAL, SI_TKILL, SI_USER, SIG_IGN, errno, error, itimerval,
45    pid_t, rlimit, tid_t, uid_t,
46};
47use std::collections::BTreeMap;
48use std::fmt;
49use std::sync::atomic::{AtomicBool, Ordering};
50use std::sync::{Arc, Weak};
51use zx::{Koid, Status};
52
/// A weak reference to a thread group that can be used in sets and maps.
///
/// The key pairs a pid with a `WeakKey` to the `ThreadGroup`. The derived comparison and
/// hashing traits consider both fields, in declaration order (`pid` first).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadGroupKey {
    /// The pid recorded when the key was created; `From<&ThreadGroup>` stores the group's
    /// `leader` here.
    pid: pid_t,
    /// Weak handle to the keyed thread group.
    thread_group: WeakKey<ThreadGroup>,
}
59
60impl ThreadGroupKey {
61    /// The pid of the thread group keyed by this object.
62    ///
63    /// As the key is weak (and pid are not unique due to pid namespaces), this should not be used
64    /// as an unique identifier of the thread group.
65    pub fn pid(&self) -> pid_t {
66        self.pid
67    }
68}
69
70impl std::ops::Deref for ThreadGroupKey {
71    type Target = Weak<ThreadGroup>;
72    fn deref(&self) -> &Self::Target {
73        &self.thread_group.0
74    }
75}
76
77impl From<&ThreadGroup> for ThreadGroupKey {
78    fn from(tg: &ThreadGroup) -> Self {
79        Self { pid: tg.leader, thread_group: WeakKey::from(&tg.weak_self.upgrade().unwrap()) }
80    }
81}
82
83impl<T: AsRef<ThreadGroup>> From<T> for ThreadGroupKey {
84    fn from(tg: T) -> Self {
85        tg.as_ref().into()
86    }
87}
88
/// Values used for waiting on the [ThreadGroup] lifecycle wait queue.
///
/// The enum is `#[repr(u64)]` and converts to its discriminant as a `u64`.
#[repr(u64)]
pub enum ThreadGroupLifecycleWaitValue {
    /// Wait for updates to the WaitResults of tasks in the group.
    ChildStatus,
    /// Wait for updates to `stopped`.
    Stopped,
}

/// Converts a wait value into its `u64` discriminant.
///
/// `From` is implemented rather than `Into` so that both `u64::from(value)` and
/// `value.into()` (through the standard blanket impl) are available to callers.
impl From<ThreadGroupLifecycleWaitValue> for u64 {
    fn from(value: ThreadGroupLifecycleWaitValue) -> Self {
        value as u64
    }
}
103
/// A child process that has exited, but whose zombie ptrace needs to be consumed
/// before it can be waited for.
#[derive(Clone, Debug)]
pub struct DeferredZombiePTracer {
    /// Key of the thread group of the original tracer.
    pub tracer_thread_group_key: ThreadGroupKey,
    /// Tid of the tracee.
    pub tracee_tid: tid_t,
    /// Process group id of the tracee, i.e. the leader of its process group at creation time.
    pub tracee_pgid: pid_t,
    /// Key of the thread group of the tracee.
    pub tracee_thread_group_key: ThreadGroupKey,
}
117
118impl DeferredZombiePTracer {
119    fn new(tracer: &ThreadGroup, tracee: &Task) -> Self {
120        Self {
121            tracer_thread_group_key: tracer.into(),
122            tracee_tid: tracee.tid,
123            tracee_pgid: tracee.thread_group().read().process_group.leader,
124            tracee_thread_group_key: tracee.thread_group_key.clone(),
125        }
126    }
127}
128
/// The mutable state of the ThreadGroup.
///
/// An instance of this struct is stored behind the `mutable_state` lock of `ThreadGroup`.
pub struct ThreadGroupMutableState {
    /// The parent thread group.
    ///
    /// The value needs to be writable so that the thread group can be re-parented to the
    /// correct subreaper if the parent ends before the child.
    pub parent: Option<ThreadGroupParent>,

    /// The signal this process generates on exit.
    pub exit_signal: Option<Signal>,

    /// The tasks in the thread group.
    ///
    /// The references to `Task` are weak to prevent cycles, as a `Task` has an Arc reference to
    /// its thread group.
    /// It is still expected that these weak references are always valid, as tasks must unregister
    /// themselves before they are deleted.
    tasks: BTreeMap<tid_t, TaskContainer>,

    /// The children of this thread group.
    ///
    /// The references to `ThreadGroup` are weak to prevent cycles, as a `ThreadGroup` has an Arc
    /// reference to its parent.
    /// It is still expected that these weak references are always valid, as thread groups must
    /// unregister themselves before they are deleted.
    pub children: BTreeMap<pid_t, Weak<ThreadGroup>>,

    /// Child tasks that have exited, but have not yet been waited for.
    pub zombie_children: Vec<OwnedRef<ZombieProcess>>,

    /// Ptracees of this process that have exited, but have not yet been waited for.
    pub zombie_ptracees: ZombiePtracees,

    /// Child processes that have exited, but whose zombie ptrace needs to be consumed
    /// before they can be waited for.
    pub deferred_zombie_ptracers: Vec<DeferredZombiePTracer>,

    /// Unified [WaitQueue] for all waited ThreadGroup events.
    pub lifecycle_waiters: TypedWaitQueue<ThreadGroupLifecycleWaitValue>,

    /// Whether this thread group will inherit the children of dying processes in its
    /// descendant tree.
    pub is_child_subreaper: bool,

    /// The IDs used to perform shell job control.
    pub process_group: Arc<ProcessGroup>,

    /// Initialized to `false` by `ThreadGroup::new`.
    /// NOTE(review): presumably set once a task of this group has called exec — confirm against
    /// the code that writes this field.
    pub did_exec: bool,

    /// A signal that indicates whether the process is going to become waitable
    /// via waitid and waitpid for either WSTOPPED or WCONTINUED, depending on
    /// the value of `stopped`. If not None, contains the SignalInfo to return.
    pub last_signal: Option<SignalInfo>,

    /// Whether the thread group is terminating or not, and if it is, the exit status of the
    /// thread group.
    run_state: ThreadGroupRunState,

    /// Time statistics accumulated from the children.
    pub children_time_stats: TaskTimeStats,

    /// Personality flags set with `sys_personality()`.
    pub personality: PersonalityFlags,

    /// Thread groups allowed to trace tasks in this thread group.
    pub allowed_ptracers: PtraceAllowedPtracers,

    /// Channel to message when this thread group exits.
    exit_notifier: Option<futures::channel::oneshot::Sender<()>>,

    /// Notifier for name changes.
    pub notifier: Option<std::sync::mpsc::Sender<MemoryAttributionLifecycleEvent>>,
}
202
/// A collection of `Task` objects that roughly correspond to a "process".
///
/// Userspace programmers often think about "threads" and "processes", but those concepts have no
/// clear analogs inside the kernel because tasks are typically created using `clone(2)`, which
/// takes a complex set of flags that describes how much state is shared between the original task
/// and the new task.
///
/// If a new task is created with the `CLONE_THREAD` flag, the new task will be placed in the same
/// `ThreadGroup` as the original task. Userspace typically uses this flag in conjunction with the
/// `CLONE_FILES`, `CLONE_VM`, and `CLONE_FS` flags, which corresponds to the userspace notion of a
/// "thread". For example, that's how `pthread_create` behaves. In that sense, a `ThreadGroup`
/// normally corresponds to the set of "threads" in a "process". However, this pattern is purely a
/// userspace convention, and nothing stops userspace from using `CLONE_THREAD` without
/// `CLONE_FILES`, for example.
///
/// In Starnix, a `ThreadGroup` corresponds to a Zircon process, which means we do not support the
/// `CLONE_THREAD` flag without the `CLONE_VM` flag. If we run into problems with this limitation,
/// we might need to revise this correspondence.
///
/// Each `Task` in a `ThreadGroup` has the same thread group ID (`tgid`). The task with the same
/// `pid` as the `tgid` is called the thread group leader.
///
/// Thread groups are destroyed when the last task in the group exits.
pub struct ThreadGroup {
    /// Weak reference to the `Arc` that owns this `ThreadGroup` (set up by `Arc::new_cyclic` in
    /// `ThreadGroup::new`). This allows retrieving a strong reference from a raw `ThreadGroup`.
    pub weak_self: Weak<ThreadGroup>,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// A handle to the underlying Zircon process object.
    ///
    /// Currently, we have a 1-to-1 mapping between thread groups and zx::process
    /// objects. This approach might break down if/when we implement CLONE_VM
    /// without CLONE_THREAD because that creates a situation where two thread
    /// groups share an address space. To implement that situation, we might
    /// need to break the 1-to-1 mapping between thread groups and zx::process
    /// or teach zx::process to share address spaces.
    pub process: zx::Process,

    /// The lead task of this thread group.
    ///
    /// The lead task is typically the initial thread created in the thread group.
    pub leader: pid_t,

    /// The signal actions that are registered for this process.
    pub signal_actions: Arc<SignalActions>,

    /// The timers for this thread group (from timer_create(), etc.).
    pub timers: TimerTable,

    /// A mechanism to be notified when this `ThreadGroup` is destroyed.
    pub drop_notifier: DropNotifier,

    /// Whether the process is currently stopped.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    stop_state: AtomicStopState,

    /// The mutable state of the ThreadGroup.
    mutable_state: RwLock<ThreadGroupMutableState>,

    /// The resource limits for this thread group.  This is outside mutable_state
    /// to avoid deadlocks where the thread_group lock is held when acquiring
    /// the task lock, and vice versa.
    pub limits: OrderedMutex<ResourceLimits, ThreadGroupLimits>,

    /// The next unique identifier for a seccomp filter.  These are required to be
    /// able to distinguish identical seccomp filters, which are treated differently
    /// for the purposes of SECCOMP_FILTER_FLAG_TSYNC.  Inherited across clone because
    /// seccomp filters are also inherited across clone.
    pub next_seccomp_filter_id: AtomicU64Counter,

    /// Tasks ptraced by this process, keyed by tid.
    pub ptracees: Mutex<BTreeMap<tid_t, TaskContainer>>,

    /// The signals that are currently pending for this thread group.
    pub pending_signals: Mutex<QueuedSignals>,

    /// Whether or not there are any pending signals available for tasks in this thread group.
    /// Used to avoid having to acquire the signal state lock in hot paths.
    pub has_pending_signals: AtomicBool,

    /// The monotonic time at which the thread group started.
    pub start_time: zx::MonotonicInstant,

    /// Whether to log syscalls at INFO level for this thread group.
    ///
    /// Updated by `sync_syscall_log_level` and read by `syscall_log_level`.
    log_syscalls_as_info: AtomicBool,
}
293
294impl fmt::Debug for ThreadGroup {
295    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
296        write!(
297            f,
298            "{}({})",
299            self.process.get_name().unwrap_or(zx::Name::new_lossy("<unknown>")),
300            self.leader
301        )
302    }
303}
304
305impl ThreadGroup {
306    pub fn sync_syscall_log_level(&self) {
307        let command = self.read().leader_command();
308        let filters = self.kernel.syscall_log_filters.lock();
309        let should_log = filters.iter().any(|f| f.matches(&command));
310        let prev_should_log = self.log_syscalls_as_info.swap(should_log, Ordering::Relaxed);
311        let change_str = match (should_log, prev_should_log) {
312            (true, false) => Some("Enabled"),
313            (false, true) => Some("Disabled"),
314            _ => None,
315        };
316        if let Some(change_str) = change_str {
317            log_info!(
318                "{change_str} info syscall logs for thread group {} (command: {command})",
319                self.leader
320            );
321        }
322    }
323
324    #[inline]
325    pub fn syscall_log_level(&self) -> starnix_logging::Level {
326        if self.log_syscalls_as_info.load(Ordering::Relaxed) {
327            starnix_logging::Level::Info
328        } else {
329            starnix_logging::Level::Trace
330        }
331    }
332}
333
334impl PartialEq for ThreadGroup {
335    fn eq(&self, other: &Self) -> bool {
336        self.leader == other.leader
337    }
338}
339
340#[cfg(any(test, debug_assertions))]
341impl Drop for ThreadGroup {
342    fn drop(&mut self) {
343        let state = self.mutable_state.get_mut();
344        assert!(state.tasks.is_empty());
345        assert!(state.children.is_empty());
346        assert!(state.zombie_children.is_empty());
347        assert!(state.zombie_ptracees.is_empty());
348        assert!(
349            state
350                .parent
351                .as_ref()
352                .and_then(|p| p.0.upgrade().as_ref().map(|p| p
353                    .read()
354                    .children
355                    .get(&self.leader)
356                    .is_none()))
357                .unwrap_or(true)
358        );
359    }
360}
361
/// A wrapper around a `Weak<ThreadGroup>` that expects the underlying `Weak` to always be
/// valid.
///
/// Validity is checked at runtime: on creation with a `debug_assert!` (debug builds only), and
/// on every `upgrade()`, which panics if the thread group is gone.
pub struct ThreadGroupParent(Weak<ThreadGroup>);
365
366impl ThreadGroupParent {
367    pub fn new(t: Weak<ThreadGroup>) -> Self {
368        debug_assert!(t.upgrade().is_some());
369        Self(t)
370    }
371
372    pub fn upgrade(&self) -> Arc<ThreadGroup> {
373        self.0.upgrade().expect("ThreadGroupParent references must always be valid")
374    }
375}
376
377impl Clone for ThreadGroupParent {
378    fn clone(&self) -> Self {
379        Self(self.0.clone())
380    }
381}
382
/// A selector that can match a process. Works as a representation of the pid argument to syscalls
/// like wait and kill.
#[derive(Debug, Clone)]
pub enum ProcessSelector {
    /// Matches any process at all.
    Any,
    /// Matches only the process with the specified pid.
    Pid(pid_t),
    /// Matches all the processes in the given process group.
    Pgid(pid_t),
    /// Matches the thread group with the given key.
    Process(ThreadGroupKey),
}
396
397impl ProcessSelector {
398    pub fn match_tid(&self, tid: tid_t, pid_table: &PidTable) -> bool {
399        match *self {
400            ProcessSelector::Pid(p) => {
401                if p == tid {
402                    true
403                } else {
404                    if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
405                        task_ref.get_pid() == p
406                    } else {
407                        false
408                    }
409                }
410            }
411            ProcessSelector::Any => true,
412            ProcessSelector::Pgid(pgid) => {
413                if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
414                    pid_table.get_process_group(pgid).as_ref()
415                        == Some(&task_ref.thread_group().read().process_group)
416                } else {
417                    false
418                }
419            }
420            ProcessSelector::Process(ref key) => {
421                if let Some(tg) = key.upgrade() {
422                    tg.read().tasks.contains_key(&tid)
423                } else {
424                    false
425                }
426            }
427        }
428    }
429}
430
/// Information recorded about a process when it exits.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProcessExitInfo {
    /// The exit status of the process.
    pub status: ExitStatus,
    /// The exit signal configured for the process, if any (copied from the thread group's
    /// `exit_signal` when the zombie is created).
    pub exit_signal: Option<Signal>,
}
436
/// Whether a thread group is still running or has started terminating.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
enum ThreadGroupRunState {
    /// The thread group has not started terminating. This is the default state.
    #[default]
    Running,
    /// The thread group is terminating with the given exit status.
    Terminating(ExitStatus),
}
443
/// The data reported to a waiter about an exited process.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WaitResult {
    /// Pid of the process being reported.
    pub pid: pid_t,
    /// Uid associated with the process being reported.
    pub uid: uid_t,

    /// Exit status and exit signal of the process.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,
}
454
455impl WaitResult {
456    // According to wait(2) man page, SignalInfo.signal needs to always be set to SIGCHLD
457    pub fn as_signal_info(&self) -> SignalInfo {
458        SignalInfo::with_detail(
459            SIGCHLD,
460            self.exit_info.status.signal_info_code(),
461            SignalDetail::SIGCHLD {
462                pid: self.pid,
463                uid: self.uid,
464                status: self.exit_info.status.signal_info_status(),
465            },
466        )
467    }
468}
469
/// The record kept for a process that has exited but has not been waited for yet.
#[derive(Debug)]
pub struct ZombieProcess {
    /// Key of the thread group this zombie stands for; also provides the zombie's pid.
    pub thread_group_key: ThreadGroupKey,
    /// Process group id of the process (the leader of its process group when the zombie was
    /// created).
    pub pgid: pid_t,
    /// Uid taken from the credentials passed to `ZombieProcess::new`.
    pub uid: uid_t,

    /// Exit status and exit signal of the process.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,

    /// Whether dropping this ZombieProcess should imply removing the pid from
    /// the PidTable.
    pub is_canonical: bool,
}
485
486impl PartialEq for ZombieProcess {
487    fn eq(&self, other: &Self) -> bool {
488        // We assume only one set of ZombieProcess data per process, so this should cover it.
489        self.thread_group_key == other.thread_group_key
490            && self.pgid == other.pgid
491            && self.uid == other.uid
492            && self.is_canonical == other.is_canonical
493    }
494}
495
496impl Eq for ZombieProcess {}
497
498impl PartialOrd for ZombieProcess {
499    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
500        Some(self.cmp(other))
501    }
502}
503
504impl Ord for ZombieProcess {
505    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
506        self.thread_group_key.cmp(&other.thread_group_key)
507    }
508}
509
510impl ZombieProcess {
511    pub fn new(
512        thread_group: ThreadGroupStateRef<'_>,
513        credentials: &Credentials,
514        exit_info: ProcessExitInfo,
515    ) -> OwnedRef<Self> {
516        let time_stats = thread_group.base.time_stats() + thread_group.children_time_stats;
517        OwnedRef::new(ZombieProcess {
518            thread_group_key: thread_group.base.into(),
519            pgid: thread_group.process_group.leader,
520            uid: credentials.uid,
521            exit_info,
522            time_stats,
523            is_canonical: true,
524        })
525    }
526
527    pub fn pid(&self) -> pid_t {
528        self.thread_group_key.pid()
529    }
530
531    pub fn to_wait_result(&self) -> WaitResult {
532        WaitResult {
533            pid: self.pid(),
534            uid: self.uid,
535            exit_info: self.exit_info.clone(),
536            time_stats: self.time_stats,
537        }
538    }
539
540    pub fn as_artificial(&self) -> Self {
541        ZombieProcess {
542            thread_group_key: self.thread_group_key.clone(),
543            pgid: self.pgid,
544            uid: self.uid,
545            exit_info: self.exit_info.clone(),
546            time_stats: self.time_stats,
547            is_canonical: false,
548        }
549    }
550
551    pub fn matches_selector(&self, selector: &ProcessSelector) -> bool {
552        match *selector {
553            ProcessSelector::Any => true,
554            ProcessSelector::Pid(pid) => self.pid() == pid,
555            ProcessSelector::Pgid(pgid) => self.pgid == pgid,
556            ProcessSelector::Process(ref key) => self.thread_group_key == *key,
557        }
558    }
559
560    pub fn matches_selector_and_waiting_option(
561        &self,
562        selector: &ProcessSelector,
563        options: &WaitingOptions,
564    ) -> bool {
565        if !self.matches_selector(selector) {
566            return false;
567        }
568
569        if options.wait_for_all {
570            true
571        } else {
572            // A "clone" zombie is one which has delivered no signal, or a
573            // signal other than SIGCHLD to its parent upon termination.
574            options.wait_for_clone == (self.exit_info.exit_signal != Some(SIGCHLD))
575        }
576    }
577}
578
579impl Releasable for ZombieProcess {
580    type Context<'a> = &'a mut PidTable;
581
582    fn release<'a>(self, pids: &'a mut PidTable) {
583        if self.is_canonical {
584            pids.remove_zombie(self.pid());
585        }
586    }
587}
588
589impl ThreadGroup {
    /// Creates a new `ThreadGroup` and returns the `Arc` that owns it.
    ///
    /// The group is built with `Arc::new_cyclic` so that `weak_self` points back at the
    /// returned `Arc`.
    ///
    /// When `parent` is provided, the new group:
    ///   - starts with a clone of the parent's resource limits and with its personality flags,
    ///   - resets its seccomp filter id counter to the value read from the parent's counter,
    ///   - is registered in the parent's `children` map under `leader`, and
    ///   - is inserted into `process_group`.
    ///
    /// Without a parent, the limits and personality take their default values and none of the
    /// registrations above are performed by this function.
    ///
    /// The new group starts awake, running, with no tasks, children or zombies.
    pub fn new<L>(
        locked: &mut Locked<L>,
        kernel: Arc<Kernel>,
        process: zx::Process,
        parent: Option<ThreadGroupWriteGuard<'_>>,
        leader: pid_t,
        exit_signal: Option<Signal>,
        process_group: Arc<ProcessGroup>,
        signal_actions: Arc<SignalActions>,
    ) -> Arc<ThreadGroup>
    where
        L: LockBefore<ProcessGroupState>,
    {
        Arc::new_cyclic(|weak_self| {
            let mut thread_group = ThreadGroup {
                weak_self: weak_self.clone(),
                kernel,
                process,
                leader,
                signal_actions,
                timers: Default::default(),
                drop_notifier: Default::default(),
                // A child process created via fork(2) inherits its parent's
                // resource limits.  Resource limits are preserved across execve(2).
                limits: OrderedMutex::new(
                    parent
                        .as_ref()
                        .map(|p| p.base.limits.lock(locked.cast_locked()).clone())
                        .unwrap_or(Default::default()),
                ),
                next_seccomp_filter_id: Default::default(),
                ptracees: Default::default(),
                stop_state: AtomicStopState::new(StopState::Awake),
                pending_signals: Default::default(),
                has_pending_signals: Default::default(),
                start_time: zx::MonotonicInstant::get(),
                mutable_state: RwLock::new(ThreadGroupMutableState {
                    // Only a weak reference to the parent is stored.
                    parent: parent
                        .as_ref()
                        .map(|p| ThreadGroupParent::new(p.base.weak_self.clone())),
                    exit_signal,
                    tasks: BTreeMap::new(),
                    children: BTreeMap::new(),
                    zombie_children: vec![],
                    zombie_ptracees: ZombiePtracees::new(),
                    deferred_zombie_ptracers: vec![],
                    lifecycle_waiters: TypedWaitQueue::<ThreadGroupLifecycleWaitValue>::default(),
                    is_child_subreaper: false,
                    process_group: Arc::clone(&process_group),
                    did_exec: false,
                    last_signal: None,
                    run_state: Default::default(),
                    children_time_stats: Default::default(),
                    // The personality flags are copied from the parent, if any.
                    personality: parent
                        .as_ref()
                        .map(|p| p.personality)
                        .unwrap_or(Default::default()),
                    allowed_ptracers: PtraceAllowedPtracers::None,
                    exit_notifier: None,
                    notifier: None,
                }),
                log_syscalls_as_info: AtomicBool::new(false),
            };

            // Registration with the parent and the process group only happens for groups that
            // have a parent.
            if let Some(mut parent) = parent {
                thread_group.next_seccomp_filter_id.reset(parent.base.next_seccomp_filter_id.get());
                parent.children.insert(leader, weak_self.clone());
                process_group.insert(locked, &thread_group);
            };
            thread_group
        })
    }
662
    // NOTE(review): presumably this macro generates the `read()` / `write()` accessors over
    // `mutable_state` that the rest of this file calls — confirm against the definition in
    // `crate::mutable_state`.
    state_accessor!(ThreadGroup, mutable_state);
664
665    pub fn load_stopped(&self) -> StopState {
666        self.stop_state.load(Ordering::Relaxed)
667    }
668
    /// Causes the thread group to exit.
    ///
    /// If this is being called from a task that is part of the current thread group, the
    /// caller should pass `current_task`.  If ownership issues prevent passing `current_task`,
    /// then callers should use CurrentTask::thread_group_exit instead.
    ///
    /// Steps, in order:
    ///   1. If `current_task` is given, report a `PtraceOptions::TRACEEXIT` ptrace event
    ///      carrying the exit status.
    ///   2. Take the pid table write lock, then the thread group write lock. If the group is
    ///      already terminating, return without doing anything else.
    ///   3. Mark the group as terminating with `exit_status`, release its ptrace zombies, and
    ///      snapshot its tasks before dropping the thread group lock.
    ///   4. For every ptracee of this group: send SIGKILL if it has `PtraceOptions::EXITKILL`,
    ///      otherwise detach from it (the result of the detach is ignored).
    ///   5. Set the exit status of every task in the snapshot and send it SIGKILL.
    ///
    /// The pid table write lock is held until the function returns.
    pub fn exit(
        &self,
        locked: &mut Locked<Unlocked>,
        exit_status: ExitStatus,
        mut current_task: Option<&mut CurrentTask>,
    ) {
        if let Some(ref mut current_task) = current_task {
            current_task.ptrace_event(
                locked,
                PtraceOptions::TRACEEXIT,
                exit_status.signal_info_status() as u64,
            );
        }
        let mut pids = self.kernel.pids.write();
        let mut state = self.write();
        if state.is_terminating() {
            // The thread group is already terminating and all threads in the thread group have
            // already been interrupted.
            return;
        }

        state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());

        // Drop ptrace zombies
        state.zombie_ptracees.release(&mut pids);

        // Interrupt each task. Unlock the group because send_signal will lock the group in order
        // to call set_stopped.
        // SAFETY: tasks is kept on the stack. The static is required to ensure the lock on
        // ThreadGroup can be dropped.
        let tasks = state.tasks().map(TempRef::into_static).collect::<Vec<_>>();
        drop(state);

        // Detach from any ptraced tasks, killing the ones that set PTRACE_O_EXITKILL.
        // The tids are copied out first so that the `ptracees` lock is not held in the loop.
        let tracees = self.ptracees.lock().keys().cloned().collect::<Vec<_>>();
        for tracee in tracees {
            if let Some(task_ref) = pids.get_task(tracee).clone().upgrade() {
                let mut should_send_sigkill = false;
                if let Some(ptrace) = &task_ref.read().ptrace {
                    should_send_sigkill = ptrace.has_option(PtraceOptions::EXITKILL);
                }
                if should_send_sigkill {
                    send_standard_signal(locked, task_ref.as_ref(), SignalInfo::kernel(SIGKILL));
                    continue;
                }

                // Best effort: a failure to detach is deliberately ignored.
                let _ =
                    ptrace_detach(locked, &mut pids, self, task_ref.as_ref(), &UserAddress::NULL);
            }
        }

        for task in tasks {
            task.write().set_exit_status(exit_status.clone());
            send_standard_signal(locked, &task, SignalInfo::kernel(SIGKILL));
        }
    }
729
730    pub fn add(&self, task: &TempRef<'_, Task>) -> Result<(), Errno> {
731        let mut state = self.write();
732        if state.is_terminating() {
733            if state.tasks_count() == 0 {
734                log_warn!(
735                    "Task {} with leader {} terminating while adding its first task, \
736                not sending creation notification",
737                    task.tid,
738                    self.leader
739                );
740            }
741            return error!(EINVAL);
742        }
743        state.tasks.insert(task.tid, task.into());
744
745        Ok(())
746    }
747
748    /// Remove the task from the children of this ThreadGroup.
749    ///
750    /// It is important that the task is taken as an `OwnedRef`. It ensures the tasks of the
751    /// ThreadGroup are always valid as they are still valid when removed.
752    pub fn remove<L>(&self, locked: &mut Locked<L>, pids: &mut PidTable, task: &OwnedRef<Task>)
753    where
754        L: LockBefore<ProcessGroupState>,
755    {
756        task.set_ptrace_zombie(pids);
757        pids.remove_task(task.tid);
758
759        let mut state = self.write();
760
761        let persistent_info: TaskPersistentInfo =
762            if let Some(container) = state.tasks.remove(&task.tid) {
763                container.into()
764            } else {
765                // The task has never been added. The only expected case is that this thread was
766                // already terminating.
767                debug_assert!(state.is_terminating());
768                return;
769            };
770
771        if state.tasks.is_empty() {
772            let exit_status =
773                if let ThreadGroupRunState::Terminating(exit_status) = &state.run_state {
774                    exit_status.clone()
775                } else {
776                    let exit_status = task.exit_status().unwrap_or_else(|| {
777                        log_error!("Exiting without an exit code.");
778                        ExitStatus::Exit(u8::MAX)
779                    });
780                    state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());
781                    exit_status
782                };
783
784            // Replace PID table entry with a zombie.
785            let exit_info =
786                ProcessExitInfo { status: exit_status, exit_signal: state.exit_signal.clone() };
787            let zombie =
788                ZombieProcess::new(state.as_ref(), &persistent_info.real_creds(), exit_info);
789            pids.kill_process(self.leader, OwnedRef::downgrade(&zombie));
790
791            state.leave_process_group(locked, pids);
792
793            // I have no idea if dropping the lock here is correct, and I don't want to think about
794            // it. If problems do turn up with another thread observing an intermediate state of
795            // this exit operation, the solution is to unify locks. It should be sensible and
796            // possible for there to be a single lock that protects all (or nearly all) of the
797            // data accessed by both exit and wait. In gvisor and linux this is the lock on the
798            // equivalent of the PidTable. This is made more difficult by rust locks being
799            // containers that only lock the data they contain, but see
800            // https://docs.google.com/document/d/1YHrhBqNhU1WcrsYgGAu3JwwlVmFXPlwWHTJLAbwRebY/edit
801            // for an idea.
802            std::mem::drop(state);
803
804            // Remove the process from the cgroup2 pid table after TG lock is dropped.
805            // This function will hold the CgroupState lock which should be before the TG lock. See
806            // more in lock_cgroup2_pid_table comments.
807            self.kernel.cgroups.lock_cgroup2_pid_table().remove_process(self.into());
808
809            // We will need the immediate parent and the reaper. Once we have them, we can make
810            // sure to take the locks in the right order: parent before child.
811            let parent = self.read().parent.clone();
812            let reaper = self.find_reaper();
813
814            {
815                // Reparent the children.
816                if let Some(reaper) = reaper {
817                    let reaper = reaper.upgrade();
818                    {
819                        let mut reaper_state = reaper.write();
820                        let mut state = self.write();
821                        for (_pid, weak_child) in std::mem::take(&mut state.children) {
822                            if let Some(child) = weak_child.upgrade() {
823                                let mut child_state = child.write();
824
825                                child_state.exit_signal = Some(SIGCHLD);
826                                child_state.parent =
827                                    Some(ThreadGroupParent::new(Arc::downgrade(&reaper)));
828                                reaper_state.children.insert(child.leader, weak_child.clone());
829                            }
830                        }
831                        reaper_state.zombie_children.append(&mut state.zombie_children);
832                    }
833                    ZombiePtracees::reparent(self, &reaper);
834                } else {
835                    // If we don't have a reaper then just drop the zombies.
836                    let mut state = self.write();
837                    for zombie in state.zombie_children.drain(..) {
838                        zombie.release(pids);
839                    }
840                    state.zombie_ptracees.release(pids);
841                }
842            }
843
844            #[cfg(any(test, debug_assertions))]
845            {
846                let state = self.read();
847                assert!(state.zombie_children.is_empty());
848                assert!(state.zombie_ptracees.is_empty());
849            }
850
851            if let Some(ref parent) = parent {
852                let parent = parent.upgrade();
853                let mut tracer_pid = None;
854                if let Some(ptrace) = &task.read().ptrace {
855                    tracer_pid = Some(ptrace.get_pid());
856                }
857
858                let maybe_zombie = 'compute_zombie: {
859                    if let Some(tracer_pid) = tracer_pid {
860                        if let Some(ref tracer) = pids.get_task(tracer_pid).upgrade() {
861                            break 'compute_zombie tracer
862                                .thread_group()
863                                .maybe_notify_tracer(task, pids, &parent, zombie);
864                        }
865                    }
866                    Some(zombie)
867                };
868                if let Some(zombie) = maybe_zombie {
869                    parent.do_zombie_notifications(zombie);
870                }
871            } else {
872                zombie.release(pids);
873            }
874
875            // TODO: Set the error_code on the Zircon process object. Currently missing a way
876            // to do this in Zircon. Might be easier in the new execution model.
877
878            // Once the last zircon thread stops, the zircon process will also stop executing.
879
880            if let Some(parent) = parent {
881                let parent = parent.upgrade();
882                parent.check_orphans(locked, pids);
883            }
884        }
885    }
886
887    pub fn do_zombie_notifications(&self, zombie: OwnedRef<ZombieProcess>) {
888        let mut state = self.write();
889
890        state.children.remove(&zombie.pid());
891        state
892            .deferred_zombie_ptracers
893            .retain(|dzp| dzp.tracee_thread_group_key != zombie.thread_group_key);
894
895        let exit_signal = zombie.exit_info.exit_signal;
896        let mut signal_info = zombie.to_wait_result().as_signal_info();
897
898        state.zombie_children.push(zombie);
899        state.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
900
901        // Send signals
902        if let Some(exit_signal) = exit_signal {
903            signal_info.signal = exit_signal;
904            state.send_signal(signal_info);
905        }
906    }
907
908    /// Notifies the tracer if appropriate.  Returns Some(zombie) if caller
909    /// needs to notify the parent, None otherwise.  The caller should probably
910    /// invoke parent.do_zombie_notifications(zombie) on the result.
911    fn maybe_notify_tracer(
912        &self,
913        tracee: &Task,
914        mut pids: &mut PidTable,
915        parent: &ThreadGroup,
916        zombie: OwnedRef<ZombieProcess>,
917    ) -> Option<OwnedRef<ZombieProcess>> {
918        if self.read().zombie_ptracees.has_tracee(tracee.tid) {
919            if self == parent {
920                // The tracer is the parent and has not consumed the
921                // notification.  Don't bother with the ptracee stuff, and just
922                // notify the parent.
923                self.write().zombie_ptracees.remove(pids, tracee.tid);
924                return Some(zombie);
925            } else {
926                // The tracer is not the parent and the tracer has not consumed
927                // the notification.
928                {
929                    // Tell the parent to expect a notification later.
930                    let mut parent_state = parent.write();
931                    parent_state
932                        .deferred_zombie_ptracers
933                        .push(DeferredZombiePTracer::new(self, tracee));
934                    parent_state.children.remove(&tracee.get_pid());
935                }
936                // Tell the tracer that there is a notification pending.
937                let mut state = self.write();
938                state.zombie_ptracees.set_parent_of(tracee.tid, Some(zombie), parent);
939                tracee.write().notify_ptracers();
940                return None;
941            }
942        } else if self == parent {
943            // The tracer is the parent and has already consumed the parent
944            // notification.  No further action required.
945            parent.write().children.remove(&tracee.tid);
946            zombie.release(&mut pids);
947            return None;
948        }
949        // The tracer is not the parent and has already consumed the parent
950        // notification.  Notify the parent.
951        Some(zombie)
952    }
953
954    /// Find the task which will adopt our children after we die.
955    fn find_reaper(&self) -> Option<ThreadGroupParent> {
956        let mut weak_parent = self.read().parent.clone()?;
957        loop {
958            weak_parent = {
959                let parent = weak_parent.upgrade();
960                let parent_state = parent.read();
961                if parent_state.is_child_subreaper {
962                    break;
963                }
964                match parent_state.parent {
965                    Some(ref next_parent) => next_parent.clone(),
966                    None => break,
967                }
968            };
969        }
970        Some(weak_parent)
971    }
972
973    pub fn setsid<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
974    where
975        L: LockBefore<ProcessGroupState>,
976    {
977        let pids = self.kernel.pids.read();
978        if pids.get_process_group(self.leader).is_some() {
979            return error!(EPERM);
980        }
981        let process_group = ProcessGroup::new(self.leader, None);
982        pids.add_process_group(process_group.clone());
983        self.write().set_process_group(locked, process_group, &pids);
984        self.check_orphans(locked, &pids);
985
986        Ok(())
987    }
988
989    pub fn setpgid<L>(
990        &self,
991        locked: &mut Locked<L>,
992        current_task: &CurrentTask,
993        target: &Task,
994        pgid: pid_t,
995    ) -> Result<(), Errno>
996    where
997        L: LockBefore<ProcessGroupState>,
998    {
999        let pids = self.kernel.pids.read();
1000
1001        {
1002            let current_process_group = Arc::clone(&self.read().process_group);
1003
1004            // The target process must be either the current process of a child of the current process
1005            let mut target_thread_group = target.thread_group().write();
1006            let is_target_current_process_child =
1007                target_thread_group.parent.as_ref().map(|tg| tg.upgrade().leader)
1008                    == Some(self.leader);
1009            if target_thread_group.leader() != self.leader && !is_target_current_process_child {
1010                return error!(ESRCH);
1011            }
1012
1013            // If the target process is a child of the current task, it must not have executed one of the exec
1014            // function.
1015            if is_target_current_process_child && target_thread_group.did_exec {
1016                return error!(EACCES);
1017            }
1018
1019            let new_process_group;
1020            {
1021                let target_process_group = &target_thread_group.process_group;
1022
1023                // The target process must not be a session leader and must be in the same session as the current process.
1024                if target_thread_group.leader() == target_process_group.session.leader
1025                    || current_process_group.session != target_process_group.session
1026                {
1027                    return error!(EPERM);
1028                }
1029
1030                let target_pgid = if pgid == 0 { target_thread_group.leader() } else { pgid };
1031                if target_pgid < 0 {
1032                    return error!(EINVAL);
1033                }
1034
1035                if target_pgid == target_process_group.leader {
1036                    return Ok(());
1037                }
1038
1039                // If pgid is not equal to the target process id, the associated process group must exist
1040                // and be in the same session as the target process.
1041                if target_pgid != target_thread_group.leader() {
1042                    new_process_group =
1043                        pids.get_process_group(target_pgid).ok_or_else(|| errno!(EPERM))?;
1044                    if new_process_group.session != target_process_group.session {
1045                        return error!(EPERM);
1046                    }
1047                    security::check_setpgid_access(current_task, target)?;
1048                } else {
1049                    security::check_setpgid_access(current_task, target)?;
1050                    // Create a new process group
1051                    new_process_group =
1052                        ProcessGroup::new(target_pgid, Some(target_process_group.session.clone()));
1053                    pids.add_process_group(new_process_group.clone());
1054                }
1055            }
1056
1057            target_thread_group.set_process_group(locked, new_process_group, &pids);
1058        }
1059
1060        target.thread_group().check_orphans(locked, &pids);
1061
1062        Ok(())
1063    }
1064
1065    fn itimer_real(&self) -> IntervalTimerHandle {
1066        self.timers.itimer_real()
1067    }
1068
1069    pub fn set_itimer(
1070        &self,
1071        current_task: &CurrentTask,
1072        which: u32,
1073        value: itimerval,
1074    ) -> Result<itimerval, Errno> {
1075        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1076            // We don't support setting these timers.
1077            // The gvisor test suite clears ITIMER_PROF as part of its test setup logic, so we support
1078            // clearing these values.
1079            if value.it_value.tv_sec == 0 && value.it_value.tv_usec == 0 {
1080                return Ok(itimerval::default());
1081            }
1082            track_stub!(TODO("https://fxbug.dev/322874521"), "Unsupported itimer type", which);
1083            return error!(ENOTSUP);
1084        }
1085
1086        if which != ITIMER_REAL {
1087            return error!(EINVAL);
1088        }
1089        let itimer_real = self.itimer_real();
1090        let prev_remaining = itimer_real.time_remaining();
1091        if value.it_value.tv_sec != 0 || value.it_value.tv_usec != 0 {
1092            itimer_real.arm(current_task, itimerspec_from_itimerval(value), false)?;
1093        } else {
1094            itimer_real.disarm(current_task)?;
1095        }
1096        Ok(itimerval {
1097            it_value: timeval_from_duration(prev_remaining.remainder),
1098            it_interval: timeval_from_duration(prev_remaining.interval),
1099        })
1100    }
1101
1102    pub fn get_itimer(&self, which: u32) -> Result<itimerval, Errno> {
1103        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1104            // We don't support setting these timers, so we can accurately report that these are not set.
1105            return Ok(itimerval::default());
1106        }
1107        if which != ITIMER_REAL {
1108            return error!(EINVAL);
1109        }
1110        let remaining = self.itimer_real().time_remaining();
1111        Ok(itimerval {
1112            it_value: timeval_from_duration(remaining.remainder),
1113            it_interval: timeval_from_duration(remaining.interval),
1114        })
1115    }
1116
1117    /// Check whether the stop state is compatible with `new_stopped`. If it is return it,
1118    /// otherwise, return None.
1119    fn check_stopped_state(
1120        &self,
1121        new_stopped: StopState,
1122        finalize_only: bool,
1123    ) -> Option<StopState> {
1124        let stopped = self.load_stopped();
1125        if finalize_only && !stopped.is_stopping_or_stopped() {
1126            return Some(stopped);
1127        }
1128
1129        if stopped.is_illegal_transition(new_stopped) {
1130            return Some(stopped);
1131        }
1132
1133        return None;
1134    }
1135
1136    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
1137    /// does not update the signal.  If |finalize_only| is set, will check that
1138    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
1139    /// before executing it.
1140    ///
1141    /// Returns the latest stop state after any changes.
1142    pub fn set_stopped(
1143        &self,
1144        new_stopped: StopState,
1145        siginfo: Option<SignalInfo>,
1146        finalize_only: bool,
1147    ) -> StopState {
1148        // Perform an early return check to see if we can avoid taking the lock.
1149        if let Some(stopped) = self.check_stopped_state(new_stopped, finalize_only) {
1150            return stopped;
1151        }
1152
1153        self.write().set_stopped(new_stopped, siginfo, finalize_only)
1154    }
1155
1156    /// Ensures |session| is the controlling session inside of |terminal_controller|, and returns a
1157    /// reference to the |TerminalController|.
1158    fn check_terminal_controller(
1159        session: &Arc<Session>,
1160        terminal_controller: &Option<TerminalController>,
1161    ) -> Result<(), Errno> {
1162        if let Some(terminal_controller) = terminal_controller {
1163            if let Some(terminal_session) = terminal_controller.session.upgrade() {
1164                if Arc::ptr_eq(session, &terminal_session) {
1165                    return Ok(());
1166                }
1167            }
1168        }
1169        error!(ENOTTY)
1170    }
1171
1172    pub fn get_foreground_process_group(&self, terminal: &Terminal) -> Result<pid_t, Errno> {
1173        let state = self.read();
1174        let process_group = &state.process_group;
1175        let terminal_state = terminal.read();
1176
1177        // "When fd does not refer to the controlling terminal of the calling
1178        // process, -1 is returned" - tcgetpgrp(3)
1179        Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1180        let pid = process_group.session.read().get_foreground_process_group_leader();
1181        Ok(pid)
1182    }
1183
1184    pub fn set_foreground_process_group<L>(
1185        &self,
1186        locked: &mut Locked<L>,
1187        current_task: &CurrentTask,
1188        terminal: &Terminal,
1189        pgid: pid_t,
1190    ) -> Result<(), Errno>
1191    where
1192        L: LockBefore<ProcessGroupState>,
1193    {
1194        let process_group;
1195        let send_ttou;
1196        {
1197            // Keep locks to ensure atomicity.
1198            let pids = self.kernel.pids.read();
1199            let state = self.read();
1200            process_group = Arc::clone(&state.process_group);
1201            let terminal_state = terminal.read();
1202            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1203
1204            // pgid must be positive.
1205            if pgid < 0 {
1206                return error!(EINVAL);
1207            }
1208
1209            let new_process_group = pids.get_process_group(pgid).ok_or_else(|| errno!(ESRCH))?;
1210            if new_process_group.session != process_group.session {
1211                return error!(EPERM);
1212            }
1213
1214            let mut session_state = process_group.session.write();
1215            // If the calling process is a member of a background group and not ignoring SIGTTOU, a
1216            // SIGTTOU signal is sent to all members of this background process group.
1217            send_ttou = process_group.leader != session_state.get_foreground_process_group_leader()
1218                && !current_task.read().signal_mask().has_signal(SIGTTOU)
1219                && self.signal_actions.get(SIGTTOU).sa_handler != SIG_IGN;
1220
1221            if !send_ttou {
1222                session_state.set_foreground_process_group(&new_process_group);
1223            }
1224        }
1225
1226        // Locks must not be held when sending signals.
1227        if send_ttou {
1228            process_group.send_signals(locked, &[SIGTTOU]);
1229            return error!(EINTR);
1230        }
1231
1232        Ok(())
1233    }
1234
1235    pub fn set_controlling_terminal(
1236        &self,
1237        current_task: &CurrentTask,
1238        terminal: &Terminal,
1239        is_main: bool,
1240        steal: bool,
1241        is_readable: bool,
1242    ) -> Result<(), Errno> {
1243        // Keep locks to ensure atomicity.
1244        let state = self.read();
1245        let process_group = &state.process_group;
1246        let mut terminal_state = terminal.write();
1247        let mut session_writer = process_group.session.write();
1248
1249        // "The calling process must be a session leader and not have a
1250        // controlling terminal already." - tty_ioctl(4)
1251        if process_group.session.leader != self.leader
1252            || session_writer.controlling_terminal.is_some()
1253        {
1254            return error!(EINVAL);
1255        }
1256
1257        let mut has_admin_capability_determined = false;
1258
1259        // "If this terminal is already the controlling terminal of a different
1260        // session group, then the ioctl fails with EPERM, unless the caller
1261        // has the CAP_SYS_ADMIN capability and arg equals 1, in which case the
1262        // terminal is stolen, and all processes that had it as controlling
1263        // terminal lose it." - tty_ioctl(4)
1264        if let Some(other_session) =
1265            terminal_state.controller.as_ref().and_then(|cs| cs.session.upgrade())
1266        {
1267            if other_session != process_group.session {
1268                if !steal {
1269                    return error!(EPERM);
1270                }
1271                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1272                has_admin_capability_determined = true;
1273
1274                // Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
1275                other_session.write().controlling_terminal = None;
1276            }
1277        }
1278
1279        if !is_readable && !has_admin_capability_determined {
1280            security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1281        }
1282
1283        session_writer.controlling_terminal = Some(ControllingTerminal::new(terminal, is_main));
1284        terminal_state.controller = TerminalController::new(&process_group.session);
1285        Ok(())
1286    }
1287
1288    pub fn release_controlling_terminal<L>(
1289        &self,
1290        locked: &mut Locked<L>,
1291        _current_task: &CurrentTask,
1292        terminal: &Terminal,
1293        is_main: bool,
1294    ) -> Result<(), Errno>
1295    where
1296        L: LockBefore<ProcessGroupState>,
1297    {
1298        let process_group;
1299        {
1300            // Keep locks to ensure atomicity.
1301            let state = self.read();
1302            process_group = Arc::clone(&state.process_group);
1303            let mut terminal_state = terminal.write();
1304            let mut session_writer = process_group.session.write();
1305
1306            // tty must be the controlling terminal.
1307            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1308            if !session_writer
1309                .controlling_terminal
1310                .as_ref()
1311                .map_or(false, |ct| ct.matches(terminal, is_main))
1312            {
1313                return error!(ENOTTY);
1314            }
1315
1316            // "If the process was session leader, then send SIGHUP and SIGCONT to the foreground
1317            // process group and all processes in the current session lose their controlling terminal."
1318            // - tty_ioctl(4)
1319
1320            // Remove tty as the controlling tty for each process in the session, then
1321            // send them SIGHUP and SIGCONT.
1322
1323            session_writer.controlling_terminal = None;
1324            terminal_state.controller = None;
1325        }
1326
1327        if process_group.session.leader == self.leader {
1328            process_group.send_signals(locked, &[SIGHUP, SIGCONT]);
1329        }
1330
1331        Ok(())
1332    }
1333
1334    fn check_orphans<L>(&self, locked: &mut Locked<L>, pids: &PidTable)
1335    where
1336        L: LockBefore<ProcessGroupState>,
1337    {
1338        let mut thread_groups = self.read().children().collect::<Vec<_>>();
1339        let this = self.weak_self.upgrade().unwrap();
1340        thread_groups.push(this);
1341        let process_groups =
1342            thread_groups.iter().map(|tg| Arc::clone(&tg.read().process_group)).unique();
1343        for pg in process_groups {
1344            pg.check_orphaned(locked, pids);
1345        }
1346    }
1347
1348    pub fn get_rlimit<L>(&self, locked: &mut Locked<L>, resource: Resource) -> u64
1349    where
1350        L: LockBefore<ThreadGroupLimits>,
1351    {
1352        self.limits.lock(locked).get(resource).rlim_cur
1353    }
1354
1355    /// Adjusts the rlimits of the ThreadGroup to which `target_task` belongs to.
1356    pub fn adjust_rlimits<L>(
1357        locked: &mut Locked<L>,
1358        current_task: &CurrentTask,
1359        target_task: &Task,
1360        resource: Resource,
1361        maybe_new_limit: Option<rlimit>,
1362    ) -> Result<rlimit, Errno>
1363    where
1364        L: LockBefore<ThreadGroupLimits>,
1365    {
1366        let thread_group = target_task.thread_group();
1367        let can_increase_rlimit = security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE);
1368        let mut limit_state = thread_group.limits.lock(locked);
1369        let old_limit = limit_state.get(resource);
1370        if let Some(new_limit) = maybe_new_limit {
1371            if new_limit.rlim_max > old_limit.rlim_max && !can_increase_rlimit {
1372                return error!(EPERM);
1373            }
1374            security::task_setrlimit(current_task, &target_task, old_limit, new_limit)?;
1375            limit_state.set(resource, new_limit)
1376        }
1377        Ok(old_limit)
1378    }
1379
1380    pub fn time_stats(&self) -> TaskTimeStats {
1381        let process: &zx::Process = if self.process.as_handle_ref().is_invalid() {
1382            // `process` must be valid for all tasks, except `kthreads`. In that case get the
1383            // stats from starnix process.
1384            assert_eq!(
1385                self as *const ThreadGroup,
1386                Arc::as_ptr(&self.kernel.kthreads.system_thread_group())
1387            );
1388            &self.kernel.kthreads.starnix_process
1389        } else {
1390            &self.process
1391        };
1392
1393        let info =
1394            zx::Task::get_runtime_info(process).expect("Failed to get starnix process stats");
1395        TaskTimeStats {
1396            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
1397            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
1398            system_time: zx::MonotonicDuration::default(),
1399        }
1400    }
1401
1402    /// For each task traced by this thread_group that matches the given
1403    /// selector, acquire its TaskMutableState and ptracees lock and execute the
1404    /// given function.
1405    pub fn get_ptracees_and(
1406        &self,
1407        selector: &ProcessSelector,
1408        pids: &PidTable,
1409        f: &mut dyn FnMut(&Task, &TaskMutableState),
1410    ) {
1411        for tracee in self
1412            .ptracees
1413            .lock()
1414            .keys()
1415            .filter(|tracee_tid| selector.match_tid(**tracee_tid, &pids))
1416            .map(|tracee_tid| pids.get_task(*tracee_tid))
1417        {
1418            if let Some(task_ref) = tracee.clone().upgrade() {
1419                let task_state = task_ref.write();
1420                if task_state.ptrace.is_some() {
1421                    f(&task_ref, &task_state);
1422                }
1423            }
1424        }
1425    }
1426
1427    /// Returns a tracee whose state has changed, so that waitpid can report on
1428    /// it. If this returns a value, and the pid is being traced, the tracer
1429    /// thread is deemed to have seen the tracee ptrace-stop for the purposes of
1430    /// PTRACE_LISTEN.
1431    pub fn get_waitable_ptracee(
1432        &self,
1433        selector: &ProcessSelector,
1434        options: &WaitingOptions,
1435        pids: &mut PidTable,
1436    ) -> Option<WaitResult> {
1437        // This checks to see if the target is a zombie ptracee.
1438        let waitable_entry = self.write().zombie_ptracees.get_waitable_entry(selector, options);
1439        match waitable_entry {
1440            None => (),
1441            Some((zombie, None)) => return Some(zombie.to_wait_result()),
1442            Some((zombie, Some((tg, z)))) => {
1443                if let Some(tg) = tg.upgrade() {
1444                    if Arc::as_ptr(&tg) != self as *const Self {
1445                        tg.do_zombie_notifications(z);
1446                    } else {
1447                        {
1448                            let mut state = tg.write();
1449                            state.children.remove(&z.pid());
1450                            state
1451                                .deferred_zombie_ptracers
1452                                .retain(|dzp| dzp.tracee_thread_group_key != z.thread_group_key);
1453                        }
1454
1455                        z.release(pids);
1456                    };
1457                }
1458                return Some(zombie.to_wait_result());
1459            }
1460        }
1461
1462        let mut tasks = vec![];
1463
1464        // This checks to see if the target is a living ptracee
1465        self.get_ptracees_and(selector, pids, &mut |task: &Task, _| {
1466            tasks.push(task.weak_self.clone());
1467        });
1468        for task in tasks {
1469            let Some(task_ref) = task.upgrade() else {
1470                continue;
1471            };
1472
1473            let process_state = &mut task_ref.thread_group().write();
1474            let mut task_state = task_ref.write();
1475            if task_state
1476                .ptrace
1477                .as_ref()
1478                .is_some_and(|ptrace| ptrace.is_waitable(task_ref.load_stopped(), options))
1479            {
1480                // We've identified a potential target.  Need to return either
1481                // the process's information (if we are in group-stop) or the
1482                // thread's information (if we are in a different stop).
1483
1484                // The shared information:
1485                let mut pid: i32 = 0;
1486                let info = process_state.tasks.values().next().unwrap().info().clone();
1487                let uid = info.real_creds().uid;
1488                let mut exit_status = None;
1489                let exit_signal = process_state.exit_signal.clone();
1490                let time_stats =
1491                    process_state.base.time_stats() + process_state.children_time_stats;
1492                let task_stopped = task_ref.load_stopped();
1493
1494                #[derive(PartialEq)]
1495                enum ExitType {
1496                    None,
1497                    Cont,
1498                    Stop,
1499                    Kill,
1500                }
1501                if process_state.is_waitable() {
1502                    let ptrace = &mut task_state.ptrace;
1503                    // The information for processes, if we were in group stop.
1504                    let process_stopped = process_state.base.load_stopped();
1505                    let mut fn_type = ExitType::None;
1506                    if process_stopped == StopState::Awake && options.wait_for_continued {
1507                        fn_type = ExitType::Cont;
1508                    }
1509                    let mut event = ptrace
1510                        .as_ref()
1511                        .map_or(PtraceEvent::None, |ptrace| {
1512                            ptrace.event_data.as_ref().map_or(PtraceEvent::None, |data| data.event)
1513                        })
1514                        .clone();
1515                    // Tasks that are ptrace'd always get stop notifications.
1516                    if process_stopped == StopState::GroupStopped
1517                        && (options.wait_for_stopped || ptrace.is_some())
1518                    {
1519                        fn_type = ExitType::Stop;
1520                    }
1521                    if fn_type != ExitType::None {
1522                        let siginfo = if options.keep_waitable_state {
1523                            process_state.last_signal.clone()
1524                        } else {
1525                            process_state.last_signal.take()
1526                        };
1527                        if let Some(mut siginfo) = siginfo {
1528                            if task_ref.thread_group().load_stopped() == StopState::GroupStopped
1529                                && ptrace.as_ref().is_some_and(|ptrace| ptrace.is_seized())
1530                            {
1531                                if event == PtraceEvent::None {
1532                                    event = PtraceEvent::Stop;
1533                                }
1534                                siginfo.code |= (PtraceEvent::Stop as i32) << 8;
1535                            }
1536                            if siginfo.signal == SIGKILL {
1537                                fn_type = ExitType::Kill;
1538                            }
1539                            exit_status = match fn_type {
1540                                ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1541                                ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1542                                ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1543                                _ => None,
1544                            };
1545                        }
1546                        // Clear the wait status of the ptrace, because we're
1547                        // using the tg status instead.
1548                        ptrace
1549                            .as_mut()
1550                            .map(|ptrace| ptrace.get_last_signal(options.keep_waitable_state));
1551                    }
1552                    pid = process_state.base.leader;
1553                }
1554                if exit_status == None {
1555                    if let Some(ptrace) = task_state.ptrace.as_mut() {
1556                        // The information for the task, if we were in a non-group stop.
1557                        let mut fn_type = ExitType::None;
1558                        let event = ptrace
1559                            .event_data
1560                            .as_ref()
1561                            .map_or(PtraceEvent::None, |event| event.event);
1562                        if task_stopped == StopState::Awake {
1563                            fn_type = ExitType::Cont;
1564                        }
1565                        if task_stopped.is_stopping_or_stopped()
1566                            || ptrace.stop_status == PtraceStatus::Listening
1567                        {
1568                            fn_type = ExitType::Stop;
1569                        }
1570                        if fn_type != ExitType::None {
1571                            if let Some(siginfo) =
1572                                ptrace.get_last_signal(options.keep_waitable_state)
1573                            {
1574                                if siginfo.signal == SIGKILL {
1575                                    fn_type = ExitType::Kill;
1576                                }
1577                                exit_status = match fn_type {
1578                                    ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1579                                    ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1580                                    ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1581                                    _ => None,
1582                                };
1583                            }
1584                        }
1585                        pid = task_ref.get_tid();
1586                    }
1587                }
1588                if let Some(exit_status) = exit_status {
1589                    return Some(WaitResult {
1590                        pid,
1591                        uid,
1592                        exit_info: ProcessExitInfo { status: exit_status, exit_signal },
1593                        time_stats,
1594                    });
1595                }
1596            }
1597        }
1598        None
1599    }
1600
1601    /// Attempts to send an unchecked signal to this thread group.
1602    ///
1603    /// - `current_task`: The task that is sending the signal.
1604    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1605    /// where rights are to be checked but no signal is actually sent.
1606    ///
1607    /// # Returns
1608    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1609    /// the error that was encountered.
1610    pub fn send_signal_unchecked(
1611        &self,
1612        current_task: &CurrentTask,
1613        unchecked_signal: UncheckedSignal,
1614    ) -> Result<(), Errno> {
1615        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1616            let signal_info = SignalInfo::with_detail(
1617                signal,
1618                SI_USER as i32,
1619                SignalDetail::Kill {
1620                    pid: current_task.thread_group().leader,
1621                    uid: current_task.current_creds().uid,
1622                },
1623            );
1624
1625            self.write().send_signal(signal_info);
1626        }
1627
1628        Ok(())
1629    }
1630
1631    /// Sends a signal to this thread_group without performing any access checks.
1632    ///
1633    /// # Safety
1634    /// This is unsafe, because it should only be called by tools and tests.
1635    pub unsafe fn send_signal_unchecked_debug(
1636        &self,
1637        current_task: &CurrentTask,
1638        unchecked_signal: UncheckedSignal,
1639    ) -> Result<(), Errno> {
1640        let signal = Signal::try_from(unchecked_signal)?;
1641        let signal_info = SignalInfo::with_detail(
1642            signal,
1643            SI_USER as i32,
1644            SignalDetail::Kill {
1645                pid: current_task.thread_group().leader,
1646                uid: current_task.current_creds().uid,
1647            },
1648        );
1649
1650        self.write().send_signal(signal_info);
1651        Ok(())
1652    }
1653
1654    /// Attempts to send an unchecked signal to this thread group, with info read from
1655    /// `siginfo_ref`.
1656    ///
1657    /// - `current_task`: The task that is sending the signal.
1658    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1659    /// where rights are to be checked but no signal is actually sent.
1660    /// - `siginfo_ref`: The siginfo that will be enqueued.
1661    /// - `options`: Options for how to convert the siginfo into a signal info.
1662    ///
1663    /// # Returns
1664    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1665    /// the error that was encountered.
1666    #[track_caller]
1667    pub fn send_signal_unchecked_with_info(
1668        &self,
1669        current_task: &CurrentTask,
1670        unchecked_signal: UncheckedSignal,
1671        siginfo_ref: UserAddress,
1672        options: IntoSignalInfoOptions,
1673    ) -> Result<(), Errno> {
1674        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1675            let siginfo = UncheckedSignalInfo::read_from_siginfo(current_task, siginfo_ref)?;
1676            if self.leader != current_task.get_pid()
1677                && (siginfo.code() >= 0 || siginfo.code() == SI_TKILL)
1678            {
1679                return error!(EPERM);
1680            }
1681
1682            self.write().send_signal(siginfo.into_signal_info(signal, options)?);
1683        }
1684
1685        Ok(())
1686    }
1687
1688    /// Checks whether or not `current_task` can signal this thread group with `unchecked_signal`.
1689    ///
1690    /// Returns:
1691    ///   - `Ok(Some(Signal))` if the signal passed checks and should be sent.
1692    ///   - `Ok(None)` if the signal passed checks, but should not be sent. This is used by
1693    ///   userspace for permission checks.
1694    ///   - `Err(_)` if the permission checks failed.
1695    fn check_signal_access(
1696        &self,
1697        current_task: &CurrentTask,
1698        unchecked_signal: UncheckedSignal,
1699    ) -> Result<Option<Signal>, Errno> {
1700        // Pick an arbitrary task in thread_group to check permissions.
1701        //
1702        // Tasks can technically have different credentials, but in practice they are kept in sync.
1703        let state = self.read();
1704        let target_task = state.get_live_task()?;
1705        current_task.can_signal(&target_task, unchecked_signal)?;
1706
1707        // 0 is a sentinel value used to do permission checks.
1708        if unchecked_signal.is_zero() {
1709            return Ok(None);
1710        }
1711
1712        let signal = Signal::try_from(unchecked_signal)?;
1713        security::check_signal_access(current_task, &target_task, signal)?;
1714
1715        Ok(Some(signal))
1716    }
1717
1718    pub fn has_signal_queued(&self, signal: Signal) -> bool {
1719        self.pending_signals.lock().has_queued(signal)
1720    }
1721
1722    pub fn num_signals_queued(&self) -> usize {
1723        self.pending_signals.lock().num_queued()
1724    }
1725
1726    pub fn get_pending_signals(&self) -> SigSet {
1727        self.pending_signals.lock().pending()
1728    }
1729
1730    pub fn is_any_signal_allowed_by_mask(&self, mask: SigSet) -> bool {
1731        self.pending_signals.lock().is_any_allowed_by_mask(mask)
1732    }
1733
1734    pub fn take_next_signal_where<F>(&self, predicate: F) -> Option<SignalInfo>
1735    where
1736        F: Fn(&SignalInfo) -> bool,
1737    {
1738        let mut signals = self.pending_signals.lock();
1739        let r = signals.take_next_where(predicate);
1740        self.has_pending_signals.store(!signals.is_empty(), Ordering::Relaxed);
1741        r
1742    }
1743
1744    /// Drive this `ThreadGroup` to exit, allowing it time to handle SIGTERM before sending SIGKILL.
1745    ///
1746    /// Returns once `ThreadGroup::exit()` has completed.
1747    ///
1748    /// Must be called from the system task.
1749    pub async fn shut_down(this: Weak<Self>) {
1750        const SHUTDOWN_SIGNAL_HANDLING_TIMEOUT: zx::MonotonicDuration =
1751            zx::MonotonicDuration::from_seconds(1);
1752
1753        // Prepare for shutting down the thread group.
1754        let (tg_name, mut on_exited) = {
1755            // Nest this upgraded access so TempRefs aren't held across await-points.
1756            let Some(this) = this.upgrade() else {
1757                return;
1758            };
1759
1760            // Register a channel to be notified when exit() is complete.
1761            let (on_exited_send, on_exited) = futures::channel::oneshot::channel();
1762            this.write().exit_notifier = Some(on_exited_send);
1763
1764            // We want to be able to log about this thread group without upgrading the WeakRef.
1765            let tg_name = format!("{this:?}");
1766
1767            (tg_name, on_exited)
1768        };
1769
1770        log_debug!(tg:% = tg_name; "shutting down thread group, sending SIGTERM");
1771        this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGTERM)));
1772
1773        // Give thread groups some time to handle SIGTERM, proceeding early if they exit
1774        let timeout = fuchsia_async::Timer::new(SHUTDOWN_SIGNAL_HANDLING_TIMEOUT);
1775        futures::pin_mut!(timeout);
1776
1777        // Use select_biased instead of on_timeout() so that we can await on on_exited later
1778        futures::select_biased! {
1779            _ = &mut on_exited => (),
1780            _ = timeout => {
1781                log_debug!(tg:% = tg_name; "sending SIGKILL");
1782                this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGKILL)));
1783            },
1784        };
1785
1786        log_debug!(tg:% = tg_name; "waiting for exit");
1787        // It doesn't matter whether ThreadGroup::exit() was called or the process exited with
1788        // a return code and dropped the sender end of the channel.
1789        on_exited.await.ok();
1790        log_debug!(tg:% = tg_name; "thread group shutdown complete");
1791    }
1792
1793    /// Returns the KOID of the process for this thread group.
1794    /// This method should be used to when mapping 32 bit linux process ids to KOIDs
1795    /// to avoid breaking the encapsulation of the zx::process within the ThreadGroup.
1796    /// This encapsulation is important since the relationship between the ThreadGroup
1797    /// and the Process may change over time. See [ThreadGroup::process] for more details.
1798    pub fn get_process_koid(&self) -> Result<Koid, Status> {
1799        self.process.koid()
1800    }
1801}
1802
1803#[cfg_attr(
1804    feature = "debug_and_trace_logs_enabled",
1805    allow(clippy::large_enum_variant, reason = "no need to optimize enum size in debug builds")
1806)]
1807pub enum WaitableChildResult {
1808    ReadyNow(WaitResult),
1809    ShouldWait,
1810    NoneFound,
1811}
1812
1813#[apply(state_implementation!)]
1814impl ThreadGroupMutableState<Base = ThreadGroup> {
1815    pub fn leader(&self) -> pid_t {
1816        self.base.leader
1817    }
1818
1819    pub fn leader_command(&self) -> TaskCommand {
1820        self.get_task(self.leader())
1821            .map(|l| l.command())
1822            .unwrap_or_else(|| TaskCommand::new(b"<leader exited>"))
1823    }
1824
1825    pub fn is_terminating(&self) -> bool {
1826        !matches!(self.run_state, ThreadGroupRunState::Running)
1827    }
1828
1829    pub fn children(&self) -> impl Iterator<Item = Arc<ThreadGroup>> + '_ {
1830        self.children.values().map(|v| {
1831            v.upgrade().expect("Weak references to processes in ThreadGroup must always be valid")
1832        })
1833    }
1834
1835    pub fn tasks(&self) -> impl Iterator<Item = TempRef<'_, Task>> + '_ {
1836        self.tasks.values().flat_map(|t| t.upgrade())
1837    }
1838
1839    pub fn task_ids(&self) -> impl Iterator<Item = &tid_t> {
1840        self.tasks.keys()
1841    }
1842
1843    pub fn contains_task(&self, tid: tid_t) -> bool {
1844        self.tasks.contains_key(&tid)
1845    }
1846
1847    pub fn get_task(&self, tid: tid_t) -> Option<TempRef<'_, Task>> {
1848        self.tasks.get(&tid).and_then(|t| t.upgrade())
1849    }
1850
1851    pub fn tasks_count(&self) -> usize {
1852        self.tasks.len()
1853    }
1854
1855    pub fn get_ppid(&self) -> pid_t {
1856        match &self.parent {
1857            Some(parent) => parent.upgrade().leader,
1858            None => 0,
1859        }
1860    }
1861
1862    fn set_process_group<L>(
1863        &mut self,
1864        locked: &mut Locked<L>,
1865        process_group: Arc<ProcessGroup>,
1866        pids: &PidTable,
1867    ) where
1868        L: LockBefore<ProcessGroupState>,
1869    {
1870        if self.process_group == process_group {
1871            return;
1872        }
1873        self.leave_process_group(locked, pids);
1874        self.process_group = process_group;
1875        self.process_group.insert(locked, self.base);
1876    }
1877
1878    fn leave_process_group<L>(&mut self, locked: &mut Locked<L>, pids: &PidTable)
1879    where
1880        L: LockBefore<ProcessGroupState>,
1881    {
1882        if self.process_group.remove(locked, self.base) {
1883            self.process_group.session.write().remove(self.process_group.leader);
1884            pids.remove_process_group(self.process_group.leader);
1885        }
1886    }
1887
1888    /// Indicates whether the thread group is waitable via waitid and waitpid for
1889    /// either WSTOPPED or WCONTINUED.
1890    pub fn is_waitable(&self) -> bool {
1891        return self.last_signal.is_some() && !self.base.load_stopped().is_in_progress();
1892    }
1893
1894    pub fn get_waitable_zombie(
1895        &mut self,
1896        zombie_list: &dyn Fn(&mut ThreadGroupMutableState) -> &mut Vec<OwnedRef<ZombieProcess>>,
1897        selector: &ProcessSelector,
1898        options: &WaitingOptions,
1899        pids: &mut PidTable,
1900    ) -> Option<WaitResult> {
1901        // We look for the last zombie in the vector that matches pid selector and waiting options
1902        let selected_zombie_position = zombie_list(self)
1903            .iter()
1904            .rev()
1905            .position(|zombie| zombie.matches_selector_and_waiting_option(selector, options))
1906            .map(|position_starting_from_the_back| {
1907                zombie_list(self).len() - 1 - position_starting_from_the_back
1908            });
1909
1910        selected_zombie_position.map(|position| {
1911            if options.keep_waitable_state {
1912                zombie_list(self)[position].to_wait_result()
1913            } else {
1914                let zombie = zombie_list(self).remove(position);
1915                self.children_time_stats += zombie.time_stats;
1916                let result = zombie.to_wait_result();
1917                zombie.release(pids);
1918                result
1919            }
1920        })
1921    }
1922
1923    pub fn is_correct_exit_signal(for_clone: bool, exit_code: Option<Signal>) -> bool {
1924        for_clone == (exit_code != Some(SIGCHLD))
1925    }
1926
1927    fn get_waitable_running_children(
1928        &self,
1929        selector: &ProcessSelector,
1930        options: &WaitingOptions,
1931        pids: &PidTable,
1932    ) -> WaitableChildResult {
1933        // The children whose pid matches the pid selector queried.
1934        let filter_children_by_pid_selector = |child: &ThreadGroup| match *selector {
1935            ProcessSelector::Any => true,
1936            ProcessSelector::Pid(pid) => child.leader == pid,
1937            ProcessSelector::Pgid(pgid) => {
1938                pids.get_process_group(pgid).as_ref() == Some(&child.read().process_group)
1939            }
1940            ProcessSelector::Process(ref key) => *key == ThreadGroupKey::from(child),
1941        };
1942
1943        // The children whose exit signal matches the waiting options queried.
1944        let filter_children_by_waiting_options = |child: &ThreadGroup| {
1945            if options.wait_for_all {
1946                return true;
1947            }
1948            Self::is_correct_exit_signal(options.wait_for_clone, child.read().exit_signal)
1949        };
1950
1951        // If wait_for_exited flag is disabled or no terminated children were found we look for living children.
1952        let mut selected_children = self
1953            .children
1954            .values()
1955            .map(|t| t.upgrade().unwrap())
1956            .filter(|tg| filter_children_by_pid_selector(&tg))
1957            .filter(|tg| filter_children_by_waiting_options(&tg))
1958            .peekable();
1959        if selected_children.peek().is_none() {
1960            // There still might be a process that ptrace hasn't looked at yet.
1961            if self.deferred_zombie_ptracers.iter().any(|dzp| match *selector {
1962                ProcessSelector::Any => true,
1963                ProcessSelector::Pid(pid) => dzp.tracee_thread_group_key.pid() == pid,
1964                ProcessSelector::Pgid(pgid) => pgid == dzp.tracee_pgid,
1965                ProcessSelector::Process(ref key) => *key == dzp.tracee_thread_group_key,
1966            }) {
1967                return WaitableChildResult::ShouldWait;
1968            }
1969
1970            return WaitableChildResult::NoneFound;
1971        }
1972        for child in selected_children {
1973            let child = child.write();
1974            if child.last_signal.is_some() {
1975                let build_wait_result = |mut child: ThreadGroupWriteGuard<'_>,
1976                                         exit_status: &dyn Fn(SignalInfo) -> ExitStatus|
1977                 -> WaitResult {
1978                    let siginfo = if options.keep_waitable_state {
1979                        child.last_signal.clone().unwrap()
1980                    } else {
1981                        child.last_signal.take().unwrap()
1982                    };
1983                    let exit_status = if siginfo.signal == SIGKILL {
1984                        // This overrides the stop/continue choice.
1985                        ExitStatus::Kill(siginfo)
1986                    } else {
1987                        exit_status(siginfo)
1988                    };
1989                    let info = child.tasks.values().next().unwrap().info();
1990                    let uid = info.real_creds().uid;
1991                    WaitResult {
1992                        pid: child.base.leader,
1993                        uid,
1994                        exit_info: ProcessExitInfo {
1995                            status: exit_status,
1996                            exit_signal: child.exit_signal,
1997                        },
1998                        time_stats: child.base.time_stats() + child.children_time_stats,
1999                    }
2000                };
2001                let child_stopped = child.base.load_stopped();
2002                if child_stopped == StopState::Awake && options.wait_for_continued {
2003                    return WaitableChildResult::ReadyNow(build_wait_result(child, &|siginfo| {
2004                        ExitStatus::Continue(siginfo, PtraceEvent::None)
2005                    }));
2006                }
2007                if child_stopped == StopState::GroupStopped && options.wait_for_stopped {
2008                    return WaitableChildResult::ReadyNow(build_wait_result(child, &|siginfo| {
2009                        ExitStatus::Stop(siginfo, PtraceEvent::None)
2010                    }));
2011                }
2012            }
2013        }
2014
2015        WaitableChildResult::ShouldWait
2016    }
2017
2018    /// Returns any waitable child matching the given `selector` and `options`. Returns None if no
2019    /// child matching the selector is waitable. Returns ECHILD if no child matches the selector at
2020    /// all.
2021    ///
2022    /// Will remove the waitable status from the child depending on `options`.
2023    pub fn get_waitable_child(
2024        &mut self,
2025        selector: &ProcessSelector,
2026        options: &WaitingOptions,
2027        pids: &mut PidTable,
2028    ) -> WaitableChildResult {
2029        if options.wait_for_exited {
2030            if let Some(waitable_zombie) = self.get_waitable_zombie(
2031                &|state: &mut ThreadGroupMutableState| &mut state.zombie_children,
2032                selector,
2033                options,
2034                pids,
2035            ) {
2036                return WaitableChildResult::ReadyNow(waitable_zombie);
2037            }
2038        }
2039
2040        self.get_waitable_running_children(selector, options, pids)
2041    }
2042
2043    /// Returns a task in the current thread group.
2044    pub fn get_live_task(&self) -> Result<TempRef<'_, Task>, Errno> {
2045        self.tasks
2046            .get(&self.leader())
2047            .and_then(|t| t.upgrade())
2048            .or_else(|| self.tasks().next())
2049            .ok_or_else(|| errno!(ESRCH))
2050    }
2051
2052    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
2053    /// does not update the signal.  If |finalize_only| is set, will check that
2054    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
2055    /// before executing it.
2056    ///
2057    /// Returns the latest stop state after any changes.
2058    pub fn set_stopped(
2059        mut self,
2060        new_stopped: StopState,
2061        siginfo: Option<SignalInfo>,
2062        finalize_only: bool,
2063    ) -> StopState {
2064        if let Some(stopped) = self.base.check_stopped_state(new_stopped, finalize_only) {
2065            return stopped;
2066        }
2067
2068        // Thread groups don't transition to group stop if they are waking, because waking
2069        // means something told it to wake up (like a SIGCONT) but hasn't finished yet.
2070        if self.base.load_stopped() == StopState::Waking
2071            && (new_stopped == StopState::GroupStopping || new_stopped == StopState::GroupStopped)
2072        {
2073            return self.base.load_stopped();
2074        }
2075
2076        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When thread
2077        // group can be stopped inside user code, tasks/thread groups will
2078        // need to be either restarted or stopped here.
2079        self.store_stopped(new_stopped);
2080        if let Some(signal) = &siginfo {
2081            // We don't want waiters to think the process was unstopped
2082            // because of a sigkill.  They will get woken when the
2083            // process dies.
2084            if signal.signal != SIGKILL {
2085                self.last_signal = siginfo;
2086            }
2087        }
2088        if new_stopped == StopState::Waking || new_stopped == StopState::ForceWaking {
2089            self.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::Stopped);
2090        };
2091
2092        let parent = (!new_stopped.is_in_progress()).then(|| self.parent.clone()).flatten();
2093
2094        // Drop the lock before locking the parent.
2095        std::mem::drop(self);
2096        if let Some(parent) = parent {
2097            let parent = parent.upgrade();
2098            parent
2099                .write()
2100                .lifecycle_waiters
2101                .notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
2102        }
2103
2104        new_stopped
2105    }
2106
2107    fn store_stopped(&mut self, state: StopState) {
2108        // We don't actually use the guard but we require it to enforce that the
2109        // caller holds the thread group's mutable state lock (identified by
2110        // mutable access to the thread group's mutable state).
2111
2112        self.base.stop_state.store(state, Ordering::Relaxed)
2113    }
2114
2115    /// Sends the signal `signal_info` to this thread group.
2116    #[allow(unused_mut, reason = "needed for some but not all macro outputs")]
2117    pub fn send_signal(mut self, signal_info: SignalInfo) {
2118        let sigaction = self.base.signal_actions.get(signal_info.signal);
2119        let action = action_for_signal(&signal_info, sigaction);
2120
2121        {
2122            let mut pending_signals = self.base.pending_signals.lock();
2123            pending_signals.enqueue(signal_info.clone());
2124            self.base.has_pending_signals.store(true, Ordering::Relaxed);
2125        }
2126        let tasks: Vec<WeakRef<Task>> = self.tasks.values().map(|t| t.weak_clone()).collect();
2127
2128        // Set state to waking before interrupting any tasks.
2129        if signal_info.signal == SIGKILL {
2130            self.set_stopped(StopState::ForceWaking, Some(signal_info.clone()), false);
2131        } else if signal_info.signal == SIGCONT {
2132            self.set_stopped(StopState::Waking, Some(signal_info.clone()), false);
2133        }
2134
2135        let mut has_interrupted_task = false;
2136        for task in tasks.iter().flat_map(|t| t.upgrade()) {
2137            let mut task_state = task.write();
2138
2139            if signal_info.signal == SIGKILL {
2140                task_state.thaw();
2141                task_state.set_stopped(StopState::ForceWaking, None, None, None);
2142            } else if signal_info.signal == SIGCONT {
2143                task_state.set_stopped(StopState::Waking, None, None, None);
2144            }
2145
2146            let is_masked = task_state.is_signal_masked(signal_info.signal);
2147            let was_masked = task_state.is_signal_masked_by_saved_mask(signal_info.signal);
2148
2149            let is_queued = action != DeliveryAction::Ignore
2150                || is_masked
2151                || was_masked
2152                || task_state.is_ptraced();
2153
2154            if is_queued {
2155                task_state.notify_signal_waiters(&signal_info.signal);
2156
2157                if !is_masked && action.must_interrupt(Some(sigaction)) && !has_interrupted_task {
2158                    // Only interrupt one task, and only interrupt if the signal was actually queued
2159                    // and the action must interrupt.
2160                    drop(task_state);
2161                    task.interrupt();
2162                    has_interrupted_task = true;
2163                }
2164            }
2165        }
2166    }
2167}
2168
2169/// Container around a weak task and a strong `TaskPersistentInfo`. It is needed to keep the
2170/// information even when the task is not upgradable, because when the task is dropped, there is a
2171/// moment where the task is not yet released, yet the weak pointer is not upgradeable anymore.
2172/// During this time, it is still necessary to access the persistent info to compute the state of
2173/// the thread for the different wait syscalls.
2174pub struct TaskContainer(WeakRef<Task>, TaskPersistentInfo);
2175
2176impl From<&TempRef<'_, Task>> for TaskContainer {
2177    fn from(task: &TempRef<'_, Task>) -> Self {
2178        Self(WeakRef::from(task), task.persistent_info.clone())
2179    }
2180}
2181
2182impl From<TaskContainer> for TaskPersistentInfo {
2183    fn from(container: TaskContainer) -> TaskPersistentInfo {
2184        container.1
2185    }
2186}
2187
2188impl TaskContainer {
2189    fn upgrade(&self) -> Option<TempRef<'_, Task>> {
2190        self.0.upgrade()
2191    }
2192
2193    fn weak_clone(&self) -> WeakRef<Task> {
2194        self.0.clone()
2195    }
2196
2197    fn info(&self) -> &TaskPersistentInfo {
2198        &self.1
2199    }
2200}
2201
2202#[cfg(test)]
2203mod test {
2204    use super::*;
2205    use crate::testing::*;
2206
2207    #[::fuchsia::test]
2208    async fn test_setsid() {
2209        spawn_kernel_and_run(async |locked, current_task| {
2210            fn get_process_group(task: &Task) -> Arc<ProcessGroup> {
2211                Arc::clone(&task.thread_group().read().process_group)
2212            }
2213            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));
2214
2215            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2216            assert_eq!(get_process_group(&current_task), get_process_group(&child_task));
2217
2218            let old_process_group = child_task.thread_group().read().process_group.clone();
2219            assert_eq!(child_task.thread_group().setsid(locked), Ok(()));
2220            assert_eq!(
2221                child_task.thread_group().read().process_group.session.leader,
2222                child_task.get_pid()
2223            );
2224            assert!(
2225                !old_process_group.read(locked).thread_groups().contains(child_task.thread_group())
2226            );
2227        })
2228        .await;
2229    }
2230
2231    #[::fuchsia::test]
2232    async fn test_exit_status() {
2233        spawn_kernel_and_run(async |locked, current_task| {
2234            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2235            child.thread_group().exit(locked, ExitStatus::Exit(42), None);
2236            std::mem::drop(child);
2237            assert_eq!(
2238                current_task.thread_group().read().zombie_children[0].exit_info.status,
2239                ExitStatus::Exit(42)
2240            );
2241        })
2242        .await;
2243    }
2244
2245    #[::fuchsia::test]
2246    async fn test_setgpid() {
2247        spawn_kernel_and_run(async |locked, current_task| {
2248            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));
2249
2250            let child_task1 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2251            let child_task2 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2252            let execd_child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2253            execd_child_task.thread_group().write().did_exec = true;
2254            let other_session_child_task =
2255                current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2256            assert_eq!(other_session_child_task.thread_group().setsid(locked), Ok(()));
2257
2258            assert_eq!(
2259                child_task1.thread_group().setpgid(locked, &current_task, &current_task, 0),
2260                error!(ESRCH)
2261            );
2262            assert_eq!(
2263                current_task.thread_group().setpgid(locked, &current_task, &execd_child_task, 0),
2264                error!(EACCES)
2265            );
2266            assert_eq!(
2267                current_task.thread_group().setpgid(locked, &current_task, &current_task, 0),
2268                error!(EPERM)
2269            );
2270            assert_eq!(
2271                current_task.thread_group().setpgid(
2272                    locked,
2273                    &current_task,
2274                    &other_session_child_task,
2275                    0
2276                ),
2277                error!(EPERM)
2278            );
2279            assert_eq!(
2280                current_task.thread_group().setpgid(locked, &current_task, &child_task1, -1),
2281                error!(EINVAL)
2282            );
2283            assert_eq!(
2284                current_task.thread_group().setpgid(locked, &current_task, &child_task1, 255),
2285                error!(EPERM)
2286            );
2287            assert_eq!(
2288                current_task.thread_group().setpgid(
2289                    locked,
2290                    &current_task,
2291                    &child_task1,
2292                    other_session_child_task.tid
2293                ),
2294                error!(EPERM)
2295            );
2296
2297            assert_eq!(
2298                child_task1.thread_group().setpgid(locked, &current_task, &child_task1, 0),
2299                Ok(())
2300            );
2301            assert_eq!(
2302                child_task1.thread_group().read().process_group.session.leader,
2303                current_task.tid
2304            );
2305            assert_eq!(child_task1.thread_group().read().process_group.leader, child_task1.tid);
2306
2307            let old_process_group = child_task2.thread_group().read().process_group.clone();
2308            assert_eq!(
2309                current_task.thread_group().setpgid(
2310                    locked,
2311                    &current_task,
2312                    &child_task2,
2313                    child_task1.tid
2314                ),
2315                Ok(())
2316            );
2317            assert_eq!(child_task2.thread_group().read().process_group.leader, child_task1.tid);
2318            assert!(
2319                !old_process_group
2320                    .read(locked)
2321                    .thread_groups()
2322                    .contains(child_task2.thread_group())
2323            );
2324        })
2325        .await;
2326    }
2327
2328    #[::fuchsia::test]
2329    async fn test_adopt_children() {
2330        spawn_kernel_and_run(async |locked, current_task| {
2331            let task1 = current_task.clone_task_for_test(locked, 0, None);
2332            let task2 = task1.clone_task_for_test(locked, 0, None);
2333            let task3 = task2.clone_task_for_test(locked, 0, None);
2334
2335            assert_eq!(task3.thread_group().read().get_ppid(), task2.tid);
2336
2337            task2.thread_group().exit(locked, ExitStatus::Exit(0), None);
2338            std::mem::drop(task2);
2339
2340            // Task3 parent should be current_task.
2341            assert_eq!(task3.thread_group().read().get_ppid(), current_task.tid);
2342        })
2343        .await;
2344    }
2345}