Skip to main content

starnix_core/task/
thread_group.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::terminal::{Terminal, TerminalController};
6use crate::mutable_state::{state_accessor, state_implementation};
7use crate::ptrace::{
8    AtomicStopState, PtraceAllowedPtracers, PtraceEvent, PtraceOptions, PtraceStatus, StopState,
9    ZombiePtracees, ptrace_detach,
10};
11use crate::security;
12use crate::signals::syscalls::WaitingOptions;
13use crate::signals::{
14    DeliveryAction, IntoSignalInfoOptions, QueuedSignals, SignalActions, SignalDetail, SignalInfo,
15    UncheckedSignalInfo, action_for_signal, send_standard_signal,
16};
17use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
18use crate::task::{
19    ControllingTerminal, CurrentTask, ExitStatus, Kernel, PidTable, ProcessGroup, Session, Task,
20    TaskMutableState, TaskPersistentInfo, TypedWaitQueue,
21};
22use crate::time::{IntervalTimerHandle, TimerTable};
23use itertools::Itertools;
24use macro_rules_attribute::apply;
25use starnix_lifecycle::{AtomicCounter, DropNotifier};
26use starnix_logging::{log_debug, log_error, log_info, log_warn, track_stub};
27use starnix_sync::{
28    LockBefore, LockDepMutex, Locked, OrderedMutex, ProcessGroupState, RwLock, RwLockWriteGuard,
29    ThreadGroupLimits, ThreadGroupPendingSignalsLock, ThreadGroupPtraceesLock, Unlocked,
30};
31use starnix_task_command::TaskCommand;
32use starnix_types::ownership::{OwnedRef, Releasable};
33use starnix_types::stats::TaskTimeStats;
34use starnix_types::time::{itimerspec_from_itimerval, timeval_from_duration};
35use starnix_uapi::arc_key::WeakKey;
36use starnix_uapi::auth::{CAP_SYS_ADMIN, CAP_SYS_RESOURCE, Credentials};
37use starnix_uapi::errors::Errno;
38use starnix_uapi::personality::PersonalityFlags;
39use starnix_uapi::resource_limits::{Resource, ResourceLimits};
40use starnix_uapi::signals::{
41    SIGCHLD, SIGCONT, SIGHUP, SIGKILL, SIGTERM, SIGTTOU, SigSet, Signal, UncheckedSignal,
42};
43use starnix_uapi::user_address::UserAddress;
44use starnix_uapi::{
45    ITIMER_PROF, ITIMER_REAL, ITIMER_VIRTUAL, SI_TKILL, SI_USER, SIG_IGN, errno, error, itimerval,
46    pid_t, rlimit, tid_t, uid_t,
47};
48use std::collections::BTreeMap;
49use std::fmt;
50use std::sync::atomic::{AtomicBool, Ordering};
51use std::sync::{Arc, OnceLock, Weak};
52use zx::{Koid, Status};
53
/// A Zircon process handle bundled with the result of looking up its koid.
///
/// The koid lookup is performed once, in [`ZirconProcess::new`], and the stored result is what
/// [`ZirconProcess::koid`] hands back.
#[derive(Debug)]
pub struct ZirconProcess {
    /// The wrapped Zircon process handle.
    process: zx::Process,
    /// The outcome of `process.koid()` as recorded at construction time.
    koid: Result<Koid, Status>,
}
59
60impl ZirconProcess {
61    pub fn new(process: zx::Process) -> Self {
62        let koid = process.koid();
63        Self { process, koid }
64    }
65
66    pub fn koid(&self) -> Result<Koid, Status> {
67        self.koid
68    }
69}
70
71impl std::ops::Deref for ZirconProcess {
72    type Target = zx::Process;
73    fn deref(&self) -> &Self::Target {
74        &self.process
75    }
76}
77
/// A weak reference to a thread group that can be used in sets and maps.
///
/// The derived comparison and hashing traits consider `pid` first and then the weak key.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadGroupKey {
    /// The pid of the thread group leader, captured when the key was created.
    pid: pid_t,
    /// Weak handle to the thread group itself.
    thread_group: WeakKey<ThreadGroup>,
}
84
85impl ThreadGroupKey {
86    /// The pid of the thread group keyed by this object.
87    ///
88    /// As the key is weak (and pid are not unique due to pid namespaces), this should not be used
89    /// as an unique identifier of the thread group.
90    pub fn pid(&self) -> pid_t {
91        self.pid
92    }
93}
94
95impl std::ops::Deref for ThreadGroupKey {
96    type Target = Weak<ThreadGroup>;
97    fn deref(&self) -> &Self::Target {
98        &self.thread_group.0
99    }
100}
101
102impl From<&ThreadGroup> for ThreadGroupKey {
103    fn from(tg: &ThreadGroup) -> Self {
104        Self { pid: tg.leader, thread_group: WeakKey::from(&tg.weak_self.upgrade().unwrap()) }
105    }
106}
107
108impl<T: AsRef<ThreadGroup>> From<T> for ThreadGroupKey {
109    fn from(tg: T) -> Self {
110        tg.as_ref().into()
111    }
112}
113
/// Values used for waiting on the [ThreadGroup] lifecycle wait queue.
///
/// The enum is `#[repr(u64)]`, and converting a value to `u64` yields its discriminant.
#[repr(u64)]
pub enum ThreadGroupLifecycleWaitValue {
    /// Wait for updates to the WaitResults of tasks in the group.
    ChildStatus,
    /// Wait for updates to `stopped`.
    Stopped,
}

// Implementing `From` (rather than `Into`) keeps `value.into()` working through the standard
// blanket impl while also enabling `u64::from(value)`.
impl From<ThreadGroupLifecycleWaitValue> for u64 {
    /// Returns the discriminant of `value`.
    fn from(value: ThreadGroupLifecycleWaitValue) -> u64 {
        value as u64
    }
}
128
/// A child process that has exited, but whose ptrace zombie needs to be consumed
/// before it can be waited for.
#[derive(Clone, Debug)]
pub struct DeferredZombiePTracer {
    /// Key of the original tracer's thread group.
    pub tracer_thread_group_key: ThreadGroupKey,
    /// Tid of the tracee.
    pub tracee_tid: tid_t,
    /// Process group id of the tracee.
    pub tracee_pgid: pid_t,
    /// Key of the tracee's thread group.
    pub tracee_thread_group_key: ThreadGroupKey,
}
142
143impl DeferredZombiePTracer {
144    fn new(tracer: &ThreadGroup, tracee: &Task) -> Self {
145        Self {
146            tracer_thread_group_key: tracer.into(),
147            tracee_tid: tracee.tid,
148            tracee_pgid: tracee.thread_group().read().process_group.leader,
149            tracee_thread_group_key: tracee.thread_group_key.clone(),
150        }
151    }
152}
153
/// The mutable state of the ThreadGroup.
pub struct ThreadGroupMutableState {
    /// The parent thread group.
    ///
    /// The value needs to be writable so that the thread group can be re-parented to the correct
    /// subreaper if the parent ends before the child.
    pub parent: Option<ThreadGroupParent>,

    /// The signal this process generates on exit.
    pub exit_signal: Option<Signal>,

    /// The tasks in the thread group.
    ///
    /// The references to `Task` are weak to prevent cycles, as each `Task` has an `Arc` reference
    /// to its thread group.
    /// It is still expected that these weak references are always valid, as tasks must unregister
    /// themselves before they are deleted.
    tasks: BTreeMap<tid_t, TaskContainer>,

    /// The children of this thread group.
    ///
    /// The references to `ThreadGroup` are weak to prevent cycles, as each `ThreadGroup` has an
    /// `Arc` reference to its parent.
    /// It is still expected that these weak references are always valid, as thread groups must
    /// unregister themselves before they are deleted.
    pub children: BTreeMap<pid_t, Weak<ThreadGroup>>,

    /// Child tasks that have exited, but not yet been waited for.
    pub zombie_children: Vec<OwnedRef<ZombieProcess>>,

    /// ptracees of this process that have exited, but not yet been waited for.
    pub zombie_ptracees: ZombiePtracees,

    /// Child processes that have exited, but whose zombie ptrace needs to be consumed
    /// before they can be waited for.
    pub deferred_zombie_ptracers: Vec<DeferredZombiePTracer>,

    /// Unified [WaitQueue] for all waited ThreadGroup events.
    pub lifecycle_waiters: TypedWaitQueue<ThreadGroupLifecycleWaitValue>,

    /// Whether this thread group will inherit the children of dying processes in its descendant
    /// tree.
    pub is_child_subreaper: bool,

    /// The IDs used to perform shell job control.
    pub process_group: Arc<ProcessGroup>,

    /// NOTE(review): presumably set once the thread group has called exec — confirm against the
    /// code that writes this field.
    pub did_exec: bool,

    /// A signal that indicates whether the process is going to become waitable
    /// via waitid and waitpid for either WSTOPPED or WCONTINUED, depending on
    /// the value of `stopped`. If not None, contains the SignalInfo to return.
    pub last_signal: Option<SignalInfo>,

    /// Whether the `ThreadGroup` is running or not.
    ///
    /// For exited thread groups, this contains the exit status.
    run_state: ThreadGroupRunState,

    /// Time statistics accumulated from the children.
    pub children_time_stats: TaskTimeStats,

    /// Personality flags set with `sys_personality()`.
    pub personality: PersonalityFlags,

    /// Thread groups allowed to trace tasks in this thread group.
    pub allowed_ptracers: PtraceAllowedPtracers,

    /// Channel to message when this thread group exits.
    exit_notifier: Option<futures::channel::oneshot::Sender<()>>,

    /// Notifier for name changes.
    pub notifier: Option<std::sync::mpsc::Sender<MemoryAttributionLifecycleEvent>>,
}
228
/// A collection of `Task` objects that roughly correspond to a "process".
///
/// Userspace programmers often think about "threads" and "processes", but those concepts have no
/// clear analogs inside the kernel because tasks are typically created using `clone(2)`, which
/// takes a complex set of flags that describes how much state is shared between the original task
/// and the new task.
///
/// If a new task is created with the `CLONE_THREAD` flag, the new task will be placed in the same
/// `ThreadGroup` as the original task. Userspace typically uses this flag in conjunction with
/// `CLONE_FILES`, `CLONE_VM`, and `CLONE_FS`, which corresponds to the userspace notion of a
/// "thread". For example, that's how `pthread_create` behaves. In that sense, a `ThreadGroup`
/// normally corresponds to the set of "threads" in a "process". However, this pattern is purely a
/// userspace convention, and nothing stops userspace from using `CLONE_THREAD` without
/// `CLONE_FILES`, for example.
///
/// In Starnix, a `ThreadGroup` corresponds to a Zircon process, which means we do not support the
/// `CLONE_THREAD` flag without the `CLONE_VM` flag. If we run into problems with this limitation,
/// we might need to revise this correspondence.
///
/// Each `Task` in a `ThreadGroup` has the same thread group ID (`tgid`). The task with the same
/// `pid` as the `tgid` is called the thread group leader.
///
/// Thread groups are destroyed when the last task in the group exits.
pub struct ThreadGroup {
    /// Weak reference to the `Arc` that owns this `ThreadGroup`. This allows retrieving a strong
    /// reference from a plain `&ThreadGroup`.
    pub weak_self: Weak<ThreadGroup>,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// A handle to the underlying Zircon process object.
    ///
    /// Currently, we have a 1-to-1 mapping between thread groups and zx::process
    /// objects. This approach might break down if/when we implement CLONE_VM
    /// without CLONE_THREAD because that creates a situation where two thread
    /// groups share an address space. To implement that situation, we might
    /// need to break the 1-to-1 mapping between thread groups and zx::process
    /// or teach zx::process to share address spaces.
    pub process: ZirconProcess,

    /// A handle to the restricted address space for the Zircon process object.
    pub root_vmar: zx::Vmar,

    /// The lead task of this thread group.
    ///
    /// The lead task is typically the initial thread created in the thread group.
    pub leader: pid_t,

    // TODO(https://fxbug.dev/508746892): Remove this once the `PidTable` lock is removed.
    /// Cached weak reference to the leader task.
    ///
    /// This is used to break a deadlock in signal delivery, where a reference to the leader task
    /// must be obtained in order to do access checks in situations where the leader has exited and
    /// is no longer in the task list.
    pub leader_task: OnceLock<Weak<Task>>,

    /// The signal actions that are registered for this process.
    pub signal_actions: Arc<SignalActions>,

    /// The timers for this thread group (from timer_create(), etc.).
    pub timers: TimerTable,

    /// A mechanism to be notified when this `ThreadGroup` is destroyed.
    pub drop_notifier: DropNotifier,

    /// Whether the process is currently stopped.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    stop_state: AtomicStopState,

    /// The mutable state of the ThreadGroup.
    mutable_state: RwLock<ThreadGroupMutableState>,

    /// The resource limits for this thread group.  This is outside mutable_state
    /// to avoid deadlocks where the thread_group lock is held when acquiring
    /// the task lock, and vice versa.
    pub limits: OrderedMutex<ResourceLimits, ThreadGroupLimits>,

    /// The next unique identifier for a seccomp filter.  These are required to be
    /// able to distinguish identical seccomp filters, which are treated differently
    /// for the purposes of SECCOMP_FILTER_FLAG_TSYNC.  Inherited across clone because
    /// seccomp filters are also inherited across clone.
    pub next_seccomp_filter_id: AtomicCounter<u64>,

    /// Tasks ptraced by this process.
    pub ptracees: LockDepMutex<BTreeMap<tid_t, TaskContainer>, ThreadGroupPtraceesLock>,

    /// The signals that are currently pending for this thread group.
    pub pending_signals: LockDepMutex<QueuedSignals, ThreadGroupPendingSignalsLock>,

    /// Whether or not there are any pending signals available for tasks in this thread group.
    /// Used to avoid having to acquire the signal state lock in hot paths.
    pub has_pending_signals: AtomicBool,

    /// The monotonic time at which the thread group started.
    pub start_time: zx::MonotonicInstant,

    /// Whether to log syscalls at INFO level for this thread group.
    log_syscalls_as_info: AtomicBool,
}
330
331impl fmt::Debug for ThreadGroup {
332    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
333        write!(
334            f,
335            "{}({})",
336            self.process.get_name().unwrap_or(zx::Name::new_lossy("<unknown>")),
337            self.leader
338        )
339    }
340}
341
342impl ThreadGroup {
343    pub fn sync_syscall_log_level(&self) {
344        let command = self.read().leader_command();
345        let filters = self.kernel.syscall_log_filters.lock();
346        let should_log = filters.iter().any(|f| f.matches(&command));
347        let prev_should_log = self.log_syscalls_as_info.swap(should_log, Ordering::Relaxed);
348        let change_str = match (should_log, prev_should_log) {
349            (true, false) => Some("Enabled"),
350            (false, true) => Some("Disabled"),
351            _ => None,
352        };
353        if let Some(change_str) = change_str {
354            log_info!(
355                "{change_str} info syscall logs for thread group {} (command: {command})",
356                self.leader
357            );
358        }
359    }
360
361    #[inline]
362    pub fn syscall_log_level(&self) -> starnix_logging::Level {
363        if self.log_syscalls_as_info.load(Ordering::Relaxed) {
364            starnix_logging::Level::Info
365        } else {
366            starnix_logging::Level::Trace
367        }
368    }
369}
370
371impl PartialEq for ThreadGroup {
372    fn eq(&self, other: &Self) -> bool {
373        self.leader == other.leader
374    }
375}
376
377impl Drop for ThreadGroup {
378    fn drop(&mut self) {
379        let state = self.mutable_state.get_mut();
380        assert!(state.tasks.is_empty());
381        assert!(state.children.is_empty());
382        assert!(state.zombie_children.is_empty());
383        assert!(state.zombie_ptracees.is_empty());
384        #[cfg(any(test, debug_assertions))]
385        assert!(
386            state
387                .parent
388                .as_ref()
389                .and_then(|p| p.0.upgrade().as_ref().map(|p| p
390                    .read()
391                    .children
392                    .get(&self.leader)
393                    .is_none()))
394                .unwrap_or(true)
395        );
396    }
397}
398
399/// A wrapper around a `Weak<ThreadGroup>` that expects the underlying `Weak` to always be
400/// valid. The wrapper will check this at runtime during creation and upgrade.
401pub struct ThreadGroupParent(Weak<ThreadGroup>);
402
403impl ThreadGroupParent {
404    pub fn new(t: Weak<ThreadGroup>) -> Self {
405        debug_assert!(t.upgrade().is_some());
406        Self(t)
407    }
408
409    pub fn upgrade(&self) -> Arc<ThreadGroup> {
410        self.0.upgrade().expect("ThreadGroupParent references must always be valid")
411    }
412}
413
414impl Clone for ThreadGroupParent {
415    fn clone(&self) -> Self {
416        Self(self.0.clone())
417    }
418}
419
/// A selector that can match a process. Works as a representation of the pid argument to syscalls
/// like wait and kill.
#[derive(Debug, Clone)]
pub enum ProcessSelector {
    /// Matches any process at all.
    Any,
    /// Matches only the process with the specified pid.
    Pid(pid_t),
    /// Matches all the processes in the given process group.
    Pgid(pid_t),
    /// Matches the thread group with the given key.
    Process(ThreadGroupKey),
}
433
434impl ProcessSelector {
435    pub fn match_tid(&self, tid: tid_t, pid_table: &PidTable) -> bool {
436        match *self {
437            ProcessSelector::Pid(p) => {
438                if p == tid {
439                    true
440                } else {
441                    if let Ok(task_ref) = pid_table.get_task(tid) {
442                        task_ref.get_pid() == p
443                    } else {
444                        false
445                    }
446                }
447            }
448            ProcessSelector::Any => true,
449            ProcessSelector::Pgid(pgid) => {
450                if let Ok(task_ref) = pid_table.get_task(tid) {
451                    pid_table.get_process_group(pgid).as_ref()
452                        == Some(&task_ref.thread_group().read().process_group)
453                } else {
454                    false
455                }
456            }
457            ProcessSelector::Process(ref key) => {
458                if let Some(tg) = key.upgrade() {
459                    tg.read().tasks.contains_key(&tid)
460                } else {
461                    false
462                }
463            }
464        }
465    }
466}
467
/// The exit status of a process together with its exit signal, if any.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProcessExitInfo {
    /// The status the process exited with.
    pub status: ExitStatus,
    /// The exit signal recorded for the process, if any.
    pub exit_signal: Option<Signal>,
}
473
/// The lifecycle state of a thread group. Defaults to `Running`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
enum ThreadGroupRunState {
    /// The thread group has not started exiting.
    #[default]
    Running,
    /// The thread group is exiting with the given status.
    Exiting(ExitStatus),
    /// The thread group has exited with the given status.
    Exited(ExitStatus),
}
481
/// The information about an exited process that is reported to a waiter.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WaitResult {
    /// The pid of the process.
    pub pid: pid_t,
    /// The uid associated with the process.
    pub uid: uid_t,

    /// The exit status and exit signal of the process.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,
}
492
493impl WaitResult {
494    // According to wait(2) man page, SignalInfo.signal needs to always be set to SIGCHLD
495    pub fn as_signal_info(&self) -> SignalInfo {
496        SignalInfo::with_detail(
497            SIGCHLD,
498            self.exit_info.status.signal_info_code(),
499            SignalDetail::SIGCHLD {
500                pid: self.pid,
501                uid: self.uid,
502                status: self.exit_info.status.signal_info_status(),
503            },
504        )
505    }
506}
507
/// The record kept for a process that has exited.
#[derive(Debug)]
pub struct ZombieProcess {
    /// Key of the thread group this zombie stands for; also the source of its pid.
    pub thread_group_key: ThreadGroupKey,
    /// The process group id of the process.
    pub pgid: pid_t,
    /// The uid associated with the process.
    pub uid: uid_t,

    /// The exit status and exit signal of the process.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,

    /// Whether dropping this ZombieProcess should imply removing the pid from
    /// the PidTable.
    pub is_canonical: bool,
}
523
524impl PartialEq for ZombieProcess {
525    fn eq(&self, other: &Self) -> bool {
526        // We assume only one set of ZombieProcess data per process, so this should cover it.
527        self.thread_group_key == other.thread_group_key
528            && self.pgid == other.pgid
529            && self.uid == other.uid
530            && self.is_canonical == other.is_canonical
531    }
532}
533
534impl Eq for ZombieProcess {}
535
536impl PartialOrd for ZombieProcess {
537    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
538        Some(self.cmp(other))
539    }
540}
541
542impl Ord for ZombieProcess {
543    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
544        self.thread_group_key.cmp(&other.thread_group_key)
545    }
546}
547
548impl ZombieProcess {
549    pub fn new(
550        thread_group: ThreadGroupStateRef<'_>,
551        credentials: &Credentials,
552        exit_info: ProcessExitInfo,
553    ) -> OwnedRef<Self> {
554        let time_stats = thread_group.base.time_stats() + thread_group.children_time_stats;
555        OwnedRef::new(ZombieProcess {
556            thread_group_key: thread_group.base.into(),
557            pgid: thread_group.process_group.leader,
558            uid: credentials.uid,
559            exit_info,
560            time_stats,
561            is_canonical: true,
562        })
563    }
564
565    pub fn pid(&self) -> pid_t {
566        self.thread_group_key.pid()
567    }
568
569    pub fn to_wait_result(&self) -> WaitResult {
570        WaitResult {
571            pid: self.pid(),
572            uid: self.uid,
573            exit_info: self.exit_info.clone(),
574            time_stats: self.time_stats,
575        }
576    }
577
578    pub fn as_artificial(&self) -> Self {
579        ZombieProcess {
580            thread_group_key: self.thread_group_key.clone(),
581            pgid: self.pgid,
582            uid: self.uid,
583            exit_info: self.exit_info.clone(),
584            time_stats: self.time_stats,
585            is_canonical: false,
586        }
587    }
588
589    pub fn matches_selector(&self, selector: &ProcessSelector) -> bool {
590        match *selector {
591            ProcessSelector::Any => true,
592            ProcessSelector::Pid(pid) => self.pid() == pid,
593            ProcessSelector::Pgid(pgid) => self.pgid == pgid,
594            ProcessSelector::Process(ref key) => self.thread_group_key == *key,
595        }
596    }
597
598    pub fn matches_selector_and_waiting_option(
599        &self,
600        selector: &ProcessSelector,
601        options: &WaitingOptions,
602    ) -> bool {
603        if !self.matches_selector(selector) {
604            return false;
605        }
606
607        if options.wait_for_all {
608            true
609        } else {
610            // A "clone" zombie is one which has delivered no signal, or a
611            // signal other than SIGCHLD to its parent upon termination.
612            options.wait_for_clone == (self.exit_info.exit_signal != Some(SIGCHLD))
613        }
614    }
615}
616
617impl Releasable for ZombieProcess {
618    type Context<'a> = &'a mut PidTable;
619
620    fn release<'a>(self, pids: &'a mut PidTable) {
621        if self.is_canonical {
622            pids.remove_zombie(self.pid());
623        }
624    }
625}
626
627impl ThreadGroup {
    /// Creates a ThreadGroup for a regular userspace process.
    ///
    /// All arguments are forwarded unchanged to `new_internal`. In builds with debug assertions
    /// enabled, this panics if `process` or `root_vmar` is an invalid handle.
    pub fn new<L>(
        locked: &mut Locked<L>,
        kernel: Arc<Kernel>,
        process: zx::Process,
        root_vmar: zx::Vmar,
        parent: Option<ThreadGroupWriteGuard<'_>>,
        leader: pid_t,
        exit_signal: Option<Signal>,
        process_group: Arc<ProcessGroup>,
        signal_actions: Arc<SignalActions>,
    ) -> Arc<ThreadGroup>
    where
        L: LockBefore<ProcessGroupState>,
    {
        // A regular process must come with real handles; the invalid-handle forms are what the
        // `for_system` and `for_test` constructors pass.
        debug_assert!(!process.is_invalid());
        debug_assert!(!root_vmar.is_invalid());
        Self::new_internal(
            locked,
            kernel,
            process,
            root_vmar,
            parent,
            leader,
            exit_signal,
            process_group,
            signal_actions,
        )
    }
657
658    /// Creates a ThreadGroup for a kernel system task (e.g., kthreadd).
659    pub fn for_system<L>(
660        locked: &mut Locked<L>,
661        kernel: Arc<Kernel>,
662        leader: pid_t,
663        process_group: Arc<ProcessGroup>,
664    ) -> Arc<ThreadGroup>
665    where
666        L: LockBefore<ProcessGroupState>,
667    {
668        Self::new_internal(
669            locked,
670            kernel,
671            zx::Process::invalid(),
672            zx::Vmar::invalid(),
673            None,
674            leader,
675            Some(SIGCHLD),
676            process_group,
677            SignalActions::default(),
678        )
679    }
680
681    /// Creates a ThreadGroup suitable for use in tests.
682    ///
683    /// This function performs the minimal setup necessary to produce a valid `ThreadGroup`
684    /// instance. It uses an invalid handle for the root VMAR, sets no parent, and uses
685    /// default signal actions with `SIGCHLD` as the exit signal.
686    ///
687    /// This should only be used in tests where a full process environment is not required.
688    pub fn for_test<L>(
689        locked: &mut Locked<L>,
690        kernel: Arc<Kernel>,
691        process: zx::Process,
692        parent: ThreadGroupWriteGuard<'_>,
693        leader: pid_t,
694        process_group: Arc<ProcessGroup>,
695    ) -> Arc<ThreadGroup>
696    where
697        L: LockBefore<ProcessGroupState>,
698    {
699        Self::new_internal(
700            locked,
701            kernel,
702            process,
703            zx::Vmar::invalid(),
704            Some(parent),
705            leader,
706            Some(SIGCHLD),
707            process_group,
708            SignalActions::default(),
709        )
710    }
711
712    fn new_internal<L>(
713        locked: &mut Locked<L>,
714        kernel: Arc<Kernel>,
715        process: zx::Process,
716        root_vmar: zx::Vmar,
717        parent: Option<ThreadGroupWriteGuard<'_>>,
718        leader: pid_t,
719        exit_signal: Option<Signal>,
720        process_group: Arc<ProcessGroup>,
721        signal_actions: Arc<SignalActions>,
722    ) -> Arc<ThreadGroup>
723    where
724        L: LockBefore<ProcessGroupState>,
725    {
726        Arc::new_cyclic(|weak_self| {
727            let process = ZirconProcess::new(process);
728            let mut thread_group = ThreadGroup {
729                weak_self: weak_self.clone(),
730                kernel,
731                process,
732                root_vmar,
733                leader,
734                leader_task: OnceLock::new(),
735                signal_actions,
736                timers: Default::default(),
737                drop_notifier: Default::default(),
738                // A child process created via fork(2) inherits its parent's
739                // resource limits.  Resource limits are preserved across execve(2).
740                limits: OrderedMutex::new(
741                    parent
742                        .as_ref()
743                        .map(|p| p.base.limits.lock(locked.cast_locked()).clone())
744                        .unwrap_or(Default::default()),
745                ),
746                next_seccomp_filter_id: Default::default(),
747                ptracees: Default::default(),
748                stop_state: AtomicStopState::new(StopState::Awake),
749                pending_signals: Default::default(),
750                has_pending_signals: Default::default(),
751                start_time: zx::MonotonicInstant::get(),
752                mutable_state: RwLock::new(ThreadGroupMutableState {
753                    parent: parent
754                        .as_ref()
755                        .map(|p| ThreadGroupParent::new(p.base.weak_self.clone())),
756                    exit_signal,
757                    tasks: BTreeMap::new(),
758                    children: BTreeMap::new(),
759                    zombie_children: vec![],
760                    zombie_ptracees: ZombiePtracees::new(),
761                    deferred_zombie_ptracers: vec![],
762                    lifecycle_waiters: TypedWaitQueue::<ThreadGroupLifecycleWaitValue>::default(),
763                    is_child_subreaper: false,
764                    process_group: Arc::clone(&process_group),
765                    did_exec: false,
766                    last_signal: None,
767                    run_state: Default::default(),
768                    children_time_stats: Default::default(),
769                    personality: parent
770                        .as_ref()
771                        .map(|p| p.personality)
772                        .unwrap_or(Default::default()),
773                    allowed_ptracers: PtraceAllowedPtracers::None,
774                    exit_notifier: None,
775                    notifier: None,
776                }),
777                log_syscalls_as_info: AtomicBool::new(false),
778            };
779
780            if let Some(mut parent) = parent {
781                thread_group.next_seccomp_filter_id.reset(parent.base.next_seccomp_filter_id.get());
782                parent.children.insert(leader, weak_self.clone());
783                process_group.insert(locked, &thread_group);
784            };
785            thread_group
786        })
787    }
788
    // NOTE(review): presumably generates the `read()` / `write()` accessors over
    // `mutable_state` that the rest of this file calls — confirm against the macro definition.
    state_accessor!(ThreadGroup, mutable_state);
790
    /// Returns the current stop state of the thread group.
    ///
    /// This is a relaxed atomic load of `stop_state` and does not take the `mutable_state` lock.
    pub fn load_stopped(&self) -> StopState {
        self.stop_state.load(Ordering::Relaxed)
    }
794
    /// Causes the thread group to exit.
    ///
    /// This marks the thread group as exiting and sends [`SIGKILL`] to its tasks to initiate
    /// teardown. Within this function the run state only advances to `Exiting`.
    ///
    /// If the thread group is no longer running, the call returns without changing the run
    /// state or signalling any task; the ptrace exit event for `current_task` (if provided) is
    /// still emitted first.
    ///
    /// If this is being called from a task that is part of the current thread group, the caller
    /// should pass `current_task`. If ownership issues prevent passing `current_task`, then callers
    /// should use [`CurrentTask::kill_thread_group()`] instead.
    pub fn kill(
        &self,
        locked: &mut Locked<Unlocked>,
        exit_status: ExitStatus,
        mut current_task: Option<&mut CurrentTask>,
    ) {
        // Report the exit to a tracer before taking any of the locks below.
        if let Some(ref mut current_task) = current_task {
            current_task.ptrace_event(
                locked,
                PtraceOptions::TRACEEXIT,
                exit_status.signal_info_status() as u64,
            );
        }
        // The pid table write lock is taken before the thread group write lock and is held
        // until the end of the function.
        let mut pids = self.kernel.pids.write();
        let mut state = self.write();
        if !state.is_running() {
            return;
        }

        state.run_state = ThreadGroupRunState::Exiting(exit_status.clone());

        // Drop ptrace zombies
        state.zombie_ptracees.release(&mut pids);

        // Interrupt each task. Unlock the group because send_signal will lock the group in order
        // to call set_stopped.
        let tasks = state.tasks();
        drop(state);

        // Detach from any ptraced tasks, killing the ones that set PTRACE_O_EXITKILL.
        // The tids are copied out first so the `ptracees` lock is not held across the loop.
        let tracees = self.ptracees.lock().keys().cloned().collect::<Vec<_>>();
        for tracee in tracees {
            if let Ok(task_ref) = pids.get_task(tracee) {
                let mut should_send_sigkill = false;
                if let Some(ptrace) = &task_ref.read().ptrace {
                    should_send_sigkill = ptrace.has_option(PtraceOptions::EXITKILL);
                }
                if should_send_sigkill {
                    send_standard_signal(locked, task_ref.as_ref(), SignalInfo::kernel(SIGKILL));
                    continue;
                }

                // Detaching is best-effort: a failure here is deliberately ignored.
                let _ =
                    ptrace_detach(locked, &mut pids, self, task_ref.as_ref(), &UserAddress::NULL);
            }
        }

        // Record the exit status on every task captured above, then signal it.
        for task in tasks {
            task.write().set_exit_status(exit_status.clone());
            send_standard_signal(locked, &task, SignalInfo::kernel(SIGKILL));
        }
    }
855
856    pub fn add(&self, task: Arc<Task>) -> Result<(), Errno> {
857        let mut state = self.write();
858        if !state.is_running() {
859            if state.tasks_count() == 0 {
860                log_warn!(
861                    "Task {} with leader {} not running while adding its first task, \
862                not sending creation notification",
863                    task.tid,
864                    self.leader
865                );
866            }
867            return error!(EINVAL);
868        }
869        if task.tid == self.leader {
870            let _ = self.leader_task.set(Arc::downgrade(&task));
871        }
872        state.tasks.insert(task.tid, (&task).into());
873
874        Ok(())
875    }
876
877    /// Removes `task` from the pid table and from the tasks of this ThreadGroup.
878    ///
879    /// It is important that the task is taken as an `Arc`. It ensures the tasks of the
880    /// ThreadGroup are always valid as they are still valid when removed.
       ///
       /// If `task` was the last task of the group, the whole group exits: a `ZombieProcess`
       /// replaces the pid table entry, the group leaves its process group and the cgroup2 pid
       /// table, its children are handed to the reaper returned by `find_reaper()` (or its
       /// zombies are released when there is none), the tracer and/or parent are notified, and
       /// the group is finally marked as exited.
881    pub fn remove<L>(
882        &self,
883        locked: &mut Locked<L>,
884        mut pids: RwLockWriteGuard<'_, PidTable>,
885        task: &Arc<Task>,
886    ) where
887        L: LockBefore<ProcessGroupState>,
888    {
889        task.set_ptrace_zombie(&mut pids);
890        pids.remove_task(task.tid);
891
892        let mut state = self.write();
893
894        let persistent_info: TaskPersistentInfo =
895            if let Some(container) = state.tasks.remove(&task.tid) {
896                container.into()
897            } else {
898                // The task has never been added. The only expected case is that this thread group
899                // is not running.
900                debug_assert!(!state.is_running());
901                return;
902            };
903
           // The last task is gone: the thread group itself exits.
904        if state.tasks.is_empty() {
               // Use the exit status recorded by an earlier transition to `Exiting`; otherwise fall
               // back to the task's own exit status and move the group to exiting now.
905            let exit_status = if let ThreadGroupRunState::Exiting(exit_status) = &state.run_state {
906                exit_status.clone()
907            } else {
908                let exit_status = task.exit_status().unwrap_or_else(|| {
909                    log_error!("Exiting without an exit code.");
910                    ExitStatus::Exit(u8::MAX)
911                });
912                state.set_exiting(exit_status.clone());
913                exit_status
914            };
915
916            // Replace PID table entry with a zombie.
917            let exit_info =
918                ProcessExitInfo { status: exit_status, exit_signal: state.exit_signal.clone() };
919            let zombie =
920                ZombieProcess::new(state.as_ref(), &persistent_info.real_creds(), exit_info);
921            pids.kill_process(self.leader, OwnedRef::downgrade(&zombie));
922
923            state.leave_process_group(locked, &pids);
924
925            // It has not been verified that dropping the lock here is correct.
926            // If problems do turn up with another thread observing an intermediate state of
927            // this exit operation, the solution is to unify locks. It should be sensible and
928            // possible for there to be a single lock that protects all (or nearly all) of the
929            // data accessed by both exit and wait. In gvisor and linux this is the lock on the
930            // equivalent of the PidTable. This is made more difficult by rust locks being
931            // containers that only lock the data they contain, but see
932            // https://docs.google.com/document/d/1YHrhBqNhU1WcrsYgGAu3JwwlVmFXPlwWHTJLAbwRebY/edit
933            // for an idea.
934            std::mem::drop(state);
935
936            // Remove the process from the cgroup2 pid table after TG lock is dropped.
937            // This function will hold the CgroupState lock which should be before the TG lock. See
938            // more in lock_cgroup2_pid_table comments.
939            self.kernel.cgroups.lock_cgroup2_pid_table().remove_process(self.into());
940
941            // We will need the immediate parent and the reaper. Once we have them, we can make
942            // sure to take the locks in the right order: parent before child.
943            let parent = self.read().parent.clone();
944            let reaper = self.find_reaper();
945
946            {
947                // Reparent the children.
948                if let Some(reaper) = reaper {
949                    let reaper = reaper.upgrade();
950                    {
951                        let mut reaper_state = reaper.write();
952                        let mut state = self.write();
953                        for (_pid, weak_child) in std::mem::take(&mut state.children) {
954                            if let Some(child) = weak_child.upgrade() {
955                                let mut child_state = child.write();
956
957                                child_state.exit_signal = Some(SIGCHLD);
958                                child_state.parent =
959                                    Some(ThreadGroupParent::new(Arc::downgrade(&reaper)));
960                                reaper_state.children.insert(child.leader, weak_child.clone());
961                            }
962                        }
963                        reaper_state.zombie_children.append(&mut state.zombie_children);
964                    }
965                    ZombiePtracees::reparent(self, &reaper);
966                } else {
967                    // If we don't have a reaper then just drop the zombies.
968                    let mut state = self.write();
969                    for zombie in state.zombie_children.drain(..) {
970                        zombie.release(&mut pids);
971                    }
972                    state.zombie_ptracees.release(&mut pids);
973                }
974            }
975
976            // Clear the `parent` reference now that children have been re-`parent`ed.
977            self.write().parent = None;
978
979            #[cfg(any(test, debug_assertions))]
980            {
981                let state = self.read();
982                assert!(state.zombie_children.is_empty());
983                assert!(state.zombie_ptracees.is_empty());
984            }
985
986            if let Some(ref parent) = parent {
987                let parent = parent.upgrade();
988                let mut tracer_pid = None;
989                if let Some(ptrace) = &task.read().ptrace {
990                    tracer_pid = Some(ptrace.get_pid());
991                }
992
                   // When the exiting task is traced and its tracer can still be found, the tracer's
                   // thread group decides whether the parent gets the zombie now (`Some`) or not
                   // (`None`). Otherwise the parent is always notified.
993                let maybe_zombie = 'compute_zombie: {
994                    if let Some(tracer_pid) = tracer_pid {
995                        if let Ok(ref tracer) = pids.get_task(tracer_pid) {
996                            break 'compute_zombie tracer
997                                .thread_group()
998                                .maybe_notify_tracer(task, &mut pids, &parent, zombie);
999                        }
1000                    }
1001                    Some(zombie)
1002                };
1003                if let Some(zombie) = maybe_zombie {
1004                    parent.do_zombie_notifications(zombie);
1005                }
1006            } else {
                    // No parent to notify: release the zombie right away.
1007                zombie.release(&mut pids);
1008            }
1009
1010            // TODO: Set the error_code on the Zircon process object. Currently missing a way
1011            // to do this in Zircon. Might be easier in the new execution model.
1012
1013            // Once the last zircon thread stops, the zircon process will also stop executing.
1014
1015            if let Some(parent) = parent {
1016                let parent = parent.upgrade();
1017                parent.check_orphans(locked, &pids);
1018            }
1019
1020            self.write().set_exited();
1021        }
1022    }
1023
1024    pub fn do_zombie_notifications(&self, zombie: OwnedRef<ZombieProcess>) {
1025        let mut state = self.write();
1026
1027        state.children.remove(&zombie.pid());
1028        state
1029            .deferred_zombie_ptracers
1030            .retain(|dzp| dzp.tracee_thread_group_key != zombie.thread_group_key);
1031
1032        let exit_signal = zombie.exit_info.exit_signal;
1033        let mut signal_info = zombie.to_wait_result().as_signal_info();
1034
1035        state.zombie_children.push(zombie);
1036        state.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
1037
1038        // Send signals
1039        if let Some(exit_signal) = exit_signal {
1040            signal_info.signal = exit_signal;
1041            state.send_signal(signal_info);
1042        }
1043    }
1044
1045    /// Notifies the tracer if appropriate.  Returns Some(zombie) if caller
1046    /// needs to notify the parent, None otherwise.  The caller should probably
1047    /// invoke parent.do_zombie_notifications(zombie) on the result.
        ///
        /// `self` is the thread group of the tracer, `tracee` is the exiting task, `parent` is
        /// the thread group of the tracee's parent and `zombie` is the tracee's zombie process.
        /// Which of the four cases applies depends on whether `self` still lists `tracee` in its
        /// `zombie_ptracees` and on whether the tracer is also the parent.
1048    fn maybe_notify_tracer(
1049        &self,
1050        tracee: &Task,
1051        mut pids: &mut PidTable,
1052        parent: &ThreadGroup,
1053        zombie: OwnedRef<ZombieProcess>,
1054    ) -> Option<OwnedRef<ZombieProcess>> {
1055        if self.read().zombie_ptracees.has_tracee(tracee.tid) {
1056            if self == parent {
1057                // The tracer is the parent and has not consumed the
1058                // notification.  Don't bother with the ptracee stuff, and just
1059                // notify the parent.
1060                self.write().zombie_ptracees.remove(pids, tracee.tid);
1061                return Some(zombie);
1062            } else {
1063                // The tracer is not the parent and the tracer has not consumed
1064                // the notification.
1065                {
1066                    // Tell the parent to expect a notification later.
1067                    let mut parent_state = parent.write();
1068                    parent_state
1069                        .deferred_zombie_ptracers
1070                        .push(DeferredZombiePTracer::new(self, tracee));
1071                    parent_state.children.remove(&tracee.get_pid());
1072                }
1073                // Tell the tracer that there is a notification pending.
                    // The zombie is handed to the tracer's `zombie_ptracees` together with the
                    // parent, so nothing is returned to the caller.
1074                let mut state = self.write();
1075                state.zombie_ptracees.set_parent_of(tracee.tid, Some(zombie), parent);
1076                tracee.write().notify_ptracers();
1077                return None;
1078            }
1079        } else if self == parent {
1080            // The tracer is the parent and has already consumed the parent
1081            // notification.  No further action required.
1082            parent.write().children.remove(&tracee.tid);
1083            zombie.release(&mut pids);
1084            return None;
1085        }
1086        // The tracer is not the parent and has already consumed the parent
1087        // notification.  Notify the parent.
1088        Some(zombie)
1089    }
1090
1091    /// Find the task which will adopt our children after we die.
1092    fn find_reaper(&self) -> Option<ThreadGroupParent> {
1093        let mut weak_parent = self.read().parent.clone()?;
1094        loop {
1095            weak_parent = {
1096                let parent = weak_parent.upgrade();
1097                let parent_state = parent.read();
1098                if parent_state.is_child_subreaper {
1099                    break;
1100                }
1101                match parent_state.parent {
1102                    Some(ref next_parent) => next_parent.clone(),
1103                    None => break,
1104                }
1105            };
1106        }
1107        Some(weak_parent)
1108    }
1109
1110    pub fn setsid<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
1111    where
1112        L: LockBefore<ProcessGroupState>,
1113    {
1114        let pids = self.kernel.pids.read();
1115        if pids.get_process_group(self.leader).is_some() {
1116            return error!(EPERM);
1117        }
1118        let process_group = ProcessGroup::new(self.leader, None);
1119        pids.add_process_group(process_group.clone());
1120        self.write().set_process_group(locked, process_group, &pids);
1121        self.check_orphans(locked, &pids);
1122
1123        Ok(())
1124    }
1125
/// Moves the thread group of `target` into the process group `pgid`.
///
/// A `pgid` of 0 stands for the leader pid of the target's thread group. When the resulting
/// id equals that leader pid, a new process group is created in the target's session;
/// otherwise the process group must already exist.
///
/// Errors:
/// - `ESRCH`: the target is neither this thread group nor one of its children.
/// - `EACCES`: the target is a child whose `did_exec` flag is set.
/// - `EPERM`: the target is a session leader, is in another session than this thread group,
///   or the requested process group does not exist or belongs to another session.
/// - `EINVAL`: the resulting process group id is negative.
/// - any error returned by `security::check_setpgid_access`.
1126    pub fn setpgid<L>(
1127        &self,
1128        locked: &mut Locked<L>,
1129        current_task: &CurrentTask,
1130        target: &Task,
1131        pgid: pid_t,
1132    ) -> Result<(), Errno>
1133    where
1134        L: LockBefore<ProcessGroupState>,
1135    {
1136        let pids = self.kernel.pids.read();
1137
1138        {
1139            let current_process_group = Arc::clone(&self.read().process_group);
1140
1141            // The target process must be either the current process or a child of the current process
1142            let mut target_thread_group = target.thread_group().write();
1143            let is_target_current_process_child =
1144                target_thread_group.parent.as_ref().map(|tg| tg.upgrade().leader)
1145                    == Some(self.leader);
1146            if target_thread_group.leader() != self.leader && !is_target_current_process_child {
1147                return error!(ESRCH);
1148            }
1149
1150            // If the target process is a child of the current task, it must not have executed one of the exec
1151            // functions.
1152            if is_target_current_process_child && target_thread_group.did_exec {
1153                return error!(EACCES);
1154            }
1155
1156            let new_process_group;
1157            {
1158                let target_process_group = &target_thread_group.process_group;
1159
1160                // The target process must not be a session leader and must be in the same session as the current process.
1161                if target_thread_group.leader() == target_process_group.session.leader
1162                    || current_process_group.session != target_process_group.session
1163                {
1164                    return error!(EPERM);
1165                }
1166
1167                let target_pgid = if pgid == 0 { target_thread_group.leader() } else { pgid };
1168                if target_pgid < 0 {
1169                    return error!(EINVAL);
1170                }
1171
                    // The target is already in the requested process group: nothing to change.
1172                if target_pgid == target_process_group.leader {
1173                    return Ok(());
1174                }
1175
1176                // If pgid is not equal to the target process id, the associated process group must exist
1177                // and be in the same session as the target process.
1178                if target_pgid != target_thread_group.leader() {
1179                    new_process_group =
1180                        pids.get_process_group(target_pgid).ok_or_else(|| errno!(EPERM))?;
1181                    if new_process_group.session != target_process_group.session {
1182                        return error!(EPERM);
1183                    }
1184                    security::check_setpgid_access(current_task, target)?;
1185                } else {
1186                    security::check_setpgid_access(current_task, target)?;
1187                    // Create a new process group
1188                    new_process_group =
1189                        ProcessGroup::new(target_pgid, Some(target_process_group.session.clone()));
1190                    pids.add_process_group(new_process_group.clone());
1191                }
1192            }
1193
1194            target_thread_group.set_process_group(locked, new_process_group, &pids);
1195        }
1196
            // The write lock on the target's thread group is released at this point.
1197        target.thread_group().check_orphans(locked, &pids);
1198
1199        Ok(())
1200    }
1201
/// Returns the handle of the real-time interval timer held in this thread group's timer table.
1202    fn itimer_real(&self) -> IntervalTimerHandle {
1203        self.timers.itimer_real()
1204    }
1205
1206    pub fn set_itimer(
1207        &self,
1208        current_task: &CurrentTask,
1209        which: u32,
1210        value: itimerval,
1211    ) -> Result<itimerval, Errno> {
1212        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1213            // We don't support setting these timers.
1214            // The gvisor test suite clears ITIMER_PROF as part of its test setup logic, so we support
1215            // clearing these values.
1216            if value.it_value.tv_sec == 0 && value.it_value.tv_usec == 0 {
1217                return Ok(itimerval::default());
1218            }
1219            track_stub!(TODO("https://fxbug.dev/322874521"), "Unsupported itimer type", which);
1220            return error!(ENOTSUP);
1221        }
1222
1223        if which != ITIMER_REAL {
1224            return error!(EINVAL);
1225        }
1226        let itimer_real = self.itimer_real();
1227        let prev_remaining = itimer_real.time_remaining();
1228        if value.it_value.tv_sec != 0 || value.it_value.tv_usec != 0 {
1229            itimer_real.arm(current_task, itimerspec_from_itimerval(value), false)?;
1230        } else {
1231            itimer_real.disarm(current_task)?;
1232        }
1233        Ok(itimerval {
1234            it_value: timeval_from_duration(prev_remaining.remainder),
1235            it_interval: timeval_from_duration(prev_remaining.interval),
1236        })
1237    }
1238
1239    pub fn get_itimer(&self, which: u32) -> Result<itimerval, Errno> {
1240        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1241            // We don't support setting these timers, so we can accurately report that these are not set.
1242            return Ok(itimerval::default());
1243        }
1244        if which != ITIMER_REAL {
1245            return error!(EINVAL);
1246        }
1247        let remaining = self.itimer_real().time_remaining();
1248        Ok(itimerval {
1249            it_value: timeval_from_duration(remaining.remainder),
1250            it_interval: timeval_from_duration(remaining.interval),
1251        })
1252    }
1253
1254    /// Check whether the stop state is compatible with `new_stopped`. If it is return it,
1255    /// otherwise, return None.
1256    fn check_stopped_state(
1257        &self,
1258        new_stopped: StopState,
1259        finalize_only: bool,
1260    ) -> Option<StopState> {
1261        let stopped = self.load_stopped();
1262        if finalize_only && !stopped.is_stopping_or_stopped() {
1263            return Some(stopped);
1264        }
1265
1266        if stopped.is_illegal_transition(new_stopped) {
1267            return Some(stopped);
1268        }
1269
1270        return None;
1271    }
1272
1273    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
1274    /// does not update the signal.  If |finalize_only| is set, will check that
1275    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
1276    /// before executing it.
1277    ///
1278    /// Returns the latest stop state after any changes.
1279    pub fn set_stopped(
1280        &self,
1281        new_stopped: StopState,
1282        siginfo: Option<SignalInfo>,
1283        finalize_only: bool,
1284    ) -> StopState {
1285        // Perform an early return check to see if we can avoid taking the lock.
1286        if let Some(stopped) = self.check_stopped_state(new_stopped, finalize_only) {
1287            return stopped;
1288        }
1289
1290        self.write().set_stopped(new_stopped, siginfo, finalize_only)
1291    }
1292
1293    /// Ensures |session| is the controlling session inside of |terminal_controller|, and returns a
1294    /// reference to the |TerminalController|.
1295    fn check_terminal_controller(
1296        session: &Arc<Session>,
1297        terminal_controller: &Option<TerminalController>,
1298    ) -> Result<(), Errno> {
1299        if let Some(terminal_controller) = terminal_controller {
1300            if let Some(terminal_session) = terminal_controller.session.upgrade() {
1301                if Arc::ptr_eq(session, &terminal_session) {
1302                    return Ok(());
1303                }
1304            }
1305        }
1306        error!(ENOTTY)
1307    }
1308
1309    pub fn get_foreground_process_group(&self, terminal: &Terminal) -> Result<pid_t, Errno> {
1310        let state = self.read();
1311        let process_group = &state.process_group;
1312        let terminal_state = terminal.read();
1313
1314        // "When fd does not refer to the controlling terminal of the calling
1315        // process, -1 is returned" - tcgetpgrp(3)
1316        Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1317        let pid = process_group.session.read().get_foreground_process_group_leader();
1318        Ok(pid)
1319    }
1320
/// Makes the process group `pgid` the foreground process group of this thread group's session.
///
/// When the caller's process group is not the current foreground group, and SIGTTOU is neither
/// in `current_task`'s signal mask nor set to `SIG_IGN`, the foreground group is left unchanged,
/// SIGTTOU is sent to the caller's process group and `EINTR` is returned.
///
/// Other errors:
/// - `ENOTTY`: `terminal` is not controlled by this thread group's session.
/// - `EINVAL`: `pgid` is negative.
/// - `ESRCH`: no process group with id `pgid` exists.
/// - `EPERM`: the process group `pgid` belongs to another session.
1321    pub fn set_foreground_process_group<L>(
1322        &self,
1323        locked: &mut Locked<L>,
1324        current_task: &CurrentTask,
1325        terminal: &Terminal,
1326        pgid: pid_t,
1327    ) -> Result<(), Errno>
1328    where
1329        L: LockBefore<ProcessGroupState>,
1330    {
1331        let process_group;
1332        let send_ttou;
1333        {
1334            // Keep locks to ensure atomicity.
1335            let pids = self.kernel.pids.read();
1336            let state = self.read();
1337            process_group = Arc::clone(&state.process_group);
1338            let terminal_state = terminal.read();
1339            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1340
1341            // pgid must not be negative.
1342            if pgid < 0 {
1343                return error!(EINVAL);
1344            }
1345
1346            let new_process_group = pids.get_process_group(pgid).ok_or_else(|| errno!(ESRCH))?;
1347            if new_process_group.session != process_group.session {
1348                return error!(EPERM);
1349            }
1350
1351            let mut session_state = process_group.session.write();
1352            // If the calling process is a member of a background group and not ignoring SIGTTOU, a
1353            // SIGTTOU signal is sent to all members of this background process group.
1354            send_ttou = process_group.leader != session_state.get_foreground_process_group_leader()
1355                && !current_task.read().signal_mask().has_signal(SIGTTOU)
1356                && self.signal_actions.get(SIGTTOU).sa_handler != SIG_IGN;
1357
1358            if !send_ttou {
1359                session_state.set_foreground_process_group(&new_process_group);
1360            }
1361        }
1362
1363        // Locks must not be held when sending signals.
1364        if send_ttou {
1365            process_group.send_signals(locked, &[SIGTTOU]);
1366            return error!(EINTR);
1367        }
1368
1369        Ok(())
1370    }
1371
/// Makes `terminal` the controlling terminal of this thread group's session.
///
/// `is_main` is forwarded to `ControllingTerminal::new`. `steal` allows taking the terminal
/// away from another session. A terminal that is not `is_readable` requires `CAP_SYS_ADMIN`.
///
/// Errors:
/// - `EINVAL`: this thread group is not the session leader, or the session already has a
///   controlling terminal.
/// - `EPERM`: the terminal controls another session and `steal` is false.
/// - any error returned by `security::check_task_capable(current_task, CAP_SYS_ADMIN)`, which
///   is consulted when stealing and when `is_readable` is false.
1372    pub fn set_controlling_terminal(
1373        &self,
1374        current_task: &CurrentTask,
1375        terminal: &Terminal,
1376        is_main: bool,
1377        steal: bool,
1378        is_readable: bool,
1379    ) -> Result<(), Errno> {
1380        // Keep locks to ensure atomicity.
1381        let state = self.read();
1382        let process_group = &state.process_group;
1383        let mut terminal_state = terminal.write();
1384        let mut session_writer = process_group.session.write();
1385
1386        // "The calling process must be a session leader and not have a
1387        // controlling terminal already." - tty_ioctl(4)
1388        if process_group.session.leader != self.leader
1389            || session_writer.controlling_terminal.is_some()
1390        {
1391            return error!(EINVAL);
1392        }
1393
            // Set once the CAP_SYS_ADMIN check has passed, so that it is not repeated below.
1394        let mut has_admin_capability_determined = false;
1395
1396        // "If this terminal is already the controlling terminal of a different
1397        // session group, then the ioctl fails with EPERM, unless the caller
1398        // has the CAP_SYS_ADMIN capability and arg equals 1, in which case the
1399        // terminal is stolen, and all processes that had it as controlling
1400        // terminal lose it." - tty_ioctl(4)
1401        if let Some(other_session) =
1402            terminal_state.controller.as_ref().and_then(|cs| cs.session.upgrade())
1403        {
1404            if other_session != process_group.session {
1405                if !steal {
1406                    return error!(EPERM);
1407                }
1408                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1409                has_admin_capability_determined = true;
1410
1411                // Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
1412                other_session.write().controlling_terminal = None;
1413            }
1414        }
1415
            // A terminal that is not readable also requires CAP_SYS_ADMIN.
1416        if !is_readable && !has_admin_capability_determined {
1417            security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1418        }
1419
            // Link both directions: the session records the terminal, the terminal its session.
1420        session_writer.controlling_terminal = Some(ControllingTerminal::new(terminal, is_main));
1421        terminal_state.controller = TerminalController::new(&process_group.session);
1422        Ok(())
1423    }
1424
/// Gives up `terminal` as the controlling terminal of this thread group's session.
///
/// Both the session's `controlling_terminal` and the terminal's `controller` are cleared. If
/// this thread group is the session leader, SIGHUP and SIGCONT are sent afterwards.
///
/// Returns `ENOTTY` when `terminal` is not controlled by the session, or when the session's
/// controlling terminal does not match `terminal` and `is_main`.
1425    pub fn release_controlling_terminal<L>(
1426        &self,
1427        locked: &mut Locked<L>,
1428        _current_task: &CurrentTask,
1429        terminal: &Terminal,
1430        is_main: bool,
1431    ) -> Result<(), Errno>
1432    where
1433        L: LockBefore<ProcessGroupState>,
1434    {
1435        let process_group;
1436        {
1437            // Keep locks to ensure atomicity.
1438            let state = self.read();
1439            process_group = Arc::clone(&state.process_group);
1440            let mut terminal_state = terminal.write();
1441            let mut session_writer = process_group.session.write();
1442
1443            // tty must be the controlling terminal.
1444            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1445            if !session_writer
1446                .controlling_terminal
1447                .as_ref()
1448                .map_or(false, |ct| ct.matches(terminal, is_main))
1449            {
1450                return error!(ENOTTY);
1451            }
1452
1453            // "If the process was session leader, then send SIGHUP and SIGCONT to the foreground
1454            // process group and all processes in the current session lose their controlling terminal."
1455            // - tty_ioctl(4)
1456
1457            // Remove tty as the controlling tty for each process in the session, then
1458            // send them SIGHUP and SIGCONT.
1459
1460            session_writer.controlling_terminal = None;
1461            terminal_state.controller = None;
1462        }
1463
            // The locks taken above are released before any signal is sent.
            // NOTE(review): the signals go to the caller's own process group, whereas the quote
            // above speaks of the foreground process group - confirm this is intended.
1464        if process_group.session.leader == self.leader {
1465            process_group.send_signals(locked, &[SIGHUP, SIGCONT]);
1466        }
1467
1468        Ok(())
1469    }
1470
1471    fn check_orphans<L>(&self, locked: &mut Locked<L>, pids: &PidTable)
1472    where
1473        L: LockBefore<ProcessGroupState>,
1474    {
1475        let mut thread_groups = self.read().children().collect::<Vec<_>>();
1476        let this = self.weak_self.upgrade().unwrap();
1477        thread_groups.push(this);
1478        let process_groups =
1479            thread_groups.iter().map(|tg| Arc::clone(&tg.read().process_group)).unique();
1480        for pg in process_groups {
1481            pg.check_orphaned(locked, pids);
1482        }
1483    }
1484
1485    pub fn get_rlimit<L>(&self, locked: &mut Locked<L>, resource: Resource) -> u64
1486    where
1487        L: LockBefore<ThreadGroupLimits>,
1488    {
1489        self.limits.lock(locked).get(resource).rlim_cur
1490    }
1491
1492    /// Adjusts the rlimits of the ThreadGroup to which `target_task` belongs to.
1493    pub fn adjust_rlimits<L>(
1494        locked: &mut Locked<L>,
1495        current_task: &CurrentTask,
1496        target_task: &Task,
1497        resource: Resource,
1498        maybe_new_limit: Option<rlimit>,
1499    ) -> Result<rlimit, Errno>
1500    where
1501        L: LockBefore<ThreadGroupLimits>,
1502    {
1503        let thread_group = target_task.thread_group();
1504        let can_increase_rlimit = security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE);
1505        let mut limit_state = thread_group.limits.lock(locked);
1506        let old_limit = limit_state.get(resource);
1507        if let Some(new_limit) = maybe_new_limit {
1508            if new_limit.rlim_max > old_limit.rlim_max && !can_increase_rlimit {
1509                return error!(EPERM);
1510            }
1511            security::task_setrlimit(current_task, &target_task, old_limit, new_limit)?;
1512            limit_state.set(resource, new_limit)
1513        }
1514        Ok(old_limit)
1515    }
1516
1517    pub fn time_stats(&self) -> TaskTimeStats {
1518        let process: &zx::Process = if self.process.as_handle_ref().is_invalid() {
1519            // `process` must be valid for all tasks, except `kthreads`. In that case get the
1520            // stats from starnix process.
1521            assert_eq!(
1522                self as *const ThreadGroup,
1523                Arc::as_ptr(&self.kernel.kthreads.system_thread_group())
1524            );
1525            &self.kernel.kthreads.starnix_process
1526        } else {
1527            &self.process
1528        };
1529
1530        let info =
1531            zx::Task::get_runtime_info(process).expect("Failed to get starnix process stats");
1532        TaskTimeStats {
1533            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
1534            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
1535            system_time: zx::MonotonicDuration::default(),
1536        }
1537    }
1538
1539    /// Calls `f` for each task traced by this thread_group that matches the given
1540    /// selector. `f` runs while this thread group's ptracees lock and the
1541    /// tracee's TaskMutableState write lock are both held.
        ///
        /// Tracees that cannot be found in `pids`, or whose `ptrace` state is `None`, are
        /// skipped.
1542    pub fn get_ptracees_and(
1543        &self,
1544        selector: &ProcessSelector,
1545        pids: &PidTable,
1546        f: &mut dyn FnMut(&Task, &TaskMutableState),
1547    ) {
1548        for tracee in self
1549            .ptracees
1550            .lock()
1551            .keys()
1552            .filter(|tracee_tid| selector.match_tid(**tracee_tid, &pids))
1553            .map(|tracee_tid| pids.get_task(*tracee_tid))
1554        {
1555            if let Ok(task_ref) = tracee {
1556                let task_state = task_ref.write();
                    // Only tasks that still carry ptrace state are handed to the callback.
1557                if task_state.ptrace.is_some() {
1558                    f(&task_ref, &task_state);
1559                }
1560            }
1561        }
1562    }
1563
1564    /// Returns a tracee whose state has changed, so that waitpid can report on
1565    /// it. If this returns a value, and the pid is being traced, the tracer
1566    /// thread is deemed to have seen the tracee ptrace-stop for the purposes of
1567    /// PTRACE_LISTEN.
1568    pub fn get_waitable_ptracee(
1569        &self,
1570        selector: &ProcessSelector,
1571        options: &WaitingOptions,
1572        pids: &mut PidTable,
1573    ) -> Option<WaitResult> {
1574        // This checks to see if the target is a zombie ptracee.
1575        let waitable_entry = self.write().zombie_ptracees.get_waitable_entry(selector, options);
1576        match waitable_entry {
1577            None => (),
1578            Some((zombie, None)) => return Some(zombie.to_wait_result()),
1579            Some((zombie, Some((tg, z)))) => {
1580                if let Some(tg) = tg.upgrade() {
1581                    if Arc::as_ptr(&tg) != self as *const Self {
1582                        tg.do_zombie_notifications(z);
1583                    } else {
1584                        {
1585                            let mut state = tg.write();
1586                            state.children.remove(&z.pid());
1587                            state
1588                                .deferred_zombie_ptracers
1589                                .retain(|dzp| dzp.tracee_thread_group_key != z.thread_group_key);
1590                        }
1591
1592                        z.release(pids);
1593                    };
1594                }
1595                return Some(zombie.to_wait_result());
1596            }
1597        }
1598
1599        let mut tasks = vec![];
1600
1601        // This checks to see if the target is a running ptracee.
1602        self.get_ptracees_and(selector, pids, &mut |task: &Task, _| {
1603            tasks.push(task.weak_self.clone());
1604        });
1605        for task in tasks {
1606            let Some(task_ref) = task.upgrade() else {
1607                continue;
1608            };
1609
1610            let process_state = &mut task_ref.thread_group().write();
1611            let mut task_state = task_ref.write();
1612            if task_state
1613                .ptrace
1614                .as_ref()
1615                .is_some_and(|ptrace| ptrace.is_waitable(task_ref.load_stopped(), options))
1616            {
1617                // We've identified a potential target.  Need to return either
1618                // the process's information (if we are in group-stop) or the
1619                // thread's information (if we are in a different stop).
1620
1621                // The shared information:
1622                let mut pid: i32 = 0;
1623                let info = process_state.tasks.values().next().unwrap().info().clone();
1624                let uid = info.real_creds().uid;
1625                let mut exit_status = None;
1626                let exit_signal = process_state.exit_signal.clone();
1627                let time_stats =
1628                    process_state.base.time_stats() + process_state.children_time_stats;
1629                let task_stopped = task_ref.load_stopped();
1630
1631                #[derive(PartialEq)]
1632                enum ExitType {
1633                    None,
1634                    Cont,
1635                    Stop,
1636                    Kill,
1637                }
1638                if process_state.is_waitable() {
1639                    let ptrace = &mut task_state.ptrace;
1640                    // The information for processes, if we were in group stop.
1641                    let process_stopped = process_state.base.load_stopped();
1642                    let mut fn_type = ExitType::None;
1643                    if process_stopped == StopState::Awake && options.wait_for_continued {
1644                        fn_type = ExitType::Cont;
1645                    }
1646                    let mut event = ptrace
1647                        .as_ref()
1648                        .map_or(PtraceEvent::None, |ptrace| {
1649                            ptrace.event_data.as_ref().map_or(PtraceEvent::None, |data| data.event)
1650                        })
1651                        .clone();
1652                    // Tasks that are ptrace'd always get stop notifications.
1653                    if process_stopped == StopState::GroupStopped
1654                        && (options.wait_for_stopped || ptrace.is_some())
1655                    {
1656                        fn_type = ExitType::Stop;
1657                    }
1658                    if fn_type != ExitType::None {
1659                        let siginfo = if options.keep_waitable_state {
1660                            process_state.last_signal.clone()
1661                        } else {
1662                            process_state.last_signal.take()
1663                        };
1664                        if let Some(mut siginfo) = siginfo {
1665                            if task_ref.thread_group().load_stopped() == StopState::GroupStopped
1666                                && ptrace.as_ref().is_some_and(|ptrace| ptrace.is_seized())
1667                            {
1668                                if event == PtraceEvent::None {
1669                                    event = PtraceEvent::Stop;
1670                                }
1671                                siginfo.code |= (PtraceEvent::Stop as i32) << 8;
1672                            }
1673                            if siginfo.signal == SIGKILL {
1674                                fn_type = ExitType::Kill;
1675                            }
1676                            exit_status = match fn_type {
1677                                ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1678                                ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1679                                ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1680                                _ => None,
1681                            };
1682                        }
1683                        // Clear the wait status of the ptrace, because we're
1684                        // using the tg status instead.
1685                        ptrace
1686                            .as_mut()
1687                            .map(|ptrace| ptrace.get_last_signal(options.keep_waitable_state));
1688                    }
1689                    pid = process_state.base.leader;
1690                }
1691                if exit_status == None {
1692                    if let Some(ptrace) = task_state.ptrace.as_mut() {
1693                        // The information for the task, if we were in a non-group stop.
1694                        let mut fn_type = ExitType::None;
1695                        let event = ptrace
1696                            .event_data
1697                            .as_ref()
1698                            .map_or(PtraceEvent::None, |event| event.event);
1699                        if task_stopped == StopState::Awake {
1700                            fn_type = ExitType::Cont;
1701                        }
1702                        if task_stopped.is_stopping_or_stopped()
1703                            || ptrace.stop_status == PtraceStatus::Listening
1704                        {
1705                            fn_type = ExitType::Stop;
1706                        }
1707                        if fn_type != ExitType::None {
1708                            if let Some(siginfo) =
1709                                ptrace.get_last_signal(options.keep_waitable_state)
1710                            {
1711                                if siginfo.signal == SIGKILL {
1712                                    fn_type = ExitType::Kill;
1713                                }
1714                                exit_status = match fn_type {
1715                                    ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1716                                    ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1717                                    ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1718                                    _ => None,
1719                                };
1720                            }
1721                        }
1722                        pid = task_ref.get_tid();
1723                    }
1724                }
1725                if let Some(exit_status) = exit_status {
1726                    return Some(WaitResult {
1727                        pid,
1728                        uid,
1729                        exit_info: ProcessExitInfo { status: exit_status, exit_signal },
1730                        time_stats,
1731                    });
1732                }
1733            }
1734        }
1735        None
1736    }
1737
1738    /// Attempts to send an unchecked signal to this thread group.
1739    ///
1740    /// - `current_task`: The task that is sending the signal.
1741    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1742    /// where rights are to be checked but no signal is actually sent.
1743    ///
1744    /// # Returns
1745    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1746    /// the error that was encountered.
1747    pub fn send_signal_unchecked(
1748        &self,
1749        current_task: &CurrentTask,
1750        unchecked_signal: UncheckedSignal,
1751    ) -> Result<(), Errno> {
1752        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1753            let signal_info = SignalInfo::with_detail(
1754                signal,
1755                SI_USER as i32,
1756                SignalDetail::Kill {
1757                    pid: current_task.thread_group().leader,
1758                    uid: current_task.current_creds().uid,
1759                },
1760            );
1761
1762            self.write().send_signal(signal_info);
1763        }
1764
1765        Ok(())
1766    }
1767
1768    /// Sends a signal to this thread_group without performing any access checks.
1769    ///
1770    /// # Safety
1771    /// This is unsafe, because it should only be called by tools and tests.
1772    pub unsafe fn send_signal_unchecked_debug(
1773        &self,
1774        current_task: &CurrentTask,
1775        unchecked_signal: UncheckedSignal,
1776    ) -> Result<(), Errno> {
1777        let signal = Signal::try_from(unchecked_signal)?;
1778        let signal_info = SignalInfo::with_detail(
1779            signal,
1780            SI_USER as i32,
1781            SignalDetail::Kill {
1782                pid: current_task.thread_group().leader,
1783                uid: current_task.current_creds().uid,
1784            },
1785        );
1786
1787        self.write().send_signal(signal_info);
1788        Ok(())
1789    }
1790
1791    /// Attempts to send an unchecked signal to this thread group, with info read from
1792    /// `siginfo_ref`.
1793    ///
1794    /// - `current_task`: The task that is sending the signal.
1795    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1796    /// where rights are to be checked but no signal is actually sent.
1797    /// - `siginfo_ref`: The siginfo that will be enqueued.
1798    /// - `options`: Options for how to convert the siginfo into a signal info.
1799    ///
1800    /// # Returns
1801    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1802    /// the error that was encountered.
1803    #[track_caller]
1804    pub fn send_signal_unchecked_with_info(
1805        &self,
1806        current_task: &CurrentTask,
1807        unchecked_signal: UncheckedSignal,
1808        siginfo_ref: UserAddress,
1809        options: IntoSignalInfoOptions,
1810    ) -> Result<(), Errno> {
1811        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1812            let siginfo = UncheckedSignalInfo::read_from_siginfo(current_task, siginfo_ref)?;
1813            if self.leader != current_task.get_pid()
1814                && (siginfo.code() >= 0 || siginfo.code() == SI_TKILL)
1815            {
1816                return error!(EPERM);
1817            }
1818
1819            self.write().send_signal(siginfo.into_signal_info(signal, options)?);
1820        }
1821
1822        Ok(())
1823    }
1824
1825    /// Checks whether or not `current_task` can signal this thread group with `unchecked_signal`.
1826    ///
1827    /// Returns:
1828    ///   - `Ok(Some(Signal))` if the signal passed checks and should be sent.
1829    ///   - `Ok(None)` if the signal passed checks, but should not be sent. This is used by
1830    ///   userspace for permission checks.
1831    ///   - `Err(_)` if the permission checks failed.
1832    fn check_signal_access(
1833        &self,
1834        current_task: &CurrentTask,
1835        unchecked_signal: UncheckedSignal,
1836    ) -> Result<Option<Signal>, Errno> {
1837        // Pick an arbitrary task in thread_group to check permissions.
1838        //
1839        // Tasks can technically have different credentials, but in practice they are kept in sync.
1840        let target_task = self.read().get_any_task()?;
1841        current_task.can_signal(&target_task, unchecked_signal)?;
1842
1843        // 0 is a sentinel value used to do permission checks.
1844        if unchecked_signal.is_zero() {
1845            return Ok(None);
1846        }
1847
1848        let signal = Signal::try_from(unchecked_signal)?;
1849        security::check_signal_access(current_task, &target_task, signal)?;
1850
1851        Ok(Some(signal))
1852    }
1853
1854    pub fn has_signal_queued(&self, signal: Signal) -> bool {
1855        self.pending_signals.lock().has_queued(signal)
1856    }
1857
1858    pub fn num_signals_queued(&self) -> usize {
1859        self.pending_signals.lock().num_queued()
1860    }
1861
1862    pub fn get_pending_signals(&self) -> SigSet {
1863        self.pending_signals.lock().pending()
1864    }
1865
1866    pub fn is_any_signal_allowed_by_mask(&self, mask: SigSet) -> bool {
1867        self.pending_signals.lock().is_any_allowed_by_mask(mask)
1868    }
1869
1870    pub fn take_next_signal_where<F>(&self, predicate: F) -> Option<SignalInfo>
1871    where
1872        F: Fn(&SignalInfo) -> bool,
1873    {
1874        let mut signals = self.pending_signals.lock();
1875        let r = signals.take_next_where(predicate);
1876        self.has_pending_signals.store(!signals.is_empty(), Ordering::Relaxed);
1877        r
1878    }
1879
1880    /// Drive this `ThreadGroup` to exit, allowing it time to handle SIGTERM before sending SIGKILL.
1881    ///
1882    /// Returns once `ThreadGroup::exit()` has completed.
1883    ///
1884    /// Must be called from the system task.
1885    pub async fn shut_down(this: Weak<Self>) {
1886        const SHUTDOWN_SIGNAL_HANDLING_TIMEOUT: zx::MonotonicDuration =
1887            zx::MonotonicDuration::from_seconds(1);
1888
1889        // Prepare for shutting down the thread group.
1890        let (tg_name, mut on_exited) = {
1891            // Nest this upgraded access so upgraded references aren't held across await-points.
1892            let Some(this) = this.upgrade() else {
1893                return;
1894            };
1895
1896            let mut state = this.write();
1897            if state.is_exited() {
1898                // Do not set an exit notifier on an exited thread group. It will never be notified.
1899                return;
1900            }
1901
1902            // Register a channel to be notified when exit() is complete.
1903            let (on_exited_send, on_exited) = futures::channel::oneshot::channel();
1904            state.exit_notifier = Some(on_exited_send);
1905
1906            // We want to be able to log about this thread group without upgrading the `Weak`.
1907            let tg_name = format!("{this:?}");
1908
1909            (tg_name, on_exited)
1910        };
1911
1912        log_debug!(tg:% = tg_name; "shutting down thread group, sending SIGTERM");
1913        this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGTERM)));
1914
1915        // Give thread groups some time to handle SIGTERM, proceeding early if they exit
1916        let timeout = fuchsia_async::Timer::new(SHUTDOWN_SIGNAL_HANDLING_TIMEOUT);
1917        futures::pin_mut!(timeout);
1918
1919        // Use select_biased instead of on_timeout() so that we can await on on_exited later
1920        futures::select_biased! {
1921            _ = &mut on_exited => (),
1922            _ = timeout => {
1923                log_debug!(tg:% = tg_name; "sending SIGKILL");
1924                this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGKILL)));
1925            },
1926        };
1927
1928        log_debug!(tg:% = tg_name; "waiting for exit");
1929        // It doesn't matter whether ThreadGroup::exit() was called or the process exited with
1930        // a return code and dropped the sender end of the channel.
1931        on_exited.await.ok();
1932        log_debug!(tg:% = tg_name; "thread group shutdown complete");
1933    }
1934
1935    /// Returns the KOID of the process for this thread group.
1936    /// This method should be used to when mapping 32 bit linux process ids to KOIDs
1937    /// to avoid breaking the encapsulation of the zx::process within the ThreadGroup.
1938    /// This encapsulation is important since the relationship between the ThreadGroup
1939    /// and the Process may change over time. See [ThreadGroup::process] for more details.
1940    pub fn get_process_koid(&self) -> Result<Koid, Status> {
1941        self.process.koid()
1942    }
1943}
1944
1945pub enum WaitableChildResult {
1946    ReadyNow(Box<WaitResult>),
1947    ShouldWait,
1948    NoneFound,
1949}
1950
1951#[apply(state_implementation!)]
1952impl ThreadGroupMutableState<Base = ThreadGroup> {
1953    pub fn leader(&self) -> pid_t {
1954        self.base.leader
1955    }
1956
1957    pub fn leader_command(&self) -> TaskCommand {
1958        self.get_task(self.leader())
1959            .map(|l| l.command())
1960            .unwrap_or_else(|| TaskCommand::new(b"<leader exited>"))
1961    }
1962
1963    pub fn is_running(&self) -> bool {
1964        matches!(self.run_state, ThreadGroupRunState::Running)
1965    }
1966
1967    pub fn is_exited(&self) -> bool {
1968        matches!(self.run_state, ThreadGroupRunState::Exited(_))
1969    }
1970
1971    fn set_exiting(&mut self, exit_status: ExitStatus) {
1972        self.run_state = ThreadGroupRunState::Exiting(exit_status);
1973    }
1974
1975    fn set_exited(&mut self) {
1976        let ThreadGroupRunState::Exiting(exit_status) = std::mem::take(&mut self.run_state) else {
1977            panic!("Must transition from Exiting to Exited");
1978        };
1979        self.run_state = ThreadGroupRunState::Exited(exit_status);
1980
1981        if let Some(notifier) = self.exit_notifier.take() {
1982            let _ = notifier.send(());
1983        }
1984    }
1985
1986    pub fn children(&self) -> impl Iterator<Item = Arc<ThreadGroup>> + '_ {
1987        self.children.values().map(|v| {
1988            v.upgrade().expect("Weak references to processes in ThreadGroup must always be valid")
1989        })
1990    }
1991
1992    pub fn tasks(&self) -> Vec<Arc<Task>> {
1993        self.tasks.values().flat_map(|t| t.upgrade()).collect()
1994    }
1995
1996    pub fn task_ids(&self) -> impl Iterator<Item = &tid_t> {
1997        self.tasks.keys()
1998    }
1999
2000    pub fn contains_task(&self, tid: tid_t) -> bool {
2001        self.tasks.contains_key(&tid)
2002    }
2003
2004    pub fn get_task(&self, tid: tid_t) -> Option<Arc<Task>> {
2005        self.tasks.get(&tid).and_then(|t| t.upgrade())
2006    }
2007
2008    pub fn tasks_count(&self) -> usize {
2009        self.tasks.len()
2010    }
2011
2012    pub fn get_ppid(&self) -> pid_t {
2013        match &self.parent {
2014            Some(parent) => parent.upgrade().leader,
2015            None => 0,
2016        }
2017    }
2018
2019    fn set_process_group<L>(
2020        &mut self,
2021        locked: &mut Locked<L>,
2022        process_group: Arc<ProcessGroup>,
2023        pids: &PidTable,
2024    ) where
2025        L: LockBefore<ProcessGroupState>,
2026    {
2027        if self.process_group == process_group {
2028            return;
2029        }
2030        self.leave_process_group(locked, pids);
2031        self.process_group = process_group;
2032        self.process_group.insert(locked, self.base);
2033    }
2034
2035    fn leave_process_group<L>(&mut self, locked: &mut Locked<L>, pids: &PidTable)
2036    where
2037        L: LockBefore<ProcessGroupState>,
2038    {
2039        if self.process_group.remove(locked, self.base) {
2040            self.process_group.session.write().remove(self.process_group.leader);
2041            pids.remove_process_group(self.process_group.leader);
2042        }
2043    }
2044
2045    /// Indicates whether the thread group is waitable via waitid and waitpid for
2046    /// either WSTOPPED or WCONTINUED.
2047    pub fn is_waitable(&self) -> bool {
2048        return self.last_signal.is_some() && !self.base.load_stopped().is_in_progress();
2049    }
2050
2051    pub fn get_waitable_zombie(
2052        &mut self,
2053        zombie_list: &dyn Fn(&mut ThreadGroupMutableState) -> &mut Vec<OwnedRef<ZombieProcess>>,
2054        selector: &ProcessSelector,
2055        options: &WaitingOptions,
2056        pids: &mut PidTable,
2057    ) -> Option<WaitResult> {
2058        // We look for the last zombie in the vector that matches pid selector and waiting options
2059        let selected_zombie_position = zombie_list(self)
2060            .iter()
2061            .rev()
2062            .position(|zombie| zombie.matches_selector_and_waiting_option(selector, options))
2063            .map(|position_starting_from_the_back| {
2064                zombie_list(self).len() - 1 - position_starting_from_the_back
2065            });
2066
2067        selected_zombie_position.map(|position| {
2068            if options.keep_waitable_state {
2069                zombie_list(self)[position].to_wait_result()
2070            } else {
2071                let zombie = zombie_list(self).remove(position);
2072                self.children_time_stats += zombie.time_stats;
2073                let result = zombie.to_wait_result();
2074                zombie.release(pids);
2075                result
2076            }
2077        })
2078    }
2079
2080    pub fn is_correct_exit_signal(for_clone: bool, exit_code: Option<Signal>) -> bool {
2081        for_clone == (exit_code != Some(SIGCHLD))
2082    }
2083
2084    fn get_waitable_running_children(
2085        &self,
2086        selector: &ProcessSelector,
2087        options: &WaitingOptions,
2088        pids: &PidTable,
2089    ) -> WaitableChildResult {
2090        // The children whose pid matches the pid selector queried.
2091        let filter_children_by_pid_selector = |child: &ThreadGroup| match *selector {
2092            ProcessSelector::Any => true,
2093            ProcessSelector::Pid(pid) => child.leader == pid,
2094            ProcessSelector::Pgid(pgid) => {
2095                pids.get_process_group(pgid).as_ref() == Some(&child.read().process_group)
2096            }
2097            ProcessSelector::Process(ref key) => *key == ThreadGroupKey::from(child),
2098        };
2099
2100        // The children whose exit signal matches the waiting options queried.
2101        let filter_children_by_waiting_options = |child: &ThreadGroup| {
2102            if options.wait_for_all {
2103                return true;
2104            }
2105            Self::is_correct_exit_signal(options.wait_for_clone, child.read().exit_signal)
2106        };
2107
2108        // If wait_for_exited flag is disabled or no exited children were found we look for running
2109        // children.
2110        let mut selected_children = self
2111            .children
2112            .values()
2113            .map(|t| t.upgrade().unwrap())
2114            .filter(|tg| filter_children_by_pid_selector(&tg))
2115            .filter(|tg| filter_children_by_waiting_options(&tg))
2116            .peekable();
2117        if selected_children.peek().is_none() {
2118            // There still might be a process that ptrace hasn't looked at yet.
2119            if self.deferred_zombie_ptracers.iter().any(|dzp| match *selector {
2120                ProcessSelector::Any => true,
2121                ProcessSelector::Pid(pid) => dzp.tracee_thread_group_key.pid() == pid,
2122                ProcessSelector::Pgid(pgid) => pgid == dzp.tracee_pgid,
2123                ProcessSelector::Process(ref key) => *key == dzp.tracee_thread_group_key,
2124            }) {
2125                return WaitableChildResult::ShouldWait;
2126            }
2127
2128            return WaitableChildResult::NoneFound;
2129        }
2130        for child in selected_children {
2131            let child = child.write();
2132            if child.last_signal.is_some() {
2133                let build_wait_result = |mut child: ThreadGroupWriteGuard<'_>,
2134                                         exit_status: &dyn Fn(SignalInfo) -> ExitStatus|
2135                 -> WaitResult {
2136                    let siginfo = if options.keep_waitable_state {
2137                        child.last_signal.clone().unwrap()
2138                    } else {
2139                        child.last_signal.take().unwrap()
2140                    };
2141                    let exit_status = if siginfo.signal == SIGKILL {
2142                        // This overrides the stop/continue choice.
2143                        ExitStatus::Kill(siginfo)
2144                    } else {
2145                        exit_status(siginfo)
2146                    };
2147                    let info = child.tasks.values().next().unwrap().info();
2148                    let uid = info.real_creds().uid;
2149                    WaitResult {
2150                        pid: child.base.leader,
2151                        uid,
2152                        exit_info: ProcessExitInfo {
2153                            status: exit_status,
2154                            exit_signal: child.exit_signal,
2155                        },
2156                        time_stats: child.base.time_stats() + child.children_time_stats,
2157                    }
2158                };
2159                let child_stopped = child.base.load_stopped();
2160                if child_stopped == StopState::Awake && options.wait_for_continued {
2161                    return WaitableChildResult::ReadyNow(Box::new(build_wait_result(
2162                        child,
2163                        &|siginfo| ExitStatus::Continue(siginfo, PtraceEvent::None),
2164                    )));
2165                }
2166                if child_stopped == StopState::GroupStopped && options.wait_for_stopped {
2167                    return WaitableChildResult::ReadyNow(Box::new(build_wait_result(
2168                        child,
2169                        &|siginfo| ExitStatus::Stop(siginfo, PtraceEvent::None),
2170                    )));
2171                }
2172            }
2173        }
2174
2175        WaitableChildResult::ShouldWait
2176    }
2177
2178    /// Returns any waitable child matching the given `selector` and `options`. Returns None if no
2179    /// child matching the selector is waitable. Returns ECHILD if no child matches the selector at
2180    /// all.
2181    ///
2182    /// Will remove the waitable status from the child depending on `options`.
2183    pub fn get_waitable_child(
2184        &mut self,
2185        selector: &ProcessSelector,
2186        options: &WaitingOptions,
2187        pids: &mut PidTable,
2188    ) -> WaitableChildResult {
2189        if options.wait_for_exited {
2190            if let Some(waitable_zombie) = self.get_waitable_zombie(
2191                &|state: &mut ThreadGroupMutableState| &mut state.zombie_children,
2192                selector,
2193                options,
2194                pids,
2195            ) {
2196                return WaitableChildResult::ReadyNow(Box::new(waitable_zombie));
2197            }
2198        }
2199
2200        self.get_waitable_running_children(selector, options, pids)
2201    }
2202
2203    /// Returns a running task in the current thread group.
2204    pub fn get_running_task(&self) -> Result<Arc<Task>, Errno> {
2205        self.tasks
2206            .iter()
2207            .find_map(|container| container.1.upgrade().filter(|task| task.is_running()))
2208            .ok_or_else(|| errno!(ESRCH))
2209    }
2210
2211    /// Returns a task representative of the [`ThreadGroup`].
2212    ///
2213    /// If the task list contains at least one running task, an arbitrary running task is returned.
2214    /// Otherwise, if the task list is empty, the process must be a zombie. In this case, the exited
2215    /// leader task is returned.
2216    pub fn get_any_task(&self) -> Result<Arc<Task>, Errno> {
2217        self.get_running_task()
2218            .ok()
2219            .or_else(|| self.base.leader_task.get().and_then(|t| t.upgrade()))
2220            .ok_or_else(|| errno!(ESRCH))
2221    }
2222
2223    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
2224    /// does not update the signal.  If |finalize_only| is set, will check that
2225    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
2226    /// before executing it.
2227    ///
2228    /// Returns the latest stop state after any changes.
2229    pub fn set_stopped(
2230        mut self,
2231        new_stopped: StopState,
2232        siginfo: Option<SignalInfo>,
2233        finalize_only: bool,
2234    ) -> StopState {
2235        if let Some(stopped) = self.base.check_stopped_state(new_stopped, finalize_only) {
2236            return stopped;
2237        }
2238
2239        // Thread groups don't transition to group stop if they are waking, because waking
2240        // means something told it to wake up (like a SIGCONT) but hasn't finished yet.
2241        if self.base.load_stopped() == StopState::Waking
2242            && (new_stopped == StopState::GroupStopping || new_stopped == StopState::GroupStopped)
2243        {
2244            return self.base.load_stopped();
2245        }
2246
2247        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When thread
2248        // group can be stopped inside user code, tasks/thread groups will
2249        // need to be either restarted or stopped here.
2250        self.store_stopped(new_stopped);
2251        if let Some(signal) = &siginfo {
2252            // We don't want waiters to think the process was unstopped
2253            // because of a sigkill.  They will get woken when the
2254            // process dies.
2255            if signal.signal != SIGKILL {
2256                self.last_signal = siginfo;
2257            }
2258        }
2259        if new_stopped == StopState::Waking || new_stopped == StopState::ForceWaking {
2260            self.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::Stopped);
2261        };
2262
2263        let parent = (!new_stopped.is_in_progress()).then(|| self.parent.clone()).flatten();
2264
2265        // Drop the lock before locking the parent.
2266        std::mem::drop(self);
2267        if let Some(parent) = parent {
2268            let parent = parent.upgrade();
2269            parent
2270                .write()
2271                .lifecycle_waiters
2272                .notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
2273        }
2274
2275        new_stopped
2276    }
2277
2278    fn store_stopped(&mut self, state: StopState) {
2279        // We don't actually use the guard but we require it to enforce that the
2280        // caller holds the thread group's mutable state lock (identified by
2281        // mutable access to the thread group's mutable state).
2282
2283        self.base.stop_state.store(state, Ordering::Relaxed)
2284    }
2285
2286    /// Sends the signal `signal_info` to this thread group.
2287    #[allow(unused_mut, reason = "needed for some but not all macro outputs")]
2288    pub fn send_signal(mut self, signal_info: SignalInfo) {
2289        let sigaction = self.base.signal_actions.get(signal_info.signal);
2290        let action = action_for_signal(&signal_info, sigaction);
2291
2292        {
2293            let mut pending_signals = self.base.pending_signals.lock();
2294            pending_signals.enqueue(signal_info.clone());
2295            self.base.has_pending_signals.store(true, Ordering::Relaxed);
2296        }
2297        let tasks: Vec<Weak<Task>> = self.tasks.values().map(|t| t.weak_clone()).collect();
2298
2299        // Set state to waking before interrupting any tasks.
2300        if signal_info.signal == SIGKILL {
2301            self.set_stopped(StopState::ForceWaking, Some(signal_info.clone()), false);
2302        } else if signal_info.signal == SIGCONT {
2303            self.set_stopped(StopState::Waking, Some(signal_info.clone()), false);
2304        }
2305
2306        let mut has_interrupted_task = false;
2307        for task in tasks.iter().flat_map(|t| t.upgrade()) {
2308            let mut task_state = task.write();
2309
2310            if signal_info.signal == SIGKILL {
2311                task_state.thaw();
2312                task_state.set_stopped(StopState::ForceWaking, None, None, None);
2313            } else if signal_info.signal == SIGCONT {
2314                task_state.set_stopped(StopState::Waking, None, None, None);
2315            }
2316
2317            let is_masked = task_state.is_signal_masked(signal_info.signal);
2318            let was_masked = task_state.is_signal_masked_by_saved_mask(signal_info.signal);
2319
2320            let is_queued = action != DeliveryAction::Ignore
2321                || is_masked
2322                || was_masked
2323                || task_state.is_ptraced();
2324
2325            if is_queued {
2326                task_state.notify_signal_waiters(&signal_info.signal);
2327
2328                if !is_masked && action.must_interrupt(Some(sigaction)) && !has_interrupted_task {
2329                    // Only interrupt one task, and only interrupt if the signal was actually queued
2330                    // and the action must interrupt.
2331                    drop(task_state);
2332                    task.interrupt();
2333                    has_interrupted_task = true;
2334                }
2335            }
2336        }
2337    }
2338}
2339
2340/// Container around a weak task and a strong `TaskPersistentInfo`. It is needed to keep the
2341/// information even when the task is not upgradable, because when the task is dropped, there is a
2342/// moment where the task is not yet released, yet the weak pointer is not upgradeable anymore.
2343/// During this time, it is still necessary to access the persistent info to compute the state of
2344/// the thread for the different wait syscalls.
2345pub struct TaskContainer(Weak<Task>, TaskPersistentInfo);
2346
2347impl From<&Arc<Task>> for TaskContainer {
2348    fn from(task: &Arc<Task>) -> Self {
2349        Self(Arc::downgrade(task), task.persistent_info.clone())
2350    }
2351}
2352
2353impl From<TaskContainer> for TaskPersistentInfo {
2354    fn from(container: TaskContainer) -> TaskPersistentInfo {
2355        container.1
2356    }
2357}
2358
2359impl TaskContainer {
2360    fn upgrade(&self) -> Option<Arc<Task>> {
2361        self.0.upgrade()
2362    }
2363
2364    fn weak_clone(&self) -> Weak<Task> {
2365        self.0.clone()
2366    }
2367
2368    fn info(&self) -> &TaskPersistentInfo {
2369        &self.1
2370    }
2371}
2372
2373#[cfg(test)]
2374mod test {
2375    use super::*;
2376    use crate::testing::*;
2377
2378    #[::fuchsia::test]
2379    async fn test_setsid() {
2380        spawn_kernel_and_run(async |locked, current_task| {
2381            fn get_process_group(task: &Task) -> Arc<ProcessGroup> {
2382                Arc::clone(&task.thread_group().read().process_group)
2383            }
2384            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));
2385
2386            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2387            assert_eq!(get_process_group(&current_task), get_process_group(&child_task));
2388
2389            let old_process_group = child_task.thread_group().read().process_group.clone();
2390            assert_eq!(child_task.thread_group().setsid(locked), Ok(()));
2391            assert_eq!(
2392                child_task.thread_group().read().process_group.session.leader,
2393                child_task.get_pid()
2394            );
2395            assert!(
2396                !old_process_group.read(locked).thread_groups().contains(child_task.thread_group())
2397            );
2398        })
2399        .await;
2400    }
2401
2402    #[::fuchsia::test]
2403    async fn test_exit_status() {
2404        spawn_kernel_and_run(async |locked, current_task| {
2405            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2406            child.thread_group().kill(locked, ExitStatus::Exit(42), None);
2407            std::mem::drop(child);
2408            assert_eq!(
2409                current_task.thread_group().read().zombie_children[0].exit_info.status,
2410                ExitStatus::Exit(42)
2411            );
2412        })
2413        .await;
2414    }
2415
2416    #[::fuchsia::test]
2417    async fn test_setgpid() {
2418        spawn_kernel_and_run(async |locked, current_task| {
2419            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));
2420
2421            let child_task1 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2422            let child_task2 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2423            let execd_child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2424            execd_child_task.thread_group().write().did_exec = true;
2425            let other_session_child_task =
2426                current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2427            assert_eq!(other_session_child_task.thread_group().setsid(locked), Ok(()));
2428
2429            assert_eq!(
2430                child_task1.thread_group().setpgid(locked, &current_task, &current_task, 0),
2431                error!(ESRCH)
2432            );
2433            assert_eq!(
2434                current_task.thread_group().setpgid(locked, &current_task, &execd_child_task, 0),
2435                error!(EACCES)
2436            );
2437            assert_eq!(
2438                current_task.thread_group().setpgid(locked, &current_task, &current_task, 0),
2439                error!(EPERM)
2440            );
2441            assert_eq!(
2442                current_task.thread_group().setpgid(
2443                    locked,
2444                    &current_task,
2445                    &other_session_child_task,
2446                    0
2447                ),
2448                error!(EPERM)
2449            );
2450            assert_eq!(
2451                current_task.thread_group().setpgid(locked, &current_task, &child_task1, -1),
2452                error!(EINVAL)
2453            );
2454            assert_eq!(
2455                current_task.thread_group().setpgid(locked, &current_task, &child_task1, 255),
2456                error!(EPERM)
2457            );
2458            assert_eq!(
2459                current_task.thread_group().setpgid(
2460                    locked,
2461                    &current_task,
2462                    &child_task1,
2463                    other_session_child_task.tid
2464                ),
2465                error!(EPERM)
2466            );
2467
2468            assert_eq!(
2469                child_task1.thread_group().setpgid(locked, &current_task, &child_task1, 0),
2470                Ok(())
2471            );
2472            assert_eq!(
2473                child_task1.thread_group().read().process_group.session.leader,
2474                current_task.tid
2475            );
2476            assert_eq!(child_task1.thread_group().read().process_group.leader, child_task1.tid);
2477
2478            let old_process_group = child_task2.thread_group().read().process_group.clone();
2479            assert_eq!(
2480                current_task.thread_group().setpgid(
2481                    locked,
2482                    &current_task,
2483                    &child_task2,
2484                    child_task1.tid
2485                ),
2486                Ok(())
2487            );
2488            assert_eq!(child_task2.thread_group().read().process_group.leader, child_task1.tid);
2489            assert!(
2490                !old_process_group
2491                    .read(locked)
2492                    .thread_groups()
2493                    .contains(child_task2.thread_group())
2494            );
2495        })
2496        .await;
2497    }
2498
2499    #[::fuchsia::test]
2500    async fn test_adopt_children() {
2501        spawn_kernel_and_run(async |locked, current_task| {
2502            let task1 = current_task.clone_task_for_test(locked, 0, None);
2503            let task2 = task1.clone_task_for_test(locked, 0, None);
2504            let task3 = task2.clone_task_for_test(locked, 0, None);
2505
2506            assert_eq!(task3.thread_group().read().get_ppid(), task2.tid);
2507
2508            task2.thread_group().kill(locked, ExitStatus::Exit(0), None);
2509            std::mem::drop(task2);
2510
2511            // Task3 parent should be current_task.
2512            assert_eq!(task3.thread_group().read().get_ppid(), current_task.tid);
2513        })
2514        .await;
2515    }
2516
2517    #[::fuchsia::test]
2518    async fn test_getppid_after_self_and_parent_exit() {
2519        spawn_kernel_and_run(async |locked, current_task| {
2520            let task1 = current_task.clone_task_for_test(locked, 0, None);
2521            let task2 = task1.clone_task_for_test(locked, 0, None);
2522
2523            // Take strong references to the ThreadGroups.
2524            let tg1 = task1.thread_group().clone();
2525            let tg2 = task2.thread_group().clone();
2526
2527            assert_eq!(tg1.read().get_ppid(), current_task.tid);
2528            assert_eq!(tg2.read().get_ppid(), task1.tid);
2529
2530            // Exit `task2` first, so that when `task1` exits, it will not be reparented to init.
2531            tg2.kill(locked, ExitStatus::Exit(0), None);
2532            std::mem::drop(task2);
2533
2534            // Exit `task1`, and drop the task and ThreadGroup.
2535            tg1.kill(locked, ExitStatus::Exit(0), None);
2536            std::mem::drop(task1);
2537            std::mem::drop(tg1);
2538
2539            // It should still be valid to call `get_ppid()` on `tg2`, though is parent ThreadGroup
2540            // no longer exists.
2541            let _ = tg2.read().get_ppid();
2542        })
2543        .await;
2544    }
2545}