Skip to main content

starnix_core/task/
thread_group.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::terminal::{Terminal, TerminalController};
6use crate::mutable_state::{state_accessor, state_implementation};
7use crate::ptrace::{
8    AtomicStopState, PtraceAllowedPtracers, PtraceEvent, PtraceOptions, PtraceStatus, StopState,
9    ZombiePtracees, ptrace_detach,
10};
11use crate::security;
12use crate::signals::syscalls::WaitingOptions;
13use crate::signals::{
14    DeliveryAction, IntoSignalInfoOptions, QueuedSignals, SignalActions, SignalDetail, SignalInfo,
15    UncheckedSignalInfo, action_for_signal, send_standard_signal,
16};
17use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
18use crate::task::{
19    ControllingTerminal, CurrentTask, ExitStatus, Kernel, PidTable, ProcessGroup, Session, Task,
20    TaskMutableState, TaskPersistentInfo, TypedWaitQueue,
21};
22use crate::time::{IntervalTimerHandle, TimerTable};
23use itertools::Itertools;
24use macro_rules_attribute::apply;
25use starnix_lifecycle::{AtomicCounter, DropNotifier};
26use starnix_logging::{log_debug, log_error, log_info, log_warn, track_stub};
27use starnix_sync::{
28    LockBefore, Locked, Mutex, OrderedMutex, ProcessGroupState, RwLock, ThreadGroupLimits, Unlocked,
29};
30use starnix_task_command::TaskCommand;
31use starnix_types::ownership::{OwnedRef, Releasable};
32use starnix_types::stats::TaskTimeStats;
33use starnix_types::time::{itimerspec_from_itimerval, timeval_from_duration};
34use starnix_uapi::arc_key::WeakKey;
35use starnix_uapi::auth::{CAP_SYS_ADMIN, CAP_SYS_RESOURCE, Credentials};
36use starnix_uapi::errors::Errno;
37use starnix_uapi::personality::PersonalityFlags;
38use starnix_uapi::resource_limits::{Resource, ResourceLimits};
39use starnix_uapi::signals::{
40    SIGCHLD, SIGCONT, SIGHUP, SIGKILL, SIGTERM, SIGTTOU, SigSet, Signal, UncheckedSignal,
41};
42use starnix_uapi::user_address::UserAddress;
43use starnix_uapi::{
44    ITIMER_PROF, ITIMER_REAL, ITIMER_VIRTUAL, SI_TKILL, SI_USER, SIG_IGN, errno, error, itimerval,
45    pid_t, rlimit, tid_t, uid_t,
46};
47use std::collections::BTreeMap;
48use std::fmt;
49use std::sync::atomic::{AtomicBool, Ordering};
50use std::sync::{Arc, OnceLock, Weak};
51use zx::{Koid, Status};
52
/// A Zircon process handle bundled with the result of its koid lookup.
///
/// The koid is queried once in [`ZirconProcess::new`] and stored next to the handle, so
/// [`ZirconProcess::koid`] returns the stored result instead of querying the handle again.
#[derive(Debug)]
pub struct ZirconProcess {
    /// The underlying Zircon process handle.
    process: zx::Process,
    /// The result of `process.koid()`, captured when the wrapper was built.
    koid: Result<Koid, Status>,
}
58
59impl ZirconProcess {
60    pub fn new(process: zx::Process) -> Self {
61        let koid = process.koid();
62        Self { process, koid }
63    }
64
65    pub fn koid(&self) -> Result<Koid, Status> {
66        self.koid
67    }
68}
69
70impl std::ops::Deref for ZirconProcess {
71    type Target = zx::Process;
72    fn deref(&self) -> &Self::Target {
73        &self.process
74    }
75}
76
/// A weak reference to a thread group that can be used in sets and maps.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadGroupKey {
    /// The pid of the thread group, copied from `ThreadGroup::leader` when the key is built.
    pid: pid_t,
    /// Weak handle to the thread group itself.
    thread_group: WeakKey<ThreadGroup>,
}
83
84impl ThreadGroupKey {
85    /// The pid of the thread group keyed by this object.
86    ///
87    /// As the key is weak (and pid are not unique due to pid namespaces), this should not be used
88    /// as an unique identifier of the thread group.
89    pub fn pid(&self) -> pid_t {
90        self.pid
91    }
92}
93
94impl std::ops::Deref for ThreadGroupKey {
95    type Target = Weak<ThreadGroup>;
96    fn deref(&self) -> &Self::Target {
97        &self.thread_group.0
98    }
99}
100
101impl From<&ThreadGroup> for ThreadGroupKey {
102    fn from(tg: &ThreadGroup) -> Self {
103        Self { pid: tg.leader, thread_group: WeakKey::from(&tg.weak_self.upgrade().unwrap()) }
104    }
105}
106
107impl<T: AsRef<ThreadGroup>> From<T> for ThreadGroupKey {
108    fn from(tg: T) -> Self {
109        tg.as_ref().into()
110    }
111}
112
/// Values used for waiting on the [ThreadGroup] lifecycle wait queue.
///
/// The enum is `#[repr(u64)]`, so each variant converts to its `u64` discriminant.
#[repr(u64)]
pub enum ThreadGroupLifecycleWaitValue {
    /// Wait for updates to the WaitResults of tasks in the group.
    ChildStatus,
    /// Wait for updates to `stopped`.
    Stopped,
}

/// Converts a wait value into its `u64` discriminant.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket impl still provides
/// `Into<u64>` for existing callers, and `u64::from(value)` becomes available as well.
impl From<ThreadGroupLifecycleWaitValue> for u64 {
    fn from(value: ThreadGroupLifecycleWaitValue) -> Self {
        value as u64
    }
}
127
/// A child process that has exited, but whose zombie ptracee needs to be consumed before the
/// child can be waited for.
#[derive(Clone, Debug)]
pub struct DeferredZombiePTracer {
    /// Key of the thread group that was originally tracing the tracee.
    pub tracer_thread_group_key: ThreadGroupKey,
    /// Tid of the traced task.
    pub tracee_tid: tid_t,
    /// Process group id of the tracee, i.e. the leader of its process group when this record was
    /// created.
    pub tracee_pgid: pid_t,
    /// Key of the thread group the tracee belongs to.
    pub tracee_thread_group_key: ThreadGroupKey,
}
141
142impl DeferredZombiePTracer {
143    fn new(tracer: &ThreadGroup, tracee: &Task) -> Self {
144        Self {
145            tracer_thread_group_key: tracer.into(),
146            tracee_tid: tracee.tid,
147            tracee_pgid: tracee.thread_group().read().process_group.leader,
148            tracee_thread_group_key: tracee.thread_group_key.clone(),
149        }
150    }
151}
152
/// The mutable state of the ThreadGroup.
///
/// An instance of this struct lives in `ThreadGroup::mutable_state`, behind an `RwLock`.
pub struct ThreadGroupMutableState {
    /// The parent thread group.
    ///
    /// The value needs to be writable so that the thread group can be re-parented to the correct
    /// subreaper if the parent ends before the child.
    pub parent: Option<ThreadGroupParent>,

    /// The signal this process generates on exit.
    pub exit_signal: Option<Signal>,

    /// The tasks in the thread group.
    ///
    /// The references to `Task` are weak to prevent cycles, as each `Task` has an `Arc`
    /// reference to its thread group.
    /// It is still expected that these weak references are always valid, as tasks must unregister
    /// themselves before they are deleted.
    tasks: BTreeMap<tid_t, TaskContainer>,

    /// The children of this thread group.
    ///
    /// The references to `ThreadGroup` are weak to prevent cycles, as each `ThreadGroup` has an
    /// `Arc` reference to its parent.
    /// It is still expected that these weak references are always valid, as thread groups must
    /// unregister themselves before they are deleted.
    pub children: BTreeMap<pid_t, Weak<ThreadGroup>>,

    /// Child tasks that have exited, but have not yet been waited for.
    pub zombie_children: Vec<OwnedRef<ZombieProcess>>,

    /// Ptracees of this process that have exited, but have not yet been waited for.
    pub zombie_ptracees: ZombiePtracees,

    /// Child processes that have exited, but whose zombie ptracee needs to be consumed
    /// before they can be waited for.
    pub deferred_zombie_ptracers: Vec<DeferredZombiePTracer>,

    /// Unified [WaitQueue] for all waited ThreadGroup events.
    pub lifecycle_waiters: TypedWaitQueue<ThreadGroupLifecycleWaitValue>,

    /// Whether this thread group will inherit the children of dying processes in its descendant
    /// tree.
    pub is_child_subreaper: bool,

    /// The IDs used to perform shell job control.
    pub process_group: Arc<ProcessGroup>,

    /// Starts out `false` for a new thread group.
    /// NOTE(review): presumably set once a task in the group has performed an exec — confirm
    /// against the code that writes this field.
    pub did_exec: bool,

    /// A signal that indicates whether the process is going to become waitable
    /// via waitid and waitpid for either WSTOPPED or WCONTINUED, depending on
    /// the value of `stopped`. If not None, contains the SignalInfo to return.
    pub last_signal: Option<SignalInfo>,

    /// Whether the thread group is terminating or not, and if it is, the exit status of the
    /// thread group.
    run_state: ThreadGroupRunState,

    /// Time statistics accumulated from the children.
    pub children_time_stats: TaskTimeStats,

    /// Personality flags set with `sys_personality()`.
    pub personality: PersonalityFlags,

    /// Thread groups allowed to trace tasks in this thread group.
    pub allowed_ptracers: PtraceAllowedPtracers,

    /// Channel to message when this thread group exits.
    exit_notifier: Option<futures::channel::oneshot::Sender<()>>,

    /// Notifier for name changes.
    pub notifier: Option<std::sync::mpsc::Sender<MemoryAttributionLifecycleEvent>>,
}
226
/// A collection of `Task` objects that roughly correspond to a "process".
///
/// Userspace programmers often think about "threads" and "processes", but those concepts have no
/// clear analogs inside the kernel because tasks are typically created using `clone(2)`, which
/// takes a complex set of flags that describes how much state is shared between the original task
/// and the new task.
///
/// If a new task is created with the `CLONE_THREAD` flag, the new task will be placed in the same
/// `ThreadGroup` as the original task. Userspace typically uses this flag in conjunction with the
/// `CLONE_FILES`, `CLONE_VM`, and `CLONE_FS` flags, which corresponds to the userspace notion of a
/// "thread". For example, that's how `pthread_create` behaves. In that sense, a `ThreadGroup`
/// normally corresponds to the set of "threads" in a "process". However, this pattern is purely a
/// userspace convention, and nothing stops userspace from using `CLONE_THREAD` without
/// `CLONE_FILES`, for example.
///
/// In Starnix, a `ThreadGroup` corresponds to a Zircon process, which means we do not support the
/// `CLONE_THREAD` flag without the `CLONE_VM` flag. If we run into problems with this limitation,
/// we might need to revise this correspondence.
///
/// Each `Task` in a `ThreadGroup` has the same thread group ID (`tgid`). The task with the same
/// `pid` as the `tgid` is called the thread group leader.
///
/// Thread groups are destroyed when the last task in the group exits.
pub struct ThreadGroup {
    /// Weak reference to the `Arc` that owns this `ThreadGroup`, set up by `Arc::new_cyclic` when
    /// the thread group is constructed. This allows recovering a strong reference from a plain
    /// `&ThreadGroup`.
    pub weak_self: Weak<ThreadGroup>,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// A handle to the underlying Zircon process object.
    ///
    /// Currently, we have a 1-to-1 mapping between thread groups and zx::process
    /// objects. This approach might break down if/when we implement CLONE_VM
    /// without CLONE_THREAD because that creates a situation where two thread
    /// groups share an address space. To implement that situation, we might
    /// need to break the 1-to-1 mapping between thread groups and zx::process
    /// or teach zx::process to share address spaces.
    pub process: ZirconProcess,

    /// A handle to the restricted address space for the Zircon process object.
    pub root_vmar: zx::Vmar,

    /// The lead task of this thread group.
    ///
    /// The lead task is typically the initial thread created in the thread group.
    pub leader: pid_t,

    // TODO(https://fxbug.dev/508746892): Remove this once the `PidTable` lock is removed.
    /// Cached weak reference to the leader task.
    ///
    /// This is used to break a deadlock in signal delivery, where a reference to the leader task
    /// must be obtained in order to do access checks in situations where the leader has exited and
    /// is no longer in the task list.
    pub leader_task: OnceLock<Weak<Task>>,

    /// The signal actions that are registered for this process.
    pub signal_actions: Arc<SignalActions>,

    /// The timers for this thread group (from timer_create(), etc.).
    pub timers: TimerTable,

    /// A mechanism to be notified when this `ThreadGroup` is destroyed.
    pub drop_notifier: DropNotifier,

    /// Whether the process is currently stopped.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    stop_state: AtomicStopState,

    /// The mutable state of the ThreadGroup.
    mutable_state: RwLock<ThreadGroupMutableState>,

    /// The resource limits for this thread group.  This is outside mutable_state
    /// to avoid deadlocks where the thread_group lock is held when acquiring
    /// the task lock, and vice versa.
    pub limits: OrderedMutex<ResourceLimits, ThreadGroupLimits>,

    /// The next unique identifier for a seccomp filter.  These are required to be
    /// able to distinguish identical seccomp filters, which are treated differently
    /// for the purposes of SECCOMP_FILTER_FLAG_TSYNC.  Inherited across clone because
    /// seccomp filters are also inherited across clone.
    pub next_seccomp_filter_id: AtomicCounter<u64>,

    /// Tasks ptraced by this process, keyed by tid.
    pub ptracees: Mutex<BTreeMap<tid_t, TaskContainer>>,

    /// The signals that are currently pending for this thread group.
    pub pending_signals: Mutex<QueuedSignals>,

    /// Whether or not there are any pending signals available for tasks in this thread group.
    /// Used to avoid having to acquire the signal state lock in hot paths.
    pub has_pending_signals: AtomicBool,

    /// The monotonic time at which the thread group started.
    pub start_time: zx::MonotonicInstant,

    /// Whether to log syscalls at INFO level for this thread group.
    log_syscalls_as_info: AtomicBool,
}
328
329impl fmt::Debug for ThreadGroup {
330    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
331        write!(
332            f,
333            "{}({})",
334            self.process.get_name().unwrap_or(zx::Name::new_lossy("<unknown>")),
335            self.leader
336        )
337    }
338}
339
340impl ThreadGroup {
341    pub fn sync_syscall_log_level(&self) {
342        let command = self.read().leader_command();
343        let filters = self.kernel.syscall_log_filters.lock();
344        let should_log = filters.iter().any(|f| f.matches(&command));
345        let prev_should_log = self.log_syscalls_as_info.swap(should_log, Ordering::Relaxed);
346        let change_str = match (should_log, prev_should_log) {
347            (true, false) => Some("Enabled"),
348            (false, true) => Some("Disabled"),
349            _ => None,
350        };
351        if let Some(change_str) = change_str {
352            log_info!(
353                "{change_str} info syscall logs for thread group {} (command: {command})",
354                self.leader
355            );
356        }
357    }
358
359    #[inline]
360    pub fn syscall_log_level(&self) -> starnix_logging::Level {
361        if self.log_syscalls_as_info.load(Ordering::Relaxed) {
362            starnix_logging::Level::Info
363        } else {
364            starnix_logging::Level::Trace
365        }
366    }
367}
368
369impl PartialEq for ThreadGroup {
370    fn eq(&self, other: &Self) -> bool {
371        self.leader == other.leader
372    }
373}
374
impl Drop for ThreadGroup {
    /// Checks, at destruction time, that the thread group has been fully torn down.
    ///
    /// Panics if any task, child, zombie child or zombie ptracee is still recorded in the mutable
    /// state.
    fn drop(&mut self) {
        // `&mut self` is held, so the state is borrowed mutably through `get_mut`.
        let state = self.mutable_state.get_mut();
        assert!(state.tasks.is_empty());
        assert!(state.children.is_empty());
        assert!(state.zombie_children.is_empty());
        assert!(state.zombie_ptracees.is_empty());
        // In test and debug builds, additionally verify that a parent that is still alive no
        // longer lists this thread group under its leader pid. A missing parent, or one that can
        // no longer be upgraded, passes the check.
        #[cfg(any(test, debug_assertions))]
        assert!(
            state
                .parent
                .as_ref()
                .and_then(|p| p.0.upgrade().as_ref().map(|p| p
                    .read()
                    .children
                    .get(&self.leader)
                    .is_none()))
                .unwrap_or(true)
        );
    }
}
396
/// A wrapper around a `Weak<ThreadGroup>` that expects the underlying `Weak` to always be
/// valid. The expectation is checked with a `debug_assert!` at creation, and `upgrade` panics
/// if the reference turns out to be dead.
pub struct ThreadGroupParent(Weak<ThreadGroup>);
400
401impl ThreadGroupParent {
402    pub fn new(t: Weak<ThreadGroup>) -> Self {
403        debug_assert!(t.upgrade().is_some());
404        Self(t)
405    }
406
407    pub fn upgrade(&self) -> Arc<ThreadGroup> {
408        self.0.upgrade().expect("ThreadGroupParent references must always be valid")
409    }
410}
411
412impl Clone for ThreadGroupParent {
413    fn clone(&self) -> Self {
414        Self(self.0.clone())
415    }
416}
417
/// A selector that can match a process. Works as a representation of the pid argument to syscalls
/// like wait and kill.
#[derive(Debug, Clone)]
pub enum ProcessSelector {
    /// Matches any process at all.
    Any,
    /// Matches only the process with the specified pid.
    Pid(pid_t),
    /// Matches all the processes in the given process group.
    Pgid(pid_t),
    /// Matches only the thread group identified by the given key.
    Process(ThreadGroupKey),
}
431
432impl ProcessSelector {
433    pub fn match_tid(&self, tid: tid_t, pid_table: &PidTable) -> bool {
434        match *self {
435            ProcessSelector::Pid(p) => {
436                if p == tid {
437                    true
438                } else {
439                    if let Ok(task_ref) = pid_table.get_task(tid) {
440                        task_ref.get_pid() == p
441                    } else {
442                        false
443                    }
444                }
445            }
446            ProcessSelector::Any => true,
447            ProcessSelector::Pgid(pgid) => {
448                if let Ok(task_ref) = pid_table.get_task(tid) {
449                    pid_table.get_process_group(pgid).as_ref()
450                        == Some(&task_ref.thread_group().read().process_group)
451                } else {
452                    false
453                }
454            }
455            ProcessSelector::Process(ref key) => {
456                if let Some(tg) = key.upgrade() {
457                    tg.read().tasks.contains_key(&tid)
458                } else {
459                    false
460                }
461            }
462        }
463    }
464}
465
/// How a process exited: its exit status together with its exit signal.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProcessExitInfo {
    /// The exit status of the process.
    pub status: ExitStatus,
    /// The exit signal associated with the process, if any.
    /// NOTE(review): presumably copied from `ThreadGroupMutableState::exit_signal` — confirm
    /// against the code that builds this struct.
    pub exit_signal: Option<Signal>,
}
471
/// Whether a thread group is still running or has started terminating.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
enum ThreadGroupRunState {
    /// The thread group has not started terminating. This is the default state.
    #[default]
    Running,
    /// The thread group is terminating with the given exit status.
    Terminating(ExitStatus),
}
478
/// The information about an exited process that is reported to a waiter.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WaitResult {
    /// Pid of the process that exited.
    pub pid: pid_t,
    /// Uid recorded for the process that exited.
    pub uid: uid_t,

    /// Exit status and exit signal of the process.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,
}
489
490impl WaitResult {
491    // According to wait(2) man page, SignalInfo.signal needs to always be set to SIGCHLD
492    pub fn as_signal_info(&self) -> SignalInfo {
493        SignalInfo::with_detail(
494            SIGCHLD,
495            self.exit_info.status.signal_info_code(),
496            SignalDetail::SIGCHLD {
497                pid: self.pid,
498                uid: self.uid,
499                status: self.exit_info.status.signal_info_status(),
500            },
501        )
502    }
503}
504
/// The record kept for a process that has exited, until it is released.
#[derive(Debug)]
pub struct ZombieProcess {
    /// Key of the thread group this zombie was created from; also provides the zombie's pid.
    pub thread_group_key: ThreadGroupKey,
    /// Process group id of the process, i.e. the leader of its process group when the zombie was
    /// created.
    pub pgid: pid_t,
    /// Uid taken from the credentials passed in when the zombie was created.
    pub uid: uid_t,

    /// Exit status and exit signal of the process.
    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,

    /// Whether dropping this ZombieProcess should imply removing the pid from
    /// the PidTable
    pub is_canonical: bool,
}
520
521impl PartialEq for ZombieProcess {
522    fn eq(&self, other: &Self) -> bool {
523        // We assume only one set of ZombieProcess data per process, so this should cover it.
524        self.thread_group_key == other.thread_group_key
525            && self.pgid == other.pgid
526            && self.uid == other.uid
527            && self.is_canonical == other.is_canonical
528    }
529}
530
531impl Eq for ZombieProcess {}
532
533impl PartialOrd for ZombieProcess {
534    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
535        Some(self.cmp(other))
536    }
537}
538
539impl Ord for ZombieProcess {
540    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
541        self.thread_group_key.cmp(&other.thread_group_key)
542    }
543}
544
545impl ZombieProcess {
546    pub fn new(
547        thread_group: ThreadGroupStateRef<'_>,
548        credentials: &Credentials,
549        exit_info: ProcessExitInfo,
550    ) -> OwnedRef<Self> {
551        let time_stats = thread_group.base.time_stats() + thread_group.children_time_stats;
552        OwnedRef::new(ZombieProcess {
553            thread_group_key: thread_group.base.into(),
554            pgid: thread_group.process_group.leader,
555            uid: credentials.uid,
556            exit_info,
557            time_stats,
558            is_canonical: true,
559        })
560    }
561
562    pub fn pid(&self) -> pid_t {
563        self.thread_group_key.pid()
564    }
565
566    pub fn to_wait_result(&self) -> WaitResult {
567        WaitResult {
568            pid: self.pid(),
569            uid: self.uid,
570            exit_info: self.exit_info.clone(),
571            time_stats: self.time_stats,
572        }
573    }
574
575    pub fn as_artificial(&self) -> Self {
576        ZombieProcess {
577            thread_group_key: self.thread_group_key.clone(),
578            pgid: self.pgid,
579            uid: self.uid,
580            exit_info: self.exit_info.clone(),
581            time_stats: self.time_stats,
582            is_canonical: false,
583        }
584    }
585
586    pub fn matches_selector(&self, selector: &ProcessSelector) -> bool {
587        match *selector {
588            ProcessSelector::Any => true,
589            ProcessSelector::Pid(pid) => self.pid() == pid,
590            ProcessSelector::Pgid(pgid) => self.pgid == pgid,
591            ProcessSelector::Process(ref key) => self.thread_group_key == *key,
592        }
593    }
594
595    pub fn matches_selector_and_waiting_option(
596        &self,
597        selector: &ProcessSelector,
598        options: &WaitingOptions,
599    ) -> bool {
600        if !self.matches_selector(selector) {
601            return false;
602        }
603
604        if options.wait_for_all {
605            true
606        } else {
607            // A "clone" zombie is one which has delivered no signal, or a
608            // signal other than SIGCHLD to its parent upon termination.
609            options.wait_for_clone == (self.exit_info.exit_signal != Some(SIGCHLD))
610        }
611    }
612}
613
614impl Releasable for ZombieProcess {
615    type Context<'a> = &'a mut PidTable;
616
617    fn release<'a>(self, pids: &'a mut PidTable) {
618        if self.is_canonical {
619            pids.remove_zombie(self.pid());
620        }
621    }
622}
623
624impl ThreadGroup {
625    /// Creates a ThreadGroup for a regular userspace process.
626    pub fn new<L>(
627        locked: &mut Locked<L>,
628        kernel: Arc<Kernel>,
629        process: zx::Process,
630        root_vmar: zx::Vmar,
631        parent: Option<ThreadGroupWriteGuard<'_>>,
632        leader: pid_t,
633        exit_signal: Option<Signal>,
634        process_group: Arc<ProcessGroup>,
635        signal_actions: Arc<SignalActions>,
636    ) -> Arc<ThreadGroup>
637    where
638        L: LockBefore<ProcessGroupState>,
639    {
640        debug_assert!(!process.is_invalid());
641        debug_assert!(!root_vmar.is_invalid());
642        Self::new_internal(
643            locked,
644            kernel,
645            process,
646            root_vmar,
647            parent,
648            leader,
649            exit_signal,
650            process_group,
651            signal_actions,
652        )
653    }
654
655    /// Creates a ThreadGroup for a kernel system task (e.g., kthreadd).
656    pub fn for_system<L>(
657        locked: &mut Locked<L>,
658        kernel: Arc<Kernel>,
659        leader: pid_t,
660        process_group: Arc<ProcessGroup>,
661    ) -> Arc<ThreadGroup>
662    where
663        L: LockBefore<ProcessGroupState>,
664    {
665        Self::new_internal(
666            locked,
667            kernel,
668            zx::Process::invalid(),
669            zx::Vmar::invalid(),
670            None,
671            leader,
672            Some(SIGCHLD),
673            process_group,
674            SignalActions::default(),
675        )
676    }
677
678    /// Creates a ThreadGroup suitable for use in tests.
679    ///
680    /// This function performs the minimal setup necessary to produce a valid `ThreadGroup`
681    /// instance. It uses an invalid handle for the root VMAR, sets no parent, and uses
682    /// default signal actions with `SIGCHLD` as the exit signal.
683    ///
684    /// This should only be used in tests where a full process environment is not required.
685    pub fn for_test<L>(
686        locked: &mut Locked<L>,
687        kernel: Arc<Kernel>,
688        process: zx::Process,
689        parent: ThreadGroupWriteGuard<'_>,
690        leader: pid_t,
691        process_group: Arc<ProcessGroup>,
692    ) -> Arc<ThreadGroup>
693    where
694        L: LockBefore<ProcessGroupState>,
695    {
696        Self::new_internal(
697            locked,
698            kernel,
699            process,
700            zx::Vmar::invalid(),
701            Some(parent),
702            leader,
703            Some(SIGCHLD),
704            process_group,
705            SignalActions::default(),
706        )
707    }
708
709    fn new_internal<L>(
710        locked: &mut Locked<L>,
711        kernel: Arc<Kernel>,
712        process: zx::Process,
713        root_vmar: zx::Vmar,
714        parent: Option<ThreadGroupWriteGuard<'_>>,
715        leader: pid_t,
716        exit_signal: Option<Signal>,
717        process_group: Arc<ProcessGroup>,
718        signal_actions: Arc<SignalActions>,
719    ) -> Arc<ThreadGroup>
720    where
721        L: LockBefore<ProcessGroupState>,
722    {
723        Arc::new_cyclic(|weak_self| {
724            let process = ZirconProcess::new(process);
725            let mut thread_group = ThreadGroup {
726                weak_self: weak_self.clone(),
727                kernel,
728                process,
729                root_vmar,
730                leader,
731                leader_task: OnceLock::new(),
732                signal_actions,
733                timers: Default::default(),
734                drop_notifier: Default::default(),
735                // A child process created via fork(2) inherits its parent's
736                // resource limits.  Resource limits are preserved across execve(2).
737                limits: OrderedMutex::new(
738                    parent
739                        .as_ref()
740                        .map(|p| p.base.limits.lock(locked.cast_locked()).clone())
741                        .unwrap_or(Default::default()),
742                ),
743                next_seccomp_filter_id: Default::default(),
744                ptracees: Default::default(),
745                stop_state: AtomicStopState::new(StopState::Awake),
746                pending_signals: Default::default(),
747                has_pending_signals: Default::default(),
748                start_time: zx::MonotonicInstant::get(),
749                mutable_state: RwLock::new(ThreadGroupMutableState {
750                    parent: parent
751                        .as_ref()
752                        .map(|p| ThreadGroupParent::new(p.base.weak_self.clone())),
753                    exit_signal,
754                    tasks: BTreeMap::new(),
755                    children: BTreeMap::new(),
756                    zombie_children: vec![],
757                    zombie_ptracees: ZombiePtracees::new(),
758                    deferred_zombie_ptracers: vec![],
759                    lifecycle_waiters: TypedWaitQueue::<ThreadGroupLifecycleWaitValue>::default(),
760                    is_child_subreaper: false,
761                    process_group: Arc::clone(&process_group),
762                    did_exec: false,
763                    last_signal: None,
764                    run_state: Default::default(),
765                    children_time_stats: Default::default(),
766                    personality: parent
767                        .as_ref()
768                        .map(|p| p.personality)
769                        .unwrap_or(Default::default()),
770                    allowed_ptracers: PtraceAllowedPtracers::None,
771                    exit_notifier: None,
772                    notifier: None,
773                }),
774                log_syscalls_as_info: AtomicBool::new(false),
775            };
776
777            if let Some(mut parent) = parent {
778                thread_group.next_seccomp_filter_id.reset(parent.base.next_seccomp_filter_id.get());
779                parent.children.insert(leader, weak_self.clone());
780                process_group.insert(locked, &thread_group);
781            };
782            thread_group
783        })
784    }
785
    // Expands the `state_accessor` macro (imported from `crate::mutable_state`) for the
    // `mutable_state` field. NOTE(review): presumably this is what provides the `read()` and
    // `write()` lock accessors used throughout this file — confirm against the macro definition.
    state_accessor!(ThreadGroup, mutable_state);
787
788    pub fn load_stopped(&self) -> StopState {
789        self.stop_state.load(Ordering::Relaxed)
790    }
791
    /// Causes the thread group to exit.
    ///
    /// If this is being called from a task that is part of the current thread group, the caller
    /// should pass `current_task`.  If ownership issues prevent passing `current_task`, then
    /// callers should use CurrentTask::thread_group_exit instead.
    ///
    /// The steps are, in order:
    /// 1. If `current_task` is provided, report a `TRACEEXIT` ptrace event for it.
    /// 2. Take the pid table write lock, then the thread group write lock. Return early if the
    ///    thread group is already terminating.
    /// 3. Mark the thread group as terminating with `exit_status` and release its ptrace zombies.
    /// 4. Snapshot the tasks of the group and drop the thread group lock.
    /// 5. For every ptracee of this thread group, either send it `SIGKILL` (if it has the
    ///    `EXITKILL` ptrace option) or detach from it.
    /// 6. Set the exit status on every task of the group and send each of them `SIGKILL`.
    ///
    /// The pid table write lock taken in step 2 is held until the function returns.
    pub fn exit(
        &self,
        locked: &mut Locked<Unlocked>,
        exit_status: ExitStatus,
        mut current_task: Option<&mut CurrentTask>,
    ) {
        // The ptrace exit event is reported before any of the locks below are taken.
        if let Some(ref mut current_task) = current_task {
            current_task.ptrace_event(
                locked,
                PtraceOptions::TRACEEXIT,
                exit_status.signal_info_status() as u64,
            );
        }
        let mut pids = self.kernel.pids.write();
        let mut state = self.write();
        if state.is_terminating() {
            // The thread group is already terminating and all threads in the thread group have
            // already been interrupted.
            return;
        }

        state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());

        // Drop ptrace zombies
        state.zombie_ptracees.release(&mut pids);

        // Interrupt each task. Unlock the group because send_signal will lock the group in order
        // to call set_stopped.
        let tasks = state.tasks();
        drop(state);

        // Detach from any ptraced tasks, killing the ones that set PTRACE_O_EXITKILL.
        // The tids are copied out first so that the `ptracees` lock is not held during the loop.
        let tracees = self.ptracees.lock().keys().cloned().collect::<Vec<_>>();
        for tracee in tracees {
            // Tracees that can no longer be found in the pid table are skipped.
            if let Ok(task_ref) = pids.get_task(tracee) {
                let mut should_send_sigkill = false;
                if let Some(ptrace) = &task_ref.read().ptrace {
                    should_send_sigkill = ptrace.has_option(PtraceOptions::EXITKILL);
                }
                if should_send_sigkill {
                    send_standard_signal(locked, task_ref.as_ref(), SignalInfo::kernel(SIGKILL));
                    continue;
                }

                // The result of the detach is deliberately ignored.
                let _ =
                    ptrace_detach(locked, &mut pids, self, task_ref.as_ref(), &UserAddress::NULL);
            }
        }

        // Record the exit status on each task before interrupting it with SIGKILL.
        for task in tasks {
            task.write().set_exit_status(exit_status.clone());
            send_standard_signal(locked, &task, SignalInfo::kernel(SIGKILL));
        }
    }
850
851    pub fn add(&self, task: Arc<Task>) -> Result<(), Errno> {
852        let mut state = self.write();
853        if state.is_terminating() {
854            if state.tasks_count() == 0 {
855                log_warn!(
856                    "Task {} with leader {} terminating while adding its first task, \
857                not sending creation notification",
858                    task.tid,
859                    self.leader
860                );
861            }
862            return error!(EINVAL);
863        }
864        if task.tid == self.leader {
865            let _ = self.leader_task.set(Arc::downgrade(&task));
866        }
867        state.tasks.insert(task.tid, (&task).into());
868
869        Ok(())
870    }
871
    /// Removes `task` from this thread group and, if it was the last task, tears the group
    /// down.
    ///
    /// The task is first passed to `set_ptrace_zombie` and removed from the pid table. If the
    /// task was never part of `tasks`, the function returns early. When the removal leaves
    /// `tasks` empty, the group:
    ///   - is marked `Terminating` (using the task's exit status if none was recorded yet),
    ///   - is replaced in the pid table by a `ZombieProcess`,
    ///   - leaves its process group and the cgroup2 pid table,
    ///   - hands its children and zombies to the reaper returned by `find_reaper`, or releases
    ///     the zombies when there is no reaper,
    ///   - notifies the tracer and/or parent about the zombie, or releases the zombie when
    ///     there is no parent,
    ///   - asks the parent to re-check for orphaned process groups.
    ///
    /// It is important that the task is taken as an `Arc`. It ensures the tasks of the
    /// ThreadGroup are always valid as they are still valid when removed.
    pub fn remove<L>(&self, locked: &mut Locked<L>, pids: &mut PidTable, task: &Arc<Task>)
    where
        L: LockBefore<ProcessGroupState>,
    {
        task.set_ptrace_zombie(pids);
        pids.remove_task(task.tid);

        let mut state = self.write();

        let persistent_info: TaskPersistentInfo =
            if let Some(container) = state.tasks.remove(&task.tid) {
                container.into()
            } else {
                // The task has never been added. The only expected case is that this thread was
                // already terminating.
                debug_assert!(state.is_terminating());
                return;
            };

        // Everything below only runs when the last task of the group has just been removed.
        if state.tasks.is_empty() {
            // Reuse the exit status recorded when termination started; otherwise fall back to
            // the status of the exiting task and mark the group as terminating now.
            let exit_status =
                if let ThreadGroupRunState::Terminating(exit_status) = &state.run_state {
                    exit_status.clone()
                } else {
                    let exit_status = task.exit_status().unwrap_or_else(|| {
                        log_error!("Exiting without an exit code.");
                        ExitStatus::Exit(u8::MAX)
                    });
                    state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());
                    exit_status
                };

            // Replace PID table entry with a zombie.
            let exit_info =
                ProcessExitInfo { status: exit_status, exit_signal: state.exit_signal.clone() };
            let zombie =
                ZombieProcess::new(state.as_ref(), &persistent_info.real_creds(), exit_info);
            pids.kill_process(self.leader, OwnedRef::downgrade(&zombie));

            state.leave_process_group(locked, pids);

            // I have no idea if dropping the lock here is correct, and I don't want to think about
            // it. If problems do turn up with another thread observing an intermediate state of
            // this exit operation, the solution is to unify locks. It should be sensible and
            // possible for there to be a single lock that protects all (or nearly all) of the
            // data accessed by both exit and wait. In gvisor and linux this is the lock on the
            // equivalent of the PidTable. This is made more difficult by rust locks being
            // containers that only lock the data they contain, but see
            // https://docs.google.com/document/d/1YHrhBqNhU1WcrsYgGAu3JwwlVmFXPlwWHTJLAbwRebY/edit
            // for an idea.
            std::mem::drop(state);

            // Remove the process from the cgroup2 pid table after TG lock is dropped.
            // This function will hold the CgroupState lock which should be before the TG lock. See
            // more in lock_cgroup2_pid_table comments.
            self.kernel.cgroups.lock_cgroup2_pid_table().remove_process(self.into());

            // We will need the immediate parent and the reaper. Once we have them, we can make
            // sure to take the locks in the right order: parent before child.
            let parent = self.read().parent.clone();
            let reaper = self.find_reaper();

            {
                // Reparent the children.
                if let Some(reaper) = reaper {
                    let reaper = reaper.upgrade();
                    {
                        // Lock the reaper before this group, then each child in turn.
                        let mut reaper_state = reaper.write();
                        let mut state = self.write();
                        for (_pid, weak_child) in std::mem::take(&mut state.children) {
                            if let Some(child) = weak_child.upgrade() {
                                let mut child_state = child.write();

                                // A reparented child always reports its exit with SIGCHLD.
                                child_state.exit_signal = Some(SIGCHLD);
                                child_state.parent =
                                    Some(ThreadGroupParent::new(Arc::downgrade(&reaper)));
                                reaper_state.children.insert(child.leader, weak_child.clone());
                            }
                        }
                        reaper_state.zombie_children.append(&mut state.zombie_children);
                    }
                    ZombiePtracees::reparent(self, &reaper);
                } else {
                    // If we don't have a reaper then just drop the zombies.
                    let mut state = self.write();
                    for zombie in state.zombie_children.drain(..) {
                        zombie.release(pids);
                    }
                    state.zombie_ptracees.release(pids);
                }
            }

            // Clear the `parent` reference now that children have been re-`parent`ed.
            self.write().parent = None;

            // Sanity check: both zombie collections must have been handed off or released.
            #[cfg(any(test, debug_assertions))]
            {
                let state = self.read();
                assert!(state.zombie_children.is_empty());
                assert!(state.zombie_ptracees.is_empty());
            }

            if let Some(ref parent) = parent {
                let parent = parent.upgrade();
                // Pid of the tracer, if the exiting task is currently being ptraced.
                let mut tracer_pid = None;
                if let Some(ptrace) = &task.read().ptrace {
                    tracer_pid = Some(ptrace.get_pid());
                }

                // Let the tracer's thread group decide whether the parent should still receive
                // the zombie. Without a resolvable tracer the parent always gets it.
                let maybe_zombie = 'compute_zombie: {
                    if let Some(tracer_pid) = tracer_pid {
                        if let Ok(ref tracer) = pids.get_task(tracer_pid) {
                            break 'compute_zombie tracer
                                .thread_group()
                                .maybe_notify_tracer(task, pids, &parent, zombie);
                        }
                    }
                    Some(zombie)
                };
                if let Some(zombie) = maybe_zombie {
                    parent.do_zombie_notifications(zombie);
                }
            } else {
                // Nobody is left to wait for this process.
                zombie.release(pids);
            }

            // TODO: Set the error_code on the Zircon process object. Currently missing a way
            // to do this in Zircon. Might be easier in the new execution model.

            // Once the last zircon thread stops, the zircon process will also stop executing.

            // Re-run the orphaned process group check from the parent's point of view.
            if let Some(parent) = parent {
                let parent = parent.upgrade();
                parent.check_orphans(locked, pids);
            }
        }
    }
1013
1014    pub fn do_zombie_notifications(&self, zombie: OwnedRef<ZombieProcess>) {
1015        let mut state = self.write();
1016
1017        state.children.remove(&zombie.pid());
1018        state
1019            .deferred_zombie_ptracers
1020            .retain(|dzp| dzp.tracee_thread_group_key != zombie.thread_group_key);
1021
1022        let exit_signal = zombie.exit_info.exit_signal;
1023        let mut signal_info = zombie.to_wait_result().as_signal_info();
1024
1025        state.zombie_children.push(zombie);
1026        state.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
1027
1028        // Send signals
1029        if let Some(exit_signal) = exit_signal {
1030            signal_info.signal = exit_signal;
1031            state.send_signal(signal_info);
1032        }
1033    }
1034
1035    /// Notifies the tracer if appropriate.  Returns Some(zombie) if caller
1036    /// needs to notify the parent, None otherwise.  The caller should probably
1037    /// invoke parent.do_zombie_notifications(zombie) on the result.
1038    fn maybe_notify_tracer(
1039        &self,
1040        tracee: &Task,
1041        mut pids: &mut PidTable,
1042        parent: &ThreadGroup,
1043        zombie: OwnedRef<ZombieProcess>,
1044    ) -> Option<OwnedRef<ZombieProcess>> {
1045        if self.read().zombie_ptracees.has_tracee(tracee.tid) {
1046            if self == parent {
1047                // The tracer is the parent and has not consumed the
1048                // notification.  Don't bother with the ptracee stuff, and just
1049                // notify the parent.
1050                self.write().zombie_ptracees.remove(pids, tracee.tid);
1051                return Some(zombie);
1052            } else {
1053                // The tracer is not the parent and the tracer has not consumed
1054                // the notification.
1055                {
1056                    // Tell the parent to expect a notification later.
1057                    let mut parent_state = parent.write();
1058                    parent_state
1059                        .deferred_zombie_ptracers
1060                        .push(DeferredZombiePTracer::new(self, tracee));
1061                    parent_state.children.remove(&tracee.get_pid());
1062                }
1063                // Tell the tracer that there is a notification pending.
1064                let mut state = self.write();
1065                state.zombie_ptracees.set_parent_of(tracee.tid, Some(zombie), parent);
1066                tracee.write().notify_ptracers();
1067                return None;
1068            }
1069        } else if self == parent {
1070            // The tracer is the parent and has already consumed the parent
1071            // notification.  No further action required.
1072            parent.write().children.remove(&tracee.tid);
1073            zombie.release(&mut pids);
1074            return None;
1075        }
1076        // The tracer is not the parent and has already consumed the parent
1077        // notification.  Notify the parent.
1078        Some(zombie)
1079    }
1080
1081    /// Find the task which will adopt our children after we die.
1082    fn find_reaper(&self) -> Option<ThreadGroupParent> {
1083        let mut weak_parent = self.read().parent.clone()?;
1084        loop {
1085            weak_parent = {
1086                let parent = weak_parent.upgrade();
1087                let parent_state = parent.read();
1088                if parent_state.is_child_subreaper {
1089                    break;
1090                }
1091                match parent_state.parent {
1092                    Some(ref next_parent) => next_parent.clone(),
1093                    None => break,
1094                }
1095            };
1096        }
1097        Some(weak_parent)
1098    }
1099
1100    pub fn setsid<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
1101    where
1102        L: LockBefore<ProcessGroupState>,
1103    {
1104        let pids = self.kernel.pids.read();
1105        if pids.get_process_group(self.leader).is_some() {
1106            return error!(EPERM);
1107        }
1108        let process_group = ProcessGroup::new(self.leader, None);
1109        pids.add_process_group(process_group.clone());
1110        self.write().set_process_group(locked, process_group, &pids);
1111        self.check_orphans(locked, &pids);
1112
1113        Ok(())
1114    }
1115
    /// Moves the thread group of `target` into the process group identified by `pgid`.
    ///
    /// A `pgid` of 0 stands for the target's own leader pid. If that id equals the target's
    /// leader pid and differs from its current group, a new process group is created in the
    /// target's session; otherwise the group must already exist in that session.
    ///
    /// Errors, in the order they are checked:
    ///   - `ESRCH`: `target` is neither this process nor one of its children.
    ///   - `EACCES`: `target` is a child that has already exec'ed.
    ///   - `EPERM`: `target` is a session leader, or is in a different session than the caller.
    ///   - `EINVAL`: the resolved process group id is negative.
    ///   - `EPERM`: the requested process group does not exist or belongs to another session.
    ///   - whatever `security::check_setpgid_access` returns.
    ///
    /// Returns `Ok(())` without changes when the target is already in the requested group.
    pub fn setpgid<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        target: &Task,
        pgid: pid_t,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let pids = self.kernel.pids.read();

        {
            let current_process_group = Arc::clone(&self.read().process_group);

            // The target process must be either the current process or a child of the current
            // process.
            let mut target_thread_group = target.thread_group().write();
            let is_target_current_process_child =
                target_thread_group.parent.as_ref().map(|tg| tg.upgrade().leader)
                    == Some(self.leader);
            if target_thread_group.leader() != self.leader && !is_target_current_process_child {
                return error!(ESRCH);
            }

            // If the target process is a child of the current task, it must not have executed
            // one of the exec functions.
            if is_target_current_process_child && target_thread_group.did_exec {
                return error!(EACCES);
            }

            let new_process_group;
            {
                let target_process_group = &target_thread_group.process_group;

                // The target process must not be a session leader and must be in the same session as the current process.
                if target_thread_group.leader() == target_process_group.session.leader
                    || current_process_group.session != target_process_group.session
                {
                    return error!(EPERM);
                }

                // A pgid of 0 means "use the target's own pid".
                let target_pgid = if pgid == 0 { target_thread_group.leader() } else { pgid };
                if target_pgid < 0 {
                    return error!(EINVAL);
                }

                // Nothing to do if the target is already in the requested group.
                if target_pgid == target_process_group.leader {
                    return Ok(());
                }

                // If pgid is not equal to the target process id, the associated process group must exist
                // and be in the same session as the target process.
                if target_pgid != target_thread_group.leader() {
                    new_process_group =
                        pids.get_process_group(target_pgid).ok_or_else(|| errno!(EPERM))?;
                    if new_process_group.session != target_process_group.session {
                        return error!(EPERM);
                    }
                    security::check_setpgid_access(current_task, target)?;
                } else {
                    security::check_setpgid_access(current_task, target)?;
                    // Create a new process group
                    new_process_group =
                        ProcessGroup::new(target_pgid, Some(target_process_group.session.clone()));
                    pids.add_process_group(new_process_group.clone());
                }
            }

            target_thread_group.set_process_group(locked, new_process_group, &pids);
        }

        // The target's write lock is released here, before orphan detection runs.
        target.thread_group().check_orphans(locked, &pids);

        Ok(())
    }
1191
1192    fn itimer_real(&self) -> IntervalTimerHandle {
1193        self.timers.itimer_real()
1194    }
1195
1196    pub fn set_itimer(
1197        &self,
1198        current_task: &CurrentTask,
1199        which: u32,
1200        value: itimerval,
1201    ) -> Result<itimerval, Errno> {
1202        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1203            // We don't support setting these timers.
1204            // The gvisor test suite clears ITIMER_PROF as part of its test setup logic, so we support
1205            // clearing these values.
1206            if value.it_value.tv_sec == 0 && value.it_value.tv_usec == 0 {
1207                return Ok(itimerval::default());
1208            }
1209            track_stub!(TODO("https://fxbug.dev/322874521"), "Unsupported itimer type", which);
1210            return error!(ENOTSUP);
1211        }
1212
1213        if which != ITIMER_REAL {
1214            return error!(EINVAL);
1215        }
1216        let itimer_real = self.itimer_real();
1217        let prev_remaining = itimer_real.time_remaining();
1218        if value.it_value.tv_sec != 0 || value.it_value.tv_usec != 0 {
1219            itimer_real.arm(current_task, itimerspec_from_itimerval(value), false)?;
1220        } else {
1221            itimer_real.disarm(current_task)?;
1222        }
1223        Ok(itimerval {
1224            it_value: timeval_from_duration(prev_remaining.remainder),
1225            it_interval: timeval_from_duration(prev_remaining.interval),
1226        })
1227    }
1228
1229    pub fn get_itimer(&self, which: u32) -> Result<itimerval, Errno> {
1230        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1231            // We don't support setting these timers, so we can accurately report that these are not set.
1232            return Ok(itimerval::default());
1233        }
1234        if which != ITIMER_REAL {
1235            return error!(EINVAL);
1236        }
1237        let remaining = self.itimer_real().time_remaining();
1238        Ok(itimerval {
1239            it_value: timeval_from_duration(remaining.remainder),
1240            it_interval: timeval_from_duration(remaining.interval),
1241        })
1242    }
1243
1244    /// Check whether the stop state is compatible with `new_stopped`. If it is return it,
1245    /// otherwise, return None.
1246    fn check_stopped_state(
1247        &self,
1248        new_stopped: StopState,
1249        finalize_only: bool,
1250    ) -> Option<StopState> {
1251        let stopped = self.load_stopped();
1252        if finalize_only && !stopped.is_stopping_or_stopped() {
1253            return Some(stopped);
1254        }
1255
1256        if stopped.is_illegal_transition(new_stopped) {
1257            return Some(stopped);
1258        }
1259
1260        return None;
1261    }
1262
1263    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
1264    /// does not update the signal.  If |finalize_only| is set, will check that
1265    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
1266    /// before executing it.
1267    ///
1268    /// Returns the latest stop state after any changes.
1269    pub fn set_stopped(
1270        &self,
1271        new_stopped: StopState,
1272        siginfo: Option<SignalInfo>,
1273        finalize_only: bool,
1274    ) -> StopState {
1275        // Perform an early return check to see if we can avoid taking the lock.
1276        if let Some(stopped) = self.check_stopped_state(new_stopped, finalize_only) {
1277            return stopped;
1278        }
1279
1280        self.write().set_stopped(new_stopped, siginfo, finalize_only)
1281    }
1282
1283    /// Ensures |session| is the controlling session inside of |terminal_controller|, and returns a
1284    /// reference to the |TerminalController|.
1285    fn check_terminal_controller(
1286        session: &Arc<Session>,
1287        terminal_controller: &Option<TerminalController>,
1288    ) -> Result<(), Errno> {
1289        if let Some(terminal_controller) = terminal_controller {
1290            if let Some(terminal_session) = terminal_controller.session.upgrade() {
1291                if Arc::ptr_eq(session, &terminal_session) {
1292                    return Ok(());
1293                }
1294            }
1295        }
1296        error!(ENOTTY)
1297    }
1298
1299    pub fn get_foreground_process_group(&self, terminal: &Terminal) -> Result<pid_t, Errno> {
1300        let state = self.read();
1301        let process_group = &state.process_group;
1302        let terminal_state = terminal.read();
1303
1304        // "When fd does not refer to the controlling terminal of the calling
1305        // process, -1 is returned" - tcgetpgrp(3)
1306        Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1307        let pid = process_group.session.read().get_foreground_process_group_leader();
1308        Ok(pid)
1309    }
1310
    /// Makes the process group `pgid` the foreground process group of this group's session.
    ///
    /// Errors, in the order they are checked:
    ///   - `ENOTTY`: `terminal` is not the controlling terminal of the session.
    ///   - `EINVAL`: `pgid` is negative.
    ///   - `ESRCH`: no process group with id `pgid` exists.
    ///   - `EPERM`: the process group `pgid` belongs to a different session.
    ///   - `EINTR`: the caller is in a background process group and neither has SIGTTOU in
    ///     its signal mask nor ignores it; in that case SIGTTOU is sent to the caller's
    ///     process group and the foreground group is left unchanged.
    pub fn set_foreground_process_group<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        terminal: &Terminal,
        pgid: pid_t,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let process_group;
        let send_ttou;
        {
            // Keep locks to ensure atomicity.
            let pids = self.kernel.pids.read();
            let state = self.read();
            process_group = Arc::clone(&state.process_group);
            let terminal_state = terminal.read();
            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;

            // pgid must not be negative.
            if pgid < 0 {
                return error!(EINVAL);
            }

            let new_process_group = pids.get_process_group(pgid).ok_or_else(|| errno!(ESRCH))?;
            if new_process_group.session != process_group.session {
                return error!(EPERM);
            }

            let mut session_state = process_group.session.write();
            // If the calling process is a member of a background group and not ignoring SIGTTOU, a
            // SIGTTOU signal is sent to all members of this background process group.
            send_ttou = process_group.leader != session_state.get_foreground_process_group_leader()
                && !current_task.read().signal_mask().has_signal(SIGTTOU)
                && self.signal_actions.get(SIGTTOU).sa_handler != SIG_IGN;

            // Only switch the foreground group when no SIGTTOU is going to be raised.
            if !send_ttou {
                session_state.set_foreground_process_group(&new_process_group);
            }
        }

        // Locks must not be held when sending signals.
        if send_ttou {
            process_group.send_signals(locked, &[SIGTTOU]);
            return error!(EINTR);
        }

        Ok(())
    }
1361
1362    pub fn set_controlling_terminal(
1363        &self,
1364        current_task: &CurrentTask,
1365        terminal: &Terminal,
1366        is_main: bool,
1367        steal: bool,
1368        is_readable: bool,
1369    ) -> Result<(), Errno> {
1370        // Keep locks to ensure atomicity.
1371        let state = self.read();
1372        let process_group = &state.process_group;
1373        let mut terminal_state = terminal.write();
1374        let mut session_writer = process_group.session.write();
1375
1376        // "The calling process must be a session leader and not have a
1377        // controlling terminal already." - tty_ioctl(4)
1378        if process_group.session.leader != self.leader
1379            || session_writer.controlling_terminal.is_some()
1380        {
1381            return error!(EINVAL);
1382        }
1383
1384        let mut has_admin_capability_determined = false;
1385
1386        // "If this terminal is already the controlling terminal of a different
1387        // session group, then the ioctl fails with EPERM, unless the caller
1388        // has the CAP_SYS_ADMIN capability and arg equals 1, in which case the
1389        // terminal is stolen, and all processes that had it as controlling
1390        // terminal lose it." - tty_ioctl(4)
1391        if let Some(other_session) =
1392            terminal_state.controller.as_ref().and_then(|cs| cs.session.upgrade())
1393        {
1394            if other_session != process_group.session {
1395                if !steal {
1396                    return error!(EPERM);
1397                }
1398                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1399                has_admin_capability_determined = true;
1400
1401                // Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
1402                other_session.write().controlling_terminal = None;
1403            }
1404        }
1405
1406        if !is_readable && !has_admin_capability_determined {
1407            security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1408        }
1409
1410        session_writer.controlling_terminal = Some(ControllingTerminal::new(terminal, is_main));
1411        terminal_state.controller = TerminalController::new(&process_group.session);
1412        Ok(())
1413    }
1414
1415    pub fn release_controlling_terminal<L>(
1416        &self,
1417        locked: &mut Locked<L>,
1418        _current_task: &CurrentTask,
1419        terminal: &Terminal,
1420        is_main: bool,
1421    ) -> Result<(), Errno>
1422    where
1423        L: LockBefore<ProcessGroupState>,
1424    {
1425        let process_group;
1426        {
1427            // Keep locks to ensure atomicity.
1428            let state = self.read();
1429            process_group = Arc::clone(&state.process_group);
1430            let mut terminal_state = terminal.write();
1431            let mut session_writer = process_group.session.write();
1432
1433            // tty must be the controlling terminal.
1434            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1435            if !session_writer
1436                .controlling_terminal
1437                .as_ref()
1438                .map_or(false, |ct| ct.matches(terminal, is_main))
1439            {
1440                return error!(ENOTTY);
1441            }
1442
1443            // "If the process was session leader, then send SIGHUP and SIGCONT to the foreground
1444            // process group and all processes in the current session lose their controlling terminal."
1445            // - tty_ioctl(4)
1446
1447            // Remove tty as the controlling tty for each process in the session, then
1448            // send them SIGHUP and SIGCONT.
1449
1450            session_writer.controlling_terminal = None;
1451            terminal_state.controller = None;
1452        }
1453
1454        if process_group.session.leader == self.leader {
1455            process_group.send_signals(locked, &[SIGHUP, SIGCONT]);
1456        }
1457
1458        Ok(())
1459    }
1460
1461    fn check_orphans<L>(&self, locked: &mut Locked<L>, pids: &PidTable)
1462    where
1463        L: LockBefore<ProcessGroupState>,
1464    {
1465        let mut thread_groups = self.read().children().collect::<Vec<_>>();
1466        let this = self.weak_self.upgrade().unwrap();
1467        thread_groups.push(this);
1468        let process_groups =
1469            thread_groups.iter().map(|tg| Arc::clone(&tg.read().process_group)).unique();
1470        for pg in process_groups {
1471            pg.check_orphaned(locked, pids);
1472        }
1473    }
1474
1475    pub fn get_rlimit<L>(&self, locked: &mut Locked<L>, resource: Resource) -> u64
1476    where
1477        L: LockBefore<ThreadGroupLimits>,
1478    {
1479        self.limits.lock(locked).get(resource).rlim_cur
1480    }
1481
1482    /// Adjusts the rlimits of the ThreadGroup to which `target_task` belongs to.
1483    pub fn adjust_rlimits<L>(
1484        locked: &mut Locked<L>,
1485        current_task: &CurrentTask,
1486        target_task: &Task,
1487        resource: Resource,
1488        maybe_new_limit: Option<rlimit>,
1489    ) -> Result<rlimit, Errno>
1490    where
1491        L: LockBefore<ThreadGroupLimits>,
1492    {
1493        let thread_group = target_task.thread_group();
1494        let can_increase_rlimit = security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE);
1495        let mut limit_state = thread_group.limits.lock(locked);
1496        let old_limit = limit_state.get(resource);
1497        if let Some(new_limit) = maybe_new_limit {
1498            if new_limit.rlim_max > old_limit.rlim_max && !can_increase_rlimit {
1499                return error!(EPERM);
1500            }
1501            security::task_setrlimit(current_task, &target_task, old_limit, new_limit)?;
1502            limit_state.set(resource, new_limit)
1503        }
1504        Ok(old_limit)
1505    }
1506
1507    pub fn time_stats(&self) -> TaskTimeStats {
1508        let process: &zx::Process = if self.process.as_handle_ref().is_invalid() {
1509            // `process` must be valid for all tasks, except `kthreads`. In that case get the
1510            // stats from starnix process.
1511            assert_eq!(
1512                self as *const ThreadGroup,
1513                Arc::as_ptr(&self.kernel.kthreads.system_thread_group())
1514            );
1515            &self.kernel.kthreads.starnix_process
1516        } else {
1517            &self.process
1518        };
1519
1520        let info =
1521            zx::Task::get_runtime_info(process).expect("Failed to get starnix process stats");
1522        TaskTimeStats {
1523            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
1524            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
1525            system_time: zx::MonotonicDuration::default(),
1526        }
1527    }
1528
1529    /// For each task traced by this thread_group that matches the given
1530    /// selector, acquire its TaskMutableState and ptracees lock and execute the
1531    /// given function.
1532    pub fn get_ptracees_and(
1533        &self,
1534        selector: &ProcessSelector,
1535        pids: &PidTable,
1536        f: &mut dyn FnMut(&Task, &TaskMutableState),
1537    ) {
1538        for tracee in self
1539            .ptracees
1540            .lock()
1541            .keys()
1542            .filter(|tracee_tid| selector.match_tid(**tracee_tid, &pids))
1543            .map(|tracee_tid| pids.get_task(*tracee_tid))
1544        {
1545            if let Ok(task_ref) = tracee {
1546                let task_state = task_ref.write();
1547                if task_state.ptrace.is_some() {
1548                    f(&task_ref, &task_state);
1549                }
1550            }
1551        }
1552    }
1553
1554    /// Returns a tracee whose state has changed, so that waitpid can report on
1555    /// it. If this returns a value, and the pid is being traced, the tracer
1556    /// thread is deemed to have seen the tracee ptrace-stop for the purposes of
1557    /// PTRACE_LISTEN.
1558    pub fn get_waitable_ptracee(
1559        &self,
1560        selector: &ProcessSelector,
1561        options: &WaitingOptions,
1562        pids: &mut PidTable,
1563    ) -> Option<WaitResult> {
1564        // This checks to see if the target is a zombie ptracee.
1565        let waitable_entry = self.write().zombie_ptracees.get_waitable_entry(selector, options);
1566        match waitable_entry {
1567            None => (),
1568            Some((zombie, None)) => return Some(zombie.to_wait_result()),
1569            Some((zombie, Some((tg, z)))) => {
1570                if let Some(tg) = tg.upgrade() {
1571                    if Arc::as_ptr(&tg) != self as *const Self {
1572                        tg.do_zombie_notifications(z);
1573                    } else {
1574                        {
1575                            let mut state = tg.write();
1576                            state.children.remove(&z.pid());
1577                            state
1578                                .deferred_zombie_ptracers
1579                                .retain(|dzp| dzp.tracee_thread_group_key != z.thread_group_key);
1580                        }
1581
1582                        z.release(pids);
1583                    };
1584                }
1585                return Some(zombie.to_wait_result());
1586            }
1587        }
1588
1589        let mut tasks = vec![];
1590
1591        // This checks to see if the target is a living ptracee
1592        self.get_ptracees_and(selector, pids, &mut |task: &Task, _| {
1593            tasks.push(task.weak_self.clone());
1594        });
1595        for task in tasks {
1596            let Some(task_ref) = task.upgrade() else {
1597                continue;
1598            };
1599
1600            let process_state = &mut task_ref.thread_group().write();
1601            let mut task_state = task_ref.write();
1602            if task_state
1603                .ptrace
1604                .as_ref()
1605                .is_some_and(|ptrace| ptrace.is_waitable(task_ref.load_stopped(), options))
1606            {
1607                // We've identified a potential target.  Need to return either
1608                // the process's information (if we are in group-stop) or the
1609                // thread's information (if we are in a different stop).
1610
1611                // The shared information:
1612                let mut pid: i32 = 0;
1613                let info = process_state.tasks.values().next().unwrap().info().clone();
1614                let uid = info.real_creds().uid;
1615                let mut exit_status = None;
1616                let exit_signal = process_state.exit_signal.clone();
1617                let time_stats =
1618                    process_state.base.time_stats() + process_state.children_time_stats;
1619                let task_stopped = task_ref.load_stopped();
1620
1621                #[derive(PartialEq)]
1622                enum ExitType {
1623                    None,
1624                    Cont,
1625                    Stop,
1626                    Kill,
1627                }
1628                if process_state.is_waitable() {
1629                    let ptrace = &mut task_state.ptrace;
1630                    // The information for processes, if we were in group stop.
1631                    let process_stopped = process_state.base.load_stopped();
1632                    let mut fn_type = ExitType::None;
1633                    if process_stopped == StopState::Awake && options.wait_for_continued {
1634                        fn_type = ExitType::Cont;
1635                    }
1636                    let mut event = ptrace
1637                        .as_ref()
1638                        .map_or(PtraceEvent::None, |ptrace| {
1639                            ptrace.event_data.as_ref().map_or(PtraceEvent::None, |data| data.event)
1640                        })
1641                        .clone();
1642                    // Tasks that are ptrace'd always get stop notifications.
1643                    if process_stopped == StopState::GroupStopped
1644                        && (options.wait_for_stopped || ptrace.is_some())
1645                    {
1646                        fn_type = ExitType::Stop;
1647                    }
1648                    if fn_type != ExitType::None {
1649                        let siginfo = if options.keep_waitable_state {
1650                            process_state.last_signal.clone()
1651                        } else {
1652                            process_state.last_signal.take()
1653                        };
1654                        if let Some(mut siginfo) = siginfo {
1655                            if task_ref.thread_group().load_stopped() == StopState::GroupStopped
1656                                && ptrace.as_ref().is_some_and(|ptrace| ptrace.is_seized())
1657                            {
1658                                if event == PtraceEvent::None {
1659                                    event = PtraceEvent::Stop;
1660                                }
1661                                siginfo.code |= (PtraceEvent::Stop as i32) << 8;
1662                            }
1663                            if siginfo.signal == SIGKILL {
1664                                fn_type = ExitType::Kill;
1665                            }
1666                            exit_status = match fn_type {
1667                                ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1668                                ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1669                                ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1670                                _ => None,
1671                            };
1672                        }
1673                        // Clear the wait status of the ptrace, because we're
1674                        // using the tg status instead.
1675                        ptrace
1676                            .as_mut()
1677                            .map(|ptrace| ptrace.get_last_signal(options.keep_waitable_state));
1678                    }
1679                    pid = process_state.base.leader;
1680                }
1681                if exit_status == None {
1682                    if let Some(ptrace) = task_state.ptrace.as_mut() {
1683                        // The information for the task, if we were in a non-group stop.
1684                        let mut fn_type = ExitType::None;
1685                        let event = ptrace
1686                            .event_data
1687                            .as_ref()
1688                            .map_or(PtraceEvent::None, |event| event.event);
1689                        if task_stopped == StopState::Awake {
1690                            fn_type = ExitType::Cont;
1691                        }
1692                        if task_stopped.is_stopping_or_stopped()
1693                            || ptrace.stop_status == PtraceStatus::Listening
1694                        {
1695                            fn_type = ExitType::Stop;
1696                        }
1697                        if fn_type != ExitType::None {
1698                            if let Some(siginfo) =
1699                                ptrace.get_last_signal(options.keep_waitable_state)
1700                            {
1701                                if siginfo.signal == SIGKILL {
1702                                    fn_type = ExitType::Kill;
1703                                }
1704                                exit_status = match fn_type {
1705                                    ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1706                                    ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1707                                    ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1708                                    _ => None,
1709                                };
1710                            }
1711                        }
1712                        pid = task_ref.get_tid();
1713                    }
1714                }
1715                if let Some(exit_status) = exit_status {
1716                    return Some(WaitResult {
1717                        pid,
1718                        uid,
1719                        exit_info: ProcessExitInfo { status: exit_status, exit_signal },
1720                        time_stats,
1721                    });
1722                }
1723            }
1724        }
1725        None
1726    }
1727
1728    /// Attempts to send an unchecked signal to this thread group.
1729    ///
1730    /// - `current_task`: The task that is sending the signal.
1731    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1732    /// where rights are to be checked but no signal is actually sent.
1733    ///
1734    /// # Returns
1735    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1736    /// the error that was encountered.
1737    pub fn send_signal_unchecked(
1738        &self,
1739        current_task: &CurrentTask,
1740        unchecked_signal: UncheckedSignal,
1741    ) -> Result<(), Errno> {
1742        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1743            let signal_info = SignalInfo::with_detail(
1744                signal,
1745                SI_USER as i32,
1746                SignalDetail::Kill {
1747                    pid: current_task.thread_group().leader,
1748                    uid: current_task.current_creds().uid,
1749                },
1750            );
1751
1752            self.write().send_signal(signal_info);
1753        }
1754
1755        Ok(())
1756    }
1757
1758    /// Sends a signal to this thread_group without performing any access checks.
1759    ///
1760    /// # Safety
1761    /// This is unsafe, because it should only be called by tools and tests.
1762    pub unsafe fn send_signal_unchecked_debug(
1763        &self,
1764        current_task: &CurrentTask,
1765        unchecked_signal: UncheckedSignal,
1766    ) -> Result<(), Errno> {
1767        let signal = Signal::try_from(unchecked_signal)?;
1768        let signal_info = SignalInfo::with_detail(
1769            signal,
1770            SI_USER as i32,
1771            SignalDetail::Kill {
1772                pid: current_task.thread_group().leader,
1773                uid: current_task.current_creds().uid,
1774            },
1775        );
1776
1777        self.write().send_signal(signal_info);
1778        Ok(())
1779    }
1780
1781    /// Attempts to send an unchecked signal to this thread group, with info read from
1782    /// `siginfo_ref`.
1783    ///
1784    /// - `current_task`: The task that is sending the signal.
1785    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1786    /// where rights are to be checked but no signal is actually sent.
1787    /// - `siginfo_ref`: The siginfo that will be enqueued.
1788    /// - `options`: Options for how to convert the siginfo into a signal info.
1789    ///
1790    /// # Returns
1791    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1792    /// the error that was encountered.
1793    #[track_caller]
1794    pub fn send_signal_unchecked_with_info(
1795        &self,
1796        current_task: &CurrentTask,
1797        unchecked_signal: UncheckedSignal,
1798        siginfo_ref: UserAddress,
1799        options: IntoSignalInfoOptions,
1800    ) -> Result<(), Errno> {
1801        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1802            let siginfo = UncheckedSignalInfo::read_from_siginfo(current_task, siginfo_ref)?;
1803            if self.leader != current_task.get_pid()
1804                && (siginfo.code() >= 0 || siginfo.code() == SI_TKILL)
1805            {
1806                return error!(EPERM);
1807            }
1808
1809            self.write().send_signal(siginfo.into_signal_info(signal, options)?);
1810        }
1811
1812        Ok(())
1813    }
1814
1815    /// Checks whether or not `current_task` can signal this thread group with `unchecked_signal`.
1816    ///
1817    /// Returns:
1818    ///   - `Ok(Some(Signal))` if the signal passed checks and should be sent.
1819    ///   - `Ok(None)` if the signal passed checks, but should not be sent. This is used by
1820    ///   userspace for permission checks.
1821    ///   - `Err(_)` if the permission checks failed.
1822    fn check_signal_access(
1823        &self,
1824        current_task: &CurrentTask,
1825        unchecked_signal: UncheckedSignal,
1826    ) -> Result<Option<Signal>, Errno> {
1827        // Pick an arbitrary task in thread_group to check permissions.
1828        //
1829        // Tasks can technically have different credentials, but in practice they are kept in sync.
1830        let target_task = self.read().get_any_task()?;
1831        current_task.can_signal(&target_task, unchecked_signal)?;
1832
1833        // 0 is a sentinel value used to do permission checks.
1834        if unchecked_signal.is_zero() {
1835            return Ok(None);
1836        }
1837
1838        let signal = Signal::try_from(unchecked_signal)?;
1839        security::check_signal_access(current_task, &target_task, signal)?;
1840
1841        Ok(Some(signal))
1842    }
1843
1844    pub fn has_signal_queued(&self, signal: Signal) -> bool {
1845        self.pending_signals.lock().has_queued(signal)
1846    }
1847
1848    pub fn num_signals_queued(&self) -> usize {
1849        self.pending_signals.lock().num_queued()
1850    }
1851
1852    pub fn get_pending_signals(&self) -> SigSet {
1853        self.pending_signals.lock().pending()
1854    }
1855
1856    pub fn is_any_signal_allowed_by_mask(&self, mask: SigSet) -> bool {
1857        self.pending_signals.lock().is_any_allowed_by_mask(mask)
1858    }
1859
1860    pub fn take_next_signal_where<F>(&self, predicate: F) -> Option<SignalInfo>
1861    where
1862        F: Fn(&SignalInfo) -> bool,
1863    {
1864        let mut signals = self.pending_signals.lock();
1865        let r = signals.take_next_where(predicate);
1866        self.has_pending_signals.store(!signals.is_empty(), Ordering::Relaxed);
1867        r
1868    }
1869
1870    /// Drive this `ThreadGroup` to exit, allowing it time to handle SIGTERM before sending SIGKILL.
1871    ///
1872    /// Returns once `ThreadGroup::exit()` has completed.
1873    ///
1874    /// Must be called from the system task.
1875    pub async fn shut_down(this: Weak<Self>) {
1876        const SHUTDOWN_SIGNAL_HANDLING_TIMEOUT: zx::MonotonicDuration =
1877            zx::MonotonicDuration::from_seconds(1);
1878
1879        // Prepare for shutting down the thread group.
1880        let (tg_name, mut on_exited) = {
1881            // Nest this upgraded access so upgraded references aren't held across await-points.
1882            let Some(this) = this.upgrade() else {
1883                return;
1884            };
1885
1886            // Register a channel to be notified when exit() is complete.
1887            let (on_exited_send, on_exited) = futures::channel::oneshot::channel();
1888            this.write().exit_notifier = Some(on_exited_send);
1889
1890            // We want to be able to log about this thread group without upgrading the `Weak`.
1891            let tg_name = format!("{this:?}");
1892
1893            (tg_name, on_exited)
1894        };
1895
1896        log_debug!(tg:% = tg_name; "shutting down thread group, sending SIGTERM");
1897        this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGTERM)));
1898
1899        // Give thread groups some time to handle SIGTERM, proceeding early if they exit
1900        let timeout = fuchsia_async::Timer::new(SHUTDOWN_SIGNAL_HANDLING_TIMEOUT);
1901        futures::pin_mut!(timeout);
1902
1903        // Use select_biased instead of on_timeout() so that we can await on on_exited later
1904        futures::select_biased! {
1905            _ = &mut on_exited => (),
1906            _ = timeout => {
1907                log_debug!(tg:% = tg_name; "sending SIGKILL");
1908                this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGKILL)));
1909            },
1910        };
1911
1912        log_debug!(tg:% = tg_name; "waiting for exit");
1913        // It doesn't matter whether ThreadGroup::exit() was called or the process exited with
1914        // a return code and dropped the sender end of the channel.
1915        on_exited.await.ok();
1916        log_debug!(tg:% = tg_name; "thread group shutdown complete");
1917    }
1918
    /// Returns the KOID of the process for this thread group.
    ///
    /// This method should be used when mapping 32 bit linux process ids to KOIDs,
    /// to avoid breaking the encapsulation of the zx::process within the ThreadGroup.
    /// This encapsulation is important since the relationship between the ThreadGroup
    /// and the Process may change over time. See [ThreadGroup::process] for more details.
    ///
    /// Any error reported by the underlying `koid()` call is returned unchanged.
    pub fn get_process_koid(&self) -> Result<Koid, Status> {
        self.process.koid()
    }
1927}
1928
/// Outcome of looking for a waitable child, as returned by `get_waitable_child`.
pub enum WaitableChildResult {
    /// A child matching the request has a status to report right now.
    ReadyNow(Box<WaitResult>),
    /// Something matches the request, but nothing is reportable yet.
    ShouldWait,
    /// Neither a child nor a deferred zombie ptracer matches the request.
    NoneFound,
}
1934
1935#[apply(state_implementation!)]
1936impl ThreadGroupMutableState<Base = ThreadGroup> {
    /// Returns the `leader` pid recorded on the thread group that owns this state.
    pub fn leader(&self) -> pid_t {
        self.base.leader
    }
1940
1941    pub fn leader_command(&self) -> TaskCommand {
1942        self.get_task(self.leader())
1943            .map(|l| l.command())
1944            .unwrap_or_else(|| TaskCommand::new(b"<leader exited>"))
1945    }
1946
1947    pub fn is_terminating(&self) -> bool {
1948        !matches!(self.run_state, ThreadGroupRunState::Running)
1949    }
1950
1951    pub fn children(&self) -> impl Iterator<Item = Arc<ThreadGroup>> + '_ {
1952        self.children.values().map(|v| {
1953            v.upgrade().expect("Weak references to processes in ThreadGroup must always be valid")
1954        })
1955    }
1956
1957    pub fn tasks(&self) -> Vec<Arc<Task>> {
1958        self.tasks.values().flat_map(|t| t.upgrade()).collect()
1959    }
1960
    /// Iterates over the tids of all entries in this thread group's task table, including
    /// entries whose task may no longer be upgradable.
    pub fn task_ids(&self) -> impl Iterator<Item = &tid_t> {
        self.tasks.keys()
    }
1964
    /// Returns whether the task table has an entry for `tid`. The entry's task is not
    /// checked for being upgradable.
    pub fn contains_task(&self, tid: tid_t) -> bool {
        self.tasks.contains_key(&tid)
    }
1968
1969    pub fn get_task(&self, tid: tid_t) -> Option<Arc<Task>> {
1970        self.tasks.get(&tid).and_then(|t| t.upgrade())
1971    }
1972
    /// Returns the number of entries in the task table, counting entries whose task may no
    /// longer be upgradable.
    pub fn tasks_count(&self) -> usize {
        self.tasks.len()
    }
1976
1977    pub fn get_ppid(&self) -> pid_t {
1978        match &self.parent {
1979            Some(parent) => parent.upgrade().leader,
1980            None => 0,
1981        }
1982    }
1983
1984    fn set_process_group<L>(
1985        &mut self,
1986        locked: &mut Locked<L>,
1987        process_group: Arc<ProcessGroup>,
1988        pids: &PidTable,
1989    ) where
1990        L: LockBefore<ProcessGroupState>,
1991    {
1992        if self.process_group == process_group {
1993            return;
1994        }
1995        self.leave_process_group(locked, pids);
1996        self.process_group = process_group;
1997        self.process_group.insert(locked, self.base);
1998    }
1999
2000    fn leave_process_group<L>(&mut self, locked: &mut Locked<L>, pids: &PidTable)
2001    where
2002        L: LockBefore<ProcessGroupState>,
2003    {
2004        if self.process_group.remove(locked, self.base) {
2005            self.process_group.session.write().remove(self.process_group.leader);
2006            pids.remove_process_group(self.process_group.leader);
2007        }
2008    }
2009
2010    /// Indicates whether the thread group is waitable via waitid and waitpid for
2011    /// either WSTOPPED or WCONTINUED.
2012    pub fn is_waitable(&self) -> bool {
2013        return self.last_signal.is_some() && !self.base.load_stopped().is_in_progress();
2014    }
2015
2016    pub fn get_waitable_zombie(
2017        &mut self,
2018        zombie_list: &dyn Fn(&mut ThreadGroupMutableState) -> &mut Vec<OwnedRef<ZombieProcess>>,
2019        selector: &ProcessSelector,
2020        options: &WaitingOptions,
2021        pids: &mut PidTable,
2022    ) -> Option<WaitResult> {
2023        // We look for the last zombie in the vector that matches pid selector and waiting options
2024        let selected_zombie_position = zombie_list(self)
2025            .iter()
2026            .rev()
2027            .position(|zombie| zombie.matches_selector_and_waiting_option(selector, options))
2028            .map(|position_starting_from_the_back| {
2029                zombie_list(self).len() - 1 - position_starting_from_the_back
2030            });
2031
2032        selected_zombie_position.map(|position| {
2033            if options.keep_waitable_state {
2034                zombie_list(self)[position].to_wait_result()
2035            } else {
2036                let zombie = zombie_list(self).remove(position);
2037                self.children_time_stats += zombie.time_stats;
2038                let result = zombie.to_wait_result();
2039                zombie.release(pids);
2040                result
2041            }
2042        })
2043    }
2044
2045    pub fn is_correct_exit_signal(for_clone: bool, exit_code: Option<Signal>) -> bool {
2046        for_clone == (exit_code != Some(SIGCHLD))
2047    }
2048
    /// Looks for a living child that matches `selector` and `options` and has a stop or
    /// continue status to report.
    ///
    /// Returns:
    ///   - `NoneFound` if no child matches and no deferred zombie ptracer matches `selector`.
    ///   - `ShouldWait` if no child matches but a deferred zombie ptracer does, or if children
    ///     match but none of them has a reportable status.
    ///   - `ReadyNow` for the first matching child that has a `last_signal` and is either
    ///     awake with `options.wait_for_continued` set, or group-stopped with
    ///     `options.wait_for_stopped` set.
    ///
    /// The child's `last_signal` is consumed unless `options.keep_waitable_state` is set.
    fn get_waitable_running_children(
        &self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &PidTable,
    ) -> WaitableChildResult {
        // The children whose pid matches the pid selector queried.
        let filter_children_by_pid_selector = |child: &ThreadGroup| match *selector {
            ProcessSelector::Any => true,
            ProcessSelector::Pid(pid) => child.leader == pid,
            ProcessSelector::Pgid(pgid) => {
                pids.get_process_group(pgid).as_ref() == Some(&child.read().process_group)
            }
            ProcessSelector::Process(ref key) => *key == ThreadGroupKey::from(child),
        };

        // The children whose exit signal matches the waiting options queried.
        let filter_children_by_waiting_options = |child: &ThreadGroup| {
            if options.wait_for_all {
                return true;
            }
            Self::is_correct_exit_signal(options.wait_for_clone, child.read().exit_signal)
        };

        // Lazily select the living children that match both filters. The filters run as the
        // iterator is advanced, so child read locks are taken one child at a time.
        let mut selected_children = self
            .children
            .values()
            .map(|t| t.upgrade().unwrap())
            .filter(|tg| filter_children_by_pid_selector(&tg))
            .filter(|tg| filter_children_by_waiting_options(&tg))
            .peekable();
        if selected_children.peek().is_none() {
            // There still might be a process that ptrace hasn't looked at yet.
            if self.deferred_zombie_ptracers.iter().any(|dzp| match *selector {
                ProcessSelector::Any => true,
                ProcessSelector::Pid(pid) => dzp.tracee_thread_group_key.pid() == pid,
                ProcessSelector::Pgid(pgid) => pgid == dzp.tracee_pgid,
                ProcessSelector::Process(ref key) => *key == dzp.tracee_thread_group_key,
            }) {
                return WaitableChildResult::ShouldWait;
            }

            return WaitableChildResult::NoneFound;
        }
        for child in selected_children {
            let child = child.write();
            if child.last_signal.is_some() {
                // Builds the result for `child`, taking ownership of its write guard. The
                // `exit_status` callback picks between the stop and continue statuses.
                let build_wait_result = |mut child: ThreadGroupWriteGuard<'_>,
                                         exit_status: &dyn Fn(SignalInfo) -> ExitStatus|
                 -> WaitResult {
                    // `last_signal` was checked to be `Some` above, under the same guard.
                    let siginfo = if options.keep_waitable_state {
                        child.last_signal.clone().unwrap()
                    } else {
                        child.last_signal.take().unwrap()
                    };
                    let exit_status = if siginfo.signal == SIGKILL {
                        // This overrides the stop/continue choice.
                        ExitStatus::Kill(siginfo)
                    } else {
                        exit_status(siginfo)
                    };
                    // The uid is read from the first entry of the child's task table.
                    let info = child.tasks.values().next().unwrap().info();
                    let uid = info.real_creds().uid;
                    WaitResult {
                        pid: child.base.leader,
                        uid,
                        exit_info: ProcessExitInfo {
                            status: exit_status,
                            exit_signal: child.exit_signal,
                        },
                        time_stats: child.base.time_stats() + child.children_time_stats,
                    }
                };
                let child_stopped = child.base.load_stopped();
                if child_stopped == StopState::Awake && options.wait_for_continued {
                    return WaitableChildResult::ReadyNow(Box::new(build_wait_result(
                        child,
                        &|siginfo| ExitStatus::Continue(siginfo, PtraceEvent::None),
                    )));
                }
                if child_stopped == StopState::GroupStopped && options.wait_for_stopped {
                    return WaitableChildResult::ReadyNow(Box::new(build_wait_result(
                        child,
                        &|siginfo| ExitStatus::Stop(siginfo, PtraceEvent::None),
                    )));
                }
            }
        }

        // Children match the request, but none of them has a status to report yet.
        WaitableChildResult::ShouldWait
    }
2141
2142    /// Returns any waitable child matching the given `selector` and `options`. Returns None if no
2143    /// child matching the selector is waitable. Returns ECHILD if no child matches the selector at
2144    /// all.
2145    ///
2146    /// Will remove the waitable status from the child depending on `options`.
2147    pub fn get_waitable_child(
2148        &mut self,
2149        selector: &ProcessSelector,
2150        options: &WaitingOptions,
2151        pids: &mut PidTable,
2152    ) -> WaitableChildResult {
2153        if options.wait_for_exited {
2154            if let Some(waitable_zombie) = self.get_waitable_zombie(
2155                &|state: &mut ThreadGroupMutableState| &mut state.zombie_children,
2156                selector,
2157                options,
2158                pids,
2159            ) {
2160                return WaitableChildResult::ReadyNow(Box::new(waitable_zombie));
2161            }
2162        }
2163
2164        self.get_waitable_running_children(selector, options, pids)
2165    }
2166
2167    /// Returns a task in the current thread group.
2168    pub fn get_live_task(&self) -> Result<Arc<Task>, Errno> {
2169        self.tasks
2170            .iter()
2171            .find_map(|container| container.1.upgrade().filter(|task| task.live().is_ok()))
2172            .ok_or_else(|| errno!(ESRCH))
2173    }
2174
2175    /// Returns a task representative of the [`ThreadGroup`].
2176    ///
2177    /// If the task list contains at least one live task, an arbitrary live task is returned.
2178    /// Otherwise, if the task list is empty, the process must be a zombie. In this case, the exited
2179    /// leader task is returned.
2180    pub fn get_any_task(&self) -> Result<Arc<Task>, Errno> {
2181        self.get_live_task()
2182            .ok()
2183            .or_else(|| self.base.leader_task.get().and_then(|t| t.upgrade()))
2184            .ok_or_else(|| errno!(ESRCH))
2185    }
2186
2187    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
2188    /// does not update the signal.  If |finalize_only| is set, will check that
2189    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
2190    /// before executing it.
2191    ///
2192    /// Returns the latest stop state after any changes.
2193    pub fn set_stopped(
2194        mut self,
2195        new_stopped: StopState,
2196        siginfo: Option<SignalInfo>,
2197        finalize_only: bool,
2198    ) -> StopState {
2199        if let Some(stopped) = self.base.check_stopped_state(new_stopped, finalize_only) {
2200            return stopped;
2201        }
2202
2203        // Thread groups don't transition to group stop if they are waking, because waking
2204        // means something told it to wake up (like a SIGCONT) but hasn't finished yet.
2205        if self.base.load_stopped() == StopState::Waking
2206            && (new_stopped == StopState::GroupStopping || new_stopped == StopState::GroupStopped)
2207        {
2208            return self.base.load_stopped();
2209        }
2210
2211        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When thread
2212        // group can be stopped inside user code, tasks/thread groups will
2213        // need to be either restarted or stopped here.
2214        self.store_stopped(new_stopped);
2215        if let Some(signal) = &siginfo {
2216            // We don't want waiters to think the process was unstopped
2217            // because of a sigkill.  They will get woken when the
2218            // process dies.
2219            if signal.signal != SIGKILL {
2220                self.last_signal = siginfo;
2221            }
2222        }
2223        if new_stopped == StopState::Waking || new_stopped == StopState::ForceWaking {
2224            self.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::Stopped);
2225        };
2226
2227        let parent = (!new_stopped.is_in_progress()).then(|| self.parent.clone()).flatten();
2228
2229        // Drop the lock before locking the parent.
2230        std::mem::drop(self);
2231        if let Some(parent) = parent {
2232            let parent = parent.upgrade();
2233            parent
2234                .write()
2235                .lifecycle_waiters
2236                .notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
2237        }
2238
2239        new_stopped
2240    }
2241
2242    fn store_stopped(&mut self, state: StopState) {
2243        // We don't actually use the guard but we require it to enforce that the
2244        // caller holds the thread group's mutable state lock (identified by
2245        // mutable access to the thread group's mutable state).
2246
2247        self.base.stop_state.store(state, Ordering::Relaxed)
2248    }
2249
    /// Sends the signal `signal_info` to this thread group.
    ///
    /// The signal is enqueued on the thread group's pending signals and `has_pending_signals`
    /// is set. For `SIGKILL` and `SIGCONT` the thread group, and then each task, is moved to
    /// `ForceWaking` or `Waking` respectively. Each task for which the signal counts as queued
    /// has its signal waiters notified, and at most one task is interrupted.
    ///
    /// This consumes the state guard.
    #[allow(unused_mut, reason = "needed for some but not all macro outputs")]
    pub fn send_signal(mut self, signal_info: SignalInfo) {
        let sigaction = self.base.signal_actions.get(signal_info.signal);
        let action = action_for_signal(&signal_info, sigaction);

        // Enqueue while holding the queue lock, and raise the pending flag in the same scope.
        {
            let mut pending_signals = self.base.pending_signals.lock();
            pending_signals.enqueue(signal_info.clone());
            self.base.has_pending_signals.store(true, Ordering::Relaxed);
        }
        // Snapshot the tasks as weak references before `self` may be consumed below.
        let tasks: Vec<Weak<Task>> = self.tasks.values().map(|t| t.weak_clone()).collect();

        // Set state to waking before interrupting any tasks.
        if signal_info.signal == SIGKILL {
            self.set_stopped(StopState::ForceWaking, Some(signal_info.clone()), false);
        } else if signal_info.signal == SIGCONT {
            self.set_stopped(StopState::Waking, Some(signal_info.clone()), false);
        }

        let mut has_interrupted_task = false;
        for task in tasks.iter().flat_map(|t| t.upgrade()) {
            let mut task_state = task.write();

            if signal_info.signal == SIGKILL {
                task_state.thaw();
                task_state.set_stopped(StopState::ForceWaking, None, None, None);
            } else if signal_info.signal == SIGCONT {
                task_state.set_stopped(StopState::Waking, None, None, None);
            }

            let is_masked = task_state.is_signal_masked(signal_info.signal);
            let was_masked = task_state.is_signal_masked_by_saved_mask(signal_info.signal);

            // The signal counts as queued for this task unless it is ignored, unmasked (now
            // and in the saved mask), and the task is not ptraced.
            let is_queued = action != DeliveryAction::Ignore
                || is_masked
                || was_masked
                || task_state.is_ptraced();

            if is_queued {
                task_state.notify_signal_waiters(&signal_info.signal);

                if !is_masked && action.must_interrupt(Some(sigaction)) && !has_interrupted_task {
                    // Only interrupt one task, and only interrupt if the signal was actually queued
                    // and the action must interrupt.
                    // The task state lock is released before interrupting the task.
                    drop(task_state);
                    task.interrupt();
                    has_interrupted_task = true;
                }
            }
        }
    }
2302}
2303
2304/// Container around a weak task and a strong `TaskPersistentInfo`. It is needed to keep the
2305/// information even when the task is not upgradable, because when the task is dropped, there is a
2306/// moment where the task is not yet released, yet the weak pointer is not upgradeable anymore.
2307/// During this time, it is still necessary to access the persistent info to compute the state of
2308/// the thread for the different wait syscalls.
2309pub struct TaskContainer(Weak<Task>, TaskPersistentInfo);
2310
2311impl From<&Arc<Task>> for TaskContainer {
2312    fn from(task: &Arc<Task>) -> Self {
2313        Self(Arc::downgrade(task), task.persistent_info.clone())
2314    }
2315}
2316
2317impl From<TaskContainer> for TaskPersistentInfo {
2318    fn from(container: TaskContainer) -> TaskPersistentInfo {
2319        container.1
2320    }
2321}
2322
2323impl TaskContainer {
2324    fn upgrade(&self) -> Option<Arc<Task>> {
2325        self.0.upgrade()
2326    }
2327
2328    fn weak_clone(&self) -> Weak<Task> {
2329        self.0.clone()
2330    }
2331
2332    fn info(&self) -> &TaskPersistentInfo {
2333        &self.1
2334    }
2335}
2336
2337#[cfg(test)]
2338mod test {
2339    use super::*;
2340    use crate::testing::*;
2341
2342    #[::fuchsia::test]
2343    async fn test_setsid() {
2344        spawn_kernel_and_run(async |locked, current_task| {
2345            fn get_process_group(task: &Task) -> Arc<ProcessGroup> {
2346                Arc::clone(&task.thread_group().read().process_group)
2347            }
2348            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));
2349
2350            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2351            assert_eq!(get_process_group(&current_task), get_process_group(&child_task));
2352
2353            let old_process_group = child_task.thread_group().read().process_group.clone();
2354            assert_eq!(child_task.thread_group().setsid(locked), Ok(()));
2355            assert_eq!(
2356                child_task.thread_group().read().process_group.session.leader,
2357                child_task.get_pid()
2358            );
2359            assert!(
2360                !old_process_group.read(locked).thread_groups().contains(child_task.thread_group())
2361            );
2362        })
2363        .await;
2364    }
2365
2366    #[::fuchsia::test]
2367    async fn test_exit_status() {
2368        spawn_kernel_and_run(async |locked, current_task| {
2369            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2370            child.thread_group().exit(locked, ExitStatus::Exit(42), None);
2371            std::mem::drop(child);
2372            assert_eq!(
2373                current_task.thread_group().read().zombie_children[0].exit_info.status,
2374                ExitStatus::Exit(42)
2375            );
2376        })
2377        .await;
2378    }
2379
2380    #[::fuchsia::test]
2381    async fn test_setgpid() {
2382        spawn_kernel_and_run(async |locked, current_task| {
2383            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));
2384
2385            let child_task1 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2386            let child_task2 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2387            let execd_child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2388            execd_child_task.thread_group().write().did_exec = true;
2389            let other_session_child_task =
2390                current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
2391            assert_eq!(other_session_child_task.thread_group().setsid(locked), Ok(()));
2392
2393            assert_eq!(
2394                child_task1.thread_group().setpgid(locked, &current_task, &current_task, 0),
2395                error!(ESRCH)
2396            );
2397            assert_eq!(
2398                current_task.thread_group().setpgid(locked, &current_task, &execd_child_task, 0),
2399                error!(EACCES)
2400            );
2401            assert_eq!(
2402                current_task.thread_group().setpgid(locked, &current_task, &current_task, 0),
2403                error!(EPERM)
2404            );
2405            assert_eq!(
2406                current_task.thread_group().setpgid(
2407                    locked,
2408                    &current_task,
2409                    &other_session_child_task,
2410                    0
2411                ),
2412                error!(EPERM)
2413            );
2414            assert_eq!(
2415                current_task.thread_group().setpgid(locked, &current_task, &child_task1, -1),
2416                error!(EINVAL)
2417            );
2418            assert_eq!(
2419                current_task.thread_group().setpgid(locked, &current_task, &child_task1, 255),
2420                error!(EPERM)
2421            );
2422            assert_eq!(
2423                current_task.thread_group().setpgid(
2424                    locked,
2425                    &current_task,
2426                    &child_task1,
2427                    other_session_child_task.tid
2428                ),
2429                error!(EPERM)
2430            );
2431
2432            assert_eq!(
2433                child_task1.thread_group().setpgid(locked, &current_task, &child_task1, 0),
2434                Ok(())
2435            );
2436            assert_eq!(
2437                child_task1.thread_group().read().process_group.session.leader,
2438                current_task.tid
2439            );
2440            assert_eq!(child_task1.thread_group().read().process_group.leader, child_task1.tid);
2441
2442            let old_process_group = child_task2.thread_group().read().process_group.clone();
2443            assert_eq!(
2444                current_task.thread_group().setpgid(
2445                    locked,
2446                    &current_task,
2447                    &child_task2,
2448                    child_task1.tid
2449                ),
2450                Ok(())
2451            );
2452            assert_eq!(child_task2.thread_group().read().process_group.leader, child_task1.tid);
2453            assert!(
2454                !old_process_group
2455                    .read(locked)
2456                    .thread_groups()
2457                    .contains(child_task2.thread_group())
2458            );
2459        })
2460        .await;
2461    }
2462
2463    #[::fuchsia::test]
2464    async fn test_adopt_children() {
2465        spawn_kernel_and_run(async |locked, current_task| {
2466            let task1 = current_task.clone_task_for_test(locked, 0, None);
2467            let task2 = task1.clone_task_for_test(locked, 0, None);
2468            let task3 = task2.clone_task_for_test(locked, 0, None);
2469
2470            assert_eq!(task3.thread_group().read().get_ppid(), task2.tid);
2471
2472            task2.thread_group().exit(locked, ExitStatus::Exit(0), None);
2473            std::mem::drop(task2);
2474
2475            // Task3 parent should be current_task.
2476            assert_eq!(task3.thread_group().read().get_ppid(), current_task.tid);
2477        })
2478        .await;
2479    }
2480
2481    #[::fuchsia::test]
2482    async fn test_getppid_after_self_and_parent_exit() {
2483        spawn_kernel_and_run(async |locked, current_task| {
2484            let task1 = current_task.clone_task_for_test(locked, 0, None);
2485            let task2 = task1.clone_task_for_test(locked, 0, None);
2486
2487            // Take strong references to the ThreadGroups.
2488            let tg1 = task1.thread_group().clone();
2489            let tg2 = task2.thread_group().clone();
2490
2491            assert_eq!(tg1.read().get_ppid(), current_task.tid);
2492            assert_eq!(tg2.read().get_ppid(), task1.tid);
2493
2494            // Exit `task2` first, so that when `task1` exits, it will not be reparented to init.
2495            tg2.exit(locked, ExitStatus::Exit(0), None);
2496            std::mem::drop(task2);
2497
2498            // Exit `task1`, and drop the task and ThreadGroup.
2499            tg1.exit(locked, ExitStatus::Exit(0), None);
2500            std::mem::drop(task1);
2501            std::mem::drop(tg1);
2502
2503            // It should still be valid to call `get_ppid()` on `tg2`, though is parent ThreadGroup
2504            // no longer exists.
2505            let _ = tg2.read().get_ppid();
2506        })
2507        .await;
2508    }
2509}