// starnix_core/task/thread_group.rs
1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::terminal::{Terminal, TerminalController};
6use crate::mutable_state::{state_accessor, state_implementation};
7use crate::ptrace::{
8    AtomicStopState, PtraceAllowedPtracers, PtraceEvent, PtraceOptions, PtraceStatus, StopState,
9    ZombiePtracees, ptrace_detach,
10};
11use crate::security;
12use crate::signals::syscalls::WaitingOptions;
13use crate::signals::{
14    DeliveryAction, IntoSignalInfoOptions, QueuedSignals, SignalActions, SignalDetail, SignalInfo,
15    UncheckedSignalInfo, action_for_signal, send_standard_signal,
16};
17use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
18use crate::task::{
19    ControllingTerminal, CurrentTask, ExitStatus, Kernel, PidTable, ProcessGroup, Session, Task,
20    TaskMutableState, TaskPersistentInfo, TypedWaitQueue,
21};
22use crate::time::{IntervalTimerHandle, TimerTable};
23use itertools::Itertools;
24use macro_rules_attribute::apply;
25use starnix_lifecycle::{AtomicU64Counter, DropNotifier};
26use starnix_logging::{log_debug, log_error, log_info, log_warn, track_stub};
27use starnix_sync::{
28    LockBefore, Locked, Mutex, OrderedMutex, ProcessGroupState, RwLock, ThreadGroupLimits, Unlocked,
29};
30use starnix_task_command::TaskCommand;
31use starnix_types::ownership::{OwnedRef, Releasable, TempRef, WeakRef};
32use starnix_types::stats::TaskTimeStats;
33use starnix_types::time::{itimerspec_from_itimerval, timeval_from_duration};
34use starnix_uapi::arc_key::WeakKey;
35use starnix_uapi::auth::{CAP_SYS_ADMIN, CAP_SYS_RESOURCE, Credentials};
36use starnix_uapi::errors::Errno;
37use starnix_uapi::personality::PersonalityFlags;
38use starnix_uapi::resource_limits::{Resource, ResourceLimits};
39use starnix_uapi::signals::{
40    SIGCHLD, SIGCONT, SIGHUP, SIGKILL, SIGTERM, SIGTTOU, SigSet, Signal, UncheckedSignal,
41};
42use starnix_uapi::user_address::UserAddress;
43use starnix_uapi::{
44    ITIMER_PROF, ITIMER_REAL, ITIMER_VIRTUAL, SI_TKILL, SI_USER, SIG_IGN, errno, error, itimerval,
45    pid_t, rlimit, tid_t, uid_t,
46};
47use std::collections::BTreeMap;
48use std::fmt;
49use std::sync::atomic::{AtomicBool, Ordering};
50use std::sync::{Arc, Weak};
51use zx::{Koid, Status};
52
/// A weak reference to a thread group that can be used in set and maps.
///
/// Equality, ordering, and hashing are derived from the `(pid, weak pointer)` pair, so two
/// keys only compare equal when they refer to the same underlying `ThreadGroup` allocation.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadGroupKey {
    // The leader pid captured when the key was created.
    pid: pid_t,
    // Pointer-identity wrapper around the weak thread group reference.
    thread_group: WeakKey<ThreadGroup>,
}
59
impl ThreadGroupKey {
    /// The pid of the thread group keyed by this object.
    ///
    /// As the key is weak (and pids are not unique due to pid namespaces), this should not be
    /// used as a unique identifier of the thread group.
    pub fn pid(&self) -> pid_t {
        self.pid
    }
}
69
impl std::ops::Deref for ThreadGroupKey {
    type Target = Weak<ThreadGroup>;
    /// Exposes the underlying weak reference so callers can `upgrade()` the key directly.
    fn deref(&self) -> &Self::Target {
        &self.thread_group.0
    }
}
76
77impl From<&ThreadGroup> for ThreadGroupKey {
78    fn from(tg: &ThreadGroup) -> Self {
79        Self { pid: tg.leader, thread_group: WeakKey::from(&tg.weak_self.upgrade().unwrap()) }
80    }
81}
82
impl<T: AsRef<ThreadGroup>> From<T> for ThreadGroupKey {
    /// Convenience conversion for any type that can be borrowed as a `ThreadGroup`;
    /// delegates to the `From<&ThreadGroup>` implementation above.
    fn from(tg: T) -> Self {
        tg.as_ref().into()
    }
}
88
/// Values used for waiting on the [ThreadGroup] lifecycle wait queue.
#[repr(u64)]
pub enum ThreadGroupLifecycleWaitValue {
    /// Wait for updates to the WaitResults of tasks in the group.
    ChildStatus,
    /// Wait for updates to `stopped`.
    Stopped,
}

// Implement `From` rather than `Into`: the standard blanket impl then provides
// `Into<u64>` for free, so existing `.into()` call sites keep working
// (clippy::from_over_into).
impl From<ThreadGroupLifecycleWaitValue> for u64 {
    fn from(value: ThreadGroupLifecycleWaitValue) -> u64 {
        // Safe numeric cast: the enum is `#[repr(u64)]`.
        value as u64
    }
}
103
/// Child process that have exited, but the zombie ptrace needs to be consumed
/// before they can be waited for.
#[derive(Clone, Debug)]
pub struct DeferredZombiePTracer {
    /// Original tracer.
    pub tracer_thread_group_key: ThreadGroupKey,
    /// Tracee tid.
    pub tracee_tid: tid_t,
    /// Tracee pgid (the leader of the tracee's process group at capture time).
    pub tracee_pgid: pid_t,
    /// Tracee thread group.
    pub tracee_thread_group_key: ThreadGroupKey,
}
117
impl DeferredZombiePTracer {
    /// Snapshots the tracer/tracee identifiers needed to later match the deferred zombie
    /// back to its tracer.
    fn new(tracer: &ThreadGroup, tracee: &Task) -> Self {
        Self {
            tracer_thread_group_key: tracer.into(),
            tracee_tid: tracee.tid,
            // The pgid is the leader of the tracee's current process group; this takes the
            // tracee's thread group read lock briefly.
            tracee_pgid: tracee.thread_group().read().process_group.leader,
            tracee_thread_group_key: tracee.thread_group_key.clone(),
        }
    }
}
128
/// The mutable state of the ThreadGroup.
pub struct ThreadGroupMutableState {
    /// The parent thread group.
    ///
    /// The value needs to be writable so that it can be re-parented to the correct subreaper
    /// if the parent ends before the child.
    pub parent: Option<ThreadGroupParent>,

    /// The signal this process generates on exit.
    pub exit_signal: Option<Signal>,

    /// The tasks in the thread group.
    ///
    /// The references to Task is weak to prevent cycles as Task have a Arc reference to their
    /// thread group.
    /// It is still expected that these weak references are always valid, as tasks must
    /// unregister themselves before they are deleted.
    tasks: BTreeMap<tid_t, TaskContainer>,

    /// The children of this thread group.
    ///
    /// The references to ThreadGroup is weak to prevent cycles as ThreadGroup have a Arc
    /// reference to their parent.
    /// It is still expected that these weak references are always valid, as thread groups must
    /// unregister themselves before they are deleted.
    pub children: BTreeMap<pid_t, Weak<ThreadGroup>>,

    /// Child tasks that have exited, but not yet been waited for.
    pub zombie_children: Vec<OwnedRef<ZombieProcess>>,

    /// ptracees of this process that have exited, but not yet been waited for.
    pub zombie_ptracees: ZombiePtracees,

    /// Child processes that have exited, but the zombie ptrace needs to be consumed
    /// before they can be waited for.
    pub deferred_zombie_ptracers: Vec<DeferredZombiePTracer>,

    /// Unified [WaitQueue] for all waited ThreadGroup events.
    pub lifecycle_waiters: TypedWaitQueue<ThreadGroupLifecycleWaitValue>,

    /// Whether this thread group will inherit from children of dying processes in its
    /// descendant tree.
    pub is_child_subreaper: bool,

    /// The IDs used to perform shell job control.
    pub process_group: Arc<ProcessGroup>,

    // NOTE(review): presumably set once this group has performed an exec — confirm at the
    // call sites that flip it.
    pub did_exec: bool,

    /// A signal that indicates whether the process is going to become waitable
    /// via waitid and waitpid for either WSTOPPED or WCONTINUED, depending on
    /// the value of `stopped`. If not None, contains the SignalInfo to return.
    pub last_signal: Option<SignalInfo>,

    /// Whether the thread_group is terminating or not, and if it is, the exit info of the
    /// thread group.
    run_state: ThreadGroupRunState,

    /// Time statistics accumulated from the children.
    pub children_time_stats: TaskTimeStats,

    /// Personality flags set with `sys_personality()`.
    pub personality: PersonalityFlags,

    /// Thread groups allowed to trace tasks in this thread group.
    pub allowed_ptracers: PtraceAllowedPtracers,

    /// Channel to message when this thread group exits.
    exit_notifier: Option<futures::channel::oneshot::Sender<()>>,

    /// Notifier for name changes.
    pub notifier: Option<std::sync::mpsc::Sender<MemoryAttributionLifecycleEvent>>,
}
202
/// A collection of `Task` objects that roughly correspond to a "process".
///
/// Userspace programmers often think about "threads" and "process", but those concepts have no
/// clear analogs inside the kernel because tasks are typically created using `clone(2)`, which
/// takes a complex set of flags that describes how much state is shared between the original task
/// and the new task.
///
/// If a new task is created with the `CLONE_THREAD` flag, the new task will be placed in the same
/// `ThreadGroup` as the original task. Userspace typically uses this flag in conjunction with the
/// `CLONE_FILES`, `CLONE_VM`, and `CLONE_FS`, which corresponds to the userspace notion of a
/// "thread". For example, that's how `pthread_create` behaves. In that sense, a `ThreadGroup`
/// normally corresponds to the set of "threads" in a "process". However, this pattern is purely a
/// userspace convention, and nothing stops userspace from using `CLONE_THREAD` without
/// `CLONE_FILES`, for example.
///
/// In Starnix, a `ThreadGroup` corresponds to a Zircon process, which means we do not support the
/// `CLONE_THREAD` flag without the `CLONE_VM` flag. If we run into problems with this limitation,
/// we might need to revise this correspondence.
///
/// Each `Task` in a `ThreadGroup` has the same thread group ID (`tgid`). The task with the same
/// `pid` as the `tgid` is called the thread group leader.
///
/// Thread groups are destroyed when the last task in the group exits.
pub struct ThreadGroup {
    /// Weak reference to the `OwnedRef` of this `ThreadGroup`. This allows to retrieve the
    /// `TempRef` from a raw `ThreadGroup`.
    pub weak_self: Weak<ThreadGroup>,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// A handle to the underlying Zircon process object.
    ///
    /// Currently, we have a 1-to-1 mapping between thread groups and zx::process
    /// objects. This approach might break down if/when we implement CLONE_VM
    /// without CLONE_THREAD because that creates a situation where two thread
    /// groups share an address space. To implement that situation, we might
    /// need to break the 1-to-1 mapping between thread groups and zx::process
    /// or teach zx::process to share address spaces.
    pub process: zx::Process,

    /// A handle to the restricted address space for the Zircon process object.
    pub root_vmar: zx::Vmar,

    /// The lead task of this thread group.
    ///
    /// The lead task is typically the initial thread created in the thread group.
    pub leader: pid_t,

    /// The signal actions that are registered for this process.
    pub signal_actions: Arc<SignalActions>,

    /// The timers for this thread group (from timer_create(), etc.).
    pub timers: TimerTable,

    /// A mechanism to be notified when this `ThreadGroup` is destroyed.
    pub drop_notifier: DropNotifier,

    /// Whether the process is currently stopped.
    ///
    /// Must only be set when the `mutable_state` write lock is held. Read without the lock via
    /// `load_stopped()`.
    stop_state: AtomicStopState,

    /// The mutable state of the ThreadGroup.
    ///
    /// Accessed through the `read()`/`write()` accessors generated by `state_accessor!`.
    mutable_state: RwLock<ThreadGroupMutableState>,

    /// The resource limits for this thread group.  This is outside mutable_state
    /// to avoid deadlocks where the thread_group lock is held when acquiring
    /// the task lock, and vice versa.
    pub limits: OrderedMutex<ResourceLimits, ThreadGroupLimits>,

    /// The next unique identifier for a seccomp filter.  These are required to be
    /// able to distinguish identical seccomp filters, which are treated differently
    /// for the purposes of SECCOMP_FILTER_FLAG_TSYNC.  Inherited across clone because
    /// seccomp filters are also inherited across clone.
    pub next_seccomp_filter_id: AtomicU64Counter,

    /// Tasks ptraced by this process.
    pub ptracees: Mutex<BTreeMap<tid_t, TaskContainer>>,

    /// The signals that are currently pending for this thread group.
    pub pending_signals: Mutex<QueuedSignals>,

    /// Whether or not there are any pending signals available for tasks in this thread group.
    /// Used to avoid having to acquire the signal state lock in hot paths.
    pub has_pending_signals: AtomicBool,

    /// The monotonic time at which the thread group started.
    pub start_time: zx::MonotonicInstant,

    /// Whether to log syscalls at INFO level for this thread group.
    /// Maintained by `sync_syscall_log_level()`.
    log_syscalls_as_info: AtomicBool,
}
296
297impl fmt::Debug for ThreadGroup {
298    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
299        write!(
300            f,
301            "{}({})",
302            self.process.get_name().unwrap_or(zx::Name::new_lossy("<unknown>")),
303            self.leader
304        )
305    }
306}
307
308impl ThreadGroup {
309    pub fn sync_syscall_log_level(&self) {
310        let command = self.read().leader_command();
311        let filters = self.kernel.syscall_log_filters.lock();
312        let should_log = filters.iter().any(|f| f.matches(&command));
313        let prev_should_log = self.log_syscalls_as_info.swap(should_log, Ordering::Relaxed);
314        let change_str = match (should_log, prev_should_log) {
315            (true, false) => Some("Enabled"),
316            (false, true) => Some("Disabled"),
317            _ => None,
318        };
319        if let Some(change_str) = change_str {
320            log_info!(
321                "{change_str} info syscall logs for thread group {} (command: {command})",
322                self.leader
323            );
324        }
325    }
326
327    #[inline]
328    pub fn syscall_log_level(&self) -> starnix_logging::Level {
329        if self.log_syscalls_as_info.load(Ordering::Relaxed) {
330            starnix_logging::Level::Info
331        } else {
332            starnix_logging::Level::Trace
333        }
334    }
335}
336
impl PartialEq for ThreadGroup {
    /// Thread groups compare equal when they share the same leader pid.
    fn eq(&self, other: &Self) -> bool {
        self.leader == other.leader
    }
}
342
impl Drop for ThreadGroup {
    fn drop(&mut self) {
        // Invariant: by the time the last reference is dropped, every membership container
        // must already be empty; anything left here indicates a leaked task or child.
        let state = self.mutable_state.get_mut();
        assert!(state.tasks.is_empty());
        assert!(state.children.is_empty());
        assert!(state.zombie_children.is_empty());
        assert!(state.zombie_ptracees.is_empty());
        // Debug/test-only: if a parent is still recorded, it must no longer list this group
        // among its children. `unwrap_or(true)` accepts both "no parent" and "parent already
        // gone" as valid.
        #[cfg(any(test, debug_assertions))]
        assert!(
            state
                .parent
                .as_ref()
                .and_then(|p| p.0.upgrade().as_ref().map(|p| p
                    .read()
                    .children
                    .get(&self.leader)
                    .is_none()))
                .unwrap_or(true)
        );
    }
}
364
/// A wrapper around a `Weak<ThreadGroup>` that expects the underlying `Weak` to always be
/// valid. The wrapper will check this at runtime during creation and upgrade.
/// (Creation is checked only in debug builds; `upgrade` panics if the reference is dead.)
pub struct ThreadGroupParent(Weak<ThreadGroup>);
368
369impl ThreadGroupParent {
370    pub fn new(t: Weak<ThreadGroup>) -> Self {
371        debug_assert!(t.upgrade().is_some());
372        Self(t)
373    }
374
375    pub fn upgrade(&self) -> Arc<ThreadGroup> {
376        self.0.upgrade().expect("ThreadGroupParent references must always be valid")
377    }
378}
379
380impl Clone for ThreadGroupParent {
381    fn clone(&self) -> Self {
382        Self(self.0.clone())
383    }
384}
385
/// A selector that can match a process. Works as a representation of the pid argument to
/// syscalls like wait and kill.
#[derive(Debug, Clone)]
pub enum ProcessSelector {
    /// Matches any process at all.
    Any,
    /// Matches only the process with the specified pid.
    Pid(pid_t),
    /// Matches all the processes in the given process group.
    Pgid(pid_t),
    /// Matches the thread group with the given key.
    Process(ThreadGroupKey),
}
399
400impl ProcessSelector {
401    pub fn match_tid(&self, tid: tid_t, pid_table: &PidTable) -> bool {
402        match *self {
403            ProcessSelector::Pid(p) => {
404                if p == tid {
405                    true
406                } else {
407                    if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
408                        task_ref.get_pid() == p
409                    } else {
410                        false
411                    }
412                }
413            }
414            ProcessSelector::Any => true,
415            ProcessSelector::Pgid(pgid) => {
416                if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
417                    pid_table.get_process_group(pgid).as_ref()
418                        == Some(&task_ref.thread_group().read().process_group)
419                } else {
420                    false
421                }
422            }
423            ProcessSelector::Process(ref key) => {
424                if let Some(tg) = key.upgrade() {
425                    tg.read().tasks.contains_key(&tid)
426                } else {
427                    false
428                }
429            }
430        }
431    }
432}
433
/// A process's exit status paired with the signal (if any) that announces the exit.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProcessExitInfo {
    pub status: ExitStatus,
    /// The signal generated on exit; `None` means no exit notification signal.
    pub exit_signal: Option<Signal>,
}
439
/// Whether the thread group is running or terminating; `Terminating` carries the exit status.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
enum ThreadGroupRunState {
    /// The thread group is running normally.
    #[default]
    Running,
    /// The thread group is shutting down with the recorded exit status.
    Terminating(ExitStatus),
}
446
/// The data returned to a waiter about an exited (or state-changed) process.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WaitResult {
    pub pid: pid_t,
    pub uid: uid_t,

    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,
}
457
impl WaitResult {
    /// Packages this wait result as a `SIGCHLD` to deliver to the parent.
    ///
    /// According to wait(2) man page, SignalInfo.signal needs to always be set to SIGCHLD.
    pub fn as_signal_info(&self) -> SignalInfo {
        SignalInfo::with_detail(
            SIGCHLD,
            // si_code reflects how the child changed state (exited/killed/stopped/...).
            self.exit_info.status.signal_info_code(),
            SignalDetail::SIGCHLD {
                pid: self.pid,
                uid: self.uid,
                status: self.exit_info.status.signal_info_status(),
            },
        )
    }
}
472
/// The remains of an exited process, retained so its parent (or tracer) can wait for it.
#[derive(Debug)]
pub struct ZombieProcess {
    /// Key of the thread group this zombie stands for; also provides the pid.
    pub thread_group_key: ThreadGroupKey,
    /// Leader pid of the process group the process belonged to at exit.
    pub pgid: pid_t,
    pub uid: uid_t,

    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,

    /// Whether dropping this ZombieProcess should imply removing the pid from
    /// the PidTable.
    pub is_canonical: bool,
}
488
489impl PartialEq for ZombieProcess {
490    fn eq(&self, other: &Self) -> bool {
491        // We assume only one set of ZombieProcess data per process, so this should cover it.
492        self.thread_group_key == other.thread_group_key
493            && self.pgid == other.pgid
494            && self.uid == other.uid
495            && self.is_canonical == other.is_canonical
496    }
497}
498
499impl Eq for ZombieProcess {}
500
impl PartialOrd for ZombieProcess {
    /// Delegates to `cmp` so the partial and total orders can never disagree.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for ZombieProcess {
    // NOTE(review): this orders only by `thread_group_key`, while `eq` also compares
    // pgid/uid/is_canonical. Two zombies could therefore be `Ordering::Equal` yet `!=`,
    // which violates the documented `Ord`/`Eq` consistency contract — confirm this is
    // intentional before relying on ordered collections of ZombieProcess.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.thread_group_key.cmp(&other.thread_group_key)
    }
}
512
513impl ZombieProcess {
514    pub fn new(
515        thread_group: ThreadGroupStateRef<'_>,
516        credentials: &Credentials,
517        exit_info: ProcessExitInfo,
518    ) -> OwnedRef<Self> {
519        let time_stats = thread_group.base.time_stats() + thread_group.children_time_stats;
520        OwnedRef::new(ZombieProcess {
521            thread_group_key: thread_group.base.into(),
522            pgid: thread_group.process_group.leader,
523            uid: credentials.uid,
524            exit_info,
525            time_stats,
526            is_canonical: true,
527        })
528    }
529
530    pub fn pid(&self) -> pid_t {
531        self.thread_group_key.pid()
532    }
533
534    pub fn to_wait_result(&self) -> WaitResult {
535        WaitResult {
536            pid: self.pid(),
537            uid: self.uid,
538            exit_info: self.exit_info.clone(),
539            time_stats: self.time_stats,
540        }
541    }
542
543    pub fn as_artificial(&self) -> Self {
544        ZombieProcess {
545            thread_group_key: self.thread_group_key.clone(),
546            pgid: self.pgid,
547            uid: self.uid,
548            exit_info: self.exit_info.clone(),
549            time_stats: self.time_stats,
550            is_canonical: false,
551        }
552    }
553
554    pub fn matches_selector(&self, selector: &ProcessSelector) -> bool {
555        match *selector {
556            ProcessSelector::Any => true,
557            ProcessSelector::Pid(pid) => self.pid() == pid,
558            ProcessSelector::Pgid(pgid) => self.pgid == pgid,
559            ProcessSelector::Process(ref key) => self.thread_group_key == *key,
560        }
561    }
562
563    pub fn matches_selector_and_waiting_option(
564        &self,
565        selector: &ProcessSelector,
566        options: &WaitingOptions,
567    ) -> bool {
568        if !self.matches_selector(selector) {
569            return false;
570        }
571
572        if options.wait_for_all {
573            true
574        } else {
575            // A "clone" zombie is one which has delivered no signal, or a
576            // signal other than SIGCHLD to its parent upon termination.
577            options.wait_for_clone == (self.exit_info.exit_signal != Some(SIGCHLD))
578        }
579    }
580}
581
582impl Releasable for ZombieProcess {
583    type Context<'a> = &'a mut PidTable;
584
585    fn release<'a>(self, pids: &'a mut PidTable) {
586        if self.is_canonical {
587            pids.remove_zombie(self.pid());
588        }
589    }
590}
591
592impl ThreadGroup {
    /// Creates a new thread group, optionally attached to `parent`.
    ///
    /// When a parent is given (the fork path), the child inherits the parent's resource
    /// limits, seccomp filter id counter, and personality flags, and is registered in both
    /// the parent's `children` map and `process_group`.
    pub fn new<L>(
        locked: &mut Locked<L>,
        kernel: Arc<Kernel>,
        process: zx::Process,
        root_vmar: zx::Vmar,
        parent: Option<ThreadGroupWriteGuard<'_>>,
        leader: pid_t,
        exit_signal: Option<Signal>,
        process_group: Arc<ProcessGroup>,
        signal_actions: Arc<SignalActions>,
    ) -> Arc<ThreadGroup>
    where
        L: LockBefore<ProcessGroupState>,
    {
        // `new_cyclic` lets `weak_self` point back at the Arc being constructed.
        Arc::new_cyclic(|weak_self| {
            let mut thread_group = ThreadGroup {
                weak_self: weak_self.clone(),
                kernel,
                process,
                root_vmar,
                leader,
                signal_actions,
                timers: Default::default(),
                drop_notifier: Default::default(),
                // A child process created via fork(2) inherits its parent's
                // resource limits.  Resource limits are preserved across execve(2).
                limits: OrderedMutex::new(
                    parent
                        .as_ref()
                        .map(|p| p.base.limits.lock(locked.cast_locked()).clone())
                        .unwrap_or(Default::default()),
                ),
                next_seccomp_filter_id: Default::default(),
                ptracees: Default::default(),
                stop_state: AtomicStopState::new(StopState::Awake),
                pending_signals: Default::default(),
                has_pending_signals: Default::default(),
                start_time: zx::MonotonicInstant::get(),
                mutable_state: RwLock::new(ThreadGroupMutableState {
                    parent: parent
                        .as_ref()
                        .map(|p| ThreadGroupParent::new(p.base.weak_self.clone())),
                    exit_signal,
                    tasks: BTreeMap::new(),
                    children: BTreeMap::new(),
                    zombie_children: vec![],
                    zombie_ptracees: ZombiePtracees::new(),
                    deferred_zombie_ptracers: vec![],
                    lifecycle_waiters: TypedWaitQueue::<ThreadGroupLifecycleWaitValue>::default(),
                    is_child_subreaper: false,
                    process_group: Arc::clone(&process_group),
                    did_exec: false,
                    last_signal: None,
                    run_state: Default::default(),
                    children_time_stats: Default::default(),
                    // Personality flags are inherited from the parent when present.
                    personality: parent
                        .as_ref()
                        .map(|p| p.personality)
                        .unwrap_or(Default::default()),
                    allowed_ptracers: PtraceAllowedPtracers::None,
                    exit_notifier: None,
                    notifier: None,
                }),
                log_syscalls_as_info: AtomicBool::new(false),
            };

            if let Some(mut parent) = parent {
                // Continue the parent's seccomp filter id sequence: filters (and therefore
                // their ids) are inherited across clone.
                thread_group.next_seccomp_filter_id.reset(parent.base.next_seccomp_filter_id.get());
                parent.children.insert(leader, weak_self.clone());
                process_group.insert(locked, &thread_group);
            };
            thread_group
        })
    }
667
    // Generates the `read()`/`write()` accessors that guard `mutable_state`.
    state_accessor!(ThreadGroup, mutable_state);

    /// Returns a snapshot of the group's stop state without taking any lock.
    ///
    /// Relaxed is sufficient: per the `stop_state` field docs, writes happen only under the
    /// `mutable_state` write lock, and this is just an unsynchronized read.
    pub fn load_stopped(&self) -> StopState {
        self.stop_state.load(Ordering::Relaxed)
    }
673
    // Causes the thread group to exit.  If this is being called from a task
    // that is part of the current thread group, the caller should pass
    // `current_task`.  If ownership issues prevent passing `current_task`, then
    // callers should use CurrentTask::thread_group_exit instead.
    pub fn exit(
        &self,
        locked: &mut Locked<Unlocked>,
        exit_status: ExitStatus,
        mut current_task: Option<&mut CurrentTask>,
    ) {
        if let Some(ref mut current_task) = current_task {
            // Report a TRACEEXIT ptrace event before tearing the group down.
            current_task.ptrace_event(
                locked,
                PtraceOptions::TRACEEXIT,
                exit_status.signal_info_status() as u64,
            );
        }
        // Lock order: the PidTable write lock is taken before this group's state lock.
        let mut pids = self.kernel.pids.write();
        let mut state = self.write();
        if state.is_terminating() {
            // The thread group is already terminating and all threads in the thread group have
            // already been interrupted.
            return;
        }

        state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());

        // Drop ptrace zombies
        state.zombie_ptracees.release(&mut pids);

        // Interrupt each task. Unlock the group because send_signal will lock the group in order
        // to call set_stopped.
        // SAFETY: tasks is kept on the stack. The static is required to ensure the lock on
        // ThreadGroup can be dropped.
        let tasks = state.tasks().map(TempRef::into_static).collect::<Vec<_>>();
        drop(state);

        // Detach from any ptraced tasks, killing the ones that set PTRACE_O_EXITKILL.
        let tracees = self.ptracees.lock().keys().cloned().collect::<Vec<_>>();
        for tracee in tracees {
            if let Some(task_ref) = pids.get_task(tracee).clone().upgrade() {
                let mut should_send_sigkill = false;
                if let Some(ptrace) = &task_ref.read().ptrace {
                    should_send_sigkill = ptrace.has_option(PtraceOptions::EXITKILL);
                }
                if should_send_sigkill {
                    send_standard_signal(locked, task_ref.as_ref(), SignalInfo::kernel(SIGKILL));
                    continue;
                }

                // Detach errors are deliberately ignored: the tracee may already be gone.
                let _ =
                    ptrace_detach(locked, &mut pids, self, task_ref.as_ref(), &UserAddress::NULL);
            }
        }

        for task in tasks {
            // Record the exit status on each task, then kick it with SIGKILL so it observes
            // the termination.
            task.write().set_exit_status(exit_status.clone());
            send_standard_signal(locked, &task, SignalInfo::kernel(SIGKILL));
        }
    }
734
735    pub fn add(&self, task: &TempRef<'_, Task>) -> Result<(), Errno> {
736        let mut state = self.write();
737        if state.is_terminating() {
738            if state.tasks_count() == 0 {
739                log_warn!(
740                    "Task {} with leader {} terminating while adding its first task, \
741                not sending creation notification",
742                    task.tid,
743                    self.leader
744                );
745            }
746            return error!(EINVAL);
747        }
748        state.tasks.insert(task.tid, task.into());
749
750        Ok(())
751    }
752
753    /// Remove the task from the children of this ThreadGroup.
754    ///
755    /// It is important that the task is taken as an `OwnedRef`. It ensures the tasks of the
756    /// ThreadGroup are always valid as they are still valid when removed.
757    pub fn remove<L>(&self, locked: &mut Locked<L>, pids: &mut PidTable, task: &OwnedRef<Task>)
758    where
759        L: LockBefore<ProcessGroupState>,
760    {
761        task.set_ptrace_zombie(pids);
762        pids.remove_task(task.tid);
763
764        let mut state = self.write();
765
766        let persistent_info: TaskPersistentInfo =
767            if let Some(container) = state.tasks.remove(&task.tid) {
768                container.into()
769            } else {
770                // The task has never been added. The only expected case is that this thread was
771                // already terminating.
772                debug_assert!(state.is_terminating());
773                return;
774            };
775
776        if state.tasks.is_empty() {
777            let exit_status =
778                if let ThreadGroupRunState::Terminating(exit_status) = &state.run_state {
779                    exit_status.clone()
780                } else {
781                    let exit_status = task.exit_status().unwrap_or_else(|| {
782                        log_error!("Exiting without an exit code.");
783                        ExitStatus::Exit(u8::MAX)
784                    });
785                    state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());
786                    exit_status
787                };
788
789            // Replace PID table entry with a zombie.
790            let exit_info =
791                ProcessExitInfo { status: exit_status, exit_signal: state.exit_signal.clone() };
792            let zombie =
793                ZombieProcess::new(state.as_ref(), &persistent_info.real_creds(), exit_info);
794            pids.kill_process(self.leader, OwnedRef::downgrade(&zombie));
795
796            state.leave_process_group(locked, pids);
797
798            // I have no idea if dropping the lock here is correct, and I don't want to think about
799            // it. If problems do turn up with another thread observing an intermediate state of
800            // this exit operation, the solution is to unify locks. It should be sensible and
801            // possible for there to be a single lock that protects all (or nearly all) of the
802            // data accessed by both exit and wait. In gvisor and linux this is the lock on the
803            // equivalent of the PidTable. This is made more difficult by rust locks being
804            // containers that only lock the data they contain, but see
805            // https://docs.google.com/document/d/1YHrhBqNhU1WcrsYgGAu3JwwlVmFXPlwWHTJLAbwRebY/edit
806            // for an idea.
807            std::mem::drop(state);
808
809            // Remove the process from the cgroup2 pid table after TG lock is dropped.
810            // This function will hold the CgroupState lock which should be before the TG lock. See
811            // more in lock_cgroup2_pid_table comments.
812            self.kernel.cgroups.lock_cgroup2_pid_table().remove_process(self.into());
813
814            // We will need the immediate parent and the reaper. Once we have them, we can make
815            // sure to take the locks in the right order: parent before child.
816            let parent = self.read().parent.clone();
817            let reaper = self.find_reaper();
818
819            {
820                // Reparent the children.
821                if let Some(reaper) = reaper {
822                    let reaper = reaper.upgrade();
823                    {
824                        let mut reaper_state = reaper.write();
825                        let mut state = self.write();
826                        for (_pid, weak_child) in std::mem::take(&mut state.children) {
827                            if let Some(child) = weak_child.upgrade() {
828                                let mut child_state = child.write();
829
830                                child_state.exit_signal = Some(SIGCHLD);
831                                child_state.parent =
832                                    Some(ThreadGroupParent::new(Arc::downgrade(&reaper)));
833                                reaper_state.children.insert(child.leader, weak_child.clone());
834                            }
835                        }
836                        reaper_state.zombie_children.append(&mut state.zombie_children);
837                    }
838                    ZombiePtracees::reparent(self, &reaper);
839                } else {
840                    // If we don't have a reaper then just drop the zombies.
841                    let mut state = self.write();
842                    for zombie in state.zombie_children.drain(..) {
843                        zombie.release(pids);
844                    }
845                    state.zombie_ptracees.release(pids);
846                }
847            }
848
849            // Clear the `parent` reference now that children have been re-`parent`ed.
850            self.write().parent = None;
851
852            #[cfg(any(test, debug_assertions))]
853            {
854                let state = self.read();
855                assert!(state.zombie_children.is_empty());
856                assert!(state.zombie_ptracees.is_empty());
857            }
858
859            if let Some(ref parent) = parent {
860                let parent = parent.upgrade();
861                let mut tracer_pid = None;
862                if let Some(ptrace) = &task.read().ptrace {
863                    tracer_pid = Some(ptrace.get_pid());
864                }
865
866                let maybe_zombie = 'compute_zombie: {
867                    if let Some(tracer_pid) = tracer_pid {
868                        if let Some(ref tracer) = pids.get_task(tracer_pid).upgrade() {
869                            break 'compute_zombie tracer
870                                .thread_group()
871                                .maybe_notify_tracer(task, pids, &parent, zombie);
872                        }
873                    }
874                    Some(zombie)
875                };
876                if let Some(zombie) = maybe_zombie {
877                    parent.do_zombie_notifications(zombie);
878                }
879            } else {
880                zombie.release(pids);
881            }
882
883            // TODO: Set the error_code on the Zircon process object. Currently missing a way
884            // to do this in Zircon. Might be easier in the new execution model.
885
886            // Once the last zircon thread stops, the zircon process will also stop executing.
887
888            if let Some(parent) = parent {
889                let parent = parent.upgrade();
890                parent.check_orphans(locked, pids);
891            }
892        }
893    }
894
895    pub fn do_zombie_notifications(&self, zombie: OwnedRef<ZombieProcess>) {
896        let mut state = self.write();
897
898        state.children.remove(&zombie.pid());
899        state
900            .deferred_zombie_ptracers
901            .retain(|dzp| dzp.tracee_thread_group_key != zombie.thread_group_key);
902
903        let exit_signal = zombie.exit_info.exit_signal;
904        let mut signal_info = zombie.to_wait_result().as_signal_info();
905
906        state.zombie_children.push(zombie);
907        state.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
908
909        // Send signals
910        if let Some(exit_signal) = exit_signal {
911            signal_info.signal = exit_signal;
912            state.send_signal(signal_info);
913        }
914    }
915
916    /// Notifies the tracer if appropriate.  Returns Some(zombie) if caller
917    /// needs to notify the parent, None otherwise.  The caller should probably
918    /// invoke parent.do_zombie_notifications(zombie) on the result.
919    fn maybe_notify_tracer(
920        &self,
921        tracee: &Task,
922        mut pids: &mut PidTable,
923        parent: &ThreadGroup,
924        zombie: OwnedRef<ZombieProcess>,
925    ) -> Option<OwnedRef<ZombieProcess>> {
926        if self.read().zombie_ptracees.has_tracee(tracee.tid) {
927            if self == parent {
928                // The tracer is the parent and has not consumed the
929                // notification.  Don't bother with the ptracee stuff, and just
930                // notify the parent.
931                self.write().zombie_ptracees.remove(pids, tracee.tid);
932                return Some(zombie);
933            } else {
934                // The tracer is not the parent and the tracer has not consumed
935                // the notification.
936                {
937                    // Tell the parent to expect a notification later.
938                    let mut parent_state = parent.write();
939                    parent_state
940                        .deferred_zombie_ptracers
941                        .push(DeferredZombiePTracer::new(self, tracee));
942                    parent_state.children.remove(&tracee.get_pid());
943                }
944                // Tell the tracer that there is a notification pending.
945                let mut state = self.write();
946                state.zombie_ptracees.set_parent_of(tracee.tid, Some(zombie), parent);
947                tracee.write().notify_ptracers();
948                return None;
949            }
950        } else if self == parent {
951            // The tracer is the parent and has already consumed the parent
952            // notification.  No further action required.
953            parent.write().children.remove(&tracee.tid);
954            zombie.release(&mut pids);
955            return None;
956        }
957        // The tracer is not the parent and has already consumed the parent
958        // notification.  Notify the parent.
959        Some(zombie)
960    }
961
962    /// Find the task which will adopt our children after we die.
963    fn find_reaper(&self) -> Option<ThreadGroupParent> {
964        let mut weak_parent = self.read().parent.clone()?;
965        loop {
966            weak_parent = {
967                let parent = weak_parent.upgrade();
968                let parent_state = parent.read();
969                if parent_state.is_child_subreaper {
970                    break;
971                }
972                match parent_state.parent {
973                    Some(ref next_parent) => next_parent.clone(),
974                    None => break,
975                }
976            };
977        }
978        Some(weak_parent)
979    }
980
981    pub fn setsid<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
982    where
983        L: LockBefore<ProcessGroupState>,
984    {
985        let pids = self.kernel.pids.read();
986        if pids.get_process_group(self.leader).is_some() {
987            return error!(EPERM);
988        }
989        let process_group = ProcessGroup::new(self.leader, None);
990        pids.add_process_group(process_group.clone());
991        self.write().set_process_group(locked, process_group, &pids);
992        self.check_orphans(locked, &pids);
993
994        Ok(())
995    }
996
    /// Implements setpgid(2): moves `target` into the process group `pgid`
    /// (a pgid of 0 means "a group named after the target itself").
    ///
    /// Errors (all checked while the relevant locks are held, for atomicity):
    /// - `ESRCH` if `target` is neither the calling process nor one of its
    ///   children.
    /// - `EACCES` if `target` is a child that has already exec'd.
    /// - `EPERM` if `target` is a session leader, the sessions differ, or the
    ///   named group does not exist / is in another session.
    /// - `EINVAL` if the resolved pgid is negative.
    pub fn setpgid<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        target: &Task,
        pgid: pid_t,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let pids = self.kernel.pids.read();

        {
            let current_process_group = Arc::clone(&self.read().process_group);

            // The target process must be either the current process or a child of the current process
            let mut target_thread_group = target.thread_group().write();
            let is_target_current_process_child =
                target_thread_group.parent.as_ref().map(|tg| tg.upgrade().leader)
                    == Some(self.leader);
            if target_thread_group.leader() != self.leader && !is_target_current_process_child {
                return error!(ESRCH);
            }

            // If the target process is a child of the current task, it must not have executed one of the exec
            // functions.
            if is_target_current_process_child && target_thread_group.did_exec {
                return error!(EACCES);
            }

            let new_process_group;
            {
                let target_process_group = &target_thread_group.process_group;

                // The target process must not be a session leader and must be in the same session as the current process.
                if target_thread_group.leader() == target_process_group.session.leader
                    || current_process_group.session != target_process_group.session
                {
                    return error!(EPERM);
                }

                let target_pgid = if pgid == 0 { target_thread_group.leader() } else { pgid };
                if target_pgid < 0 {
                    return error!(EINVAL);
                }

                // Already a member of the requested group: nothing to do.
                if target_pgid == target_process_group.leader {
                    return Ok(());
                }

                // If pgid is not equal to the target process id, the associated process group must exist
                // and be in the same session as the target process.
                if target_pgid != target_thread_group.leader() {
                    new_process_group =
                        pids.get_process_group(target_pgid).ok_or_else(|| errno!(EPERM))?;
                    if new_process_group.session != target_process_group.session {
                        return error!(EPERM);
                    }
                    security::check_setpgid_access(current_task, target)?;
                } else {
                    security::check_setpgid_access(current_task, target)?;
                    // Create a new process group
                    new_process_group =
                        ProcessGroup::new(target_pgid, Some(target_process_group.session.clone()));
                    pids.add_process_group(new_process_group.clone());
                }
            }

            target_thread_group.set_process_group(locked, new_process_group, &pids);
        }

        // Moving groups may have orphaned a process group; re-check now that
        // the thread-group locks above are released.
        target.thread_group().check_orphans(locked, &pids);

        Ok(())
    }
1072
    /// Returns this thread group's ITIMER_REAL interval timer.
    fn itimer_real(&self) -> IntervalTimerHandle {
        self.timers.itimer_real()
    }
1076
1077    pub fn set_itimer(
1078        &self,
1079        current_task: &CurrentTask,
1080        which: u32,
1081        value: itimerval,
1082    ) -> Result<itimerval, Errno> {
1083        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1084            // We don't support setting these timers.
1085            // The gvisor test suite clears ITIMER_PROF as part of its test setup logic, so we support
1086            // clearing these values.
1087            if value.it_value.tv_sec == 0 && value.it_value.tv_usec == 0 {
1088                return Ok(itimerval::default());
1089            }
1090            track_stub!(TODO("https://fxbug.dev/322874521"), "Unsupported itimer type", which);
1091            return error!(ENOTSUP);
1092        }
1093
1094        if which != ITIMER_REAL {
1095            return error!(EINVAL);
1096        }
1097        let itimer_real = self.itimer_real();
1098        let prev_remaining = itimer_real.time_remaining();
1099        if value.it_value.tv_sec != 0 || value.it_value.tv_usec != 0 {
1100            itimer_real.arm(current_task, itimerspec_from_itimerval(value), false)?;
1101        } else {
1102            itimer_real.disarm(current_task)?;
1103        }
1104        Ok(itimerval {
1105            it_value: timeval_from_duration(prev_remaining.remainder),
1106            it_interval: timeval_from_duration(prev_remaining.interval),
1107        })
1108    }
1109
1110    pub fn get_itimer(&self, which: u32) -> Result<itimerval, Errno> {
1111        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1112            // We don't support setting these timers, so we can accurately report that these are not set.
1113            return Ok(itimerval::default());
1114        }
1115        if which != ITIMER_REAL {
1116            return error!(EINVAL);
1117        }
1118        let remaining = self.itimer_real().time_remaining();
1119        Ok(itimerval {
1120            it_value: timeval_from_duration(remaining.remainder),
1121            it_interval: timeval_from_duration(remaining.interval),
1122        })
1123    }
1124
1125    /// Check whether the stop state is compatible with `new_stopped`. If it is return it,
1126    /// otherwise, return None.
1127    fn check_stopped_state(
1128        &self,
1129        new_stopped: StopState,
1130        finalize_only: bool,
1131    ) -> Option<StopState> {
1132        let stopped = self.load_stopped();
1133        if finalize_only && !stopped.is_stopping_or_stopped() {
1134            return Some(stopped);
1135        }
1136
1137        if stopped.is_illegal_transition(new_stopped) {
1138            return Some(stopped);
1139        }
1140
1141        return None;
1142    }
1143
1144    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
1145    /// does not update the signal.  If |finalize_only| is set, will check that
1146    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
1147    /// before executing it.
1148    ///
1149    /// Returns the latest stop state after any changes.
1150    pub fn set_stopped(
1151        &self,
1152        new_stopped: StopState,
1153        siginfo: Option<SignalInfo>,
1154        finalize_only: bool,
1155    ) -> StopState {
1156        // Perform an early return check to see if we can avoid taking the lock.
1157        if let Some(stopped) = self.check_stopped_state(new_stopped, finalize_only) {
1158            return stopped;
1159        }
1160
1161        self.write().set_stopped(new_stopped, siginfo, finalize_only)
1162    }
1163
1164    /// Ensures |session| is the controlling session inside of |terminal_controller|, and returns a
1165    /// reference to the |TerminalController|.
1166    fn check_terminal_controller(
1167        session: &Arc<Session>,
1168        terminal_controller: &Option<TerminalController>,
1169    ) -> Result<(), Errno> {
1170        if let Some(terminal_controller) = terminal_controller {
1171            if let Some(terminal_session) = terminal_controller.session.upgrade() {
1172                if Arc::ptr_eq(session, &terminal_session) {
1173                    return Ok(());
1174                }
1175            }
1176        }
1177        error!(ENOTTY)
1178    }
1179
1180    pub fn get_foreground_process_group(&self, terminal: &Terminal) -> Result<pid_t, Errno> {
1181        let state = self.read();
1182        let process_group = &state.process_group;
1183        let terminal_state = terminal.read();
1184
1185        // "When fd does not refer to the controlling terminal of the calling
1186        // process, -1 is returned" - tcgetpgrp(3)
1187        Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1188        let pid = process_group.session.read().get_foreground_process_group_leader();
1189        Ok(pid)
1190    }
1191
    /// Implements tcsetpgrp(3): makes the process group `pgid` the foreground
    /// process group of `terminal`.
    ///
    /// Errors: `ENOTTY` if `terminal` is not the caller's controlling
    /// terminal, `EINVAL` for a negative pgid, `ESRCH` if the group does not
    /// exist, `EPERM` if it belongs to another session. If the caller is in a
    /// background group and not blocking SIGTTOU (and SIGTTOU is not ignored),
    /// SIGTTOU is sent to that group instead and `EINTR` is returned.
    pub fn set_foreground_process_group<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        terminal: &Terminal,
        pgid: pid_t,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let process_group;
        let send_ttou;
        {
            // Keep locks to ensure atomicity.
            let pids = self.kernel.pids.read();
            let state = self.read();
            process_group = Arc::clone(&state.process_group);
            let terminal_state = terminal.read();
            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;

            // pgid must be positive.
            if pgid < 0 {
                return error!(EINVAL);
            }

            let new_process_group = pids.get_process_group(pgid).ok_or_else(|| errno!(ESRCH))?;
            if new_process_group.session != process_group.session {
                return error!(EPERM);
            }

            let mut session_state = process_group.session.write();
            // If the calling process is a member of a background group and not ignoring SIGTTOU, a
            // SIGTTOU signal is sent to all members of this background process group.
            send_ttou = process_group.leader != session_state.get_foreground_process_group_leader()
                && !current_task.read().signal_mask().has_signal(SIGTTOU)
                && self.signal_actions.get(SIGTTOU).sa_handler != SIG_IGN;

            if !send_ttou {
                session_state.set_foreground_process_group(&new_process_group);
            }
        }

        // Locks must not be held when sending signals.
        if send_ttou {
            process_group.send_signals(locked, &[SIGTTOU]);
            return error!(EINTR);
        }

        Ok(())
    }
1242
    /// Makes `terminal` the controlling terminal of the calling session
    /// (TIOCSCTTY semantics, see tty_ioctl(4)).
    ///
    /// The caller must be the session leader and must not already have a
    /// controlling terminal (`EINVAL` otherwise). If the terminal already
    /// controls another session it is only stolen when `steal` is set and the
    /// caller has CAP_SYS_ADMIN (`EPERM` otherwise). When the terminal was
    /// opened non-readable, CAP_SYS_ADMIN is also required.
    pub fn set_controlling_terminal(
        &self,
        current_task: &CurrentTask,
        terminal: &Terminal,
        is_main: bool,
        steal: bool,
        is_readable: bool,
    ) -> Result<(), Errno> {
        // Keep locks to ensure atomicity.
        let state = self.read();
        let process_group = &state.process_group;
        let mut terminal_state = terminal.write();
        let mut session_writer = process_group.session.write();

        // "The calling process must be a session leader and not have a
        // controlling terminal already." - tty_ioctl(4)
        if process_group.session.leader != self.leader
            || session_writer.controlling_terminal.is_some()
        {
            return error!(EINVAL);
        }

        // Tracks whether the CAP_SYS_ADMIN check already succeeded, so it is
        // not repeated (and not re-audited) for the non-readable case below.
        let mut has_admin_capability_determined = false;

        // "If this terminal is already the controlling terminal of a different
        // session group, then the ioctl fails with EPERM, unless the caller
        // has the CAP_SYS_ADMIN capability and arg equals 1, in which case the
        // terminal is stolen, and all processes that had it as controlling
        // terminal lose it." - tty_ioctl(4)
        if let Some(other_session) =
            terminal_state.controller.as_ref().and_then(|cs| cs.session.upgrade())
        {
            if other_session != process_group.session {
                if !steal {
                    return error!(EPERM);
                }
                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
                has_admin_capability_determined = true;

                // Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
                other_session.write().controlling_terminal = None;
            }
        }

        if !is_readable && !has_admin_capability_determined {
            security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
        }

        // Link both directions: session -> terminal and terminal -> session.
        session_writer.controlling_terminal = Some(ControllingTerminal::new(terminal, is_main));
        terminal_state.controller = TerminalController::new(&process_group.session);
        Ok(())
    }
1295
    /// Gives up the controlling terminal (TIOCNOTTY semantics, see
    /// tty_ioctl(4)).
    ///
    /// Fails with `ENOTTY` unless `terminal` is the controlling terminal
    /// (matching `is_main`) of the caller's session. If the caller is the
    /// session leader, SIGHUP and SIGCONT are sent after the locks are
    /// released.
    pub fn release_controlling_terminal<L>(
        &self,
        locked: &mut Locked<L>,
        _current_task: &CurrentTask,
        terminal: &Terminal,
        is_main: bool,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let process_group;
        {
            // Keep locks to ensure atomicity.
            let state = self.read();
            process_group = Arc::clone(&state.process_group);
            let mut terminal_state = terminal.write();
            let mut session_writer = process_group.session.write();

            // tty must be the controlling terminal.
            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
            if !session_writer
                .controlling_terminal
                .as_ref()
                .map_or(false, |ct| ct.matches(terminal, is_main))
            {
                return error!(ENOTTY);
            }

            // "If the process was session leader, then send SIGHUP and SIGCONT to the foreground
            // process group and all processes in the current session lose their controlling terminal."
            // - tty_ioctl(4)

            // Remove tty as the controlling tty for each process in the session, then
            // send them SIGHUP and SIGCONT.

            session_writer.controlling_terminal = None;
            terminal_state.controller = None;
        }

        // Locks must not be held while sending signals.
        // NOTE(review): the signals go to the session leader's own process
        // group here, while the tty_ioctl(4) quote above says the foreground
        // process group — confirm this is intended.
        if process_group.session.leader == self.leader {
            process_group.send_signals(locked, &[SIGHUP, SIGCONT]);
        }

        Ok(())
    }
1341
1342    fn check_orphans<L>(&self, locked: &mut Locked<L>, pids: &PidTable)
1343    where
1344        L: LockBefore<ProcessGroupState>,
1345    {
1346        let mut thread_groups = self.read().children().collect::<Vec<_>>();
1347        let this = self.weak_self.upgrade().unwrap();
1348        thread_groups.push(this);
1349        let process_groups =
1350            thread_groups.iter().map(|tg| Arc::clone(&tg.read().process_group)).unique();
1351        for pg in process_groups {
1352            pg.check_orphaned(locked, pids);
1353        }
1354    }
1355
    /// Returns the current soft limit (`rlim_cur`) for `resource` on this
    /// thread group.
    pub fn get_rlimit<L>(&self, locked: &mut Locked<L>, resource: Resource) -> u64
    where
        L: LockBefore<ThreadGroupLimits>,
    {
        self.limits.lock(locked).get(resource).rlim_cur
    }
1362
1363    /// Adjusts the rlimits of the ThreadGroup to which `target_task` belongs to.
1364    pub fn adjust_rlimits<L>(
1365        locked: &mut Locked<L>,
1366        current_task: &CurrentTask,
1367        target_task: &Task,
1368        resource: Resource,
1369        maybe_new_limit: Option<rlimit>,
1370    ) -> Result<rlimit, Errno>
1371    where
1372        L: LockBefore<ThreadGroupLimits>,
1373    {
1374        let thread_group = target_task.thread_group();
1375        let can_increase_rlimit = security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE);
1376        let mut limit_state = thread_group.limits.lock(locked);
1377        let old_limit = limit_state.get(resource);
1378        if let Some(new_limit) = maybe_new_limit {
1379            if new_limit.rlim_max > old_limit.rlim_max && !can_increase_rlimit {
1380                return error!(EPERM);
1381            }
1382            security::task_setrlimit(current_task, &target_task, old_limit, new_limit)?;
1383            limit_state.set(resource, new_limit)
1384        }
1385        Ok(old_limit)
1386    }
1387
    /// Returns the accumulated CPU time for this thread group, read from the
    /// underlying Zircon process runtime info.
    ///
    /// System time is currently always reported as zero (see TODO below).
    pub fn time_stats(&self) -> TaskTimeStats {
        let process: &zx::Process = if self.process.as_handle_ref().is_invalid() {
            // `process` must be valid for all tasks, except `kthreads`. In that case get the
            // stats from starnix process.
            assert_eq!(
                self as *const ThreadGroup,
                Arc::as_ptr(&self.kernel.kthreads.system_thread_group())
            );
            &self.kernel.kthreads.starnix_process
        } else {
            &self.process
        };

        let info =
            zx::Task::get_runtime_info(process).expect("Failed to get starnix process stats");
        TaskTimeStats {
            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
            system_time: zx::MonotonicDuration::default(),
        }
    }
1409
1410    /// For each task traced by this thread_group that matches the given
1411    /// selector, acquire its TaskMutableState and ptracees lock and execute the
1412    /// given function.
1413    pub fn get_ptracees_and(
1414        &self,
1415        selector: &ProcessSelector,
1416        pids: &PidTable,
1417        f: &mut dyn FnMut(&Task, &TaskMutableState),
1418    ) {
1419        for tracee in self
1420            .ptracees
1421            .lock()
1422            .keys()
1423            .filter(|tracee_tid| selector.match_tid(**tracee_tid, &pids))
1424            .map(|tracee_tid| pids.get_task(*tracee_tid))
1425        {
1426            if let Some(task_ref) = tracee.clone().upgrade() {
1427                let task_state = task_ref.write();
1428                if task_state.ptrace.is_some() {
1429                    f(&task_ref, &task_state);
1430                }
1431            }
1432        }
1433    }
1434
1435    /// Returns a tracee whose state has changed, so that waitpid can report on
1436    /// it. If this returns a value, and the pid is being traced, the tracer
1437    /// thread is deemed to have seen the tracee ptrace-stop for the purposes of
1438    /// PTRACE_LISTEN.
1439    pub fn get_waitable_ptracee(
1440        &self,
1441        selector: &ProcessSelector,
1442        options: &WaitingOptions,
1443        pids: &mut PidTable,
1444    ) -> Option<WaitResult> {
1445        // This checks to see if the target is a zombie ptracee.
1446        let waitable_entry = self.write().zombie_ptracees.get_waitable_entry(selector, options);
1447        match waitable_entry {
1448            None => (),
1449            Some((zombie, None)) => return Some(zombie.to_wait_result()),
1450            Some((zombie, Some((tg, z)))) => {
1451                if let Some(tg) = tg.upgrade() {
1452                    if Arc::as_ptr(&tg) != self as *const Self {
1453                        tg.do_zombie_notifications(z);
1454                    } else {
1455                        {
1456                            let mut state = tg.write();
1457                            state.children.remove(&z.pid());
1458                            state
1459                                .deferred_zombie_ptracers
1460                                .retain(|dzp| dzp.tracee_thread_group_key != z.thread_group_key);
1461                        }
1462
1463                        z.release(pids);
1464                    };
1465                }
1466                return Some(zombie.to_wait_result());
1467            }
1468        }
1469
1470        let mut tasks = vec![];
1471
1472        // This checks to see if the target is a living ptracee
1473        self.get_ptracees_and(selector, pids, &mut |task: &Task, _| {
1474            tasks.push(task.weak_self.clone());
1475        });
1476        for task in tasks {
1477            let Some(task_ref) = task.upgrade() else {
1478                continue;
1479            };
1480
1481            let process_state = &mut task_ref.thread_group().write();
1482            let mut task_state = task_ref.write();
1483            if task_state
1484                .ptrace
1485                .as_ref()
1486                .is_some_and(|ptrace| ptrace.is_waitable(task_ref.load_stopped(), options))
1487            {
1488                // We've identified a potential target.  Need to return either
1489                // the process's information (if we are in group-stop) or the
1490                // thread's information (if we are in a different stop).
1491
1492                // The shared information:
1493                let mut pid: i32 = 0;
1494                let info = process_state.tasks.values().next().unwrap().info().clone();
1495                let uid = info.real_creds().uid;
1496                let mut exit_status = None;
1497                let exit_signal = process_state.exit_signal.clone();
1498                let time_stats =
1499                    process_state.base.time_stats() + process_state.children_time_stats;
1500                let task_stopped = task_ref.load_stopped();
1501
1502                #[derive(PartialEq)]
1503                enum ExitType {
1504                    None,
1505                    Cont,
1506                    Stop,
1507                    Kill,
1508                }
1509                if process_state.is_waitable() {
1510                    let ptrace = &mut task_state.ptrace;
1511                    // The information for processes, if we were in group stop.
1512                    let process_stopped = process_state.base.load_stopped();
1513                    let mut fn_type = ExitType::None;
1514                    if process_stopped == StopState::Awake && options.wait_for_continued {
1515                        fn_type = ExitType::Cont;
1516                    }
1517                    let mut event = ptrace
1518                        .as_ref()
1519                        .map_or(PtraceEvent::None, |ptrace| {
1520                            ptrace.event_data.as_ref().map_or(PtraceEvent::None, |data| data.event)
1521                        })
1522                        .clone();
1523                    // Tasks that are ptrace'd always get stop notifications.
1524                    if process_stopped == StopState::GroupStopped
1525                        && (options.wait_for_stopped || ptrace.is_some())
1526                    {
1527                        fn_type = ExitType::Stop;
1528                    }
1529                    if fn_type != ExitType::None {
1530                        let siginfo = if options.keep_waitable_state {
1531                            process_state.last_signal.clone()
1532                        } else {
1533                            process_state.last_signal.take()
1534                        };
1535                        if let Some(mut siginfo) = siginfo {
1536                            if task_ref.thread_group().load_stopped() == StopState::GroupStopped
1537                                && ptrace.as_ref().is_some_and(|ptrace| ptrace.is_seized())
1538                            {
1539                                if event == PtraceEvent::None {
1540                                    event = PtraceEvent::Stop;
1541                                }
1542                                siginfo.code |= (PtraceEvent::Stop as i32) << 8;
1543                            }
1544                            if siginfo.signal == SIGKILL {
1545                                fn_type = ExitType::Kill;
1546                            }
1547                            exit_status = match fn_type {
1548                                ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1549                                ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1550                                ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1551                                _ => None,
1552                            };
1553                        }
1554                        // Clear the wait status of the ptrace, because we're
1555                        // using the tg status instead.
1556                        ptrace
1557                            .as_mut()
1558                            .map(|ptrace| ptrace.get_last_signal(options.keep_waitable_state));
1559                    }
1560                    pid = process_state.base.leader;
1561                }
1562                if exit_status == None {
1563                    if let Some(ptrace) = task_state.ptrace.as_mut() {
1564                        // The information for the task, if we were in a non-group stop.
1565                        let mut fn_type = ExitType::None;
1566                        let event = ptrace
1567                            .event_data
1568                            .as_ref()
1569                            .map_or(PtraceEvent::None, |event| event.event);
1570                        if task_stopped == StopState::Awake {
1571                            fn_type = ExitType::Cont;
1572                        }
1573                        if task_stopped.is_stopping_or_stopped()
1574                            || ptrace.stop_status == PtraceStatus::Listening
1575                        {
1576                            fn_type = ExitType::Stop;
1577                        }
1578                        if fn_type != ExitType::None {
1579                            if let Some(siginfo) =
1580                                ptrace.get_last_signal(options.keep_waitable_state)
1581                            {
1582                                if siginfo.signal == SIGKILL {
1583                                    fn_type = ExitType::Kill;
1584                                }
1585                                exit_status = match fn_type {
1586                                    ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
1587                                    ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
1588                                    ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
1589                                    _ => None,
1590                                };
1591                            }
1592                        }
1593                        pid = task_ref.get_tid();
1594                    }
1595                }
1596                if let Some(exit_status) = exit_status {
1597                    return Some(WaitResult {
1598                        pid,
1599                        uid,
1600                        exit_info: ProcessExitInfo { status: exit_status, exit_signal },
1601                        time_stats,
1602                    });
1603                }
1604            }
1605        }
1606        None
1607    }
1608
1609    /// Attempts to send an unchecked signal to this thread group.
1610    ///
1611    /// - `current_task`: The task that is sending the signal.
1612    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1613    /// where rights are to be checked but no signal is actually sent.
1614    ///
1615    /// # Returns
1616    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1617    /// the error that was encountered.
1618    pub fn send_signal_unchecked(
1619        &self,
1620        current_task: &CurrentTask,
1621        unchecked_signal: UncheckedSignal,
1622    ) -> Result<(), Errno> {
1623        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1624            let signal_info = SignalInfo::with_detail(
1625                signal,
1626                SI_USER as i32,
1627                SignalDetail::Kill {
1628                    pid: current_task.thread_group().leader,
1629                    uid: current_task.current_creds().uid,
1630                },
1631            );
1632
1633            self.write().send_signal(signal_info);
1634        }
1635
1636        Ok(())
1637    }
1638
1639    /// Sends a signal to this thread_group without performing any access checks.
1640    ///
1641    /// # Safety
1642    /// This is unsafe, because it should only be called by tools and tests.
1643    pub unsafe fn send_signal_unchecked_debug(
1644        &self,
1645        current_task: &CurrentTask,
1646        unchecked_signal: UncheckedSignal,
1647    ) -> Result<(), Errno> {
1648        let signal = Signal::try_from(unchecked_signal)?;
1649        let signal_info = SignalInfo::with_detail(
1650            signal,
1651            SI_USER as i32,
1652            SignalDetail::Kill {
1653                pid: current_task.thread_group().leader,
1654                uid: current_task.current_creds().uid,
1655            },
1656        );
1657
1658        self.write().send_signal(signal_info);
1659        Ok(())
1660    }
1661
1662    /// Attempts to send an unchecked signal to this thread group, with info read from
1663    /// `siginfo_ref`.
1664    ///
1665    /// - `current_task`: The task that is sending the signal.
1666    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1667    /// where rights are to be checked but no signal is actually sent.
1668    /// - `siginfo_ref`: The siginfo that will be enqueued.
1669    /// - `options`: Options for how to convert the siginfo into a signal info.
1670    ///
1671    /// # Returns
1672    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1673    /// the error that was encountered.
1674    #[track_caller]
1675    pub fn send_signal_unchecked_with_info(
1676        &self,
1677        current_task: &CurrentTask,
1678        unchecked_signal: UncheckedSignal,
1679        siginfo_ref: UserAddress,
1680        options: IntoSignalInfoOptions,
1681    ) -> Result<(), Errno> {
1682        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1683            let siginfo = UncheckedSignalInfo::read_from_siginfo(current_task, siginfo_ref)?;
1684            if self.leader != current_task.get_pid()
1685                && (siginfo.code() >= 0 || siginfo.code() == SI_TKILL)
1686            {
1687                return error!(EPERM);
1688            }
1689
1690            self.write().send_signal(siginfo.into_signal_info(signal, options)?);
1691        }
1692
1693        Ok(())
1694    }
1695
1696    /// Checks whether or not `current_task` can signal this thread group with `unchecked_signal`.
1697    ///
1698    /// Returns:
1699    ///   - `Ok(Some(Signal))` if the signal passed checks and should be sent.
1700    ///   - `Ok(None)` if the signal passed checks, but should not be sent. This is used by
1701    ///   userspace for permission checks.
1702    ///   - `Err(_)` if the permission checks failed.
1703    fn check_signal_access(
1704        &self,
1705        current_task: &CurrentTask,
1706        unchecked_signal: UncheckedSignal,
1707    ) -> Result<Option<Signal>, Errno> {
1708        // Pick an arbitrary task in thread_group to check permissions.
1709        //
1710        // Tasks can technically have different credentials, but in practice they are kept in sync.
1711        let state = self.read();
1712        let target_task = state.get_live_task()?;
1713        current_task.can_signal(&target_task, unchecked_signal)?;
1714
1715        // 0 is a sentinel value used to do permission checks.
1716        if unchecked_signal.is_zero() {
1717            return Ok(None);
1718        }
1719
1720        let signal = Signal::try_from(unchecked_signal)?;
1721        security::check_signal_access(current_task, &target_task, signal)?;
1722
1723        Ok(Some(signal))
1724    }
1725
1726    pub fn has_signal_queued(&self, signal: Signal) -> bool {
1727        self.pending_signals.lock().has_queued(signal)
1728    }
1729
1730    pub fn num_signals_queued(&self) -> usize {
1731        self.pending_signals.lock().num_queued()
1732    }
1733
1734    pub fn get_pending_signals(&self) -> SigSet {
1735        self.pending_signals.lock().pending()
1736    }
1737
1738    pub fn is_any_signal_allowed_by_mask(&self, mask: SigSet) -> bool {
1739        self.pending_signals.lock().is_any_allowed_by_mask(mask)
1740    }
1741
1742    pub fn take_next_signal_where<F>(&self, predicate: F) -> Option<SignalInfo>
1743    where
1744        F: Fn(&SignalInfo) -> bool,
1745    {
1746        let mut signals = self.pending_signals.lock();
1747        let r = signals.take_next_where(predicate);
1748        self.has_pending_signals.store(!signals.is_empty(), Ordering::Relaxed);
1749        r
1750    }
1751
1752    /// Drive this `ThreadGroup` to exit, allowing it time to handle SIGTERM before sending SIGKILL.
1753    ///
1754    /// Returns once `ThreadGroup::exit()` has completed.
1755    ///
1756    /// Must be called from the system task.
1757    pub async fn shut_down(this: Weak<Self>) {
1758        const SHUTDOWN_SIGNAL_HANDLING_TIMEOUT: zx::MonotonicDuration =
1759            zx::MonotonicDuration::from_seconds(1);
1760
1761        // Prepare for shutting down the thread group.
1762        let (tg_name, mut on_exited) = {
1763            // Nest this upgraded access so TempRefs aren't held across await-points.
1764            let Some(this) = this.upgrade() else {
1765                return;
1766            };
1767
1768            // Register a channel to be notified when exit() is complete.
1769            let (on_exited_send, on_exited) = futures::channel::oneshot::channel();
1770            this.write().exit_notifier = Some(on_exited_send);
1771
1772            // We want to be able to log about this thread group without upgrading the WeakRef.
1773            let tg_name = format!("{this:?}");
1774
1775            (tg_name, on_exited)
1776        };
1777
1778        log_debug!(tg:% = tg_name; "shutting down thread group, sending SIGTERM");
1779        this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGTERM)));
1780
1781        // Give thread groups some time to handle SIGTERM, proceeding early if they exit
1782        let timeout = fuchsia_async::Timer::new(SHUTDOWN_SIGNAL_HANDLING_TIMEOUT);
1783        futures::pin_mut!(timeout);
1784
1785        // Use select_biased instead of on_timeout() so that we can await on on_exited later
1786        futures::select_biased! {
1787            _ = &mut on_exited => (),
1788            _ = timeout => {
1789                log_debug!(tg:% = tg_name; "sending SIGKILL");
1790                this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::kernel(SIGKILL)));
1791            },
1792        };
1793
1794        log_debug!(tg:% = tg_name; "waiting for exit");
1795        // It doesn't matter whether ThreadGroup::exit() was called or the process exited with
1796        // a return code and dropped the sender end of the channel.
1797        on_exited.await.ok();
1798        log_debug!(tg:% = tg_name; "thread group shutdown complete");
1799    }
1800
    /// Returns the KOID of the process for this thread group.
    /// This method should be used to when mapping 32 bit linux process ids to KOIDs
    /// to avoid breaking the encapsulation of the zx::process within the ThreadGroup.
    /// This encapsulation is important since the relationship between the ThreadGroup
    /// and the Process may change over time. See [ThreadGroup::process] for more details.
    ///
    /// # Errors
    /// Propagates the `zx::Status` returned when querying the process handle fails.
    pub fn get_process_koid(&self) -> Result<Koid, Status> {
        self.process.koid()
    }
1809}
1810
/// Outcome of scanning a thread group's children for one that is waitable.
pub enum WaitableChildResult {
    /// A matching child is waitable right now; the boxed result is ready to be returned.
    ReadyNow(Box<WaitResult>),
    /// Children matching the selector exist but none is waitable yet; the caller should
    /// block and retry.
    ShouldWait,
    /// No child matches the selector at all (callers typically map this to ECHILD).
    NoneFound,
}
1816
#[apply(state_implementation!)]
impl ThreadGroupMutableState<Base = ThreadGroup> {
    /// Returns the pid of the thread group leader.
    pub fn leader(&self) -> pid_t {
        self.base.leader
    }

    /// Returns the command of the leader task, or a placeholder if the leader has
    /// already exited.
    pub fn leader_command(&self) -> TaskCommand {
        self.get_task(self.leader())
            .map(|l| l.command())
            .unwrap_or_else(|| TaskCommand::new(b"<leader exited>"))
    }

    /// Returns true once the thread group has left the `Running` state.
    pub fn is_terminating(&self) -> bool {
        !matches!(self.run_state, ThreadGroupRunState::Running)
    }

    /// Iterates over the child thread groups. Panics if a child's weak reference cannot
    /// be upgraded; that would be an invariant violation.
    pub fn children(&self) -> impl Iterator<Item = Arc<ThreadGroup>> + '_ {
        self.children.values().map(|v| {
            v.upgrade().expect("Weak references to processes in ThreadGroup must always be valid")
        })
    }

    /// Iterates over the tasks of this thread group that are still upgradable; entries
    /// whose weak reference has gone stale are silently skipped.
    pub fn tasks(&self) -> impl Iterator<Item = TempRef<'_, Task>> + '_ {
        self.tasks.values().flat_map(|t| t.upgrade())
    }

    /// Iterates over the ids of all task entries, whether or not they are still
    /// upgradable.
    pub fn task_ids(&self) -> impl Iterator<Item = &tid_t> {
        self.tasks.keys()
    }

    /// Returns whether a task entry with the given id exists in this thread group.
    pub fn contains_task(&self, tid: tid_t) -> bool {
        self.tasks.contains_key(&tid)
    }

    /// Returns the task with the given id, if it is still upgradable.
    pub fn get_task(&self, tid: tid_t) -> Option<TempRef<'_, Task>> {
        self.tasks.get(&tid).and_then(|t| t.upgrade())
    }

    /// Returns the number of task entries, upgradable or not.
    pub fn tasks_count(&self) -> usize {
        self.tasks.len()
    }

    /// Returns the pid of the parent thread group's leader, or 0 when there is no
    /// parent.
    pub fn get_ppid(&self) -> pid_t {
        match &self.parent {
            Some(parent) => parent.upgrade().leader,
            None => 0,
        }
    }

    /// Moves this thread group into `process_group`, leaving its current process group
    /// first. No-op when it is already a member of `process_group`.
    fn set_process_group<L>(
        &mut self,
        locked: &mut Locked<L>,
        process_group: Arc<ProcessGroup>,
        pids: &PidTable,
    ) where
        L: LockBefore<ProcessGroupState>,
    {
        if self.process_group == process_group {
            return;
        }
        self.leave_process_group(locked, pids);
        self.process_group = process_group;
        self.process_group.insert(locked, self.base);
    }

    /// Removes this thread group from its current process group. When `remove` reports
    /// the group should be torn down (presumably because it became empty — the contract
    /// lives in `ProcessGroup::remove`), the group is also dropped from its session and
    /// from the pid table.
    fn leave_process_group<L>(&mut self, locked: &mut Locked<L>, pids: &PidTable)
    where
        L: LockBefore<ProcessGroupState>,
    {
        if self.process_group.remove(locked, self.base) {
            self.process_group.session.write().remove(self.process_group.leader);
            pids.remove_process_group(self.process_group.leader);
        }
    }

    /// Indicates whether the thread group is waitable via waitid and waitpid for
    /// either WSTOPPED or WCONTINUED.
    pub fn is_waitable(&self) -> bool {
        return self.last_signal.is_some() && !self.base.load_stopped().is_in_progress();
    }

    /// Searches `zombie_list` for the most recently added zombie matching `selector` and
    /// `options`, converting it into a `WaitResult`.
    ///
    /// Unless `options.keep_waitable_state` is set, the zombie is removed from the list,
    /// its time stats are folded into `children_time_stats`, and it is released from the
    /// pid table.
    pub fn get_waitable_zombie(
        &mut self,
        zombie_list: &dyn Fn(&mut ThreadGroupMutableState) -> &mut Vec<OwnedRef<ZombieProcess>>,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &mut PidTable,
    ) -> Option<WaitResult> {
        // We look for the last zombie in the vector that matches pid selector and waiting options
        let selected_zombie_position = zombie_list(self)
            .iter()
            .rev()
            .position(|zombie| zombie.matches_selector_and_waiting_option(selector, options))
            .map(|position_starting_from_the_back| {
                zombie_list(self).len() - 1 - position_starting_from_the_back
            });

        selected_zombie_position.map(|position| {
            if options.keep_waitable_state {
                zombie_list(self)[position].to_wait_result()
            } else {
                let zombie = zombie_list(self).remove(position);
                self.children_time_stats += zombie.time_stats;
                let result = zombie.to_wait_result();
                zombie.release(pids);
                result
            }
        })
    }

    /// A "clone" wait (`for_clone == true`) matches exactly those children whose exit
    /// signal is anything other than `SIGCHLD` (including no exit signal at all).
    pub fn is_correct_exit_signal(for_clone: bool, exit_code: Option<Signal>) -> bool {
        for_clone == (exit_code != Some(SIGCHLD))
    }

    /// Scans the living children for one that is waitable (stopped or continued) under
    /// `selector` and `options`. Returns `NoneFound` only if no living child and no
    /// deferred ptrace zombie matches the selector at all.
    fn get_waitable_running_children(
        &self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &PidTable,
    ) -> WaitableChildResult {
        // The children whose pid matches the pid selector queried.
        let filter_children_by_pid_selector = |child: &ThreadGroup| match *selector {
            ProcessSelector::Any => true,
            ProcessSelector::Pid(pid) => child.leader == pid,
            ProcessSelector::Pgid(pgid) => {
                pids.get_process_group(pgid).as_ref() == Some(&child.read().process_group)
            }
            ProcessSelector::Process(ref key) => *key == ThreadGroupKey::from(child),
        };

        // The children whose exit signal matches the waiting options queried.
        let filter_children_by_waiting_options = |child: &ThreadGroup| {
            if options.wait_for_all {
                return true;
            }
            Self::is_correct_exit_signal(options.wait_for_clone, child.read().exit_signal)
        };

        // If wait_for_exited flag is disabled or no terminated children were found we look for living children.
        let mut selected_children = self
            .children
            .values()
            .map(|t| t.upgrade().unwrap())
            .filter(|tg| filter_children_by_pid_selector(&tg))
            .filter(|tg| filter_children_by_waiting_options(&tg))
            .peekable();
        if selected_children.peek().is_none() {
            // There still might be a process that ptrace hasn't looked at yet.
            if self.deferred_zombie_ptracers.iter().any(|dzp| match *selector {
                ProcessSelector::Any => true,
                ProcessSelector::Pid(pid) => dzp.tracee_thread_group_key.pid() == pid,
                ProcessSelector::Pgid(pgid) => pgid == dzp.tracee_pgid,
                ProcessSelector::Process(ref key) => *key == dzp.tracee_thread_group_key,
            }) {
                return WaitableChildResult::ShouldWait;
            }

            return WaitableChildResult::NoneFound;
        }
        for child in selected_children {
            let child = child.write();
            if child.last_signal.is_some() {
                // Builds the WaitResult for a child, consuming (or cloning, depending on
                // keep_waitable_state) the child's recorded last_signal.
                let build_wait_result = |mut child: ThreadGroupWriteGuard<'_>,
                                         exit_status: &dyn Fn(SignalInfo) -> ExitStatus|
                 -> WaitResult {
                    let siginfo = if options.keep_waitable_state {
                        child.last_signal.clone().unwrap()
                    } else {
                        child.last_signal.take().unwrap()
                    };
                    let exit_status = if siginfo.signal == SIGKILL {
                        // This overrides the stop/continue choice.
                        ExitStatus::Kill(siginfo)
                    } else {
                        exit_status(siginfo)
                    };
                    let info = child.tasks.values().next().unwrap().info();
                    let uid = info.real_creds().uid;
                    WaitResult {
                        pid: child.base.leader,
                        uid,
                        exit_info: ProcessExitInfo {
                            status: exit_status,
                            exit_signal: child.exit_signal,
                        },
                        time_stats: child.base.time_stats() + child.children_time_stats,
                    }
                };
                let child_stopped = child.base.load_stopped();
                if child_stopped == StopState::Awake && options.wait_for_continued {
                    return WaitableChildResult::ReadyNow(Box::new(build_wait_result(
                        child,
                        &|siginfo| ExitStatus::Continue(siginfo, PtraceEvent::None),
                    )));
                }
                if child_stopped == StopState::GroupStopped && options.wait_for_stopped {
                    return WaitableChildResult::ReadyNow(Box::new(build_wait_result(
                        child,
                        &|siginfo| ExitStatus::Stop(siginfo, PtraceEvent::None),
                    )));
                }
            }
        }

        WaitableChildResult::ShouldWait
    }

    /// Returns any waitable child matching the given `selector` and `options`. Returns None if no
    /// child matching the selector is waitable. Returns ECHILD if no child matches the selector at
    /// all.
    ///
    /// Will remove the waitable status from the child depending on `options`.
    pub fn get_waitable_child(
        &mut self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &mut PidTable,
    ) -> WaitableChildResult {
        // Exited children (zombies) take precedence over stopped/continued ones.
        if options.wait_for_exited {
            if let Some(waitable_zombie) = self.get_waitable_zombie(
                &|state: &mut ThreadGroupMutableState| &mut state.zombie_children,
                selector,
                options,
                pids,
            ) {
                return WaitableChildResult::ReadyNow(Box::new(waitable_zombie));
            }
        }

        self.get_waitable_running_children(selector, options, pids)
    }

    /// Returns a task in the current thread group.
    /// Prefers the leader; falls back to any live task, and errors with ESRCH when none
    /// remains.
    pub fn get_live_task(&self) -> Result<TempRef<'_, Task>, Errno> {
        self.tasks
            .get(&self.leader())
            .and_then(|t| t.upgrade())
            .or_else(|| self.tasks().next())
            .ok_or_else(|| errno!(ESRCH))
    }

    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
    /// does not update the signal.  If |finalize_only| is set, will check that
    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
    /// before executing it.
    ///
    /// Returns the latest stop state after any changes.
    pub fn set_stopped(
        mut self,
        new_stopped: StopState,
        siginfo: Option<SignalInfo>,
        finalize_only: bool,
    ) -> StopState {
        if let Some(stopped) = self.base.check_stopped_state(new_stopped, finalize_only) {
            return stopped;
        }

        // Thread groups don't transition to group stop if they are waking, because waking
        // means something told it to wake up (like a SIGCONT) but hasn't finished yet.
        if self.base.load_stopped() == StopState::Waking
            && (new_stopped == StopState::GroupStopping || new_stopped == StopState::GroupStopped)
        {
            return self.base.load_stopped();
        }

        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When thread
        // group can be stopped inside user code, tasks/thread groups will
        // need to be either restarted or stopped here.
        self.store_stopped(new_stopped);
        if let Some(signal) = &siginfo {
            // We don't want waiters to think the process was unstopped
            // because of a sigkill.  They will get woken when the
            // process dies.
            if signal.signal != SIGKILL {
                self.last_signal = siginfo;
            }
        }
        if new_stopped == StopState::Waking || new_stopped == StopState::ForceWaking {
            self.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::Stopped);
        };

        // Only notify the parent once the transition has settled (not for the
        // in-progress Stopping/Waking states).
        let parent = (!new_stopped.is_in_progress()).then(|| self.parent.clone()).flatten();

        // Drop the lock before locking the parent.
        std::mem::drop(self);
        if let Some(parent) = parent {
            let parent = parent.upgrade();
            parent
                .write()
                .lifecycle_waiters
                .notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
        }

        new_stopped
    }

    fn store_stopped(&mut self, state: StopState) {
        // We don't actually use the guard but we require it to enforce that the
        // caller holds the thread group's mutable state lock (identified by
        // mutable access to the thread group's mutable state).

        self.base.stop_state.store(state, Ordering::Relaxed)
    }

    /// Sends the signal `signal_info` to this thread group.
    #[allow(unused_mut, reason = "needed for some but not all macro outputs")]
    pub fn send_signal(mut self, signal_info: SignalInfo) {
        let sigaction = self.base.signal_actions.get(signal_info.signal);
        let action = action_for_signal(&signal_info, sigaction);

        // Enqueue first, and keep the fast-path flag in sync while the queue lock is held.
        {
            let mut pending_signals = self.base.pending_signals.lock();
            pending_signals.enqueue(signal_info.clone());
            self.base.has_pending_signals.store(true, Ordering::Relaxed);
        }
        let tasks: Vec<WeakRef<Task>> = self.tasks.values().map(|t| t.weak_clone()).collect();

        // Set state to waking before interrupting any tasks.
        if signal_info.signal == SIGKILL {
            self.set_stopped(StopState::ForceWaking, Some(signal_info.clone()), false);
        } else if signal_info.signal == SIGCONT {
            self.set_stopped(StopState::Waking, Some(signal_info.clone()), false);
        }

        let mut has_interrupted_task = false;
        for task in tasks.iter().flat_map(|t| t.upgrade()) {
            let mut task_state = task.write();

            if signal_info.signal == SIGKILL {
                task_state.thaw();
                task_state.set_stopped(StopState::ForceWaking, None, None, None);
            } else if signal_info.signal == SIGCONT {
                task_state.set_stopped(StopState::Waking, None, None, None);
            }

            let is_masked = task_state.is_signal_masked(signal_info.signal);
            let was_masked = task_state.is_signal_masked_by_saved_mask(signal_info.signal);

            // The signal stays queued unless it would be ignored outright; masked (now
            // or in the saved mask) and ptraced tasks still observe it.
            let is_queued = action != DeliveryAction::Ignore
                || is_masked
                || was_masked
                || task_state.is_ptraced();

            if is_queued {
                task_state.notify_signal_waiters(&signal_info.signal);

                if !is_masked && action.must_interrupt(Some(sigaction)) && !has_interrupted_task {
                    // Only interrupt one task, and only interrupt if the signal was actually queued
                    // and the action must interrupt.
                    drop(task_state);
                    task.interrupt();
                    has_interrupted_task = true;
                }
            }
        }
    }
}
2174
/// Container around a weak task and a strong `TaskPersistentInfo`. It is needed to keep the
/// information even when the task is not upgradable, because when the task is dropped, there is a
/// moment where the task is not yet released, yet the weak pointer is not upgradeable anymore.
/// During this time, it is still necessary to access the persistent info to compute the state of
/// the thread for the different wait syscalls.
// Field 0: weak reference to the task. Field 1: persistent info that outlives the task.
pub struct TaskContainer(WeakRef<Task>, TaskPersistentInfo);
2181
2182impl From<&TempRef<'_, Task>> for TaskContainer {
2183    fn from(task: &TempRef<'_, Task>) -> Self {
2184        Self(WeakRef::from(task), task.persistent_info.clone())
2185    }
2186}
2187
2188impl From<TaskContainer> for TaskPersistentInfo {
2189    fn from(container: TaskContainer) -> TaskPersistentInfo {
2190        container.1
2191    }
2192}
2193
impl TaskContainer {
    /// Attempts to promote the weak reference to a usable `TempRef`; returns `None`
    /// once the task is no longer upgradable.
    fn upgrade(&self) -> Option<TempRef<'_, Task>> {
        self.0.upgrade()
    }

    /// Returns a new weak handle to the same task.
    fn weak_clone(&self) -> WeakRef<Task> {
        self.0.clone()
    }

    /// Accesses the persistent info, which stays valid even when `upgrade` fails.
    fn info(&self) -> &TaskPersistentInfo {
        &self.1
    }
}
2207
2208#[cfg(test)]
2209mod test {
2210    use super::*;
2211    use crate::testing::*;
2212
    #[::fuchsia::test]
    async fn test_setsid() {
        spawn_kernel_and_run(async |locked, current_task| {
            fn get_process_group(task: &Task) -> Arc<ProcessGroup> {
                Arc::clone(&task.thread_group().read().process_group)
            }
            // setsid() fails with EPERM for the initial task.
            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));

            // A freshly cloned child starts out in its parent's process group.
            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert_eq!(get_process_group(&current_task), get_process_group(&child_task));

            // setsid() from the child succeeds and makes it the leader of a new
            // session...
            let old_process_group = child_task.thread_group().read().process_group.clone();
            assert_eq!(child_task.thread_group().setsid(locked), Ok(()));
            assert_eq!(
                child_task.thread_group().read().process_group.session.leader,
                child_task.get_pid()
            );
            // ...and removes it from its previous process group.
            assert!(
                !old_process_group.read(locked).thread_groups().contains(child_task.thread_group())
            );
        })
        .await;
    }
2236
    #[::fuchsia::test]
    async fn test_exit_status() {
        spawn_kernel_and_run(async |locked, current_task| {
            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            // Exit the child and drop our reference to it.
            child.thread_group().exit(locked, ExitStatus::Exit(42), None);
            std::mem::drop(child);
            // The parent's zombie list must record the exit status passed to exit().
            assert_eq!(
                current_task.thread_group().read().zombie_children[0].exit_info.status,
                ExitStatus::Exit(42)
            );
        })
        .await;
    }
2250
    // Exercises the error and success paths of `ThreadGroup::setpgid` (and the
    // `setsid` preconditions it depends on).
    // NOTE(review): the name looks like a typo for "test_setpgid" (it tests the
    // setpgid syscall) — confirm before renaming, since test names are harness-visible.
    #[::fuchsia::test]
    async fn test_setgpid() {
        spawn_kernel_and_run(async |locked, current_task| {
            // `current_task` is already a process-group leader, so creating a new
            // session must fail (POSIX: setsid => EPERM for a group leader).
            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));

            // Children used as setpgid callers/targets below.
            let child_task1 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            let child_task2 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            // A child marked as having exec'd: setpgid on it must be refused.
            let execd_child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            execd_child_task.thread_group().write().did_exec = true;
            // A child moved into its own, separate session via setsid.
            let other_session_child_task =
                current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert_eq!(other_session_child_task.thread_group().setsid(locked), Ok(()));

            // Target (`current_task`) is neither the caller (`child_task1`) nor
            // one of the caller's children => ESRCH.
            assert_eq!(
                child_task1.thread_group().setpgid(locked, &current_task, &current_task, 0),
                error!(ESRCH)
            );
            // Target child has already performed exec => EACCES.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &execd_child_task, 0),
                error!(EACCES)
            );
            // Caller targets itself but is a session leader => EPERM.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &current_task, 0),
                error!(EPERM)
            );
            // Target child now lives in a different session => EPERM.
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &other_session_child_task,
                    0
                ),
                error!(EPERM)
            );
            // Negative pgid values are rejected => EINVAL.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &child_task1, -1),
                error!(EINVAL)
            );
            // pgid 255 names no existing process group in the caller's session
            // => EPERM.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &child_task1, 255),
                error!(EPERM)
            );
            // The named process group exists, but in another session => EPERM.
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &child_task1,
                    other_session_child_task.tid
                ),
                error!(EPERM)
            );

            // Success: pgid 0 makes child_task1 the leader of a fresh process
            // group within its existing session.
            assert_eq!(
                child_task1.thread_group().setpgid(locked, &current_task, &child_task1, 0),
                Ok(())
            );
            // The session is unchanged (still led by current_task)...
            assert_eq!(
                child_task1.thread_group().read().process_group.session.leader,
                current_task.tid
            );
            // ...but child_task1 now leads its own process group.
            assert_eq!(child_task1.thread_group().read().process_group.leader, child_task1.tid);

            // Success: move child_task2 into child_task1's new process group.
            let old_process_group = child_task2.thread_group().read().process_group.clone();
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &child_task2,
                    child_task1.tid
                ),
                Ok(())
            );
            assert_eq!(child_task2.thread_group().read().process_group.leader, child_task1.tid);
            // The old process group must no longer list child_task2's thread
            // group among its members.
            assert!(
                !old_process_group
                    .read(locked)
                    .thread_groups()
                    .contains(child_task2.thread_group())
            );
        })
        .await;
    }
2333
2334    #[::fuchsia::test]
2335    async fn test_adopt_children() {
2336        spawn_kernel_and_run(async |locked, current_task| {
2337            let task1 = current_task.clone_task_for_test(locked, 0, None);
2338            let task2 = task1.clone_task_for_test(locked, 0, None);
2339            let task3 = task2.clone_task_for_test(locked, 0, None);
2340
2341            assert_eq!(task3.thread_group().read().get_ppid(), task2.tid);
2342
2343            task2.thread_group().exit(locked, ExitStatus::Exit(0), None);
2344            std::mem::drop(task2);
2345
2346            // Task3 parent should be current_task.
2347            assert_eq!(task3.thread_group().read().get_ppid(), current_task.tid);
2348        })
2349        .await;
2350    }
2351
    // Regression test: `get_ppid()` must remain callable on a thread group whose
    // parent thread group has already exited and been dropped.
    #[::fuchsia::test]
    async fn test_getppid_after_self_and_parent_exit() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Chain: current_task -> task1 -> task2.
            let task1 = current_task.clone_task_for_test(locked, 0, None);
            let task2 = task1.clone_task_for_test(locked, 0, None);

            // Take strong references to the ThreadGroups so they outlive the tasks.
            let tg1 = task1.thread_group().clone();
            let tg2 = task2.thread_group().clone();

            // Initial parent ids match the clone chain above.
            assert_eq!(tg1.read().get_ppid(), current_task.tid);
            assert_eq!(tg2.read().get_ppid(), task1.tid);

            // Exit `task2` first, so that when `task1` exits, it will not be reparented to init.
            tg2.exit(locked, ExitStatus::Exit(0), None);
            std::mem::drop(task2);

            // Exit `task1`, and drop the task and ThreadGroup.
            tg1.exit(locked, ExitStatus::Exit(0), None);
            std::mem::drop(task1);
            std::mem::drop(tg1);

            // It should still be valid to call `get_ppid()` on `tg2`, though its parent
            // ThreadGroup no longer exists. The call must not panic; the value is
            // deliberately ignored.
            let _ = tg2.read().get_ppid();
        })
        .await;
    }
2380}