starnix_core/task/
thread_group.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::terminal::{Terminal, TerminalController};
6use crate::mutable_state::{state_accessor, state_implementation};
7use crate::ptrace::{
8    AtomicStopState, PtraceAllowedPtracers, PtraceEvent, PtraceOptions, PtraceStatus, StopState,
9    ZombiePtracees, ptrace_detach,
10};
11use crate::security;
12use crate::signals::syscalls::WaitingOptions;
13use crate::signals::{
14    DeliveryAction, IntoSignalInfoOptions, QueuedSignals, SignalActions, SignalDetail, SignalInfo,
15    UncheckedSignalInfo, action_for_signal, send_standard_signal,
16};
17use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
18use crate::task::{
19    ControllingTerminal, CurrentTask, ExitStatus, Kernel, PidTable, ProcessGroup, Session, Task,
20    TaskFlags, TaskMutableState, TaskPersistentInfo, TypedWaitQueue,
21};
22use crate::time::{IntervalTimerHandle, TimerTable};
23use itertools::Itertools;
24use macro_rules_attribute::apply;
25use starnix_lifecycle::{AtomicU64Counter, DropNotifier};
26use starnix_logging::{log_debug, log_error, log_warn, track_stub};
27use starnix_sync::{
28    LockBefore, Locked, Mutex, OrderedMutex, ProcessGroupState, RwLock, ThreadGroupLimits, Unlocked,
29};
30use starnix_task_command::TaskCommand;
31use starnix_types::ownership::{OwnedRef, Releasable, TempRef, WeakRef};
32use starnix_types::stats::TaskTimeStats;
33use starnix_types::time::{itimerspec_from_itimerval, timeval_from_duration};
34use starnix_uapi::arc_key::WeakKey;
35use starnix_uapi::auth::{CAP_SYS_ADMIN, CAP_SYS_RESOURCE, Credentials};
36use starnix_uapi::errors::Errno;
37use starnix_uapi::personality::PersonalityFlags;
38use starnix_uapi::resource_limits::{Resource, ResourceLimits};
39use starnix_uapi::signals::{
40    SIGCHLD, SIGCONT, SIGHUP, SIGKILL, SIGTERM, SIGTTOU, Signal, UncheckedSignal,
41};
42use starnix_uapi::user_address::UserAddress;
43use starnix_uapi::{
44    ITIMER_PROF, ITIMER_REAL, ITIMER_VIRTUAL, SI_TKILL, SI_USER, SIG_IGN, errno, error, itimerval,
45    pid_t, rlimit, tid_t, uid_t,
46};
47use std::collections::BTreeMap;
48use std::fmt;
49use std::sync::atomic::Ordering;
50use std::sync::{Arc, Weak};
51use zx::{Koid, Status};
52
/// A weak reference to a thread group that can be used in set and maps.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ThreadGroupKey {
    /// Pid of the referenced thread group, cached here so it stays readable even after the
    /// group itself has been dropped.
    pid: pid_t,
    /// Weak pointer providing identity (comparison/hash) and `upgrade()` via `Deref`.
    thread_group: WeakKey<ThreadGroup>,
}
59
impl ThreadGroupKey {
    /// The pid of the thread group keyed by this object.
    ///
    /// As the key is weak (and pids are not unique due to pid namespaces), this should not be
    /// used as a unique identifier of the thread group.
    pub fn pid(&self) -> pid_t {
        self.pid
    }
}
69
impl std::ops::Deref for ThreadGroupKey {
    type Target = Weak<ThreadGroup>;
    /// Exposes the underlying `Weak<ThreadGroup>` so callers can `upgrade()` the key directly.
    fn deref(&self) -> &Self::Target {
        &self.thread_group.0
    }
}
76
77impl From<&ThreadGroup> for ThreadGroupKey {
78    fn from(tg: &ThreadGroup) -> Self {
79        Self { pid: tg.leader, thread_group: WeakKey::from(&tg.weak_self.upgrade().unwrap()) }
80    }
81}
82
impl<T: AsRef<ThreadGroup>> From<T> for ThreadGroupKey {
    /// Convenience conversion for any wrapper that can borrow a `ThreadGroup`; delegates to
    /// the `&ThreadGroup` conversion above.
    fn from(tg: T) -> Self {
        tg.as_ref().into()
    }
}
88
/// Values used for waiting on the [ThreadGroup] lifecycle wait queue.
///
/// `#[repr(u64)]` so each variant has a stable `u64` discriminant, converted via `Into<u64>`
/// for use with [`TypedWaitQueue`].
#[repr(u64)]
pub enum ThreadGroupLifecycleWaitValue {
    /// Wait for updates to the WaitResults of tasks in the group.
    ChildStatus,
    /// Wait for updates to `stopped`.
    Stopped,
}
97
98impl Into<u64> for ThreadGroupLifecycleWaitValue {
99    fn into(self) -> u64 {
100        self as u64
101    }
102}
103
/// Child processes that have exited, but whose zombie ptrace needs to be consumed
/// before they can be waited for.
#[derive(Clone, Debug)]
pub struct DeferredZombiePTracer {
    /// Original tracer
    pub tracer_thread_group_key: ThreadGroupKey,
    /// Tracee tid
    pub tracee_tid: tid_t,
    /// Tracee pgid (the leader pid of the tracee's process group, captured at creation time).
    pub tracee_pgid: pid_t,
    /// Tracee thread group
    pub tracee_thread_group_key: ThreadGroupKey,
}
117
impl DeferredZombiePTracer {
    /// Snapshots the tracer/tracee identifiers needed to later match the deferred zombie.
    ///
    /// Takes the tracee's thread-group read lock to capture its current process group leader.
    fn new(tracer: &ThreadGroup, tracee: &Task) -> Self {
        Self {
            tracer_thread_group_key: tracer.into(),
            tracee_tid: tracee.tid,
            tracee_pgid: tracee.thread_group().read().process_group.leader,
            tracee_thread_group_key: tracee.thread_group_key.clone(),
        }
    }
}
128
/// The mutable state of the ThreadGroup.
pub struct ThreadGroupMutableState {
    /// The parent thread group.
    ///
    /// The value needs to be writable so that it can be re-parented to the correct subreaper
    /// if the parent ends before the child.
    pub parent: Option<ThreadGroupParent>,

    /// The signal this process generates on exit.
    pub exit_signal: Option<Signal>,

    /// The tasks in the thread group.
    ///
    /// The references to Task are weak to prevent cycles, as Tasks have an Arc reference to
    /// their thread group.
    /// It is still expected that these weak references are always valid, as tasks must
    /// unregister themselves before they are deleted.
    tasks: BTreeMap<tid_t, TaskContainer>,

    /// The children of this thread group.
    ///
    /// The references to ThreadGroup are weak to prevent cycles, as ThreadGroups have an Arc
    /// reference to their parent.
    /// It is still expected that these weak references are always valid, as thread groups must
    /// unregister themselves before they are deleted.
    pub children: BTreeMap<pid_t, Weak<ThreadGroup>>,

    /// Child tasks that have exited, but not yet been waited for.
    pub zombie_children: Vec<OwnedRef<ZombieProcess>>,

    /// ptracees of this process that have exited, but not yet been waited for.
    pub zombie_ptracees: ZombiePtracees,

    /// Child processes that have exited, but whose zombie ptrace needs to be consumed
    /// before they can be waited for.
    pub deferred_zombie_ptracers: Vec<DeferredZombiePTracer>,

    /// Unified [WaitQueue] for all waited ThreadGroup events.
    pub lifecycle_waiters: TypedWaitQueue<ThreadGroupLifecycleWaitValue>,

    /// Whether this thread group will inherit from children of dying processes in its
    /// descendant tree.
    pub is_child_subreaper: bool,

    /// The IDs used to perform shell job control.
    pub process_group: Arc<ProcessGroup>,

    // Initialized to `false` in `ThreadGroup::new`; presumably flipped on a successful exec —
    // TODO(review): confirm at the call sites that set it.
    pub did_exec: bool,

    /// A signal that indicates whether the process is going to become waitable
    /// via waitid and waitpid for either WSTOPPED or WCONTINUED, depending on
    /// the value of `stopped`. If not None, contains the SignalInfo to return.
    pub last_signal: Option<SignalInfo>,

    /// Whether the thread_group is terminating or not, and if it is, the exit info of the thread
    /// group.
    run_state: ThreadGroupRunState,

    /// Time statistics accumulated from the children.
    pub children_time_stats: TaskTimeStats,

    /// Personality flags set with `sys_personality()`.
    pub personality: PersonalityFlags,

    /// Thread groups allowed to trace tasks in this thread group.
    pub allowed_ptracers: PtraceAllowedPtracers,

    /// Channel to message when this thread group exits.
    exit_notifier: Option<futures::channel::oneshot::Sender<()>>,

    /// Notifier for name changes.
    pub notifier: Option<std::sync::mpsc::Sender<MemoryAttributionLifecycleEvent>>,
}
202
/// A collection of `Task` objects that roughly correspond to a "process".
///
/// Userspace programmers often think about "threads" and "process", but those concepts have no
/// clear analogs inside the kernel because tasks are typically created using `clone(2)`, which
/// takes a complex set of flags that describes how much state is shared between the original task
/// and the new task.
///
/// If a new task is created with the `CLONE_THREAD` flag, the new task will be placed in the same
/// `ThreadGroup` as the original task. Userspace typically uses this flag in conjunction with the
/// `CLONE_FILES`, `CLONE_VM`, and `CLONE_FS`, which corresponds to the userspace notion of a
/// "thread". For example, that's how `pthread_create` behaves. In that sense, a `ThreadGroup`
/// normally corresponds to the set of "threads" in a "process". However, this pattern is purely a
/// userspace convention, and nothing stops userspace from using `CLONE_THREAD` without
/// `CLONE_FILES`, for example.
///
/// In Starnix, a `ThreadGroup` corresponds to a Zircon process, which means we do not support the
/// `CLONE_THREAD` flag without the `CLONE_VM` flag. If we run into problems with this limitation,
/// we might need to revise this correspondence.
///
/// Each `Task` in a `ThreadGroup` has the same thread group ID (`tgid`). The task with the same
/// `pid` as the `tgid` is called the thread group leader.
///
/// Thread groups are destroyed when the last task in the group exits.
pub struct ThreadGroup {
    /// Weak reference to the `OwnedRef` of this `ThreadGroup`. This allows to retrieve the
    /// `TempRef` from a raw `ThreadGroup`.
    pub weak_self: Weak<ThreadGroup>,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// A handle to the underlying Zircon process object.
    ///
    /// Currently, we have a 1-to-1 mapping between thread groups and zx::process
    /// objects. This approach might break down if/when we implement CLONE_VM
    /// without CLONE_THREAD because that creates a situation where two thread
    /// groups share an address space. To implement that situation, we might
    /// need to break the 1-to-1 mapping between thread groups and zx::process
    /// or teach zx::process to share address spaces.
    pub process: zx::Process,

    /// The lead task of this thread group.
    ///
    /// The lead task is typically the initial thread created in the thread group.
    pub leader: pid_t,

    /// The signal actions that are registered for this process.
    pub signal_actions: Arc<SignalActions>,

    /// The timers for this thread group (from timer_create(), etc.).
    pub timers: TimerTable,

    /// A mechanism to be notified when this `ThreadGroup` is destroyed.
    pub drop_notifier: DropNotifier,

    /// Whether the process is currently stopped.
    ///
    /// Must only be set when the `mutable_state` write lock is held; reads go through
    /// [`ThreadGroup::load_stopped`].
    stop_state: AtomicStopState,

    /// The mutable state of the ThreadGroup.
    mutable_state: RwLock<ThreadGroupMutableState>,

    /// The resource limits for this thread group.  This is outside mutable_state
    /// to avoid deadlocks where the thread_group lock is held when acquiring
    /// the task lock, and vice versa.
    pub limits: OrderedMutex<ResourceLimits, ThreadGroupLimits>,

    /// The next unique identifier for a seccomp filter.  These are required to be
    /// able to distinguish identical seccomp filters, which are treated differently
    /// for the purposes of SECCOMP_FILTER_FLAG_TSYNC.  Inherited across clone because
    /// seccomp filters are also inherited across clone.
    pub next_seccomp_filter_id: AtomicU64Counter,

    /// Tasks ptraced by this process
    pub ptracees: Mutex<BTreeMap<tid_t, TaskContainer>>,

    /// The signals that are currently pending for this thread group.
    pub pending_signals: Mutex<QueuedSignals>,

    /// The monotonic time at which the thread group started.
    pub start_time: zx::MonotonicInstant,
}
286
287impl fmt::Debug for ThreadGroup {
288    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
289        write!(
290            f,
291            "{}({})",
292            self.process.get_name().unwrap_or(zx::Name::new_lossy("<unknown>")),
293            self.leader
294        )
295    }
296}
297
298impl PartialEq for ThreadGroup {
299    fn eq(&self, other: &Self) -> bool {
300        self.leader == other.leader
301    }
302}
303
304#[cfg(any(test, debug_assertions))]
305impl Drop for ThreadGroup {
306    fn drop(&mut self) {
307        let state = self.mutable_state.get_mut();
308        assert!(state.tasks.is_empty());
309        assert!(state.children.is_empty());
310        assert!(state.zombie_children.is_empty());
311        assert!(state.zombie_ptracees.is_empty());
312        assert!(
313            state
314                .parent
315                .as_ref()
316                .and_then(|p| p.0.upgrade().as_ref().map(|p| p
317                    .read()
318                    .children
319                    .get(&self.leader)
320                    .is_none()))
321                .unwrap_or(true)
322        );
323    }
324}
325
/// A wrapper around a `Weak<ThreadGroup>` that expects the underlying `Weak` to always be
/// valid. The wrapper will check this at runtime during creation and upgrade.
///
/// Used for the `parent` link in [`ThreadGroupMutableState`].
pub struct ThreadGroupParent(Weak<ThreadGroup>);
329
impl ThreadGroupParent {
    /// Wraps `t`, asserting (debug builds only) that it is currently upgradable.
    pub fn new(t: Weak<ThreadGroup>) -> Self {
        debug_assert!(t.upgrade().is_some());
        Self(t)
    }

    /// Upgrades to a strong reference.
    ///
    /// # Panics
    ///
    /// Panics if the referenced `ThreadGroup` has been dropped, which would violate the
    /// invariant that parent links are always valid while in use.
    pub fn upgrade(&self) -> Arc<ThreadGroup> {
        self.0.upgrade().expect("ThreadGroupParent references must always be valid")
    }
}
340
341impl Clone for ThreadGroupParent {
342    fn clone(&self) -> Self {
343        Self(self.0.clone())
344    }
345}
346
/// A selector that can match a process. Works as a representation of the pid argument to
/// syscalls like wait and kill.
#[derive(Debug, Clone)]
pub enum ProcessSelector {
    /// Matches any process at all.
    Any,
    /// Matches only the process with the specified pid.
    Pid(pid_t),
    /// Matches all the processes in the given process group.
    Pgid(pid_t),
    /// Matches the thread group with the given key.
    Process(ThreadGroupKey),
}
360
361impl ProcessSelector {
362    pub fn match_tid(&self, tid: tid_t, pid_table: &PidTable) -> bool {
363        match *self {
364            ProcessSelector::Pid(p) => {
365                if p == tid {
366                    true
367                } else {
368                    if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
369                        task_ref.get_pid() == p
370                    } else {
371                        false
372                    }
373                }
374            }
375            ProcessSelector::Any => true,
376            ProcessSelector::Pgid(pgid) => {
377                if let Some(task_ref) = pid_table.get_task(tid).upgrade() {
378                    pid_table.get_process_group(pgid).as_ref()
379                        == Some(&task_ref.thread_group().read().process_group)
380                } else {
381                    false
382                }
383            }
384            ProcessSelector::Process(ref key) => {
385                if let Some(tg) = key.upgrade() {
386                    tg.read().tasks.contains_key(&tid)
387                } else {
388                    false
389                }
390            }
391        }
392    }
393}
394
/// The exit status of a process together with the signal (if any) used to report the exit to
/// its parent.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProcessExitInfo {
    pub status: ExitStatus,
    /// Signal delivered to the parent when the process exits, if any.
    pub exit_signal: Option<Signal>,
}
400
/// Whether the thread group is terminating, and if so, with which exit status.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
enum ThreadGroupRunState {
    /// The thread group is running normally (the initial state).
    #[default]
    Running,
    /// The thread group is shutting down with the given exit status.
    Terminating(ExitStatus),
}
407
/// Information about an exited process, as reported to a waiter.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WaitResult {
    /// Pid of the exited process.
    pub pid: pid_t,
    /// Uid of the exited process.
    pub uid: uid_t,

    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,
}
418
impl WaitResult {
    // According to wait(2) man page, SignalInfo.signal needs to always be set to SIGCHLD
    /// Converts this wait result into the `SignalInfo` carried by the SIGCHLD delivered to
    /// the parent, embedding the child's pid, uid, and encoded exit status.
    pub fn as_signal_info(&self) -> SignalInfo {
        SignalInfo::new(
            SIGCHLD,
            self.exit_info.status.signal_info_code(),
            SignalDetail::SIGCHLD {
                pid: self.pid,
                uid: self.uid,
                status: self.exit_info.status.signal_info_status(),
            },
        )
    }
}
433
/// Bookkeeping kept for a process that has exited but has not yet been waited for.
#[derive(Debug)]
pub struct ZombieProcess {
    /// Key of the exited thread group; also provides the zombie's pid via `pid()`.
    pub thread_group_key: ThreadGroupKey,
    /// Process group id captured at exit time.
    pub pgid: pid_t,
    /// Uid of the exited process.
    pub uid: uid_t,

    pub exit_info: ProcessExitInfo,

    /// Cumulative time stats for the process and its children.
    pub time_stats: TaskTimeStats,

    /// Whether dropping this ZombieProcess should imply removing the pid from
    /// the PidTable
    pub is_canonical: bool,
}
449
impl PartialEq for ZombieProcess {
    fn eq(&self, other: &Self) -> bool {
        // We assume only one set of ZombieProcess data per process, so this should cover it.
        // NOTE(review): this compares more fields than `Ord::cmp` below (which only looks at
        // `thread_group_key`), so `a == b` is not strictly equivalent to
        // `a.cmp(&b) == Ordering::Equal`. Under the one-zombie-per-process assumption the two
        // agree in practice — confirm ordered collections don't rely on strict consistency.
        self.thread_group_key == other.thread_group_key
            && self.pgid == other.pgid
            && self.uid == other.uid
            && self.is_canonical == other.is_canonical
    }
}

impl Eq for ZombieProcess {}
461
impl PartialOrd for ZombieProcess {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for ZombieProcess {
    /// Zombies are ordered by their thread group key alone.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.thread_group_key.cmp(&other.thread_group_key)
    }
}
473
impl ZombieProcess {
    /// Builds the canonical zombie for the exiting `thread_group`.
    ///
    /// The recorded time stats combine the group's own stats with the stats accumulated from
    /// its already-reaped children.
    pub fn new(
        thread_group: ThreadGroupStateRef<'_>,
        credentials: &Credentials,
        exit_info: ProcessExitInfo,
    ) -> OwnedRef<Self> {
        let time_stats = thread_group.base.time_stats() + thread_group.children_time_stats;
        OwnedRef::new(ZombieProcess {
            thread_group_key: thread_group.base.into(),
            pgid: thread_group.process_group.leader,
            uid: credentials.uid,
            exit_info,
            time_stats,
            is_canonical: true,
        })
    }

    /// Pid of the exited process (cached in the thread group key).
    pub fn pid(&self) -> pid_t {
        self.thread_group_key.pid()
    }

    /// Converts this zombie into the `WaitResult` reported to a waiter.
    pub fn to_wait_result(&self) -> WaitResult {
        WaitResult {
            pid: self.pid(),
            uid: self.uid,
            exit_info: self.exit_info.clone(),
            time_stats: self.time_stats,
        }
    }

    /// Returns a non-canonical copy of this zombie: releasing the copy will NOT remove the
    /// pid from the PidTable (see `is_canonical`).
    pub fn as_artificial(&self) -> Self {
        ZombieProcess {
            thread_group_key: self.thread_group_key.clone(),
            pgid: self.pgid,
            uid: self.uid,
            exit_info: self.exit_info.clone(),
            time_stats: self.time_stats,
            is_canonical: false,
        }
    }

    /// Returns whether this zombie matches `selector` (by pid, pgid, or thread group key).
    pub fn matches_selector(&self, selector: &ProcessSelector) -> bool {
        match *selector {
            ProcessSelector::Any => true,
            ProcessSelector::Pid(pid) => self.pid() == pid,
            ProcessSelector::Pgid(pgid) => self.pgid == pgid,
            ProcessSelector::Process(ref key) => self.thread_group_key == *key,
        }
    }

    /// Returns whether this zombie matches both `selector` and the clone/non-clone filter
    /// implied by the wait `options`.
    pub fn matches_selector_and_waiting_option(
        &self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
    ) -> bool {
        if !self.matches_selector(selector) {
            return false;
        }

        if options.wait_for_all {
            true
        } else {
            // A "clone" zombie is one which has delivered no signal, or a
            // signal other than SIGCHLD to its parent upon termination.
            options.wait_for_clone == (self.exit_info.exit_signal != Some(SIGCHLD))
        }
    }
}
542
impl Releasable for ZombieProcess {
    type Context<'a> = &'a mut PidTable;

    /// Releases the zombie. Only the canonical zombie removes its pid from the `PidTable`;
    /// non-canonical copies created by `as_artificial` must not.
    fn release<'a>(self, pids: &'a mut PidTable) {
        if self.is_canonical {
            pids.remove_zombie(self.pid());
        }
    }
}
552
553impl ThreadGroup {
    /// Creates a new thread group backed by the Zircon `process`, with `leader` as its pid.
    ///
    /// When `parent` is provided, the new group inherits a copy of the parent's resource
    /// limits (fork semantics; limits are preserved across exec), the parent's personality
    /// flags and next seccomp filter id, and is registered in the parent's `children` map and
    /// in `process_group`. The caller must already hold the parent's write lock (it is passed
    /// as a `ThreadGroupWriteGuard`).
    pub fn new<L>(
        locked: &mut Locked<L>,
        kernel: Arc<Kernel>,
        process: zx::Process,
        parent: Option<ThreadGroupWriteGuard<'_>>,
        leader: pid_t,
        exit_signal: Option<Signal>,
        process_group: Arc<ProcessGroup>,
        signal_actions: Arc<SignalActions>,
    ) -> Arc<ThreadGroup>
    where
        L: LockBefore<ProcessGroupState>,
    {
        // `new_cyclic` lets the group store a weak reference to itself (`weak_self`).
        Arc::new_cyclic(|weak_self| {
            let mut thread_group = ThreadGroup {
                weak_self: weak_self.clone(),
                kernel,
                process,
                leader,
                signal_actions,
                timers: Default::default(),
                drop_notifier: Default::default(),
                // A child process created via fork(2) inherits its parent's
                // resource limits.  Resource limits are preserved across execve(2).
                limits: OrderedMutex::new(
                    parent
                        .as_ref()
                        .map(|p| p.base.limits.lock(locked.cast_locked()).clone())
                        .unwrap_or(Default::default()),
                ),
                next_seccomp_filter_id: Default::default(),
                ptracees: Default::default(),
                stop_state: AtomicStopState::new(StopState::Awake),
                pending_signals: Default::default(),
                start_time: zx::MonotonicInstant::get(),
                mutable_state: RwLock::new(ThreadGroupMutableState {
                    parent: parent
                        .as_ref()
                        .map(|p| ThreadGroupParent::new(p.base.weak_self.clone())),
                    exit_signal,
                    tasks: BTreeMap::new(),
                    children: BTreeMap::new(),
                    zombie_children: vec![],
                    zombie_ptracees: ZombiePtracees::new(),
                    deferred_zombie_ptracers: vec![],
                    lifecycle_waiters: TypedWaitQueue::<ThreadGroupLifecycleWaitValue>::default(),
                    is_child_subreaper: false,
                    process_group: Arc::clone(&process_group),
                    did_exec: false,
                    last_signal: None,
                    run_state: Default::default(),
                    children_time_stats: Default::default(),
                    personality: parent
                        .as_ref()
                        .map(|p| p.personality)
                        .unwrap_or(Default::default()),
                    allowed_ptracers: PtraceAllowedPtracers::None,
                    exit_notifier: None,
                    notifier: None,
                }),
            };

            // Register with the parent and the process group only when a parent exists.
            if let Some(mut parent) = parent {
                thread_group.next_seccomp_filter_id.reset(parent.base.next_seccomp_filter_id.get());
                parent.children.insert(leader, weak_self.clone());
                process_group.insert(locked, &thread_group);
            };
            thread_group
        })
    }
624
    // Generates the `read()`/`write()` accessors for `mutable_state` used throughout this file.
    state_accessor!(ThreadGroup, mutable_state);

    /// Returns the current stop state.
    ///
    /// `Relaxed` is sufficient here: per the `stop_state` field docs, writes only happen under
    /// the `mutable_state` write lock, and readers only need an atomic snapshot of the value.
    pub fn load_stopped(&self) -> StopState {
        self.stop_state.load(Ordering::Relaxed)
    }
630
    /// Causes the thread group to exit.  If this is being called from a task
    /// that is part of the current thread group, the caller should pass
    /// `current_task`.  If ownership issues prevent passing `current_task`, then
    /// callers should use CurrentTask::thread_group_exit instead.
    ///
    /// Marks the group `Terminating` with `exit_status`, drops pending ptrace zombies,
    /// detaches (or kills, for PTRACE_O_EXITKILL) its ptracees, then sets the exit status on
    /// every member task and interrupts it with SIGKILL. Idempotent: returns immediately if
    /// the group is already terminating.
    pub fn exit(
        &self,
        locked: &mut Locked<Unlocked>,
        exit_status: ExitStatus,
        mut current_task: Option<&mut CurrentTask>,
    ) {
        // Report the PTRACE_EVENT_EXIT-style event before any state changes, while the task
        // is still fully alive.
        if let Some(ref mut current_task) = current_task {
            current_task.ptrace_event(
                locked,
                PtraceOptions::TRACEEXIT,
                exit_status.signal_info_status() as u64,
            );
        }
        // Lock order: PidTable before the thread group state.
        let mut pids = self.kernel.pids.write();
        let mut state = self.write();
        if state.is_terminating() {
            // The thread group is already terminating and all threads in the thread group have
            // already been interrupted.
            return;
        }

        state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());

        // Drop ptrace zombies
        state.zombie_ptracees.release(&mut pids);

        // Interrupt each task. Unlock the group because send_signal will lock the group in order
        // to call set_stopped.
        // SAFETY: tasks is kept on the stack. The static is required to ensure the lock on
        // ThreadGroup can be dropped.
        let tasks = state.tasks().map(TempRef::into_static).collect::<Vec<_>>();
        drop(state);

        // Detach from any ptraced tasks, killing the ones that set PTRACE_O_EXITKILL.
        let tracees = self.ptracees.lock().keys().cloned().collect::<Vec<_>>();
        for tracee in tracees {
            if let Some(task_ref) = pids.get_task(tracee).clone().upgrade() {
                let mut should_send_sigkill = false;
                if let Some(ptrace) = &task_ref.read().ptrace {
                    should_send_sigkill = ptrace.has_option(PtraceOptions::EXITKILL);
                }
                if should_send_sigkill {
                    send_standard_signal(locked, task_ref.as_ref(), SignalInfo::default(SIGKILL));
                    continue;
                }

                let _ =
                    ptrace_detach(locked, &mut pids, self, task_ref.as_ref(), &UserAddress::NULL);
            }
        }

        for task in tasks {
            task.write().set_exit_status(exit_status.clone());
            send_standard_signal(locked, &task, SignalInfo::default(SIGKILL));
        }
    }
691
    /// Registers `task` as a member of this thread group.
    ///
    /// Fails with `EINVAL` if the group is already terminating: no task may join a dying
    /// group.
    pub fn add(&self, task: &TempRef<'_, Task>) -> Result<(), Errno> {
        let mut state = self.write();
        if state.is_terminating() {
            // Special-case logging: the group's very first task raced with termination, so the
            // usual creation notification will not be sent.
            if state.tasks_count() == 0 {
                log_warn!(
                    "Task {} with leader {} terminating while adding its first task, \
                not sending creation notification",
                    task.tid,
                    self.leader
                );
            }
            return error!(EINVAL);
        }
        state.tasks.insert(task.tid, task.into());

        Ok(())
    }
709
710    /// Remove the task from the children of this ThreadGroup.
711    ///
712    /// It is important that the task is taken as an `OwnedRef`. It ensures the tasks of the
713    /// ThreadGroup are always valid as they are still valid when removed.
714    pub fn remove<L>(&self, locked: &mut Locked<L>, pids: &mut PidTable, task: &OwnedRef<Task>)
715    where
716        L: LockBefore<ProcessGroupState>,
717    {
718        task.set_ptrace_zombie(pids);
719        pids.remove_task(task.tid);
720
721        let mut state = self.write();
722
723        let persistent_info: TaskPersistentInfo =
724            if let Some(container) = state.tasks.remove(&task.tid) {
725                container.into()
726            } else {
727                // The task has never been added. The only expected case is that this thread was
728                // already terminating.
729                debug_assert!(state.is_terminating());
730                return;
731            };
732
733        if state.tasks.is_empty() {
734            let exit_status =
735                if let ThreadGroupRunState::Terminating(exit_status) = &state.run_state {
736                    exit_status.clone()
737                } else {
738                    let exit_status = task.exit_status().unwrap_or_else(|| {
739                        log_error!("Exiting without an exit code.");
740                        ExitStatus::Exit(u8::MAX)
741                    });
742                    state.run_state = ThreadGroupRunState::Terminating(exit_status.clone());
743                    exit_status
744                };
745
746            // Replace PID table entry with a zombie.
747            let exit_info =
748                ProcessExitInfo { status: exit_status, exit_signal: state.exit_signal.clone() };
749            let zombie =
750                ZombieProcess::new(state.as_ref(), &persistent_info.real_creds(), exit_info);
751            pids.kill_process(self.leader, OwnedRef::downgrade(&zombie));
752
753            state.leave_process_group(locked, pids);
754
755            // I have no idea if dropping the lock here is correct, and I don't want to think about
756            // it. If problems do turn up with another thread observing an intermediate state of
757            // this exit operation, the solution is to unify locks. It should be sensible and
758            // possible for there to be a single lock that protects all (or nearly all) of the
759            // data accessed by both exit and wait. In gvisor and linux this is the lock on the
760            // equivalent of the PidTable. This is made more difficult by rust locks being
761            // containers that only lock the data they contain, but see
762            // https://docs.google.com/document/d/1YHrhBqNhU1WcrsYgGAu3JwwlVmFXPlwWHTJLAbwRebY/edit
763            // for an idea.
764            std::mem::drop(state);
765
766            // Remove the process from the cgroup2 pid table after TG lock is dropped.
767            // This function will hold the CgroupState lock which should be before the TG lock. See
768            // more in lock_cgroup2_pid_table comments.
769            self.kernel.cgroups.lock_cgroup2_pid_table().remove_process(self.into());
770
771            // We will need the immediate parent and the reaper. Once we have them, we can make
772            // sure to take the locks in the right order: parent before child.
773            let parent = self.read().parent.clone();
774            let reaper = self.find_reaper();
775
776            {
777                // Reparent the children.
778                if let Some(reaper) = reaper {
779                    let reaper = reaper.upgrade();
780                    {
781                        let mut reaper_state = reaper.write();
782                        let mut state = self.write();
783                        for (_pid, weak_child) in std::mem::take(&mut state.children) {
784                            if let Some(child) = weak_child.upgrade() {
785                                let mut child_state = child.write();
786
787                                child_state.exit_signal = Some(SIGCHLD);
788                                child_state.parent =
789                                    Some(ThreadGroupParent::new(Arc::downgrade(&reaper)));
790                                reaper_state.children.insert(child.leader, weak_child.clone());
791                            }
792                        }
793                        reaper_state.zombie_children.append(&mut state.zombie_children);
794                    }
795                    ZombiePtracees::reparent(self, &reaper);
796                } else {
797                    // If we don't have a reaper then just drop the zombies.
798                    let mut state = self.write();
799                    for zombie in state.zombie_children.drain(..) {
800                        zombie.release(pids);
801                    }
802                    state.zombie_ptracees.release(pids);
803                }
804            }
805
806            #[cfg(any(test, debug_assertions))]
807            {
808                let state = self.read();
809                assert!(state.zombie_children.is_empty());
810                assert!(state.zombie_ptracees.is_empty());
811            }
812
813            if let Some(ref parent) = parent {
814                let parent = parent.upgrade();
815                let mut tracer_pid = None;
816                if let Some(ptrace) = &task.read().ptrace {
817                    tracer_pid = Some(ptrace.get_pid());
818                }
819
820                let maybe_zombie = 'compute_zombie: {
821                    if let Some(tracer_pid) = tracer_pid {
822                        if let Some(ref tracer) = pids.get_task(tracer_pid).upgrade() {
823                            break 'compute_zombie tracer
824                                .thread_group()
825                                .maybe_notify_tracer(task, pids, &parent, zombie);
826                        }
827                    }
828                    Some(zombie)
829                };
830                if let Some(zombie) = maybe_zombie {
831                    parent.do_zombie_notifications(zombie);
832                }
833            } else {
834                zombie.release(pids);
835            }
836
837            // TODO: Set the error_code on the Zircon process object. Currently missing a way
838            // to do this in Zircon. Might be easier in the new execution model.
839
840            // Once the last zircon thread stops, the zircon process will also stop executing.
841
842            if let Some(parent) = parent {
843                let parent = parent.upgrade();
844                parent.check_orphans(locked, pids);
845            }
846        }
847    }
848
849    pub fn do_zombie_notifications(&self, zombie: OwnedRef<ZombieProcess>) {
850        let mut state = self.write();
851
852        state.children.remove(&zombie.pid());
853        state
854            .deferred_zombie_ptracers
855            .retain(|dzp| dzp.tracee_thread_group_key != zombie.thread_group_key);
856
857        let exit_signal = zombie.exit_info.exit_signal;
858        let mut signal_info = zombie.to_wait_result().as_signal_info();
859
860        state.zombie_children.push(zombie);
861        state.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
862
863        // Send signals
864        if let Some(exit_signal) = exit_signal {
865            signal_info.signal = exit_signal;
866            state.send_signal(signal_info);
867        }
868    }
869
870    /// Notifies the tracer if appropriate.  Returns Some(zombie) if caller
871    /// needs to notify the parent, None otherwise.  The caller should probably
872    /// invoke parent.do_zombie_notifications(zombie) on the result.
873    fn maybe_notify_tracer(
874        &self,
875        tracee: &Task,
876        mut pids: &mut PidTable,
877        parent: &ThreadGroup,
878        zombie: OwnedRef<ZombieProcess>,
879    ) -> Option<OwnedRef<ZombieProcess>> {
880        if self.read().zombie_ptracees.has_tracee(tracee.tid) {
881            if self == parent {
882                // The tracer is the parent and has not consumed the
883                // notification.  Don't bother with the ptracee stuff, and just
884                // notify the parent.
885                self.write().zombie_ptracees.remove(pids, tracee.tid);
886                return Some(zombie);
887            } else {
888                // The tracer is not the parent and the tracer has not consumed
889                // the notification.
890                {
891                    // Tell the parent to expect a notification later.
892                    let mut parent_state = parent.write();
893                    parent_state
894                        .deferred_zombie_ptracers
895                        .push(DeferredZombiePTracer::new(self, tracee));
896                    parent_state.children.remove(&tracee.get_pid());
897                }
898                // Tell the tracer that there is a notification pending.
899                let mut state = self.write();
900                state.zombie_ptracees.set_parent_of(tracee.tid, Some(zombie), parent);
901                tracee.write().notify_ptracers();
902                return None;
903            }
904        } else if self == parent {
905            // The tracer is the parent and has already consumed the parent
906            // notification.  No further action required.
907            parent.write().children.remove(&tracee.tid);
908            zombie.release(&mut pids);
909            return None;
910        }
911        // The tracer is not the parent and has already consumed the parent
912        // notification.  Notify the parent.
913        Some(zombie)
914    }
915
916    /// Find the task which will adopt our children after we die.
917    fn find_reaper(&self) -> Option<ThreadGroupParent> {
918        let mut weak_parent = self.read().parent.clone()?;
919        loop {
920            weak_parent = {
921                let parent = weak_parent.upgrade();
922                let parent_state = parent.read();
923                if parent_state.is_child_subreaper {
924                    break;
925                }
926                match parent_state.parent {
927                    Some(ref next_parent) => next_parent.clone(),
928                    None => break,
929                }
930            };
931        }
932        Some(weak_parent)
933    }
934
935    pub fn setsid<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
936    where
937        L: LockBefore<ProcessGroupState>,
938    {
939        let pids = self.kernel.pids.read();
940        if pids.get_process_group(self.leader).is_some() {
941            return error!(EPERM);
942        }
943        let process_group = ProcessGroup::new(self.leader, None);
944        pids.add_process_group(process_group.clone());
945        self.write().set_process_group(locked, process_group, &pids);
946        self.check_orphans(locked, &pids);
947
948        Ok(())
949    }
950
    /// Implements setpgid(2): moves `target` into the process group `pgid`,
    /// or into a new group led by `target` itself when `pgid` is 0.
    ///
    /// Errors:
    /// - ESRCH if `target` is neither the calling process nor one of its children.
    /// - EACCES if `target` is a child that has already exec'd.
    /// - EPERM if `target` is a session leader, if the sessions differ, or if
    ///   the named process group does not exist in the target's session.
    /// - EINVAL if the resolved pgid is negative.
    pub fn setpgid<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        target: &Task,
        pgid: pid_t,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let pids = self.kernel.pids.read();

        {
            let current_process_group = Arc::clone(&self.read().process_group);

            // The target process must be either the current process of a child of the current process
            let mut target_thread_group = target.thread_group().write();
            let is_target_current_process_child =
                target_thread_group.parent.as_ref().map(|tg| tg.upgrade().leader)
                    == Some(self.leader);
            if target_thread_group.leader() != self.leader && !is_target_current_process_child {
                return error!(ESRCH);
            }

            // If the target process is a child of the current task, it must not have executed one of the exec
            // function.
            if is_target_current_process_child && target_thread_group.did_exec {
                return error!(EACCES);
            }

            let new_process_group;
            {
                let target_process_group = &target_thread_group.process_group;

                // The target process must not be a session leader and must be in the same session as the current process.
                if target_thread_group.leader() == target_process_group.session.leader
                    || current_process_group.session != target_process_group.session
                {
                    return error!(EPERM);
                }

                // pgid == 0 means "use the target's own pid as the group id".
                let target_pgid = if pgid == 0 { target_thread_group.leader() } else { pgid };
                if target_pgid < 0 {
                    return error!(EINVAL);
                }

                // Already a member of the requested group: nothing to do.
                if target_pgid == target_process_group.leader {
                    return Ok(());
                }

                // If pgid is not equal to the target process id, the associated process group must exist
                // and be in the same session as the target process.
                if target_pgid != target_thread_group.leader() {
                    new_process_group =
                        pids.get_process_group(target_pgid).ok_or_else(|| errno!(EPERM))?;
                    if new_process_group.session != target_process_group.session {
                        return error!(EPERM);
                    }
                    security::check_setpgid_access(current_task, target)?;
                } else {
                    security::check_setpgid_access(current_task, target)?;
                    // Create a new process group
                    new_process_group =
                        ProcessGroup::new(target_pgid, Some(target_process_group.session.clone()));
                    pids.add_process_group(new_process_group.clone());
                }
            }

            target_thread_group.set_process_group(locked, new_process_group, &pids);
        }

        // Re-check orphan status after the target's thread-group lock has
        // been released.
        target.thread_group().check_orphans(locked, &pids);

        Ok(())
    }
1026
    /// Returns the handle of this thread group's ITIMER_REAL interval timer.
    fn itimer_real(&self) -> IntervalTimerHandle {
        self.timers.itimer_real()
    }
1030
1031    pub fn set_itimer(
1032        &self,
1033        current_task: &CurrentTask,
1034        which: u32,
1035        value: itimerval,
1036    ) -> Result<itimerval, Errno> {
1037        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1038            // We don't support setting these timers.
1039            // The gvisor test suite clears ITIMER_PROF as part of its test setup logic, so we support
1040            // clearing these values.
1041            if value.it_value.tv_sec == 0 && value.it_value.tv_usec == 0 {
1042                return Ok(itimerval::default());
1043            }
1044            track_stub!(TODO("https://fxbug.dev/322874521"), "Unsupported itimer type", which);
1045            return error!(ENOTSUP);
1046        }
1047
1048        if which != ITIMER_REAL {
1049            return error!(EINVAL);
1050        }
1051        let itimer_real = self.itimer_real();
1052        let prev_remaining = itimer_real.time_remaining();
1053        if value.it_value.tv_sec != 0 || value.it_value.tv_usec != 0 {
1054            itimer_real.arm(current_task, itimerspec_from_itimerval(value), false)?;
1055        } else {
1056            itimer_real.disarm(current_task)?;
1057        }
1058        Ok(itimerval {
1059            it_value: timeval_from_duration(prev_remaining.remainder),
1060            it_interval: timeval_from_duration(prev_remaining.interval),
1061        })
1062    }
1063
1064    pub fn get_itimer(&self, which: u32) -> Result<itimerval, Errno> {
1065        if which == ITIMER_PROF || which == ITIMER_VIRTUAL {
1066            // We don't support setting these timers, so we can accurately report that these are not set.
1067            return Ok(itimerval::default());
1068        }
1069        if which != ITIMER_REAL {
1070            return error!(EINVAL);
1071        }
1072        let remaining = self.itimer_real().time_remaining();
1073        Ok(itimerval {
1074            it_value: timeval_from_duration(remaining.remainder),
1075            it_interval: timeval_from_duration(remaining.interval),
1076        })
1077    }
1078
1079    /// Check whether the stop state is compatible with `new_stopped`. If it is return it,
1080    /// otherwise, return None.
1081    fn check_stopped_state(
1082        &self,
1083        new_stopped: StopState,
1084        finalize_only: bool,
1085    ) -> Option<StopState> {
1086        let stopped = self.load_stopped();
1087        if finalize_only && !stopped.is_stopping_or_stopped() {
1088            return Some(stopped);
1089        }
1090
1091        if stopped.is_illegal_transition(new_stopped) {
1092            return Some(stopped);
1093        }
1094
1095        return None;
1096    }
1097
1098    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
1099    /// does not update the signal.  If |finalize_only| is set, will check that
1100    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
1101    /// before executing it.
1102    ///
1103    /// Returns the latest stop state after any changes.
1104    pub fn set_stopped(
1105        &self,
1106        new_stopped: StopState,
1107        siginfo: Option<SignalInfo>,
1108        finalize_only: bool,
1109    ) -> StopState {
1110        // Perform an early return check to see if we can avoid taking the lock.
1111        if let Some(stopped) = self.check_stopped_state(new_stopped, finalize_only) {
1112            return stopped;
1113        }
1114
1115        self.write().set_stopped(new_stopped, siginfo, finalize_only)
1116    }
1117
    /// Checks that |terminal_controller| is present and that its controlling
    /// session is |session|.
    ///
    /// Returns `Ok(())` when they match, and ENOTTY when the controller is
    /// absent, its session is gone, or the sessions differ. (Despite the
    /// previous doc comment, no reference to the controller is returned.)
    fn check_terminal_controller(
        session: &Arc<Session>,
        terminal_controller: &Option<TerminalController>,
    ) -> Result<(), Errno> {
        if let Some(terminal_controller) = terminal_controller {
            if let Some(terminal_session) = terminal_controller.session.upgrade() {
                // Compare by identity: same session object, not just equal.
                if Arc::ptr_eq(session, &terminal_session) {
                    return Ok(());
                }
            }
        }
        error!(ENOTTY)
    }
1133
1134    pub fn get_foreground_process_group(&self, terminal: &Terminal) -> Result<pid_t, Errno> {
1135        let state = self.read();
1136        let process_group = &state.process_group;
1137        let terminal_state = terminal.read();
1138
1139        // "When fd does not refer to the controlling terminal of the calling
1140        // process, -1 is returned" - tcgetpgrp(3)
1141        Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1142        let pid = process_group.session.read().get_foreground_process_group_leader();
1143        Ok(pid)
1144    }
1145
    /// Implements tcsetpgrp(3): makes process group `pgid` the foreground
    /// process group of `terminal`.
    ///
    /// If the caller is in a background group with SIGTTOU neither blocked
    /// nor ignored, SIGTTOU is sent to the caller's process group and EINTR
    /// is returned instead of changing the foreground group.
    pub fn set_foreground_process_group<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        terminal: &Terminal,
        pgid: pid_t,
    ) -> Result<(), Errno>
    where
        L: LockBefore<ProcessGroupState>,
    {
        let process_group;
        let send_ttou;
        {
            // Keep locks to ensure atomicity.
            let pids = self.kernel.pids.read();
            let state = self.read();
            process_group = Arc::clone(&state.process_group);
            let terminal_state = terminal.read();
            // `terminal` must be the controlling terminal of our session.
            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;

            // pgid must be positive.
            if pgid < 0 {
                return error!(EINVAL);
            }

            // The new foreground group must exist and be in the same session.
            let new_process_group = pids.get_process_group(pgid).ok_or_else(|| errno!(ESRCH))?;
            if new_process_group.session != process_group.session {
                return error!(EPERM);
            }

            let mut session_state = process_group.session.write();
            // If the calling process is a member of a background group and not ignoring SIGTTOU, a
            // SIGTTOU signal is sent to all members of this background process group.
            send_ttou = process_group.leader != session_state.get_foreground_process_group_leader()
                && !current_task.read().signal_mask().has_signal(SIGTTOU)
                && self.signal_actions.get(SIGTTOU).sa_handler != SIG_IGN;

            if !send_ttou {
                session_state.set_foreground_process_group(&new_process_group);
            }
        }

        // Locks must not be held when sending signals.
        if send_ttou {
            process_group.send_signals(locked, &[SIGTTOU]);
            return error!(EINTR);
        }

        Ok(())
    }
1196
    /// Makes `terminal` the controlling terminal of this session-leader
    /// process, in the manner of TIOCSCTTY — see tty_ioctl(4).
    ///
    /// `steal` permits taking the terminal from another session (requires
    /// CAP_SYS_ADMIN); `is_main` records which side of the terminal is
    /// claimed; a non-readable fd also requires CAP_SYS_ADMIN.
    pub fn set_controlling_terminal(
        &self,
        current_task: &CurrentTask,
        terminal: &Terminal,
        is_main: bool,
        steal: bool,
        is_readable: bool,
    ) -> Result<(), Errno> {
        // Keep locks to ensure atomicity.
        let state = self.read();
        let process_group = &state.process_group;
        let mut terminal_state = terminal.write();
        let mut session_writer = process_group.session.write();

        // "The calling process must be a session leader and not have a
        // controlling terminal already." - tty_ioctl(4)
        if process_group.session.leader != self.leader
            || session_writer.controlling_terminal.is_some()
        {
            return error!(EINVAL);
        }

        // Tracks whether CAP_SYS_ADMIN has already been checked, so the
        // readability fallback below does not check (and audit) it twice.
        let mut has_admin_capability_determined = false;

        // "If this terminal is already the controlling terminal of a different
        // session group, then the ioctl fails with EPERM, unless the caller
        // has the CAP_SYS_ADMIN capability and arg equals 1, in which case the
        // terminal is stolen, and all processes that had it as controlling
        // terminal lose it." - tty_ioctl(4)
        if let Some(other_session) =
            terminal_state.controller.as_ref().and_then(|cs| cs.session.upgrade())
        {
            if other_session != process_group.session {
                if !steal {
                    return error!(EPERM);
                }
                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
                has_admin_capability_determined = true;

                // Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
                other_session.write().controlling_terminal = None;
            }
        }

        if !is_readable && !has_admin_capability_determined {
            security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
        }

        session_writer.controlling_terminal = Some(ControllingTerminal::new(terminal, is_main));
        terminal_state.controller = TerminalController::new(&process_group.session);
        Ok(())
    }
1249
1250    pub fn release_controlling_terminal<L>(
1251        &self,
1252        locked: &mut Locked<L>,
1253        _current_task: &CurrentTask,
1254        terminal: &Terminal,
1255        is_main: bool,
1256    ) -> Result<(), Errno>
1257    where
1258        L: LockBefore<ProcessGroupState>,
1259    {
1260        let process_group;
1261        {
1262            // Keep locks to ensure atomicity.
1263            let state = self.read();
1264            process_group = Arc::clone(&state.process_group);
1265            let mut terminal_state = terminal.write();
1266            let mut session_writer = process_group.session.write();
1267
1268            // tty must be the controlling terminal.
1269            Self::check_terminal_controller(&process_group.session, &terminal_state.controller)?;
1270            if !session_writer
1271                .controlling_terminal
1272                .as_ref()
1273                .map_or(false, |ct| ct.matches(terminal, is_main))
1274            {
1275                return error!(ENOTTY);
1276            }
1277
1278            // "If the process was session leader, then send SIGHUP and SIGCONT to the foreground
1279            // process group and all processes in the current session lose their controlling terminal."
1280            // - tty_ioctl(4)
1281
1282            // Remove tty as the controlling tty for each process in the session, then
1283            // send them SIGHUP and SIGCONT.
1284
1285            session_writer.controlling_terminal = None;
1286            terminal_state.controller = None;
1287        }
1288
1289        if process_group.session.leader == self.leader {
1290            process_group.send_signals(locked, &[SIGHUP, SIGCONT]);
1291        }
1292
1293        Ok(())
1294    }
1295
1296    fn check_orphans<L>(&self, locked: &mut Locked<L>, pids: &PidTable)
1297    where
1298        L: LockBefore<ProcessGroupState>,
1299    {
1300        let mut thread_groups = self.read().children().collect::<Vec<_>>();
1301        let this = self.weak_self.upgrade().unwrap();
1302        thread_groups.push(this);
1303        let process_groups =
1304            thread_groups.iter().map(|tg| Arc::clone(&tg.read().process_group)).unique();
1305        for pg in process_groups {
1306            pg.check_orphaned(locked, pids);
1307        }
1308    }
1309
    /// Returns the current (soft) limit — `rlim_cur` — for `resource`.
    pub fn get_rlimit<L>(&self, locked: &mut Locked<L>, resource: Resource) -> u64
    where
        L: LockBefore<ThreadGroupLimits>,
    {
        self.limits.lock(locked).get(resource).rlim_cur
    }
1316
1317    /// Adjusts the rlimits of the ThreadGroup to which `target_task` belongs to.
1318    pub fn adjust_rlimits<L>(
1319        locked: &mut Locked<L>,
1320        current_task: &CurrentTask,
1321        target_task: &Task,
1322        resource: Resource,
1323        maybe_new_limit: Option<rlimit>,
1324    ) -> Result<rlimit, Errno>
1325    where
1326        L: LockBefore<ThreadGroupLimits>,
1327    {
1328        let thread_group = target_task.thread_group();
1329        let can_increase_rlimit = security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE);
1330        let mut limit_state = thread_group.limits.lock(locked);
1331        let old_limit = limit_state.get(resource);
1332        if let Some(new_limit) = maybe_new_limit {
1333            if new_limit.rlim_max > old_limit.rlim_max && !can_increase_rlimit {
1334                return error!(EPERM);
1335            }
1336            security::task_setrlimit(current_task, &target_task, old_limit, new_limit)?;
1337            limit_state.set(resource, new_limit)
1338        }
1339        Ok(old_limit)
1340    }
1341
    /// Returns CPU time statistics for this thread group, read from the
    /// underlying Zircon process.
    pub fn time_stats(&self) -> TaskTimeStats {
        let process: &zx::Process = if self.process.as_handle_ref().is_invalid() {
            // `process` must be valid for all tasks, except `kthreads`. In that case get the
            // stats from starnix process.
            assert_eq!(
                self as *const ThreadGroup,
                Arc::as_ptr(&self.kernel.kthreads.system_thread_group())
            );
            &self.kernel.kthreads.starnix_process
        } else {
            &self.process
        };

        let info =
            zx::Task::get_runtime_info(process).expect("Failed to get starnix process stats");
        TaskTimeStats {
            // All of Zircon's reported cpu_time is attributed to user time.
            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
            system_time: zx::MonotonicDuration::default(),
        }
    }
1363
1364    /// For each task traced by this thread_group that matches the given
1365    /// selector, acquire its TaskMutableState and ptracees lock and execute the
1366    /// given function.
1367    pub fn get_ptracees_and(
1368        &self,
1369        selector: &ProcessSelector,
1370        pids: &PidTable,
1371        f: &mut dyn FnMut(&Task, &TaskMutableState),
1372    ) {
1373        for tracee in self
1374            .ptracees
1375            .lock()
1376            .keys()
1377            .filter(|tracee_tid| selector.match_tid(**tracee_tid, &pids))
1378            .map(|tracee_tid| pids.get_task(*tracee_tid))
1379        {
1380            if let Some(task_ref) = tracee.clone().upgrade() {
1381                let task_state = task_ref.write();
1382                if task_state.ptrace.is_some() {
1383                    f(&task_ref, &task_state);
1384                }
1385            }
1386        }
1387    }
1388
    /// Returns a tracee whose state has changed, so that waitpid can report on
    /// it. If this returns a value, and the pid is being traced, the tracer
    /// thread is deemed to have seen the tracee ptrace-stop for the purposes of
    /// PTRACE_LISTEN.
    pub fn get_waitable_ptracee(
        &self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &mut PidTable,
    ) -> Option<WaitResult> {
        // This checks to see if the target is a zombie ptracee.
        let waitable_entry = self.write().zombie_ptracees.get_waitable_entry(selector, options);
        match waitable_entry {
            None => (),
            Some((zombie, None)) => return Some(zombie.to_wait_result()),
            Some((zombie, Some((tg, z)))) => {
                // `z` is a zombie owed to the tracee's parent `tg`: forward it
                // there, or — if we ourselves are that parent — consume it.
                if let Some(tg) = tg.upgrade() {
                    if Arc::as_ptr(&tg) != self as *const Self {
                        tg.do_zombie_notifications(z);
                    } else {
                        {
                            let mut state = tg.write();
                            state.children.remove(&z.pid());
                            state
                                .deferred_zombie_ptracers
                                .retain(|dzp| dzp.tracee_thread_group_key != z.thread_group_key);
                        }

                        z.release(pids);
                    };
                }
                return Some(zombie.to_wait_result());
            }
        }

        let mut tasks = vec![];

        // This checks to see if the target is a living ptracee
        self.get_ptracees_and(selector, pids, &mut |task: &Task, _| {
            tasks.push(task.weak_self.clone());
        });
        for task in tasks {
            let Some(task_ref) = task.upgrade() else {
                continue;
            };

            // NOTE: the tracee's thread-group write lock is held for the rest
            // of the iteration; task state is locked after it.
            let process_state = &mut task_ref.thread_group().write();
            let mut task_state = task_ref.write();
            if task_state
                .ptrace
                .as_ref()
                .is_some_and(|ptrace| ptrace.is_waitable(task_ref.load_stopped(), options))
            {
                // We've identified a potential target.  Need to return either
                // the process's information (if we are in group-stop) or the
                // thread's information (if we are in a different stop).

                // The shared information:
                let mut pid: i32 = 0;
                let info = process_state.tasks.values().next().unwrap().info().clone();
                let uid = info.real_creds().uid;
                let mut exit_status = None;
                let exit_signal = process_state.exit_signal.clone();
                let time_stats =
                    process_state.base.time_stats() + process_state.children_time_stats;
                let task_stopped = task_ref.load_stopped();

                // Classifies the kind of wait result to report.
                #[derive(PartialEq)]
                enum ExitType {
                    None,
                    Cont,
                    Stop,
                    Kill,
                }
                if process_state.is_waitable() {
                    let ptrace = &mut task_state.ptrace;
                    // The information for processes, if we were in group stop.
                    let process_stopped = process_state.base.load_stopped();
                    let mut fn_type = ExitType::None;
                    if process_stopped == StopState::Awake && options.wait_for_continued {
                        fn_type = ExitType::Cont;
                    }
                    let mut event = ptrace
                        .as_ref()
                        .map_or(PtraceEvent::None, |ptrace| {
                            ptrace.event_data.as_ref().map_or(PtraceEvent::None, |data| data.event)
                        })
                        .clone();
                    // Tasks that are ptrace'd always get stop notifications.
                    if process_stopped == StopState::GroupStopped
                        && (options.wait_for_stopped || ptrace.is_some())
                    {
                        fn_type = ExitType::Stop;
                    }
                    if fn_type != ExitType::None {
                        // Peek at the last signal when waitable state is to be
                        // kept; otherwise consume it.
                        let siginfo = if options.keep_waitable_state {
                            process_state.last_signal.clone()
                        } else {
                            process_state.last_signal.take()
                        };
                        if let Some(mut siginfo) = siginfo {
                            if task_ref.thread_group().load_stopped() == StopState::GroupStopped
                                && ptrace.as_ref().is_some_and(|ptrace| ptrace.is_seized())
                            {
                                if event == PtraceEvent::None {
                                    event = PtraceEvent::Stop;
                                }
                                siginfo.code |= (PtraceEvent::Stop as i32) << 8;
                            }
                            if siginfo.signal == SIGKILL {
                                fn_type = ExitType::Kill;
                            }
                            exit_status = match fn_type {
                                ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
                                ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
                                ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
                                _ => None,
                            };
                        }
                        // Clear the wait status of the ptrace, because we're
                        // using the tg status instead.
                        ptrace
                            .as_mut()
                            .map(|ptrace| ptrace.get_last_signal(options.keep_waitable_state));
                    }
                    pid = process_state.base.leader;
                }
                // NOTE(review): `exit_status == None` compiles via PartialEq;
                // `exit_status.is_none()` would be the idiomatic form.
                if exit_status == None {
                    if let Some(ptrace) = task_state.ptrace.as_mut() {
                        // The information for the task, if we were in a non-group stop.
                        let mut fn_type = ExitType::None;
                        let event = ptrace
                            .event_data
                            .as_ref()
                            .map_or(PtraceEvent::None, |event| event.event);
                        if task_stopped == StopState::Awake {
                            fn_type = ExitType::Cont;
                        }
                        if task_stopped.is_stopping_or_stopped()
                            || ptrace.stop_status == PtraceStatus::Listening
                        {
                            fn_type = ExitType::Stop;
                        }
                        if fn_type != ExitType::None {
                            if let Some(siginfo) =
                                ptrace.get_last_signal(options.keep_waitable_state)
                            {
                                if siginfo.signal == SIGKILL {
                                    fn_type = ExitType::Kill;
                                }
                                exit_status = match fn_type {
                                    ExitType::Stop => Some(ExitStatus::Stop(siginfo, event)),
                                    ExitType::Cont => Some(ExitStatus::Continue(siginfo, event)),
                                    ExitType::Kill => Some(ExitStatus::Kill(siginfo)),
                                    _ => None,
                                };
                            }
                        }
                        // A non-group stop reports the thread's tid rather
                        // than the process leader's pid.
                        pid = task_ref.get_tid();
                    }
                }
                if let Some(exit_status) = exit_status {
                    return Some(WaitResult {
                        pid,
                        uid,
                        exit_info: ProcessExitInfo { status: exit_status, exit_signal },
                        time_stats,
                    });
                }
            }
        }
        None
    }
1562
1563    /// Attempts to send an unchecked signal to this thread group.
1564    ///
1565    /// - `current_task`: The task that is sending the signal.
1566    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1567    /// where rights are to be checked but no signal is actually sent.
1568    ///
1569    /// # Returns
1570    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1571    /// the error that was encountered.
1572    pub fn send_signal_unchecked(
1573        &self,
1574        current_task: &CurrentTask,
1575        unchecked_signal: UncheckedSignal,
1576    ) -> Result<(), Errno> {
1577        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1578            let signal_info = SignalInfo {
1579                code: SI_USER as i32,
1580                detail: SignalDetail::Kill {
1581                    pid: current_task.thread_group().leader,
1582                    uid: current_task.current_creds().uid,
1583                },
1584                ..SignalInfo::default(signal)
1585            };
1586
1587            self.write().send_signal(signal_info);
1588        }
1589
1590        Ok(())
1591    }
1592
1593    /// Sends a signal to this thread_group without performing any access checks.
1594    ///
1595    /// # Safety
1596    /// This is unsafe, because it should only be called by tools and tests.
1597    pub unsafe fn send_signal_unchecked_debug(
1598        &self,
1599        current_task: &CurrentTask,
1600        unchecked_signal: UncheckedSignal,
1601    ) -> Result<(), Errno> {
1602        let signal = Signal::try_from(unchecked_signal)?;
1603        let signal_info = SignalInfo {
1604            code: SI_USER as i32,
1605            detail: SignalDetail::Kill {
1606                pid: current_task.thread_group().leader,
1607                uid: current_task.current_creds().uid,
1608            },
1609            ..SignalInfo::default(signal)
1610        };
1611
1612        self.write().send_signal(signal_info);
1613        Ok(())
1614    }
1615
1616    /// Attempts to send an unchecked signal to this thread group, with info read from
1617    /// `siginfo_ref`.
1618    ///
1619    /// - `current_task`: The task that is sending the signal.
1620    /// - `unchecked_signal`: The signal that is to be sent. Unchecked, since `0` is a sentinel value
1621    /// where rights are to be checked but no signal is actually sent.
1622    /// - `siginfo_ref`: The siginfo that will be enqueued.
1623    /// - `options`: Options for how to convert the siginfo into a signal info.
1624    ///
1625    /// # Returns
1626    /// Returns Ok(()) if the signal was sent, or the permission checks passed with a 0 signal, otherwise
1627    /// the error that was encountered.
1628    #[track_caller]
1629    pub fn send_signal_unchecked_with_info(
1630        &self,
1631        current_task: &CurrentTask,
1632        unchecked_signal: UncheckedSignal,
1633        siginfo_ref: UserAddress,
1634        options: IntoSignalInfoOptions,
1635    ) -> Result<(), Errno> {
1636        if let Some(signal) = self.check_signal_access(current_task, unchecked_signal)? {
1637            let siginfo = UncheckedSignalInfo::read_from_siginfo(current_task, siginfo_ref)?;
1638            if self.leader != current_task.get_pid()
1639                && (siginfo.code() >= 0 || siginfo.code() == SI_TKILL)
1640            {
1641                return error!(EPERM);
1642            }
1643
1644            self.write().send_signal(siginfo.into_signal_info(signal, options)?);
1645        }
1646
1647        Ok(())
1648    }
1649
1650    /// Checks whether or not `current_task` can signal this thread group with `unchecked_signal`.
1651    ///
1652    /// Returns:
1653    ///   - `Ok(Some(Signal))` if the signal passed checks and should be sent.
1654    ///   - `Ok(None)` if the signal passed checks, but should not be sent. This is used by
1655    ///   userspace for permission checks.
1656    ///   - `Err(_)` if the permission checks failed.
1657    fn check_signal_access(
1658        &self,
1659        current_task: &CurrentTask,
1660        unchecked_signal: UncheckedSignal,
1661    ) -> Result<Option<Signal>, Errno> {
1662        // Pick an arbitrary task in thread_group to check permissions.
1663        //
1664        // Tasks can technically have different credentials, but in practice they are kept in sync.
1665        let state = self.read();
1666        let target_task = state.get_live_task()?;
1667        current_task.can_signal(&target_task, unchecked_signal)?;
1668
1669        // 0 is a sentinel value used to do permission checks.
1670        if unchecked_signal.is_zero() {
1671            return Ok(None);
1672        }
1673
1674        let signal = Signal::try_from(unchecked_signal)?;
1675        security::check_signal_access(current_task, &target_task, signal)?;
1676
1677        Ok(Some(signal))
1678    }
1679
    /// Drive this `ThreadGroup` to exit, allowing it time to handle SIGTERM before sending SIGKILL.
    ///
    /// Returns once `ThreadGroup::exit()` has completed.
    ///
    /// Must be called from the system task.
    pub async fn shut_down(this: Weak<Self>) {
        // Grace period between SIGTERM and SIGKILL.
        const SHUTDOWN_SIGNAL_HANDLING_TIMEOUT: zx::MonotonicDuration =
            zx::MonotonicDuration::from_seconds(1);

        // Prepare for shutting down the thread group.
        let (tg_name, mut on_exited) = {
            // Nest this upgraded access so TempRefs aren't held across await-points.
            let Some(this) = this.upgrade() else {
                // Thread group already gone; nothing to shut down.
                return;
            };

            // Register a channel to be notified when exit() is complete.
            let (on_exited_send, on_exited) = futures::channel::oneshot::channel();
            this.write().exit_notifier = Some(on_exited_send);

            // We want to be able to log about this thread group without upgrading the WeakRef.
            let tg_name = format!("{this:?}");

            (tg_name, on_exited)
        };

        log_debug!(tg:% = tg_name; "shutting down thread group, sending SIGTERM");
        // Best-effort: the thread group may have exited already, in which case
        // the upgrade fails and no signal is sent.
        this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::default(SIGTERM)));

        // Give thread groups some time to handle SIGTERM, proceeding early if they exit
        let timeout = fuchsia_async::Timer::new(SHUTDOWN_SIGNAL_HANDLING_TIMEOUT);
        futures::pin_mut!(timeout);

        // Use select_biased instead of on_timeout() so that we can await on on_exited later
        futures::select_biased! {
            // Exit won the race: skip SIGKILL entirely.
            _ = &mut on_exited => (),
            _ = timeout => {
                log_debug!(tg:% = tg_name; "sending SIGKILL");
                // Again best-effort, for the same reason as the SIGTERM above.
                this.upgrade().map(|tg| tg.write().send_signal(SignalInfo::default(SIGKILL)));
            },
        };

        log_debug!(tg:% = tg_name; "waiting for exit");
        // It doesn't matter whether ThreadGroup::exit() was called or the process exited with
        // a return code and dropped the sender end of the channel.
        on_exited.await.ok();
        log_debug!(tg:% = tg_name; "thread group shutdown complete");
    }
1728
    /// Returns the KOID of the process for this thread group.
    ///
    /// This method should be used when mapping 32-bit Linux process ids to KOIDs,
    /// rather than reaching into the `zx::process` directly: the relationship
    /// between the `ThreadGroup` and the process may change over time (see
    /// [ThreadGroup::process] for more details), so the handle stays
    /// encapsulated behind this accessor.
    pub fn get_process_koid(&self) -> Result<Koid, Status> {
        self.process.koid()
    }
1737}
1738
/// Outcome of scanning this thread group's children for a waitable child
/// (see `ThreadGroupMutableState::get_waitable_child`).
#[cfg_attr(
    feature = "debug_and_trace_logs_enabled",
    allow(clippy::large_enum_variant, reason = "no need to optimize enum size in debug builds")
)]
pub enum WaitableChildResult {
    /// A matching child is waitable right now; carries its wait data.
    ReadyNow(WaitResult),
    /// Matching children exist but none is currently waitable; the caller
    /// should block until a child's state changes.
    ShouldWait,
    /// No child matches the selector at all (the wait syscalls report this
    /// as ECHILD).
    NoneFound,
}
1748
#[apply(state_implementation!)]
impl ThreadGroupMutableState<Base = ThreadGroup> {
    /// Returns the pid of the thread group leader.
    pub fn leader(&self) -> pid_t {
        self.base.leader
    }

    /// Returns the command of the leader task, or a placeholder if the leader
    /// has already exited.
    pub fn leader_command(&self) -> TaskCommand {
        self.get_task(self.leader())
            .map(|l| l.command())
            .unwrap_or_else(|| TaskCommand::new(b"<leader exited>"))
    }

    /// Whether this thread group has left the `Running` state.
    pub fn is_terminating(&self) -> bool {
        !matches!(self.run_state, ThreadGroupRunState::Running)
    }

    /// Iterates over the child thread groups of this thread group.
    pub fn children(&self) -> impl Iterator<Item = Arc<ThreadGroup>> + '_ {
        self.children.values().map(|v| {
            v.upgrade().expect("Weak references to processes in ThreadGroup must always be valid")
        })
    }

    /// Iterates over the live tasks of this thread group, skipping tasks whose
    /// weak reference can no longer be upgraded.
    pub fn tasks(&self) -> impl Iterator<Item = TempRef<'_, Task>> + '_ {
        self.tasks.values().flat_map(|t| t.upgrade())
    }

    /// Iterates over the tids of all tasks in this thread group.
    pub fn task_ids(&self) -> impl Iterator<Item = &tid_t> {
        self.tasks.keys()
    }

    /// Whether a task with the given tid belongs to this thread group.
    pub fn contains_task(&self, tid: tid_t) -> bool {
        self.tasks.contains_key(&tid)
    }

    /// Returns the task with the given tid, or `None` if it is not in this
    /// thread group or is no longer live.
    pub fn get_task(&self, tid: tid_t) -> Option<TempRef<'_, Task>> {
        self.tasks.get(&tid).and_then(|t| t.upgrade())
    }

    /// Number of task entries in this thread group (live or not).
    pub fn tasks_count(&self) -> usize {
        self.tasks.len()
    }

    /// Returns the pid of the parent thread group's leader, or 0 if this
    /// thread group has no parent.
    pub fn get_ppid(&self) -> pid_t {
        match &self.parent {
            Some(parent) => parent.upgrade().leader,
            None => 0,
        }
    }

    /// Moves this thread group into `process_group`, leaving the previous
    /// process group first. No-op if it is already a member.
    fn set_process_group<L>(
        &mut self,
        locked: &mut Locked<L>,
        process_group: Arc<ProcessGroup>,
        pids: &PidTable,
    ) where
        L: LockBefore<ProcessGroupState>,
    {
        if self.process_group == process_group {
            return;
        }
        self.leave_process_group(locked, pids);
        self.process_group = process_group;
        self.process_group.insert(locked, self.base);
    }

    /// Removes this thread group from its current process group. If the group
    /// becomes empty as a result, it is also removed from its session and from
    /// the pid table.
    fn leave_process_group<L>(&mut self, locked: &mut Locked<L>, pids: &PidTable)
    where
        L: LockBefore<ProcessGroupState>,
    {
        if self.process_group.remove(locked, self.base) {
            self.process_group.session.write().remove(self.process_group.leader);
            pids.remove_process_group(self.process_group.leader);
        }
    }

    /// Indicates whether the thread group is waitable via waitid and waitpid for
    /// either WSTOPPED or WCONTINUED.
    pub fn is_waitable(&self) -> bool {
        return self.last_signal.is_some() && !self.base.load_stopped().is_in_progress();
    }

    /// Finds the most recent zombie in `zombie_list` matching `selector` and
    /// `options` and converts it into a `WaitResult`. Unless
    /// `options.keep_waitable_state` is set, the zombie is consumed: its time
    /// stats are folded into `children_time_stats` and it is released from the
    /// pid table.
    pub fn get_waitable_zombie(
        &mut self,
        zombie_list: &dyn Fn(&mut ThreadGroupMutableState) -> &mut Vec<OwnedRef<ZombieProcess>>,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &mut PidTable,
    ) -> Option<WaitResult> {
        // We look for the last zombie in the vector that matches pid selector and waiting options
        let selected_zombie_position = zombie_list(self)
            .iter()
            .rev()
            .position(|zombie| zombie.matches_selector_and_waiting_option(selector, options))
            .map(|position_starting_from_the_back| {
                // Convert the reversed-iterator position back into a forward index.
                zombie_list(self).len() - 1 - position_starting_from_the_back
            });

        selected_zombie_position.map(|position| {
            if options.keep_waitable_state {
                // WNOWAIT-style: report without consuming the zombie.
                zombie_list(self)[position].to_wait_result()
            } else {
                let zombie = zombie_list(self).remove(position);
                self.children_time_stats += zombie.time_stats;
                let result = zombie.to_wait_result();
                zombie.release(pids);
                result
            }
        })
    }

    /// Whether a child's exit signal matches the kind of wait requested:
    /// "clone" children are exactly those whose exit signal is not SIGCHLD.
    pub fn is_correct_exit_signal(for_clone: bool, exit_code: Option<Signal>) -> bool {
        for_clone == (exit_code != Some(SIGCHLD))
    }

    /// Scans the living (non-zombie) children for one matching `selector`
    /// whose stopped/continued state is waitable under `options`. See
    /// `WaitableChildResult` for the meaning of the return values.
    fn get_waitable_running_children(
        &self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &PidTable,
    ) -> WaitableChildResult {
        // The children whose pid matches the pid selector queried.
        let filter_children_by_pid_selector = |child: &ThreadGroup| match *selector {
            ProcessSelector::Any => true,
            ProcessSelector::Pid(pid) => child.leader == pid,
            ProcessSelector::Pgid(pgid) => {
                pids.get_process_group(pgid).as_ref() == Some(&child.read().process_group)
            }
            ProcessSelector::Process(ref key) => *key == ThreadGroupKey::from(child),
        };

        // The children whose exit signal matches the waiting options queried.
        let filter_children_by_waiting_options = |child: &ThreadGroup| {
            if options.wait_for_all {
                return true;
            }
            Self::is_correct_exit_signal(options.wait_for_clone, child.read().exit_signal)
        };

        // If wait_for_exited flag is disabled or no terminated children were found we look for living children.
        let mut selected_children = self
            .children
            .values()
            .map(|t| t.upgrade().unwrap())
            .filter(|tg| filter_children_by_pid_selector(&tg))
            .filter(|tg| filter_children_by_waiting_options(&tg))
            .peekable();
        if selected_children.peek().is_none() {
            // There still might be a process that ptrace hasn't looked at yet.
            if self.deferred_zombie_ptracers.iter().any(|dzp| match *selector {
                ProcessSelector::Any => true,
                ProcessSelector::Pid(pid) => dzp.tracee_thread_group_key.pid() == pid,
                ProcessSelector::Pgid(pgid) => pgid == dzp.tracee_pgid,
                ProcessSelector::Process(ref key) => *key == dzp.tracee_thread_group_key,
            }) {
                return WaitableChildResult::ShouldWait;
            }

            return WaitableChildResult::NoneFound;
        }
        for child in selected_children {
            let child = child.write();
            if child.last_signal.is_some() {
                // Builds the WaitResult for a stopped/continued child; `exit_status`
                // selects the Stop/Continue variant from the recorded siginfo.
                let build_wait_result = |mut child: ThreadGroupWriteGuard<'_>,
                                         exit_status: &dyn Fn(SignalInfo) -> ExitStatus|
                 -> WaitResult {
                    let siginfo = if options.keep_waitable_state {
                        child.last_signal.clone().unwrap()
                    } else {
                        child.last_signal.take().unwrap()
                    };
                    let exit_status = if siginfo.signal == SIGKILL {
                        // This overrides the stop/continue choice.
                        ExitStatus::Kill(siginfo)
                    } else {
                        exit_status(siginfo)
                    };
                    let info = child.tasks.values().next().unwrap().info();
                    let uid = info.real_creds().uid;
                    WaitResult {
                        pid: child.base.leader,
                        uid,
                        exit_info: ProcessExitInfo {
                            status: exit_status,
                            exit_signal: child.exit_signal,
                        },
                        time_stats: child.base.time_stats() + child.children_time_stats,
                    }
                };
                let child_stopped = child.base.load_stopped();
                if child_stopped == StopState::Awake && options.wait_for_continued {
                    return WaitableChildResult::ReadyNow(build_wait_result(child, &|siginfo| {
                        ExitStatus::Continue(siginfo, PtraceEvent::None)
                    }));
                }
                if child_stopped == StopState::GroupStopped && options.wait_for_stopped {
                    return WaitableChildResult::ReadyNow(build_wait_result(child, &|siginfo| {
                        ExitStatus::Stop(siginfo, PtraceEvent::None)
                    }));
                }
            }
        }

        WaitableChildResult::ShouldWait
    }

    /// Returns any waitable child matching the given `selector` and `options`. Returns None if no
    /// child matching the selector is waitable. Returns ECHILD if no child matches the selector at
    /// all.
    ///
    /// Will remove the waitable status from the child depending on `options`.
    pub fn get_waitable_child(
        &mut self,
        selector: &ProcessSelector,
        options: &WaitingOptions,
        pids: &mut PidTable,
    ) -> WaitableChildResult {
        // Exited (zombie) children take precedence over stopped/continued ones.
        if options.wait_for_exited {
            if let Some(waitable_zombie) = self.get_waitable_zombie(
                &|state: &mut ThreadGroupMutableState| &mut state.zombie_children,
                selector,
                options,
                pids,
            ) {
                return WaitableChildResult::ReadyNow(waitable_zombie);
            }
        }

        self.get_waitable_running_children(selector, options, pids)
    }

    /// Returns a task in the current thread group.
    ///
    /// Prefers the leader; falls back to any live task. Fails with ESRCH if no
    /// task in the group can be upgraded.
    pub fn get_live_task(&self) -> Result<TempRef<'_, Task>, Errno> {
        self.tasks
            .get(&self.leader())
            .and_then(|t| t.upgrade())
            .or_else(|| self.tasks().next())
            .ok_or_else(|| errno!(ESRCH))
    }

    /// Set the stop status of the process.  If you pass |siginfo| of |None|,
    /// does not update the signal.  If |finalize_only| is set, will check that
    /// the set will be a finalize (Stopping -> Stopped or Stopped -> Stopped)
    /// before executing it.
    ///
    /// Returns the latest stop state after any changes.
    ///
    /// NOTE: consumes `self` (the state guard): the lock is dropped before the
    /// parent thread group is locked to notify its waiters.
    pub fn set_stopped(
        mut self,
        new_stopped: StopState,
        siginfo: Option<SignalInfo>,
        finalize_only: bool,
    ) -> StopState {
        if let Some(stopped) = self.base.check_stopped_state(new_stopped, finalize_only) {
            return stopped;
        }

        // Thread groups don't transition to group stop if they are waking, because waking
        // means something told it to wake up (like a SIGCONT) but hasn't finished yet.
        if self.base.load_stopped() == StopState::Waking
            && (new_stopped == StopState::GroupStopping || new_stopped == StopState::GroupStopped)
        {
            return self.base.load_stopped();
        }

        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When thread
        // group can be stopped inside user code, tasks/thread groups will
        // need to be either restarted or stopped here.
        self.store_stopped(new_stopped);
        if let Some(signal) = &siginfo {
            // We don't want waiters to think the process was unstopped
            // because of a sigkill.  They will get woken when the
            // process dies.
            if signal.signal != SIGKILL {
                self.last_signal = siginfo;
            }
        }
        if new_stopped == StopState::Waking || new_stopped == StopState::ForceWaking {
            self.lifecycle_waiters.notify_value(ThreadGroupLifecycleWaitValue::Stopped);
        };

        // Only notify the parent once a transition has completed (not while
        // stopping/waking is still in progress).
        let parent = (!new_stopped.is_in_progress()).then(|| self.parent.clone()).flatten();

        // Drop the lock before locking the parent.
        std::mem::drop(self);
        if let Some(parent) = parent {
            let parent = parent.upgrade();
            parent
                .write()
                .lifecycle_waiters
                .notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
        }

        new_stopped
    }

    /// Stores `state` as the thread group's current stop state.
    fn store_stopped(&mut self, state: StopState) {
        // We don't actually use the guard but we require it to enforce that the
        // caller holds the thread group's mutable state lock (identified by
        // mutable access to the thread group's mutable state).

        self.base.stop_state.store(state, Ordering::Relaxed)
    }

    /// Sends the signal `signal_info` to this thread group.
    #[allow(unused_mut, reason = "needed for some but not all macro outputs")]
    pub fn send_signal(mut self, signal_info: SignalInfo) {
        let sigaction = self.base.signal_actions.get(signal_info.signal);
        let action = action_for_signal(&signal_info, sigaction);

        self.base.pending_signals.lock().enqueue(signal_info.clone());
        // Snapshot weak task refs now: `set_stopped` below consumes the state
        // guard, so `self` cannot be used after that point.
        let tasks: Vec<WeakRef<Task>> = self.tasks.values().map(|t| t.weak_clone()).collect();

        // Set state to waking before interrupting any tasks.
        if signal_info.signal == SIGKILL {
            self.set_stopped(StopState::ForceWaking, Some(signal_info.clone()), false);
        } else if signal_info.signal == SIGCONT {
            self.set_stopped(StopState::Waking, Some(signal_info.clone()), false);
        }

        let mut has_interrupted_task = false;
        for task in tasks.iter().flat_map(|t| t.upgrade()) {
            let mut task_state = task.write();

            if signal_info.signal == SIGKILL {
                task_state.thaw();
                task_state.set_stopped(StopState::ForceWaking, None, None, None);
            } else if signal_info.signal == SIGCONT {
                task_state.set_stopped(StopState::Waking, None, None, None);
            }

            let is_masked = task_state.is_signal_masked(signal_info.signal);
            let was_masked = task_state.is_signal_masked_by_saved_mask(signal_info.signal);

            // The signal stays queued unless it would be ignored outright;
            // masked (or previously-masked) signals and ptraced tasks still
            // observe it.
            let is_queued = action != DeliveryAction::Ignore
                || is_masked
                || was_masked
                || task_state.is_ptraced();

            if is_queued {
                task_state.notify_signal_waiters(&signal_info.signal);
                task_state.set_flags(TaskFlags::SIGNALS_AVAILABLE, true);

                if !is_masked && action.must_interrupt(Some(sigaction)) && !has_interrupted_task {
                    // Only interrupt one task, and only interrupt if the signal was actually queued
                    // and the action must interrupt.
                    drop(task_state);
                    task.interrupt();
                    has_interrupted_task = true;
                }
            }
        }
    }
}
2101
/// Container around a weak task and a strong `TaskPersistentInfo`. It is needed to keep the
/// information even when the task is not upgradable, because when the task is dropped, there is a
/// moment where the task is not yet released, yet the weak pointer is not upgradeable anymore.
/// During this time, it is still necessary to access the persistent info to compute the state of
/// the thread for the different wait syscalls.
///
/// Field 0 is the (possibly dead) weak task reference; field 1 is the
/// always-available persistent info.
pub struct TaskContainer(WeakRef<Task>, TaskPersistentInfo);
2108
2109impl From<&TempRef<'_, Task>> for TaskContainer {
2110    fn from(task: &TempRef<'_, Task>) -> Self {
2111        Self(WeakRef::from(task), task.persistent_info.clone())
2112    }
2113}
2114
2115impl From<TaskContainer> for TaskPersistentInfo {
2116    fn from(container: TaskContainer) -> TaskPersistentInfo {
2117        container.1
2118    }
2119}
2120
2121impl TaskContainer {
2122    fn upgrade(&self) -> Option<TempRef<'_, Task>> {
2123        self.0.upgrade()
2124    }
2125
2126    fn weak_clone(&self) -> WeakRef<Task> {
2127        self.0.clone()
2128    }
2129
2130    fn info(&self) -> &TaskPersistentInfo {
2131        &self.1
2132    }
2133}
2134
#[cfg(test)]
mod test {
    use super::*;
    use crate::testing::*;

    /// setsid() creates a fresh session led by the caller and removes it from
    /// its previous process group.
    #[::fuchsia::test]
    async fn test_setsid() {
        spawn_kernel_and_run(async |locked, current_task| {
            fn get_process_group(task: &Task) -> Arc<ProcessGroup> {
                Arc::clone(&task.thread_group().read().process_group)
            }
            // setsid() on the initial task is expected to fail with EPERM.
            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));

            // A freshly cloned child starts in its parent's process group.
            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert_eq!(get_process_group(&current_task), get_process_group(&child_task));

            let old_process_group = child_task.thread_group().read().process_group.clone();
            assert_eq!(child_task.thread_group().setsid(locked), Ok(()));
            // The child now leads its own session...
            assert_eq!(
                child_task.thread_group().read().process_group.session.leader,
                child_task.get_pid()
            );
            // ...and no longer belongs to its previous process group.
            assert!(
                !old_process_group.read(locked).thread_groups().contains(child_task.thread_group())
            );
        })
        .await;
    }

    /// An exited child shows up in the parent's zombie list with its exit
    /// status preserved.
    #[::fuchsia::test]
    async fn test_exit_status() {
        spawn_kernel_and_run(async |locked, current_task| {
            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            child.thread_group().exit(locked, ExitStatus::Exit(42), None);
            std::mem::drop(child);
            assert_eq!(
                current_task.thread_group().read().zombie_children[0].exit_info.status,
                ExitStatus::Exit(42)
            );
        })
        .await;
    }

    /// Exercises the error cases and success cases of setpgid().
    #[::fuchsia::test]
    async fn test_setgpid() {
        spawn_kernel_and_run(async |locked, current_task| {
            assert_eq!(current_task.thread_group().setsid(locked), error!(EPERM));

            let child_task1 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            let child_task2 = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            let execd_child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            execd_child_task.thread_group().write().did_exec = true;
            let other_session_child_task =
                current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert_eq!(other_session_child_task.thread_group().setsid(locked), Ok(()));

            // A child cannot call setpgid() targeting its parent (ESRCH).
            assert_eq!(
                child_task1.thread_group().setpgid(locked, &current_task, &current_task, 0),
                error!(ESRCH)
            );
            // A child that has exec'd can no longer be moved (EACCES).
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &execd_child_task, 0),
                error!(EACCES)
            );
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &current_task, 0),
                error!(EPERM)
            );
            // A child that moved to another session is out of reach (EPERM).
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &other_session_child_task,
                    0
                ),
                error!(EPERM)
            );
            // Negative pgids are invalid.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &child_task1, -1),
                error!(EINVAL)
            );
            // The target process group must exist in the caller's session.
            assert_eq!(
                current_task.thread_group().setpgid(locked, &current_task, &child_task1, 255),
                error!(EPERM)
            );
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &child_task1,
                    other_session_child_task.tid
                ),
                error!(EPERM)
            );

            // A child may make itself a process group leader (pgid 0 == own pid).
            assert_eq!(
                child_task1.thread_group().setpgid(locked, &current_task, &child_task1, 0),
                Ok(())
            );
            assert_eq!(
                child_task1.thread_group().read().process_group.session.leader,
                current_task.tid
            );
            assert_eq!(child_task1.thread_group().read().process_group.leader, child_task1.tid);

            // The parent may move another child into that new process group.
            let old_process_group = child_task2.thread_group().read().process_group.clone();
            assert_eq!(
                current_task.thread_group().setpgid(
                    locked,
                    &current_task,
                    &child_task2,
                    child_task1.tid
                ),
                Ok(())
            );
            assert_eq!(child_task2.thread_group().read().process_group.leader, child_task1.tid);
            assert!(
                !old_process_group
                    .read(locked)
                    .thread_groups()
                    .contains(child_task2.thread_group())
            );
        })
        .await;
    }

    /// When a thread group exits, its children are re-parented (here to
    /// `current_task`).
    #[::fuchsia::test]
    async fn test_adopt_children() {
        spawn_kernel_and_run(async |locked, current_task| {
            let task1 = current_task.clone_task_for_test(locked, 0, None);
            let task2 = task1.clone_task_for_test(locked, 0, None);
            let task3 = task2.clone_task_for_test(locked, 0, None);

            assert_eq!(task3.thread_group().read().get_ppid(), task2.tid);

            task2.thread_group().exit(locked, ExitStatus::Exit(0), None);
            std::mem::drop(task2);

            // Task3 parent should be current_task.
            assert_eq!(task3.thread_group().read().get_ppid(), current_task.tid);
        })
        .await;
    }
}