starnix_core/task/task.rs

// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use crate::mm::{MemoryAccessor, MemoryAccessorExt, MemoryManager, TaskMemoryAccessor};
use crate::mutable_state::{state_accessor, state_implementation};
use crate::ptrace::{
    AtomicStopState, PtraceEvent, PtraceEventData, PtraceState, PtraceStatus, StopState,
};
use crate::security;
use crate::signals::{KernelSignal, RunState, SignalDetail, SignalInfo, SignalState};
use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
use crate::task::tracing::KoidPair;
use crate::task::{
    AbstractUnixSocketNamespace, AbstractVsockSocketNamespace, CurrentTask, EventHandler, Kernel,
    NormalPriority, PidTable, ProcessEntryRef, ProcessExitInfo, RealtimePriority, SchedulerState,
    SchedulingPolicy, SeccompFilterContainer, SeccompState, SeccompStateValue, ThreadGroup,
    ThreadGroupKey, ThreadState, UtsNamespaceHandle, WaitCanceler, Waiter, ZombieProcess,
};
use crate::vfs::{FdTable, FsContext, FsNodeHandle, FsString};
use bitflags::bitflags;
use fuchsia_rcu::{RcuArc, RcuOptionArc, RcuReadGuard};
use macro_rules_attribute::apply;
use starnix_logging::{log_warn, set_zx_name};
use starnix_registers::{HeapRegs, RegisterStorageEnum};
use starnix_sync::{
    LockBefore, Locked, Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard, TaskRelease,
    TerminalLock,
};
use starnix_task_command::TaskCommand;
use starnix_types::arch::ArchWidth;
use starnix_types::ownership::{OwnedRef, Releasable, ReleaseGuard, TempRef, WeakRef};
use starnix_types::stats::TaskTimeStats;
use starnix_uapi::auth::{Credentials, FsCred};
use starnix_uapi::errors::Errno;
use starnix_uapi::signals::{SIGCHLD, SigSet, Signal, sigaltstack_contains_pointer};
use starnix_uapi::user_address::{
    ArchSpecific, MappingMultiArchUserRef, UserAddress, UserCString, UserRef,
};
use starnix_uapi::{
    CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, CLD_TRAPPED,
    FUTEX_BITSET_MATCH_ANY, errno, error, from_status_like_fdio, pid_t, sigaction_t, sigaltstack,
    tid_t, uapi,
};
use std::collections::VecDeque;
use std::mem::MaybeUninit;
use std::ops::Deref;
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
use std::sync::{Arc, Weak};
use std::{cmp, fmt};
use zx::{Signals, Task as _};

#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ExitStatus {
    Exit(u8),
    Kill(SignalInfo),
    CoreDump(SignalInfo),
    // The second field for Stop and Continue contains the type of ptrace stop
    // event that made it stop / continue, if applicable (PTRACE_EVENT_STOP,
    // PTRACE_EVENT_FORK, etc)
    Stop(SignalInfo, PtraceEvent),
    Continue(SignalInfo, PtraceEvent),
}
impl ExitStatus {
    /// Converts the given exit status to a status code suitable for returning from wait syscalls.
    pub fn wait_status(&self) -> i32 {
        match self {
            ExitStatus::Exit(status) => (*status as i32) << 8,
            ExitStatus::Kill(siginfo) => siginfo.signal.number() as i32,
            ExitStatus::CoreDump(siginfo) => (siginfo.signal.number() as i32) | 0x80,
            ExitStatus::Continue(siginfo, trace_event) => {
                let trace_event_val = *trace_event as u32;
                if trace_event_val != 0 {
                    (siginfo.signal.number() as i32) | (trace_event_val << 16) as i32
                } else {
                    0xffff
                }
            }
            ExitStatus::Stop(siginfo, trace_event) => {
                let trace_event_val = *trace_event as u32;
                (0x7f + ((siginfo.signal.number() as i32) << 8)) | (trace_event_val << 16) as i32
            }
        }
    }

    pub fn signal_info_code(&self) -> i32 {
        match self {
            ExitStatus::Exit(_) => CLD_EXITED as i32,
            ExitStatus::Kill(_) => CLD_KILLED as i32,
            ExitStatus::CoreDump(_) => CLD_DUMPED as i32,
            ExitStatus::Stop(_, _) => CLD_STOPPED as i32,
            ExitStatus::Continue(_, _) => CLD_CONTINUED as i32,
        }
    }

    pub fn signal_info_status(&self) -> i32 {
        match self {
            ExitStatus::Exit(status) => *status as i32,
            ExitStatus::Kill(siginfo)
            | ExitStatus::CoreDump(siginfo)
            | ExitStatus::Continue(siginfo, _)
            | ExitStatus::Stop(siginfo, _) => siginfo.signal.number() as i32,
        }
    }
}
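
// Illustrative example (not part of the original file): the `wait_status`
// encoding above mirrors the wait(2) status word, so a plain exit stores its
// code in bits 8..15 (the value that WEXITSTATUS-style decoding extracts).
#[cfg(test)]
mod exit_status_encoding_example {
    use super::*;

    #[test]
    fn exit_code_occupies_bits_8_to_15() {
        assert_eq!(ExitStatus::Exit(1).wait_status(), 0x0100);
        assert_eq!(ExitStatus::Exit(0).wait_status(), 0);
        assert_eq!(ExitStatus::Exit(0).signal_info_code(), CLD_EXITED as i32);
    }
}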

bitflags! {
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct TaskFlags: u8 {
        const EXITED = 0x1;
        const SIGNALS_AVAILABLE = 0x2;
        const TEMPORARY_SIGNAL_MASK = 0x4;
        /// Whether the executor should dump the stack of this task when it exits.
        /// Currently used to implement ExitStatus::CoreDump.
        const DUMP_ON_EXIT = 0x8;
        const KERNEL_SIGNALS_AVAILABLE = 0x10;
    }
}

pub struct AtomicTaskFlags {
    flags: AtomicU8,
}

impl AtomicTaskFlags {
    fn new(flags: TaskFlags) -> Self {
        Self { flags: AtomicU8::new(flags.bits()) }
    }

    fn load(&self, ordering: Ordering) -> TaskFlags {
        let flags = self.flags.load(ordering);
        // We only ever store values from a `TaskFlags`.
        TaskFlags::from_bits_retain(flags)
    }

    fn swap(&self, flags: TaskFlags, ordering: Ordering) -> TaskFlags {
        let flags = self.flags.swap(flags.bits(), ordering);
        // We only ever store values from a `TaskFlags`.
        TaskFlags::from_bits_retain(flags)
    }
}
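
// Illustrative example (not part of the original file): `AtomicTaskFlags`
// round-trips `TaskFlags` through the underlying `AtomicU8`, so `swap` returns
// exactly the flags that were previously stored.
#[cfg(test)]
mod atomic_task_flags_example {
    use super::*;

    #[test]
    fn swap_returns_previous_flags() {
        let flags = AtomicTaskFlags::new(TaskFlags::EXITED);
        let previous = flags.swap(TaskFlags::DUMP_ON_EXIT, Ordering::Relaxed);
        assert_eq!(previous, TaskFlags::EXITED);
        assert_eq!(flags.load(Ordering::Relaxed), TaskFlags::DUMP_ON_EXIT);
    }
}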

/// This contains thread state that tracers can inspect and modify.  It is
/// captured when a thread stops, and optionally copied back (if dirty) when a
/// thread starts again.  An alternative implementation would involve the
/// tracers acting on thread state directly; however, this would involve sharing
/// CurrentTask structures across multiple threads, which goes against the
/// intent of the design of CurrentTask.
pub struct CapturedThreadState {
    /// The thread state of the traced task.  This is copied out when the thread
    /// stops.
    pub thread_state: ThreadState<HeapRegs>,

    /// Indicates that the last ptrace operation changed the thread state, so it
    /// should be written back to the original thread.
    pub dirty: bool,
}

impl ArchSpecific for CapturedThreadState {
    fn is_arch32(&self) -> bool {
        self.thread_state.is_arch32()
    }
}

#[derive(Debug)]
pub struct RobustList {
    pub next: RobustListPtr,
}

pub type RobustListPtr =
    MappingMultiArchUserRef<RobustList, uapi::robust_list, uapi::arch32::robust_list>;

impl From<uapi::robust_list> for RobustList {
    fn from(robust_list: uapi::robust_list) -> Self {
        Self { next: RobustListPtr::from(robust_list.next) }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<uapi::arch32::robust_list> for RobustList {
    fn from(robust_list: uapi::arch32::robust_list) -> Self {
        Self { next: RobustListPtr::from(robust_list.next) }
    }
}

#[derive(Debug)]
pub struct RobustListHead {
    pub list: RobustList,
    pub futex_offset: isize,
}

pub type RobustListHeadPtr =
    MappingMultiArchUserRef<RobustListHead, uapi::robust_list_head, uapi::arch32::robust_list_head>;

impl From<uapi::robust_list_head> for RobustListHead {
    fn from(robust_list_head: uapi::robust_list_head) -> Self {
        Self {
            list: robust_list_head.list.into(),
            futex_offset: robust_list_head.futex_offset as isize,
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<uapi::arch32::robust_list_head> for RobustListHead {
    fn from(robust_list_head: uapi::arch32::robust_list_head) -> Self {
        Self {
            list: robust_list_head.list.into(),
            futex_offset: robust_list_head.futex_offset as isize,
        }
    }
}

pub struct TaskMutableState {
    // See https://man7.org/linux/man-pages/man2/set_tid_address.2.html
    pub clear_child_tid: UserRef<tid_t>,

    /// Signal handler related state. This is grouped together for when atomicity is needed during
    /// signal sending and delivery.
    signals: SignalState,

    /// Internal signals that have a higher priority than a regular signal.
    ///
    /// Storing these in a separate queue outside of `SignalState` ensures that internal signals
    /// can never be ignored or masked when dequeuing. The higher priority ensures that no user
    /// signal (e.g. from ptrace, which delays delivery) can jump ahead of them in the queue.
    ///
    /// This design is about implementation convenience rather than any observable behavior.
    kernel_signals: VecDeque<KernelSignal>,

    /// The exit status that this task exited with.
    exit_status: Option<ExitStatus>,

    /// Desired scheduler state for the task.
    pub scheduler_state: SchedulerState,

    /// The UTS namespace assigned to this thread.
    ///
    /// This field is kept in the mutable state because the UTS namespace of a thread
    /// can be forked using `clone()` or `unshare()` syscalls.
    ///
    /// We use UtsNamespaceHandle because the UTS properties can be modified
    /// by any other thread that shares this namespace.
    pub uts_ns: UtsNamespaceHandle,

    /// Bit that determines whether a newly started program can have privileges its parent does
    /// not have.  See Documentation/prctl/no_new_privs.txt in the Linux kernel for details.
    /// Note that Starnix does not currently implement the relevant privileges (e.g.,
    /// setuid/setgid binaries).  So, you can set this, but it does nothing other than get
    /// propagated to children.
    ///
    /// The documentation indicates that this can only ever be set to
    /// true, and it cannot be reverted to false.  Accessor methods
    /// for this field ensure this property.
    no_new_privs: bool,

    /// Userspace hint about how to adjust the OOM score for this process.
    pub oom_score_adj: i32,

    /// List of currently installed seccomp_filters
    pub seccomp_filters: SeccompFilterContainer,

    /// A pointer to the head of the robust futex list of this thread in
    /// userspace. See get_robust_list(2)
    pub robust_list_head: RobustListHeadPtr,

    /// The timer slack used to group timer expirations for the calling thread.
    ///
    /// Timers may expire up to `timerslack_ns` late, but never early.
    ///
    /// If this value is 0, the task's default timerslack is used.
    pub timerslack_ns: u64,

    /// The default value for `timerslack_ns`. This value cannot change during the lifetime of a
    /// task.
    ///
    /// This value is set to the `timerslack_ns` of the creating thread, and thus is not constant
    /// across tasks.
    pub default_timerslack_ns: u64,

    /// Information that a tracer needs to communicate with this process, if it
    /// is being traced.
    pub ptrace: Option<Box<PtraceState>>,

    /// Information that a tracer needs to inspect this process.
    pub captured_thread_state: Option<Box<CapturedThreadState>>,
}

impl TaskMutableState {
    pub fn no_new_privs(&self) -> bool {
        self.no_new_privs
    }

    /// Sets the value of no_new_privs to true.  It is an error to set
    /// it to anything else.
    pub fn enable_no_new_privs(&mut self) {
        self.no_new_privs = true;
    }

    pub fn get_timerslack<T: zx::Timeline>(&self) -> zx::Duration<T> {
        zx::Duration::from_nanos(self.timerslack_ns as i64)
    }

    /// Sets the current timerslack of the task to `ns`.
    ///
    /// If `ns` is zero, the current timerslack gets reset to the task's default timerslack.
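    ///
    /// # Example (illustrative sketch, not from the original file; `state` is
    /// assumed to be a `&mut TaskMutableState` obtained through the task's
    /// write lock)
    /// ```ignore
    /// state.set_timerslack_ns(50_000); // timers may now fire up to 50µs late
    /// state.set_timerslack_ns(0);      // restore the task's default slack
    /// ```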
    pub fn set_timerslack_ns(&mut self, ns: u64) {
        if ns == 0 {
            self.timerslack_ns = self.default_timerslack_ns;
        } else {
            self.timerslack_ns = ns;
        }
    }

    pub fn is_ptraced(&self) -> bool {
        self.ptrace.is_some()
    }

    pub fn is_ptrace_listening(&self) -> bool {
        self.ptrace.as_ref().is_some_and(|ptrace| ptrace.stop_status == PtraceStatus::Listening)
    }

    pub fn ptrace_on_signal_consume(&mut self) -> bool {
        self.ptrace.as_mut().is_some_and(|ptrace: &mut Box<PtraceState>| {
            if ptrace.stop_status.is_continuing() {
                ptrace.stop_status = PtraceStatus::Default;
                false
            } else {
                true
            }
        })
    }

    pub fn notify_ptracers(&mut self) {
        if let Some(ptrace) = &self.ptrace {
            ptrace.tracer_waiters().notify_all();
        }
    }

    pub fn wait_on_ptracer(&self, waiter: &Waiter) {
        if let Some(ptrace) = &self.ptrace {
            ptrace.tracee_waiters.wait_async(waiter);
        }
    }

    pub fn notify_ptracees(&mut self) {
        if let Some(ptrace) = &self.ptrace {
            ptrace.tracee_waiters.notify_all();
        }
    }

    pub fn take_captured_state(&mut self) -> Option<Box<CapturedThreadState>> {
        self.captured_thread_state.take()
    }

    pub fn copy_state_from(&mut self, current_task: &CurrentTask) {
        self.captured_thread_state = Some(Box::new(CapturedThreadState {
            thread_state: current_task.thread_state.extended_snapshot::<HeapRegs>(),
            dirty: false,
        }));
    }

    /// Returns the task's currently active signal mask.
    pub fn signal_mask(&self) -> SigSet {
        self.signals.mask()
    }

    /// Returns true if `signal` is currently blocked by this task's signal mask.
    pub fn is_signal_masked(&self, signal: Signal) -> bool {
        self.signals.mask().has_signal(signal)
    }

    /// Returns true if `signal` is blocked by the saved signal mask.
    ///
    /// Note that the current signal mask may still not be blocking the signal.
    pub fn is_signal_masked_by_saved_mask(&self, signal: Signal) -> bool {
        self.signals.saved_mask().is_some_and(|mask| mask.has_signal(signal))
    }

    /// Removes the currently active, temporary, signal mask and restores the
    /// previously active signal mask.
    pub fn restore_signal_mask(&mut self) {
        self.signals.restore_mask();
    }

    /// Returns true if the task's current `RunState` is blocked.
    pub fn is_blocked(&self) -> bool {
        self.signals.run_state.is_blocked()
    }

    /// Sets the task's `RunState` to `run_state`.
    pub fn set_run_state(&mut self, run_state: RunState) {
        self.signals.run_state = run_state;
    }

    pub fn run_state(&self) -> RunState {
        self.signals.run_state.clone()
    }

    pub fn on_signal_stack(&self, stack_pointer_register: u64) -> bool {
        self.signals
            .alt_stack
            .map(|signal_stack| sigaltstack_contains_pointer(&signal_stack, stack_pointer_register))
            .unwrap_or(false)
    }

    pub fn set_sigaltstack(&mut self, stack: Option<sigaltstack>) {
        self.signals.alt_stack = stack;
    }

    pub fn sigaltstack(&self) -> Option<sigaltstack> {
        self.signals.alt_stack
    }

    pub fn wait_on_signal(&mut self, waiter: &Waiter) {
        self.signals.signal_wait.wait_async(waiter);
    }

    pub fn signals_mut(&mut self) -> &mut SignalState {
        &mut self.signals
    }

    pub fn wait_on_signal_fd_events(
        &self,
        waiter: &Waiter,
        mask: SigSet,
        handler: EventHandler,
    ) -> WaitCanceler {
        self.signals.signal_wait.wait_async_signal_mask(waiter, mask, handler)
    }

    pub fn notify_signal_waiters(&self, signal: &Signal) {
        self.signals.signal_wait.notify_signal(signal);
    }

    /// Thaws the task if it has been frozen.
    pub fn thaw(&mut self) {
        if let RunState::Frozen(waiter) = self.run_state() {
            waiter.notify();
        }
    }

    pub fn is_frozen(&self) -> bool {
        matches!(self.run_state(), RunState::Frozen(_))
    }

    #[cfg(test)]
    pub fn kernel_signals_for_test(&self) -> &VecDeque<KernelSignal> {
        &self.kernel_signals
    }
}

#[apply(state_implementation!)]
impl TaskMutableState<Base = Task> {
    pub fn set_stopped(
        &mut self,
        stopped: StopState,
        siginfo: Option<SignalInfo>,
        current_task: Option<&CurrentTask>,
        event: Option<PtraceEventData>,
    ) {
        if stopped.ptrace_only() && self.ptrace.is_none() {
            return;
        }

        if self.base.load_stopped().is_illegal_transition(stopped) {
            return;
        }

        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When task can be
        // stopped inside user code, task will need to be either restarted or
        // stopped here.
        self.store_stopped(stopped);
        if stopped.is_stopped() {
            if let Some(ref current_task) = current_task {
                self.copy_state_from(current_task);
            }
        }
        if let Some(ptrace) = &mut self.ptrace {
            ptrace.set_last_signal(siginfo);
            ptrace.set_last_event(event);
        }
        if stopped == StopState::Waking || stopped == StopState::ForceWaking {
            self.notify_ptracees();
        }
        if !stopped.is_in_progress() {
            self.notify_ptracers();
        }
    }

    /// Enqueues a signal at the back of the task's signal queue.
    pub fn enqueue_signal(&mut self, signal: SignalInfo) {
        self.signals.enqueue(signal);
        self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
    }

    /// Enqueues the signal, allowing the signal to skip straight to the front of the task's queue.
    ///
    /// `enqueue_signal` is the more common API to use.
    ///
    /// Note that this will not guarantee that the signal is dequeued before any process-directed
    /// signals.
    pub fn enqueue_signal_front(&mut self, signal: SignalInfo) {
        self.signals.enqueue(signal);
        self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
    }

    /// Sets the current signal mask of the task.
    pub fn set_signal_mask(&mut self, mask: SigSet) {
        self.signals.set_mask(mask);
        self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
    }

    /// Sets a temporary signal mask for the task.
    ///
    /// This mask should be removed by a matching call to `restore_signal_mask`.
    pub fn set_temporary_signal_mask(&mut self, mask: SigSet) {
        self.signals.set_temporary_mask(mask);
        self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
    }

    /// Returns the number of pending signals for this task, without considering the signal mask.
    pub fn pending_signal_count(&self) -> usize {
        self.signals.num_queued() + self.base.thread_group().num_signals_queued()
    }

    /// Returns `true` if `signal` is pending for this task, without considering the signal mask.
    pub fn has_signal_pending(&self, signal: Signal) -> bool {
        self.signals.has_queued(signal) || self.base.thread_group().has_signal_queued(signal)
    }

    // Prepares a SignalInfo to be sent to the tracer, if any.
    pub fn prepare_signal_info(
        &mut self,
        stopped: StopState,
    ) -> Option<(Weak<ThreadGroup>, SignalInfo)> {
        if !stopped.is_stopped() {
            return None;
        }

        if let Some(ptrace) = &self.ptrace {
            if let Some(last_signal) = ptrace.get_last_signal_ref() {
                let signal_info = SignalInfo::with_detail(
                    SIGCHLD,
                    CLD_TRAPPED as i32,
                    SignalDetail::SIGCHLD {
                        pid: self.base.tid,
                        uid: self.base.real_creds().uid,
                        status: last_signal.signal.number() as i32,
                    },
                );

                return Some((ptrace.core_state.thread_group.clone(), signal_info));
            }
        }

        None
    }

    pub fn set_ptrace(&mut self, tracer: Option<Box<PtraceState>>) -> Result<(), Errno> {
        if tracer.is_some() && self.ptrace.is_some() {
            return error!(EPERM);
        }

        if tracer.is_none() {
            // Handle the case where this is called while the thread group is being released.
            if let Ok(tg_stop_state) = self.base.thread_group().load_stopped().as_in_progress() {
                self.set_stopped(tg_stop_state, None, None, None);
            }
        }
        self.ptrace = tracer;
        Ok(())
    }

    pub fn can_accept_ptrace_commands(&mut self) -> bool {
        !self.base.load_stopped().is_waking_or_awake()
            && self.is_ptraced()
            && !self.is_ptrace_listening()
    }

    fn store_stopped(&mut self, state: StopState) {
        // We don't actually use the guard but we require it to enforce that the
        // caller holds the thread group's mutable state lock (identified by
        // mutable access to the thread group's mutable state).

        self.base.stop_state.store(state, Ordering::Relaxed)
    }

    pub fn update_flags(&mut self, clear: TaskFlags, set: TaskFlags) {
        // We don't actually use the guard but we require it to enforce that the
        // caller holds the task's mutable state lock (identified by mutable
        // access to the task's mutable state).

        // `clear` and `set` must be disjoint: for disjoint flag sets, XOR and
        // OR produce the same result, which is what this assertion checks.
        debug_assert_eq!(clear ^ set, clear | set);
        let observed = self.base.flags();
        let swapped = self.base.flags.swap((observed | set) & !clear, Ordering::Relaxed);
        debug_assert_eq!(swapped, observed);
    }

    pub fn set_flags(&mut self, flag: TaskFlags, v: bool) {
        let (clear, set) = if v { (TaskFlags::empty(), flag) } else { (flag, TaskFlags::empty()) };

        self.update_flags(clear, set);
    }

    pub fn set_exit_status(&mut self, status: ExitStatus) {
        self.set_flags(TaskFlags::EXITED, true);
        self.exit_status = Some(status);
    }

    pub fn set_exit_status_if_not_already(&mut self, status: ExitStatus) {
        self.set_flags(TaskFlags::EXITED, true);
        self.exit_status.get_or_insert(status);
    }

    /// The set of pending signals for the task, including the signals pending for the thread
    /// group.
    pub fn pending_signals(&self) -> SigSet {
        self.signals.pending() | self.base.thread_group().get_pending_signals()
    }

    /// The set of pending signals for the task specifically, not including the signals pending
    /// for the thread group.
    pub fn task_specific_pending_signals(&self) -> SigSet {
        self.signals.pending()
    }

    /// Returns true if any currently pending signal is allowed by `mask`.
    pub fn is_any_signal_allowed_by_mask(&self, mask: SigSet) -> bool {
        self.signals.is_any_allowed_by_mask(mask)
            || self.base.thread_group().is_any_signal_allowed_by_mask(mask)
    }

    /// Returns whether or not a signal is pending for this task, taking the current
    /// signal mask into account.
    pub fn is_any_signal_pending(&self) -> bool {
        let mask = self.signal_mask();
        self.signals.is_any_pending()
            || self.base.thread_group().is_any_signal_allowed_by_mask(mask)
    }

    /// Returns the next pending signal that passes `predicate`.
    fn take_next_signal_where<F>(&mut self, predicate: F) -> Option<SignalInfo>
    where
        F: Fn(&SignalInfo) -> bool,
    {
        if let Some(signal) = self.base.thread_group().take_next_signal_where(&predicate) {
            Some(signal)
        } else {
            let s = self.signals.take_next_where(&predicate);
            self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
            s
        }
    }

    /// Removes and returns the next pending `signal` for this task.
    ///
    /// Returns `None` if `siginfo` is a blocked signal, or no such signal is pending.
    pub fn take_specific_signal(&mut self, siginfo: SignalInfo) -> Option<SignalInfo> {
        let signal_mask = self.signal_mask();
        if signal_mask.has_signal(siginfo.signal) {
            return None;
        }

        let predicate = |s: &SignalInfo| s.signal == siginfo.signal;
        self.take_next_signal_where(predicate)
    }

    /// Removes and returns a pending signal that is unblocked by the current signal mask.
    ///
    /// Returns `None` if there are no unblocked signals pending.
    pub fn take_any_signal(&mut self) -> Option<SignalInfo> {
        self.take_signal_with_mask(self.signal_mask())
    }

    /// Removes and returns a pending signal that is unblocked by `signal_mask`.
    ///
    /// Returns `None` if there are no signals pending that are unblocked by `signal_mask`.
    pub fn take_signal_with_mask(&mut self, signal_mask: SigSet) -> Option<SignalInfo> {
        let predicate = |s: &SignalInfo| !signal_mask.has_signal(s.signal) || s.force;
        self.take_next_signal_where(predicate)
    }

    /// Enqueues an internal signal at the back of the task's kernel signal queue.
    pub fn enqueue_kernel_signal(&mut self, signal: KernelSignal) {
        self.kernel_signals.push_back(signal);
        self.set_flags(TaskFlags::KERNEL_SIGNALS_AVAILABLE, true);
    }

    /// Removes and returns a pending internal signal.
    ///
    /// Returns `None` if there are no signals pending.
    pub fn take_kernel_signal(&mut self) -> Option<KernelSignal> {
        let signal = self.kernel_signals.pop_front();
        if self.kernel_signals.is_empty() {
            self.set_flags(TaskFlags::KERNEL_SIGNALS_AVAILABLE, false);
        }
        signal
    }

    #[cfg(test)]
    pub fn queued_signal_count(&self, signal: Signal) -> usize {
        self.signals.queued_count(signal)
            + self.base.thread_group().pending_signals.lock().queued_count(signal)
    }
}
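
// Illustrative example (not part of the original file): `update_flags` requires
// `clear` and `set` to be disjoint, and for disjoint bitflags XOR and OR
// produce the same result, which is exactly the invariant its debug_assert
// encodes.
#[cfg(test)]
mod task_flags_disjointness_example {
    use super::*;

    #[test]
    fn xor_equals_or_only_when_flags_are_disjoint() {
        let clear = TaskFlags::EXITED;
        let set = TaskFlags::DUMP_ON_EXIT;
        assert_eq!(clear ^ set, clear | set); // disjoint: the assertion holds

        let overlapping = TaskFlags::EXITED | TaskFlags::SIGNALS_AVAILABLE;
        // Overlapping sets diverge, so update_flags would panic in debug builds.
        assert_ne!(overlapping ^ TaskFlags::EXITED, overlapping | TaskFlags::EXITED);
    }
}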

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TaskStateCode {
    // Task is being executed.
    Running,

    // Task is waiting for an event.
    Sleeping,

    // Tracing stop
    TracingStop,

    // Task has exited.
    Zombie,
}

impl TaskStateCode {
    pub fn code_char(&self) -> char {
        match self {
            TaskStateCode::Running => 'R',
            TaskStateCode::Sleeping => 'S',
            TaskStateCode::TracingStop => 't',
            TaskStateCode::Zombie => 'Z',
        }
    }

    pub fn name(&self) -> &'static str {
        match self {
            TaskStateCode::Running => "running",
            TaskStateCode::Sleeping => "sleeping",
            TaskStateCode::TracingStop => "tracing stop",
            TaskStateCode::Zombie => "zombie",
        }
    }
}
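
// Illustrative example (not part of the original file): these single-letter
// codes follow the state column conventions of /proc/<pid>/stat.
#[cfg(test)]
mod task_state_code_example {
    use super::*;

    #[test]
    fn state_codes_match_proc_conventions() {
        assert_eq!(TaskStateCode::Running.code_char(), 'R');
        assert_eq!(TaskStateCode::TracingStop.code_char(), 't');
        assert_eq!(TaskStateCode::Zombie.name(), "zombie");
    }
}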

/// The information of the task that needs to be available to the `ThreadGroup` while computing
/// which process a wait can target. It is necessary to share this data with the `ThreadGroup` so
/// that it remains available while the task is being dropped, when the task can no longer be
/// reached through a weak pointer.
#[derive(Debug)]
pub struct TaskPersistentInfoState {
    /// Immutable information about the task
    tid: tid_t,
    thread_group_key: ThreadGroupKey,

    /// The command of this task.
    command: Mutex<TaskCommand>,

    /// The security credentials for this task. These are only set when the task is the CurrentTask,
    /// or on task creation.
    creds: RcuArc<Credentials>,

    // A lock for the security credentials. Writers must take the lock; readers that need to ensure
    // that the task state does not change may take the lock.
    creds_lock: RwLock<()>,
}

/// Guard for reading locked credentials.
pub struct CredentialsReadGuard<'a> {
    _lock: RwLockReadGuard<'a, ()>,
    creds: RcuReadGuard<Credentials>,
}

impl<'a> Deref for CredentialsReadGuard<'a> {
    type Target = Credentials;

    fn deref(&self) -> &Self::Target {
        self.creds.deref()
    }
}

/// Guard for writing credentials. No `CredentialsReadGuard` for the same task can exist
/// concurrently.
pub struct CredentialsWriteGuard<'a> {
    _lock: RwLockWriteGuard<'a, ()>,
    creds: &'a RcuArc<Credentials>,
}

impl<'a> CredentialsWriteGuard<'a> {
    pub fn update(&mut self, creds: Arc<Credentials>) {
        self.creds.update(creds);
    }
}

impl TaskPersistentInfoState {
    fn new(
        tid: tid_t,
        thread_group_key: ThreadGroupKey,
        command: TaskCommand,
        creds: Arc<Credentials>,
    ) -> TaskPersistentInfo {
        Arc::new(Self {
            tid,
            thread_group_key,
            command: Mutex::new(command),
            creds: RcuArc::new(creds),
            creds_lock: RwLock::new(()),
        })
    }

    pub fn tid(&self) -> tid_t {
        self.tid
    }

    pub fn pid(&self) -> pid_t {
        self.thread_group_key.pid()
    }

    pub fn command_guard(&self) -> MutexGuard<'_, TaskCommand> {
        self.command.lock()
    }

    /// Snapshots the credentials, returning a short-lived RCU-guarded reference.
    pub fn real_creds(&self) -> RcuReadGuard<Credentials> {
        self.creds.read()
    }

    /// Snapshots the credentials, returning a new reference. Use this if you need to stash the
    /// credentials somewhere.
    pub fn clone_creds(&self) -> Arc<Credentials> {
        self.creds.to_arc()
    }

    /// Returns a read lock on the credentials. This is appropriate if you need to guarantee that
    /// the Task's credentials will not change during a security-sensitive operation.
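    ///
    /// # Example (illustrative sketch, not from the original file;
    /// `check_permission` is a hypothetical helper)
    /// ```ignore
    /// let creds = task.persistent_info.lock_creds();
    /// check_permission(&*creds)?; // creds cannot change while the guard is alive
    /// ```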
    pub fn lock_creds(&self) -> CredentialsReadGuard<'_> {
        let lock = self.creds_lock.read();
        CredentialsReadGuard { _lock: lock, creds: self.creds.read() }
    }

    /// Locks the credentials for writing.
    /// SAFETY: Only use from CurrentTask, and keep the subjective credentials stored in CurrentTask
    /// in sync.
    pub(in crate::task) unsafe fn write_creds(&self) -> CredentialsWriteGuard<'_> {
        let lock = self.creds_lock.write();
        CredentialsWriteGuard { _lock: lock, creds: &self.creds }
    }
}

pub type TaskPersistentInfo = Arc<TaskPersistentInfoState>;

/// A unit of execution.
///
/// A task is the primary unit of execution in the Starnix kernel. Most tasks are *user* tasks,
/// which have an associated Zircon thread. The Zircon thread switches between restricted mode,
/// in which the thread runs userspace code, and normal mode, in which the thread runs Starnix
/// code.
///
/// Tasks track the resources used by userspace by referencing various objects, such as an
/// `FdTable`, a `MemoryManager`, and an `FsContext`. Many tasks can share references to these
/// objects. In principle, which objects are shared between which tasks can be largely arbitrary,
/// but there are common patterns of sharing. For example, tasks created with `pthread_create`
/// will share the `FdTable`, `MemoryManager`, and `FsContext` and are often called "threads" by
/// userspace programmers. Tasks created by `posix_spawn` do not share these objects and are often
/// called "processes" by userspace programmers. However, inside the kernel, there is no clear
/// definition of a "thread" or a "process".
///
/// During boot, the kernel creates the first task, often called `init`. The vast majority of other
/// tasks are created as transitive clones (e.g., using `clone(2)`) of that task. Sometimes, the
/// kernel will create new tasks from whole cloth, either with a corresponding userspace component
/// or to represent some background work inside the kernel.
///
/// See also `CurrentTask`, which represents the task corresponding to the thread that is currently
/// executing.
pub struct Task {
    /// Weak reference to the `OwnedRef` of this `Task`. This allows retrieving the
    /// `TempRef` from a raw `Task`.
    pub weak_self: WeakRef<Self>,

    /// A unique identifier for this task.
    ///
    /// This value can be read in userspace using `gettid(2)`. In general, this value
    /// is different from the value returned by `getpid(2)`, which returns the `id` of the leader
    /// of the `thread_group`.
    pub tid: tid_t,

    /// The process key of this task.
    pub thread_group_key: ThreadGroupKey,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// The thread group to which this task belongs.
    ///
    /// The group of tasks in a thread group roughly corresponds to the userspace notion of a
    /// process.
    pub thread_group: Arc<ThreadGroup>,

    /// A handle to the underlying Zircon thread object.
    ///
    /// Some tasks lack an underlying Zircon thread. These tasks are used internally by the
    /// Starnix kernel to track background work, typically on a `kthread`.
    pub thread: RwLock<Option<Arc<zx::Thread>>>,

    /// The file descriptor table for this task.
    ///
    /// This table can be shared by many tasks.
    pub files: FdTable,

    /// The memory manager for this task.  This is `None` only for system tasks.
    pub mm: RcuOptionArc<MemoryManager>,

    /// The file system for this task.
    fs: RcuOptionArc<FsContext>,

    /// The namespace for abstract AF_UNIX sockets for this task.
    pub abstract_socket_namespace: Arc<AbstractUnixSocketNamespace>,

    /// The namespace for AF_VSOCK for this task.
    pub abstract_vsock_namespace: Arc<AbstractVsockSocketNamespace>,

    /// The stop state of the task, distinct from the stop state of the thread group.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    stop_state: AtomicStopState,

    /// The flags for the task.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    flags: AtomicTaskFlags,

    /// The mutable state of the Task.
    mutable_state: RwLock<TaskMutableState>,

    /// The information of the task that needs to be available to the `ThreadGroup` while computing
    /// which process a wait can target.
    /// Contains the command line, the task credentials and the exit signal.
    /// See `TaskPersistentInfo` for more information.
    pub persistent_info: TaskPersistentInfo,

    /// For vfork and clone() with CLONE_VFORK, this is signaled when the task exits or calls
    /// execve(). It allows the calling task to block until the fork has been completed. Only
    /// populated when created with the CLONE_VFORK flag.
    vfork_event: Option<Arc<zx::Event>>,

    /// Tells you whether there are currently seccomp filters installed, without
    /// holding a lock.
    pub seccomp_filter_state: SeccompState,

    /// Tells you whether you are tracing syscall entry / exit, without holding a lock.
    pub trace_syscalls: AtomicBool,

    // The pid directory, so it doesn't have to be generated and thrown away on every access.
    // See https://fxbug.dev/291962828 for details.
    pub proc_pid_directory_cache: Mutex<Option<FsNodeHandle>>,

    /// The Linux Security Modules state for this thread group. This should be the last member of
    /// this struct.
    pub security_state: security::TaskState,
}

/// The decoded cross-platform parts we care about for page fault exception reports.
#[derive(Debug)]
pub struct PageFaultExceptionReport {
    pub faulting_address: u64,
    pub not_present: bool, // Set when the page fault was due to a not-present page.
    pub is_write: bool,    // Set when the triggering memory operation was a write.
    pub is_execute: bool,  // Set when the triggering memory operation was an execute.
}

impl Task {
    pub fn kernel(&self) -> &Arc<Kernel> {
        &self.kernel
    }

    pub fn thread_group(&self) -> &Arc<ThreadGroup> {
        &self.thread_group
    }

    pub fn has_same_address_space(&self, other: Option<&Arc<MemoryManager>>) -> bool {
        match (self.mm(), other) {
            (Ok(this), Some(other)) => Arc::ptr_eq(&this, other),
            (Err(_), None) => true,
            _ => false,
        }
    }

    pub fn flags(&self) -> TaskFlags {
        self.flags.load(Ordering::Relaxed)
    }

    /// When the task exits, if there is a notification that needs to propagate
    /// to a ptracer, make sure it will propagate.
    pub fn set_ptrace_zombie(&self, pids: &mut crate::task::PidTable) {
        let pgid = self.thread_group().read().process_group.leader;
        let exit_signal = self.thread_group().read().exit_signal.clone();
        let mut state = self.write();
        state.set_stopped(StopState::ForceAwake, None, None, None);
        if let Some(ptrace) = &mut state.ptrace {
            // Add a zombie that the ptracer will notice.
            ptrace.last_signal_waitable = true;
            let tracer_pid = ptrace.get_pid();
            let tracer_tg = pids.get_thread_group(tracer_pid);
            if let Some(tracer_tg) = tracer_tg {
                drop(state);
                let mut tracer_state = tracer_tg.write();

                let exit_status = self.exit_status().unwrap_or_else(|| {
                    starnix_logging::log_error!("Exiting without an exit code.");
                    ExitStatus::Exit(u8::MAX)
                });
                let uid = self.real_creds().uid;
                let exit_info = ProcessExitInfo { status: exit_status, exit_signal };
                let zombie = ZombieProcess {
                    thread_group_key: self.thread_group_key.clone(),
                    pgid,
                    uid,
                    exit_info,
                    // ptrace doesn't need this.
                    time_stats: TaskTimeStats::default(),
                    is_canonical: false,
                };

                tracer_state.zombie_ptracees.add(pids, self.tid, zombie);
            };
        }
    }

    /// Disconnects this task from the tracer, if the tracer is still running.
    pub fn ptrace_disconnect(&mut self, pids: &PidTable) {
        let mut state = self.write();
        let ptracer_pid = state.ptrace.as_ref().map(|ptrace| ptrace.get_pid());
        if let Some(ptracer_pid) = ptracer_pid {
            let _ = state.set_ptrace(None);
            if let Some(ProcessEntryRef::Process(tg)) = pids.get_process(ptracer_pid) {
                let tid = self.get_tid();
                drop(state);
                tg.ptracees.lock().remove(&tid);
            }
        }
    }

    pub fn exit_status(&self) -> Option<ExitStatus> {
        self.is_exitted().then(|| self.read().exit_status.clone()).flatten()
    }

    pub fn is_exitted(&self) -> bool {
        self.flags().contains(TaskFlags::EXITED)
    }

    pub fn load_stopped(&self) -> StopState {
        self.stop_state.load(Ordering::Relaxed)
    }

    /// Upgrades a weak reference to a Task, returning an `ESRCH` errno if the reference can no
    /// longer be upgraded.
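    ///
    /// # Example (illustrative)
    /// ```ignore
    /// let task: TempRef<'_, Task> = Task::from_weak(&weak_task)?; // errno!(ESRCH) if gone
    /// ```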
    pub fn from_weak(weak: &WeakRef<Task>) -> Result<TempRef<'_, Task>, Errno> {
        weak.upgrade().ok_or_else(|| errno!(ESRCH))
    }

    /// Internal function for creating a Task object. Useful when you need to specify the value of
    /// every field. create_process and create_thread are more likely to be what you want.
    ///
    /// Any fields that should be initialized fresh for every task, even if the task was created
    /// with fork, are initialized to their defaults inside this function. All other fields are
    /// passed as parameters.
    #[allow(clippy::let_and_return)]
    pub fn new(
        tid: tid_t,
        command: TaskCommand,
        thread_group: Arc<ThreadGroup>,
        thread: Option<zx::Thread>,
        files: FdTable,
        mm: Option<Arc<MemoryManager>>,
        // The only case where fs should be None is when building the initial task, which is
        // itself used to build the initial FsContext.
        fs: Arc<FsContext>,
        creds: Arc<Credentials>,
        abstract_socket_namespace: Arc<AbstractUnixSocketNamespace>,
        abstract_vsock_namespace: Arc<AbstractVsockSocketNamespace>,
        signal_mask: SigSet,
        kernel_signals: VecDeque<KernelSignal>,
        vfork_event: Option<Arc<zx::Event>>,
        scheduler_state: SchedulerState,
        uts_ns: UtsNamespaceHandle,
        no_new_privs: bool,
        seccomp_filter_state: SeccompState,
        seccomp_filters: SeccompFilterContainer,
        robust_list_head: RobustListHeadPtr,
        timerslack_ns: u64,
        security_state: security::TaskState,
    ) -> OwnedRef<Self> {
        let thread_group_key = ThreadGroupKey::from(&thread_group);
        OwnedRef::new_cyclic(|weak_self| {
            let task = Task {
                weak_self,
                tid,
                thread_group_key: thread_group_key.clone(),
                kernel: Arc::clone(&thread_group.kernel),
                thread_group,
                thread: RwLock::new(thread.map(Arc::new)),
                files,
                mm: RcuOptionArc::new(mm),
                fs: RcuOptionArc::new(Some(fs)),
                abstract_socket_namespace,
                abstract_vsock_namespace,
                vfork_event,
                stop_state: AtomicStopState::new(StopState::Awake),
                flags: AtomicTaskFlags::new(TaskFlags::empty()),
                mutable_state: RwLock::new(TaskMutableState {
                    clear_child_tid: UserRef::default(),
                    signals: SignalState::with_mask(signal_mask),
                    kernel_signals,
                    exit_status: None,
                    scheduler_state,
                    uts_ns,
                    no_new_privs,
                    oom_score_adj: Default::default(),
                    seccomp_filters,
                    robust_list_head,
                    timerslack_ns,
                    // The default timerslack is set to the current timerslack of the creating thread.
                    default_timerslack_ns: timerslack_ns,
                    ptrace: None,
                    captured_thread_state: None,
                }),
                persistent_info: TaskPersistentInfoState::new(
                    tid,
                    thread_group_key,
                    command,
                    creds,
                ),
                seccomp_filter_state,
                trace_syscalls: AtomicBool::new(false),
                proc_pid_directory_cache: Mutex::new(None),
                security_state,
            };

            #[cfg(any(test, debug_assertions))]
            {
                // Note that `Kernel::pids` is already locked by the caller of `Task::new()`.
                let _l1 = task.read();
                let _l2 = task.persistent_info.lock_creds();
                let _l3 = task.persistent_info.command_guard();
            }
            task
        })
    }

    state_accessor!(Task, mutable_state);

    /// Returns the real credentials of the task as a short-lived RCU-guarded reference. These
    /// credentials are used to check permissions for actions performed on the task. If the task
    /// itself is performing an action, use `CurrentTask::current_creds` instead. This does not
    /// lock the credentials.
    pub fn real_creds(&self) -> RcuReadGuard<Credentials> {
        self.persistent_info.real_creds()
    }

    /// Returns a new long-lived reference to the real credentials of the task.  These credentials
    /// are used to check permissions for actions performed on the task. If the task itself is
    /// performing an action, use `CurrentTask::current_creds` instead. This does not lock the
    /// credentials.
    pub fn clone_creds(&self) -> Arc<Credentials> {
        self.persistent_info.clone_creds()
    }

    pub fn ptracer_task(&self) -> WeakRef<Task> {
        let ptracer = {
            let state = self.read();
            state.ptrace.as_ref().map(|p| p.core_state.pid)
        };

        let Some(ptracer) = ptracer else {
            return WeakRef::default();
        };

        self.get_task(ptracer)
    }

    pub fn fs(&self) -> Arc<FsContext> {
        self.fs.to_option_arc().expect("fs must be set")
    }

    pub fn has_shared_fs(&self) -> bool {
        let maybe_fs = self.fs.to_option_arc();
        // This check is incorrect because someone else could be holding a temporary Arc to the
        // FsContext and therefore increasing the strong count.
        maybe_fs.is_some_and(|fs| Arc::strong_count(&fs) > 2usize)
    }

    #[track_caller]
    pub fn mm(&self) -> Result<Arc<MemoryManager>, Errno> {
        self.mm.to_option_arc().ok_or_else(|| errno!(EINVAL))
    }

    pub fn unshare_fs(&self) {
        let fs = self.fs().fork();
        self.fs.update(Some(fs));
    }

    /// Modify the given elements of the scheduler state with new values and update the
    /// task's thread's role.
    pub(crate) fn set_scheduler_policy_priority_and_reset_on_fork(
        &self,
        policy: SchedulingPolicy,
        priority: RealtimePriority,
        reset_on_fork: bool,
    ) -> Result<(), Errno> {
        self.update_scheduler_state_then_role(|scheduler_state| {
            scheduler_state.policy = policy;
            scheduler_state.realtime_priority = priority;
            scheduler_state.reset_on_fork = reset_on_fork;
        })
    }

    /// Modify the scheduler state's priority and update the task's thread's role.
    pub(crate) fn set_scheduler_priority(&self, priority: RealtimePriority) -> Result<(), Errno> {
        self.update_scheduler_state_then_role(|scheduler_state| {
            scheduler_state.realtime_priority = priority
        })
    }

    /// Modify the scheduler state's nice value and update the task's thread's role.
    pub(crate) fn set_scheduler_nice(&self, nice: NormalPriority) -> Result<(), Errno> {
        self.update_scheduler_state_then_role(|scheduler_state| {
            scheduler_state.normal_priority = nice
        })
    }

    /// Overwrite the existing scheduler state with a new one and update the task's thread's role.
    pub fn set_scheduler_state(&self, scheduler_state: SchedulerState) -> Result<(), Errno> {
        self.update_scheduler_state_then_role(|task_scheduler_state| {
            *task_scheduler_state = scheduler_state
        })
    }

    /// Update the task's thread's role based on its current scheduler state without making any
    /// changes to the state.
    ///
    /// This should be called on tasks that have newly created threads, e.g. after cloning.
    pub fn sync_scheduler_state_to_role(&self) -> Result<(), Errno> {
        self.update_scheduler_state_then_role(|_| {})
    }

    fn update_scheduler_state_then_role(
        &self,
        updater: impl FnOnce(&mut SchedulerState),
    ) -> Result<(), Errno> {
        let new_scheduler_state = {
            // Hold the task state lock as briefly as possible; it's not needed to update the role.
            let mut state = self.write();
            updater(&mut state.scheduler_state);
            state.scheduler_state
        };
        self.thread_group().kernel.scheduler.set_thread_role(self, new_scheduler_state)?;
        Ok(())
    }

    /// Signals the vfork event, if any, to unblock waiters.
    pub fn signal_vfork(&self) {
        if let Some(event) = &self.vfork_event {
            if let Err(status) = event.signal(Signals::NONE, Signals::USER_0) {
                log_warn!("Failed to set vfork signal {status}");
            }
        };
    }

    /// Blocks the caller until the task has exited or executed execve(). This is used to implement
    /// vfork() and clone(... CLONE_VFORK, ...). The task must have been created with CLONE_VFORK.
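    ///
    /// # Example (illustrative; `child_weak` is a hypothetical `WeakRef<Task>`
    /// to the vforked child)
    /// ```ignore
    /// current_task.wait_for_execve(child_weak)?; // blocks until exit or execve()
    /// ```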
    pub fn wait_for_execve(&self, task_to_wait: WeakRef<Task>) -> Result<(), Errno> {
        let event = task_to_wait.upgrade().and_then(|t| t.vfork_event.clone());
        if let Some(event) = event {
            event
                .wait_one(zx::Signals::USER_0, zx::MonotonicInstant::INFINITE)
                .map_err(|status| from_status_like_fdio!(status))?;
        }
        Ok(())
    }

    /// If needed, clear the child tid for this task.
    ///
    /// Userspace can ask us to clear the child tid and issue a futex wake at
    /// the child tid address when we tear down a task. For example, bionic
    /// uses this mechanism to implement pthread_join. The thread that calls
    /// pthread_join sleeps using FUTEX_WAIT on the child tid address. We wake
    /// them up here to let them know the thread is done.
    pub fn clear_child_tid_if_needed<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
    where
        L: LockBefore<TerminalLock>,
    {
        let mut state = self.write();
        let user_tid = state.clear_child_tid;
        if !user_tid.is_null() {
            let zero: tid_t = 0;
            self.write_object(user_tid, &zero)?;
            self.kernel().shared_futexes.wake(
                locked,
                self,
                user_tid.addr(),
                usize::MAX,
                FUTEX_BITSET_MATCH_ANY,
            )?;
            state.clear_child_tid = UserRef::default();
        }
        Ok(())
    }

    pub fn get_task(&self, tid: tid_t) -> WeakRef<Task> {
        self.kernel().pids.read().get_task(tid)
    }

    pub fn get_pid(&self) -> pid_t {
        self.thread_group_key.pid()
    }

    pub fn get_tid(&self) -> tid_t {
        self.tid
    }

    pub fn is_leader(&self) -> bool {
        self.get_pid() == self.get_tid()
    }

    pub fn read_argv(&self, max_len: usize) -> Result<Vec<FsString>, Errno> {
        // argv is empty for kthreads.
        let Ok(mm) = self.mm() else {
            return Ok(vec![]);
        };
        let (argv_start, argv_end) = {
            let mm_state = mm.state.read();
            (mm_state.argv_start, mm_state.argv_end)
        };

        let len_to_read = std::cmp::min(argv_end - argv_start, max_len);
        self.read_nul_delimited_c_string_list(argv_start, len_to_read)
    }

    pub fn read_argv0(&self) -> Result<FsString, Errno> {
        // argv is empty for kthreads.
        let Ok(mm) = self.mm() else {
            return Ok(FsString::default());
        };
        let argv_start = {
            let mm_state = mm.state.read();
            mm_state.argv_start
        };
        // Assuming a 64-bit arch width is fine for a type that's just u8's on all arches.
        let argv_start = UserCString::new(&ArchWidth::Arch64, argv_start);
        self.read_path(argv_start)
    }

    pub fn read_env(&self, max_len: usize) -> Result<Vec<FsString>, Errno> {
        // The environment is empty for kthreads.
        let Ok(mm) = self.mm() else { return Ok(vec![]) };
        let (env_start, env_end) = {
            let mm_state = mm.state.read();
            (mm_state.environ_start, mm_state.environ_end)
        };

        let len_to_read = std::cmp::min(env_end - env_start, max_len);
        self.read_nul_delimited_c_string_list(env_start, len_to_read)
    }

    pub fn thread_runtime_info(&self) -> Result<zx::TaskRuntimeInfo, Errno> {
        self.thread
            .read()
            .as_ref()
            .ok_or_else(|| errno!(EINVAL))?
            .get_runtime_info()
            .map_err(|status| from_status_like_fdio!(status))
    }

    pub fn real_fscred(&self) -> FsCred {
        self.real_creds().as_fscred()
    }

    /// Interrupts the task.
    ///
    /// This will interrupt any blocking syscall if the task is blocked on one.
    /// The signal_state of the task must not be locked.
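    ///
    /// # Example (illustrative)
    /// ```ignore
    /// task.interrupt(); // a task blocked in a wait wakes up and can observe EINTR
    /// ```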
1382    pub fn interrupt(&self) {
1383        self.read().signals.run_state.wake();
1384        if let Some(thread) = self.thread.read().as_ref() {
1385            #[allow(
1386                clippy::undocumented_unsafe_blocks,
1387                reason = "Force documented unsafe blocks in Starnix"
1388            )]
1389            let status = unsafe { zx::sys::zx_restricted_kick(thread.raw_handle(), 0) };
1390            if status != zx::sys::ZX_OK {
1391                // zx_restricted_kick() could return ZX_ERR_BAD_STATE if the target thread is already in the
1392                // DYING or DEAD states. That's fine since it means that the task is in the process of
1393                // tearing down, so allow it.
1394                assert_eq!(status, zx::sys::ZX_ERR_BAD_STATE);
1395            }
1396        }
1397    }
1398
1399    pub fn command(&self) -> TaskCommand {
1400        self.persistent_info.command.lock().clone()
1401    }
1402
    pub fn set_command_name(&self, mut new_name: TaskCommand) {
        // If we're going to update the process name, see if we can get a longer one than is
        // normally provided in the Linux uapi. Only choose the argv0-based name if it's a
        // superset of the uapi-provided name, to avoid clobbering the name provided by the
        // user.
        if let Ok(argv0) = self.read_argv0() {
            let argv0 = TaskCommand::from_path_bytes(&argv0);
            if let Some(embedded_name) = argv0.try_embed(&new_name) {
                new_name = embedded_name;
            }
        }

        // Acquire this before modifying Zircon state to ensure consistency under concurrent
        // access. Ideally this would also guard the argv[0] read above, but it can't due to
        // lock cycles with SELinux checks.
        let mut command_guard = self.persistent_info.command_guard();

        // Set the name on the Linux thread.
        if let Some(thread) = self.thread.read().as_ref() {
            set_zx_name(&**thread, new_name.as_bytes());
        }

        // If this is the thread group leader, use this name for the process too.
        if self.is_leader() {
            set_zx_name(&self.thread_group().process, new_name.as_bytes());
            let _ = zx::Thread::raise_user_exception(
                zx::RaiseExceptionOptions::TARGET_JOB_DEBUGGER,
                zx::sys::ZX_EXCP_USER_CODE_PROCESS_NAME_CHANGED,
                0,
            );
        }

        // Avoid a lock cycle by dropping the guard before notifying memory attribution of the
        // change.
        *command_guard = new_name;
        drop(command_guard);

        if self.is_leader() {
            if let Some(notifier) = &self.thread_group().read().notifier {
                let _ = notifier.send(MemoryAttributionLifecycleEvent::name_change(self.tid));
            }
        }
    }

    pub fn set_seccomp_state(&self, state: SeccompStateValue) -> Result<(), Errno> {
        self.seccomp_filter_state.set(&state)
    }

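    /// Returns the task's lifecycle state in the same taxonomy as the `State:`
    /// field of `/proc/<pid>/status`.
    ///
    /// A sketch of rendering it (the single-letter mapping shown is
    /// illustrative, not this type's API, and the match may not be
    /// exhaustive):
    ///
    /// ```ignore
    /// let letter = match task.state_code() {
    ///     TaskStateCode::Running => 'R',
    ///     TaskStateCode::Sleeping => 'S',
    ///     TaskStateCode::TracingStop => 't',
    ///     TaskStateCode::Zombie => 'Z',
    /// };
    /// ```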
    pub fn state_code(&self) -> TaskStateCode {
        let status = self.read();
        if status.exit_status.is_some() {
            TaskStateCode::Zombie
        } else if status.signals.run_state.is_blocked() {
            let stop_state = self.load_stopped();
            if stop_state.ptrace_only() && stop_state.is_stopped() {
                TaskStateCode::TracingStop
            } else {
                TaskStateCode::Sleeping
            }
        } else {
            TaskStateCode::Running
        }
    }

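    /// Returns the CPU time consumed by this task. A sketch of a call site
    /// (the tick conversion is a hypothetical helper):
    ///
    /// ```ignore
    /// let stats = task.time_stats();
    /// let utime_ticks = duration_to_ticks(stats.user_time); // hypothetical
    /// ```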
    pub fn time_stats(&self) -> TaskTimeStats {
        use zx::Task;
        let info = match &*self.thread.read() {
            Some(thread) => thread.get_runtime_info().expect("Failed to get thread stats"),
            None => return TaskTimeStats::default(),
        };

        TaskTimeStats {
            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
            system_time: zx::MonotonicDuration::default(),
        }
    }

    pub fn get_signal_action(&self, signal: Signal) -> sigaction_t {
        self.thread_group().signal_actions.get(signal)
    }

    pub fn should_check_for_pending_signals(&self) -> bool {
        self.flags().intersects(
            TaskFlags::KERNEL_SIGNALS_AVAILABLE
                | TaskFlags::SIGNALS_AVAILABLE
                | TaskFlags::TEMPORARY_SIGNAL_MASK,
        ) || self.thread_group.has_pending_signals.load(Ordering::Relaxed)
    }

    pub fn record_pid_koid_mapping(&self) {
        let Some(ref mapping_table) = *self.kernel().pid_to_koid_mapping.read() else { return };

        let pkoid = self.thread_group().get_process_koid().ok();
        let tkoid = self.thread.read().as_ref().and_then(|t| t.koid().ok());
        mapping_table.write().insert(self.tid, KoidPair { process: pkoid, thread: tkoid });
    }
}

impl Releasable for Task {
    type Context<'a> = (
        ThreadState<RegisterStorageEnum>,
        &'a mut Locked<TaskRelease>,
        RwLockWriteGuard<'a, PidTable>,
    );

    fn release<'a>(mut self, context: Self::Context<'a>) {
        let (thread_state, locked, pids) = context;

        *self.proc_pid_directory_cache.get_mut() = None;
        self.ptrace_disconnect(&pids);

        std::mem::drop(pids);

        self.files.release();

        self.signal_vfork();

        // Drop fields that can end up owning an FsNode to ensure no FsNodes are owned by this
        // task.
        self.fs.update(None);
        self.mm.update(None);

        // Rebuild a temporary CurrentTask to run the release actions that require a CurrentTask.
        let current_task = CurrentTask::new(OwnedRef::new(self), thread_state.into());

        // Apply any delayed releasers left.
        current_task.trigger_delayed_releaser(locked);

        // Drop the task now that it has been released. This requires taking it out of the
        // OwnedRef and then out of the resulting ReleaseGuard.
        let CurrentTask { mut task, .. } = current_task;
        let task = OwnedRef::take(&mut task).expect("task should not have been re-owned");
        let _task: Self = ReleaseGuard::take(task);
    }
}

impl MemoryAccessor for Task {
    fn read_memory<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        // Using a `Task` to read memory generally indicates that the memory
        // is being read from a task different from the `CurrentTask`. When
        // this `Task` is not current, its address space is not mapped, so we
        // need to go through the VMO.
        self.mm()?.syscall_read_memory(addr, bytes)
    }

    fn read_memory_partial_until_null_byte<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        // Using a `Task` to read memory generally indicates that the memory
        // is being read from a task different from the `CurrentTask`. When
        // this `Task` is not current, its address space is not mapped, so we
        // need to go through the VMO.
        self.mm()?.syscall_read_memory_partial_until_null_byte(addr, bytes)
    }

    fn read_memory_partial<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        // Using a `Task` to read memory generally indicates that the memory
        // is being read from a task different from the `CurrentTask`. When
        // this `Task` is not current, its address space is not mapped, so we
        // need to go through the VMO.
        self.mm()?.syscall_read_memory_partial(addr, bytes)
    }

    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        // Using a `Task` to write memory generally indicates that the memory
        // is being written to a task different from the `CurrentTask`. When
        // this `Task` is not current, its address space is not mapped, so we
        // need to go through the VMO.
        self.mm()?.syscall_write_memory(addr, bytes)
    }

    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        // Using a `Task` to write memory generally indicates that the memory
        // is being written to a task different from the `CurrentTask`. When
        // this `Task` is not current, its address space is not mapped, so we
        // need to go through the VMO.
        self.mm()?.syscall_write_memory_partial(addr, bytes)
    }

    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
        // Using a `Task` to zero memory generally indicates that the memory
        // is being zeroed from a task different from the `CurrentTask`. When
        // this `Task` is not current, its address space is not mapped, so we
        // need to go through the VMO.
        self.mm()?.syscall_zero(addr, length)
    }
}

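// Note on the `MemoryAccessor` impl above: because every operation goes
// through the VMO, callers can read or write another task's memory without
// being mapped into that task's address space. A hedged sketch (assumes a
// `target: &Task` and an `addr` that is valid in the target's address space;
// `read_memory_to_vec` is assumed to come from the `MemoryAccessorExt` trait
// imported at the top of this file):
//
//     let bytes = target.read_memory_to_vec(addr, len)?;
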
impl TaskMemoryAccessor for Task {
    fn maximum_valid_address(&self) -> Option<UserAddress> {
        self.mm().map(|mm| mm.maximum_valid_user_address).ok()
    }
}

impl fmt::Debug for Task {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{}:{}[{}]",
            self.thread_group().leader,
            self.tid,
            self.persistent_info.command.lock()
        )
    }
}

impl cmp::PartialEq for Task {
    fn eq(&self, other: &Self) -> bool {
        // Tasks are equal only if they are the same object: compare by address.
        let ptr: *const Task = self;
        let other_ptr: *const Task = other;
        ptr == other_ptr
    }
}

impl cmp::Eq for Task {}

#[cfg(test)]
mod test {
    use super::*;
    use crate::testing::*;
    use starnix_uapi::auth::{CAP_SYS_ADMIN, Capabilities};
    use starnix_uapi::resource_limits::Resource;
    use starnix_uapi::signals::SIGCHLD;
    use starnix_uapi::{CLONE_SIGHAND, CLONE_THREAD, CLONE_VM, rlimit};

    #[::fuchsia::test]
    async fn test_tid_allocation() {
        spawn_kernel_and_run(async |locked, current_task| {
            let kernel = current_task.kernel();
            assert_eq!(current_task.get_tid(), 1);
            let another_current = create_task(locked, &kernel, "another-task");
            let another_tid = another_current.get_tid();
            assert!(another_tid >= 2);

            let pids = kernel.pids.read();
            assert_eq!(pids.get_task(1).upgrade().unwrap().get_tid(), 1);
            assert_eq!(pids.get_task(another_tid).upgrade().unwrap().get_tid(), another_tid);
        })
        .await;
    }

    #[::fuchsia::test]
    async fn test_clone_pid_and_parent_pid() {
        spawn_kernel_and_run(async |locked, current_task| {
            let thread = current_task.clone_task_for_test(
                locked,
                (CLONE_THREAD | CLONE_VM | CLONE_SIGHAND) as u64,
                Some(SIGCHLD),
            );
            assert_eq!(current_task.get_pid(), thread.get_pid());
            assert_ne!(current_task.get_tid(), thread.get_tid());
            assert_eq!(current_task.thread_group().leader, thread.thread_group().leader);

            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert_ne!(current_task.get_pid(), child_task.get_pid());
            assert_ne!(current_task.get_tid(), child_task.get_tid());
            assert_eq!(current_task.get_pid(), child_task.thread_group().read().get_ppid());
        })
        .await;
    }

    #[::fuchsia::test]
    async fn test_root_capabilities() {
        spawn_kernel_and_run(async |_, current_task| {
            assert!(security::is_task_capable_noaudit(current_task, CAP_SYS_ADMIN));
            assert_eq!(current_task.real_creds().cap_inheritable, Capabilities::empty());

            current_task.set_creds(Credentials::with_ids(1, 1));
            assert!(!security::is_task_capable_noaudit(current_task, CAP_SYS_ADMIN));
        })
        .await;
    }

    #[::fuchsia::test]
    async fn test_clone_rlimit() {
        spawn_kernel_and_run(async |locked, current_task| {
            let prev_fsize = current_task.thread_group().get_rlimit(locked, Resource::FSIZE);
            assert_ne!(prev_fsize, 10);
            current_task
                .thread_group()
                .limits
                .lock(locked)
                .set(Resource::FSIZE, rlimit { rlim_cur: 10, rlim_max: 100 });
            let current_fsize = current_task.thread_group().get_rlimit(locked, Resource::FSIZE);
            assert_eq!(current_fsize, 10);

            let child_task = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            let child_fsize = child_task.thread_group().get_rlimit(locked, Resource::FSIZE);
            assert_eq!(child_fsize, 10);
        })
        .await;
    }
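
    // A hedged sketch rather than an exhaustive test: the fixture task is
    // actively running on this thread, so `state_code` should report it as
    // `Running` rather than blocked or zombie. Assumes the same
    // `spawn_kernel_and_run` fixture used by the tests above.
    #[::fuchsia::test]
    async fn test_state_code_running() {
        spawn_kernel_and_run(async |_, current_task| {
            assert!(matches!(current_task.state_code(), TaskStateCode::Running));
        })
        .await;
    }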
}