Skip to main content

starnix_core/task/
task.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::{MemoryAccessor, MemoryAccessorExt, MemoryManager, TaskMemoryAccessor};
6use crate::mutable_state::{state_accessor, state_implementation};
7use crate::ptrace::{
8    AtomicStopState, PtraceEvent, PtraceEventData, PtraceState, PtraceStatus, StopState,
9};
10use crate::signals::{KernelSignal, RunState, SignalDetail, SignalInfo, SignalState};
11use crate::task::memory_attribution::MemoryAttributionLifecycleEvent;
12use crate::task::tracing::KoidPair;
13use crate::task::{
14    AbstractUnixSocketNamespace, AbstractVsockSocketNamespace, CurrentTask, EventHandler, Kernel,
15    NormalPriority, ProcessExitInfo, RealtimePriority, SchedulerState, SchedulingPolicy,
16    SeccompFilterContainer, SeccompState, SeccompStateValue, ThreadGroup, ThreadGroupKey,
17    ThreadState, UtsNamespaceHandle, WaitCanceler, Waiter, ZombieProcess,
18};
19use crate::vfs::{FdTable, FsContext, FsNodeHandle, FsString};
20use atomic_bitflags::atomic_bitflags;
21use fuchsia_rcu::{RcuArc, RcuOptionArc, RcuOptionBox, RcuReadGuard};
22use macro_rules_attribute::apply;
23use starnix_logging::{log_warn, set_zx_name};
24use starnix_registers::HeapRegs;
25use starnix_sync::{
26    LockBefore, Locked, Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard, TerminalLock,
27};
28use starnix_task_command::TaskCommand;
29use starnix_types::arch::ArchWidth;
30use starnix_types::stats::TaskTimeStats;
31use starnix_uapi::auth::{Credentials, FsCred};
32use starnix_uapi::errors::Errno;
33use starnix_uapi::signals::{SIGCHLD, SigSet, Signal, sigaltstack_contains_pointer};
34use starnix_uapi::user_address::{
35    ArchSpecific, MappingMultiArchUserRef, UserAddress, UserCString, UserRef,
36};
37use starnix_uapi::{
38    CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, CLD_TRAPPED,
39    FUTEX_BITSET_MATCH_ANY, errno, error, from_status_like_fdio, pid_t, sigaction_t, sigaltstack,
40    tid_t, uapi,
41};
42use std::collections::VecDeque;
43use std::mem::MaybeUninit;
44use std::ops::Deref;
45use std::sync::atomic::{AtomicBool, Ordering};
46use std::sync::{Arc, Weak};
47use std::{cmp, fmt};
48use zx::{Signals, Task as _};
49
/// The reason a task exited, stopped, or continued, in the form reported to waiters.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ExitStatus {
    /// The task exited with the given exit code.
    Exit(u8),
    /// The task was terminated by the given signal.
    Kill(SignalInfo),
    /// The task was terminated by the given signal and the termination is reported as a core
    /// dump.
    CoreDump(SignalInfo),
    // The second field for Stop and Continue contains the type of ptrace stop
    // event that made it stop / continue, if applicable (PTRACE_EVENT_STOP,
    // PTRACE_EVENT_FORK, etc)
    /// The task was stopped by the given signal.
    Stop(SignalInfo, PtraceEvent),
    /// The task was continued by the given signal.
    Continue(SignalInfo, PtraceEvent),
}
61impl ExitStatus {
62    /// Converts the given exit status to a status code suitable for returning from wait syscalls.
63    pub fn wait_status(&self) -> i32 {
64        match self {
65            ExitStatus::Exit(status) => (*status as i32) << 8,
66            ExitStatus::Kill(siginfo) => siginfo.signal.number() as i32,
67            ExitStatus::CoreDump(siginfo) => (siginfo.signal.number() as i32) | 0x80,
68            ExitStatus::Continue(siginfo, trace_event) => {
69                let trace_event_val = *trace_event as u32;
70                if trace_event_val != 0 {
71                    (siginfo.signal.number() as i32) | (trace_event_val << 16) as i32
72                } else {
73                    0xffff
74                }
75            }
76            ExitStatus::Stop(siginfo, trace_event) => {
77                let trace_event_val = *trace_event as u32;
78                (0x7f + ((siginfo.signal.number() as i32) << 8)) | (trace_event_val << 16) as i32
79            }
80        }
81    }
82
83    pub fn signal_info_code(&self) -> i32 {
84        match self {
85            ExitStatus::Exit(_) => CLD_EXITED as i32,
86            ExitStatus::Kill(_) => CLD_KILLED as i32,
87            ExitStatus::CoreDump(_) => CLD_DUMPED as i32,
88            ExitStatus::Stop(_, _) => CLD_STOPPED as i32,
89            ExitStatus::Continue(_, _) => CLD_CONTINUED as i32,
90        }
91    }
92
93    pub fn signal_info_status(&self) -> i32 {
94        match self {
95            ExitStatus::Exit(status) => *status as i32,
96            ExitStatus::Kill(siginfo)
97            | ExitStatus::CoreDump(siginfo)
98            | ExitStatus::Continue(siginfo, _)
99            | ExitStatus::Stop(siginfo, _) => siginfo.signal.number() as i32,
100        }
101    }
102}
103
atomic_bitflags! {
    /// Per-task status bits that are stored atomically on the `Task`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct TaskFlags: u8 {
        /// Set once an exit status has been recorded for the task.
        const EXITED                   = 1 << 0;
        /// Mirrors whether the task's own signal state reports any pending signal.
        const SIGNALS_AVAILABLE        = 1 << 1;
        // NOTE(review): presumably set while a temporary signal mask is installed; no user of
        // this flag is visible in this part of the file -- confirm against the callers.
        const TEMPORARY_SIGNAL_MASK    = 1 << 2;
        /// Whether the executor should dump the stack of this task when it exits.
        /// Currently used to implement ExitStatus::CoreDump.
        const DUMP_ON_EXIT             = 1 << 3;
        /// Set while the task's kernel signal queue is non-empty.
        const KERNEL_SIGNALS_AVAILABLE = 1 << 4;
        /// Whether the executor has successfully spawned a thread for this task.
        const SPAWNED                  = 1 << 5;
    }
}
118
/// This contains thread state that tracers can inspect and modify. It is
/// captured when a thread stops, and optionally copied back (if dirty) when a
/// thread starts again. An alternative implementation would involve the
/// tracers acting on thread state directly; however, this would involve sharing
/// CurrentTask structures across multiple threads, which goes against the
/// intent of the design of CurrentTask.
pub struct CapturedThreadState {
    /// The thread state of the traced task. This is copied out when the thread
    /// stops.
    pub thread_state: ThreadState<HeapRegs>,

    /// Indicates that the last ptrace operation changed the thread state, so it
    /// should be written back to the original thread. A fresh capture starts
    /// with this set to `false`.
    pub dirty: bool,
}
134
135impl ArchSpecific for CapturedThreadState {
136    fn is_arch32(&self) -> bool {
137        self.thread_state.is_arch32()
138    }
139}
140
/// Architecture-independent form of a userspace `robust_list` entry.
#[derive(Debug)]
pub struct RobustList {
    /// User pointer to the next entry in the list.
    pub next: RobustListPtr,
}

/// A user pointer to a `RobustList` entry that maps to either the native or the arch32 layout.
pub type RobustListPtr =
    MappingMultiArchUserRef<RobustList, uapi::robust_list, uapi::arch32::robust_list>;
148
149impl From<uapi::robust_list> for RobustList {
150    fn from(robust_list: uapi::robust_list) -> Self {
151        Self { next: RobustListPtr::from(robust_list.next) }
152    }
153}
154
#[cfg(target_arch = "aarch64")]
impl From<uapi::arch32::robust_list> for RobustList {
    /// Converts the arch32 userspace layout into the architecture-independent form.
    fn from(raw: uapi::arch32::robust_list) -> Self {
        let next = RobustListPtr::from(raw.next);
        Self { next }
    }
}
161
/// Architecture-independent form of a userspace `robust_list_head`.
#[derive(Debug)]
pub struct RobustListHead {
    /// The first entry of the list.
    pub list: RobustList,
    /// The `futex_offset` value read from the userspace structure, widened to `isize`.
    pub futex_offset: isize,
}

/// A user pointer to a `RobustListHead` that maps to either the native or the arch32 layout.
pub type RobustListHeadPtr =
    MappingMultiArchUserRef<RobustListHead, uapi::robust_list_head, uapi::arch32::robust_list_head>;
170
171impl From<uapi::robust_list_head> for RobustListHead {
172    fn from(robust_list_head: uapi::robust_list_head) -> Self {
173        Self {
174            list: robust_list_head.list.into(),
175            futex_offset: robust_list_head.futex_offset as isize,
176        }
177    }
178}
179
#[cfg(target_arch = "aarch64")]
impl From<uapi::arch32::robust_list_head> for RobustListHead {
    /// Converts the arch32 userspace layout into the architecture-independent form.
    fn from(raw: uapi::arch32::robust_list_head) -> Self {
        let list = RobustList::from(raw.list);
        let futex_offset = raw.futex_offset as isize;
        Self { list, futex_offset }
    }
}
189
/// The part of a task's state that is mutated while the task runs.
pub struct TaskMutableState {
    /// User address registered for clearing the child tid.
    // See https://man7.org/linux/man-pages/man2/set_tid_address.2.html
    pub clear_child_tid: UserRef<tid_t>,

    /// Signal handler related state. This is grouped together for when atomicity is needed during
    /// signal sending and delivery.
    signals: SignalState,

    /// Internal signals that have a higher priority than a regular signal.
    ///
    /// Storing in a separate queue outside of `SignalState` ensures the internal signals will
    /// never be ignored or masked when dequeuing. Higher priority ensures that no user signals
    /// will jump the queue, e.g. ptrace, which delays the delivery.
    ///
    /// This design is not about observable consequence, but about convenient implementation.
    kernel_signals: VecDeque<KernelSignal>,

    /// The exit status that this task exited with.
    exit_status: Option<ExitStatus>,

    /// Desired scheduler state for the task.
    pub scheduler_state: SchedulerState,

    /// The UTS namespace assigned to this thread.
    ///
    /// This field is kept in the mutable state because the UTS namespace of a thread
    /// can be forked using `clone()` or `unshare()` syscalls.
    ///
    /// We use UtsNamespaceHandle because the UTS properties can be modified
    /// by any other thread that shares this namespace.
    pub uts_ns: UtsNamespaceHandle,

    /// Bit that determines whether a newly started program can have privileges its parent does
    /// not have.  See Documentation/prctl/no_new_privs.txt in the Linux kernel for details.
    /// Note that Starnix does not currently implement the relevant privileges (e.g.,
    /// setuid/setgid binaries).  So, you can set this, but it does nothing other than get
    /// propagated to children.
    ///
    /// The documentation indicates that this can only ever be set to
    /// true, and it cannot be reverted to false.  Accessor methods
    /// for this field ensure this property.
    no_new_privs: bool,

    /// Userspace hint about how to adjust the OOM score for this process.
    pub oom_score_adj: i32,

    /// List of currently installed seccomp_filters
    pub seccomp_filters: SeccompFilterContainer,

    /// A pointer to the head of the robust futex list of this thread in
    /// userspace. See get_robust_list(2)
    pub robust_list_head: RobustListHeadPtr,

    /// The timer slack used to group timer expirations for the calling thread.
    ///
    /// Timers may expire up to `timerslack_ns` late, but never early.
    ///
    /// If this value is 0, the task's default timerslack is used.
    pub timerslack_ns: u64,

    /// The default value for `timerslack_ns`. This value cannot change during the lifetime of a
    /// task.
    ///
    /// This value is set to the `timerslack_ns` of the creating thread, and thus is not constant
    /// across tasks.
    pub default_timerslack_ns: u64,

    /// Information that a tracer needs to communicate with this process, if it
    /// is being traced. `None` means the task is not being traced.
    pub ptrace: Option<Box<PtraceState>>,

    /// Information that a tracer needs to inspect this process.
    pub captured_thread_state: Option<Box<CapturedThreadState>>,
}
264
265impl TaskMutableState {
266    pub fn no_new_privs(&self) -> bool {
267        self.no_new_privs
268    }
269
270    /// Sets the value of no_new_privs to true.  It is an error to set
271    /// it to anything else.
272    pub fn enable_no_new_privs(&mut self) {
273        self.no_new_privs = true;
274    }
275
276    pub fn get_timerslack<T: zx::Timeline>(&self) -> zx::Duration<T> {
277        zx::Duration::from_nanos(self.timerslack_ns as i64)
278    }
279
280    /// Sets the current timerslack of the task to `ns`.
281    ///
282    /// If `ns` is zero, the current timerslack gets reset to the task's default timerslack.
283    pub fn set_timerslack_ns(&mut self, ns: u64) {
284        if ns == 0 {
285            self.timerslack_ns = self.default_timerslack_ns;
286        } else {
287            self.timerslack_ns = ns;
288        }
289    }
290
291    pub fn is_ptraced(&self) -> bool {
292        self.ptrace.is_some()
293    }
294
295    pub fn is_ptrace_listening(&self) -> bool {
296        self.ptrace.as_ref().is_some_and(|ptrace| ptrace.stop_status == PtraceStatus::Listening)
297    }
298
299    pub fn ptrace_on_signal_consume(&mut self) -> bool {
300        self.ptrace.as_mut().is_some_and(|ptrace: &mut Box<PtraceState>| {
301            if ptrace.stop_status.is_continuing() {
302                ptrace.stop_status = PtraceStatus::Default;
303                false
304            } else {
305                true
306            }
307        })
308    }
309
310    pub fn notify_ptracers(&mut self) {
311        if let Some(ptrace) = &self.ptrace {
312            ptrace.tracer_waiters().notify_all();
313        }
314    }
315
316    pub fn wait_on_ptracer(&self, waiter: &Waiter) {
317        if let Some(ptrace) = &self.ptrace {
318            ptrace.tracee_waiters.wait_async(&waiter);
319        }
320    }
321
322    pub fn notify_ptracees(&mut self) {
323        if let Some(ptrace) = &self.ptrace {
324            ptrace.tracee_waiters.notify_all();
325        }
326    }
327
328    pub fn take_captured_state(&mut self) -> Option<Box<CapturedThreadState>> {
329        if self.captured_thread_state.is_some() {
330            let mut state = None;
331            std::mem::swap(&mut state, &mut self.captured_thread_state);
332            return state;
333        }
334        None
335    }
336
337    pub fn copy_state_from(&mut self, current_task: &CurrentTask) {
338        self.captured_thread_state = Some(Box::new(CapturedThreadState {
339            thread_state: current_task.thread_state.extended_snapshot::<HeapRegs>(),
340            dirty: false,
341        }));
342    }
343
344    /// Returns the task's currently active signal mask.
345    pub fn signal_mask(&self) -> SigSet {
346        self.signals.mask()
347    }
348
349    /// Returns true if `signal` is currently blocked by this task's signal mask.
350    pub fn is_signal_masked(&self, signal: Signal) -> bool {
351        self.signals.mask().has_signal(signal)
352    }
353
354    /// Returns true if `signal` is blocked by the saved signal mask.
355    ///
356    /// Note that the current signal mask may still not be blocking the signal.
357    pub fn is_signal_masked_by_saved_mask(&self, signal: Signal) -> bool {
358        self.signals.saved_mask().is_some_and(|mask| mask.has_signal(signal))
359    }
360
361    /// Removes the currently active, temporary, signal mask and restores the
362    /// previously active signal mask.
363    pub fn restore_signal_mask(&mut self) {
364        self.signals.restore_mask();
365    }
366
367    /// Returns true if the task's current `RunState` is blocked.
368    pub fn is_blocked(&self) -> bool {
369        self.signals.run_state.is_blocked()
370    }
371
372    /// Sets the task's `RunState` to `run_state`.
373    pub fn set_run_state(&mut self, run_state: RunState) {
374        self.signals.run_state = run_state;
375    }
376
377    pub fn run_state(&self) -> RunState {
378        self.signals.run_state.clone()
379    }
380
381    pub fn on_signal_stack(&self, stack_pointer_register: u64) -> bool {
382        self.signals
383            .alt_stack
384            .map(|signal_stack| sigaltstack_contains_pointer(&signal_stack, stack_pointer_register))
385            .unwrap_or(false)
386    }
387
388    pub fn set_sigaltstack(&mut self, stack: Option<sigaltstack>) {
389        self.signals.alt_stack = stack;
390    }
391
392    pub fn sigaltstack(&self) -> Option<sigaltstack> {
393        self.signals.alt_stack
394    }
395
396    pub fn wait_on_signal(&mut self, waiter: &Waiter) {
397        self.signals.signal_wait.wait_async(waiter);
398    }
399
400    pub fn signals_mut(&mut self) -> &mut SignalState {
401        &mut self.signals
402    }
403
404    pub fn wait_on_signal_fd_events(
405        &self,
406        waiter: &Waiter,
407        mask: SigSet,
408        handler: EventHandler,
409    ) -> WaitCanceler {
410        self.signals.signal_wait.wait_async_signal_mask(waiter, mask, handler)
411    }
412
413    pub fn notify_signal_waiters(&self, signal: &Signal) {
414        self.signals.signal_wait.notify_signal(signal);
415    }
416
417    /// Thaw the task if has been frozen
418    pub fn thaw(&mut self) {
419        if let RunState::Frozen(waiter) = self.run_state() {
420            waiter.notify();
421        }
422    }
423
424    pub fn is_frozen(&self) -> bool {
425        matches!(self.run_state(), RunState::Frozen(_))
426    }
427
428    #[cfg(test)]
429    pub fn kernel_signals_for_test(&self) -> &VecDeque<KernelSignal> {
430        &self.kernel_signals
431    }
432}
433
#[apply(state_implementation!)]
impl TaskMutableState<Base = Task> {
    /// Moves the task to the `stopped` state and records the associated ptrace information.
    ///
    /// Does nothing if `stopped` is a ptrace-only state and the task is not traced, or if the
    /// transition from the currently stored stop state is illegal. Otherwise it stores the new
    /// state, snapshots the thread state from `current_task` (when provided and the new state
    /// is a stopped one), records `siginfo` and `event` on the ptrace state, and notifies
    /// tracees and/or tracers as appropriate for the new state.
    pub fn set_stopped(
        &mut self,
        stopped: StopState,
        siginfo: Option<SignalInfo>,
        current_task: Option<&CurrentTask>,
        event: Option<PtraceEventData>,
    ) {
        if stopped.ptrace_only() && self.ptrace.is_none() {
            return;
        }

        if self.base.load_stopped().is_illegal_transition(stopped) {
            return;
        }

        // TODO(https://g-issues.fuchsia.dev/issues/306438676): When task can be
        // stopped inside user code, task will need to be either restarted or
        // stopped here.
        self.store_stopped(stopped);
        if stopped.is_stopped() {
            if let Some(ref current_task) = current_task {
                self.copy_state_from(current_task);
            }
        }
        if let Some(ptrace) = &mut self.ptrace {
            ptrace.set_last_signal(siginfo);
            ptrace.set_last_event(event);
        }
        if stopped == StopState::Waking || stopped == StopState::ForceWaking {
            self.notify_ptracees();
        }
        if !stopped.is_in_progress() {
            self.notify_ptracers();
        }
    }

    /// Enqueues a signal at the back of the task's signal queue.
    pub fn enqueue_signal(&mut self, signal: SignalInfo) {
        self.signals.enqueue(signal);
        self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
    }

    /// Enqueues the signal, allowing the signal to skip straight to the front of the task's queue.
    ///
    /// `enqueue_signal` is the more common API to use.
    ///
    /// Note that this will not guarantee that the signal is dequeued before any process-directed
    /// signals.
    // NOTE(review): the body is currently identical to `enqueue_signal`; confirm whether
    // `SignalState::enqueue` handles the ordering or whether front insertion is missing here.
    pub fn enqueue_signal_front(&mut self, signal: SignalInfo) {
        self.signals.enqueue(signal);
        self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
    }

    /// Sets the current signal mask of the task.
    pub fn set_signal_mask(&mut self, mask: SigSet) {
        self.signals.set_mask(mask);
        self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
    }

    /// Sets a temporary signal mask for the task.
    ///
    /// This mask should be removed by a matching call to `restore_signal_mask`.
    pub fn set_temporary_signal_mask(&mut self, mask: SigSet) {
        self.signals.set_temporary_mask(mask);
        self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
    }

    /// Returns the number of pending signals for this task, without considering the signal mask.
    pub fn pending_signal_count(&self) -> usize {
        self.signals.num_queued() + self.base.thread_group().num_signals_queued()
    }

    /// Returns `true` if `signal` is pending for this task, without considering the signal mask.
    pub fn has_signal_pending(&self, signal: Signal) -> bool {
        self.signals.has_queued(signal) || self.base.thread_group().has_signal_queued(signal)
    }

    // Prepare a SignalInfo to be sent to the tracer, if any.
    //
    // Returns `None` unless `stopped` is a stopped state, the task is traced, and the ptrace
    // state has a last signal recorded. Otherwise returns the tracer's thread group together
    // with a `SIGCHLD` / `CLD_TRAPPED` signal whose status is the last signal's number.
    pub fn prepare_signal_info(
        &mut self,
        stopped: StopState,
    ) -> Option<(Weak<ThreadGroup>, SignalInfo)> {
        if !stopped.is_stopped() {
            return None;
        }

        if let Some(ptrace) = &self.ptrace {
            if let Some(last_signal) = ptrace.get_last_signal_ref() {
                let signal_info = SignalInfo::with_detail(
                    SIGCHLD,
                    CLD_TRAPPED as i32,
                    SignalDetail::SIGCHLD {
                        pid: self.base.tid,
                        uid: self.base.real_creds().uid,
                        status: last_signal.signal.number() as i32,
                    },
                );

                return Some((ptrace.core_state.thread_group.clone(), signal_info));
            }
        }

        None
    }

    /// Attaches (`Some`) or detaches (`None`) the ptrace state of this task.
    ///
    /// Returns `EPERM` when asked to attach while ptrace state is already present.
    pub fn set_ptrace(&mut self, tracer: Option<Box<PtraceState>>) -> Result<(), Errno> {
        if tracer.is_some() && self.ptrace.is_some() {
            return error!(EPERM);
        }

        if tracer.is_none() {
            // Handle the case where this is called while the thread group is being released.
            if let Ok(tg_stop_state) = self.base.thread_group().load_stopped().as_in_progress() {
                self.set_stopped(tg_stop_state, None, None, None);
            }
        }
        self.ptrace = tracer;
        Ok(())
    }

    /// Returns true if the task is traced, is not waking or awake, and is not in the ptrace
    /// `Listening` state.
    pub fn can_accept_ptrace_commands(&mut self) -> bool {
        !self.base.load_stopped().is_waking_or_awake()
            && self.is_ptraced()
            && !self.is_ptrace_listening()
    }

    fn store_stopped(&mut self, state: StopState) {
        // We don't actually use the guard but we require it to enforce that the
        // caller holds the task's mutable state lock (identified by mutable
        // access to the task's mutable state).

        self.base.stop_state.store(state, Ordering::Relaxed)
    }

    /// Clears the flags in `clear` and sets the flags in `set` on the task.
    ///
    /// `clear` and `set` must be disjoint (checked in debug builds).
    pub fn update_flags(&mut self, clear: TaskFlags, set: TaskFlags) {
        // We don't actually use the guard but we require it to enforce that the
        // caller holds the task's mutable state lock (identified by mutable
        // access to the task's mutable state).

        // `clear ^ set == clear | set` holds exactly when the two sets share no bits.
        debug_assert_eq!(clear ^ set, clear | set);
        let observed = self.base.flags();
        let swapped = self.base.flags.swap((observed | set) & !clear, Ordering::Relaxed);
        // The flags must not have changed between the load and the swap.
        debug_assert_eq!(swapped, observed);
    }

    /// Sets `flag` when `v` is true and clears it when `v` is false.
    pub fn set_flags(&mut self, flag: TaskFlags, v: bool) {
        let (clear, set) = if v { (TaskFlags::empty(), flag) } else { (flag, TaskFlags::empty()) };

        self.update_flags(clear, set);
    }

    /// Marks the task as spawned by setting `TaskFlags::SPAWNED`.
    pub fn set_spawned(&mut self) {
        self.set_flags(TaskFlags::SPAWNED, true);
    }

    /// Marks the task as exited and records `status`, replacing any previous exit status.
    pub fn set_exit_status(&mut self, status: ExitStatus) {
        self.set_flags(TaskFlags::EXITED, true);
        self.exit_status = Some(status);
    }

    /// Marks the task as exited and records `status` only if no exit status is set yet.
    pub fn set_exit_status_if_not_already(&mut self, status: ExitStatus) {
        self.set_flags(TaskFlags::EXITED, true);
        self.exit_status.get_or_insert(status);
    }

    /// The set of pending signals for the task, including the signals pending for the thread
    /// group.
    pub fn pending_signals(&self) -> SigSet {
        self.signals.pending() | self.base.thread_group().get_pending_signals()
    }

    /// The set of pending signals for the task specifically, not including the signals pending
    /// for the thread group.
    pub fn task_specific_pending_signals(&self) -> SigSet {
        self.signals.pending()
    }

    /// Returns true if any currently pending signal is allowed by `mask`.
    pub fn is_any_signal_allowed_by_mask(&self, mask: SigSet) -> bool {
        self.signals.is_any_allowed_by_mask(mask)
            || self.base.thread_group().is_any_signal_allowed_by_mask(mask)
    }

    /// Returns whether or not a signal is pending for this task, taking the current
    /// signal mask into account.
    pub fn is_any_signal_pending(&self) -> bool {
        let mask = self.signal_mask();
        self.signals.is_any_pending()
            || self.base.thread_group().is_any_signal_allowed_by_mask(mask)
    }

    /// Returns the next pending signal that passes `predicate`.
    ///
    /// The thread group's signals are consulted first; the task's own queue is only consulted
    /// (and `SIGNALS_AVAILABLE` refreshed) when the thread group has no matching signal.
    fn take_next_signal_where<F>(&mut self, predicate: F) -> Option<SignalInfo>
    where
        F: Fn(&SignalInfo) -> bool,
    {
        if let Some(signal) = self.base.thread_group().take_next_signal_where(&predicate) {
            Some(signal)
        } else {
            let s = self.signals.take_next_where(&predicate);
            self.set_flags(TaskFlags::SIGNALS_AVAILABLE, self.signals.is_any_pending());
            s
        }
    }

    /// Removes and returns the next pending signal with the same signal number as `siginfo`.
    ///
    /// Returns `None` if `siginfo` is a blocked signal, or no such signal is pending.
    pub fn take_specific_signal(&mut self, siginfo: SignalInfo) -> Option<SignalInfo> {
        let signal_mask = self.signal_mask();
        if signal_mask.has_signal(siginfo.signal) {
            return None;
        }

        let predicate = |s: &SignalInfo| s.signal == siginfo.signal;
        self.take_next_signal_where(predicate)
    }

    /// Removes and returns a pending signal that is unblocked by the current signal mask.
    ///
    /// Returns `None` if there are no unblocked signals pending.
    pub fn take_any_signal(&mut self) -> Option<SignalInfo> {
        self.take_signal_with_mask(self.signal_mask())
    }

    /// Removes and returns a pending signal that is unblocked by `signal_mask`.
    ///
    /// Signals marked `force` are returned even when `signal_mask` blocks them.
    ///
    /// Returns `None` if there are no signals pending that are unblocked by `signal_mask`.
    pub fn take_signal_with_mask(&mut self, signal_mask: SigSet) -> Option<SignalInfo> {
        let predicate = |s: &SignalInfo| !signal_mask.has_signal(s.signal) || s.force;
        self.take_next_signal_where(predicate)
    }

    /// Enqueues an internal signal at the back of the task's kernel signal queue.
    pub fn enqueue_kernel_signal(&mut self, signal: KernelSignal) {
        self.kernel_signals.push_back(signal);
        self.set_flags(TaskFlags::KERNEL_SIGNALS_AVAILABLE, true);
    }

    /// Removes and returns a pending internal signal.
    ///
    /// Returns `None` if there are no signals pending.
    pub fn take_kernel_signal(&mut self) -> Option<KernelSignal> {
        let signal = self.kernel_signals.pop_front();
        if self.kernel_signals.is_empty() {
            self.set_flags(TaskFlags::KERNEL_SIGNALS_AVAILABLE, false);
        }
        signal
    }

    #[cfg(test)]
    pub fn queued_signal_count(&self, signal: Signal) -> usize {
        self.signals.queued_count(signal)
            + self.base.thread_group().pending_signals.lock().queued_count(signal)
    }
}
692
/// A container for an optional Zircon thread and its cached KOID.
///
/// The container itself does no locking; users wrap it in a lock where shared access is needed.
#[derive(Debug)]
pub struct ZirconThread {
    /// The thread handle, if the task has an underlying Zircon thread.
    thread: Option<Arc<zx::Thread>>,
    /// The KOID of `thread`, cached when the thread is stored. `None` if there is no thread or
    /// the KOID lookup failed.
    koid: Option<zx::Koid>,
}
699
700impl ZirconThread {
701    pub fn new(thread: Option<Arc<zx::Thread>>) -> Self {
702        let koid = thread.as_ref().and_then(|t| t.koid().ok());
703        Self { thread, koid }
704    }
705
706    pub fn set(&mut self, thread: Arc<zx::Thread>) {
707        self.koid = thread.koid().ok();
708        self.thread = Some(thread);
709    }
710
711    pub fn koid(&self) -> Option<zx::Koid> {
712        self.koid
713    }
714}
715
716impl std::ops::Deref for ZirconThread {
717    type Target = Option<Arc<zx::Thread>>;
718    fn deref(&self) -> &Self::Target {
719        &self.thread
720    }
721}
722
/// The live state of a task.
///
/// This structure contains the state of a task that is only relevant while the task is alive. It
/// is dropped when the task enters the zombie state.
pub struct TaskLiveState {
    /// A handle to the underlying Zircon thread object.
    ///
    /// Some tasks lack an underlying Zircon thread. These tasks are used internally by the
    /// Starnix kernel to track background work, typically on a `kthread`.
    pub thread: RwLock<ZirconThread>,

    /// The file descriptor table for this task.
    ///
    /// This table can be shared by many tasks.
    pub files: FdTable,

    /// The memory manager for this task.  This is `None` only for system tasks.
    pub mm: RcuOptionArc<MemoryManager>,

    /// The file system for this task.
    pub fs: RcuArc<FsContext>,

    /// The namespace for abstract AF_UNIX sockets for this task.
    pub abstract_socket_namespace: Arc<AbstractUnixSocketNamespace>,

    /// The namespace for AF_VSOCK for this task.
    pub abstract_vsock_namespace: Arc<AbstractVsockSocketNamespace>,

    /// The pid directory, so it doesn't have to be generated and thrown away on every access.
    /// See https://fxbug.dev/291962828 for details.
    pub proc_pid_directory_cache: RcuOptionBox<FsNodeHandle>,
}
755
756impl TaskLiveState {
757    pub fn mm(&self) -> Result<Arc<MemoryManager>, Errno> {
758        self.mm.to_option_arc().ok_or_else(|| errno!(EINVAL))
759    }
760
761    pub fn fs(&self) -> Arc<FsContext> {
762        self.fs.to_arc()
763    }
764}
765
/// The coarse scheduling state of a task, as shown in process listings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TaskStateCode {
    /// Task is being executed.
    Running,

    /// Task is waiting for an event.
    Sleeping,

    /// Tracing stop
    TracingStop,

    /// Task has exited.
    Zombie,
}

impl TaskStateCode {
    /// Returns the single-character code for this state.
    pub fn code_char(&self) -> char {
        match *self {
            Self::Zombie => 'Z',
            Self::TracingStop => 't',
            Self::Sleeping => 'S',
            Self::Running => 'R',
        }
    }

    /// Returns the lowercase human-readable name of this state.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::Zombie => "zombie",
            Self::TracingStop => "tracing stop",
            Self::Sleeping => "sleeping",
            Self::Running => "running",
        }
    }
}
800
/// The information of the task that needs to be available to the `ThreadGroup` while computing
/// which process a wait can target. It is necessary to share this data with the `ThreadGroup` so
/// that it is available while the task is being dropped and so is not accessible from a weak
/// pointer.
#[derive(Debug)]
pub struct TaskPersistentInfoState {
    /// Immutable information about the task
    tid: tid_t,
    /// Key of the thread group this task belongs to; the task's pid is derived from it.
    thread_group_key: ThreadGroupKey,

    /// The command of this task.
    command: Mutex<TaskCommand>,

    /// The security credentials for this task. These are only set when the task is the CurrentTask,
    /// or on task creation.
    creds: RcuArc<Credentials>,

    // A lock for the security credentials. Writers must take the lock, readers that need to ensure
    // that the task state does not change may take the lock.
    creds_lock: RwLock<()>,
}
822
/// Guard for reading locked credentials.
///
/// Dereferences to the `Credentials` it guards.
pub struct CredentialsReadGuard<'a> {
    /// Read lock held for the guard's lifetime; never accessed, only kept alive.
    _lock: RwLockReadGuard<'a, ()>,
    /// RCU read guard over the credentials themselves.
    creds: RcuReadGuard<Credentials>,
}
828
829impl<'a> Deref for CredentialsReadGuard<'a> {
830    type Target = Credentials;
831
832    fn deref(&self) -> &Self::Target {
833        self.creds.deref()
834    }
835}
836
/// Guard for writing credentials. No `CredentialsReadGuard` to the same task can concurrently
/// exist.
pub struct CredentialsWriteGuard<'a> {
    /// Write lock held for the guard's lifetime; never accessed, only kept alive.
    _lock: RwLockWriteGuard<'a, ()>,
    /// The credentials cell that `update` writes to.
    creds: &'a RcuArc<Credentials>,
}

impl<'a> CredentialsWriteGuard<'a> {
    /// Replaces the guarded credentials with `creds`.
    pub fn update(&mut self, creds: Arc<Credentials>) {
        self.creds.update(creds);
    }
}
849
850impl TaskPersistentInfoState {
851    fn new(
852        tid: tid_t,
853        thread_group_key: ThreadGroupKey,
854        command: TaskCommand,
855        creds: Arc<Credentials>,
856    ) -> TaskPersistentInfo {
857        Arc::new(Self {
858            tid,
859            thread_group_key,
860            command: Mutex::new(command),
861            creds: RcuArc::new(creds),
862            creds_lock: RwLock::new(()),
863        })
864    }
865
866    pub fn tid(&self) -> tid_t {
867        self.tid
868    }
869
870    pub fn pid(&self) -> pid_t {
871        self.thread_group_key.pid()
872    }
873
874    pub fn command_guard(&self) -> MutexGuard<'_, TaskCommand> {
875        self.command.lock()
876    }
877
878    /// Snapshots the credentials, returning a short-lived RCU-guarded reference.
879    pub fn real_creds(&self) -> RcuReadGuard<Credentials> {
880        self.creds.read()
881    }
882
883    /// Snapshots the credentials, returning a new reference. Use this if you need to stash the
884    /// credentials somewhere.
885    pub fn clone_creds(&self) -> Arc<Credentials> {
886        self.creds.to_arc()
887    }
888
889    /// Returns a read lock on the credentials. This is appropriate if you need to guarantee that
890    ///  the Task's credentials will not change during a security-sensitive operation.
891    pub fn lock_creds(&self) -> CredentialsReadGuard<'_> {
892        let lock = self.creds_lock.read();
893        CredentialsReadGuard { _lock: lock, creds: self.creds.read() }
894    }
895
896    /// Locks the credentials for writing.
897    /// SAFETY: Only use from CurrentTask, and keep the subjective credentials stored in CurrentTask
898    /// in sync.
899    pub(in crate::task) unsafe fn write_creds(&self) -> CredentialsWriteGuard<'_> {
900        let lock = self.creds_lock.write();
901        CredentialsWriteGuard { _lock: lock, creds: &self.creds }
902    }
903}
904
/// Reference-counted handle to a `TaskPersistentInfoState`.
pub type TaskPersistentInfo = Arc<TaskPersistentInfoState>;
906
/// A unit of execution.
///
/// A task is the primary unit of execution in the Starnix kernel. Most tasks are *user* tasks,
/// which have an associated Zircon thread. The Zircon thread switches between restricted mode,
/// in which the thread runs userspace code, and normal mode, in which the thread runs Starnix
/// code.
///
/// Tasks track the resources used by userspace by referencing various objects, such as an
/// `FdTable`, a `MemoryManager`, and an `FsContext`. Many tasks can share references to these
/// objects. In principle, which objects are shared between which tasks can be largely arbitrary,
/// but there are common patterns of sharing. For example, tasks created with `pthread_create`
/// will share the `FdTable`, `MemoryManager`, and `FsContext` and are often called "threads" by
/// userspace programmers. Tasks created by `posix_spawn` do not share these objects and are often
/// called "processes" by userspace programmers. However, inside the kernel, there is no clear
/// definition of a "thread" or a "process".
///
/// During boot, the kernel creates the first task, often called `init`. The vast majority of other
/// tasks are created as transitive clones (e.g., using `clone(2)`) of that task. Sometimes, the
/// kernel will create new tasks from whole cloth, either with a corresponding userspace component
/// or to represent some background work inside the kernel.
///
/// See also `CurrentTask`, which represents the task corresponding to the thread that is currently
/// executing.
pub struct Task {
    /// Weak reference to this `Task`. This allows us to retrieve an `Arc` from a raw `Task`.
    pub weak_self: Weak<Self>,

    /// A unique identifier for this task.
    ///
    /// This value can be read in userspace using `gettid(2)`. In general, this value
    /// is different from the value returned by `getpid(2)`, which returns the `id` of the leader
    /// of the `thread_group`.
    pub tid: tid_t,

    /// The process key of this task.
    pub thread_group_key: ThreadGroupKey,

    /// The kernel to which this thread group belongs.
    pub kernel: Arc<Kernel>,

    /// The thread group to which this task belongs.
    ///
    /// The group of tasks in a thread group roughly corresponds to the userspace notion of a
    /// process.
    pub thread_group: Arc<ThreadGroup>,

    /// The live state of the task.
    ///
    /// This is `None` for zombie tasks.
    pub live_state: RcuOptionBox<TaskLiveState>,

    /// The stop state of the task, distinct from the stop state of the thread group.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    stop_state: AtomicStopState,

    /// The flags for the task.
    ///
    /// Must only be set when the `mutable_state` write lock is held.
    flags: AtomicTaskFlags,

    /// The mutable state of the Task.
    mutable_state: RwLock<TaskMutableState>,

    /// The information of the task that needs to be available to the `ThreadGroup` while computing
    /// which process a wait can target.
    /// Contains the command line, the task credentials and the exit signal.
    /// See `TaskPersistentInfo` for more information.
    pub persistent_info: TaskPersistentInfo,

    /// For vfork and clone() with CLONE_VFORK, this is set when the task exits or calls execve().
    /// It allows the calling task to block until the fork has been completed. Only populated
    /// when created with the CLONE_VFORK flag.
    vfork_event: Option<Arc<zx::Event>>,

    /// Variable that can tell you whether there are currently seccomp
    /// filters without holding a lock
    pub seccomp_filter_state: SeccompState,

    /// Tell you whether you are tracing syscall entry / exit without a lock.
    pub trace_syscalls: AtomicBool,
}
989
/// The decoded cross-platform parts we care about for page fault exception reports.
#[derive(Debug)]
pub struct PageFaultExceptionReport {
    /// The address reported as faulting.
    pub faulting_address: u64,
    /// Set when the page fault was due to a not-present page.
    pub not_present: bool,
    /// Set when the triggering memory operation was a write.
    pub is_write: bool,
    /// Set when the triggering memory operation was an execute.
    pub is_execute: bool,
}
998
999impl Task {
1000    pub fn kernel(&self) -> &Arc<Kernel> {
1001        &self.kernel
1002    }
1003
1004    pub fn thread_group(&self) -> &Arc<ThreadGroup> {
1005        &self.thread_group
1006    }
1007
1008    pub fn has_same_address_space(&self, other: Option<&Arc<MemoryManager>>) -> bool {
1009        match (self.mm(), other) {
1010            (Ok(this), Some(other)) => Arc::ptr_eq(&this, other),
1011            (Err(_), None) => true,
1012            _ => false,
1013        }
1014    }
1015
1016    pub fn flags(&self) -> TaskFlags {
1017        self.flags.load(Ordering::Relaxed)
1018    }
1019
1020    pub fn is_spawned(&self) -> bool {
1021        self.flags().contains(TaskFlags::SPAWNED)
1022    }
1023
1024    /// When the task exits, if there is a notification that needs to propagate
1025    /// to a ptracer, make sure it will propagate.
1026    pub fn set_ptrace_zombie(&self, pids: &mut crate::task::PidTable) {
1027        let pgid = self.thread_group().read().process_group.leader;
1028        let exit_signal = self.thread_group().read().exit_signal.clone();
1029        let mut state = self.write();
1030        state.set_stopped(StopState::ForceAwake, None, None, None);
1031        if let Some(ptrace) = &mut state.ptrace {
1032            // Add a zombie that the ptracer will notice.
1033            ptrace.last_signal_waitable = true;
1034            let tracer_pid = ptrace.get_pid();
1035            let tracer_tg = pids.get_thread_group(tracer_pid);
1036            if let Some(tracer_tg) = tracer_tg {
1037                drop(state);
1038                let mut tracer_state = tracer_tg.write();
1039
1040                let exit_status = self.exit_status().unwrap_or_else(|| {
1041                    starnix_logging::log_error!("Exiting without an exit code.");
1042                    ExitStatus::Exit(u8::MAX)
1043                });
1044                let uid = self.real_creds().uid;
1045                let exit_info = ProcessExitInfo { status: exit_status, exit_signal };
1046                let zombie = ZombieProcess {
1047                    thread_group_key: self.thread_group_key.clone(),
1048                    pgid,
1049                    uid,
1050                    exit_info: exit_info,
1051                    // ptrace doesn't need this.
1052                    time_stats: TaskTimeStats::default(),
1053                    is_canonical: false,
1054                };
1055
1056                tracer_state.zombie_ptracees.add(pids, self.tid, zombie);
1057            };
1058        }
1059    }
1060
1061    /// Disconnects this task from the tracer.
1062    pub fn ptrace_disconnect(&self) {
1063        // Get a reference to the ptracer thread group through the weak reference in PtraceCoreState
1064        // to avoid acquiring a PidTable lock.
1065        let tracer_tg = self
1066            .read()
1067            .ptrace
1068            .as_ref()
1069            .map(|p| p.core_state.thread_group.clone())
1070            .and_then(|tg| tg.upgrade());
1071        if let Some(tg) = tracer_tg {
1072            tg.ptracees.lock().remove(&self.tid);
1073        }
1074    }
1075
1076    pub fn exit_status(&self) -> Option<ExitStatus> {
1077        self.is_exitted().then(|| self.read().exit_status.clone()).flatten()
1078    }
1079
1080    pub fn is_exitted(&self) -> bool {
1081        self.flags().contains(TaskFlags::EXITED)
1082    }
1083
1084    pub fn load_stopped(&self) -> StopState {
1085        self.stop_state.load(Ordering::Relaxed)
1086    }
1087
1088    /// Upgrade a [`Weak<Task>`], returning [`Err(ESRCH)`] if the reference cannot be borrowed.
1089    pub fn from_weak(weak: &Weak<Task>) -> Result<Arc<Task>, Errno> {
1090        weak.upgrade().ok_or_else(|| errno!(ESRCH))
1091    }
1092
1093    /// Internal function for creating a Task object. Useful when you need to specify the value of
1094    /// every field. create_process and create_thread are more likely to be what you want.
1095    ///
1096    /// Any fields that should be initialized fresh for every task, even if the task was created
1097    /// with fork, are initialized to their defaults inside this function. All other fields are
1098    /// passed as parameters.
1099    #[allow(clippy::let_and_return)]
1100    pub fn new(
1101        tid: tid_t,
1102        command: TaskCommand,
1103        thread_group: Arc<ThreadGroup>,
1104        thread: Option<zx::Thread>,
1105        files: FdTable,
1106        mm: Option<Arc<MemoryManager>>,
1107        // The only case where fs should be None if when building the initial task that is the
1108        // used to build the initial FsContext.
1109        fs: Arc<FsContext>,
1110        creds: Arc<Credentials>,
1111        abstract_socket_namespace: Arc<AbstractUnixSocketNamespace>,
1112        abstract_vsock_namespace: Arc<AbstractVsockSocketNamespace>,
1113        signal_mask: SigSet,
1114        kernel_signals: VecDeque<KernelSignal>,
1115        vfork_event: Option<Arc<zx::Event>>,
1116        scheduler_state: SchedulerState,
1117        uts_ns: UtsNamespaceHandle,
1118        no_new_privs: bool,
1119        seccomp_filter_state: SeccompState,
1120        seccomp_filters: SeccompFilterContainer,
1121        robust_list_head: RobustListHeadPtr,
1122        timerslack_ns: u64,
1123    ) -> Arc<Self> {
1124        let thread_group_key = ThreadGroupKey::from(&thread_group);
1125        Arc::new_cyclic(|weak_self| {
1126            let task = Task {
1127                weak_self: weak_self.clone(),
1128                tid,
1129                thread_group_key: thread_group_key.clone(),
1130                kernel: Arc::clone(&thread_group.kernel),
1131                thread_group,
1132                live_state: RcuOptionBox::new(Some(TaskLiveState {
1133                    thread: RwLock::new(ZirconThread::new(thread.map(Arc::new))),
1134                    files,
1135                    mm: RcuOptionArc::new(mm),
1136                    fs: RcuArc::new(fs),
1137                    abstract_socket_namespace,
1138                    abstract_vsock_namespace,
1139                    proc_pid_directory_cache: Default::default(),
1140                })),
1141                vfork_event,
1142                stop_state: AtomicStopState::new(StopState::Awake),
1143                flags: AtomicTaskFlags::new(TaskFlags::empty()),
1144                mutable_state: RwLock::new(TaskMutableState {
1145                    clear_child_tid: UserRef::default(),
1146                    signals: SignalState::with_mask(signal_mask),
1147                    kernel_signals,
1148                    exit_status: None,
1149                    scheduler_state,
1150                    uts_ns,
1151                    no_new_privs,
1152                    oom_score_adj: Default::default(),
1153                    seccomp_filters,
1154                    robust_list_head,
1155                    timerslack_ns,
1156                    // The default timerslack is set to the current timerslack of the creating thread.
1157                    default_timerslack_ns: timerslack_ns,
1158                    ptrace: None,
1159                    captured_thread_state: None,
1160                }),
1161                persistent_info: TaskPersistentInfoState::new(
1162                    tid,
1163                    thread_group_key,
1164                    command,
1165                    creds,
1166                ),
1167                seccomp_filter_state,
1168                trace_syscalls: AtomicBool::new(false),
1169            };
1170
1171            #[cfg(any(test, debug_assertions))]
1172            {
1173                // Note that `Kernel::pids` is already locked by the caller of `Task::new()`.
1174                let _l1 = task.read();
1175                let _l2 = task.persistent_info.lock_creds();
1176                let _l3 = task.persistent_info.command_guard();
1177            }
1178            task
1179        })
1180    }
1181
1182    state_accessor!(Task, mutable_state);
1183
1184    /// Returns the real credentials of the task as a short-lived RCU-guarded reference. These
1185    /// credentials are used to check permissions for actions performed on the task. If the task
1186    /// itself is performing an action, use `CurrentTask::current_creds` instead. This does not
1187    /// lock the credentials.
1188    pub fn real_creds(&self) -> RcuReadGuard<Credentials> {
1189        self.persistent_info.real_creds()
1190    }
1191
1192    /// Returns a new long-lived reference to the real credentials of the task.  These credentials
1193    /// are used to check permissions for actions performed on the task. If the task itself is
1194    /// performing an action, use `CurrentTask::current_creds` instead. This does not lock the
1195    /// credentials.
1196    pub fn clone_creds(&self) -> Arc<Credentials> {
1197        self.persistent_info.clone_creds()
1198    }
1199
1200    pub fn ptracer_task(&self) -> Option<Arc<Task>> {
1201        self.get_task(self.read().ptrace.as_ref().map(|p| p.core_state.pid)?).ok()
1202    }
1203
1204    /// Returns the live state of the task, if it exists.
1205    ///
1206    /// # Errors
1207    ///
1208    /// Returns [`Err(ESRCH)`] if the task has already transitioned to a zombie state and its live
1209    /// resources have been dropped.
1210    #[track_caller]
1211    pub fn live(&self) -> Result<RcuReadGuard<TaskLiveState>, Errno> {
1212        self.live_state.read().ok_or_else(|| errno!(ESRCH))
1213    }
1214
1215    /// Returns the memory manager of the task, if it exists.
1216    ///
1217    /// # Errors
1218    ///
1219    /// Returns [`Err(errno)`] where `errno` is:
1220    ///
1221    ///   - `ESRCH`: the task is dead and its live resources have been dropped.
1222    ///   - `EINVAL`: the task does not have a memory manager.
1223    #[track_caller]
1224    pub fn mm(&self) -> Result<Arc<MemoryManager>, Errno> {
1225        self.live()?.mm.to_option_arc().ok_or_else(|| errno!(EINVAL))
1226    }
1227
1228    /// Modify the given elements of the scheduler state with new values and update the
1229    /// task's thread's role.
1230    pub(crate) fn set_scheduler_policy_priority_and_reset_on_fork(
1231        &self,
1232        policy: SchedulingPolicy,
1233        priority: RealtimePriority,
1234        reset_on_fork: bool,
1235    ) -> Result<(), Errno> {
1236        self.update_scheduler_state_then_role(|scheduler_state| {
1237            scheduler_state.policy = policy;
1238            scheduler_state.realtime_priority = priority;
1239            scheduler_state.reset_on_fork = reset_on_fork;
1240        })
1241    }
1242
1243    /// Modify the scheduler state's priority and update the task's thread's role.
1244    pub(crate) fn set_scheduler_priority(&self, priority: RealtimePriority) -> Result<(), Errno> {
1245        self.update_scheduler_state_then_role(|scheduler_state| {
1246            scheduler_state.realtime_priority = priority
1247        })
1248    }
1249
1250    /// Modify the scheduler state's nice and update the task's thread's role.
1251    pub(crate) fn set_scheduler_nice(&self, nice: NormalPriority) -> Result<(), Errno> {
1252        self.update_scheduler_state_then_role(|scheduler_state| {
1253            scheduler_state.normal_priority = nice
1254        })
1255    }
1256
1257    /// Overwrite the existing scheduler state with a new one and update the task's thread's role.
1258    pub fn set_scheduler_state(&self, scheduler_state: SchedulerState) -> Result<(), Errno> {
1259        self.update_scheduler_state_then_role(|task_scheduler_state| {
1260            *task_scheduler_state = scheduler_state
1261        })
1262    }
1263
1264    /// Update the task's thread's role based on its current scheduler state without making any
1265    /// changes to the state.
1266    ///
1267    /// This should be called on tasks that have newly created threads, e.g. after cloning.
1268    pub fn sync_scheduler_state_to_role(&self) -> Result<(), Errno> {
1269        self.update_scheduler_state_then_role(|_| {})
1270    }
1271
1272    fn update_scheduler_state_then_role(
1273        &self,
1274        updater: impl FnOnce(&mut SchedulerState),
1275    ) -> Result<(), Errno> {
1276        let new_scheduler_state = {
1277            // Hold the task state lock as briefly as possible, it's not needed to update the role.
1278            let mut state = self.write();
1279            updater(&mut state.scheduler_state);
1280            state.scheduler_state
1281        };
1282        self.thread_group().kernel.scheduler.set_thread_role(self, new_scheduler_state)?;
1283        Ok(())
1284    }
1285
1286    /// Signals the vfork event, if any, to unblock waiters.
1287    pub fn signal_vfork(&self) {
1288        if let Some(event) = &self.vfork_event {
1289            if let Err(status) = event.signal(Signals::NONE, Signals::USER_0) {
1290                log_warn!("Failed to set vfork signal {status}");
1291            }
1292        };
1293    }
1294
1295    /// Blocks the caller until the task has exited or executed execve(). This is used to implement
1296    /// vfork() and clone(... CLONE_VFORK, ...). The task must have created with CLONE_EXECVE.
1297    pub fn wait_for_execve(&self, task_to_wait: Weak<Task>) -> Result<(), Errno> {
1298        let event = task_to_wait.upgrade().and_then(|t| t.vfork_event.clone());
1299        if let Some(event) = event {
1300            event
1301                .wait_one(zx::Signals::USER_0, zx::MonotonicInstant::INFINITE)
1302                .map_err(|status| from_status_like_fdio!(status))?;
1303        }
1304        Ok(())
1305    }
1306
1307    /// If needed, clear the child tid for this task.
1308    ///
1309    /// Userspace can ask us to clear the child tid and issue a futex wake at
1310    /// the child tid address when we tear down a task. For example, bionic
1311    /// uses this mechanism to implement pthread_join. The thread that calls
1312    /// pthread_join sleeps using FUTEX_WAIT on the child tid address. We wake
1313    /// them up here to let them know the thread is done.
1314    pub fn clear_child_tid_if_needed<L>(&self, locked: &mut Locked<L>) -> Result<(), Errno>
1315    where
1316        L: LockBefore<TerminalLock>,
1317    {
1318        let mut state = self.write();
1319        let user_tid = state.clear_child_tid;
1320        if !user_tid.is_null() {
1321            let zero: tid_t = 0;
1322            self.write_object(user_tid, &zero)?;
1323            self.kernel().shared_futexes.wake(
1324                locked,
1325                self,
1326                user_tid.addr(),
1327                usize::MAX,
1328                FUTEX_BITSET_MATCH_ANY,
1329            )?;
1330            state.clear_child_tid = UserRef::default();
1331        }
1332        Ok(())
1333    }
1334
1335    pub fn get_task(&self, tid: tid_t) -> Result<Arc<Task>, Errno> {
1336        self.kernel().pids.read().get_task(tid)
1337    }
1338
1339    pub fn get_pid(&self) -> pid_t {
1340        self.thread_group_key.pid()
1341    }
1342
1343    pub fn get_tid(&self) -> tid_t {
1344        self.tid
1345    }
1346
1347    pub fn is_leader(&self) -> bool {
1348        self.get_pid() == self.get_tid()
1349    }
1350
1351    pub fn read_argv(&self, max_len: usize) -> Result<Vec<FsString>, Errno> {
1352        // argv is empty for kthreads
1353        let Ok(mm) = self.mm() else {
1354            return Ok(vec![]);
1355        };
1356        let (argv_start, argv_end) = {
1357            let mm_state = mm.state.read();
1358            (mm_state.argv_start, mm_state.argv_end)
1359        };
1360
1361        let len_to_read = std::cmp::min(argv_end - argv_start, max_len);
1362        self.read_nul_delimited_c_string_list(argv_start, len_to_read)
1363    }
1364
1365    pub fn read_argv0(&self) -> Result<FsString, Errno> {
1366        // argv is empty for kthreads
1367        let Ok(mm) = self.mm() else {
1368            return Ok(FsString::default());
1369        };
1370        let argv_start = {
1371            let mm_state = mm.state.read();
1372            mm_state.argv_start
1373        };
1374        // Assuming a 64-bit arch width is fine for a type that's just u8's on all arches.
1375        let argv_start = UserCString::new(&ArchWidth::Arch64, argv_start);
1376        self.read_path(argv_start)
1377    }
1378
1379    pub fn read_env(&self, max_len: usize) -> Result<Vec<FsString>, Errno> {
1380        // environment is empty for kthreads
1381        let Ok(mm) = self.mm() else { return Ok(vec![]) };
1382        let (env_start, env_end) = {
1383            let mm_state = mm.state.read();
1384            (mm_state.environ_start, mm_state.environ_end)
1385        };
1386
1387        let len_to_read = std::cmp::min(env_end - env_start, max_len);
1388        self.read_nul_delimited_c_string_list(env_start, len_to_read)
1389    }
1390
1391    pub fn thread_runtime_info(&self) -> Result<zx::TaskRuntimeInfo, Errno> {
1392        self.live()?
1393            .thread
1394            .read()
1395            .as_ref()
1396            .ok_or_else(|| errno!(EINVAL))?
1397            .get_runtime_info()
1398            .map_err(|status| from_status_like_fdio!(status))
1399    }
1400
1401    pub fn real_fscred(&self) -> FsCred {
1402        self.real_creds().as_fscred()
1403    }
1404
1405    /// Interrupts the current task.
1406    ///
1407    /// This will interrupt any blocking syscalls if the task is blocked on one.
1408    /// The signal_state of the task must not be locked.
1409    pub fn interrupt(&self) {
1410        let Ok(live) = self.live() else {
1411            log_warn!("Cannot interrupt dead task {}", self.get_tid());
1412            return;
1413        };
1414
1415        self.read().signals.run_state.wake();
1416        if let Some(thread) = live.thread.read().as_ref() {
1417            #[allow(
1418                clippy::undocumented_unsafe_blocks,
1419                reason = "Force documented unsafe blocks in Starnix"
1420            )]
1421            let status = unsafe { zx::sys::zx_restricted_kick(thread.raw_handle(), 0) };
1422            if status != zx::sys::ZX_OK {
1423                // zx_restricted_kick() could return ZX_ERR_BAD_STATE if the target thread is already in the
1424                // DYING or DEAD states. That's fine since it means that the task is in the process of
1425                // tearing down, so allow it.
1426                assert_eq!(status, zx::sys::ZX_ERR_BAD_STATE);
1427            }
1428        }
1429    }
1430
1431    pub fn command(&self) -> TaskCommand {
1432        self.persistent_info.command.lock().clone()
1433    }
1434
1435    pub fn set_command_name(&self, mut new_name: TaskCommand) {
1436        let Ok(live) = self.live() else {
1437            log_warn!("Cannot set command name for dead task {}", self.get_tid());
1438            return;
1439        };
1440
1441        // If we're going to update the process name, see if we can get a longer one than normally
1442        // provided in the Linux uapi. Only choose the argv0-based name if it's a superset of the
1443        // uapi-provided name to avoid clobbering the name provided by the user.
1444        if let Ok(argv0) = self.read_argv0() {
1445            let argv0 = TaskCommand::from_path_bytes(&argv0);
1446            if let Some(embedded_name) = argv0.try_embed(&new_name) {
1447                new_name = embedded_name;
1448            }
1449        }
1450
1451        // Acquire this before modifying Zircon state to ensure consistency under concurrent access.
1452        // Ideally this would also guard the logic above to read argv[0] but we can't due to lock
1453        // cycles with SELinux checks.
1454        let mut command_guard = self.persistent_info.command_guard();
1455
1456        // Set the name on the Linux thread.
1457        if let Some(thread) = live.thread.read().as_ref() {
1458            set_zx_name(&**thread, new_name.as_bytes());
1459        }
1460
1461        // If this is the thread group leader, use this name for the process too.
1462        if self.is_leader() {
1463            set_zx_name(&*self.thread_group().process, new_name.as_bytes());
1464            let _ = zx::Thread::raise_user_exception(
1465                zx::RaiseExceptionOptions::TARGET_JOB_DEBUGGER,
1466                zx::sys::ZX_EXCP_USER_CODE_PROCESS_NAME_CHANGED,
1467                0,
1468            );
1469        }
1470
1471        // Avoid a lock cycle by dropping the guard before notifying memory attribution of the
1472        // change.
1473        *command_guard = new_name;
1474        drop(command_guard);
1475
1476        if self.is_leader() {
1477            if let Some(notifier) = &self.thread_group().read().notifier {
1478                let _ = notifier.send(MemoryAttributionLifecycleEvent::name_change(self.tid));
1479            }
1480        }
1481    }
1482
1483    pub fn set_seccomp_state(&self, state: SeccompStateValue) -> Result<(), Errno> {
1484        self.seccomp_filter_state.set(&state)
1485    }
1486
1487    pub fn state_code(&self) -> TaskStateCode {
1488        let status = self.read();
1489        if status.exit_status.is_some() {
1490            TaskStateCode::Zombie
1491        } else if status.signals.run_state.is_blocked() {
1492            let stop_state = self.load_stopped();
1493            if stop_state.ptrace_only() && stop_state.is_stopped() {
1494                TaskStateCode::TracingStop
1495            } else {
1496                TaskStateCode::Sleeping
1497            }
1498        } else {
1499            TaskStateCode::Running
1500        }
1501    }
1502
1503    pub fn time_stats(&self) -> TaskTimeStats {
1504        use zx::Task;
1505        // TODO(https://fxbug.dev/297440106): Return time stats for zombie tasks.
1506        let live = match self.live() {
1507            Ok(live) => live,
1508            Err(_) => return TaskTimeStats::default(),
1509        };
1510        let info = match live.thread.read().as_ref() {
1511            Some(thread) => thread.get_runtime_info().expect("Failed to get thread stats"),
1512            None => return TaskTimeStats::default(),
1513        };
1514
1515        TaskTimeStats {
1516            user_time: zx::MonotonicDuration::from_nanos(info.cpu_time),
1517            // TODO(https://fxbug.dev/42078242): How can we calculate system time?
1518            system_time: zx::MonotonicDuration::default(),
1519        }
1520    }
1521
1522    pub fn get_signal_action(&self, signal: Signal) -> sigaction_t {
1523        self.thread_group().signal_actions.get(signal)
1524    }
1525
1526    pub fn should_check_for_pending_signals(&self) -> bool {
1527        self.flags().intersects(
1528            TaskFlags::KERNEL_SIGNALS_AVAILABLE
1529                | TaskFlags::SIGNALS_AVAILABLE
1530                | TaskFlags::TEMPORARY_SIGNAL_MASK,
1531        ) || self.thread_group.has_pending_signals.load(Ordering::Relaxed)
1532    }
1533
1534    pub fn record_pid_koid_mapping(&self) {
1535        let Ok(live) = self.live() else {
1536            log_warn!("Cannot record pid/koid mapping for dead task {}", self.get_tid());
1537            return;
1538        };
1539
1540        let Some(ref mapping_table) = *self.kernel().pid_to_koid_mapping.read() else { return };
1541
1542        let pkoid = self.thread_group().get_process_koid().ok();
1543        let tkoid = live.thread.read().koid();
1544        mapping_table.write().insert(self.tid, KoidPair { process: pkoid, thread: tkoid });
1545    }
1546}
1547
impl Drop for Task {
    /// Checks, in builds with debug assertions, that the live state has already been cleared by
    /// the time the task is dropped.
    fn drop(&mut self) {
        debug_assert!(self.live_state.read().is_none());
    }
}
1553
1554impl MemoryAccessor for Task {
1555    fn read_memory<'a>(
1556        &self,
1557        addr: UserAddress,
1558        bytes: &'a mut [MaybeUninit<u8>],
1559    ) -> Result<&'a mut [u8], Errno> {
1560        // Using a `Task` to read memory generally indicates that the memory
1561        // is being read from a task different than the `CurrentTask`. When
1562        // this `Task` is not current, its address space is not mapped
1563        // so we need to go through the VMO.
1564        self.mm()?.syscall_read_memory(addr, bytes)
1565    }
1566
1567    fn read_memory_partial_until_null_byte<'a>(
1568        &self,
1569        addr: UserAddress,
1570        bytes: &'a mut [MaybeUninit<u8>],
1571    ) -> Result<&'a mut [u8], Errno> {
1572        // Using a `Task` to read memory generally indicates that the memory
1573        // is being read from a task different than the `CurrentTask`. When
1574        // this `Task` is not current, its address space is not mapped
1575        // so we need to go through the VMO.
1576        self.mm()?.syscall_read_memory_partial_until_null_byte(addr, bytes)
1577    }
1578
1579    fn read_memory_partial<'a>(
1580        &self,
1581        addr: UserAddress,
1582        bytes: &'a mut [MaybeUninit<u8>],
1583    ) -> Result<&'a mut [u8], Errno> {
1584        // Using a `Task` to read memory generally indicates that the memory
1585        // is being read from a task different than the `CurrentTask`. When
1586        // this `Task` is not current, its address space is not mapped
1587        // so we need to go through the VMO.
1588        self.mm()?.syscall_read_memory_partial(addr, bytes)
1589    }
1590
1591    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1592        // Using a `Task` to write memory generally indicates that the memory
1593        // is being written to a task different than the `CurrentTask`. When
1594        // this `Task` is not current, its address space is not mapped
1595        // so we need to go through the VMO.
1596        self.mm()?.syscall_write_memory(addr, bytes)
1597    }
1598
1599    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1600        // Using a `Task` to write memory generally indicates that the memory
1601        // is being written to a task different than the `CurrentTask`. When
1602        // this `Task` is not current, its address space is not mapped
1603        // so we need to go through the VMO.
1604        self.mm()?.syscall_write_memory_partial(addr, bytes)
1605    }
1606
1607    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
1608        // Using a `Task` to zero memory generally indicates that the memory
1609        // is being zeroed from a task different than the `CurrentTask`. When
1610        // this `Task` is not current, its address space is not mapped
1611        // so we need to go through the VMO.
1612        self.mm()?.syscall_zero(addr, length)
1613    }
1614}
1615
1616impl TaskMemoryAccessor for Task {
1617    fn maximum_valid_address(&self) -> Option<UserAddress> {
1618        self.mm().map(|mm| mm.maximum_valid_user_address).ok()
1619    }
1620}
1621
1622impl fmt::Debug for Task {
1623    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1624        write!(
1625            f,
1626            "{}:{}[{}]",
1627            self.thread_group().leader,
1628            self.tid,
1629            self.persistent_info.command.lock()
1630        )
1631    }
1632}
1633
1634impl cmp::PartialEq for Task {
1635    fn eq(&self, other: &Self) -> bool {
1636        let ptr: *const Task = self;
1637        let other_ptr: *const Task = other;
1638        ptr == other_ptr
1639    }
1640}
1641
1642impl cmp::Eq for Task {}
1643
#[cfg(test)]
mod test {
    use super::*;
    use crate::security;
    use crate::testing::*;
    use starnix_uapi::auth::{CAP_SYS_ADMIN, Capabilities};
    use starnix_uapi::resource_limits::Resource;
    use starnix_uapi::signals::SIGCHLD;
    use starnix_uapi::{CLONE_SIGHAND, CLONE_THREAD, CLONE_VM, rlimit};

    /// Checks that the initial task has tid 1, that a second task gets a tid of at least 2,
    /// and that both can be looked up in the kernel's pid table.
    #[::fuchsia::test]
    async fn test_tid_allocation() {
        spawn_kernel_and_run(async |locked, current_task| {
            let kernel = current_task.kernel();
            assert_eq!(current_task.get_tid(), 1);

            let second_task = create_task(locked, &kernel, "another-task");
            let second_tid = second_task.get_tid();
            assert!(second_tid >= 2);

            let pid_table = kernel.pids.read();
            assert_eq!(pid_table.get_task(1).unwrap().get_tid(), 1);
            assert_eq!(pid_table.get_task(second_tid).unwrap().get_tid(), second_tid);
        })
        .await;
    }

    /// Checks pid/tid/ppid relationships for a thread-style clone and for a process-style
    /// clone of the current task.
    #[::fuchsia::test]
    async fn test_clone_pid_and_parent_pid() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Thread-style clone: same pid and thread group leader, different tid.
            let thread_flags = (CLONE_THREAD | CLONE_VM | CLONE_SIGHAND) as u64;
            let sibling = current_task.clone_task_for_test(locked, thread_flags, Some(SIGCHLD));
            assert_eq!(current_task.get_pid(), sibling.get_pid());
            assert_ne!(current_task.get_tid(), sibling.get_tid());
            assert_eq!(current_task.thread_group().leader, sibling.thread_group().leader);

            // Process-style clone: different pid and tid, and the parent pid is the current pid.
            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert_ne!(current_task.get_pid(), child.get_pid());
            assert_ne!(current_task.get_tid(), child.get_tid());
            assert_eq!(current_task.get_pid(), child.thread_group().read().get_ppid());
        })
        .await;
    }

    /// Checks that the initial task is capable of `CAP_SYS_ADMIN` with an empty inheritable
    /// set, and that the capability is gone after switching to credentials built with
    /// `Credentials::with_ids(1, 1)`.
    #[::fuchsia::test]
    async fn test_root_capabilities() {
        spawn_kernel_and_run(async |_, current_task| {
            assert!(security::is_task_capable_noaudit(current_task, CAP_SYS_ADMIN));
            assert_eq!(current_task.real_creds().cap_inheritable, Capabilities::empty());

            let unprivileged = Credentials::with_ids(1, 1);
            current_task.set_creds(unprivileged);
            assert!(!security::is_task_capable_noaudit(current_task, CAP_SYS_ADMIN));
        })
        .await;
    }

    /// Checks `is_spawned()` for the init task, for a task produced by `clone_task`, and for a
    /// task produced by `clone_task_for_test`.
    #[::fuchsia::test]
    async fn test_is_spawned() {
        spawn_kernel_and_run(async |locked, current_task| {
            // The init task is executing, so it must report itself as spawned.
            assert!(current_task.is_spawned());

            // A freshly cloned task has not been executed yet and therefore is not spawned.
            let not_yet_run = current_task
                .clone_task(
                    locked,
                    0,
                    Some(SIGCHLD),
                    UserRef::default(),
                    UserRef::default(),
                    UserRef::default(),
                )
                .expect("failed to create task in test");
            assert!(!not_yet_run.is_spawned());
            not_yet_run.release(locked);

            // Test clones are marked as spawned: no thread is spawned for them on purpose, yet
            // they are expected to behave like normal tasks.
            let for_test = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            assert!(for_test.is_spawned());
        })
        .await;
    }

    /// Checks that an `FSIZE` resource limit set on the parent's thread group is observed on a
    /// task cloned afterwards.
    #[::fuchsia::test]
    async fn test_clone_rlimit() {
        spawn_kernel_and_run(async |locked, current_task| {
            // The starting limit must differ from the value set below for the test to mean
            // anything.
            let initial_fsize = current_task.thread_group().get_rlimit(locked, Resource::FSIZE);
            assert_ne!(initial_fsize, 10);

            current_task
                .thread_group()
                .limits
                .lock(locked)
                .set(Resource::FSIZE, rlimit { rlim_cur: 10, rlim_max: 100 });
            let parent_fsize = current_task.thread_group().get_rlimit(locked, Resource::FSIZE);
            assert_eq!(parent_fsize, 10);

            let child = current_task.clone_task_for_test(locked, 0, Some(SIGCHLD));
            let inherited_fsize = child.thread_group().get_rlimit(locked, Resource::FSIZE);
            assert_eq!(inherited_fsize, 10);
        })
        .await;
    }
}