Skip to main content

starnix_core/task/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::execution::execute_task;
6use crate::mm::{DumpPolicy, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
7use crate::ptrace::{
8    PR_SET_PTRACER_ANY, PtraceAllowedPtracers, PtraceAttachType, PtraceOptions, ptrace_attach,
9    ptrace_dispatch, ptrace_traceme,
10};
11use crate::security;
12use crate::signals::syscalls::RUsagePtr;
13use crate::task::{
14    CurrentTask, ExitStatus, NormalPriority, SchedulingPolicy, SeccompAction, SeccompStateValue,
15    SyslogAccess, Task, ThreadGroup, max_priority_for_sched_policy, min_priority_for_sched_policy,
16};
17use crate::vfs::{
18    FdNumber, FileHandle, MountNamespaceFile, PidFdFileObject, UserBuffersOutputBuffer,
19    VecOutputBuffer,
20};
21use starnix_logging::{log_error, log_info, log_trace, track_stub};
22use starnix_sync::{Locked, RwLock, Unlocked};
23use starnix_syscalls::SyscallResult;
24use starnix_task_command::TaskCommand;
25use starnix_types::ownership::WeakRef;
26use starnix_types::time::timeval_from_duration;
27use starnix_uapi::auth::{
28    CAP_SETGID, CAP_SETPCAP, CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SYS_RESOURCE,
29    CAP_SYS_TTY_CONFIG, Capabilities, Credentials, PTRACE_MODE_READ_REALCREDS, SecureBits,
30};
31use starnix_uapi::errors::{ENAMETOOLONG, Errno};
32use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
33use starnix_uapi::kcmp::KcmpResource;
34use starnix_uapi::open_flags::OpenFlags;
35use starnix_uapi::resource_limits::Resource;
36use starnix_uapi::signals::{Signal, UncheckedSignal};
37use starnix_uapi::syslog::SyslogAction;
38use starnix_uapi::user_address::{
39    ArchSpecific, MappingMultiArchUserRef, MultiArchUserRef, UserAddress, UserCString,
40    UserCStringPtr, UserRef,
41};
42use starnix_uapi::vfs::ResolveFlags;
43use starnix_uapi::{
44    __user_cap_data_struct, __user_cap_header_struct, _LINUX_CAPABILITY_VERSION_1,
45    _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3, AT_EMPTY_PATH, AT_SYMLINK_NOFOLLOW,
46    BPF_MAXINSNS, CLONE_ARGS_SIZE_VER0, CLONE_ARGS_SIZE_VER1, CLONE_ARGS_SIZE_VER2, CLONE_FILES,
47    CLONE_FS, CLONE_NEWNS, CLONE_NEWUTS, CLONE_SETTLS, CLONE_VFORK, NGROUPS_MAX, PR_CAP_AMBIENT,
48    PR_CAP_AMBIENT_CLEAR_ALL, PR_CAP_AMBIENT_IS_SET, PR_CAP_AMBIENT_LOWER, PR_CAP_AMBIENT_RAISE,
49    PR_CAPBSET_DROP, PR_CAPBSET_READ, PR_GET_CHILD_SUBREAPER, PR_GET_DUMPABLE, PR_GET_KEEPCAPS,
50    PR_GET_NAME, PR_GET_NO_NEW_PRIVS, PR_GET_SECCOMP, PR_GET_SECUREBITS, PR_SET_CHILD_SUBREAPER,
51    PR_SET_DUMPABLE, PR_SET_KEEPCAPS, PR_SET_NAME, PR_SET_NO_NEW_PRIVS, PR_SET_PDEATHSIG,
52    PR_SET_PTRACER, PR_SET_SECCOMP, PR_SET_SECUREBITS, PR_SET_TIMERSLACK, PR_SET_VMA,
53    PR_SET_VMA_ANON_NAME, PRIO_PROCESS, PTRACE_ATTACH, PTRACE_SEIZE, PTRACE_TRACEME,
54    RUSAGE_CHILDREN, SCHED_RESET_ON_FORK, SECCOMP_FILTER_FLAG_LOG,
55    SECCOMP_FILTER_FLAG_NEW_LISTENER, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_TSYNC,
56    SECCOMP_FILTER_FLAG_TSYNC_ESRCH, SECCOMP_GET_ACTION_AVAIL, SECCOMP_GET_NOTIF_SIZES,
57    SECCOMP_MODE_FILTER, SECCOMP_MODE_STRICT, SECCOMP_SET_MODE_FILTER, SECCOMP_SET_MODE_STRICT,
58    c_char, c_int, clone_args, errno, error, gid_t, pid_t, rlimit, rusage, sched_param,
59    sock_filter, uapi, uid_t,
60};
61use static_assertions::const_assert;
62use std::cmp;
63use std::ffi::CString;
64use std::sync::{Arc, LazyLock};
65use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
66
67#[cfg(target_arch = "aarch64")]
68use starnix_uapi::{PR_GET_TAGGED_ADDR_CTRL, PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE};
69
70pub type SockFProgPtr =
71    MappingMultiArchUserRef<SockFProg, uapi::sock_fprog, uapi::arch32::sock_fprog>;
72pub type SockFilterPtr = MultiArchUserRef<uapi::sock_filter, uapi::arch32::sock_filter>;
73
74pub struct SockFProg {
75    pub len: u32,
76    pub filter: SockFilterPtr,
77}
78
79uapi::arch_map_data! {
80    BidiTryFrom<SockFProg, sock_fprog> {
81        len = len;
82        filter = filter;
83    }
84}
85
86uapi::check_arch_independent_layout! {
87    sched_param {
88        sched_priority,
89    }
90}
91
92pub fn do_clone(
93    locked: &mut Locked<Unlocked>,
94    current_task: &mut CurrentTask,
95    args: &clone_args,
96) -> Result<pid_t, Errno> {
97    security::check_task_create_access(current_task)?;
98
99    let child_exit_signal = if args.exit_signal == 0 {
100        None
101    } else {
102        Some(Signal::try_from(UncheckedSignal::new(args.exit_signal))?)
103    };
104
105    let mut new_task = current_task.clone_task(
106        locked,
107        args.flags,
108        child_exit_signal,
109        UserRef::<pid_t>::new(UserAddress::from(args.parent_tid)),
110        UserRef::<pid_t>::new(UserAddress::from(args.child_tid)),
111        UserRef::<FdNumber>::new(UserAddress::from(args.pidfd)),
112    )?;
113
114    // Set the result register to 0 for the return value from clone in the
115    // cloned process.
116    new_task.thread_state.registers.set_return_register(0);
117    let (trace_kind, ptrace_state) = current_task.get_ptrace_core_state_for_clone(args);
118
119    if args.stack != 0 {
120        // In clone() the `stack` argument points to the top of the stack, while in clone3()
121        // `stack` points to the bottom of the stack. Therefore, in clone3() we need to add
122        // `stack_size` to calculate the stack pointer. Note that in clone() `stack_size` is 0.
123        new_task
124            .thread_state
125            .registers
126            .set_stack_pointer_register(args.stack.wrapping_add(args.stack_size));
127    }
128
129    if args.flags & (CLONE_SETTLS as u64) != 0 {
130        new_task.thread_state.registers.set_thread_pointer_register(args.tls);
131    }
132
133    let tid = new_task.task.tid;
134    let task_ref = WeakRef::from(&new_task.task);
135    execute_task(locked, new_task, |_, _| Ok(()), |_| {}, ptrace_state)?;
136
137    current_task.ptrace_event(locked, trace_kind, tid as u64);
138
139    if args.flags & (CLONE_VFORK as u64) != 0 {
140        current_task.wait_for_execve(task_ref)?;
141        current_task.ptrace_event(locked, PtraceOptions::TRACEVFORKDONE, tid as u64);
142    }
143
144    Ok(tid)
145}
146
147pub fn sys_clone3(
148    locked: &mut Locked<Unlocked>,
149    current_task: &mut CurrentTask,
150    user_clone_args: UserRef<clone_args>,
151    user_clone_args_size: usize,
152) -> Result<pid_t, Errno> {
153    // Only these specific sized versions are supported.
154    if !(user_clone_args_size == CLONE_ARGS_SIZE_VER0 as usize
155        || user_clone_args_size == CLONE_ARGS_SIZE_VER1 as usize
156        || user_clone_args_size == CLONE_ARGS_SIZE_VER2 as usize)
157    {
158        return error!(EINVAL);
159    }
160
161    // The most recent version of the struct size should match our definition.
162    const_assert!(std::mem::size_of::<clone_args>() == CLONE_ARGS_SIZE_VER2 as usize);
163
164    let clone_args = current_task.read_object_partial(user_clone_args, user_clone_args_size)?;
165    do_clone(locked, current_task, &clone_args)
166}
167
168fn read_c_string_vector(
169    mm: &CurrentTask,
170    user_vector: UserCStringPtr,
171    elem_limit: usize,
172    vec_limit: usize,
173) -> Result<(Vec<CString>, usize), Errno> {
174    let mut user_current = user_vector;
175    let mut vector: Vec<CString> = vec![];
176    let mut vec_size: usize = 0;
177    loop {
178        let user_string = mm.read_multi_arch_ptr(user_current)?;
179        if user_string.is_null() {
180            break;
181        }
182        let string = mm
183            .read_c_string_to_vec(user_string, elem_limit)
184            .map_err(|e| if e.code == ENAMETOOLONG { errno!(E2BIG) } else { e })?;
185        let cstring = CString::new(string).map_err(|_| errno!(EINVAL))?;
186        vec_size =
187            vec_size.checked_add(cstring.as_bytes_with_nul().len()).ok_or_else(|| errno!(E2BIG))?;
188        if vec_size > vec_limit {
189            return error!(E2BIG);
190        }
191        vector.push(cstring);
192        user_current = user_current.next()?;
193    }
194    Ok((vector, vec_size))
195}
196
197pub fn sys_execve(
198    locked: &mut Locked<Unlocked>,
199    current_task: &mut CurrentTask,
200    user_path: UserCString,
201    user_argv: UserCStringPtr,
202    user_environ: UserCStringPtr,
203) -> Result<(), Errno> {
204    sys_execveat(locked, current_task, FdNumber::AT_FDCWD, user_path, user_argv, user_environ, 0)
205}
206
207pub fn sys_execveat(
208    locked: &mut Locked<Unlocked>,
209    current_task: &mut CurrentTask,
210    dir_fd: FdNumber,
211    user_path: UserCString,
212    user_argv: UserCStringPtr,
213    user_environ: UserCStringPtr,
214    flags: u32,
215) -> Result<(), Errno> {
216    if flags & !(AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) != 0 {
217        return error!(EINVAL);
218    }
219
220    // Calculate the limit for argv and environ size as 1/4 of the stack size, floored at 32 pages.
221    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
222    const PAGE_LIMIT: usize = 32;
223    let page_limit_size: usize = PAGE_LIMIT * *PAGE_SIZE as usize;
224    let rlimit = current_task.thread_group().get_rlimit(locked, Resource::STACK);
225    let stack_limit = rlimit / 4;
226    let argv_env_limit = cmp::max(page_limit_size, stack_limit as usize);
227
228    // The limit per argument or environment variable is 32 pages.
229    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
230    let (argv, argv_size) = if user_argv.is_null() {
231        (Vec::new(), 0)
232    } else {
233        read_c_string_vector(current_task, user_argv, page_limit_size, argv_env_limit)?
234    };
235
236    let (environ, _) = if user_environ.is_null() {
237        (Vec::new(), 0)
238    } else {
239        read_c_string_vector(
240            current_task,
241            user_environ,
242            page_limit_size,
243            argv_env_limit - argv_size,
244        )?
245    };
246
247    let path = &current_task.read_path(user_path)?;
248
249    log_trace!(argv:?, environ:?, flags:?; "execveat({dir_fd}, {path})");
250
251    let mut open_flags = OpenFlags::RDONLY;
252
253    if flags & AT_SYMLINK_NOFOLLOW != 0 {
254        open_flags |= OpenFlags::NOFOLLOW;
255    }
256
257    let executable = if path.is_empty() {
258        if flags & AT_EMPTY_PATH == 0 {
259            // If AT_EMPTY_PATH is not set, this is an error.
260            return error!(ENOENT);
261        }
262
263        // O_PATH allowed for:
264        //
265        //   Passing the file descriptor as the dirfd argument of
266        //   openat() and the other "*at()" system calls.  This
267        //   includes linkat(2) with AT_EMPTY_PATH (or via procfs
268        //   using AT_SYMLINK_FOLLOW) even if the file is not a
269        //   directory.
270        //
271        // See https://man7.org/linux/man-pages/man2/open.2.html
272        let file = current_task.get_file_allowing_opath(dir_fd)?;
273
274        // We are forced to reopen the file with O_RDONLY to get access to the underlying VMO.
275        // Note that skip the access check in the arguments in case the file mode does
276        // not actually have the read permission bit.
277        //
278        // This can happen because a file could have --x--x--x mode permissions and then
279        // be opened with O_PATH. Internally, the file operations would all be stubbed out
280        // for that file, which is undesirable here.
281        //
282        // See https://man7.org/linux/man-pages/man3/fexecve.3.html#DESCRIPTION
283        file.name.open(
284            locked,
285            current_task,
286            OpenFlags::RDONLY,
287            AccessCheck::check_for(Access::EXEC),
288        )?
289    } else {
290        current_task.open_file_at(
291            locked,
292            dir_fd,
293            path.as_ref(),
294            open_flags,
295            FileMode::default(),
296            ResolveFlags::empty(),
297            AccessCheck::check_for(Access::EXEC),
298        )?
299    };
300
301    // This path can affect script resolution (the path is appended to the script args)
302    // and the auxiliary value `AT_EXECFN` from the syscall `getauxval()`
303    let path = if dir_fd == FdNumber::AT_FDCWD {
304        // The file descriptor is CWD, so the path is exactly
305        // what the user specified.
306        path.to_vec()
307    } else {
308        // The path is `/dev/fd/N/P` where N is the file descriptor
309        // number and P is the user-provided path (if relative and non-empty).
310        //
311        // See https://man7.org/linux/man-pages/man2/execveat.2.html#NOTES
312        match path.first() {
313            Some(b'/') => {
314                // The user-provided path is absolute, so dir_fd is ignored.
315                path.to_vec()
316            }
317            Some(_) => {
318                // User-provided path is relative, append it.
319                let mut new_path = format!("/dev/fd/{}/", dir_fd.raw()).into_bytes();
320                new_path.append(&mut path.to_vec());
321                new_path
322            }
323            // User-provided path is empty
324            None => format!("/dev/fd/{}", dir_fd.raw()).into_bytes(),
325        }
326    };
327
328    let path = CString::new(path).map_err(|_| errno!(EINVAL))?;
329
330    current_task.exec(locked, executable, path, argv, environ)?;
331    Ok(())
332}
333
334pub fn sys_getcpu(
335    _locked: &mut Locked<Unlocked>,
336    current_task: &CurrentTask,
337    cpu_out: UserRef<u32>,
338    node_out: UserRef<u32>,
339) -> Result<(), Errno> {
340    // "When either cpu or node is NULL nothing is written to the respective pointer."
341    // from https://man7.org/linux/man-pages/man2/getcpu.2.html
342    if !cpu_out.is_null() {
343        let thread_stats = current_task
344            .live()
345            .thread
346            .read()
347            .as_ref()
348            .expect("current thread is never None when executing")
349            .stats()
350            .map_err(|e| errno!(EINVAL, format!("getting thread stats failed {e:?}")))?;
351        current_task.write_object(cpu_out, &thread_stats.last_scheduled_cpu)?;
352    }
353    if !node_out.is_null() {
354        // Zircon does not yet have a concept of NUMA task scheduling, always tell userspace that
355        // it's on the "first" node which should be true for non-NUMA systems.
356        track_stub!(TODO("https://fxbug.dev/325643815"), "getcpu() numa node");
357        current_task.write_object(node_out, &0)?;
358    }
359    Ok(())
360}
361
362pub fn sys_getpid(
363    _locked: &mut Locked<Unlocked>,
364    current_task: &CurrentTask,
365) -> Result<pid_t, Errno> {
366    Ok(current_task.get_pid())
367}
368
369pub fn sys_gettid(
370    _locked: &mut Locked<Unlocked>,
371    current_task: &CurrentTask,
372) -> Result<pid_t, Errno> {
373    Ok(current_task.get_tid())
374}
375
376pub fn sys_getppid(
377    _locked: &mut Locked<Unlocked>,
378    current_task: &CurrentTask,
379) -> Result<pid_t, Errno> {
380    Ok(current_task.thread_group().read().get_ppid())
381}
382
383fn get_task_or_current(current_task: &CurrentTask, pid: pid_t) -> WeakRef<Task> {
384    if pid == 0 { current_task.weak_task() } else { current_task.get_task(pid) }
385}
386
387pub fn sys_getsid(
388    _locked: &mut Locked<Unlocked>,
389    current_task: &CurrentTask,
390    pid: pid_t,
391) -> Result<pid_t, Errno> {
392    let weak = get_task_or_current(current_task, pid);
393    let target_task = Task::from_weak(&weak)?;
394    security::check_task_getsid(current_task, &target_task)?;
395    let sid = target_task.thread_group().read().process_group.session.leader;
396    Ok(sid)
397}
398
399pub fn sys_getpgid(
400    _locked: &mut Locked<Unlocked>,
401    current_task: &CurrentTask,
402    pid: pid_t,
403) -> Result<pid_t, Errno> {
404    let weak = get_task_or_current(current_task, pid);
405    let task = Task::from_weak(&weak)?;
406
407    security::check_getpgid_access(current_task, &task)?;
408    let pgid = task.thread_group().read().process_group.leader;
409    Ok(pgid)
410}
411
412pub fn sys_setpgid(
413    locked: &mut Locked<Unlocked>,
414    current_task: &CurrentTask,
415    pid: pid_t,
416    pgid: pid_t,
417) -> Result<(), Errno> {
418    let weak = get_task_or_current(current_task, pid);
419    let task = Task::from_weak(&weak)?;
420
421    current_task.thread_group().setpgid(locked, current_task, &task, pgid)?;
422    Ok(())
423}
424
425impl CurrentTask {
426    /// Returns true if the `current_task`'s effective user ID (EUID) is the same as the
427    /// EUID or UID of the `target_task`. We describe this as the current task being
428    /// "EUID-friendly" to the target and it enables actions to be performed that would
429    /// otherwise require additional privileges.
430    ///
431    /// See "The caller needs an effective user ID equal to the real user ID or effective
432    /// user ID of the [target]" at sched_setaffinity(2), comparable language at
433    /// setpriority(2), more ambiguous language at sched_setscheduler(2), and no
434    /// particular specification at sched_setparam(2).
435    fn is_euid_friendly_with(&self, target_task: &Task) -> bool {
436        let self_creds = self.current_creds();
437        let target_creds = target_task.real_creds();
438        self_creds.euid == target_creds.uid || self_creds.euid == target_creds.euid
439    }
440}
441
442// A non-root process is allowed to set any of its three uids to the value of any other. The
443// CAP_SETUID capability bypasses these checks and allows setting any uid to any integer. Likewise
444// for gids.
445fn new_uid_allowed(current_task: &CurrentTask, uid: uid_t) -> bool {
446    let current_creds = current_task.current_creds();
447    uid == current_creds.uid
448        || uid == current_creds.euid
449        || uid == current_creds.saved_uid
450        || security::is_task_capable_noaudit(current_task, CAP_SETUID)
451}
452
453fn new_gid_allowed(current_task: &CurrentTask, gid: gid_t) -> bool {
454    let current_creds = current_task.current_creds();
455    gid == current_creds.gid
456        || gid == current_creds.egid
457        || gid == current_creds.saved_gid
458        || security::is_task_capable_noaudit(current_task, CAP_SETGID)
459}
460
461pub fn sys_getuid(
462    _locked: &mut Locked<Unlocked>,
463    current_task: &CurrentTask,
464) -> Result<uid_t, Errno> {
465    Ok(current_task.current_creds().uid)
466}
467
468pub fn sys_getgid(
469    _locked: &mut Locked<Unlocked>,
470    current_task: &CurrentTask,
471) -> Result<gid_t, Errno> {
472    Ok(current_task.current_creds().gid)
473}
474
475pub fn sys_setuid(
476    _locked: &mut Locked<Unlocked>,
477    current_task: &CurrentTask,
478    uid: uid_t,
479) -> Result<(), Errno> {
480    if uid == gid_t::MAX {
481        return error!(EINVAL);
482    }
483    if !new_uid_allowed(&current_task, uid) {
484        return error!(EPERM);
485    }
486
487    let mut creds = Credentials::clone(&current_task.current_creds());
488    let prev = creds.copy_user_credentials();
489    creds.euid = uid;
490    creds.fsuid = uid;
491    if security::is_task_capable_noaudit(current_task, CAP_SETUID) {
492        creds.uid = uid;
493        creds.saved_uid = uid;
494    }
495
496    creds.update_capabilities(prev);
497    current_task.set_creds(creds);
498    Ok(())
499}
500
501pub fn sys_setgid(
502    _locked: &mut Locked<Unlocked>,
503    current_task: &CurrentTask,
504    gid: gid_t,
505) -> Result<(), Errno> {
506    if gid == gid_t::MAX {
507        return error!(EINVAL);
508    }
509    if !new_gid_allowed(&current_task, gid) {
510        return error!(EPERM);
511    }
512
513    let mut creds = Credentials::clone(&current_task.current_creds());
514    creds.egid = gid;
515    creds.fsgid = gid;
516    if security::is_task_capable_noaudit(current_task, CAP_SETGID) {
517        creds.gid = gid;
518        creds.saved_gid = gid;
519    }
520    current_task.set_creds(creds);
521    Ok(())
522}
523
524pub fn sys_geteuid(
525    _locked: &mut Locked<Unlocked>,
526    current_task: &CurrentTask,
527) -> Result<uid_t, Errno> {
528    Ok(current_task.current_creds().euid)
529}
530
531pub fn sys_getegid(
532    _locked: &mut Locked<Unlocked>,
533    current_task: &CurrentTask,
534) -> Result<gid_t, Errno> {
535    Ok(current_task.current_creds().egid)
536}
537
538pub fn sys_setfsuid(
539    _locked: &mut Locked<Unlocked>,
540    current_task: &CurrentTask,
541    fsuid: uid_t,
542) -> Result<uid_t, Errno> {
543    let mut creds = Credentials::clone(&current_task.current_creds());
544    let prev = creds.copy_user_credentials();
545    if fsuid != u32::MAX && new_uid_allowed(&current_task, fsuid) {
546        creds.fsuid = fsuid;
547        creds.update_capabilities(prev);
548        current_task.set_creds(creds);
549    }
550
551    Ok(prev.fsuid)
552}
553
554pub fn sys_setfsgid(
555    _locked: &mut Locked<Unlocked>,
556    current_task: &CurrentTask,
557    fsgid: gid_t,
558) -> Result<gid_t, Errno> {
559    let mut creds = Credentials::clone(&current_task.current_creds());
560    let prev = creds.copy_user_credentials();
561    let prev_fsgid = creds.fsgid;
562
563    if fsgid != u32::MAX && new_gid_allowed(&current_task, fsgid) {
564        creds.fsgid = fsgid;
565        creds.update_capabilities(prev);
566        current_task.set_creds(creds);
567    }
568
569    Ok(prev_fsgid)
570}
571
572pub fn sys_getresuid(
573    _locked: &mut Locked<Unlocked>,
574    current_task: &CurrentTask,
575    ruid_addr: UserRef<uid_t>,
576    euid_addr: UserRef<uid_t>,
577    suid_addr: UserRef<uid_t>,
578) -> Result<(), Errno> {
579    let creds = current_task.current_creds();
580    current_task.write_object(ruid_addr, &creds.uid)?;
581    current_task.write_object(euid_addr, &creds.euid)?;
582    current_task.write_object(suid_addr, &creds.saved_uid)?;
583    Ok(())
584}
585
586pub fn sys_getresgid(
587    _locked: &mut Locked<Unlocked>,
588    current_task: &CurrentTask,
589    rgid_addr: UserRef<gid_t>,
590    egid_addr: UserRef<gid_t>,
591    sgid_addr: UserRef<gid_t>,
592) -> Result<(), Errno> {
593    let creds = current_task.current_creds();
594    current_task.write_object(rgid_addr, &creds.gid)?;
595    current_task.write_object(egid_addr, &creds.egid)?;
596    current_task.write_object(sgid_addr, &creds.saved_gid)?;
597    Ok(())
598}
599
600pub fn sys_setreuid(
601    _locked: &mut Locked<Unlocked>,
602    current_task: &CurrentTask,
603    ruid: uid_t,
604    euid: uid_t,
605) -> Result<(), Errno> {
606    // Linux __sys_setreuid() uses asymmetric checks: ruid cannot be set
607    // to saved_uid, while euid can. This prevents regaining root via
608    // setreuid after a privilege drop when setresuid would be required.
609    let validate_ruid = |uid: uid_t| {
610        let creds = current_task.current_creds();
611        uid == u32::MAX
612            || uid == creds.uid
613            || uid == creds.euid
614            || security::is_task_capable_noaudit(current_task, CAP_SETUID)
615    };
616    let validate_euid = |uid: uid_t| {
617        let creds = current_task.current_creds();
618        uid == u32::MAX
619            || uid == creds.uid
620            || uid == creds.euid
621            || uid == creds.saved_uid
622            || security::is_task_capable_noaudit(current_task, CAP_SETUID)
623    };
624    if !validate_ruid(ruid) || !validate_euid(euid) {
625        return error!(EPERM);
626    }
627
628    let mut creds = Credentials::clone(&current_task.current_creds());
629    let prev = creds.copy_user_credentials();
630    let is_ruid_set = ruid != u32::MAX;
631    if is_ruid_set {
632        creds.uid = ruid;
633    }
634    let is_euid_set = euid != u32::MAX;
635    if is_euid_set {
636        creds.euid = euid;
637        creds.fsuid = euid;
638    }
639
640    // If the real user ID is set (i.e., ruid is not -1) or the effective
641    // user ID is set to a value not equal to the previous real user ID,
642    // the saved set-user-ID will be set to the new effective user ID.
643    if is_ruid_set || (is_euid_set && euid != prev.uid) {
644        creds.saved_uid = creds.euid;
645    }
646
647    creds.update_capabilities(prev);
648    current_task.set_creds(creds);
649    Ok(())
650}
651
652pub fn sys_setregid(
653    _locked: &mut Locked<Unlocked>,
654    current_task: &CurrentTask,
655    rgid: gid_t,
656    egid: gid_t,
657) -> Result<(), Errno> {
658    // Same asymmetric permission model as setreuid — see above.
659    let validate_rgid = |gid: gid_t| {
660        let creds = current_task.current_creds();
661        gid == u32::MAX
662            || gid == creds.gid
663            || gid == creds.egid
664            || security::is_task_capable_noaudit(current_task, CAP_SETGID)
665    };
666    let validate_egid = |gid: gid_t| {
667        let creds = current_task.current_creds();
668        gid == u32::MAX
669            || gid == creds.gid
670            || gid == creds.egid
671            || gid == creds.saved_gid
672            || security::is_task_capable_noaudit(current_task, CAP_SETGID)
673    };
674    if !validate_rgid(rgid) || !validate_egid(egid) {
675        return error!(EPERM);
676    }
677
678    let mut creds = Credentials::clone(&current_task.current_creds());
679    let previous_rgid = creds.gid;
680    let is_rgid_set = rgid != u32::MAX;
681    if is_rgid_set {
682        creds.gid = rgid;
683    }
684    let is_egid_set = egid != u32::MAX;
685    if is_egid_set {
686        creds.egid = egid;
687        creds.fsgid = egid;
688    }
689
690    // If the real group ID is set (i.e., rgid is not -1) or the effective
691    // group ID is set to a value not equal to the previous real group ID,
692    // the saved set-group-ID will be set to the new effective group ID.
693    if is_rgid_set || (is_egid_set && egid != previous_rgid) {
694        creds.saved_gid = creds.egid;
695    }
696
697    current_task.set_creds(creds);
698    Ok(())
699}
700
701pub fn sys_setresuid(
702    _locked: &mut Locked<Unlocked>,
703    current_task: &CurrentTask,
704    ruid: uid_t,
705    euid: uid_t,
706    suid: uid_t,
707) -> Result<(), Errno> {
708    let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
709    if !allowed(ruid) || !allowed(euid) || !allowed(suid) {
710        return error!(EPERM);
711    }
712
713    let mut creds = Credentials::clone(&current_task.current_creds());
714    let prev = creds.copy_user_credentials();
715    if ruid != u32::MAX {
716        creds.uid = ruid;
717    }
718    if euid != u32::MAX {
719        creds.euid = euid;
720        creds.fsuid = euid;
721    }
722    if suid != u32::MAX {
723        creds.saved_uid = suid;
724    }
725    creds.update_capabilities(prev);
726    current_task.set_creds(creds);
727    Ok(())
728}
729
730pub fn sys_setresgid(
731    _locked: &mut Locked<Unlocked>,
732    current_task: &CurrentTask,
733    rgid: gid_t,
734    egid: gid_t,
735    sgid: gid_t,
736) -> Result<(), Errno> {
737    let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
738    if !allowed(rgid) || !allowed(egid) || !allowed(sgid) {
739        return error!(EPERM);
740    }
741
742    let mut creds = Credentials::clone(&current_task.current_creds());
743    if rgid != u32::MAX {
744        creds.gid = rgid;
745    }
746    if egid != u32::MAX {
747        creds.egid = egid;
748        creds.fsgid = egid;
749    }
750    if sgid != u32::MAX {
751        creds.saved_gid = sgid;
752    }
753    current_task.set_creds(creds);
754    Ok(())
755}
756
757pub fn sys_exit(
758    _locked: &mut Locked<Unlocked>,
759    current_task: &CurrentTask,
760    code: i32,
761) -> Result<(), Errno> {
762    // Only change the current exit status if this has not been already set by exit_group, as
763    // otherwise it has priority.
764    current_task.write().set_exit_status_if_not_already(ExitStatus::Exit(code as u8));
765    Ok(())
766}
767
768pub fn sys_exit_group(
769    locked: &mut Locked<Unlocked>,
770    current_task: &mut CurrentTask,
771    code: i32,
772) -> Result<(), Errno> {
773    current_task.thread_group_exit(locked, ExitStatus::Exit(code as u8));
774    Ok(())
775}
776
777pub fn sys_sched_getscheduler(
778    _locked: &mut Locked<Unlocked>,
779    current_task: &CurrentTask,
780    pid: pid_t,
781) -> Result<u32, Errno> {
782    if pid < 0 {
783        return error!(EINVAL);
784    }
785
786    let weak = get_task_or_current(current_task, pid);
787    let target_task = Task::from_weak(&weak)?;
788    security::check_getsched_access(current_task, target_task.as_ref())?;
789    let current_scheduler_state = target_task.read().scheduler_state;
790    Ok(current_scheduler_state.policy_for_sched_getscheduler())
791}
792
793pub fn sys_sched_setscheduler(
794    locked: &mut Locked<Unlocked>,
795    current_task: &CurrentTask,
796    pid: pid_t,
797    policy: u32,
798    param: UserRef<sched_param>,
799) -> Result<(), Errno> {
800    // Parse & validate the arguments.
801    if pid < 0 || param.is_null() {
802        return error!(EINVAL);
803    }
804
805    let weak = get_task_or_current(current_task, pid);
806    let target_task = Task::from_weak(&weak)?;
807
808    let reset_on_fork = policy & SCHED_RESET_ON_FORK != 0;
809
810    let policy = SchedulingPolicy::try_from(policy & !SCHED_RESET_ON_FORK)?;
811    let realtime_priority =
812        policy.realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
813
814    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
815    let current_state = target_task.read().scheduler_state;
816
817    // Check capabilities and permissions, if required, for the operation.
818    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
819    let strengthening = current_state.realtime_priority < realtime_priority;
820    let rlimited = strengthening
821        && realtime_priority
822            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
823    let clearing_reset_on_fork = current_state.reset_on_fork && !reset_on_fork;
824    let caught_in_idle_trap = current_state.policy == SchedulingPolicy::Idle
825        && policy != SchedulingPolicy::Idle
826        && current_state
827            .normal_priority
828            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
829    if !euid_friendly || rlimited || clearing_reset_on_fork || caught_in_idle_trap {
830        security::check_task_capable(current_task, CAP_SYS_NICE)?;
831    }
832
833    security::check_setsched_access(current_task, &target_task)?;
834
835    // Apply the new scheduler configuration to the task.
836    target_task.set_scheduler_policy_priority_and_reset_on_fork(
837        policy,
838        realtime_priority,
839        reset_on_fork,
840    )?;
841
842    Ok(())
843}
844
845const CPU_SET_SIZE: usize = 128;
846
847#[repr(C)]
848#[derive(Debug, Copy, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
849pub struct CpuSet {
850    bits: [u8; CPU_SET_SIZE],
851}
852
853impl Default for CpuSet {
854    fn default() -> Self {
855        Self { bits: [0; CPU_SET_SIZE] }
856    }
857}
858
859fn check_cpu_set_alignment(current_task: &CurrentTask, cpusetsize: u32) -> Result<(), Errno> {
860    let alignment = if current_task.is_arch32() { 4 } else { 8 };
861    if cpusetsize < alignment || cpusetsize % alignment != 0 {
862        return error!(EINVAL);
863    }
864    Ok(())
865}
866
867fn get_default_cpu_set() -> CpuSet {
868    let mut result = CpuSet::default();
869    let mut cpus_count = zx::system_get_num_cpus();
870    let cpus_count_max = (CPU_SET_SIZE * 8) as u32;
871    if cpus_count > cpus_count_max {
872        log_error!("cpus_count={cpus_count}, greater than the {cpus_count_max} max supported.");
873        cpus_count = cpus_count_max;
874    }
875    let mut index = 0;
876    while cpus_count > 0 {
877        let count = std::cmp::min(cpus_count, 8);
878        let (shl, overflow) = 1_u8.overflowing_shl(count);
879        let mask = if overflow { u8::max_value() } else { shl - 1 };
880        result.bits[index] = mask;
881        index += 1;
882        cpus_count -= count;
883    }
884    result
885}
886
887pub fn sys_sched_getaffinity(
888    _locked: &mut Locked<Unlocked>,
889    current_task: &CurrentTask,
890    pid: pid_t,
891    cpusetsize: u32,
892    user_mask: UserAddress,
893) -> Result<usize, Errno> {
894    if pid < 0 {
895        return error!(EINVAL);
896    }
897
898    check_cpu_set_alignment(current_task, cpusetsize)?;
899
900    let weak = get_task_or_current(current_task, pid);
901    let _task = Task::from_weak(&weak)?;
902
903    // sched_setaffinity() is not implemented. Fake affinity mask based on the number of CPUs.
904    let mask = get_default_cpu_set();
905    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
906    current_task.write_memory(user_mask, &mask.bits[..mask_size])?;
907    track_stub!(TODO("https://fxbug.dev/322874659"), "sched_getaffinity");
908    Ok(mask_size)
909}
910
911pub fn sys_sched_setaffinity(
912    _locked: &mut Locked<Unlocked>,
913    current_task: &CurrentTask,
914    pid: pid_t,
915    cpusetsize: u32,
916    user_mask: UserAddress,
917) -> Result<(), Errno> {
918    if pid < 0 {
919        return error!(EINVAL);
920    }
921    let weak = get_task_or_current(current_task, pid);
922    let target_task = Task::from_weak(&weak)?;
923
924    check_cpu_set_alignment(current_task, cpusetsize)?;
925
926    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
927    let mut mask = CpuSet::default();
928    current_task.read_memory_to_slice(user_mask, &mut mask.bits[..mask_size])?;
929
930    // Specified mask must include at least one valid CPU.
931    let max_mask = get_default_cpu_set();
932    let mut has_valid_cpu_in_mask = false;
933    for (l1, l2) in std::iter::zip(max_mask.bits, mask.bits) {
934        has_valid_cpu_in_mask = has_valid_cpu_in_mask || (l1 & l2 > 0);
935    }
936    if !has_valid_cpu_in_mask {
937        return error!(EINVAL);
938    }
939
940    if !current_task.is_euid_friendly_with(&target_task) {
941        security::check_task_capable(current_task, CAP_SYS_NICE)?;
942    }
943
944    // Currently, we ignore the mask and act as if the system reset the mask
945    // immediately to allowing all CPUs.
946    track_stub!(TODO("https://fxbug.dev/322874889"), "sched_setaffinity");
947    Ok(())
948}
949
950pub fn sys_sched_getparam(
951    _locked: &mut Locked<Unlocked>,
952    current_task: &CurrentTask,
953    pid: pid_t,
954    param: UserRef<sched_param>,
955) -> Result<(), Errno> {
956    if pid < 0 || param.is_null() {
957        return error!(EINVAL);
958    }
959
960    let weak = get_task_or_current(current_task, pid);
961    let target_task = Task::from_weak(&weak)?;
962    let param_value = target_task.read().scheduler_state.get_sched_param();
963    current_task.write_object(param, &param_value)?;
964    Ok(())
965}
966
967pub fn sys_sched_setparam(
968    locked: &mut Locked<Unlocked>,
969    current_task: &CurrentTask,
970    pid: pid_t,
971    param: UserRef<sched_param>,
972) -> Result<(), Errno> {
973    // Parse & validate the arguments.
974    if pid < 0 || param.is_null() {
975        return error!(EINVAL);
976    }
977    let weak = get_task_or_current(current_task, pid);
978    let target_task = Task::from_weak(&weak)?;
979
980    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
981    let current_state = target_task.read().scheduler_state;
982
983    let realtime_priority = current_state
984        .policy
985        .realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
986
987    // Check capabilities and permissions, if required, for the operation.
988    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
989    let strengthening = current_state.realtime_priority < realtime_priority;
990    let rlimited = strengthening
991        && realtime_priority
992            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
993    if !euid_friendly || rlimited {
994        security::check_task_capable(current_task, CAP_SYS_NICE)?;
995    }
996
997    security::check_setsched_access(current_task, &target_task)?;
998
999    // Apply the new scheduler configuration to the task.
1000    target_task.set_scheduler_priority(realtime_priority)?;
1001
1002    Ok(())
1003}
1004
/// Handles `sched_get_priority_min(2)`.
///
/// Delegates entirely to `min_priority_for_sched_policy`, passing the raw `policy` value and
/// returning its result (a priority or an error) unchanged. The task context is unused.
pub fn sys_sched_get_priority_min(
    _locked: &mut Locked<Unlocked>,
    _ctx: &CurrentTask,
    policy: u32,
) -> Result<u8, Errno> {
    min_priority_for_sched_policy(policy)
}
1012
/// Handles `sched_get_priority_max(2)`.
///
/// Delegates entirely to `max_priority_for_sched_policy`, passing the raw `policy` value and
/// returning its result (a priority or an error) unchanged. The task context is unused.
pub fn sys_sched_get_priority_max(
    _locked: &mut Locked<Unlocked>,
    _ctx: &CurrentTask,
    policy: u32,
) -> Result<u8, Errno> {
    max_priority_for_sched_policy(policy)
}
1020
/// Handles `ioprio_set(2)`.
///
/// Not implemented: every call records a stub hit and fails with `ENOSYS`. All arguments are
/// ignored.
pub fn sys_ioprio_set(
    _locked: &mut Locked<Unlocked>,
    _current_task: &mut CurrentTask,
    _which: i32,
    _who: i32,
    _ioprio: i32,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297591758"), "ioprio_set()");
    error!(ENOSYS)
}
1031
1032pub fn sys_prctl(
1033    locked: &mut Locked<Unlocked>,
1034    current_task: &mut CurrentTask,
1035    option: u32,
1036    arg2: u64,
1037    arg3: u64,
1038    arg4: u64,
1039    arg5: u64,
1040) -> Result<SyscallResult, Errno> {
1041    match option {
1042        PR_SET_VMA => {
1043            if arg2 != PR_SET_VMA_ANON_NAME as u64 {
1044                track_stub!(TODO("https://fxbug.dev/322874826"), "prctl PR_SET_VMA", arg2);
1045                return error!(ENOSYS);
1046            }
1047            let addr = UserAddress::from(arg3);
1048            let length = arg4 as usize;
1049            let name_addr = UserAddress::from(arg5);
1050            let name = if name_addr.is_null() {
1051                None
1052            } else {
1053                let name = UserCString::new(current_task, UserAddress::from(arg5));
1054                let name = current_task.read_c_string_to_vec(name, 256).map_err(|e| {
1055                    // An overly long name produces EINVAL and not ENAMETOOLONG in Linux 5.15.
1056                    if e.code == ENAMETOOLONG { errno!(EINVAL) } else { e }
1057                })?;
1058                // Some characters are forbidden in VMA names.
1059                if name.iter().any(|b| {
1060                    matches!(b,
1061                        0..=0x1f |
1062                        0x7f..=0xff |
1063                        b'\\' | b'`' | b'$' | b'[' | b']'
1064                    )
1065                }) {
1066                    return error!(EINVAL);
1067                }
1068                Some(name)
1069            };
1070            current_task.mm()?.set_mapping_name(addr, length, name)?;
1071            Ok(().into())
1072        }
1073        PR_SET_DUMPABLE => {
1074            let mm = current_task.mm()?;
1075            let mut dumpable = mm.dumpable.lock(locked);
1076            *dumpable = if arg2 == 1 { DumpPolicy::User } else { DumpPolicy::Disable };
1077            Ok(().into())
1078        }
1079        PR_GET_DUMPABLE => {
1080            let mm = current_task.mm()?;
1081            let dumpable = mm.dumpable.lock(locked);
1082            Ok(match *dumpable {
1083                DumpPolicy::Disable => 0.into(),
1084                DumpPolicy::User => 1.into(),
1085            })
1086        }
1087        PR_SET_PDEATHSIG => {
1088            track_stub!(TODO("https://fxbug.dev/322874397"), "PR_SET_PDEATHSIG");
1089            Ok(().into())
1090        }
1091        PR_SET_NAME => {
1092            let addr = UserAddress::from(arg2);
1093            let name = TaskCommand::new(&current_task.read_memory_to_array::<16>(addr)?);
1094            current_task.set_command_name(name);
1095            if current_task.tid == current_task.thread_group.leader {
1096                current_task.thread_group.sync_syscall_log_level();
1097            }
1098            Ok(0.into())
1099        }
1100        PR_GET_NAME => {
1101            let addr = UserAddress::from(arg2);
1102            let name = current_task.command().prctl_name();
1103            current_task.write_memory(addr, &name[..])?;
1104            Ok(().into())
1105        }
1106        PR_SET_PTRACER => {
1107            let allowed_ptracers = if arg2 == PR_SET_PTRACER_ANY as u64 {
1108                PtraceAllowedPtracers::Any
1109            } else if arg2 == 0 {
1110                PtraceAllowedPtracers::None
1111            } else {
1112                if current_task.kernel().pids.read().get_task(arg2 as i32).upgrade().is_none() {
1113                    return error!(EINVAL);
1114                }
1115                PtraceAllowedPtracers::Some(arg2 as pid_t)
1116            };
1117            current_task.thread_group().write().allowed_ptracers = allowed_ptracers;
1118            Ok(().into())
1119        }
1120        PR_GET_KEEPCAPS => {
1121            Ok(current_task.current_creds().securebits.contains(SecureBits::KEEP_CAPS).into())
1122        }
1123        PR_SET_KEEPCAPS => {
1124            if arg2 != 0 && arg2 != 1 {
1125                return error!(EINVAL);
1126            }
1127            let mut creds = Credentials::clone(&current_task.current_creds());
1128            creds.securebits.set(SecureBits::KEEP_CAPS, arg2 != 0);
1129            current_task.set_creds(creds);
1130            Ok(().into())
1131        }
1132        PR_SET_NO_NEW_PRIVS => {
1133            // If any args are set other than arg2 to 1, this should return einval
1134            if arg2 != 1 || arg3 != 0 || arg4 != 0 || arg5 != 0 {
1135                return error!(EINVAL);
1136            }
1137            current_task.write().enable_no_new_privs();
1138            Ok(().into())
1139        }
1140        PR_GET_NO_NEW_PRIVS => {
1141            // If any args are set, this should return einval
1142            if arg2 != 0 || arg3 != 0 || arg4 != 0 {
1143                return error!(EINVAL);
1144            }
1145            Ok(current_task.read().no_new_privs().into())
1146        }
1147        PR_GET_SECCOMP => {
1148            if current_task.seccomp_filter_state.get() == SeccompStateValue::None {
1149                Ok(0.into())
1150            } else {
1151                Ok(2.into())
1152            }
1153        }
1154        PR_SET_SECCOMP => {
1155            if arg2 == SECCOMP_MODE_STRICT as u64 {
1156                return sys_seccomp(
1157                    locked,
1158                    current_task,
1159                    SECCOMP_SET_MODE_STRICT,
1160                    0,
1161                    UserAddress::NULL,
1162                );
1163            } else if arg2 == SECCOMP_MODE_FILTER as u64 {
1164                return sys_seccomp(locked, current_task, SECCOMP_SET_MODE_FILTER, 0, arg3.into());
1165            }
1166            Ok(().into())
1167        }
1168        PR_GET_CHILD_SUBREAPER => {
1169            let addr = UserAddress::from(arg2);
1170            #[allow(clippy::bool_to_int_with_if)]
1171            let value: i32 =
1172                if current_task.thread_group().read().is_child_subreaper { 1 } else { 0 };
1173            current_task.write_object(addr.into(), &value)?;
1174            Ok(().into())
1175        }
1176        PR_SET_CHILD_SUBREAPER => {
1177            current_task.thread_group().write().is_child_subreaper = arg2 != 0;
1178            Ok(().into())
1179        }
1180        PR_GET_SECUREBITS => Ok(current_task.current_creds().securebits.bits().into()),
1181        PR_SET_SECUREBITS => {
1182            // TODO(security): This does not yet respect locked flags.
1183            let mut creds = Credentials::clone(&current_task.current_creds());
1184            security::check_task_capable(current_task, CAP_SETPCAP)?;
1185
1186            let securebits = SecureBits::from_bits(arg2 as u32).ok_or_else(|| {
1187                track_stub!(TODO("https://fxbug.dev/322875244"), "PR_SET_SECUREBITS", arg2);
1188                errno!(ENOSYS)
1189            })?;
1190            creds.securebits = securebits;
1191            current_task.set_creds(creds);
1192            Ok(().into())
1193        }
1194        PR_CAPBSET_READ => {
1195            let cap = Capabilities::try_from(arg2)?;
1196            Ok(current_task.current_creds().cap_bounding.contains(cap).into())
1197        }
1198        PR_CAPBSET_DROP => {
1199            let mut creds = Credentials::clone(&current_task.current_creds());
1200            security::check_task_capable(current_task, CAP_SETPCAP)?;
1201
1202            creds.cap_bounding.remove(Capabilities::try_from(arg2)?);
1203            current_task.set_creds(creds);
1204            Ok(().into())
1205        }
1206        PR_CAP_AMBIENT => {
1207            let operation = arg2 as u32;
1208            let capability_arg = Capabilities::try_from(arg3)?;
1209            if arg4 != 0 || arg5 != 0 {
1210                return error!(EINVAL);
1211            }
1212
1213            // TODO(security): We don't currently validate capabilities, but this should return an
1214            // error if the capability_arg is invalid.
1215            match operation {
1216                PR_CAP_AMBIENT_RAISE => {
1217                    let mut creds = Credentials::clone(&current_task.current_creds());
1218                    if !(creds.cap_permitted.contains(capability_arg)
1219                        && creds.cap_inheritable.contains(capability_arg))
1220                    {
1221                        return error!(EPERM);
1222                    }
1223                    if creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE)
1224                        || creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE_LOCKED)
1225                    {
1226                        return error!(EPERM);
1227                    }
1228
1229                    creds.cap_ambient.insert(capability_arg);
1230                    current_task.set_creds(creds);
1231                    Ok(().into())
1232                }
1233                PR_CAP_AMBIENT_LOWER => {
1234                    let mut creds = Credentials::clone(&current_task.current_creds());
1235                    creds.cap_ambient.remove(capability_arg);
1236                    current_task.set_creds(creds);
1237                    Ok(().into())
1238                }
1239                PR_CAP_AMBIENT_IS_SET => {
1240                    Ok(current_task.current_creds().cap_ambient.contains(capability_arg).into())
1241                }
1242                PR_CAP_AMBIENT_CLEAR_ALL => {
1243                    if arg3 != 0 {
1244                        return error!(EINVAL);
1245                    }
1246
1247                    let mut creds = Credentials::clone(&current_task.current_creds());
1248                    creds.cap_ambient = Capabilities::empty();
1249                    current_task.set_creds(creds);
1250                    Ok(().into())
1251                }
1252                _ => error!(EINVAL),
1253            }
1254        }
1255        PR_SET_TIMERSLACK => {
1256            current_task.write().set_timerslack_ns(arg2);
1257            Ok(().into())
1258        }
1259        #[cfg(target_arch = "aarch64")]
1260        PR_GET_TAGGED_ADDR_CTRL => {
1261            track_stub!(TODO("https://fxbug.dev/408554469"), "PR_GET_TAGGED_ADDR_CTRL");
1262            Ok(0.into())
1263        }
1264        #[cfg(target_arch = "aarch64")]
1265        PR_SET_TAGGED_ADDR_CTRL => match u32::try_from(arg2).map_err(|_| errno!(EINVAL))? {
1266            // Only untagged pointers are allowed, the default.
1267            0 => Ok(().into()),
1268            PR_TAGGED_ADDR_ENABLE => {
1269                track_stub!(TODO("https://fxbug.dev/408554469"), "PR_TAGGED_ADDR_ENABLE");
1270                error!(EINVAL)
1271            }
1272            unknown_mode => {
1273                track_stub!(
1274                    TODO("https://fxbug.dev/408554469"),
1275                    "PR_SET_TAGGED_ADDR_CTRL unknown mode",
1276                    unknown_mode,
1277                );
1278                error!(EINVAL)
1279            }
1280        },
1281        _ => {
1282            track_stub!(TODO("https://fxbug.dev/322874733"), "prctl fallthrough", option);
1283            error!(ENOSYS)
1284        }
1285    }
1286}
1287
1288pub fn sys_ptrace(
1289    locked: &mut Locked<Unlocked>,
1290    current_task: &mut CurrentTask,
1291    request: u32,
1292    pid: pid_t,
1293    addr: UserAddress,
1294    data: UserAddress,
1295) -> Result<SyscallResult, Errno> {
1296    match request {
1297        PTRACE_TRACEME => ptrace_traceme(current_task),
1298        PTRACE_ATTACH => ptrace_attach(locked, current_task, pid, PtraceAttachType::Attach, data),
1299        PTRACE_SEIZE => ptrace_attach(locked, current_task, pid, PtraceAttachType::Seize, data),
1300        _ => ptrace_dispatch(locked, current_task, request, pid, addr, data),
1301    }
1302}
1303
/// Handles `set_tid_address(2)`.
///
/// Stores `user_tid` as the calling task's `clear_child_tid` pointer and returns the caller's
/// thread id. The pointer is recorded without being validated here.
pub fn sys_set_tid_address(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    user_tid: UserRef<pid_t>,
) -> Result<pid_t, Errno> {
    current_task.write().clear_child_tid = user_tid;
    Ok(current_task.get_tid())
}
1312
1313pub fn sys_getrusage(
1314    _locked: &mut Locked<Unlocked>,
1315    current_task: &CurrentTask,
1316    who: i32,
1317    user_usage: RUsagePtr,
1318) -> Result<(), Errno> {
1319    const RUSAGE_SELF: i32 = starnix_uapi::uapi::RUSAGE_SELF as i32;
1320    const RUSAGE_THREAD: i32 = starnix_uapi::uapi::RUSAGE_THREAD as i32;
1321    track_stub!(TODO("https://fxbug.dev/297370242"), "real rusage");
1322    let time_stats = match who {
1323        RUSAGE_CHILDREN => current_task.task.thread_group().read().children_time_stats,
1324        RUSAGE_SELF => current_task.task.thread_group().time_stats(),
1325        RUSAGE_THREAD => current_task.task.time_stats(),
1326        _ => return error!(EINVAL),
1327    };
1328
1329    let usage = rusage {
1330        ru_utime: timeval_from_duration(time_stats.user_time),
1331        ru_stime: timeval_from_duration(time_stats.system_time),
1332        ..rusage::default()
1333    };
1334    current_task.write_multi_arch_object(user_usage, usage)?;
1335
1336    Ok(())
1337}
1338
/// User pointer to an `rlimit` that may use either the native or the arch32 layout; used by the
/// `getrlimit` and `setrlimit` handlers.
type PrLimitRef = MultiArchUserRef<uapi::rlimit, uapi::arch32::rlimit>;
1340
1341pub fn sys_getrlimit(
1342    locked: &mut Locked<Unlocked>,
1343    current_task: &CurrentTask,
1344    resource: u32,
1345    user_rlimit: PrLimitRef,
1346) -> Result<(), Errno> {
1347    do_prlimit64(locked, current_task, 0, resource, PrLimitRef::null(current_task), user_rlimit)
1348}
1349
1350pub fn sys_setrlimit(
1351    locked: &mut Locked<Unlocked>,
1352    current_task: &CurrentTask,
1353    resource: u32,
1354    user_rlimit: PrLimitRef,
1355) -> Result<(), Errno> {
1356    do_prlimit64(locked, current_task, 0, resource, user_rlimit, PrLimitRef::null(current_task))
1357}
1358
1359pub fn sys_prlimit64(
1360    locked: &mut Locked<Unlocked>,
1361    current_task: &CurrentTask,
1362    pid: pid_t,
1363    user_resource: u32,
1364    new_limit_ref: UserRef<uapi::rlimit>,
1365    old_limit_ref: UserRef<uapi::rlimit>,
1366) -> Result<(), Errno> {
1367    do_prlimit64::<uapi::rlimit>(
1368        locked,
1369        current_task,
1370        pid,
1371        user_resource,
1372        new_limit_ref.into(),
1373        old_limit_ref.into(),
1374    )
1375}
1376
1377pub fn do_prlimit64<T>(
1378    locked: &mut Locked<Unlocked>,
1379    current_task: &CurrentTask,
1380    pid: pid_t,
1381    user_resource: u32,
1382    new_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1383    old_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1384) -> Result<(), Errno>
1385where
1386    T: FromBytes + IntoBytes + Immutable + From<uapi::rlimit> + Into<uapi::rlimit>,
1387{
1388    let weak = get_task_or_current(current_task, pid);
1389    let target_task = Task::from_weak(&weak)?;
1390
1391    // To get or set the resource of a process other than itself, the caller must have either:
1392    // * the same `uid`, `euid`, `saved_uid`, `gid`, `egid`, `saved_gid` as the target.
1393    // * the CAP_SYS_RESOURCE
1394    if current_task.get_pid() != target_task.get_pid() {
1395        let self_creds = current_task.current_creds();
1396        let target_creds = target_task.real_creds();
1397        if self_creds.uid != target_creds.uid
1398            || self_creds.euid != target_creds.euid
1399            || self_creds.saved_uid != target_creds.saved_uid
1400            || self_creds.gid != target_creds.gid
1401            || self_creds.egid != target_creds.egid
1402            || self_creds.saved_gid != target_creds.saved_gid
1403        {
1404            security::check_task_capable(current_task, CAP_SYS_RESOURCE)?;
1405        }
1406        security::task_prlimit(
1407            current_task,
1408            &target_task,
1409            !old_limit_ref.is_null(),
1410            !new_limit_ref.is_null(),
1411        )?;
1412    }
1413
1414    let resource = Resource::from_raw(user_resource)?;
1415
1416    let old_limit = match resource {
1417        // TODO: Integrate Resource::STACK with generic ResourceLimits machinery.
1418        Resource::STACK => {
1419            if !new_limit_ref.is_null() {
1420                track_stub!(
1421                    TODO("https://fxbug.dev/322874791"),
1422                    "prlimit64 cannot set RLIMIT_STACK"
1423                );
1424            }
1425            // The stack size is fixed at the moment, but
1426            // if MAP_GROWSDOWN is implemented this should
1427            // report the limit that it can be grown.
1428            let mm = target_task.mm()?;
1429            let mm_state = mm.state.read();
1430            let stack_size = mm_state.stack_size as u64;
1431            rlimit { rlim_cur: stack_size, rlim_max: stack_size }
1432        }
1433        _ => {
1434            let new_limit = if new_limit_ref.is_null() {
1435                None
1436            } else {
1437                let new_limit = current_task.read_multi_arch_object(new_limit_ref)?;
1438                if new_limit.rlim_cur > new_limit.rlim_max {
1439                    return error!(EINVAL);
1440                }
1441                Some(new_limit)
1442            };
1443            ThreadGroup::adjust_rlimits(locked, current_task, &target_task, resource, new_limit)?
1444        }
1445    };
1446    if !old_limit_ref.is_null() {
1447        current_task.write_multi_arch_object(old_limit_ref, old_limit)?;
1448    }
1449    Ok(())
1450}
1451
1452pub fn sys_quotactl(
1453    _locked: &mut Locked<Unlocked>,
1454    _current_task: &CurrentTask,
1455    _cmd: i32,
1456    _special: UserRef<c_char>,
1457    _id: i32,
1458    _addr: UserRef<c_char>,
1459) -> Result<SyscallResult, Errno> {
1460    track_stub!(TODO("https://fxbug.dev/297302197"), "quotacl()");
1461    error!(ENOSYS)
1462}
1463
1464pub fn sys_capget(
1465    _locked: &mut Locked<Unlocked>,
1466    current_task: &CurrentTask,
1467    user_header: UserRef<__user_cap_header_struct>,
1468    user_data: UserRef<__user_cap_data_struct>,
1469) -> Result<(), Errno> {
1470    let mut header = current_task.read_object(user_header)?;
1471    let is_version_valid =
1472        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1473            .contains(&header.version);
1474    if !is_version_valid {
1475        header.version = _LINUX_CAPABILITY_VERSION_3;
1476        current_task.write_object(user_header, &header)?;
1477    }
1478    if user_data.is_null() {
1479        return Ok(());
1480    }
1481    if !is_version_valid || header.pid < 0 {
1482        return error!(EINVAL);
1483    }
1484
1485    let weak = get_task_or_current(current_task, header.pid);
1486    let target_task = Task::from_weak(&weak)?;
1487
1488    security::check_getcap_access(current_task, &target_task)?;
1489
1490    let (permitted, effective, inheritable) = {
1491        let creds = &target_task.real_creds();
1492        (creds.cap_permitted, creds.cap_effective, creds.cap_inheritable)
1493    };
1494
1495    match header.version {
1496        _LINUX_CAPABILITY_VERSION_1 => {
1497            let data: [__user_cap_data_struct; 1] = [__user_cap_data_struct {
1498                effective: effective.as_abi_v1(),
1499                inheritable: inheritable.as_abi_v1(),
1500                permitted: permitted.as_abi_v1(),
1501            }];
1502            current_task.write_objects(user_data, &data)?;
1503        }
1504        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1505            // Return 64 bit capabilities as two sets of 32 bit capabilities, little endian
1506            let (permitted, effective, inheritable) =
1507                (permitted.as_abi_v3(), effective.as_abi_v3(), inheritable.as_abi_v3());
1508            let data: [__user_cap_data_struct; 2] = [
1509                __user_cap_data_struct {
1510                    effective: effective.0,
1511                    inheritable: inheritable.0,
1512                    permitted: permitted.0,
1513                },
1514                __user_cap_data_struct {
1515                    effective: effective.1,
1516                    inheritable: inheritable.1,
1517                    permitted: permitted.1,
1518                },
1519            ];
1520            current_task.write_objects(user_data, &data)?;
1521        }
1522        _ => {
1523            unreachable!("already returned if Linux capability version is not valid")
1524        }
1525    }
1526    Ok(())
1527}
1528
1529pub fn sys_capset(
1530    _locked: &mut Locked<Unlocked>,
1531    current_task: &CurrentTask,
1532    user_header: UserRef<__user_cap_header_struct>,
1533    user_data: UserRef<__user_cap_data_struct>,
1534) -> Result<(), Errno> {
1535    let mut header = current_task.read_object(user_header)?;
1536    let is_version_valid =
1537        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1538            .contains(&header.version);
1539    if !is_version_valid {
1540        header.version = _LINUX_CAPABILITY_VERSION_3;
1541        current_task.write_object(user_header, &header)?;
1542        return error!(EINVAL);
1543    }
1544    if header.pid != 0 && header.pid != current_task.tid {
1545        return error!(EPERM);
1546    }
1547
1548    let (new_permitted, new_effective, new_inheritable) = match header.version {
1549        _LINUX_CAPABILITY_VERSION_1 => {
1550            let data = current_task.read_object(user_data)?;
1551            (
1552                Capabilities::from_abi_v1(data.permitted),
1553                Capabilities::from_abi_v1(data.effective),
1554                Capabilities::from_abi_v1(data.inheritable),
1555            )
1556        }
1557        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1558            let data =
1559                current_task.read_objects_to_array::<__user_cap_data_struct, 2>(user_data)?;
1560            (
1561                Capabilities::from_abi_v3((data[0].permitted, data[1].permitted)),
1562                Capabilities::from_abi_v3((data[0].effective, data[1].effective)),
1563                Capabilities::from_abi_v3((data[0].inheritable, data[1].inheritable)),
1564            )
1565        }
1566        _ => {
1567            unreachable!("already returned if Linux capability version is not valid")
1568        }
1569    };
1570
1571    // Permission checks. Copied out of TLPI section 39.7.
1572    let mut creds = Credentials::clone(&current_task.current_creds());
1573    {
1574        log_trace!(
1575            "Capabilities({{permitted={:?} from {:?}, effective={:?} from {:?}, inheritable={:?} from {:?}}}, bounding={:?})",
1576            new_permitted,
1577            creds.cap_permitted,
1578            new_effective,
1579            creds.cap_effective,
1580            new_inheritable,
1581            creds.cap_inheritable,
1582            creds.cap_bounding
1583        );
1584        if !creds.cap_inheritable.union(creds.cap_permitted).contains(new_inheritable) {
1585            security::check_task_capable(current_task, CAP_SETPCAP)?;
1586        }
1587
1588        if !creds.cap_inheritable.union(creds.cap_bounding).contains(new_inheritable) {
1589            return error!(EPERM);
1590        }
1591        if !creds.cap_permitted.contains(new_permitted) {
1592            return error!(EPERM);
1593        }
1594        if !new_permitted.contains(new_effective) {
1595            return error!(EPERM);
1596        }
1597    }
1598    let weak = get_task_or_current(current_task, header.pid);
1599    let target_task = Task::from_weak(&weak)?;
1600
1601    security::check_setcap_access(current_task, &target_task)?;
1602
1603    creds.cap_permitted = new_permitted;
1604    creds.cap_effective = new_effective;
1605    creds.cap_inheritable = new_inheritable;
1606    creds.cap_ambient = new_permitted & new_inheritable & creds.cap_ambient;
1607    current_task.set_creds(creds);
1608    Ok(())
1609}
1610
1611pub fn sys_seccomp(
1612    locked: &mut Locked<Unlocked>,
1613    current_task: &mut CurrentTask,
1614    operation: u32,
1615    flags: u32,
1616    args: UserAddress,
1617) -> Result<SyscallResult, Errno> {
1618    match operation {
1619        SECCOMP_SET_MODE_STRICT => {
1620            if flags != 0 || args != UserAddress::NULL {
1621                return error!(EINVAL);
1622            }
1623            current_task.set_seccomp_state(SeccompStateValue::Strict)?;
1624            Ok(().into())
1625        }
1626        SECCOMP_SET_MODE_FILTER => {
1627            if flags
1628                & (SECCOMP_FILTER_FLAG_LOG
1629                    | SECCOMP_FILTER_FLAG_NEW_LISTENER
1630                    | SECCOMP_FILTER_FLAG_SPEC_ALLOW
1631                    | SECCOMP_FILTER_FLAG_TSYNC
1632                    | SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
1633                != flags
1634            {
1635                return error!(EINVAL);
1636            }
1637            if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER != 0)
1638                && (flags & SECCOMP_FILTER_FLAG_TSYNC != 0)
1639                && (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH == 0)
1640            {
1641                return error!(EINVAL);
1642            }
1643            let fprog =
1644                current_task.read_multi_arch_object(SockFProgPtr::new(current_task, args))?;
1645            if fprog.len > BPF_MAXINSNS || fprog.len == 0 {
1646                return error!(EINVAL);
1647            }
1648            let code: Vec<sock_filter> =
1649                current_task.read_multi_arch_objects_to_vec(fprog.filter, fprog.len as usize)?;
1650
1651            if !current_task.read().no_new_privs() {
1652                security::check_task_capable(current_task, CAP_SYS_ADMIN)
1653                    .map_err(|_| errno!(EACCES))?;
1654            }
1655            current_task.add_seccomp_filter(locked, code, flags)
1656        }
1657        SECCOMP_GET_ACTION_AVAIL => {
1658            if flags != 0 || args.is_null() {
1659                return error!(EINVAL);
1660            }
1661            let action: u32 = current_task.read_object(UserRef::new(args))?;
1662            SeccompAction::is_action_available(action)
1663        }
1664        SECCOMP_GET_NOTIF_SIZES => {
1665            if flags != 0 {
1666                return error!(EINVAL);
1667            }
1668            track_stub!(TODO("https://fxbug.dev/322874791"), "SECCOMP_GET_NOTIF_SIZES");
1669            error!(ENOSYS)
1670        }
1671        _ => {
1672            track_stub!(TODO("https://fxbug.dev/322874916"), "seccomp fallthrough", operation);
1673            error!(EINVAL)
1674        }
1675    }
1676}
1677
1678pub fn sys_setgroups(
1679    _locked: &mut Locked<Unlocked>,
1680    current_task: &CurrentTask,
1681    size: usize,
1682    groups_addr: UserAddress,
1683) -> Result<(), Errno> {
1684    if size > NGROUPS_MAX as usize {
1685        return error!(EINVAL);
1686    }
1687    let groups = current_task.read_objects_to_vec::<gid_t>(groups_addr.into(), size)?;
1688    security::check_task_capable(current_task, CAP_SETGID)?;
1689    let mut creds = Credentials::clone(&current_task.current_creds());
1690    creds.groups = groups;
1691    current_task.set_creds(creds);
1692    Ok(())
1693}
1694
1695pub fn sys_getgroups(
1696    _locked: &mut Locked<Unlocked>,
1697    current_task: &CurrentTask,
1698    size: usize,
1699    groups_addr: UserAddress,
1700) -> Result<usize, Errno> {
1701    if size > NGROUPS_MAX as usize {
1702        return error!(EINVAL);
1703    }
1704    let creds = current_task.current_creds();
1705    if size != 0 {
1706        if size < creds.groups.len() {
1707            return error!(EINVAL);
1708        }
1709        current_task.write_memory(groups_addr, creds.groups.as_slice().as_bytes())?;
1710    }
1711    Ok(creds.groups.len())
1712}
1713
1714pub fn sys_setsid(
1715    locked: &mut Locked<Unlocked>,
1716    current_task: &CurrentTask,
1717) -> Result<pid_t, Errno> {
1718    current_task.thread_group().setsid(locked)?;
1719    Ok(current_task.get_pid())
1720}
1721
1722// Note the asymmetry with sys_setpriority: this returns "kernel nice" which ranges
1723// from 1 (weakest) to 40 (strongest). (It is part of Linux history that this syscall
1724// deals with niceness but has "priority" in its name.)
1725pub fn sys_getpriority(
1726    _locked: &mut Locked<Unlocked>,
1727    current_task: &CurrentTask,
1728    which: u32,
1729    who: i32,
1730) -> Result<u8, Errno> {
1731    match which {
1732        PRIO_PROCESS => {}
1733        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1734        _ => return error!(EINVAL),
1735    }
1736    track_stub!(TODO("https://fxbug.dev/322893809"), "getpriority permissions");
1737    let weak = get_task_or_current(current_task, who);
1738    let target_task = Task::from_weak(&weak)?;
1739    let state = target_task.read();
1740    Ok(state.scheduler_state.normal_priority.raw_priority())
1741}
1742
1743// Note the asymmetry with sys_getpriority: this call's `priority` parameter is a
1744// "user nice" which ranges from -20 (strongest) to 19 (weakest) (other values can be
1745// passed and are clamped to that range and interpretation). (It is part of Linux
1746// history that this syscall deals with niceness but has "priority" in its name.)
1747pub fn sys_setpriority(
1748    locked: &mut Locked<Unlocked>,
1749    current_task: &CurrentTask,
1750    which: u32,
1751    who: i32,
1752    priority: i32,
1753) -> Result<(), Errno> {
1754    // Parse & validate the arguments.
1755    match which {
1756        PRIO_PROCESS => {}
1757        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1758        _ => return error!(EINVAL),
1759    }
1760
1761    let weak = get_task_or_current(current_task, who);
1762    let target_task = Task::from_weak(&weak)?;
1763
1764    let normal_priority = NormalPriority::from_setpriority_syscall(priority);
1765
1766    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
1767    let current_state = target_task.read().scheduler_state;
1768
1769    // Check capabilities and permissions, if required, for the operation.
1770    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
1771    let strengthening = current_state.normal_priority < normal_priority;
1772    let rlimited = strengthening
1773        && normal_priority.exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
1774    if !euid_friendly {
1775        security::check_task_capable(current_task, CAP_SYS_NICE)?;
1776    } else if rlimited {
1777        security::check_task_capable(current_task, CAP_SYS_NICE).map_err(|_| errno!(EACCES))?;
1778    }
1779
1780    security::check_setsched_access(current_task, &target_task)?;
1781
1782    // Apply the new scheduler configuration to the task.
1783    target_task.set_scheduler_nice(normal_priority)?;
1784
1785    Ok(())
1786}
1787
1788pub fn sys_setns(
1789    _locked: &mut Locked<Unlocked>,
1790    current_task: &CurrentTask,
1791    ns_fd: FdNumber,
1792    ns_type: c_int,
1793) -> Result<(), Errno> {
1794    let file_handle = current_task.get_file(ns_fd)?;
1795
1796    // From man pages this is not quite right because some namespace types require more capabilities
1797    // or require this capability in multiple namespaces, but it should cover our current test
1798    // cases and we can make this more nuanced once more namespace types are supported.
1799    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1800
1801    if let Some(mount_ns) = file_handle.downcast_file::<MountNamespaceFile>() {
1802        if !(ns_type == 0 || ns_type == CLONE_NEWNS as i32) {
1803            log_trace!("invalid type");
1804            return error!(EINVAL);
1805        }
1806
1807        track_stub!(TODO("https://fxbug.dev/297312091"), "setns CLONE_FS limitations");
1808        current_task.fs().set_namespace(mount_ns.0.clone())?;
1809        return Ok(());
1810    }
1811
1812    if let Some(_pidfd) = file_handle.downcast_file::<PidFdFileObject>() {
1813        track_stub!(TODO("https://fxbug.dev/297312844"), "setns w/ pidfd");
1814        return error!(ENOSYS);
1815    }
1816
1817    track_stub!(TODO("https://fxbug.dev/322893829"), "unknown ns file for setns, see logs");
1818    log_info!("ns_fd was not a supported namespace file: {}", file_handle.ops_type_name());
1819    error!(EINVAL)
1820}
1821
1822pub fn sys_unshare(
1823    _locked: &mut Locked<Unlocked>,
1824    current_task: &CurrentTask,
1825    flags: u32,
1826) -> Result<(), Errno> {
1827    const IMPLEMENTED_FLAGS: u32 = CLONE_FILES | CLONE_FS | CLONE_NEWNS | CLONE_NEWUTS;
1828    if flags & !IMPLEMENTED_FLAGS != 0 {
1829        track_stub!(TODO("https://fxbug.dev/322893372"), "unshare", flags & !IMPLEMENTED_FLAGS);
1830        return error!(EINVAL);
1831    }
1832
1833    if (flags & CLONE_FILES) != 0 {
1834        current_task.live().files.unshare();
1835    }
1836
1837    if (flags & CLONE_FS) != 0 {
1838        current_task.unshare_fs();
1839    }
1840
1841    if (flags & CLONE_NEWNS) != 0 {
1842        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1843        current_task.fs().unshare_namespace();
1844    }
1845
1846    if (flags & CLONE_NEWUTS) != 0 {
1847        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1848        // Fork the UTS namespace.
1849        let mut task_state = current_task.write();
1850        let new_uts_ns = task_state.uts_ns.read().clone();
1851        task_state.uts_ns = Arc::new(RwLock::new(new_uts_ns));
1852    }
1853
1854    Ok(())
1855}
1856
1857pub fn sys_swapon(
1858    locked: &mut Locked<Unlocked>,
1859    current_task: &CurrentTask,
1860    user_path: UserCString,
1861    _flags: i32,
1862) -> Result<(), Errno> {
1863    const MAX_SWAPFILES: usize = 32; // See https://man7.org/linux/man-pages/man2/swapon.2.html
1864
1865    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1866
1867    track_stub!(TODO("https://fxbug.dev/322893905"), "swapon validate flags");
1868
1869    let path = current_task.read_path(user_path)?;
1870    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1871
1872    let node = file.node();
1873    let mode = node.info().mode;
1874    if !mode.is_reg() && !mode.is_blk() {
1875        return error!(EINVAL);
1876    }
1877
1878    // We determined this magic number by using the mkswap tool and the file tool. The mkswap tool
1879    // populates a few bytes in the file, including a UUID, which can be replaced with zeros while
1880    // still being recognized by the file tool. This string appears at a fixed offset
1881    // (MAGIC_OFFSET) in the file, which looks quite like a magic number.
1882    const MAGIC_OFFSET: usize = 0xff6;
1883    let swap_magic = b"SWAPSPACE2";
1884    let mut buffer = VecOutputBuffer::new(swap_magic.len());
1885    if file.read_at(locked, current_task, MAGIC_OFFSET, &mut buffer)? != swap_magic.len()
1886        || buffer.data() != swap_magic
1887    {
1888        return error!(EINVAL);
1889    }
1890
1891    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1892    for swap_node in swap_files.iter() {
1893        if Arc::ptr_eq(swap_node, node) {
1894            return error!(EBUSY);
1895        }
1896    }
1897    if swap_files.len() >= MAX_SWAPFILES {
1898        return error!(EPERM);
1899    }
1900    swap_files.push(node.clone());
1901    Ok(())
1902}
1903
1904pub fn sys_swapoff(
1905    locked: &mut Locked<Unlocked>,
1906    current_task: &CurrentTask,
1907    user_path: UserCString,
1908) -> Result<(), Errno> {
1909    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1910
1911    let path = current_task.read_path(user_path)?;
1912    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1913    let node = file.node();
1914
1915    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1916    let original_length = swap_files.len();
1917    swap_files.retain(|swap_node| !Arc::ptr_eq(swap_node, node));
1918    if swap_files.len() == original_length {
1919        return error!(EINVAL);
1920    }
1921    Ok(())
1922}
1923
/// Parameters used to obfuscate kernel values before `sys_kcmp` compares them.
///
/// Both fields are filled with random bytes when `KCMP_PARAMS` is first used.
#[derive(Default, Debug, IntoBytes, KnownLayout, FromBytes, Immutable)]
#[repr(C)]
struct KcmpParams {
    /// Value XORed into the input by `obfuscate_value`.
    mask: usize,
    /// Multiplier applied (with wrapping) after the XOR; forced to be odd at initialization.
    shuffle: usize,
}
1930
1931static KCMP_PARAMS: LazyLock<KcmpParams> = LazyLock::new(|| {
1932    let mut params = KcmpParams::default();
1933    starnix_crypto::cprng_draw(params.as_mut_bytes());
1934    // Ensure the shuffle is odd so that multiplying a usize by this value is a permutation.
1935    params.shuffle |= 1;
1936    params
1937});
1938
1939fn obfuscate_value(value: usize) -> usize {
1940    let KcmpParams { mask, shuffle } = *KCMP_PARAMS;
1941    (value ^ mask).wrapping_mul(shuffle)
1942}
1943
1944fn obfuscate_ptr<T>(ptr: *const T) -> usize {
1945    obfuscate_value(ptr as usize)
1946}
1947
1948fn obfuscate_arc<T>(arc: &Arc<T>) -> usize {
1949    obfuscate_ptr(Arc::as_ptr(arc))
1950}
1951
1952pub fn sys_kcmp(
1953    locked: &mut Locked<Unlocked>,
1954    current_task: &CurrentTask,
1955    pid1: pid_t,
1956    pid2: pid_t,
1957    resource_type: u32,
1958    index1: u64,
1959    index2: u64,
1960) -> Result<u32, Errno> {
1961    let weak1 = current_task.get_task(pid1);
1962    let weak2 = current_task.get_task(pid2);
1963    let task1 = Task::from_weak(&weak1)?;
1964    let task2 = Task::from_weak(&weak2)?;
1965
1966    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task1)?;
1967    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task2)?;
1968
1969    let resource_type = KcmpResource::from_raw(resource_type)?;
1970
1971    // Output encoding (see <https://man7.org/linux/man-pages/man2/kcmp.2.html>):
1972    //
1973    //   0  v1 is equal to v2; in other words, the two processes share the resource.
1974    //   1  v1 is less than v2.
1975    //   2  v1 is greater than v2.
1976    //   3  v1 is not equal to v2, but ordering information is unavailable.
1977    //
1978    fn encode_ordering(value: cmp::Ordering) -> u32 {
1979        match value {
1980            cmp::Ordering::Equal => 0,
1981            cmp::Ordering::Less => 1,
1982            cmp::Ordering::Greater => 2,
1983        }
1984    }
1985
1986    match resource_type {
1987        KcmpResource::FILE => {
1988            fn get_file(task: &Task, index: u64) -> Result<FileHandle, Errno> {
1989                // TODO: Test whether O_PATH is allowed here. Conceptually, seems like
1990                //       O_PATH should be allowed, but we haven't tested it yet.
1991                task.live()?.files.get_allowing_opath(FdNumber::from_raw(
1992                    index.try_into().map_err(|_| errno!(EBADF))?,
1993                ))
1994            }
1995            let file1 = get_file(&task1, index1)?;
1996            let file2 = get_file(&task2, index2)?;
1997            Ok(encode_ordering(obfuscate_arc(&file1).cmp(&obfuscate_arc(&file2))))
1998        }
1999        KcmpResource::FILES => {
2000            let files1 = task1.live()?.files.id();
2001            let files2 = task2.live()?.files.id();
2002            Ok(encode_ordering(obfuscate_value(files1.raw()).cmp(&obfuscate_value(files2.raw()))))
2003        }
2004        KcmpResource::FS => {
2005            let fs1 = task1.live()?.fs();
2006            let fs2 = task2.live()?.fs();
2007            Ok(encode_ordering(obfuscate_arc(&fs1).cmp(&obfuscate_arc(&fs2))))
2008        }
2009        KcmpResource::SIGHAND => Ok(encode_ordering(
2010            obfuscate_arc(&task1.thread_group().signal_actions)
2011                .cmp(&obfuscate_arc(&task2.thread_group().signal_actions)),
2012        )),
2013        KcmpResource::VM => {
2014            Ok(encode_ordering(obfuscate_arc(&task1.mm()?).cmp(&obfuscate_arc(&task2.mm()?))))
2015        }
2016        _ => error!(EINVAL),
2017    }
2018}
2019
2020pub fn sys_syslog(
2021    locked: &mut Locked<Unlocked>,
2022    current_task: &CurrentTask,
2023    action_type: i32,
2024    address: UserAddress,
2025    length: i32,
2026) -> Result<i32, Errno> {
2027    let action = SyslogAction::try_from(action_type)?;
2028    let syslog =
2029        current_task.kernel().syslog.access(&current_task, SyslogAccess::Syscall(action))?;
2030    match action {
2031        SyslogAction::Read => {
2032            if address.is_null() || length < 0 {
2033                return error!(EINVAL);
2034            }
2035            let mut output_buffer =
2036                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2037            syslog.blocking_read(locked, current_task, &mut output_buffer)
2038        }
2039        SyslogAction::ReadAll => {
2040            if address.is_null() || length < 0 {
2041                return error!(EINVAL);
2042            }
2043            let mut output_buffer =
2044                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2045            syslog.read_all(current_task, &mut output_buffer)
2046        }
2047        SyslogAction::SizeUnread => syslog.size_unread(),
2048        SyslogAction::SizeBuffer => syslog.size_buffer(),
2049        SyslogAction::Close | SyslogAction::Open => Ok(0),
2050        SyslogAction::ReadClear => {
2051            track_stub!(TODO("https://fxbug.dev/322894145"), "syslog: read clear");
2052            Ok(0)
2053        }
2054        SyslogAction::Clear => {
2055            track_stub!(TODO("https://fxbug.dev/322893673"), "syslog: clear");
2056            Ok(0)
2057        }
2058        SyslogAction::ConsoleOff => {
2059            track_stub!(TODO("https://fxbug.dev/322894399"), "syslog: console off");
2060            Ok(0)
2061        }
2062        SyslogAction::ConsoleOn => {
2063            track_stub!(TODO("https://fxbug.dev/322894106"), "syslog: console on");
2064            Ok(0)
2065        }
2066        SyslogAction::ConsoleLevel => {
2067            if length <= 0 || length >= 8 {
2068                return error!(EINVAL);
2069            }
2070            track_stub!(TODO("https://fxbug.dev/322894199"), "syslog: console level");
2071            Ok(0)
2072        }
2073    }
2074}
2075
2076pub fn sys_vhangup(
2077    _locked: &mut Locked<Unlocked>,
2078    current_task: &CurrentTask,
2079) -> Result<(), Errno> {
2080    security::check_task_capable(current_task, CAP_SYS_TTY_CONFIG)?;
2081    track_stub!(TODO("https://fxbug.dev/324079257"), "vhangup");
2082    Ok(())
2083}
2084
// Syscalls for arch32 usage
//
// Every entry re-exports one of this file's syscall handlers under a `sys_arch32_*` name; no
// 32-bit-specific implementation is defined here. Some handlers are exported under two names
// (for example `sys_setfsuid` as both `sys_arch32_setfsuid` and `sys_arch32_setfsuid32`).
#[cfg(target_arch = "aarch64")]
mod arch32 {
    pub use super::{
        sys_execve as sys_arch32_execve, sys_getegid as sys_arch32_getegid32,
        sys_geteuid as sys_arch32_geteuid32, sys_getgid as sys_arch32_getgid32,
        sys_getgroups as sys_arch32_getgroups32, sys_getpgid as sys_arch32_getpgid,
        sys_getppid as sys_arch32_getppid, sys_getpriority as sys_arch32_getpriority,
        sys_getresgid as sys_arch32_getresgid32, sys_getresuid as sys_arch32_getresuid32,
        sys_getrlimit as sys_arch32_ugetrlimit, sys_getrusage as sys_arch32_getrusage,
        sys_getuid as sys_arch32_getuid32, sys_ioprio_set as sys_arch32_ioprio_set,
        sys_ptrace as sys_arch32_ptrace, sys_quotactl as sys_arch32_quotactl,
        sys_sched_get_priority_max as sys_arch32_sched_get_priority_max,
        sys_sched_get_priority_min as sys_arch32_sched_get_priority_min,
        sys_sched_getaffinity as sys_arch32_sched_getaffinity,
        sys_sched_getparam as sys_arch32_sched_getparam,
        sys_sched_setaffinity as sys_arch32_sched_setaffinity,
        sys_sched_setparam as sys_arch32_sched_setparam,
        sys_sched_setscheduler as sys_arch32_sched_setscheduler, sys_seccomp as sys_arch32_seccomp,
        sys_setfsuid as sys_arch32_setfsuid, sys_setfsuid as sys_arch32_setfsuid32,
        sys_setgid as sys_arch32_setgid32, sys_setgroups as sys_arch32_setgroups32,
        sys_setns as sys_arch32_setns, sys_setpgid as sys_arch32_setpgid,
        sys_setpriority as sys_arch32_setpriority, sys_setregid as sys_arch32_setregid32,
        sys_setresgid as sys_arch32_setresgid32, sys_setresuid as sys_arch32_setresuid32,
        sys_setreuid as sys_arch32_setreuid32, sys_setreuid as sys_arch32_setreuid,
        sys_setrlimit as sys_arch32_setrlimit, sys_setsid as sys_arch32_setsid,
        sys_syslog as sys_arch32_syslog, sys_unshare as sys_arch32_unshare,
    };
}

// Make the aliases available directly from this module.
#[cfg(target_arch = "aarch64")]
pub use arch32::*;
2117
2118#[cfg(test)]
2119mod tests {
2120    use super::*;
2121    use crate::mm::syscalls::sys_munmap;
2122    use crate::testing::{AutoReleasableTask, map_memory, spawn_kernel_and_run};
2123    use starnix_syscalls::SUCCESS;
2124    use starnix_task_command::TaskCommand;
2125    use starnix_uapi::auth::Credentials;
2126    use starnix_uapi::{SCHED_FIFO, SCHED_NORMAL};
2127    use std::ffi::CString;
2128
2129    #[::fuchsia::test]
2130    async fn test_prctl_set_vma_anon_name() {
2131        spawn_kernel_and_run(async |locked, current_task| {
2132            let mapped_address =
2133                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2134            let name_addr = (mapped_address + 128u64).unwrap();
2135            let name = "test-name\0";
2136            current_task.write_memory(name_addr, name.as_bytes()).expect("failed to write name");
2137            sys_prctl(
2138                locked,
2139                current_task,
2140                PR_SET_VMA,
2141                PR_SET_VMA_ANON_NAME as u64,
2142                mapped_address.ptr() as u64,
2143                32,
2144                name_addr.ptr() as u64,
2145            )
2146            .expect("failed to set name");
2147            assert_eq!(
2148                "test-name",
2149                current_task
2150                    .mm()
2151                    .unwrap()
2152                    .get_mapping_name((mapped_address + 24u64).unwrap())
2153                    .expect("failed to get address")
2154                    .unwrap()
2155                    .to_string(),
2156            );
2157
2158            sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize)
2159                .expect("failed to unmap memory");
2160            assert_eq!(
2161                error!(EFAULT),
2162                current_task.mm().unwrap().get_mapping_name((mapped_address + 24u64).unwrap())
2163            );
2164        })
2165        .await;
2166    }
2167
2168    #[::fuchsia::test]
2169    async fn test_set_vma_name_special_chars() {
2170        spawn_kernel_and_run(async |locked, current_task| {
2171            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2172
2173            let mapping_addr =
2174                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2175
2176            for c in 1..255 {
2177                let vma_name = CString::new([c]).unwrap();
2178                current_task.write_memory(name_addr, vma_name.as_bytes_with_nul()).unwrap();
2179
2180                let result = sys_prctl(
2181                    locked,
2182                    current_task,
2183                    PR_SET_VMA,
2184                    PR_SET_VMA_ANON_NAME as u64,
2185                    mapping_addr.ptr() as u64,
2186                    *PAGE_SIZE,
2187                    name_addr.ptr() as u64,
2188                );
2189
2190                if c > 0x1f
2191                    && c < 0x7f
2192                    && c != b'\\'
2193                    && c != b'`'
2194                    && c != b'$'
2195                    && c != b'['
2196                    && c != b']'
2197                {
2198                    assert_eq!(result, Ok(SUCCESS));
2199                } else {
2200                    assert_eq!(result, error!(EINVAL));
2201                }
2202            }
2203        })
2204        .await;
2205    }
2206
2207    #[::fuchsia::test]
2208    async fn test_set_vma_name_long() {
2209        spawn_kernel_and_run(async |locked, current_task| {
2210            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2211
2212            let mapping_addr =
2213                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2214
2215            let name_too_long = CString::new(vec![b'a'; 256]).unwrap();
2216
2217            current_task.write_memory(name_addr, name_too_long.as_bytes_with_nul()).unwrap();
2218
2219            assert_eq!(
2220                sys_prctl(
2221                    locked,
2222                    current_task,
2223                    PR_SET_VMA,
2224                    PR_SET_VMA_ANON_NAME as u64,
2225                    mapping_addr.ptr() as u64,
2226                    *PAGE_SIZE,
2227                    name_addr.ptr() as u64,
2228                ),
2229                error!(EINVAL)
2230            );
2231
2232            let name_just_long_enough = CString::new(vec![b'a'; 255]).unwrap();
2233
2234            current_task
2235                .write_memory(name_addr, name_just_long_enough.as_bytes_with_nul())
2236                .unwrap();
2237
2238            assert_eq!(
2239                sys_prctl(
2240                    locked,
2241                    current_task,
2242                    PR_SET_VMA,
2243                    PR_SET_VMA_ANON_NAME as u64,
2244                    mapping_addr.ptr() as u64,
2245                    *PAGE_SIZE,
2246                    name_addr.ptr() as u64,
2247                ),
2248                Ok(SUCCESS)
2249            );
2250        })
2251        .await;
2252    }
2253
2254    #[::fuchsia::test]
2255    async fn test_set_vma_name_misaligned() {
2256        spawn_kernel_and_run(async |locked, current_task| {
2257            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2258
2259            let mapping_addr =
2260                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2261
2262            let name = CString::new("name").unwrap();
2263            current_task.write_memory(name_addr, name.as_bytes_with_nul()).unwrap();
2264
2265            // Passing a misaligned pointer to the start of the named region fails.
2266            assert_eq!(
2267                sys_prctl(
2268                    locked,
2269                    current_task,
2270                    PR_SET_VMA,
2271                    PR_SET_VMA_ANON_NAME as u64,
2272                    1 + mapping_addr.ptr() as u64,
2273                    *PAGE_SIZE - 1,
2274                    name_addr.ptr() as u64,
2275                ),
2276                error!(EINVAL)
2277            );
2278
2279            // Passing an unaligned length does work, however.
2280            assert_eq!(
2281                sys_prctl(
2282                    locked,
2283                    current_task,
2284                    PR_SET_VMA,
2285                    PR_SET_VMA_ANON_NAME as u64,
2286                    mapping_addr.ptr() as u64,
2287                    *PAGE_SIZE - 1,
2288                    name_addr.ptr() as u64,
2289                ),
2290                Ok(SUCCESS)
2291            );
2292        })
2293        .await;
2294    }
2295
2296    #[::fuchsia::test]
2297    async fn test_prctl_get_set_dumpable() {
2298        spawn_kernel_and_run(async |locked, current_task| {
2299            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2300                .expect("failed to get dumpable");
2301
2302            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 1, 0, 0, 0)
2303                .expect("failed to set dumpable");
2304            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2305                .expect("failed to get dumpable");
2306
2307            // SUID_DUMP_ROOT not supported.
2308            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 2, 0, 0, 0)
2309                .expect("failed to set dumpable");
2310            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2311                .expect("failed to get dumpable");
2312        })
2313        .await;
2314    }
2315
2316    #[::fuchsia::test]
2317    async fn test_sys_getsid() {
2318        spawn_kernel_and_run(async |locked, current_task| {
2319            let kernel = current_task.kernel();
2320            assert_eq!(
2321                current_task.get_tid(),
2322                sys_getsid(locked, &current_task, 0).expect("failed to get sid")
2323            );
2324
2325            let second_task = crate::execution::create_init_child_process(
2326                locked,
2327                &kernel.weak_self.upgrade().unwrap(),
2328                TaskCommand::new(b"second task"),
2329                Credentials::with_ids(0, 0),
2330                None,
2331            )
2332            .expect("failed to create second task");
2333            let second_current = AutoReleasableTask::from(second_task);
2334
2335            assert_eq!(
2336                second_current.get_tid(),
2337                sys_getsid(locked, &current_task, second_current.get_tid())
2338                    .expect("failed to get sid")
2339            );
2340        })
2341        .await;
2342    }
2343
2344    #[::fuchsia::test]
2345    async fn test_get_affinity_size() {
2346        spawn_kernel_and_run(async |locked, current_task| {
2347            let mapped_address =
2348                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2349            let pid = current_task.get_pid();
2350            assert_eq!(
2351                sys_sched_getaffinity(locked, &current_task, pid, 16, mapped_address),
2352                Ok(16)
2353            );
2354            assert_eq!(
2355                sys_sched_getaffinity(locked, &current_task, pid, 1024, mapped_address),
2356                Ok(std::mem::size_of::<CpuSet>())
2357            );
2358            assert_eq!(
2359                sys_sched_getaffinity(locked, &current_task, pid, 1, mapped_address),
2360                error!(EINVAL)
2361            );
2362            assert_eq!(
2363                sys_sched_getaffinity(locked, &current_task, pid, 9, mapped_address),
2364                error!(EINVAL)
2365            );
2366        })
2367        .await;
2368    }
2369
2370    #[::fuchsia::test]
2371    async fn test_set_affinity_size() {
2372        spawn_kernel_and_run(async |locked, current_task| {
2373            let mapped_address =
2374                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2375            current_task.write_memory(mapped_address, &[0xffu8]).expect("failed to cpumask");
2376            let pid = current_task.get_pid();
2377            assert_eq!(
2378                sys_sched_setaffinity(
2379                    locked,
2380                    &current_task,
2381                    pid,
2382                    *PAGE_SIZE as u32,
2383                    mapped_address
2384                ),
2385                Ok(())
2386            );
2387            assert_eq!(
2388                sys_sched_setaffinity(locked, &current_task, pid, 1, mapped_address),
2389                error!(EINVAL)
2390            );
2391        })
2392        .await;
2393    }
2394
2395    #[::fuchsia::test]
2396    async fn test_task_name() {
2397        spawn_kernel_and_run(async |locked, current_task| {
2398            let mapped_address =
2399                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2400            let name = "my-task-name\0";
2401            current_task
2402                .write_memory(mapped_address, name.as_bytes())
2403                .expect("failed to write name");
2404
2405            let result =
2406                sys_prctl(locked, current_task, PR_SET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2407                    .unwrap();
2408            assert_eq!(SUCCESS, result);
2409
2410            let mapped_address =
2411                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2412            let result =
2413                sys_prctl(locked, current_task, PR_GET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2414                    .unwrap();
2415            assert_eq!(SUCCESS, result);
2416
2417            let name_length = name.len();
2418
2419            let out_name = current_task.read_memory_to_vec(mapped_address, name_length).unwrap();
2420            assert_eq!(name.as_bytes(), &out_name);
2421        })
2422        .await;
2423    }
2424
2425    #[::fuchsia::test]
2426    async fn test_sched_get_priority_min_max() {
2427        spawn_kernel_and_run(async |locked, current_task| {
2428            let non_rt_min =
2429                sys_sched_get_priority_min(locked, &current_task, SCHED_NORMAL).unwrap();
2430            assert_eq!(non_rt_min, 0);
2431            let non_rt_max =
2432                sys_sched_get_priority_max(locked, &current_task, SCHED_NORMAL).unwrap();
2433            assert_eq!(non_rt_max, 0);
2434
2435            let rt_min = sys_sched_get_priority_min(locked, &current_task, SCHED_FIFO).unwrap();
2436            assert_eq!(rt_min, 1);
2437            let rt_max = sys_sched_get_priority_max(locked, &current_task, SCHED_FIFO).unwrap();
2438            assert_eq!(rt_max, 99);
2439
2440            let min_bad_policy_error =
2441                sys_sched_get_priority_min(locked, &current_task, std::u32::MAX).unwrap_err();
2442            assert_eq!(min_bad_policy_error, errno!(EINVAL));
2443
2444            let max_bad_policy_error =
2445                sys_sched_get_priority_max(locked, &current_task, std::u32::MAX).unwrap_err();
2446            assert_eq!(max_bad_policy_error, errno!(EINVAL));
2447        })
2448        .await;
2449    }
2450
2451    #[::fuchsia::test]
2452    async fn test_sched_setscheduler() {
2453        spawn_kernel_and_run(async |locked, current_task| {
2454            current_task
2455                .thread_group()
2456                .limits
2457                .lock(locked)
2458                .set(Resource::RTPRIO, rlimit { rlim_cur: 255, rlim_max: 255 });
2459
2460            let scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2461            assert_eq!(scheduler, SCHED_NORMAL, "tasks should have normal scheduler by default");
2462
2463            let mapped_address =
2464                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2465            let requested_params = sched_param { sched_priority: 15 };
2466            current_task.write_object(mapped_address.into(), &requested_params).unwrap();
2467
2468            sys_sched_setscheduler(locked, &current_task, 0, SCHED_FIFO, mapped_address.into())
2469                .unwrap();
2470
2471            let new_scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2472            assert_eq!(new_scheduler, SCHED_FIFO, "task should have been assigned fifo scheduler");
2473
2474            let mapped_address =
2475                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2476            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2477                .expect("sched_getparam");
2478            let param_value: sched_param =
2479                current_task.read_object(mapped_address.into()).expect("read_object");
2480            assert_eq!(param_value.sched_priority, 15);
2481        })
2482        .await;
2483    }
2484
2485    #[::fuchsia::test]
2486    async fn test_sched_getparam() {
2487        spawn_kernel_and_run(async |locked, current_task| {
2488            let mapped_address =
2489                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2490            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2491                .expect("sched_getparam");
2492            let param_value: sched_param =
2493                current_task.read_object(mapped_address.into()).expect("read_object");
2494            assert_eq!(param_value.sched_priority, 0);
2495        })
2496        .await;
2497    }
2498
2499    #[::fuchsia::test]
2500    async fn test_setuid() {
2501        spawn_kernel_and_run(async |locked, current_task| {
2502            // Test for root.
2503            current_task.set_creds(Credentials::with_ids(0, 0));
2504            sys_setuid(locked, &current_task, 42).expect("setuid");
2505            let mut creds = Credentials::clone(&current_task.current_creds());
2506            assert_eq!(creds.euid, 42);
2507            assert_eq!(creds.uid, 42);
2508            assert_eq!(creds.saved_uid, 42);
2509
2510            // Remove the CAP_SETUID capability to avoid overwriting permission checks.
2511            creds.cap_effective.remove(CAP_SETUID);
2512            current_task.set_creds(creds);
2513
2514            // Test for non root, which task now is.
2515            assert_eq!(sys_setuid(locked, &current_task, 0), error!(EPERM));
2516            assert_eq!(sys_setuid(locked, &current_task, 43), error!(EPERM));
2517
2518            sys_setuid(locked, &current_task, 42).expect("setuid");
2519            assert_eq!(current_task.current_creds().euid, 42);
2520            assert_eq!(current_task.current_creds().uid, 42);
2521            assert_eq!(current_task.current_creds().saved_uid, 42);
2522
2523            // Change uid and saved_uid, and check that one can set the euid to these.
2524            let mut creds = Credentials::clone(&current_task.current_creds());
2525            creds.uid = 41;
2526            creds.euid = 42;
2527            creds.saved_uid = 43;
2528            current_task.set_creds(creds);
2529
2530            sys_setuid(locked, &current_task, 41).expect("setuid");
2531            assert_eq!(current_task.current_creds().euid, 41);
2532            assert_eq!(current_task.current_creds().uid, 41);
2533            assert_eq!(current_task.current_creds().saved_uid, 43);
2534
2535            let mut creds = Credentials::clone(&current_task.current_creds());
2536            creds.uid = 41;
2537            creds.euid = 42;
2538            creds.saved_uid = 43;
2539            current_task.set_creds(creds);
2540
2541            sys_setuid(locked, &current_task, 43).expect("setuid");
2542            assert_eq!(current_task.current_creds().euid, 43);
2543            assert_eq!(current_task.current_creds().uid, 41);
2544            assert_eq!(current_task.current_creds().saved_uid, 43);
2545        })
2546        .await;
2547    }
2548
2549    #[::fuchsia::test]
2550    async fn test_read_c_string_vector() {
2551        spawn_kernel_and_run(async |locked, current_task| {
2552            let arg_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2553            let arg = b"test-arg\0";
2554            current_task.write_memory(arg_addr, arg).expect("failed to write test arg");
2555            let arg_usercstr = UserCString::new(current_task, arg_addr);
2556            let null_usercstr = UserCString::null(current_task);
2557
2558            let argv_addr = UserCStringPtr::new(
2559                current_task,
2560                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE),
2561            );
2562            current_task
2563                .write_multi_arch_ptr(argv_addr.addr(), arg_usercstr)
2564                .expect("failed to write UserCString");
2565            current_task
2566                .write_multi_arch_ptr(argv_addr.next().unwrap().addr(), null_usercstr)
2567                .expect("failed to write UserCString");
2568
2569            // The arguments size limit should include the null terminator.
2570            assert!(read_c_string_vector(&current_task, argv_addr, 100, arg.len()).is_ok());
2571            assert_eq!(
2572                read_c_string_vector(
2573                    &current_task,
2574                    argv_addr,
2575                    100,
2576                    std::str::from_utf8(arg).unwrap().trim_matches('\0').len()
2577                ),
2578                error!(E2BIG)
2579            );
2580        })
2581        .await;
2582    }
2583}