starnix_core/task/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::execution::execute_task;
6use crate::mm::{DumpPolicy, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
7use crate::ptrace::{
8    PR_SET_PTRACER_ANY, PtraceAllowedPtracers, PtraceAttachType, PtraceOptions, ptrace_attach,
9    ptrace_dispatch, ptrace_traceme,
10};
11use crate::security;
12use crate::signals::syscalls::RUsagePtr;
13use crate::task::{
14    CurrentTask, ExitStatus, NormalPriority, SchedulingPolicy, SeccompAction, SeccompStateValue,
15    SyslogAccess, Task, ThreadGroup, max_priority_for_sched_policy, min_priority_for_sched_policy,
16};
17use crate::vfs::{
18    FdNumber, FileHandle, MountNamespaceFile, PidFdFileObject, UserBuffersOutputBuffer,
19    VecOutputBuffer,
20};
21use starnix_logging::{log_error, log_info, log_trace, track_stub};
22use starnix_sync::{Locked, RwLock, Unlocked};
23use starnix_syscalls::SyscallResult;
24use starnix_task_command::TaskCommand;
25use starnix_types::ownership::WeakRef;
26use starnix_types::time::timeval_from_duration;
27use starnix_uapi::auth::{
28    CAP_SETGID, CAP_SETPCAP, CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SYS_RESOURCE,
29    CAP_SYS_TTY_CONFIG, Capabilities, Credentials, PTRACE_MODE_READ_REALCREDS, SecureBits,
30};
31use starnix_uapi::errors::{ENAMETOOLONG, Errno};
32use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
33use starnix_uapi::kcmp::KcmpResource;
34use starnix_uapi::open_flags::OpenFlags;
35use starnix_uapi::resource_limits::Resource;
36use starnix_uapi::signals::{Signal, UncheckedSignal};
37use starnix_uapi::syslog::SyslogAction;
38use starnix_uapi::user_address::{
39    ArchSpecific, MappingMultiArchUserRef, MultiArchUserRef, UserAddress, UserCString,
40    UserCStringPtr, UserRef,
41};
42use starnix_uapi::vfs::ResolveFlags;
43use starnix_uapi::{
44    __user_cap_data_struct, __user_cap_header_struct, _LINUX_CAPABILITY_VERSION_1,
45    _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3, AT_EMPTY_PATH, AT_SYMLINK_NOFOLLOW,
46    BPF_MAXINSNS, CLONE_ARGS_SIZE_VER0, CLONE_ARGS_SIZE_VER1, CLONE_ARGS_SIZE_VER2, CLONE_FILES,
47    CLONE_FS, CLONE_NEWNS, CLONE_NEWUTS, CLONE_SETTLS, CLONE_VFORK, NGROUPS_MAX, PR_CAP_AMBIENT,
48    PR_CAP_AMBIENT_CLEAR_ALL, PR_CAP_AMBIENT_IS_SET, PR_CAP_AMBIENT_LOWER, PR_CAP_AMBIENT_RAISE,
49    PR_CAPBSET_DROP, PR_CAPBSET_READ, PR_GET_CHILD_SUBREAPER, PR_GET_DUMPABLE, PR_GET_KEEPCAPS,
50    PR_GET_NAME, PR_GET_NO_NEW_PRIVS, PR_GET_SECCOMP, PR_GET_SECUREBITS, PR_SET_CHILD_SUBREAPER,
51    PR_SET_DUMPABLE, PR_SET_KEEPCAPS, PR_SET_NAME, PR_SET_NO_NEW_PRIVS, PR_SET_PDEATHSIG,
52    PR_SET_PTRACER, PR_SET_SECCOMP, PR_SET_SECUREBITS, PR_SET_TIMERSLACK, PR_SET_VMA,
53    PR_SET_VMA_ANON_NAME, PRIO_PROCESS, PTRACE_ATTACH, PTRACE_SEIZE, PTRACE_TRACEME,
54    RUSAGE_CHILDREN, SCHED_RESET_ON_FORK, SECCOMP_FILTER_FLAG_LOG,
55    SECCOMP_FILTER_FLAG_NEW_LISTENER, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_TSYNC,
56    SECCOMP_FILTER_FLAG_TSYNC_ESRCH, SECCOMP_GET_ACTION_AVAIL, SECCOMP_GET_NOTIF_SIZES,
57    SECCOMP_MODE_FILTER, SECCOMP_MODE_STRICT, SECCOMP_SET_MODE_FILTER, SECCOMP_SET_MODE_STRICT,
58    c_char, c_int, clone_args, errno, error, gid_t, pid_t, rlimit, rusage, sched_param,
59    sock_filter, uapi, uid_t,
60};
61use static_assertions::const_assert;
62use std::cmp;
63use std::ffi::CString;
64use std::sync::{Arc, LazyLock};
65use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
66
67#[cfg(target_arch = "aarch64")]
68use starnix_uapi::{PR_GET_TAGGED_ADDR_CTRL, PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE};
69
/// Multi-arch user reference to a `sock_fprog`, exposed to this module as a [`SockFProg`].
/// The alias names both the `uapi::sock_fprog` and `uapi::arch32::sock_fprog` layouts.
pub type SockFProgPtr =
    MappingMultiArchUserRef<SockFProg, uapi::sock_fprog, uapi::arch32::sock_fprog>;
/// Multi-arch user reference to a `sock_filter`, covering the `uapi` and `uapi::arch32`
/// variants of the struct.
pub type SockFilterPtr = MultiArchUserRef<uapi::sock_filter, uapi::arch32::sock_filter>;
73
/// Architecture-neutral view of a `sock_fprog`: a filter count plus a user pointer to the
/// filter array.
pub struct SockFProg {
    /// Value of the `len` field of the user-provided `sock_fprog`.
    // NOTE(review): presumably the number of `sock_filter` entries behind `filter` — confirm.
    pub len: u32,
    /// User-space reference to the first `sock_filter`.
    pub filter: SockFilterPtr,
}
78
// Maps the `len` and `filter` fields between `SockFProg` and `sock_fprog`.
// NOTE(review): presumably this generates fallible conversions in both directions for each
// supported architecture's `sock_fprog` — confirm against the `arch_map_data!` definition.
uapi::arch_map_data! {
    BidiTryFrom<SockFProg, sock_fprog> {
        len = len;
        filter = filter;
    }
}
85
// Layout check for `sched_param` (field `sched_priority`).
// NOTE(review): presumably a compile-time assertion that the struct has the same layout on
// every supported architecture, which is why `UserRef<sched_param>` is used directly by the
// scheduler syscalls below — confirm against the macro definition.
uapi::check_arch_independent_layout! {
    sched_param {
        sched_priority,
    }
}
91
/// Clones `current_task` as described by `args`, starts the new task, and returns its tid.
///
/// Called by [`sys_clone3`] in this file with the `clone_args` read from user memory.
///
/// The steps, in order:
/// 1. Run the security hook for task creation.
/// 2. Translate `args.exit_signal` (0 means "no exit signal") into a [`Signal`].
/// 3. Clone the task, then set up its return register, stack pointer and (with
///    `CLONE_SETTLS`) thread pointer.
/// 4. Start executing the new task and emit the ptrace clone event on the parent.
/// 5. With `CLONE_VFORK`, wait for the child via `wait_for_execve` and then emit the
///    `TRACEVFORKDONE` ptrace event.
///
/// # Errors
///
/// Propagates failures from the security check, the exit-signal conversion, `clone_task`,
/// `execute_task`, and `wait_for_execve`.
pub fn do_clone(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    args: &clone_args,
) -> Result<pid_t, Errno> {
    security::check_task_create_access(current_task)?;

    // An exit signal of 0 means the child has no exit signal; anything else must be a
    // valid signal number.
    let child_exit_signal = if args.exit_signal == 0 {
        None
    } else {
        Some(Signal::try_from(UncheckedSignal::new(args.exit_signal))?)
    };

    let mut new_task = current_task.clone_task(
        locked,
        args.flags,
        child_exit_signal,
        UserRef::<pid_t>::new(UserAddress::from(args.parent_tid)),
        UserRef::<pid_t>::new(UserAddress::from(args.child_tid)),
        UserRef::<FdNumber>::new(UserAddress::from(args.pidfd)),
    )?;

    // Set the result register to 0 for the return value from clone in the
    // cloned process.
    new_task.thread_state.registers.set_return_register(0);
    let (trace_kind, ptrace_state) = current_task.get_ptrace_core_state_for_clone(args);

    if args.stack != 0 {
        // In clone() the `stack` argument points to the top of the stack, while in clone3()
        // `stack` points to the bottom of the stack. Therefore, in clone3() we need to add
        // `stack_size` to calculate the stack pointer. Note that in clone() `stack_size` is 0.
        new_task
            .thread_state
            .registers
            .set_stack_pointer_register(args.stack.wrapping_add(args.stack_size));
    }

    if args.flags & (CLONE_SETTLS as u64) != 0 {
        new_task.thread_state.registers.set_thread_pointer_register(args.tls);
    }

    // Capture the tid and a weak reference before `new_task` is moved into `execute_task`.
    let tid = new_task.task.tid;
    let task_ref = WeakRef::from(&new_task.task);
    execute_task(locked, new_task, |_, _| Ok(()), |_| {}, ptrace_state)?;

    current_task.ptrace_event(locked, trace_kind, tid as u64);

    if args.flags & (CLONE_VFORK as u64) != 0 {
        // With CLONE_VFORK the caller waits on the child before returning.
        current_task.wait_for_execve(task_ref)?;
        current_task.ptrace_event(locked, PtraceOptions::TRACEVFORKDONE, tid as u64);
    }

    Ok(tid)
}
146
147pub fn sys_clone3(
148    locked: &mut Locked<Unlocked>,
149    current_task: &mut CurrentTask,
150    user_clone_args: UserRef<clone_args>,
151    user_clone_args_size: usize,
152) -> Result<pid_t, Errno> {
153    // Only these specific sized versions are supported.
154    if !(user_clone_args_size == CLONE_ARGS_SIZE_VER0 as usize
155        || user_clone_args_size == CLONE_ARGS_SIZE_VER1 as usize
156        || user_clone_args_size == CLONE_ARGS_SIZE_VER2 as usize)
157    {
158        return error!(EINVAL);
159    }
160
161    // The most recent version of the struct size should match our definition.
162    const_assert!(std::mem::size_of::<clone_args>() == CLONE_ARGS_SIZE_VER2 as usize);
163
164    let clone_args = current_task.read_object_partial(user_clone_args, user_clone_args_size)?;
165    do_clone(locked, current_task, &clone_args)
166}
167
168fn read_c_string_vector(
169    mm: &CurrentTask,
170    user_vector: UserCStringPtr,
171    elem_limit: usize,
172    vec_limit: usize,
173) -> Result<(Vec<CString>, usize), Errno> {
174    let mut user_current = user_vector;
175    let mut vector: Vec<CString> = vec![];
176    let mut vec_size: usize = 0;
177    loop {
178        let user_string = mm.read_multi_arch_ptr(user_current)?;
179        if user_string.is_null() {
180            break;
181        }
182        let string = mm
183            .read_c_string_to_vec(user_string, elem_limit)
184            .map_err(|e| if e.code == ENAMETOOLONG { errno!(E2BIG) } else { e })?;
185        let cstring = CString::new(string).map_err(|_| errno!(EINVAL))?;
186        vec_size =
187            vec_size.checked_add(cstring.as_bytes_with_nul().len()).ok_or_else(|| errno!(E2BIG))?;
188        if vec_size > vec_limit {
189            return error!(E2BIG);
190        }
191        vector.push(cstring);
192        user_current = user_current.next()?;
193    }
194    Ok((vector, vec_size))
195}
196
197pub fn sys_execve(
198    locked: &mut Locked<Unlocked>,
199    current_task: &mut CurrentTask,
200    user_path: UserCString,
201    user_argv: UserCStringPtr,
202    user_environ: UserCStringPtr,
203) -> Result<(), Errno> {
204    sys_execveat(locked, current_task, FdNumber::AT_FDCWD, user_path, user_argv, user_environ, 0)
205}
206
/// Handler for the `execveat` syscall (also used by [`sys_execve`]).
///
/// Reads `argv` and `environ` from user memory under the size limits described below, opens
/// the executable named by `dir_fd`/`user_path`/`flags`, builds the path string handed to
/// `exec`, and finally calls `current_task.exec`.
///
/// # Errors
///
/// * `EINVAL` if `flags` contains bits other than `AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW`, or
///   if the resulting path contains an interior NUL.
/// * `ENOENT` if the path is empty and `AT_EMPTY_PATH` is not set.
/// * `E2BIG` (via [`read_c_string_vector`]) if the arguments or environment are too large.
/// * Any error from reading user memory, opening the file, or `exec`.
pub fn sys_execveat(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    dir_fd: FdNumber,
    user_path: UserCString,
    user_argv: UserCStringPtr,
    user_environ: UserCStringPtr,
    flags: u32,
) -> Result<(), Errno> {
    if flags & !(AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) != 0 {
        return error!(EINVAL);
    }

    // Calculate the limit for argv and environ size as 1/4 of the stack size, floored at 32 pages.
    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
    const PAGE_LIMIT: usize = 32;
    let page_limit_size: usize = PAGE_LIMIT * *PAGE_SIZE as usize;
    let rlimit = current_task.thread_group().get_rlimit(locked, Resource::STACK);
    let stack_limit = rlimit / 4;
    let argv_env_limit = cmp::max(page_limit_size, stack_limit as usize);

    // The limit per argument or environment variable is 32 pages.
    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
    let (argv, argv_size) = if user_argv.is_null() {
        (Vec::new(), 0)
    } else {
        read_c_string_vector(current_task, user_argv, page_limit_size, argv_env_limit)?
    };

    // The environment gets whatever part of the shared budget argv did not use. The
    // subtraction cannot underflow: `read_c_string_vector` fails with E2BIG rather than
    // returning a size above the limit it was given.
    let (environ, _) = if user_environ.is_null() {
        (Vec::new(), 0)
    } else {
        read_c_string_vector(
            current_task,
            user_environ,
            page_limit_size,
            argv_env_limit - argv_size,
        )?
    };

    let path = &current_task.read_path(user_path)?;

    log_trace!(argv:?, environ:?, flags:?; "execveat({dir_fd}, {path})");

    let mut open_flags = OpenFlags::RDONLY;

    if flags & AT_SYMLINK_NOFOLLOW != 0 {
        open_flags |= OpenFlags::NOFOLLOW;
    }

    let executable = if path.is_empty() {
        if flags & AT_EMPTY_PATH == 0 {
            // If AT_EMPTY_PATH is not set, this is an error.
            return error!(ENOENT);
        }

        // O_PATH allowed for:
        //
        //   Passing the file descriptor as the dirfd argument of
        //   openat() and the other "*at()" system calls.  This
        //   includes linkat(2) with AT_EMPTY_PATH (or via procfs
        //   using AT_SYMLINK_FOLLOW) even if the file is not a
        //   directory.
        //
        // See https://man7.org/linux/man-pages/man2/open.2.html
        let file = current_task.files.get_allowing_opath(dir_fd)?;

        // We are forced to reopen the file with O_RDONLY to get access to the underlying VMO.
        // Note that the reopen requests only an EXEC access check (rather than a read
        // check), in case the file mode does not actually have the read permission bit.
        //
        // This can happen because a file could have --x--x--x mode permissions and then
        // be opened with O_PATH. Internally, the file operations would all be stubbed out
        // for that file, which is undesirable here.
        //
        // See https://man7.org/linux/man-pages/man3/fexecve.3.html#DESCRIPTION
        //
        // NOTE(review): this branch passes `OpenFlags::RDONLY` rather than `open_flags`, so
        // AT_SYMLINK_NOFOLLOW has no effect here — confirm that is intended.
        file.name.open(
            locked,
            current_task,
            OpenFlags::RDONLY,
            AccessCheck::check_for(Access::EXEC),
        )?
    } else {
        current_task.open_file_at(
            locked,
            dir_fd,
            path.as_ref(),
            open_flags,
            FileMode::default(),
            ResolveFlags::empty(),
            AccessCheck::check_for(Access::EXEC),
        )?
    };

    // This path can affect script resolution (the path is appended to the script args)
    // and the auxiliary value `AT_EXECFN` from the syscall `getauxval()`
    let path = if dir_fd == FdNumber::AT_FDCWD {
        // The file descriptor is CWD, so the path is exactly
        // what the user specified.
        path.to_vec()
    } else {
        // The path is `/dev/fd/N/P` where N is the file descriptor
        // number and P is the user-provided path (if relative and non-empty).
        //
        // See https://man7.org/linux/man-pages/man2/execveat.2.html#NOTES
        match path.first() {
            Some(b'/') => {
                // The user-provided path is absolute, so dir_fd is ignored.
                path.to_vec()
            }
            Some(_) => {
                // User-provided path is relative, append it.
                let mut new_path = format!("/dev/fd/{}/", dir_fd.raw()).into_bytes();
                new_path.append(&mut path.to_vec());
                new_path
            }
            // User-provided path is empty
            None => format!("/dev/fd/{}", dir_fd.raw()).into_bytes(),
        }
    };

    // An interior NUL byte cannot be represented in a C string.
    let path = CString::new(path).map_err(|_| errno!(EINVAL))?;

    current_task.exec(locked, executable, path, argv, environ)?;
    Ok(())
}
333
334pub fn sys_getcpu(
335    _locked: &mut Locked<Unlocked>,
336    current_task: &CurrentTask,
337    cpu_out: UserRef<u32>,
338    node_out: UserRef<u32>,
339) -> Result<(), Errno> {
340    // "When either cpu or node is NULL nothing is written to the respective pointer."
341    // from https://man7.org/linux/man-pages/man2/getcpu.2.html
342    if !cpu_out.is_null() {
343        let thread_stats = current_task
344            .thread
345            .read()
346            .as_ref()
347            .expect("current thread is never None when executing")
348            .stats()
349            .map_err(|e| errno!(EINVAL, format!("getting thread stats failed {e:?}")))?;
350        current_task.write_object(cpu_out, &thread_stats.last_scheduled_cpu)?;
351    }
352    if !node_out.is_null() {
353        // Zircon does not yet have a concept of NUMA task scheduling, always tell userspace that
354        // it's on the "first" node which should be true for non-NUMA systems.
355        track_stub!(TODO("https://fxbug.dev/325643815"), "getcpu() numa node");
356        current_task.write_object(node_out, &0)?;
357    }
358    Ok(())
359}
360
361pub fn sys_getpid(
362    _locked: &mut Locked<Unlocked>,
363    current_task: &CurrentTask,
364) -> Result<pid_t, Errno> {
365    Ok(current_task.get_pid())
366}
367
368pub fn sys_gettid(
369    _locked: &mut Locked<Unlocked>,
370    current_task: &CurrentTask,
371) -> Result<pid_t, Errno> {
372    Ok(current_task.get_tid())
373}
374
375pub fn sys_getppid(
376    _locked: &mut Locked<Unlocked>,
377    current_task: &CurrentTask,
378) -> Result<pid_t, Errno> {
379    Ok(current_task.thread_group().read().get_ppid())
380}
381
382fn get_task_or_current(current_task: &CurrentTask, pid: pid_t) -> WeakRef<Task> {
383    if pid == 0 { current_task.weak_task() } else { current_task.get_task(pid) }
384}
385
386pub fn sys_getsid(
387    _locked: &mut Locked<Unlocked>,
388    current_task: &CurrentTask,
389    pid: pid_t,
390) -> Result<pid_t, Errno> {
391    let weak = get_task_or_current(current_task, pid);
392    let target_task = Task::from_weak(&weak)?;
393    security::check_task_getsid(current_task, &target_task)?;
394    let sid = target_task.thread_group().read().process_group.session.leader;
395    Ok(sid)
396}
397
398pub fn sys_getpgid(
399    _locked: &mut Locked<Unlocked>,
400    current_task: &CurrentTask,
401    pid: pid_t,
402) -> Result<pid_t, Errno> {
403    let weak = get_task_or_current(current_task, pid);
404    let task = Task::from_weak(&weak)?;
405
406    security::check_getpgid_access(current_task, &task)?;
407    let pgid = task.thread_group().read().process_group.leader;
408    Ok(pgid)
409}
410
411pub fn sys_setpgid(
412    locked: &mut Locked<Unlocked>,
413    current_task: &CurrentTask,
414    pid: pid_t,
415    pgid: pid_t,
416) -> Result<(), Errno> {
417    let weak = get_task_or_current(current_task, pid);
418    let task = Task::from_weak(&weak)?;
419
420    current_task.thread_group().setpgid(locked, current_task, &task, pgid)?;
421    Ok(())
422}
423
424impl CurrentTask {
425    /// Returns true if the `current_task`'s effective user ID (EUID) is the same as the
426    /// EUID or UID of the `target_task`. We describe this as the current task being
427    /// "EUID-friendly" to the target and it enables actions to be performed that would
428    /// otherwise require additional privileges.
429    ///
430    /// See "The caller needs an effective user ID equal to the real user ID or effective
431    /// user ID of the [target]" at sched_setaffinity(2), comparable language at
432    /// setpriority(2), more ambiguous language at sched_setscheduler(2), and no
433    /// particular specification at sched_setparam(2).
434    fn is_euid_friendly_with(&self, target_task: &Task) -> bool {
435        let self_creds = self.current_creds();
436        let target_creds = target_task.real_creds();
437        self_creds.euid == target_creds.uid || self_creds.euid == target_creds.euid
438    }
439}
440
441// A non-root process is allowed to set any of its three uids to the value of any other. The
442// CAP_SETUID capability bypasses these checks and allows setting any uid to any integer. Likewise
443// for gids.
444fn new_uid_allowed(current_task: &CurrentTask, uid: uid_t) -> bool {
445    let current_creds = current_task.current_creds();
446    uid == current_creds.uid
447        || uid == current_creds.euid
448        || uid == current_creds.saved_uid
449        || security::is_task_capable_noaudit(current_task, CAP_SETUID)
450}
451
452fn new_gid_allowed(current_task: &CurrentTask, gid: gid_t) -> bool {
453    let current_creds = current_task.current_creds();
454    gid == current_creds.gid
455        || gid == current_creds.egid
456        || gid == current_creds.saved_gid
457        || security::is_task_capable_noaudit(current_task, CAP_SETGID)
458}
459
460pub fn sys_getuid(
461    _locked: &mut Locked<Unlocked>,
462    current_task: &CurrentTask,
463) -> Result<uid_t, Errno> {
464    Ok(current_task.current_creds().uid)
465}
466
467pub fn sys_getgid(
468    _locked: &mut Locked<Unlocked>,
469    current_task: &CurrentTask,
470) -> Result<gid_t, Errno> {
471    Ok(current_task.current_creds().gid)
472}
473
474pub fn sys_setuid(
475    _locked: &mut Locked<Unlocked>,
476    current_task: &CurrentTask,
477    uid: uid_t,
478) -> Result<(), Errno> {
479    if uid == gid_t::MAX {
480        return error!(EINVAL);
481    }
482    if !new_uid_allowed(&current_task, uid) {
483        return error!(EPERM);
484    }
485
486    let mut creds = Credentials::clone(&current_task.current_creds());
487    let prev = creds.copy_user_credentials();
488    creds.euid = uid;
489    creds.fsuid = uid;
490    if security::is_task_capable_noaudit(current_task, CAP_SETUID) {
491        creds.uid = uid;
492        creds.saved_uid = uid;
493    }
494
495    creds.update_capabilities(prev);
496    current_task.set_creds(creds);
497    Ok(())
498}
499
500pub fn sys_setgid(
501    _locked: &mut Locked<Unlocked>,
502    current_task: &CurrentTask,
503    gid: gid_t,
504) -> Result<(), Errno> {
505    if gid == gid_t::MAX {
506        return error!(EINVAL);
507    }
508    if !new_gid_allowed(&current_task, gid) {
509        return error!(EPERM);
510    }
511
512    let mut creds = Credentials::clone(&current_task.current_creds());
513    creds.egid = gid;
514    creds.fsgid = gid;
515    if security::is_task_capable_noaudit(current_task, CAP_SETGID) {
516        creds.gid = gid;
517        creds.saved_gid = gid;
518    }
519    current_task.set_creds(creds);
520    Ok(())
521}
522
523pub fn sys_geteuid(
524    _locked: &mut Locked<Unlocked>,
525    current_task: &CurrentTask,
526) -> Result<uid_t, Errno> {
527    Ok(current_task.current_creds().euid)
528}
529
530pub fn sys_getegid(
531    _locked: &mut Locked<Unlocked>,
532    current_task: &CurrentTask,
533) -> Result<gid_t, Errno> {
534    Ok(current_task.current_creds().egid)
535}
536
537pub fn sys_setfsuid(
538    _locked: &mut Locked<Unlocked>,
539    current_task: &CurrentTask,
540    fsuid: uid_t,
541) -> Result<uid_t, Errno> {
542    let mut creds = Credentials::clone(&current_task.current_creds());
543    let prev = creds.copy_user_credentials();
544    if fsuid != u32::MAX && new_uid_allowed(&current_task, fsuid) {
545        creds.fsuid = fsuid;
546        creds.update_capabilities(prev);
547        current_task.set_creds(creds);
548    }
549
550    Ok(prev.fsuid)
551}
552
553pub fn sys_setfsgid(
554    _locked: &mut Locked<Unlocked>,
555    current_task: &CurrentTask,
556    fsgid: gid_t,
557) -> Result<gid_t, Errno> {
558    let mut creds = Credentials::clone(&current_task.current_creds());
559    let prev = creds.copy_user_credentials();
560    let prev_fsgid = creds.fsgid;
561
562    if fsgid != u32::MAX && new_gid_allowed(&current_task, fsgid) {
563        creds.fsgid = fsgid;
564        creds.update_capabilities(prev);
565        current_task.set_creds(creds);
566    }
567
568    Ok(prev_fsgid)
569}
570
571pub fn sys_getresuid(
572    _locked: &mut Locked<Unlocked>,
573    current_task: &CurrentTask,
574    ruid_addr: UserRef<uid_t>,
575    euid_addr: UserRef<uid_t>,
576    suid_addr: UserRef<uid_t>,
577) -> Result<(), Errno> {
578    let creds = current_task.current_creds();
579    current_task.write_object(ruid_addr, &creds.uid)?;
580    current_task.write_object(euid_addr, &creds.euid)?;
581    current_task.write_object(suid_addr, &creds.saved_uid)?;
582    Ok(())
583}
584
585pub fn sys_getresgid(
586    _locked: &mut Locked<Unlocked>,
587    current_task: &CurrentTask,
588    rgid_addr: UserRef<gid_t>,
589    egid_addr: UserRef<gid_t>,
590    sgid_addr: UserRef<gid_t>,
591) -> Result<(), Errno> {
592    let creds = current_task.current_creds();
593    current_task.write_object(rgid_addr, &creds.gid)?;
594    current_task.write_object(egid_addr, &creds.egid)?;
595    current_task.write_object(sgid_addr, &creds.saved_gid)?;
596    Ok(())
597}
598
599pub fn sys_setreuid(
600    _locked: &mut Locked<Unlocked>,
601    current_task: &CurrentTask,
602    ruid: uid_t,
603    euid: uid_t,
604) -> Result<(), Errno> {
605    let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
606    if !allowed(ruid) || !allowed(euid) {
607        return error!(EPERM);
608    }
609
610    let mut creds = Credentials::clone(&current_task.current_creds());
611    let prev = creds.copy_user_credentials();
612    let mut is_ruid_set = false;
613    if ruid != u32::MAX {
614        creds.uid = ruid;
615        is_ruid_set = true;
616    }
617    if euid != u32::MAX {
618        creds.euid = euid;
619        creds.fsuid = euid;
620    }
621
622    if is_ruid_set || prev.uid != euid {
623        creds.saved_uid = creds.euid;
624    }
625
626    creds.update_capabilities(prev);
627    current_task.set_creds(creds);
628    Ok(())
629}
630
631pub fn sys_setregid(
632    _locked: &mut Locked<Unlocked>,
633    current_task: &CurrentTask,
634    rgid: gid_t,
635    egid: gid_t,
636) -> Result<(), Errno> {
637    let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
638    if !allowed(rgid) || !allowed(egid) {
639        return error!(EPERM);
640    }
641
642    let mut creds = Credentials::clone(&current_task.current_creds());
643    let previous_rgid = creds.gid;
644    let mut is_rgid_set = false;
645    if rgid != u32::MAX {
646        creds.gid = rgid;
647        is_rgid_set = true;
648    }
649    if egid != u32::MAX {
650        creds.egid = egid;
651        creds.fsgid = egid;
652    }
653
654    if is_rgid_set || previous_rgid != egid {
655        creds.saved_gid = creds.egid;
656    }
657
658    current_task.set_creds(creds);
659    Ok(())
660}
661
662pub fn sys_setresuid(
663    _locked: &mut Locked<Unlocked>,
664    current_task: &CurrentTask,
665    ruid: uid_t,
666    euid: uid_t,
667    suid: uid_t,
668) -> Result<(), Errno> {
669    let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
670    if !allowed(ruid) || !allowed(euid) || !allowed(suid) {
671        return error!(EPERM);
672    }
673
674    let mut creds = Credentials::clone(&current_task.current_creds());
675    let prev = creds.copy_user_credentials();
676    if ruid != u32::MAX {
677        creds.uid = ruid;
678    }
679    if euid != u32::MAX {
680        creds.euid = euid;
681        creds.fsuid = euid;
682    }
683    if suid != u32::MAX {
684        creds.saved_uid = suid;
685    }
686    creds.update_capabilities(prev);
687    current_task.set_creds(creds);
688    Ok(())
689}
690
691pub fn sys_setresgid(
692    _locked: &mut Locked<Unlocked>,
693    current_task: &CurrentTask,
694    rgid: gid_t,
695    egid: gid_t,
696    sgid: gid_t,
697) -> Result<(), Errno> {
698    let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
699    if !allowed(rgid) || !allowed(egid) || !allowed(sgid) {
700        return error!(EPERM);
701    }
702
703    let mut creds = Credentials::clone(&current_task.current_creds());
704    if rgid != u32::MAX {
705        creds.gid = rgid;
706    }
707    if egid != u32::MAX {
708        creds.egid = egid;
709        creds.fsgid = egid;
710    }
711    if sgid != u32::MAX {
712        creds.saved_gid = sgid;
713    }
714    current_task.set_creds(creds);
715    Ok(())
716}
717
718pub fn sys_exit(
719    _locked: &mut Locked<Unlocked>,
720    current_task: &CurrentTask,
721    code: i32,
722) -> Result<(), Errno> {
723    // Only change the current exit status if this has not been already set by exit_group, as
724    // otherwise it has priority.
725    current_task.write().set_exit_status_if_not_already(ExitStatus::Exit(code as u8));
726    Ok(())
727}
728
729pub fn sys_exit_group(
730    locked: &mut Locked<Unlocked>,
731    current_task: &mut CurrentTask,
732    code: i32,
733) -> Result<(), Errno> {
734    current_task.thread_group_exit(locked, ExitStatus::Exit(code as u8));
735    Ok(())
736}
737
738pub fn sys_sched_getscheduler(
739    _locked: &mut Locked<Unlocked>,
740    current_task: &CurrentTask,
741    pid: pid_t,
742) -> Result<u32, Errno> {
743    if pid < 0 {
744        return error!(EINVAL);
745    }
746
747    let weak = get_task_or_current(current_task, pid);
748    let target_task = Task::from_weak(&weak)?;
749    security::check_getsched_access(current_task, target_task.as_ref())?;
750    let current_scheduler_state = target_task.read().scheduler_state;
751    Ok(current_scheduler_state.policy_for_sched_getscheduler())
752}
753
754pub fn sys_sched_setscheduler(
755    locked: &mut Locked<Unlocked>,
756    current_task: &CurrentTask,
757    pid: pid_t,
758    policy: u32,
759    param: UserRef<sched_param>,
760) -> Result<(), Errno> {
761    // Parse & validate the arguments.
762    if pid < 0 || param.is_null() {
763        return error!(EINVAL);
764    }
765
766    let weak = get_task_or_current(current_task, pid);
767    let target_task = Task::from_weak(&weak)?;
768
769    let reset_on_fork = policy & SCHED_RESET_ON_FORK != 0;
770
771    let policy = SchedulingPolicy::try_from(policy & !SCHED_RESET_ON_FORK)?;
772    let realtime_priority =
773        policy.realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
774
775    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
776    let current_state = target_task.read().scheduler_state;
777
778    // Check capabilities and permissions, if required, for the operation.
779    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
780    let strengthening = current_state.realtime_priority < realtime_priority;
781    let rlimited = strengthening
782        && realtime_priority
783            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
784    let clearing_reset_on_fork = current_state.reset_on_fork && !reset_on_fork;
785    let caught_in_idle_trap = current_state.policy == SchedulingPolicy::Idle
786        && policy != SchedulingPolicy::Idle
787        && current_state
788            .normal_priority
789            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
790    if !euid_friendly || rlimited || clearing_reset_on_fork || caught_in_idle_trap {
791        security::check_task_capable(current_task, CAP_SYS_NICE)?;
792    }
793
794    security::check_setsched_access(current_task, &target_task)?;
795
796    // Apply the new scheduler configuration to the task.
797    target_task.set_scheduler_policy_priority_and_reset_on_fork(
798        policy,
799        realtime_priority,
800        reset_on_fork,
801    )?;
802
803    Ok(())
804}
805
/// Size in bytes of the CPU affinity mask handled by this file. With one bit per CPU this
/// covers `CPU_SET_SIZE * 8` CPUs.
const CPU_SET_SIZE: usize = 128;

/// Fixed-size CPU affinity bitmask exchanged with user space by the affinity syscalls.
#[repr(C)]
#[derive(Debug, Copy, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
pub struct CpuSet {
    /// Raw mask bytes. `get_default_cpu_set` fills them so that CPU `n` corresponds to bit
    /// `n % 8` of byte `n / 8`.
    bits: [u8; CPU_SET_SIZE],
}

impl Default for CpuSet {
    /// Returns a mask with no CPUs set.
    fn default() -> Self {
        Self { bits: [0; CPU_SET_SIZE] }
    }
}
819
820fn check_cpu_set_alignment(current_task: &CurrentTask, cpusetsize: u32) -> Result<(), Errno> {
821    let alignment = if current_task.is_arch32() { 4 } else { 8 };
822    if cpusetsize < alignment || cpusetsize % alignment != 0 {
823        return error!(EINVAL);
824    }
825    Ok(())
826}
827
828fn get_default_cpu_set() -> CpuSet {
829    let mut result = CpuSet::default();
830    let mut cpus_count = zx::system_get_num_cpus();
831    let cpus_count_max = (CPU_SET_SIZE * 8) as u32;
832    if cpus_count > cpus_count_max {
833        log_error!("cpus_count={cpus_count}, greater than the {cpus_count_max} max supported.");
834        cpus_count = cpus_count_max;
835    }
836    let mut index = 0;
837    while cpus_count > 0 {
838        let count = std::cmp::min(cpus_count, 8);
839        let (shl, overflow) = 1_u8.overflowing_shl(count);
840        let mask = if overflow { u8::max_value() } else { shl - 1 };
841        result.bits[index] = mask;
842        index += 1;
843        cpus_count -= count;
844    }
845    result
846}
847
848pub fn sys_sched_getaffinity(
849    _locked: &mut Locked<Unlocked>,
850    current_task: &CurrentTask,
851    pid: pid_t,
852    cpusetsize: u32,
853    user_mask: UserAddress,
854) -> Result<usize, Errno> {
855    if pid < 0 {
856        return error!(EINVAL);
857    }
858
859    check_cpu_set_alignment(current_task, cpusetsize)?;
860
861    let weak = get_task_or_current(current_task, pid);
862    let _task = Task::from_weak(&weak)?;
863
864    // sched_setaffinity() is not implemented. Fake affinity mask based on the number of CPUs.
865    let mask = get_default_cpu_set();
866    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
867    current_task.write_memory(user_mask, &mask.bits[..mask_size])?;
868    track_stub!(TODO("https://fxbug.dev/322874659"), "sched_getaffinity");
869    Ok(mask_size)
870}
871
872pub fn sys_sched_setaffinity(
873    _locked: &mut Locked<Unlocked>,
874    current_task: &CurrentTask,
875    pid: pid_t,
876    cpusetsize: u32,
877    user_mask: UserAddress,
878) -> Result<(), Errno> {
879    if pid < 0 {
880        return error!(EINVAL);
881    }
882    let weak = get_task_or_current(current_task, pid);
883    let target_task = Task::from_weak(&weak)?;
884
885    check_cpu_set_alignment(current_task, cpusetsize)?;
886
887    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
888    let mut mask = CpuSet::default();
889    current_task.read_memory_to_slice(user_mask, &mut mask.bits[..mask_size])?;
890
891    // Specified mask must include at least one valid CPU.
892    let max_mask = get_default_cpu_set();
893    let mut has_valid_cpu_in_mask = false;
894    for (l1, l2) in std::iter::zip(max_mask.bits, mask.bits) {
895        has_valid_cpu_in_mask = has_valid_cpu_in_mask || (l1 & l2 > 0);
896    }
897    if !has_valid_cpu_in_mask {
898        return error!(EINVAL);
899    }
900
901    if !current_task.is_euid_friendly_with(&target_task) {
902        security::check_task_capable(current_task, CAP_SYS_NICE)?;
903    }
904
905    // Currently, we ignore the mask and act as if the system reset the mask
906    // immediately to allowing all CPUs.
907    track_stub!(TODO("https://fxbug.dev/322874889"), "sched_setaffinity");
908    Ok(())
909}
910
911pub fn sys_sched_getparam(
912    _locked: &mut Locked<Unlocked>,
913    current_task: &CurrentTask,
914    pid: pid_t,
915    param: UserRef<sched_param>,
916) -> Result<(), Errno> {
917    if pid < 0 || param.is_null() {
918        return error!(EINVAL);
919    }
920
921    let weak = get_task_or_current(current_task, pid);
922    let target_task = Task::from_weak(&weak)?;
923    let param_value = target_task.read().scheduler_state.get_sched_param();
924    current_task.write_object(param, &param_value)?;
925    Ok(())
926}
927
928pub fn sys_sched_setparam(
929    locked: &mut Locked<Unlocked>,
930    current_task: &CurrentTask,
931    pid: pid_t,
932    param: UserRef<sched_param>,
933) -> Result<(), Errno> {
934    // Parse & validate the arguments.
935    if pid < 0 || param.is_null() {
936        return error!(EINVAL);
937    }
938    let weak = get_task_or_current(current_task, pid);
939    let target_task = Task::from_weak(&weak)?;
940
941    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
942    let current_state = target_task.read().scheduler_state;
943
944    let realtime_priority = current_state
945        .policy
946        .realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
947
948    // Check capabilities and permissions, if required, for the operation.
949    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
950    let strengthening = current_state.realtime_priority < realtime_priority;
951    let rlimited = strengthening
952        && realtime_priority
953            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
954    if !euid_friendly || rlimited {
955        security::check_task_capable(current_task, CAP_SYS_NICE)?;
956    }
957
958    security::check_setsched_access(current_task, &target_task)?;
959
960    // Apply the new scheduler configuration to the task.
961    target_task.set_scheduler_priority(realtime_priority)?;
962
963    Ok(())
964}
965
966pub fn sys_sched_get_priority_min(
967    _locked: &mut Locked<Unlocked>,
968    _ctx: &CurrentTask,
969    policy: u32,
970) -> Result<u8, Errno> {
971    min_priority_for_sched_policy(policy)
972}
973
974pub fn sys_sched_get_priority_max(
975    _locked: &mut Locked<Unlocked>,
976    _ctx: &CurrentTask,
977    policy: u32,
978) -> Result<u8, Errno> {
979    max_priority_for_sched_policy(policy)
980}
981
982pub fn sys_ioprio_set(
983    _locked: &mut Locked<Unlocked>,
984    _current_task: &mut CurrentTask,
985    _which: i32,
986    _who: i32,
987    _ioprio: i32,
988) -> Result<(), Errno> {
989    track_stub!(TODO("https://fxbug.dev/297591758"), "ioprio_set()");
990    error!(ENOSYS)
991}
992
993pub fn sys_prctl(
994    locked: &mut Locked<Unlocked>,
995    current_task: &mut CurrentTask,
996    option: u32,
997    arg2: u64,
998    arg3: u64,
999    arg4: u64,
1000    arg5: u64,
1001) -> Result<SyscallResult, Errno> {
1002    match option {
1003        PR_SET_VMA => {
1004            if arg2 != PR_SET_VMA_ANON_NAME as u64 {
1005                track_stub!(TODO("https://fxbug.dev/322874826"), "prctl PR_SET_VMA", arg2);
1006                return error!(ENOSYS);
1007            }
1008            let addr = UserAddress::from(arg3);
1009            let length = arg4 as usize;
1010            let name_addr = UserAddress::from(arg5);
1011            let name = if name_addr.is_null() {
1012                None
1013            } else {
1014                let name = UserCString::new(current_task, UserAddress::from(arg5));
1015                let name = current_task.read_c_string_to_vec(name, 256).map_err(|e| {
1016                    // An overly long name produces EINVAL and not ENAMETOOLONG in Linux 5.15.
1017                    if e.code == ENAMETOOLONG { errno!(EINVAL) } else { e }
1018                })?;
1019                // Some characters are forbidden in VMA names.
1020                if name.iter().any(|b| {
1021                    matches!(b,
1022                        0..=0x1f |
1023                        0x7f..=0xff |
1024                        b'\\' | b'`' | b'$' | b'[' | b']'
1025                    )
1026                }) {
1027                    return error!(EINVAL);
1028                }
1029                Some(name)
1030            };
1031            current_task.mm()?.set_mapping_name(addr, length, name)?;
1032            Ok(().into())
1033        }
1034        PR_SET_DUMPABLE => {
1035            let mm = current_task.mm()?;
1036            let mut dumpable = mm.dumpable.lock(locked);
1037            *dumpable = if arg2 == 1 { DumpPolicy::User } else { DumpPolicy::Disable };
1038            Ok(().into())
1039        }
1040        PR_GET_DUMPABLE => {
1041            let mm = current_task.mm()?;
1042            let dumpable = mm.dumpable.lock(locked);
1043            Ok(match *dumpable {
1044                DumpPolicy::Disable => 0.into(),
1045                DumpPolicy::User => 1.into(),
1046            })
1047        }
1048        PR_SET_PDEATHSIG => {
1049            track_stub!(TODO("https://fxbug.dev/322874397"), "PR_SET_PDEATHSIG");
1050            Ok(().into())
1051        }
1052        PR_SET_NAME => {
1053            let addr = UserAddress::from(arg2);
1054            let name = TaskCommand::new(&current_task.read_memory_to_array::<16>(addr)?);
1055            current_task.set_command_name(name);
1056            if current_task.tid == current_task.thread_group.leader {
1057                current_task.thread_group.sync_syscall_log_level();
1058            }
1059            Ok(0.into())
1060        }
1061        PR_GET_NAME => {
1062            let addr = UserAddress::from(arg2);
1063            let name = current_task.command().prctl_name();
1064            current_task.write_memory(addr, &name[..])?;
1065            Ok(().into())
1066        }
1067        PR_SET_PTRACER => {
1068            let allowed_ptracers = if arg2 == PR_SET_PTRACER_ANY as u64 {
1069                PtraceAllowedPtracers::Any
1070            } else if arg2 == 0 {
1071                PtraceAllowedPtracers::None
1072            } else {
1073                if current_task.kernel().pids.read().get_task(arg2 as i32).upgrade().is_none() {
1074                    return error!(EINVAL);
1075                }
1076                PtraceAllowedPtracers::Some(arg2 as pid_t)
1077            };
1078            current_task.thread_group().write().allowed_ptracers = allowed_ptracers;
1079            Ok(().into())
1080        }
1081        PR_GET_KEEPCAPS => {
1082            Ok(current_task.current_creds().securebits.contains(SecureBits::KEEP_CAPS).into())
1083        }
1084        PR_SET_KEEPCAPS => {
1085            if arg2 != 0 && arg2 != 1 {
1086                return error!(EINVAL);
1087            }
1088            let mut creds = Credentials::clone(&current_task.current_creds());
1089            creds.securebits.set(SecureBits::KEEP_CAPS, arg2 != 0);
1090            current_task.set_creds(creds);
1091            Ok(().into())
1092        }
1093        PR_SET_NO_NEW_PRIVS => {
1094            // If any args are set other than arg2 to 1, this should return einval
1095            if arg2 != 1 || arg3 != 0 || arg4 != 0 || arg5 != 0 {
1096                return error!(EINVAL);
1097            }
1098            current_task.write().enable_no_new_privs();
1099            Ok(().into())
1100        }
1101        PR_GET_NO_NEW_PRIVS => {
1102            // If any args are set, this should return einval
1103            if arg2 != 0 || arg3 != 0 || arg4 != 0 {
1104                return error!(EINVAL);
1105            }
1106            Ok(current_task.read().no_new_privs().into())
1107        }
1108        PR_GET_SECCOMP => {
1109            if current_task.seccomp_filter_state.get() == SeccompStateValue::None {
1110                Ok(0.into())
1111            } else {
1112                Ok(2.into())
1113            }
1114        }
1115        PR_SET_SECCOMP => {
1116            if arg2 == SECCOMP_MODE_STRICT as u64 {
1117                return sys_seccomp(
1118                    locked,
1119                    current_task,
1120                    SECCOMP_SET_MODE_STRICT,
1121                    0,
1122                    UserAddress::NULL,
1123                );
1124            } else if arg2 == SECCOMP_MODE_FILTER as u64 {
1125                return sys_seccomp(locked, current_task, SECCOMP_SET_MODE_FILTER, 0, arg3.into());
1126            }
1127            Ok(().into())
1128        }
1129        PR_GET_CHILD_SUBREAPER => {
1130            let addr = UserAddress::from(arg2);
1131            #[allow(clippy::bool_to_int_with_if)]
1132            let value: i32 =
1133                if current_task.thread_group().read().is_child_subreaper { 1 } else { 0 };
1134            current_task.write_object(addr.into(), &value)?;
1135            Ok(().into())
1136        }
1137        PR_SET_CHILD_SUBREAPER => {
1138            current_task.thread_group().write().is_child_subreaper = arg2 != 0;
1139            Ok(().into())
1140        }
1141        PR_GET_SECUREBITS => Ok(current_task.current_creds().securebits.bits().into()),
1142        PR_SET_SECUREBITS => {
1143            // TODO(security): This does not yet respect locked flags.
1144            let mut creds = Credentials::clone(&current_task.current_creds());
1145            security::check_task_capable(current_task, CAP_SETPCAP)?;
1146
1147            let securebits = SecureBits::from_bits(arg2 as u32).ok_or_else(|| {
1148                track_stub!(TODO("https://fxbug.dev/322875244"), "PR_SET_SECUREBITS", arg2);
1149                errno!(ENOSYS)
1150            })?;
1151            creds.securebits = securebits;
1152            current_task.set_creds(creds);
1153            Ok(().into())
1154        }
1155        PR_CAPBSET_READ => {
1156            let cap = Capabilities::try_from(arg2)?;
1157            Ok(current_task.current_creds().cap_bounding.contains(cap).into())
1158        }
1159        PR_CAPBSET_DROP => {
1160            let mut creds = Credentials::clone(&current_task.current_creds());
1161            security::check_task_capable(current_task, CAP_SETPCAP)?;
1162
1163            creds.cap_bounding.remove(Capabilities::try_from(arg2)?);
1164            current_task.set_creds(creds);
1165            Ok(().into())
1166        }
1167        PR_CAP_AMBIENT => {
1168            let operation = arg2 as u32;
1169            let capability_arg = Capabilities::try_from(arg3)?;
1170            if arg4 != 0 || arg5 != 0 {
1171                return error!(EINVAL);
1172            }
1173
1174            // TODO(security): We don't currently validate capabilities, but this should return an
1175            // error if the capability_arg is invalid.
1176            match operation {
1177                PR_CAP_AMBIENT_RAISE => {
1178                    let mut creds = Credentials::clone(&current_task.current_creds());
1179                    if !(creds.cap_permitted.contains(capability_arg)
1180                        && creds.cap_inheritable.contains(capability_arg))
1181                    {
1182                        return error!(EPERM);
1183                    }
1184                    if creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE)
1185                        || creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE_LOCKED)
1186                    {
1187                        return error!(EPERM);
1188                    }
1189
1190                    creds.cap_ambient.insert(capability_arg);
1191                    current_task.set_creds(creds);
1192                    Ok(().into())
1193                }
1194                PR_CAP_AMBIENT_LOWER => {
1195                    let mut creds = Credentials::clone(&current_task.current_creds());
1196                    creds.cap_ambient.remove(capability_arg);
1197                    current_task.set_creds(creds);
1198                    Ok(().into())
1199                }
1200                PR_CAP_AMBIENT_IS_SET => {
1201                    Ok(current_task.current_creds().cap_ambient.contains(capability_arg).into())
1202                }
1203                PR_CAP_AMBIENT_CLEAR_ALL => {
1204                    if arg3 != 0 {
1205                        return error!(EINVAL);
1206                    }
1207
1208                    let mut creds = Credentials::clone(&current_task.current_creds());
1209                    creds.cap_ambient = Capabilities::empty();
1210                    current_task.set_creds(creds);
1211                    Ok(().into())
1212                }
1213                _ => error!(EINVAL),
1214            }
1215        }
1216        PR_SET_TIMERSLACK => {
1217            current_task.write().set_timerslack_ns(arg2);
1218            Ok(().into())
1219        }
1220        #[cfg(target_arch = "aarch64")]
1221        PR_GET_TAGGED_ADDR_CTRL => {
1222            track_stub!(TODO("https://fxbug.dev/408554469"), "PR_GET_TAGGED_ADDR_CTRL");
1223            Ok(0.into())
1224        }
1225        #[cfg(target_arch = "aarch64")]
1226        PR_SET_TAGGED_ADDR_CTRL => match u32::try_from(arg2).map_err(|_| errno!(EINVAL))? {
1227            // Only untagged pointers are allowed, the default.
1228            0 => Ok(().into()),
1229            PR_TAGGED_ADDR_ENABLE => {
1230                track_stub!(TODO("https://fxbug.dev/408554469"), "PR_TAGGED_ADDR_ENABLE");
1231                error!(EINVAL)
1232            }
1233            unknown_mode => {
1234                track_stub!(
1235                    TODO("https://fxbug.dev/408554469"),
1236                    "PR_SET_TAGGED_ADDR_CTRL unknown mode",
1237                    unknown_mode,
1238                );
1239                error!(EINVAL)
1240            }
1241        },
1242        _ => {
1243            track_stub!(TODO("https://fxbug.dev/322874733"), "prctl fallthrough", option);
1244            error!(ENOSYS)
1245        }
1246    }
1247}
1248
1249pub fn sys_ptrace(
1250    locked: &mut Locked<Unlocked>,
1251    current_task: &mut CurrentTask,
1252    request: u32,
1253    pid: pid_t,
1254    addr: UserAddress,
1255    data: UserAddress,
1256) -> Result<SyscallResult, Errno> {
1257    match request {
1258        PTRACE_TRACEME => ptrace_traceme(current_task),
1259        PTRACE_ATTACH => ptrace_attach(locked, current_task, pid, PtraceAttachType::Attach, data),
1260        PTRACE_SEIZE => ptrace_attach(locked, current_task, pid, PtraceAttachType::Seize, data),
1261        _ => ptrace_dispatch(locked, current_task, request, pid, addr, data),
1262    }
1263}
1264
1265pub fn sys_set_tid_address(
1266    _locked: &mut Locked<Unlocked>,
1267    current_task: &CurrentTask,
1268    user_tid: UserRef<pid_t>,
1269) -> Result<pid_t, Errno> {
1270    current_task.write().clear_child_tid = user_tid;
1271    Ok(current_task.get_tid())
1272}
1273
1274pub fn sys_getrusage(
1275    _locked: &mut Locked<Unlocked>,
1276    current_task: &CurrentTask,
1277    who: i32,
1278    user_usage: RUsagePtr,
1279) -> Result<(), Errno> {
1280    const RUSAGE_SELF: i32 = starnix_uapi::uapi::RUSAGE_SELF as i32;
1281    const RUSAGE_THREAD: i32 = starnix_uapi::uapi::RUSAGE_THREAD as i32;
1282    track_stub!(TODO("https://fxbug.dev/297370242"), "real rusage");
1283    let time_stats = match who {
1284        RUSAGE_CHILDREN => current_task.task.thread_group().read().children_time_stats,
1285        RUSAGE_SELF => current_task.task.thread_group().time_stats(),
1286        RUSAGE_THREAD => current_task.task.time_stats(),
1287        _ => return error!(EINVAL),
1288    };
1289
1290    let usage = rusage {
1291        ru_utime: timeval_from_duration(time_stats.user_time),
1292        ru_stime: timeval_from_duration(time_stats.system_time),
1293        ..rusage::default()
1294    };
1295    current_task.write_multi_arch_object(user_usage, usage)?;
1296
1297    Ok(())
1298}
1299
/// User-space reference to an `rlimit`, parameterised over both the `uapi::rlimit` and the
/// `uapi::arch32::rlimit` representations.
type PrLimitRef = MultiArchUserRef<uapi::rlimit, uapi::arch32::rlimit>;
1301
1302pub fn sys_getrlimit(
1303    locked: &mut Locked<Unlocked>,
1304    current_task: &CurrentTask,
1305    resource: u32,
1306    user_rlimit: PrLimitRef,
1307) -> Result<(), Errno> {
1308    do_prlimit64(locked, current_task, 0, resource, PrLimitRef::null(current_task), user_rlimit)
1309}
1310
1311pub fn sys_setrlimit(
1312    locked: &mut Locked<Unlocked>,
1313    current_task: &CurrentTask,
1314    resource: u32,
1315    user_rlimit: PrLimitRef,
1316) -> Result<(), Errno> {
1317    do_prlimit64(locked, current_task, 0, resource, user_rlimit, PrLimitRef::null(current_task))
1318}
1319
1320pub fn sys_prlimit64(
1321    locked: &mut Locked<Unlocked>,
1322    current_task: &CurrentTask,
1323    pid: pid_t,
1324    user_resource: u32,
1325    new_limit_ref: UserRef<uapi::rlimit>,
1326    old_limit_ref: UserRef<uapi::rlimit>,
1327) -> Result<(), Errno> {
1328    do_prlimit64::<uapi::rlimit>(
1329        locked,
1330        current_task,
1331        pid,
1332        user_resource,
1333        new_limit_ref.into(),
1334        old_limit_ref.into(),
1335    )
1336}
1337
1338pub fn do_prlimit64<T>(
1339    locked: &mut Locked<Unlocked>,
1340    current_task: &CurrentTask,
1341    pid: pid_t,
1342    user_resource: u32,
1343    new_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1344    old_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1345) -> Result<(), Errno>
1346where
1347    T: FromBytes + IntoBytes + Immutable + From<uapi::rlimit> + Into<uapi::rlimit>,
1348{
1349    let weak = get_task_or_current(current_task, pid);
1350    let target_task = Task::from_weak(&weak)?;
1351
1352    // To get or set the resource of a process other than itself, the caller must have either:
1353    // * the same `uid`, `euid`, `saved_uid`, `gid`, `egid`, `saved_gid` as the target.
1354    // * the CAP_SYS_RESOURCE
1355    if current_task.get_pid() != target_task.get_pid() {
1356        let self_creds = current_task.current_creds();
1357        let target_creds = target_task.real_creds();
1358        if self_creds.uid != target_creds.uid
1359            || self_creds.euid != target_creds.euid
1360            || self_creds.saved_uid != target_creds.saved_uid
1361            || self_creds.gid != target_creds.gid
1362            || self_creds.egid != target_creds.egid
1363            || self_creds.saved_gid != target_creds.saved_gid
1364        {
1365            security::check_task_capable(current_task, CAP_SYS_RESOURCE)?;
1366        }
1367        security::task_prlimit(
1368            current_task,
1369            &target_task,
1370            !old_limit_ref.is_null(),
1371            !new_limit_ref.is_null(),
1372        )?;
1373    }
1374
1375    let resource = Resource::from_raw(user_resource)?;
1376
1377    let old_limit = match resource {
1378        // TODO: Integrate Resource::STACK with generic ResourceLimits machinery.
1379        Resource::STACK => {
1380            if !new_limit_ref.is_null() {
1381                track_stub!(
1382                    TODO("https://fxbug.dev/322874791"),
1383                    "prlimit64 cannot set RLIMIT_STACK"
1384                );
1385            }
1386            // The stack size is fixed at the moment, but
1387            // if MAP_GROWSDOWN is implemented this should
1388            // report the limit that it can be grown.
1389            let mm = target_task.mm()?;
1390            let mm_state = mm.state.read();
1391            let stack_size = mm_state.stack_size as u64;
1392            rlimit { rlim_cur: stack_size, rlim_max: stack_size }
1393        }
1394        _ => {
1395            let new_limit = if new_limit_ref.is_null() {
1396                None
1397            } else {
1398                let new_limit = current_task.read_multi_arch_object(new_limit_ref)?;
1399                if new_limit.rlim_cur > new_limit.rlim_max {
1400                    return error!(EINVAL);
1401                }
1402                Some(new_limit)
1403            };
1404            ThreadGroup::adjust_rlimits(locked, current_task, &target_task, resource, new_limit)?
1405        }
1406    };
1407    if !old_limit_ref.is_null() {
1408        current_task.write_multi_arch_object(old_limit_ref, old_limit)?;
1409    }
1410    Ok(())
1411}
1412
1413pub fn sys_quotactl(
1414    _locked: &mut Locked<Unlocked>,
1415    _current_task: &CurrentTask,
1416    _cmd: i32,
1417    _special: UserRef<c_char>,
1418    _id: i32,
1419    _addr: UserRef<c_char>,
1420) -> Result<SyscallResult, Errno> {
1421    track_stub!(TODO("https://fxbug.dev/297302197"), "quotacl()");
1422    error!(ENOSYS)
1423}
1424
1425pub fn sys_capget(
1426    _locked: &mut Locked<Unlocked>,
1427    current_task: &CurrentTask,
1428    user_header: UserRef<__user_cap_header_struct>,
1429    user_data: UserRef<__user_cap_data_struct>,
1430) -> Result<(), Errno> {
1431    let mut header = current_task.read_object(user_header)?;
1432    let is_version_valid =
1433        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1434            .contains(&header.version);
1435    if !is_version_valid {
1436        header.version = _LINUX_CAPABILITY_VERSION_3;
1437        current_task.write_object(user_header, &header)?;
1438    }
1439    if user_data.is_null() {
1440        return Ok(());
1441    }
1442    if !is_version_valid || header.pid < 0 {
1443        return error!(EINVAL);
1444    }
1445
1446    let weak = get_task_or_current(current_task, header.pid);
1447    let target_task = Task::from_weak(&weak)?;
1448
1449    security::check_getcap_access(current_task, &target_task)?;
1450
1451    let (permitted, effective, inheritable) = {
1452        let creds = &target_task.real_creds();
1453        (creds.cap_permitted, creds.cap_effective, creds.cap_inheritable)
1454    };
1455
1456    match header.version {
1457        _LINUX_CAPABILITY_VERSION_1 => {
1458            let data: [__user_cap_data_struct; 1] = [__user_cap_data_struct {
1459                effective: effective.as_abi_v1(),
1460                inheritable: inheritable.as_abi_v1(),
1461                permitted: permitted.as_abi_v1(),
1462            }];
1463            current_task.write_objects(user_data, &data)?;
1464        }
1465        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1466            // Return 64 bit capabilities as two sets of 32 bit capabilities, little endian
1467            let (permitted, effective, inheritable) =
1468                (permitted.as_abi_v3(), effective.as_abi_v3(), inheritable.as_abi_v3());
1469            let data: [__user_cap_data_struct; 2] = [
1470                __user_cap_data_struct {
1471                    effective: effective.0,
1472                    inheritable: inheritable.0,
1473                    permitted: permitted.0,
1474                },
1475                __user_cap_data_struct {
1476                    effective: effective.1,
1477                    inheritable: inheritable.1,
1478                    permitted: permitted.1,
1479                },
1480            ];
1481            current_task.write_objects(user_data, &data)?;
1482        }
1483        _ => {
1484            unreachable!("already returned if Linux capability version is not valid")
1485        }
1486    }
1487    Ok(())
1488}
1489
1490pub fn sys_capset(
1491    _locked: &mut Locked<Unlocked>,
1492    current_task: &CurrentTask,
1493    user_header: UserRef<__user_cap_header_struct>,
1494    user_data: UserRef<__user_cap_data_struct>,
1495) -> Result<(), Errno> {
1496    let mut header = current_task.read_object(user_header)?;
1497    let is_version_valid =
1498        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1499            .contains(&header.version);
1500    if !is_version_valid {
1501        header.version = _LINUX_CAPABILITY_VERSION_3;
1502        current_task.write_object(user_header, &header)?;
1503        return error!(EINVAL);
1504    }
1505    if header.pid != 0 && header.pid != current_task.tid {
1506        return error!(EPERM);
1507    }
1508
1509    let (new_permitted, new_effective, new_inheritable) = match header.version {
1510        _LINUX_CAPABILITY_VERSION_1 => {
1511            let data = current_task.read_object(user_data)?;
1512            (
1513                Capabilities::from_abi_v1(data.permitted),
1514                Capabilities::from_abi_v1(data.effective),
1515                Capabilities::from_abi_v1(data.inheritable),
1516            )
1517        }
1518        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1519            let data =
1520                current_task.read_objects_to_array::<__user_cap_data_struct, 2>(user_data)?;
1521            (
1522                Capabilities::from_abi_v3((data[0].permitted, data[1].permitted)),
1523                Capabilities::from_abi_v3((data[0].effective, data[1].effective)),
1524                Capabilities::from_abi_v3((data[0].inheritable, data[1].inheritable)),
1525            )
1526        }
1527        _ => {
1528            unreachable!("already returned if Linux capability version is not valid")
1529        }
1530    };
1531
1532    // Permission checks. Copied out of TLPI section 39.7.
1533    let mut creds = Credentials::clone(&current_task.current_creds());
1534    {
1535        log_trace!(
1536            "Capabilities({{permitted={:?} from {:?}, effective={:?} from {:?}, inheritable={:?} from {:?}}}, bounding={:?})",
1537            new_permitted,
1538            creds.cap_permitted,
1539            new_effective,
1540            creds.cap_effective,
1541            new_inheritable,
1542            creds.cap_inheritable,
1543            creds.cap_bounding
1544        );
1545        if !creds.cap_inheritable.union(creds.cap_permitted).contains(new_inheritable) {
1546            security::check_task_capable(current_task, CAP_SETPCAP)?;
1547        }
1548
1549        if !creds.cap_inheritable.union(creds.cap_bounding).contains(new_inheritable) {
1550            return error!(EPERM);
1551        }
1552        if !creds.cap_permitted.contains(new_permitted) {
1553            return error!(EPERM);
1554        }
1555        if !new_permitted.contains(new_effective) {
1556            return error!(EPERM);
1557        }
1558    }
1559    let weak = get_task_or_current(current_task, header.pid);
1560    let target_task = Task::from_weak(&weak)?;
1561
1562    security::check_setcap_access(current_task, &target_task)?;
1563
1564    creds.cap_permitted = new_permitted;
1565    creds.cap_effective = new_effective;
1566    creds.cap_inheritable = new_inheritable;
1567    creds.cap_ambient = new_permitted & new_inheritable & creds.cap_ambient;
1568    current_task.set_creds(creds);
1569    Ok(())
1570}
1571
1572pub fn sys_seccomp(
1573    locked: &mut Locked<Unlocked>,
1574    current_task: &mut CurrentTask,
1575    operation: u32,
1576    flags: u32,
1577    args: UserAddress,
1578) -> Result<SyscallResult, Errno> {
1579    match operation {
1580        SECCOMP_SET_MODE_STRICT => {
1581            if flags != 0 || args != UserAddress::NULL {
1582                return error!(EINVAL);
1583            }
1584            current_task.set_seccomp_state(SeccompStateValue::Strict)?;
1585            Ok(().into())
1586        }
1587        SECCOMP_SET_MODE_FILTER => {
1588            if flags
1589                & (SECCOMP_FILTER_FLAG_LOG
1590                    | SECCOMP_FILTER_FLAG_NEW_LISTENER
1591                    | SECCOMP_FILTER_FLAG_SPEC_ALLOW
1592                    | SECCOMP_FILTER_FLAG_TSYNC
1593                    | SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
1594                != flags
1595            {
1596                return error!(EINVAL);
1597            }
1598            if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER != 0)
1599                && (flags & SECCOMP_FILTER_FLAG_TSYNC != 0)
1600                && (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH == 0)
1601            {
1602                return error!(EINVAL);
1603            }
1604            let fprog =
1605                current_task.read_multi_arch_object(SockFProgPtr::new(current_task, args))?;
1606            if fprog.len > BPF_MAXINSNS || fprog.len == 0 {
1607                return error!(EINVAL);
1608            }
1609            let code: Vec<sock_filter> =
1610                current_task.read_multi_arch_objects_to_vec(fprog.filter, fprog.len as usize)?;
1611
1612            if !current_task.read().no_new_privs() {
1613                security::check_task_capable(current_task, CAP_SYS_ADMIN)
1614                    .map_err(|_| errno!(EACCES))?;
1615            }
1616            current_task.add_seccomp_filter(locked, code, flags)
1617        }
1618        SECCOMP_GET_ACTION_AVAIL => {
1619            if flags != 0 || args.is_null() {
1620                return error!(EINVAL);
1621            }
1622            let action: u32 = current_task.read_object(UserRef::new(args))?;
1623            SeccompAction::is_action_available(action)
1624        }
1625        SECCOMP_GET_NOTIF_SIZES => {
1626            if flags != 0 {
1627                return error!(EINVAL);
1628            }
1629            track_stub!(TODO("https://fxbug.dev/322874791"), "SECCOMP_GET_NOTIF_SIZES");
1630            error!(ENOSYS)
1631        }
1632        _ => {
1633            track_stub!(TODO("https://fxbug.dev/322874916"), "seccomp fallthrough", operation);
1634            error!(EINVAL)
1635        }
1636    }
1637}
1638
1639pub fn sys_setgroups(
1640    _locked: &mut Locked<Unlocked>,
1641    current_task: &CurrentTask,
1642    size: usize,
1643    groups_addr: UserAddress,
1644) -> Result<(), Errno> {
1645    if size > NGROUPS_MAX as usize {
1646        return error!(EINVAL);
1647    }
1648    let groups = current_task.read_objects_to_vec::<gid_t>(groups_addr.into(), size)?;
1649    let mut creds = Credentials::clone(&current_task.current_creds());
1650    if !creds.is_superuser() {
1651        return error!(EPERM);
1652    }
1653    creds.groups = groups;
1654    current_task.set_creds(creds);
1655    Ok(())
1656}
1657
1658pub fn sys_getgroups(
1659    _locked: &mut Locked<Unlocked>,
1660    current_task: &CurrentTask,
1661    size: usize,
1662    groups_addr: UserAddress,
1663) -> Result<usize, Errno> {
1664    if size > NGROUPS_MAX as usize {
1665        return error!(EINVAL);
1666    }
1667    let creds = current_task.current_creds();
1668    if size != 0 {
1669        if size < creds.groups.len() {
1670            return error!(EINVAL);
1671        }
1672        current_task.write_memory(groups_addr, creds.groups.as_slice().as_bytes())?;
1673    }
1674    Ok(creds.groups.len())
1675}
1676
1677pub fn sys_setsid(
1678    locked: &mut Locked<Unlocked>,
1679    current_task: &CurrentTask,
1680) -> Result<pid_t, Errno> {
1681    current_task.thread_group().setsid(locked)?;
1682    Ok(current_task.get_pid())
1683}
1684
1685// Note the asymmetry with sys_setpriority: this returns "kernel nice" which ranges
1686// from 1 (weakest) to 40 (strongest). (It is part of Linux history that this syscall
1687// deals with niceness but has "priority" in its name.)
1688pub fn sys_getpriority(
1689    _locked: &mut Locked<Unlocked>,
1690    current_task: &CurrentTask,
1691    which: u32,
1692    who: i32,
1693) -> Result<u8, Errno> {
1694    match which {
1695        PRIO_PROCESS => {}
1696        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1697        _ => return error!(EINVAL),
1698    }
1699    track_stub!(TODO("https://fxbug.dev/322893809"), "getpriority permissions");
1700    let weak = get_task_or_current(current_task, who);
1701    let target_task = Task::from_weak(&weak)?;
1702    let state = target_task.read();
1703    Ok(state.scheduler_state.normal_priority.raw_priority())
1704}
1705
1706// Note the asymmetry with sys_getpriority: this call's `priority` parameter is a
1707// "user nice" which ranges from -20 (strongest) to 19 (weakest) (other values can be
1708// passed and are clamped to that range and interpretation). (It is part of Linux
1709// history that this syscall deals with niceness but has "priority" in its name.)
1710pub fn sys_setpriority(
1711    locked: &mut Locked<Unlocked>,
1712    current_task: &CurrentTask,
1713    which: u32,
1714    who: i32,
1715    priority: i32,
1716) -> Result<(), Errno> {
1717    // Parse & validate the arguments.
1718    match which {
1719        PRIO_PROCESS => {}
1720        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1721        _ => return error!(EINVAL),
1722    }
1723
1724    let weak = get_task_or_current(current_task, who);
1725    let target_task = Task::from_weak(&weak)?;
1726
1727    let normal_priority = NormalPriority::from_setpriority_syscall(priority);
1728
1729    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
1730    let current_state = target_task.read().scheduler_state;
1731
1732    // Check capabilities and permissions, if required, for the operation.
1733    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
1734    let strengthening = current_state.normal_priority < normal_priority;
1735    let rlimited = strengthening
1736        && normal_priority.exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
1737    if !euid_friendly {
1738        security::check_task_capable(current_task, CAP_SYS_NICE)?;
1739    } else if rlimited {
1740        security::check_task_capable(current_task, CAP_SYS_NICE).map_err(|_| errno!(EACCES))?;
1741    }
1742
1743    security::check_setsched_access(current_task, &target_task)?;
1744
1745    // Apply the new scheduler configuration to the task.
1746    target_task.set_scheduler_nice(normal_priority)?;
1747
1748    Ok(())
1749}
1750
1751pub fn sys_setns(
1752    _locked: &mut Locked<Unlocked>,
1753    current_task: &CurrentTask,
1754    ns_fd: FdNumber,
1755    ns_type: c_int,
1756) -> Result<(), Errno> {
1757    let file_handle = current_task.task.files.get(ns_fd)?;
1758
1759    // From man pages this is not quite right because some namespace types require more capabilities
1760    // or require this capability in multiple namespaces, but it should cover our current test
1761    // cases and we can make this more nuanced once more namespace types are supported.
1762    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1763
1764    if let Some(mount_ns) = file_handle.downcast_file::<MountNamespaceFile>() {
1765        if !(ns_type == 0 || ns_type == CLONE_NEWNS as i32) {
1766            log_trace!("invalid type");
1767            return error!(EINVAL);
1768        }
1769
1770        track_stub!(TODO("https://fxbug.dev/297312091"), "setns CLONE_FS limitations");
1771        current_task.task.fs().set_namespace(mount_ns.0.clone())?;
1772        return Ok(());
1773    }
1774
1775    if let Some(_pidfd) = file_handle.downcast_file::<PidFdFileObject>() {
1776        track_stub!(TODO("https://fxbug.dev/297312844"), "setns w/ pidfd");
1777        return error!(ENOSYS);
1778    }
1779
1780    track_stub!(TODO("https://fxbug.dev/322893829"), "unknown ns file for setns, see logs");
1781    log_info!("ns_fd was not a supported namespace file: {}", file_handle.ops_type_name());
1782    error!(EINVAL)
1783}
1784
1785pub fn sys_unshare(
1786    _locked: &mut Locked<Unlocked>,
1787    current_task: &CurrentTask,
1788    flags: u32,
1789) -> Result<(), Errno> {
1790    const IMPLEMENTED_FLAGS: u32 = CLONE_FILES | CLONE_FS | CLONE_NEWNS | CLONE_NEWUTS;
1791    if flags & !IMPLEMENTED_FLAGS != 0 {
1792        track_stub!(TODO("https://fxbug.dev/322893372"), "unshare", flags & !IMPLEMENTED_FLAGS);
1793        return error!(EINVAL);
1794    }
1795
1796    if (flags & CLONE_FILES) != 0 {
1797        current_task.files.unshare();
1798    }
1799
1800    if (flags & CLONE_FS) != 0 {
1801        current_task.unshare_fs();
1802    }
1803
1804    if (flags & CLONE_NEWNS) != 0 {
1805        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1806        current_task.fs().unshare_namespace();
1807    }
1808
1809    if (flags & CLONE_NEWUTS) != 0 {
1810        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1811        // Fork the UTS namespace.
1812        let mut task_state = current_task.write();
1813        let new_uts_ns = task_state.uts_ns.read().clone();
1814        task_state.uts_ns = Arc::new(RwLock::new(new_uts_ns));
1815    }
1816
1817    Ok(())
1818}
1819
1820pub fn sys_swapon(
1821    locked: &mut Locked<Unlocked>,
1822    current_task: &CurrentTask,
1823    user_path: UserCString,
1824    _flags: i32,
1825) -> Result<(), Errno> {
1826    const MAX_SWAPFILES: usize = 32; // See https://man7.org/linux/man-pages/man2/swapon.2.html
1827
1828    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1829
1830    track_stub!(TODO("https://fxbug.dev/322893905"), "swapon validate flags");
1831
1832    let path = current_task.read_path(user_path)?;
1833    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1834
1835    let node = file.node();
1836    let mode = node.info().mode;
1837    if !mode.is_reg() && !mode.is_blk() {
1838        return error!(EINVAL);
1839    }
1840
1841    // We determined this magic number by using the mkswap tool and the file tool. The mkswap tool
1842    // populates a few bytes in the file, including a UUID, which can be replaced with zeros while
1843    // still being recognized by the file tool. This string appears at a fixed offset
1844    // (MAGIC_OFFSET) in the file, which looks quite like a magic number.
1845    const MAGIC_OFFSET: usize = 0xff6;
1846    let swap_magic = b"SWAPSPACE2";
1847    let mut buffer = VecOutputBuffer::new(swap_magic.len());
1848    if file.read_at(locked, current_task, MAGIC_OFFSET, &mut buffer)? != swap_magic.len()
1849        || buffer.data() != swap_magic
1850    {
1851        return error!(EINVAL);
1852    }
1853
1854    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1855    for swap_node in swap_files.iter() {
1856        if Arc::ptr_eq(swap_node, node) {
1857            return error!(EBUSY);
1858        }
1859    }
1860    if swap_files.len() >= MAX_SWAPFILES {
1861        return error!(EPERM);
1862    }
1863    swap_files.push(node.clone());
1864    Ok(())
1865}
1866
1867pub fn sys_swapoff(
1868    locked: &mut Locked<Unlocked>,
1869    current_task: &CurrentTask,
1870    user_path: UserCString,
1871) -> Result<(), Errno> {
1872    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1873
1874    let path = current_task.read_path(user_path)?;
1875    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1876    let node = file.node();
1877
1878    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1879    let original_length = swap_files.len();
1880    swap_files.retain(|swap_node| !Arc::ptr_eq(swap_node, node));
1881    if swap_files.len() == original_length {
1882        return error!(EINVAL);
1883    }
1884    Ok(())
1885}
1886
1887#[derive(Default, Debug, IntoBytes, KnownLayout, FromBytes, Immutable)]
1888#[repr(C)]
1889struct KcmpParams {
1890    mask: usize,
1891    shuffle: usize,
1892}
1893
1894static KCMP_PARAMS: LazyLock<KcmpParams> = LazyLock::new(|| {
1895    let mut params = KcmpParams::default();
1896    zx::cprng_draw(params.as_mut_bytes());
1897    // Ensure the shuffle is odd so that multiplying a usize by this value is a permutation.
1898    params.shuffle |= 1;
1899    params
1900});
1901
1902fn obfuscate_value(value: usize) -> usize {
1903    let KcmpParams { mask, shuffle } = *KCMP_PARAMS;
1904    (value ^ mask).wrapping_mul(shuffle)
1905}
1906
1907fn obfuscate_ptr<T>(ptr: *const T) -> usize {
1908    obfuscate_value(ptr as usize)
1909}
1910
1911fn obfuscate_arc<T>(arc: &Arc<T>) -> usize {
1912    obfuscate_ptr(Arc::as_ptr(arc))
1913}
1914
1915pub fn sys_kcmp(
1916    locked: &mut Locked<Unlocked>,
1917    current_task: &CurrentTask,
1918    pid1: pid_t,
1919    pid2: pid_t,
1920    resource_type: u32,
1921    index1: u64,
1922    index2: u64,
1923) -> Result<u32, Errno> {
1924    let weak1 = current_task.get_task(pid1);
1925    let weak2 = current_task.get_task(pid2);
1926    let task1 = Task::from_weak(&weak1)?;
1927    let task2 = Task::from_weak(&weak2)?;
1928
1929    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task1)?;
1930    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task2)?;
1931
1932    let resource_type = KcmpResource::from_raw(resource_type)?;
1933
1934    // Output encoding (see <https://man7.org/linux/man-pages/man2/kcmp.2.html>):
1935    //
1936    //   0  v1 is equal to v2; in other words, the two processes share the resource.
1937    //   1  v1 is less than v2.
1938    //   2  v1 is greater than v2.
1939    //   3  v1 is not equal to v2, but ordering information is unavailable.
1940    //
1941    fn encode_ordering(value: cmp::Ordering) -> u32 {
1942        match value {
1943            cmp::Ordering::Equal => 0,
1944            cmp::Ordering::Less => 1,
1945            cmp::Ordering::Greater => 2,
1946        }
1947    }
1948
1949    match resource_type {
1950        KcmpResource::FILE => {
1951            fn get_file(task: &Task, index: u64) -> Result<FileHandle, Errno> {
1952                // TODO: Test whether O_PATH is allowed here. Conceptually, seems like
1953                //       O_PATH should be allowed, but we haven't tested it yet.
1954                task.files.get_allowing_opath(FdNumber::from_raw(
1955                    index.try_into().map_err(|_| errno!(EBADF))?,
1956                ))
1957            }
1958            let file1 = get_file(&task1, index1)?;
1959            let file2 = get_file(&task2, index2)?;
1960            Ok(encode_ordering(obfuscate_arc(&file1).cmp(&obfuscate_arc(&file2))))
1961        }
1962        KcmpResource::FILES => Ok(encode_ordering(
1963            obfuscate_value(task1.files.id().raw()).cmp(&obfuscate_value(task2.files.id().raw())),
1964        )),
1965        KcmpResource::FS => {
1966            Ok(encode_ordering(obfuscate_arc(&task1.fs()).cmp(&obfuscate_arc(&task2.fs()))))
1967        }
1968        KcmpResource::SIGHAND => Ok(encode_ordering(
1969            obfuscate_arc(&task1.thread_group().signal_actions)
1970                .cmp(&obfuscate_arc(&task2.thread_group().signal_actions)),
1971        )),
1972        KcmpResource::VM => {
1973            Ok(encode_ordering(obfuscate_arc(&task1.mm()?).cmp(&obfuscate_arc(&task2.mm()?))))
1974        }
1975        _ => error!(EINVAL),
1976    }
1977}
1978
1979pub fn sys_syslog(
1980    locked: &mut Locked<Unlocked>,
1981    current_task: &CurrentTask,
1982    action_type: i32,
1983    address: UserAddress,
1984    length: i32,
1985) -> Result<i32, Errno> {
1986    let action = SyslogAction::try_from(action_type)?;
1987    let syslog =
1988        current_task.kernel().syslog.access(&current_task, SyslogAccess::Syscall(action))?;
1989    match action {
1990        SyslogAction::Read => {
1991            if address.is_null() || length < 0 {
1992                return error!(EINVAL);
1993            }
1994            let mut output_buffer =
1995                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
1996            syslog.blocking_read(locked, current_task, &mut output_buffer)
1997        }
1998        SyslogAction::ReadAll => {
1999            if address.is_null() || length < 0 {
2000                return error!(EINVAL);
2001            }
2002            let mut output_buffer =
2003                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2004            syslog.read_all(current_task, &mut output_buffer)
2005        }
2006        SyslogAction::SizeUnread => syslog.size_unread(),
2007        SyslogAction::SizeBuffer => syslog.size_buffer(),
2008        SyslogAction::Close | SyslogAction::Open => Ok(0),
2009        SyslogAction::ReadClear => {
2010            track_stub!(TODO("https://fxbug.dev/322894145"), "syslog: read clear");
2011            Ok(0)
2012        }
2013        SyslogAction::Clear => {
2014            track_stub!(TODO("https://fxbug.dev/322893673"), "syslog: clear");
2015            Ok(0)
2016        }
2017        SyslogAction::ConsoleOff => {
2018            track_stub!(TODO("https://fxbug.dev/322894399"), "syslog: console off");
2019            Ok(0)
2020        }
2021        SyslogAction::ConsoleOn => {
2022            track_stub!(TODO("https://fxbug.dev/322894106"), "syslog: console on");
2023            Ok(0)
2024        }
2025        SyslogAction::ConsoleLevel => {
2026            if length <= 0 || length >= 8 {
2027                return error!(EINVAL);
2028            }
2029            track_stub!(TODO("https://fxbug.dev/322894199"), "syslog: console level");
2030            Ok(0)
2031        }
2032    }
2033}
2034
2035pub fn sys_vhangup(
2036    _locked: &mut Locked<Unlocked>,
2037    current_task: &CurrentTask,
2038) -> Result<(), Errno> {
2039    security::check_task_capable(current_task, CAP_SYS_TTY_CONFIG)?;
2040    track_stub!(TODO("https://fxbug.dev/324079257"), "vhangup");
2041    Ok(())
2042}
2043
// Syscalls for arch32 usage.
//
// Each entry re-exports an existing syscall implementation from the parent module under the
// `sys_arch32_*` name. The module only exists on aarch64 builds.
#[cfg(target_arch = "aarch64")]
mod arch32 {
    pub use super::sys_execve as sys_arch32_execve;
    pub use super::sys_getegid as sys_arch32_getegid32;
    pub use super::sys_geteuid as sys_arch32_geteuid32;
    pub use super::sys_getgid as sys_arch32_getgid32;
    pub use super::sys_getgroups as sys_arch32_getgroups32;
    pub use super::sys_getpgid as sys_arch32_getpgid;
    pub use super::sys_getppid as sys_arch32_getppid;
    pub use super::sys_getpriority as sys_arch32_getpriority;
    pub use super::sys_getresgid as sys_arch32_getresgid32;
    pub use super::sys_getresuid as sys_arch32_getresuid32;
    pub use super::sys_getrlimit as sys_arch32_ugetrlimit;
    pub use super::sys_getrusage as sys_arch32_getrusage;
    pub use super::sys_getuid as sys_arch32_getuid32;
    pub use super::sys_ioprio_set as sys_arch32_ioprio_set;
    pub use super::sys_ptrace as sys_arch32_ptrace;
    pub use super::sys_quotactl as sys_arch32_quotactl;
    pub use super::sys_sched_get_priority_max as sys_arch32_sched_get_priority_max;
    pub use super::sys_sched_get_priority_min as sys_arch32_sched_get_priority_min;
    pub use super::sys_sched_getaffinity as sys_arch32_sched_getaffinity;
    pub use super::sys_sched_getparam as sys_arch32_sched_getparam;
    pub use super::sys_sched_setaffinity as sys_arch32_sched_setaffinity;
    pub use super::sys_sched_setparam as sys_arch32_sched_setparam;
    pub use super::sys_sched_setscheduler as sys_arch32_sched_setscheduler;
    pub use super::sys_seccomp as sys_arch32_seccomp;
    pub use super::sys_setfsuid as sys_arch32_setfsuid;
    pub use super::sys_setfsuid as sys_arch32_setfsuid32;
    pub use super::sys_setgid as sys_arch32_setgid32;
    pub use super::sys_setgroups as sys_arch32_setgroups32;
    pub use super::sys_setns as sys_arch32_setns;
    pub use super::sys_setpgid as sys_arch32_setpgid;
    pub use super::sys_setpriority as sys_arch32_setpriority;
    pub use super::sys_setregid as sys_arch32_setregid32;
    pub use super::sys_setresgid as sys_arch32_setresgid32;
    pub use super::sys_setresuid as sys_arch32_setresuid32;
    pub use super::sys_setreuid as sys_arch32_setreuid32;
    pub use super::sys_setreuid as sys_arch32_setreuid;
    pub use super::sys_setrlimit as sys_arch32_setrlimit;
    pub use super::sys_setsid as sys_arch32_setsid;
    pub use super::sys_syslog as sys_arch32_syslog;
    pub use super::sys_unshare as sys_arch32_unshare;
}

// Surface the arch32 aliases at this module's top level.
#[cfg(target_arch = "aarch64")]
pub use arch32::*;
2076
2077#[cfg(test)]
2078mod tests {
2079    use super::*;
2080    use crate::mm::syscalls::sys_munmap;
2081    use crate::testing::{AutoReleasableTask, map_memory, spawn_kernel_and_run};
2082    use starnix_syscalls::SUCCESS;
2083    use starnix_task_command::TaskCommand;
2084    use starnix_uapi::auth::Credentials;
2085    use starnix_uapi::{SCHED_FIFO, SCHED_NORMAL};
2086    use std::ffi::CString;
2087
2088    #[::fuchsia::test]
2089    async fn test_prctl_set_vma_anon_name() {
2090        spawn_kernel_and_run(async |locked, current_task| {
2091            let mapped_address =
2092                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2093            let name_addr = (mapped_address + 128u64).unwrap();
2094            let name = "test-name\0";
2095            current_task.write_memory(name_addr, name.as_bytes()).expect("failed to write name");
2096            sys_prctl(
2097                locked,
2098                current_task,
2099                PR_SET_VMA,
2100                PR_SET_VMA_ANON_NAME as u64,
2101                mapped_address.ptr() as u64,
2102                32,
2103                name_addr.ptr() as u64,
2104            )
2105            .expect("failed to set name");
2106            assert_eq!(
2107                "test-name",
2108                current_task
2109                    .mm()
2110                    .unwrap()
2111                    .get_mapping_name((mapped_address + 24u64).unwrap())
2112                    .expect("failed to get address")
2113                    .unwrap()
2114                    .to_string(),
2115            );
2116
2117            sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize)
2118                .expect("failed to unmap memory");
2119            assert_eq!(
2120                error!(EFAULT),
2121                current_task.mm().unwrap().get_mapping_name((mapped_address + 24u64).unwrap())
2122            );
2123        })
2124        .await;
2125    }
2126
2127    #[::fuchsia::test]
2128    async fn test_set_vma_name_special_chars() {
2129        spawn_kernel_and_run(async |locked, current_task| {
2130            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2131
2132            let mapping_addr =
2133                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2134
2135            for c in 1..255 {
2136                let vma_name = CString::new([c]).unwrap();
2137                current_task.write_memory(name_addr, vma_name.as_bytes_with_nul()).unwrap();
2138
2139                let result = sys_prctl(
2140                    locked,
2141                    current_task,
2142                    PR_SET_VMA,
2143                    PR_SET_VMA_ANON_NAME as u64,
2144                    mapping_addr.ptr() as u64,
2145                    *PAGE_SIZE,
2146                    name_addr.ptr() as u64,
2147                );
2148
2149                if c > 0x1f
2150                    && c < 0x7f
2151                    && c != b'\\'
2152                    && c != b'`'
2153                    && c != b'$'
2154                    && c != b'['
2155                    && c != b']'
2156                {
2157                    assert_eq!(result, Ok(SUCCESS));
2158                } else {
2159                    assert_eq!(result, error!(EINVAL));
2160                }
2161            }
2162        })
2163        .await;
2164    }
2165
2166    #[::fuchsia::test]
2167    async fn test_set_vma_name_long() {
2168        spawn_kernel_and_run(async |locked, current_task| {
2169            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2170
2171            let mapping_addr =
2172                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2173
2174            let name_too_long = CString::new(vec![b'a'; 256]).unwrap();
2175
2176            current_task.write_memory(name_addr, name_too_long.as_bytes_with_nul()).unwrap();
2177
2178            assert_eq!(
2179                sys_prctl(
2180                    locked,
2181                    current_task,
2182                    PR_SET_VMA,
2183                    PR_SET_VMA_ANON_NAME as u64,
2184                    mapping_addr.ptr() as u64,
2185                    *PAGE_SIZE,
2186                    name_addr.ptr() as u64,
2187                ),
2188                error!(EINVAL)
2189            );
2190
2191            let name_just_long_enough = CString::new(vec![b'a'; 255]).unwrap();
2192
2193            current_task
2194                .write_memory(name_addr, name_just_long_enough.as_bytes_with_nul())
2195                .unwrap();
2196
2197            assert_eq!(
2198                sys_prctl(
2199                    locked,
2200                    current_task,
2201                    PR_SET_VMA,
2202                    PR_SET_VMA_ANON_NAME as u64,
2203                    mapping_addr.ptr() as u64,
2204                    *PAGE_SIZE,
2205                    name_addr.ptr() as u64,
2206                ),
2207                Ok(SUCCESS)
2208            );
2209        })
2210        .await;
2211    }
2212
2213    #[::fuchsia::test]
2214    async fn test_set_vma_name_misaligned() {
2215        spawn_kernel_and_run(async |locked, current_task| {
2216            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2217
2218            let mapping_addr =
2219                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2220
2221            let name = CString::new("name").unwrap();
2222            current_task.write_memory(name_addr, name.as_bytes_with_nul()).unwrap();
2223
2224            // Passing a misaligned pointer to the start of the named region fails.
2225            assert_eq!(
2226                sys_prctl(
2227                    locked,
2228                    current_task,
2229                    PR_SET_VMA,
2230                    PR_SET_VMA_ANON_NAME as u64,
2231                    1 + mapping_addr.ptr() as u64,
2232                    *PAGE_SIZE - 1,
2233                    name_addr.ptr() as u64,
2234                ),
2235                error!(EINVAL)
2236            );
2237
2238            // Passing an unaligned length does work, however.
2239            assert_eq!(
2240                sys_prctl(
2241                    locked,
2242                    current_task,
2243                    PR_SET_VMA,
2244                    PR_SET_VMA_ANON_NAME as u64,
2245                    mapping_addr.ptr() as u64,
2246                    *PAGE_SIZE - 1,
2247                    name_addr.ptr() as u64,
2248                ),
2249                Ok(SUCCESS)
2250            );
2251        })
2252        .await;
2253    }
2254
2255    #[::fuchsia::test]
2256    async fn test_prctl_get_set_dumpable() {
2257        spawn_kernel_and_run(async |locked, current_task| {
2258            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2259                .expect("failed to get dumpable");
2260
2261            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 1, 0, 0, 0)
2262                .expect("failed to set dumpable");
2263            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2264                .expect("failed to get dumpable");
2265
2266            // SUID_DUMP_ROOT not supported.
2267            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 2, 0, 0, 0)
2268                .expect("failed to set dumpable");
2269            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2270                .expect("failed to get dumpable");
2271        })
2272        .await;
2273    }
2274
2275    #[::fuchsia::test]
2276    async fn test_sys_getsid() {
2277        spawn_kernel_and_run(async |locked, current_task| {
2278            let kernel = current_task.kernel();
2279            assert_eq!(
2280                current_task.get_tid(),
2281                sys_getsid(locked, &current_task, 0).expect("failed to get sid")
2282            );
2283
2284            let second_task = crate::execution::create_init_child_process(
2285                locked,
2286                &kernel.weak_self.upgrade().unwrap(),
2287                TaskCommand::new(b"second task"),
2288                Some(&CString::new("#kernel").unwrap()),
2289            )
2290            .expect("failed to create second task");
2291            second_task
2292                .mm()
2293                .unwrap()
2294                .initialize_mmap_layout_for_test(starnix_types::arch::ArchWidth::Arch64);
2295            let second_current = AutoReleasableTask::from(second_task);
2296
2297            assert_eq!(
2298                second_current.get_tid(),
2299                sys_getsid(locked, &current_task, second_current.get_tid())
2300                    .expect("failed to get sid")
2301            );
2302        })
2303        .await;
2304    }
2305
2306    #[::fuchsia::test]
2307    async fn test_get_affinity_size() {
2308        spawn_kernel_and_run(async |locked, current_task| {
2309            let mapped_address =
2310                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2311            let pid = current_task.get_pid();
2312            assert_eq!(
2313                sys_sched_getaffinity(locked, &current_task, pid, 16, mapped_address),
2314                Ok(16)
2315            );
2316            assert_eq!(
2317                sys_sched_getaffinity(locked, &current_task, pid, 1024, mapped_address),
2318                Ok(std::mem::size_of::<CpuSet>())
2319            );
2320            assert_eq!(
2321                sys_sched_getaffinity(locked, &current_task, pid, 1, mapped_address),
2322                error!(EINVAL)
2323            );
2324            assert_eq!(
2325                sys_sched_getaffinity(locked, &current_task, pid, 9, mapped_address),
2326                error!(EINVAL)
2327            );
2328        })
2329        .await;
2330    }
2331
2332    #[::fuchsia::test]
2333    async fn test_set_affinity_size() {
2334        spawn_kernel_and_run(async |locked, current_task| {
2335            let mapped_address =
2336                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2337            current_task.write_memory(mapped_address, &[0xffu8]).expect("failed to cpumask");
2338            let pid = current_task.get_pid();
2339            assert_eq!(
2340                sys_sched_setaffinity(
2341                    locked,
2342                    &current_task,
2343                    pid,
2344                    *PAGE_SIZE as u32,
2345                    mapped_address
2346                ),
2347                Ok(())
2348            );
2349            assert_eq!(
2350                sys_sched_setaffinity(locked, &current_task, pid, 1, mapped_address),
2351                error!(EINVAL)
2352            );
2353        })
2354        .await;
2355    }
2356
2357    #[::fuchsia::test]
2358    async fn test_task_name() {
2359        spawn_kernel_and_run(async |locked, current_task| {
2360            let mapped_address =
2361                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2362            let name = "my-task-name\0";
2363            current_task
2364                .write_memory(mapped_address, name.as_bytes())
2365                .expect("failed to write name");
2366
2367            let result =
2368                sys_prctl(locked, current_task, PR_SET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2369                    .unwrap();
2370            assert_eq!(SUCCESS, result);
2371
2372            let mapped_address =
2373                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2374            let result =
2375                sys_prctl(locked, current_task, PR_GET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2376                    .unwrap();
2377            assert_eq!(SUCCESS, result);
2378
2379            let name_length = name.len();
2380
2381            let out_name = current_task.read_memory_to_vec(mapped_address, name_length).unwrap();
2382            assert_eq!(name.as_bytes(), &out_name);
2383        })
2384        .await;
2385    }
2386
2387    #[::fuchsia::test]
2388    async fn test_sched_get_priority_min_max() {
2389        spawn_kernel_and_run(async |locked, current_task| {
2390            let non_rt_min =
2391                sys_sched_get_priority_min(locked, &current_task, SCHED_NORMAL).unwrap();
2392            assert_eq!(non_rt_min, 0);
2393            let non_rt_max =
2394                sys_sched_get_priority_max(locked, &current_task, SCHED_NORMAL).unwrap();
2395            assert_eq!(non_rt_max, 0);
2396
2397            let rt_min = sys_sched_get_priority_min(locked, &current_task, SCHED_FIFO).unwrap();
2398            assert_eq!(rt_min, 1);
2399            let rt_max = sys_sched_get_priority_max(locked, &current_task, SCHED_FIFO).unwrap();
2400            assert_eq!(rt_max, 99);
2401
2402            let min_bad_policy_error =
2403                sys_sched_get_priority_min(locked, &current_task, std::u32::MAX).unwrap_err();
2404            assert_eq!(min_bad_policy_error, errno!(EINVAL));
2405
2406            let max_bad_policy_error =
2407                sys_sched_get_priority_max(locked, &current_task, std::u32::MAX).unwrap_err();
2408            assert_eq!(max_bad_policy_error, errno!(EINVAL));
2409        })
2410        .await;
2411    }
2412
2413    #[::fuchsia::test]
2414    async fn test_sched_setscheduler() {
2415        spawn_kernel_and_run(async |locked, current_task| {
2416            current_task
2417                .thread_group()
2418                .limits
2419                .lock(locked)
2420                .set(Resource::RTPRIO, rlimit { rlim_cur: 255, rlim_max: 255 });
2421
2422            let scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2423            assert_eq!(scheduler, SCHED_NORMAL, "tasks should have normal scheduler by default");
2424
2425            let mapped_address =
2426                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2427            let requested_params = sched_param { sched_priority: 15 };
2428            current_task.write_object(mapped_address.into(), &requested_params).unwrap();
2429
2430            sys_sched_setscheduler(locked, &current_task, 0, SCHED_FIFO, mapped_address.into())
2431                .unwrap();
2432
2433            let new_scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2434            assert_eq!(new_scheduler, SCHED_FIFO, "task should have been assigned fifo scheduler");
2435
2436            let mapped_address =
2437                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2438            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2439                .expect("sched_getparam");
2440            let param_value: sched_param =
2441                current_task.read_object(mapped_address.into()).expect("read_object");
2442            assert_eq!(param_value.sched_priority, 15);
2443        })
2444        .await;
2445    }
2446
2447    #[::fuchsia::test]
2448    async fn test_sched_getparam() {
2449        spawn_kernel_and_run(async |locked, current_task| {
2450            let mapped_address =
2451                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2452            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2453                .expect("sched_getparam");
2454            let param_value: sched_param =
2455                current_task.read_object(mapped_address.into()).expect("read_object");
2456            assert_eq!(param_value.sched_priority, 0);
2457        })
2458        .await;
2459    }
2460
2461    #[::fuchsia::test]
2462    async fn test_setuid() {
2463        spawn_kernel_and_run(async |locked, current_task| {
2464            // Test for root.
2465            current_task.set_creds(Credentials::clone(&Credentials::root()));
2466            sys_setuid(locked, &current_task, 42).expect("setuid");
2467            let mut creds = Credentials::clone(&current_task.current_creds());
2468            assert_eq!(creds.euid, 42);
2469            assert_eq!(creds.uid, 42);
2470            assert_eq!(creds.saved_uid, 42);
2471
2472            // Remove the CAP_SETUID capability to avoid overwriting permission checks.
2473            creds.cap_effective.remove(CAP_SETUID);
2474            current_task.set_creds(creds);
2475
2476            // Test for non root, which task now is.
2477            assert_eq!(sys_setuid(locked, &current_task, 0), error!(EPERM));
2478            assert_eq!(sys_setuid(locked, &current_task, 43), error!(EPERM));
2479
2480            sys_setuid(locked, &current_task, 42).expect("setuid");
2481            let creds = current_task.clone_creds();
2482            assert_eq!(creds.euid, 42);
2483            assert_eq!(creds.uid, 42);
2484            assert_eq!(creds.saved_uid, 42);
2485
2486            // Change uid and saved_uid, and check that one can set the euid to these.
2487            let mut creds = Credentials::clone(&current_task.current_creds());
2488            creds.uid = 41;
2489            creds.euid = 42;
2490            creds.saved_uid = 43;
2491            current_task.set_creds(creds);
2492
2493            sys_setuid(locked, &current_task, 41).expect("setuid");
2494            let creds = current_task.clone_creds();
2495            assert_eq!(creds.euid, 41);
2496            assert_eq!(creds.uid, 41);
2497            assert_eq!(creds.saved_uid, 43);
2498
2499            let mut creds = Credentials::clone(&current_task.current_creds());
2500            creds.uid = 41;
2501            creds.euid = 42;
2502            creds.saved_uid = 43;
2503            current_task.set_creds(creds);
2504
2505            sys_setuid(locked, &current_task, 43).expect("setuid");
2506            let creds = current_task.clone_creds();
2507            assert_eq!(creds.euid, 43);
2508            assert_eq!(creds.uid, 41);
2509            assert_eq!(creds.saved_uid, 43);
2510        })
2511        .await;
2512    }
2513
2514    #[::fuchsia::test]
2515    async fn test_read_c_string_vector() {
2516        spawn_kernel_and_run(async |locked, current_task| {
2517            let arg_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2518            let arg = b"test-arg\0";
2519            current_task.write_memory(arg_addr, arg).expect("failed to write test arg");
2520            let arg_usercstr = UserCString::new(current_task, arg_addr);
2521            let null_usercstr = UserCString::null(current_task);
2522
2523            let argv_addr = UserCStringPtr::new(
2524                current_task,
2525                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE),
2526            );
2527            current_task
2528                .write_multi_arch_ptr(argv_addr.addr(), arg_usercstr)
2529                .expect("failed to write UserCString");
2530            current_task
2531                .write_multi_arch_ptr(argv_addr.next().unwrap().addr(), null_usercstr)
2532                .expect("failed to write UserCString");
2533
2534            // The arguments size limit should include the null terminator.
2535            assert!(read_c_string_vector(&current_task, argv_addr, 100, arg.len()).is_ok());
2536            assert_eq!(
2537                read_c_string_vector(
2538                    &current_task,
2539                    argv_addr,
2540                    100,
2541                    std::str::from_utf8(arg).unwrap().trim_matches('\0').len()
2542                ),
2543                error!(E2BIG)
2544            );
2545        })
2546        .await;
2547    }
2548}