starnix_core/arch/x64/
syscalls.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use starnix_sync::{InterruptibleEvent, Locked, Unlocked, WakeReason};
6
7use crate::mm::MemoryAccessorExt;
8use crate::signals::RunState;
9use crate::signals::syscalls::sys_signalfd4;
10use crate::task::CurrentTask;
11use crate::task::syscalls::do_clone;
12use crate::time::utc;
13use crate::vfs::syscalls::{
14    poll, sys_dup3, sys_epoll_create1, sys_epoll_pwait, sys_eventfd2, sys_faccessat, sys_fchmodat,
15    sys_fchownat, sys_inotify_init1, sys_linkat, sys_mkdirat, sys_mknodat, sys_newfstatat,
16    sys_openat, sys_pipe2, sys_readlinkat, sys_renameat2, sys_symlinkat, sys_unlinkat,
17};
18use crate::vfs::{DirentSink32, FdNumber};
19use starnix_logging::track_stub;
20use starnix_types::time::{
21    duration_from_poll_timeout, duration_from_timeval, timeval_from_duration,
22};
23use starnix_uapi::device_type::DeviceType;
24use starnix_uapi::errors::{Errno, ErrnoResultExt};
25use starnix_uapi::file_mode::FileMode;
26use starnix_uapi::open_flags::OpenFlags;
27use starnix_uapi::signals::{SIGCHLD, SigSet};
28use starnix_uapi::user_address::{UserAddress, UserCString, UserRef};
29use starnix_uapi::vfs::EpollEvent;
30use starnix_uapi::{
31    __kernel_time_t, ARCH_SET_FS, ARCH_SET_GS, AT_REMOVEDIR, AT_SYMLINK_NOFOLLOW, CLONE_VFORK,
32    CLONE_VM, CSIGNAL, ITIMER_REAL, clone_args, errno, error, gid_t, itimerval, pid_t, pollfd,
33    tid_t, uapi, uid_t,
34};
35
36pub fn sys_access(
37    locked: &mut Locked<Unlocked>,
38    current_task: &CurrentTask,
39    user_path: UserCString,
40    mode: u32,
41) -> Result<(), Errno> {
42    sys_faccessat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
43}
44
45pub fn sys_alarm(
46    _locked: &mut Locked<Unlocked>,
47    current_task: &CurrentTask,
48    duration: u32,
49) -> Result<u32, Errno> {
50    let duration = zx::MonotonicDuration::from_seconds(duration.into());
51    let new_value = timeval_from_duration(duration);
52    let old_value = current_task.thread_group().set_itimer(
53        current_task,
54        ITIMER_REAL,
55        itimerval { it_value: new_value, it_interval: Default::default() },
56    )?;
57
58    let remaining = duration_from_timeval(old_value.it_value)?;
59
60    let old_value_seconds = remaining.into_seconds();
61    if old_value_seconds == 0 && remaining != zx::MonotonicDuration::default() {
62        // We can't return a zero value if the alarm was scheduled even if it had
63        // less than one second remaining. Return 1 instead.
64        return Ok(1);
65    }
66    old_value_seconds.try_into().map_err(|_| errno!(EDOM))
67}
68
69pub fn sys_arch_prctl(
70    _locked: &mut Locked<Unlocked>,
71    current_task: &mut CurrentTask,
72    code: u32,
73    addr: UserAddress,
74) -> Result<(), Errno> {
75    match code {
76        ARCH_SET_FS => {
77            current_task.thread_state.registers.fs_base = addr.ptr() as u64;
78            Ok(())
79        }
80        ARCH_SET_GS => {
81            current_task.thread_state.registers.gs_base = addr.ptr() as u64;
82            Ok(())
83        }
84        _ => {
85            track_stub!(TODO("https://fxbug.dev/322874054"), "arch_prctl", code);
86            error!(ENOSYS)
87        }
88    }
89}
90
91pub fn sys_chmod(
92    locked: &mut Locked<Unlocked>,
93    current_task: &CurrentTask,
94    user_path: UserCString,
95    mode: FileMode,
96) -> Result<(), Errno> {
97    sys_fchmodat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
98}
99
100pub fn sys_chown(
101    locked: &mut Locked<Unlocked>,
102    current_task: &CurrentTask,
103    user_path: UserCString,
104    owner: uid_t,
105    group: gid_t,
106) -> Result<(), Errno> {
107    sys_fchownat(locked, current_task, FdNumber::AT_FDCWD, user_path, owner, group, 0)
108}
109
110/// The parameter order for `clone` varies by architecture.
111pub fn sys_clone(
112    locked: &mut Locked<Unlocked>,
113    current_task: &mut CurrentTask,
114    flags: u64,
115    user_stack: UserAddress,
116    user_parent_tid: UserRef<tid_t>,
117    user_child_tid: UserRef<tid_t>,
118    user_tls: UserAddress,
119) -> Result<tid_t, Errno> {
120    // Our flags parameter uses the low 8 bits (CSIGNAL mask) of flags to indicate the exit
121    // signal. The CloneArgs struct separates these as `flags` and `exit_signal`.
122    do_clone(
123        locked,
124        current_task,
125        &clone_args {
126            flags: flags & !(CSIGNAL as u64),
127            child_tid: user_child_tid.addr().ptr() as u64,
128            parent_tid: user_parent_tid.addr().ptr() as u64,
129            pidfd: user_parent_tid.addr().ptr() as u64,
130            exit_signal: flags & (CSIGNAL as u64),
131            stack: user_stack.ptr() as u64,
132            tls: user_tls.ptr() as u64,
133            ..Default::default()
134        },
135    )
136}
137
138pub fn sys_fork(
139    locked: &mut Locked<Unlocked>,
140    current_task: &mut CurrentTask,
141) -> Result<tid_t, Errno> {
142    do_clone(
143        locked,
144        current_task,
145        &clone_args { exit_signal: uapi::SIGCHLD.into(), ..Default::default() },
146    )
147}
148
149// https://pubs.opengroup.org/onlinepubs/9699919799/functions/creat.html
150pub fn sys_creat(
151    locked: &mut Locked<Unlocked>,
152    current_task: &CurrentTask,
153    user_path: UserCString,
154    mode: FileMode,
155) -> Result<FdNumber, Errno> {
156    sys_open(
157        locked,
158        current_task,
159        user_path,
160        (OpenFlags::WRONLY | OpenFlags::CREAT | OpenFlags::TRUNC).bits(),
161        mode,
162    )
163}
164
165pub fn sys_dup2(
166    locked: &mut Locked<Unlocked>,
167    current_task: &CurrentTask,
168    oldfd: FdNumber,
169    newfd: FdNumber,
170) -> Result<FdNumber, Errno> {
171    if oldfd == newfd {
172        // O_PATH allowed for:
173        //
174        //  Duplicating the file descriptor (dup(2), fcntl(2)
175        //  F_DUPFD, etc.).
176        //
177        // See https://man7.org/linux/man-pages/man2/open.2.html
178        current_task.files.get_allowing_opath(oldfd)?;
179        return Ok(newfd);
180    }
181    sys_dup3(locked, current_task, oldfd, newfd, 0)
182}
183
184pub fn sys_epoll_create(
185    locked: &mut Locked<Unlocked>,
186    current_task: &CurrentTask,
187    size: i32,
188) -> Result<FdNumber, Errno> {
189    if size < 1 {
190        // The man page for epoll_create says the size was used in a previous implementation as
191        // a hint but no longer does anything. But it's still required to be >= 1 to ensure
192        // programs are backwards-compatible.
193        return error!(EINVAL);
194    }
195    sys_epoll_create1(locked, current_task, 0)
196}
197
198pub fn sys_epoll_wait(
199    locked: &mut Locked<Unlocked>,
200    current_task: &mut CurrentTask,
201    epfd: FdNumber,
202    events: UserRef<EpollEvent>,
203    max_events: i32,
204    timeout: i32,
205) -> Result<usize, Errno> {
206    sys_epoll_pwait(
207        locked,
208        current_task,
209        epfd,
210        events,
211        max_events,
212        timeout,
213        UserRef::<SigSet>::default(),
214    )
215}
216
217pub fn sys_eventfd(
218    locked: &mut Locked<Unlocked>,
219    current_task: &CurrentTask,
220    value: u32,
221) -> Result<FdNumber, Errno> {
222    sys_eventfd2(locked, current_task, value, 0)
223}
224
225pub fn sys_getdents(
226    locked: &mut Locked<Unlocked>,
227    current_task: &CurrentTask,
228    fd: FdNumber,
229    user_buffer: UserAddress,
230    user_capacity: usize,
231) -> Result<usize, Errno> {
232    let file = current_task.files.get(fd)?;
233    let mut offset = file.offset.lock();
234    let mut sink = DirentSink32::new(current_task, &mut offset, user_buffer, user_capacity);
235    let result = file.readdir(locked, current_task, &mut sink);
236    sink.map_result_with_actual(result)
237}
238
239pub fn sys_getpgrp(
240    _locked: &mut Locked<Unlocked>,
241    current_task: &CurrentTask,
242) -> Result<pid_t, Errno> {
243    Ok(current_task.thread_group().read().process_group.leader)
244}
245
246pub fn sys_inotify_init(
247    locked: &mut Locked<Unlocked>,
248    current_task: &CurrentTask,
249) -> Result<FdNumber, Errno> {
250    sys_inotify_init1(locked, current_task, 0)
251}
252
253pub fn sys_lchown(
254    locked: &mut Locked<Unlocked>,
255    current_task: &CurrentTask,
256    user_path: UserCString,
257    owner: uid_t,
258    group: gid_t,
259) -> Result<(), Errno> {
260    sys_fchownat(
261        locked,
262        current_task,
263        FdNumber::AT_FDCWD,
264        user_path,
265        owner,
266        group,
267        AT_SYMLINK_NOFOLLOW,
268    )
269}
270
271pub fn sys_link(
272    locked: &mut Locked<Unlocked>,
273    current_task: &CurrentTask,
274    old_user_path: UserCString,
275    new_user_path: UserCString,
276) -> Result<(), Errno> {
277    sys_linkat(
278        locked,
279        current_task,
280        FdNumber::AT_FDCWD,
281        old_user_path,
282        FdNumber::AT_FDCWD,
283        new_user_path,
284        0,
285    )
286}
287
288pub fn sys_lstat(
289    locked: &mut Locked<Unlocked>,
290    current_task: &CurrentTask,
291    user_path: UserCString,
292    buffer: UserRef<uapi::stat>,
293) -> Result<(), Errno> {
294    // TODO(https://fxbug.dev/42172993): Add the `AT_NO_AUTOMOUNT` flag once it is supported in
295    // `sys_newfstatat`.
296    sys_newfstatat(
297        locked,
298        current_task,
299        FdNumber::AT_FDCWD,
300        user_path,
301        buffer.into(),
302        AT_SYMLINK_NOFOLLOW,
303    )
304}
305
306pub fn sys_mkdir(
307    locked: &mut Locked<Unlocked>,
308    current_task: &CurrentTask,
309    user_path: UserCString,
310    mode: FileMode,
311) -> Result<(), Errno> {
312    sys_mkdirat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
313}
314
315pub fn sys_mknod(
316    locked: &mut Locked<Unlocked>,
317    current_task: &CurrentTask,
318    user_path: UserCString,
319    mode: FileMode,
320    dev: DeviceType,
321) -> Result<(), Errno> {
322    sys_mknodat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode, dev)
323}
324
325pub fn sys_open(
326    locked: &mut Locked<Unlocked>,
327    current_task: &CurrentTask,
328    user_path: UserCString,
329    flags: u32,
330    mode: FileMode,
331) -> Result<FdNumber, Errno> {
332    sys_openat(locked, current_task, FdNumber::AT_FDCWD, user_path, flags, mode)
333}
334
335pub fn sys_pause(_locked: &mut Locked<Unlocked>, current_task: &CurrentTask) -> Result<(), Errno> {
336    let event = InterruptibleEvent::new();
337    let guard = event.begin_wait();
338    let result = current_task.run_in_state(RunState::Event(event.clone()), || {
339        match guard.block_until(None, zx::MonotonicInstant::INFINITE) {
340            Err(WakeReason::Interrupted) => error!(ERESTARTNOHAND),
341            Err(WakeReason::DeadlineExpired) => panic!("blocking forever cannot time out"),
342            Ok(()) => Ok(()),
343        }
344    });
345    result.map_eintr(|| errno!(ERESTARTNOHAND))
346}
347
348pub fn sys_pipe(
349    locked: &mut Locked<Unlocked>,
350    current_task: &CurrentTask,
351    user_pipe: UserRef<FdNumber>,
352) -> Result<(), Errno> {
353    sys_pipe2(locked, current_task, user_pipe, 0)
354}
355
356pub fn sys_poll(
357    locked: &mut Locked<Unlocked>,
358    current_task: &mut CurrentTask,
359    user_fds: UserRef<pollfd>,
360    num_fds: i32,
361    timeout: i32,
362) -> Result<usize, Errno> {
363    let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?);
364    poll(locked, current_task, user_fds, num_fds, None, deadline)
365}
366
367pub fn sys_readlink(
368    locked: &mut Locked<Unlocked>,
369    current_task: &CurrentTask,
370    user_path: UserCString,
371    buffer: UserAddress,
372    buffer_size: usize,
373) -> Result<usize, Errno> {
374    sys_readlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, buffer, buffer_size)
375}
376
377pub fn sys_rmdir(
378    locked: &mut Locked<Unlocked>,
379    current_task: &CurrentTask,
380    user_path: UserCString,
381) -> Result<(), Errno> {
382    sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, AT_REMOVEDIR)
383}
384
385pub fn sys_rename(
386    locked: &mut Locked<Unlocked>,
387    current_task: &CurrentTask,
388    old_user_path: UserCString,
389    new_user_path: UserCString,
390) -> Result<(), Errno> {
391    sys_renameat2(
392        locked,
393        current_task,
394        FdNumber::AT_FDCWD,
395        old_user_path,
396        FdNumber::AT_FDCWD,
397        new_user_path,
398        0,
399    )
400}
401
402pub fn sys_renameat(
403    locked: &mut Locked<Unlocked>,
404    current_task: &CurrentTask,
405    old_dir_fd: FdNumber,
406    old_user_path: UserCString,
407    new_dir_fd: FdNumber,
408    new_user_path: UserCString,
409) -> Result<(), Errno> {
410    sys_renameat2(locked, current_task, old_dir_fd, old_user_path, new_dir_fd, new_user_path, 0)
411}
412
413pub fn sys_stat(
414    locked: &mut Locked<Unlocked>,
415    current_task: &CurrentTask,
416    user_path: UserCString,
417    buffer: UserRef<uapi::stat>,
418) -> Result<(), Errno> {
419    // TODO(https://fxbug.dev/42172993): Add the `AT_NO_AUTOMOUNT` flag once it is supported in
420    // `sys_newfstatat`.
421    sys_newfstatat(locked, current_task, FdNumber::AT_FDCWD, user_path, buffer.into(), 0)
422}
423
424// https://man7.org/linux/man-pages/man2/symlink.2.html
425pub fn sys_symlink(
426    locked: &mut Locked<Unlocked>,
427    current_task: &CurrentTask,
428    user_target: UserCString,
429    user_path: UserCString,
430) -> Result<(), Errno> {
431    sys_symlinkat(locked, current_task, user_target, FdNumber::AT_FDCWD, user_path)
432}
433
434pub fn sys_time(
435    _locked: &mut Locked<Unlocked>,
436    current_task: &CurrentTask,
437    time_addr: UserRef<__kernel_time_t>,
438) -> Result<__kernel_time_t, Errno> {
439    let time = (utc::utc_now().into_nanos() / zx::MonotonicDuration::from_seconds(1).into_nanos())
440        as __kernel_time_t;
441    if !time_addr.is_null() {
442        current_task.write_object(time_addr, &time)?;
443    }
444    Ok(time)
445}
446
447pub fn sys_unlink(
448    locked: &mut Locked<Unlocked>,
449    current_task: &CurrentTask,
450    user_path: UserCString,
451) -> Result<(), Errno> {
452    sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, 0)
453}
454
455pub fn sys_signalfd(
456    locked: &mut Locked<Unlocked>,
457    current_task: &CurrentTask,
458    fd: FdNumber,
459    mask_addr: UserRef<SigSet>,
460    mask_size: usize,
461) -> Result<FdNumber, Errno> {
462    sys_signalfd4(locked, current_task, fd, mask_addr, mask_size, 0)
463}
464
465pub fn sys_vfork(
466    locked: &mut Locked<Unlocked>,
467    current_task: &mut CurrentTask,
468) -> Result<tid_t, Errno> {
469    do_clone(
470        locked,
471        current_task,
472        &clone_args {
473            flags: (CLONE_VFORK | CLONE_VM) as u64,
474            exit_signal: SIGCHLD.number() as u64,
475            ..Default::default()
476        },
477    )
478}
479
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::{MemoryAccessor, PAGE_SIZE};
    use crate::testing::{map_memory, spawn_kernel_and_run, spawn_kernel_and_run_with_pkgfs};
    use crate::vfs::FdFlags;

    /// `sys_dup2` with identical descriptors: fails with `EBADF` for a descriptor that was
    /// never opened and returns the descriptor itself for one that is open.
    #[::fuchsia::test]
    async fn test_sys_dup2() {
        // The general behaviour is covered by test_sys_dup3; only the case where both fds are
        // equal is exercised here.
        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
            let unopened_fd = FdNumber::from_raw(42);
            assert_eq!(sys_dup2(locked, current_task, unopened_fd, unopened_fd), error!(EBADF));

            let opened_file = current_task
                .open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)
                .expect("open_file");
            let opened_fd =
                current_task.add_file(locked, opened_file, FdFlags::empty()).expect("add");
            assert_eq!(sys_dup2(locked, current_task, opened_fd, opened_fd), Ok(opened_fd));
        })
        .await;
    }

    /// `sys_creat` creates a file that can then be opened, and the returned descriptor does
    /// not carry `FdFlags::CLOEXEC`.
    #[::fuchsia::test]
    async fn test_sys_creat() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Place the path string in freshly mapped user memory.
            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
            let path = "newfile.txt";
            current_task.write_memory(path_addr, path.as_bytes()).unwrap();

            let user_path = UserCString::new(current_task, path_addr);
            let created_fd =
                sys_creat(locked, current_task, user_path, FileMode::default()).unwrap();

            // The file must now exist and be openable by name.
            let _reopened = current_task.open_file(locked, path.into(), OpenFlags::RDONLY).unwrap();

            let fd_flags = current_task.files.get_fd_flags_allowing_opath(created_fd).unwrap();
            assert!(!fd_flags.contains(FdFlags::CLOEXEC));
        })
        .await;
    }

    /// `sys_time` returns a positive value, advances with real time, and writes the returned
    /// value to the supplied user address.
    #[::fuchsia::test]
    async fn test_time() {
        spawn_kernel_and_run(async |locked, current_task| {
            // A default (null) address: only the return value is checked.
            let time1 = sys_time(locked, &current_task, Default::default()).expect("time");
            assert!(time1 > 0);

            let address = map_memory(
                locked,
                &current_task,
                UserAddress::default(),
                std::mem::size_of::<__kernel_time_t>() as u64,
            );
            std::thread::sleep(std::time::Duration::from_secs(2));

            let time2 = sys_time(locked, &current_task, address.into()).expect("time");
            assert!(time2 >= time1 + 2);
            assert!(time2 < time1 + 10);

            // The value written to user memory must match the returned value.
            let time3: __kernel_time_t =
                current_task.read_object(address.into()).expect("read_object");
            assert_eq!(time2, time3);
        })
        .await;
    }
}