Skip to main content

starnix_modules_iouring/
syscalls.rs

1// Copyright 2026 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::io_uring::{IORING_MAX_ENTRIES, IoUringFileObject};
6use starnix_core::mm::{IOVecPtr, MemoryAccessorExt};
7use starnix_core::security;
8use starnix_core::task::CurrentTask;
9use starnix_core::vfs::{FdFlags, FdNumber};
10use starnix_logging::track_stub;
11use starnix_sync::{Locked, Unlocked};
12use starnix_syscalls::{SUCCESS, SyscallResult};
13use starnix_uapi::auth::CAP_SYS_ADMIN;
14use starnix_uapi::errors::Errno;
15use starnix_uapi::signals::SigSet;
16use starnix_uapi::user_address::{UserAddress, UserRef};
17use starnix_uapi::user_value::UserValue;
18use starnix_uapi::{
19    errno, error, io_uring_params,
20    io_uring_register_op_IORING_REGISTER_BUFFERS as IORING_REGISTER_BUFFERS,
21    io_uring_register_op_IORING_REGISTER_IOWQ_MAX_WORKERS as IORING_REGISTER_IOWQ_MAX_WORKERS,
22    io_uring_register_op_IORING_REGISTER_PBUF_RING as IORING_REGISTER_PBUF_RING,
23    io_uring_register_op_IORING_REGISTER_PBUF_STATUS as IORING_REGISTER_PBUF_STATUS,
24    io_uring_register_op_IORING_REGISTER_PERSONALITY as IORING_REGISTER_PERSONALITY,
25    io_uring_register_op_IORING_REGISTER_RING_FDS as IORING_REGISTER_RING_FDS,
26    io_uring_register_op_IORING_UNREGISTER_BUFFERS as IORING_UNREGISTER_BUFFERS,
27    io_uring_register_op_IORING_UNREGISTER_PBUF_RING as IORING_UNREGISTER_PBUF_RING,
28    io_uring_register_op_IORING_UNREGISTER_RING_FDS as IORING_UNREGISTER_RING_FDS, uapi,
29};
30use std::sync::atomic;
31
32pub fn sys_io_uring_setup(
33    locked: &mut Locked<Unlocked>,
34    current_task: &CurrentTask,
35    user_entries: UserValue<u32>,
36    user_params: UserRef<io_uring_params>,
37) -> Result<FdNumber, Errno> {
38    // TODO: https://fxbug.dev/397186254 - we will want to do a no-audit CAP_IPC_LOCK capability
39    // check; see "If not granted CAP_IPC_LOCK io_uring operations are accounted against the user's
40    // RLIMIT_MEMLOCK limit" at
41    // https://github.com/SELinuxProject/selinux-notebook/blob/main/src/auditing.md#capability-audit-exemptions
42
43    if !current_task.kernel().features.io_uring {
44        return error!(ENOSYS);
45    }
46
47    // Apply policy from /proc/sys/kernel/io_uring_disabled
48    let limits = &current_task.kernel().system_limits;
49    match limits.io_uring_disabled.load(atomic::Ordering::Relaxed) {
50        0 => (),
51        1 => {
52            let io_uring_group = limits.io_uring_group.load(atomic::Ordering::Relaxed).try_into();
53            if io_uring_group.is_err()
54                || !current_task.current_creds().is_in_group(io_uring_group.unwrap())
55            {
56                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
57            }
58        }
59        _ => {
60            return error!(EPERM);
61        }
62    }
63
64    let entries = user_entries.validate(1..IORING_MAX_ENTRIES).ok_or_else(|| errno!(EINVAL))?;
65
66    let mut params = current_task.read_object(user_params)?;
67    for byte in params.resv {
68        if byte != 0 {
69            return error!(EINVAL);
70        }
71    }
72
73    let file = IoUringFileObject::new_file(locked, current_task, entries, &mut params)?;
74
75    // io_uring file descriptors are always created with CLOEXEC.
76    let fd = current_task.add_file(locked, file, FdFlags::CLOEXEC)?;
77    current_task.write_object(user_params, &params)?;
78    Ok(fd)
79}
80
81pub fn sys_io_uring_enter(
82    locked: &mut Locked<Unlocked>,
83    current_task: &CurrentTask,
84    fd: FdNumber,
85    to_submit: u32,
86    min_complete: u32,
87    flags: u32,
88    _sig: UserRef<SigSet>,
89    sigset_size: usize,
90) -> Result<u32, Errno> {
91    if !current_task.kernel().features.io_uring {
92        return error!(ENOSYS);
93    }
94    if !_sig.is_null() {
95        if sigset_size != std::mem::size_of::<SigSet>() {
96            return error!(EINVAL);
97        }
98    }
99    let file = current_task.get_file(fd)?;
100    let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?;
101    // TODO(https://fxbug.dev/297431387): Use `_sig` to change the signal mask for `current_task`.
102    io_uring.enter(locked, current_task, to_submit, min_complete, flags)
103}
104
105pub fn sys_io_uring_register(
106    locked: &mut Locked<Unlocked>,
107    current_task: &CurrentTask,
108    fd: FdNumber,
109    opcode: u32,
110    arg: UserAddress,
111    nr_args: UserValue<u32>,
112) -> Result<SyscallResult, Errno> {
113    if !current_task.kernel().features.io_uring {
114        return error!(ENOSYS);
115    }
116    let file = current_task.get_file(fd)?;
117    let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?;
118    match opcode {
119        IORING_REGISTER_BUFFERS => {
120            // TODO(https://fxbug.dev/297431387): Check nr_args for zero and return EINVAL here.
121            let iovec = IOVecPtr::new(current_task, arg);
122            let buffers = current_task.read_iovec(iovec, nr_args)?;
123            io_uring.register_buffers(locked, buffers);
124            return Ok(SUCCESS);
125        }
126        IORING_UNREGISTER_BUFFERS => {
127            if !arg.is_null() {
128                return error!(EINVAL);
129            }
130            io_uring.unregister_buffers(locked);
131            return Ok(SUCCESS);
132        }
133        IORING_REGISTER_IOWQ_MAX_WORKERS => {
134            track_stub!(
135                TODO("https://fxbug.dev/297431387"),
136                "io_uring_register IORING_REGISTER_IOWQ_MAX_WORKERS",
137                opcode
138            );
139            // The current implementation only ever use 1 worker for read and 1 for write.
140            return Ok(SUCCESS);
141        }
142        IORING_REGISTER_RING_FDS => {
143            track_stub!(
144                TODO("https://fxbug.dev/297431387"),
145                "io_uring_register IORING_REGISTER_RING_FDS",
146                opcode
147            );
148            // The current implementation doesn't use any thread local specific identifier for
149            // performance. Instead, when registering a fd, just return the passed fd as the value
150            // to use.
151            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
152            if nr_args > 16 {
153                return error!(EINVAL);
154            }
155            let updates_addr = UserRef::<uapi::io_uring_rsrc_update>::from(arg);
156            let mut updates = current_task
157                .read_objects_to_smallvec::<uapi::io_uring_rsrc_update, 1>(updates_addr, nr_args)?;
158            let mut result = 0;
159            for update in updates.iter_mut() {
160                if update.offset == u32::MAX {
161                    update.offset = update.data.try_into().map_err(|_| errno!(EINVAL))?;
162                    result += 1;
163                }
164            }
165            current_task.write_objects(updates_addr, &updates)?;
166            return Ok(result.into());
167        }
168        IORING_UNREGISTER_RING_FDS => {
169            track_stub!(
170                TODO("https://fxbug.dev/297431387"),
171                "io_uring_register IORING_UNREGISTER_RING_FDS",
172                opcode
173            );
174            // Because registering a fd doesn't use any resource currently, unregistering is free.
175            return Ok(SUCCESS);
176        }
177        IORING_REGISTER_PBUF_RING => {
178            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
179            if nr_args != 1 {
180                return error!(EINVAL);
181            }
182            let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?;
183            io_uring.register_ring_buffers(locked, buffer_definition)?;
184            return Ok(SUCCESS);
185        }
186
187        IORING_UNREGISTER_PBUF_RING => {
188            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
189            if nr_args != 1 {
190                return error!(EINVAL);
191            }
192            let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?;
193            io_uring.unregister_ring_buffers(locked, buffer_definition)?;
194            return Ok(SUCCESS);
195        }
196
197        IORING_REGISTER_PBUF_STATUS => {
198            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
199            if nr_args != 1 {
200                return error!(EINVAL);
201            }
202            let buffer_status_addr = UserRef::<uapi::io_uring_buf_status>::from(arg);
203            let mut buffer_status: uapi::io_uring_buf_status =
204                current_task.read_object(buffer_status_addr)?;
205            io_uring.ring_buffer_status(locked, &mut buffer_status)?;
206            current_task.write_object(buffer_status_addr, &buffer_status)?;
207            return Ok(SUCCESS);
208        }
209        IORING_REGISTER_PERSONALITY => {
210            // TODO(https://fxbug.dev/505326006) If registering personality is implemented,
211            // then implement the uring_override_creds security hook.
212            track_stub!(
213                TODO("https://fxbug.dev/297431387"),
214                "io_uring_register unknown op",
215                opcode
216            );
217            return error!(EINVAL);
218        }
219        _ => {
220            track_stub!(
221                TODO("https://fxbug.dev/297431387"),
222                "io_uring_register unknown op",
223                opcode
224            );
225            return error!(EINVAL);
226        }
227    }
228}
229
230pub use sys_io_uring_enter as sys_arch32_io_uring_enter;
231pub use sys_io_uring_register as sys_arch32_io_uring_register;
232pub use sys_io_uring_setup as sys_arch32_io_uring_setup;