starnix_core/mm/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::debugger::notify_debugger_of_module_list;
7use crate::mm::{
8    DesiredAddress, FutexKey, IOVecPtr, MappingName, MappingOptions, MembarrierType,
9    MemoryAccessorExt, MremapFlags, PAGE_SIZE, PrivateFutexKey, ProtectionFlags, SharedFutexKey,
10};
11use crate::security;
12use crate::syscalls::time::TimeSpecPtr;
13use crate::task::{CurrentTask, Task};
14use crate::time::TargetTime;
15use crate::time::utc::estimate_boot_deadline_from_utc;
16use crate::vfs::buffers::{OutputBuffer, UserBuffersInputBuffer, UserBuffersOutputBuffer};
17use crate::vfs::{FdFlags, FdNumber, UserFaultFile};
18use fuchsia_runtime::UtcTimeline;
19use linux_uapi::MLOCK_ONFAULT;
20use starnix_logging::{CATEGORY_STARNIX_MM, log_trace, trace_duration, track_stub};
21use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
22use starnix_syscalls::SyscallArg;
23use starnix_types::time::{duration_from_timespec, time_from_timespec, timespec_from_time};
24use starnix_uapi::auth::{CAP_SYS_PTRACE, PTRACE_MODE_ATTACH_REALCREDS};
25use starnix_uapi::errors::{EINTR, Errno};
26use starnix_uapi::open_flags::OpenFlags;
27use starnix_uapi::user_address::{UserAddress, UserRef};
28use starnix_uapi::user_value::UserValue;
29use starnix_uapi::{
30    FUTEX_BITSET_MATCH_ANY, FUTEX_CLOCK_REALTIME, FUTEX_CMD_MASK, FUTEX_CMP_REQUEUE,
31    FUTEX_CMP_REQUEUE_PI, FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_PRIVATE_FLAG, FUTEX_REQUEUE,
32    FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI, FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
33    FUTEX_WAKE, FUTEX_WAKE_BITSET, FUTEX_WAKE_OP, MAP_ANONYMOUS, MAP_DENYWRITE, MAP_FIXED,
34    MAP_FIXED_NOREPLACE, MAP_GROWSDOWN, MAP_LOCKED, MAP_NORESERVE, MAP_POPULATE, MAP_PRIVATE,
35    MAP_SHARED, MAP_SHARED_VALIDATE, MAP_STACK, MS_INVALIDATE, O_CLOEXEC, O_NONBLOCK, PROT_EXEC,
36    UFFD_USER_MODE_ONLY, errno, error, robust_list_head, tid_t, uapi,
37};
38use std::ops::Deref as _;
39use zx;
40
41#[cfg(target_arch = "x86_64")]
42use starnix_uapi::MAP_32BIT;
43
// Returns any platform-specific mmap flags. This is a separate function because as of this writing
// "attributes on expressions are experimental."
//
// On x86_64, MAP_32BIT is a valid (Linux-specific) flag requesting a mapping in the
// low 2 GiB of the address space; other architectures have no extra flags.
#[cfg(target_arch = "x86_64")]
fn get_valid_platform_mmap_flags() -> u32 {
    MAP_32BIT
}
// Non-x86_64 targets define no platform-specific mmap flags.
#[cfg(not(target_arch = "x86_64"))]
fn get_valid_platform_mmap_flags() -> u32 {
    0
}
54
55/// sys_mmap takes a mutable reference to current_task because it may modify the IP register.
56pub fn sys_mmap(
57    locked: &mut Locked<Unlocked>,
58    current_task: &mut CurrentTask,
59    addr: UserAddress,
60    length: usize,
61    prot: u32,
62    flags: u32,
63    fd: FdNumber,
64    offset: u64,
65) -> Result<UserAddress, Errno> {
66    let user_address = do_mmap(locked, current_task, addr, length, prot, flags, fd, offset)?;
67    if prot & PROT_EXEC != 0 {
68        // Possibly loads a new module. Notify debugger for the change.
69        // We only care about dynamic linker loading modules for now, which uses mmap. In the future
70        // we might want to support unloading modules in munmap or JIT compilation in mprotect.
71        notify_debugger_of_module_list(current_task)?;
72    }
73    Ok(user_address)
74}
75
/// Shared implementation of mmap/mmap2: validates the protection bits, flags, and
/// arguments, then creates either an anonymous or a file-backed mapping in the
/// calling task's address space. Returns the address the mapping was placed at.
pub fn do_mmap<L>(
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
    prot: u32,
    flags: u32,
    fd: FdNumber,
    offset: u64,
) -> Result<UserAddress, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Unknown protection bits are rejected with EINVAL.
    let prot_flags = ProtectionFlags::from_access_bits(prot).ok_or_else(|| {
        track_stub!(TODO("https://fxbug.dev/322874211"), "mmap parse protection", prot);
        errno!(EINVAL)
    })?;

    // The set of flags this implementation understands; anything outside it fails
    // below. MAP_SHARED_VALIDATE requires the kernel to reject unknown flags, so it
    // gets EOPNOTSUPP instead of EINVAL in that case.
    let valid_flags: u32 = get_valid_platform_mmap_flags()
        | MAP_PRIVATE
        | MAP_SHARED
        | MAP_SHARED_VALIDATE
        | MAP_ANONYMOUS
        | MAP_FIXED
        | MAP_FIXED_NOREPLACE
        | MAP_POPULATE
        | MAP_NORESERVE
        | MAP_STACK
        | MAP_DENYWRITE
        | MAP_GROWSDOWN
        | MAP_LOCKED;
    if flags & !valid_flags != 0 {
        if flags & MAP_SHARED_VALIDATE != 0 {
            return error!(EOPNOTSUPP);
        }
        track_stub!(TODO("https://fxbug.dev/322873638"), "mmap check flags", flags);
        return error!(EINVAL);
    }

    // Resolve the fd only for file-backed mappings; MAP_ANONYMOUS ignores fd.
    let file = if flags & MAP_ANONYMOUS != 0 { None } else { Some(current_task.files.get(fd)?) };
    // Exactly one of MAP_PRIVATE / MAP_SHARED must be given.
    if flags & (MAP_PRIVATE | MAP_SHARED) == 0
        || flags & (MAP_PRIVATE | MAP_SHARED) == MAP_PRIVATE | MAP_SHARED
    {
        return error!(EINVAL);
    }
    if length == 0 {
        return error!(EINVAL);
    }
    // The file offset must be page-aligned.
    if offset % *PAGE_SIZE != 0 {
        return error!(EINVAL);
    }

    // TODO(tbodt): should we consider MAP_NORESERVE?

    // Translate (addr, MAP_FIXED, MAP_FIXED_NOREPLACE) into a placement request.
    // Note the arm order: `(addr, _, true)` precedes `(addr, true, false)`, so when
    // both fixed flags are set MAP_FIXED_NOREPLACE wins (no overwrite).
    let addr = match (addr, flags & MAP_FIXED != 0, flags & MAP_FIXED_NOREPLACE != 0) {
        (UserAddress::NULL, false, false) => DesiredAddress::Any,
        (UserAddress::NULL, true, _) | (UserAddress::NULL, _, true) => return error!(EINVAL),
        (addr, false, false) => DesiredAddress::Hint(addr),
        (addr, _, true) => DesiredAddress::Fixed(addr),
        (addr, true, false) => DesiredAddress::FixedOverwrite(addr),
    };

    // Anonymous mappings always start at offset 0 within their backing memory.
    let memory_offset = if flags & MAP_ANONYMOUS != 0 { 0 } else { offset };

    let mut options = MappingOptions::empty();
    if flags & MAP_SHARED != 0 {
        options |= MappingOptions::SHARED;
    }
    if flags & MAP_ANONYMOUS != 0 {
        options |= MappingOptions::ANONYMOUS;
    }
    // MAP_32BIT is honored only for non-fixed placements.
    #[cfg(target_arch = "x86_64")]
    if flags & MAP_FIXED == 0 && flags & MAP_32BIT != 0 {
        options |= MappingOptions::LOWER_32BIT;
    }
    if flags & MAP_GROWSDOWN != 0 {
        options |= MappingOptions::GROWSDOWN;
    }
    if flags & MAP_POPULATE != 0 {
        options |= MappingOptions::POPULATE;
    }
    if flags & MAP_LOCKED != 0 {
        // The kernel isn't expected to return an error if locking fails with this flag, so for now
        // this implementation will always fail to lock memory even if mapping succeeds.
        track_stub!(TODO("https://fxbug.dev/406377606"), "MAP_LOCKED");
    }

    // LSM hook: may veto the mapping based on the file and requested protection.
    security::mmap_file(current_task, file.as_ref(), prot_flags, options)?;

    if flags & MAP_ANONYMOUS != 0 {
        trace_duration!(CATEGORY_STARNIX_MM, "AnonymousMmap");
        current_task.mm()?.map_anonymous(addr, length, prot_flags, options, MappingName::None)
    } else {
        trace_duration!(CATEGORY_STARNIX_MM, "FileBackedMmap");
        // TODO(tbodt): maximize protection flags so that mprotect works
        let file = file.expect("file retrieved above for file-backed mapping");
        file.mmap(
            locked,
            current_task,
            addr,
            memory_offset,
            length,
            prot_flags,
            options,
            file.name.to_passive(),
        )
    }
}
184
185pub fn sys_mprotect(
186    _locked: &mut Locked<Unlocked>,
187    current_task: &CurrentTask,
188    addr: UserAddress,
189    length: usize,
190    prot: u32,
191) -> Result<(), Errno> {
192    let prot_flags = ProtectionFlags::from_bits(prot).ok_or_else(|| {
193        track_stub!(TODO("https://fxbug.dev/322874672"), "mprotect parse protection", prot);
194        errno!(EINVAL)
195    })?;
196    current_task.mm()?.protect(current_task, addr, length, prot_flags)?;
197    Ok(())
198}
199
200pub fn sys_mremap(
201    _locked: &mut Locked<Unlocked>,
202    current_task: &CurrentTask,
203    addr: UserAddress,
204    old_length: usize,
205    new_length: usize,
206    flags: u32,
207    new_addr: UserAddress,
208) -> Result<UserAddress, Errno> {
209    let flags = MremapFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
210    let addr =
211        current_task.mm()?.remap(current_task, addr, old_length, new_length, flags, new_addr)?;
212    Ok(addr)
213}
214
215pub fn sys_munmap(
216    _locked: &mut Locked<Unlocked>,
217    current_task: &CurrentTask,
218    addr: UserAddress,
219    length: usize,
220) -> Result<(), Errno> {
221    current_task.mm()?.unmap(addr, length)?;
222    Ok(())
223}
224
225pub fn sys_msync(
226    _locked: &mut Locked<Unlocked>,
227    current_task: &CurrentTask,
228    addr: UserAddress,
229    length: usize,
230    flags: u32,
231) -> Result<(), Errno> {
232    track_stub!(TODO("https://fxbug.dev/322874588"), "msync");
233
234    let mm = current_task.mm()?;
235
236    // Perform some basic validation of the address range given to satisfy gvisor tests that
237    // use msync as a way to probe whether a page is mapped or not.
238    mm.ensure_mapped(addr, length)?;
239
240    let addr_end = (addr + length).map_err(|_| errno!(ENOMEM))?;
241    if flags & MS_INVALIDATE != 0 && mm.state.read().num_locked_bytes(addr..addr_end) > 0 {
242        // gvisor mlock tests rely on returning EBUSY from msync on locked ranges.
243        return error!(EBUSY);
244    }
245
246    Ok(())
247}
248
249pub fn sys_madvise(
250    _locked: &mut Locked<Unlocked>,
251    current_task: &CurrentTask,
252    addr: UserAddress,
253    length: usize,
254    advice: u32,
255) -> Result<(), Errno> {
256    current_task.mm()?.madvise(current_task, addr, length, advice)?;
257    Ok(())
258}
259
/// process_madvise(2): not yet implemented; always returns ENOSYS.
pub fn sys_process_madvise(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _pidfd: FdNumber,
    _iovec_addr: IOVecPtr,
    _iovec_count: UserValue<i32>,
    _advice: UserValue<i32>,
    _flags: UserValue<u32>,
) -> Result<usize, Errno> {
    track_stub!(TODO("https://fxbug.dev/409060664"), "process_madvise");
    error!(ENOSYS)
}
272
/// brk(2): adjust the program break; delegates entirely to the memory manager,
/// which returns the (possibly unchanged) break address.
pub fn sys_brk(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
) -> Result<UserAddress, Errno> {
    current_task.mm()?.set_brk(locked, current_task, addr)
}
280
281pub fn sys_process_vm_readv(
282    locked: &mut Locked<Unlocked>,
283    current_task: &CurrentTask,
284    tid: tid_t,
285    local_iov_addr: IOVecPtr,
286    local_iov_count: UserValue<i32>,
287    remote_iov_addr: IOVecPtr,
288    remote_iov_count: UserValue<i32>,
289    flags: usize,
290) -> Result<usize, Errno> {
291    if flags != 0 {
292        return error!(EINVAL);
293    }
294
295    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
296    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
297    // make sure to return Ok(0) before doing any other validation/operations.
298    if (local_iov_count == 0 && local_iov_addr.is_null())
299        || (remote_iov_count == 0 && remote_iov_addr.is_null())
300    {
301        return Ok(0);
302    }
303
304    let weak_remote_task = current_task.get_task(tid);
305    let remote_task = Task::from_weak(&weak_remote_task)?;
306
307    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
308
309    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
310    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
311    log_trace!(
312        "process_vm_readv(tid={}, local_iov={:?}, remote_iov={:?})",
313        tid,
314        local_iov,
315        remote_iov
316    );
317
318    track_stub!(TODO("https://fxbug.dev/322874765"), "process_vm_readv single-copy");
319    // According to the man page, this syscall was added to Linux specifically to
320    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
321    // point.
322    let mut output = UserBuffersOutputBuffer::unified_new(current_task, local_iov)?;
323    let remote_mm = remote_task.mm().ok();
324    if current_task.has_same_address_space(remote_mm.as_ref()) {
325        let mut input = UserBuffersInputBuffer::unified_new(current_task, remote_iov)?;
326        output.write_buffer(&mut input)
327    } else {
328        let mut input = UserBuffersInputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
329        output.write_buffer(&mut input)
330    }
331}
332
333pub fn sys_process_vm_writev(
334    locked: &mut Locked<Unlocked>,
335    current_task: &CurrentTask,
336    tid: tid_t,
337    local_iov_addr: IOVecPtr,
338    local_iov_count: UserValue<i32>,
339    remote_iov_addr: IOVecPtr,
340    remote_iov_count: UserValue<i32>,
341    flags: usize,
342) -> Result<usize, Errno> {
343    if flags != 0 {
344        return error!(EINVAL);
345    }
346
347    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
348    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
349    // make sure to return Ok(0) before doing any other validation/operations.
350    if (local_iov_count == 0 && local_iov_addr.is_null())
351        || (remote_iov_count == 0 && remote_iov_addr.is_null())
352    {
353        return Ok(0);
354    }
355
356    let weak_remote_task = current_task.get_task(tid);
357    let remote_task = Task::from_weak(&weak_remote_task)?;
358
359    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
360
361    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
362    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
363    log_trace!(
364        "sys_process_vm_writev(tid={}, local_iov={:?}, remote_iov={:?})",
365        tid,
366        local_iov,
367        remote_iov
368    );
369
370    track_stub!(TODO("https://fxbug.dev/322874339"), "process_vm_writev single-copy");
371    // NB: According to the man page, this syscall was added to Linux specifically to
372    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
373    // point.
374    let mut input = UserBuffersInputBuffer::unified_new(current_task, local_iov)?;
375    let remote_mm = remote_task.mm().ok();
376    if current_task.has_same_address_space(remote_mm.as_ref()) {
377        let mut output = UserBuffersOutputBuffer::unified_new(current_task, remote_iov)?;
378        output.write_buffer(&mut input)
379    } else {
380        let mut output = UserBuffersOutputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
381        output.write_buffer(&mut input)
382    }
383}
384
385pub fn sys_process_mrelease(
386    _locked: &mut Locked<Unlocked>,
387    current_task: &CurrentTask,
388    pidfd: FdNumber,
389    flags: u32,
390) -> Result<(), Errno> {
391    if flags != 0 {
392        return error!(EINVAL);
393    }
394    let file = current_task.files.get(pidfd)?;
395    let task = current_task.get_task(file.as_thread_group_key()?.pid());
396    let task = task.upgrade().ok_or_else(|| errno!(ESRCH))?;
397    if !task.load_stopped().is_stopped() {
398        return error!(EINVAL);
399    }
400
401    let mm = task.mm()?;
402    let mm_state = mm.state.write();
403    mm_state.mrelease()
404}
405
/// membarrier(2): issue memory/instruction-stream barriers across threads.
///
/// RSEQ-related commands are unimplemented (ENOSYS); registration for global
/// expedited barriers is accepted but not tracked.
pub fn sys_membarrier(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    cmd: uapi::membarrier_cmd,
    _flags: u32,
    _cpu_id: i32,
) -> Result<u32, Errno> {
    match cmd {
        // This command returns a bit mask of all supported commands.
        // We support everything except for the RSEQ family.
        uapi::membarrier_cmd_MEMBARRIER_CMD_QUERY => Ok(uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
            | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE),
        // Global and global expedited barriers are treated identically. We don't track
        // registration for global expedited barriers currently.
        uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
        | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED => {
            system_barrier(BarrierType::DataMemory);
            Ok(0)
        }
        // Global registration commands are ignored.
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED => Ok(0),
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED => {
            // A private expedited barrier is only issued if the address space is registered
            // for these barriers; otherwise Linux mandates EPERM.
            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::Memory) {
                // If a barrier is requested, issue a global barrier.
                system_barrier(BarrierType::DataMemory);
                Ok(0)
            } else {
                error!(EPERM)
            }
        }
        // Private sync core barriers are treated as global instruction stream barriers.
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE => {
            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::SyncCore)
            {
                system_barrier(BarrierType::InstructionStream);
                Ok(0)
            } else {
                error!(EPERM)
            }
        }
        // Registration records intent on the address space; the returned flag value
        // (if any) is unused here.
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED => {
            let _ =
                current_task.mm()?.register_membarrier_private_expedited(MembarrierType::Memory)?;
            Ok(0)
        }

        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE => {
            let _ = current_task
                .mm()?
                .register_membarrier_private_expedited(MembarrierType::SyncCore)?;
            Ok(0)
        }
        // RSEQ barriers are not supported yet.
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ => {
            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
            error!(ENOSYS)
        }
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ => {
            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
            error!(ENOSYS)
        }
        _ => error!(EINVAL),
    }
}
476
/// userfaultfd(2): create a file descriptor for user-space page-fault handling.
pub fn sys_userfaultfd(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    raw_flags: u32,
) -> Result<FdNumber, Errno> {
    // Only O_CLOEXEC, O_NONBLOCK and UFFD_USER_MODE_ONLY are accepted.
    let unknown_flags = raw_flags & !(O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
    if unknown_flags != 0 {
        return error!(EINVAL, format!("unknown flags provided: {unknown_flags:x?}"));
    }
    // Translate the raw uapi bits into internal OpenFlags.
    let mut open_flags = OpenFlags::empty();
    if raw_flags & O_NONBLOCK != 0 {
        open_flags |= OpenFlags::NONBLOCK;
    }
    if raw_flags & O_CLOEXEC != 0 {
        open_flags |= OpenFlags::CLOEXEC;
    }

    // Only close-on-exec userfaultfds are supported: a uffd surviving exec() is
    // still a stub, so the non-CLOEXEC case returns ENOSYS.
    let fd_flags = if raw_flags & O_CLOEXEC != 0 {
        FdFlags::CLOEXEC
    } else {
        track_stub!(TODO("https://fxbug.dev/297375964"), "userfaultfds that survive exec()");
        return error!(ENOSYS);
    };

    // NOTE(review): this is true when UFFD_USER_MODE_ONLY is NOT set, which looks
    // inverted relative to the variable name — confirm against the semantics of the
    // corresponding UserFaultFile::new() parameter before relying on it.
    let user_mode_only = raw_flags & UFFD_USER_MODE_ONLY == 0;
    let uff_handle = UserFaultFile::new(locked, current_task, open_flags, user_mode_only)?;
    current_task.add_file(locked, uff_handle, fd_flags)
}
505
506pub fn sys_futex(
507    locked: &mut Locked<Unlocked>,
508    current_task: &mut CurrentTask,
509    addr: UserAddress,
510    op: u32,
511    value: u32,
512    timeout_or_value2: SyscallArg,
513    addr2: UserAddress,
514    value3: u32,
515) -> Result<usize, Errno> {
516    if op & FUTEX_PRIVATE_FLAG != 0 {
517        do_futex::<PrivateFutexKey>(
518            locked,
519            current_task,
520            addr,
521            op,
522            value,
523            timeout_or_value2,
524            addr2,
525            value3,
526        )
527    } else {
528        do_futex::<SharedFutexKey>(
529            locked,
530            current_task,
531            addr,
532            op,
533            value,
534            timeout_or_value2,
535            addr2,
536            value3,
537        )
538    }
539}
540
541fn do_futex<Key: FutexKey>(
542    locked: &mut Locked<Unlocked>,
543    current_task: &mut CurrentTask,
544    addr: UserAddress,
545    op: u32,
546    value: u32,
547    timeout_or_value2: SyscallArg,
548    addr2: UserAddress,
549    value3: u32,
550) -> Result<usize, Errno> {
551    let futexes = Key::get_table_from_task(current_task)?;
552    let cmd = op & (FUTEX_CMD_MASK as u32);
553
554    let is_realtime = match (cmd, op & FUTEX_CLOCK_REALTIME != 0) {
555        // This option bit can be employed only with the FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
556        // (since Linux 4.5) FUTEX_WAIT, and (since Linux 5.14) FUTEX_LOCK_PI2 operations.
557        (FUTEX_WAIT_BITSET | FUTEX_WAIT_REQUEUE_PI | FUTEX_WAIT | FUTEX_LOCK_PI2, true) => true,
558        (_, true) => return error!(EINVAL),
559
560        // FUTEX_LOCK_PI always uses realtime.
561        (FUTEX_LOCK_PI, _) => true,
562
563        (_, false) => false,
564    };
565
566    // The timeout is interpreted differently by WAIT and WAIT_BITSET: WAIT takes a
567    // timeout and WAIT_BITSET takes a deadline.
568    let read_timespec = |current_task: &CurrentTask| {
569        let utime = TimeSpecPtr::new(current_task, timeout_or_value2);
570        if utime.is_null() {
571            Ok(timespec_from_time(zx::MonotonicInstant::INFINITE))
572        } else {
573            current_task.read_multi_arch_object(utime)
574        }
575    };
576    let read_timeout = |current_task: &CurrentTask| {
577        let timespec = read_timespec(current_task)?;
578        let timeout = duration_from_timespec(timespec);
579        let deadline = zx::MonotonicInstant::after(timeout?);
580        if is_realtime {
581            // Since this is a timeout, waiting on the monotonic timeline before it's paused is
582            // just as good as actually estimating UTC here.
583            track_stub!(TODO("https://fxbug.dev/356912301"), "FUTEX_CLOCK_REALTIME timeout");
584        }
585        Ok(deadline)
586    };
587    let read_deadline = |current_task: &CurrentTask| {
588        let timespec = read_timespec(current_task)?;
589        if is_realtime {
590            Ok(TargetTime::RealTime(time_from_timespec::<UtcTimeline>(timespec)?))
591        } else {
592            Ok(TargetTime::Monotonic(time_from_timespec::<zx::MonotonicTimeline>(timespec)?))
593        }
594    };
595
596    match cmd {
597        FUTEX_WAIT => {
598            let deadline = read_timeout(current_task)?;
599            let bitset = FUTEX_BITSET_MATCH_ANY;
600            do_futex_wait_with_restart::<Key>(
601                locked,
602                current_task,
603                addr,
604                value,
605                bitset,
606                TargetTime::Monotonic(deadline),
607            )?;
608            Ok(0)
609        }
610        FUTEX_WAKE => {
611            futexes.wake(locked, current_task, addr, value as usize, FUTEX_BITSET_MATCH_ANY)
612        }
613        FUTEX_WAKE_OP => {
614            track_stub!(TODO("https://fxbug.dev/361181940"), "FUTEX_WAKE_OP");
615            error!(ENOSYS)
616        }
617        FUTEX_WAIT_BITSET => {
618            if value3 == 0 {
619                return error!(EINVAL);
620            }
621            let deadline = read_deadline(current_task)?;
622            do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, value3, deadline)?;
623            Ok(0)
624        }
625        FUTEX_WAKE_BITSET => {
626            if value3 == 0 {
627                return error!(EINVAL);
628            }
629            futexes.wake(locked, current_task, addr, value as usize, value3)
630        }
631        FUTEX_REQUEUE | FUTEX_CMP_REQUEUE => {
632            let wake_count = value as usize;
633            let requeue_count: usize = timeout_or_value2.into();
634            if wake_count > std::i32::MAX as usize || requeue_count > std::i32::MAX as usize {
635                return error!(EINVAL);
636            }
637            let expected_value = if cmd == FUTEX_CMP_REQUEUE { Some(value3) } else { None };
638            futexes.requeue(
639                locked,
640                current_task,
641                addr,
642                wake_count,
643                requeue_count,
644                addr2,
645                expected_value,
646            )
647        }
648        FUTEX_WAIT_REQUEUE_PI => {
649            track_stub!(TODO("https://fxbug.dev/361181558"), "FUTEX_WAIT_REQUEUE_PI");
650            error!(ENOSYS)
651        }
652        FUTEX_CMP_REQUEUE_PI => {
653            track_stub!(TODO("https://fxbug.dev/361181773"), "FUTEX_CMP_REQUEUE_PI");
654            error!(ENOSYS)
655        }
656        FUTEX_LOCK_PI | FUTEX_LOCK_PI2 => {
657            futexes.lock_pi(locked, current_task, addr, read_timeout(current_task)?)?;
658            Ok(0)
659        }
660        FUTEX_TRYLOCK_PI => {
661            track_stub!(TODO("https://fxbug.dev/361175318"), "FUTEX_TRYLOCK_PI");
662            error!(ENOSYS)
663        }
664        FUTEX_UNLOCK_PI => {
665            futexes.unlock_pi(locked, current_task, addr)?;
666            Ok(0)
667        }
668        _ => {
669            track_stub!(TODO("https://fxbug.dev/322875124"), "futex unknown command", cmd);
670            error!(ENOSYS)
671        }
672    }
673}
674
675fn do_futex_wait_with_restart<Key: FutexKey>(
676    locked: &mut Locked<Unlocked>,
677    current_task: &mut CurrentTask,
678    addr: UserAddress,
679    value: u32,
680    mask: u32,
681    deadline: TargetTime,
682) -> Result<(), Errno> {
683    let futexes = Key::get_table_from_task(current_task)?;
684    let result = match deadline {
685        TargetTime::Monotonic(mono_deadline) => {
686            futexes.wait(locked, current_task, addr, value, mask, mono_deadline)
687        }
688        TargetTime::BootInstant(boot_deadline) => {
689            let timer_slack = current_task.read().get_timerslack();
690            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
691        }
692        TargetTime::RealTime(utc_deadline) => {
693            // We convert real time deadlines to boot time deadlines since we cannot wait using a UTC deadline.
694            let (boot_deadline, _) = estimate_boot_deadline_from_utc(utc_deadline);
695            let timer_slack = current_task.read().get_timerslack();
696            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
697        }
698    };
699    match result {
700        Err(err) if err == EINTR => {
701            current_task.set_syscall_restart_func(move |locked, current_task| {
702                do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, mask, deadline)
703            });
704            error!(ERESTART_RESTARTBLOCK)
705        }
706        result => result,
707    }
708}
709
710pub fn sys_get_robust_list(
711    _locked: &mut Locked<Unlocked>,
712    current_task: &CurrentTask,
713    tid: tid_t,
714    user_head_ptr: UserRef<UserAddress>,
715    user_len_ptr: UserRef<usize>,
716) -> Result<(), Errno> {
717    if tid < 0 {
718        return error!(EINVAL);
719    }
720    if user_head_ptr.is_null() || user_len_ptr.is_null() {
721        return error!(EFAULT);
722    }
723    if tid != 0 {
724        security::check_task_capable(current_task, CAP_SYS_PTRACE)?;
725    }
726    let task = if tid == 0 { current_task.weak_task() } else { current_task.get_task(tid) };
727    let task = Task::from_weak(&task)?;
728    current_task.write_object(user_head_ptr, &task.read().robust_list_head.addr())?;
729    current_task.write_object(user_len_ptr, &std::mem::size_of::<robust_list_head>())?;
730    Ok(())
731}
732
733pub fn sys_set_robust_list(
734    _locked: &mut Locked<Unlocked>,
735    current_task: &CurrentTask,
736    user_head: UserRef<robust_list_head>,
737    len: usize,
738) -> Result<(), Errno> {
739    if len != std::mem::size_of::<robust_list_head>() {
740        return error!(EINVAL);
741    }
742    current_task.write().robust_list_head = user_head.into();
743    Ok(())
744}
745
/// mlock(2): lock the pages covering `[addr, addr + length)` into memory.
pub fn sys_mlock(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
) -> Result<(), Errno> {
    // If flags is 0, mlock2() behaves exactly the same as mlock().
    sys_mlock2(locked, current_task, addr, length, 0)
}
755
756pub fn sys_mlock2(
757    locked: &mut Locked<Unlocked>,
758    current_task: &CurrentTask,
759    addr: UserAddress,
760    length: usize,
761    flags: u64,
762) -> Result<(), Errno> {
763    const KNOWN_FLAGS: u64 = MLOCK_ONFAULT as u64;
764    if (flags & !KNOWN_FLAGS) != 0 {
765        return error!(EINVAL);
766    }
767    let on_fault = flags & MLOCK_ONFAULT as u64 != 0;
768    current_task.mm()?.mlock(current_task, locked, addr, length, on_fault)
769}
770
/// munlock(2): unlock previously locked pages in `[addr, addr + length)`.
pub fn sys_munlock(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
) -> Result<(), Errno> {
    current_task.mm()?.munlock(current_task, addr, length)
}
779
/// mlockall(2): not yet implemented; always returns ENOSYS.
pub fn sys_mlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "mlockall()");
    error!(ENOSYS)
}
788
/// munlockall(2): not yet implemented; always returns ENOSYS.
pub fn sys_munlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "munlockall()");
    error!(ENOSYS)
}
797
/// mincore(2): not yet implemented; always returns ENOSYS.
pub fn sys_mincore(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _addr: UserAddress,
    _length: usize,
    _out: UserRef<u8>,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297372240"), "mincore()");
    error!(ENOSYS)
}
808
// Syscalls for arch32 usage: 32-bit (compat) entry points for aarch64 hosts.
// Most syscalls are re-exported unchanged; only those with a different 32-bit
// ABI (set_robust_list, mmap2, munmap) get dedicated wrappers.
#[cfg(target_arch = "aarch64")]
mod arch32 {
    use crate::mm::PAGE_SIZE;
    use crate::mm::syscalls::{UserAddress, sys_mmap};
    use crate::task::{CurrentTask, RobustListHeadPtr};
    use crate::vfs::FdNumber;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::errors::Errno;
    use starnix_uapi::user_address::UserRef;
    use starnix_uapi::{error, uapi};

    /// set_robust_list for 32-bit callers: validates against the 32-bit layout of
    /// `robust_list_head` and stores the head pointer wrapped for 32-bit access.
    pub fn sys_arch32_set_robust_list(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_head: UserRef<uapi::arch32::robust_list_head>,
        len: usize,
    ) -> Result<(), Errno> {
        // The caller must pass the exact size of the 32-bit struct.
        if len != std::mem::size_of::<uapi::arch32::robust_list_head>() {
            return error!(EINVAL);
        }
        current_task.write().robust_list_head = RobustListHeadPtr::from_32(user_head);
        Ok(())
    }

    /// mmap2 for 32-bit callers: like mmap, but `offset` is given in page-sized
    /// units, scaled to bytes here before delegating to sys_mmap.
    // NOTE(review): mmap2(2) documents the offset unit as a fixed 4096 bytes, not
    // the runtime page size; these agree only while PAGE_SIZE == 4096 — confirm if
    // larger page sizes are ever supported.
    pub fn sys_arch32_mmap2(
        locked: &mut Locked<Unlocked>,
        current_task: &mut CurrentTask,
        addr: UserAddress,
        length: usize,
        prot: u32,
        flags: u32,
        fd: FdNumber,
        offset: u64,
    ) -> Result<UserAddress, Errno> {
        sys_mmap(locked, current_task, addr, length, prot, flags, fd, offset * *PAGE_SIZE)
    }

    /// munmap for 32-bit callers: the range must lie entirely within the lower
    /// 32 bits of the address space.
    pub fn sys_arch32_munmap(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
    ) -> Result<(), Errno> {
        if !addr.is_lower_32bit() || length >= (1 << 32) {
            return error!(EINVAL);
        }
        current_task.mm()?.unmap(addr, length)?;
        Ok(())
    }

    // These syscalls share the 64-bit implementation unchanged.
    pub use super::{
        sys_futex as sys_arch32_futex, sys_madvise as sys_arch32_madvise,
        sys_membarrier as sys_arch32_membarrier, sys_mincore as sys_arch32_mincore,
        sys_mlock as sys_arch32_mlock, sys_mlock2 as sys_arch32_mlock2,
        sys_mlockall as sys_arch32_mlockall, sys_mremap as sys_arch32_mremap,
        sys_msync as sys_arch32_msync, sys_munlock as sys_arch32_munlock,
        sys_munlockall as sys_arch32_munlockall,
        sys_process_mrelease as sys_arch32_process_mrelease,
        sys_process_vm_readv as sys_arch32_process_vm_readv,
        sys_userfaultfd as sys_arch32_userfaultfd,
    };
}
872
873#[cfg(target_arch = "aarch64")]
874pub use arch32::*;
875
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::memory::MemoryObject;
    use crate::testing::*;
    use starnix_uapi::errors::EEXIST;
    use starnix_uapi::file_mode::Access;
    use starnix_uapi::{MREMAP_FIXED, MREMAP_MAYMOVE, PROT_READ};

    /// mmap with a (non-fixed) hint address that collides with an existing
    /// mapping should succeed at some other address instead of failing or
    /// clobbering the existing mapping.
    #[::fuchsia::test]
    async fn test_mmap_with_colliding_hint() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    assert_ne!(address, mapped_address);
                }
                error => {
                    panic!("mmap with colliding hint failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// mmap with MAP_FIXED over an existing mapping should succeed at exactly
    /// the requested address, replacing the old mapping.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    assert_eq!(address, mapped_address);
                }
                error => {
                    panic!("mmap with fixed collision failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// mmap with MAP_FIXED_NOREPLACE over an existing mapping must fail with
    /// EEXIST rather than replace the mapping.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_noreplace_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                FdNumber::from_raw(-1),
                0,
            ) {
                Err(errno) => {
                    assert_eq!(errno, EEXIST);
                }
                result => {
                    panic!("mmap with fixed_noreplace collision failed: {result:?}");
                }
            }
        })
        .await;
    }

    /// It is ok to call munmap with an address that is a multiple of the page size, and
    /// a non-zero length.
    #[::fuchsia::test]
    async fn test_munmap() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );

            // Verify that the memory is no longer readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
        })
        .await;
    }

    /// It is ok to call munmap on an unmapped range.
    #[::fuchsia::test]
    async fn test_munmap_not_mapped() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
            // A second munmap of the same (now unmapped) range must also succeed.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
        })
        .await;
    }

    /// It is an error to call munmap with a length of 0.
    #[::fuchsia::test]
    async fn test_munmap_0_length() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(sys_munmap(locked, &current_task, mapped_address, 0), error!(EINVAL));
        })
        .await;
    }

    /// It is an error to call munmap with an address that is not a multiple of the page size.
    #[::fuchsia::test]
    async fn test_munmap_not_aligned() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + 1u64).unwrap(),
                    *PAGE_SIZE as usize
                ),
                error!(EINVAL)
            );

            // Verify that the memory is still readable.
            assert!(current_task.read_memory_to_array::<5>(mapped_address).is_ok());
        })
        .await;
    }

    /// The entire page should be unmapped, not just the range [address, address + length).
    #[::fuchsia::test]
    async fn test_munmap_unmap_partial() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            // Unmap only half a page; the whole page should go away.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) / 2),
                Ok(())
            );

            // Verify that memory can't be read in either half of the page.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE - 2)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// All pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_multiple_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // One byte past a page boundary: both pages intersect the range.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) + 1),
                Ok(())
            );

            // Verify that neither page is readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// Only the pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_one_of_many_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // One byte short of a full page: only the first page intersects.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) - 1),
                Ok(())
            );

            // Verify that the second page is still readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap())
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap the middle page of a mapping.
    #[::fuchsia::test]
    async fn test_munmap_middle_page() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Verify that the first and third pages are still readable.
            assert!(current_task.read_memory_to_vec(mapped_address, 5).is_ok());
            assert_eq!(
                current_task.read_memory_to_vec((mapped_address + *PAGE_SIZE).unwrap(), 5),
                error!(EFAULT)
            );
            assert!(
                current_task
                    .read_memory_to_vec((mapped_address + (*PAGE_SIZE * 2)).unwrap(), 5)
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap a range of pages that includes disjoint mappings.
    #[::fuchsia::test]
    async fn test_munmap_many_mappings() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_addresses: Vec<_> = std::iter::repeat_with(|| {
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE)
            })
            .take(3)
            .collect();
            // Unmap the span covering the lowest through the highest mapping.
            let min_address = *mapped_addresses.iter().min().unwrap();
            let max_address = *mapped_addresses.iter().max().unwrap();
            let unmap_length = (max_address - min_address) + *PAGE_SIZE as usize;

            assert_eq!(sys_munmap(locked, &current_task, min_address, unmap_length), Ok(()));

            // Verify that none of the mapped pages are readable.
            for mapped_address in mapped_addresses {
                assert_eq!(current_task.read_memory_to_vec(mapped_address, 5), error!(EFAULT));
            }
        })
        .await;
    }

    /// msync should succeed on fully-mapped ranges and fail with ENOMEM on
    /// any range containing an unmapped hole, including ranges that span
    /// multiple contiguous mappings.
    #[::fuchsia::test]
    async fn test_msync_validates_address_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages and test that ranges covering these pages return no error.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );

            // Unmap the middle page and test that ranges covering that page return ENOMEM.
            sys_munmap(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE as usize)
                .expect("unmap middle");
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize, 0), Ok(()));
            assert_eq!(
                sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + (*PAGE_SIZE * 2)).unwrap(),
                    *PAGE_SIZE as usize,
                    0
                ),
                Ok(())
            );

            // Map the middle page back and test that ranges covering the three pages
            // (spanning multiple ranges) return no error.
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );
        })
        .await;
    }

    /// Shrinks an entire range.
    #[::fuchsia::test]
    async fn test_mremap_shrink_whole_range_from_end() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            // Shrink the mapping from 2 to 1 pages.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
        })
        .await;
    }

    /// Shrinks part of a range, introducing a hole in the middle.
    #[::fuchsia::test]
    async fn test_mremap_shrink_partial_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first 2 pages down to 1, creating a hole.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinking doesn't care if the range specified spans multiple mappings.
    #[::fuchsia::test]
    async fn test_mremap_shrink_across_ranges() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, then map the middle again. This will leave us with
            // 3 contiguous mappings.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );

            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Remap over all three mappings, shrinking to 1 page.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 3,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_unmapped(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap());
        })
        .await;
    }

    /// Grows a mapping in-place.
    #[::fuchsia::test]
    async fn test_mremap_grow_in_place() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, leaving a hole.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Grow the first page in-place into the middle.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE,
                    *PAGE_SIZE * 2,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');

            // The middle page should be new, and not just pointing to the original middle page filled
            // with 'b'.
            check_page_ne(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Tries to grow a set of pages that cannot fit, and forces a move.
    #[::fuchsia::test]
    async fn test_mremap_grow_maymove() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Grow the first two pages by 1, forcing a move.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE * 3,
                MREMAP_MAYMOVE,
                UserAddress::default(),
            )
            .expect("failed to mremap");

            assert_ne!(new_addr, addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The moved pages should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'b');

            // The newly grown page should not be the same as the original third page.
            check_page_ne(&current_task, (new_addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinks a set of pages and move them to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_shrink_fixed() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages which will act as the destination.
            let dst_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, dst_addr, 'y');
            fill_page(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'z');

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first two pages and move them to overwrite the mappings at `dst_addr`.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE,
                MREMAP_MAYMOVE | MREMAP_FIXED,
                dst_addr,
            )
            .expect("failed to mremap");

            assert_eq!(new_addr, dst_addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The first moved page should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');

            // The second page should be part of the original dst mapping.
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'z');
        })
        .await;
    }

    /// Clobbers the middle of an existing mapping with mremap to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_clobber_memory_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let dst_memory = MemoryObject::from(zx::Vmo::create(2 * *PAGE_SIZE).unwrap());
            dst_memory.write(&['x' as u8].repeat(*PAGE_SIZE as usize), 0).unwrap();
            dst_memory.write(&['y' as u8].repeat(*PAGE_SIZE as usize), *PAGE_SIZE).unwrap();

            let dst_addr = current_task
                .mm()
                .unwrap()
                .map_memory(
                    DesiredAddress::Any,
                    dst_memory.into(),
                    0,
                    2 * (*PAGE_SIZE as usize),
                    ProtectionFlags::READ,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::None,
                )
                .unwrap();

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Overwrite the second page of the mapping with the second page of the anonymous mapping.
            let remapped_addr = sys_mremap(
                locked,
                &*current_task,
                (addr + *PAGE_SIZE).unwrap(),
                *PAGE_SIZE as usize,
                *PAGE_SIZE as usize,
                MREMAP_FIXED | MREMAP_MAYMOVE,
                (dst_addr + *PAGE_SIZE).unwrap(),
            )
            .unwrap();

            assert_eq!(remapped_addr, (dst_addr + *PAGE_SIZE).unwrap());

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (2 * *PAGE_SIZE)).unwrap(), 'c');

            check_page_eq(&current_task, dst_addr, 'x');
            check_page_eq(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'b');
        })
        .await;
    }

    /// Every MAP_32BIT mapping must land entirely below the 2 GiB boundary,
    /// even after many such mappings are made.
    #[cfg(target_arch = "x86_64")]
    #[::fuchsia::test]
    async fn test_map_32_bit() {
        use starnix_uapi::PROT_WRITE;

        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            for _i in 0..256 {
                match do_mmap(
                    locked,
                    &current_task,
                    UserAddress::from(0),
                    page_size as usize,
                    PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT,
                    FdNumber::from_raw(-1),
                    0,
                ) {
                    Ok(address) => {
                        let memory_end = address.ptr() + page_size as usize;
                        assert!(memory_end <= 0x80000000);
                    }
                    error => {
                        panic!("mmap with MAP_32BIT failed: {error:?}");
                    }
                }
            }
        })
        .await;
    }
}