// starnix_core/mm/syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::debugger::notify_debugger_of_module_list;
7use crate::mm::{
8    DesiredAddress, FutexKey, IOVecPtr, MappingName, MappingOptions, MembarrierType,
9    MemoryAccessorExt, MremapFlags, PAGE_SIZE, PrivateFutexKey, ProtectionFlags, SharedFutexKey,
10};
11use crate::security;
12use crate::syscalls::time::TimeSpecPtr;
13use crate::task::{CurrentTask, TargetTime, Task};
14use crate::time::utc::estimate_boot_deadline_from_utc;
15use crate::vfs::buffers::{OutputBuffer, UserBuffersInputBuffer, UserBuffersOutputBuffer};
16use crate::vfs::{FdFlags, FdNumber, UserFaultFile};
17use fuchsia_runtime::UtcTimeline;
18use linux_uapi::MLOCK_ONFAULT;
19use starnix_logging::{CATEGORY_STARNIX_MM, log_trace, trace_duration, track_stub};
20use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
21use starnix_syscalls::SyscallArg;
22use starnix_types::time::{duration_from_timespec, time_from_timespec, timespec_from_time};
23use starnix_uapi::auth::{CAP_SYS_PTRACE, PTRACE_MODE_ATTACH_REALCREDS};
24use starnix_uapi::errors::{EINTR, Errno};
25use starnix_uapi::open_flags::OpenFlags;
26use starnix_uapi::user_address::{UserAddress, UserRef};
27use starnix_uapi::user_value::UserValue;
28use starnix_uapi::{
29    FUTEX_BITSET_MATCH_ANY, FUTEX_CLOCK_REALTIME, FUTEX_CMD_MASK, FUTEX_CMP_REQUEUE,
30    FUTEX_CMP_REQUEUE_PI, FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_PRIVATE_FLAG, FUTEX_REQUEUE,
31    FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI, FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
32    FUTEX_WAKE, FUTEX_WAKE_BITSET, FUTEX_WAKE_OP, MAP_ANONYMOUS, MAP_DENYWRITE, MAP_FIXED,
33    MAP_FIXED_NOREPLACE, MAP_GROWSDOWN, MAP_LOCKED, MAP_NORESERVE, MAP_POPULATE, MAP_PRIVATE,
34    MAP_SHARED, MAP_SHARED_VALIDATE, MAP_STACK, MS_INVALIDATE, O_CLOEXEC, O_NONBLOCK, PROT_EXEC,
35    UFFD_USER_MODE_ONLY, errno, error, robust_list_head, tid_t, uapi,
36};
37use std::ops::Deref as _;
38use zx;
39
40#[cfg(target_arch = "x86_64")]
41use starnix_uapi::MAP_32BIT;
42
// Returns any platform-specific mmap flags. This is a separate function because as of this writing
// "attributes on expressions are experimental."
//
// On x86_64 the only platform-specific flag is MAP_32BIT; all other architectures
// contribute no extra flags (the mask is 0).
#[cfg(target_arch = "x86_64")]
fn get_valid_platform_mmap_flags() -> u32 {
    MAP_32BIT
}
#[cfg(not(target_arch = "x86_64"))]
fn get_valid_platform_mmap_flags() -> u32 {
    0
}
53
54/// sys_mmap takes a mutable reference to current_task because it may modify the IP register.
55pub fn sys_mmap(
56    locked: &mut Locked<Unlocked>,
57    current_task: &mut CurrentTask,
58    addr: UserAddress,
59    length: usize,
60    prot: u32,
61    flags: u32,
62    fd: FdNumber,
63    offset: u64,
64) -> Result<UserAddress, Errno> {
65    let user_address = do_mmap(locked, current_task, addr, length, prot, flags, fd, offset)?;
66    if prot & PROT_EXEC != 0 {
67        // Possibly loads a new module. Notify debugger for the change.
68        // We only care about dynamic linker loading modules for now, which uses mmap. In the future
69        // we might want to support unloading modules in munmap or JIT compilation in mprotect.
70        notify_debugger_of_module_list(current_task)?;
71    }
72    Ok(user_address)
73}
74
/// Shared implementation of `mmap`/`mmap2`: validates protection bits, flags, length
/// and offset, translates them into a [`DesiredAddress`] and [`MappingOptions`], runs
/// the security hook, and performs either an anonymous or file-backed mapping.
///
/// Returns the address at which the mapping was placed.
pub fn do_mmap<L>(
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
    prot: u32,
    flags: u32,
    fd: FdNumber,
    offset: u64,
) -> Result<UserAddress, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Unrecognized protection bits are rejected with EINVAL.
    let prot_flags = ProtectionFlags::from_access_bits(prot).ok_or_else(|| {
        track_stub!(TODO("https://fxbug.dev/322874211"), "mmap parse protection", prot);
        errno!(EINVAL)
    })?;

    // The full set of flags this implementation understands; anything outside this
    // mask fails below.
    let valid_flags: u32 = get_valid_platform_mmap_flags()
        | MAP_PRIVATE
        | MAP_SHARED
        | MAP_SHARED_VALIDATE
        | MAP_ANONYMOUS
        | MAP_FIXED
        | MAP_FIXED_NOREPLACE
        | MAP_POPULATE
        | MAP_NORESERVE
        | MAP_STACK
        | MAP_DENYWRITE
        | MAP_GROWSDOWN
        | MAP_LOCKED;
    if flags & !valid_flags != 0 {
        // Per mmap(2), MAP_SHARED_VALIDATE combined with unknown flags reports
        // EOPNOTSUPP rather than EINVAL.
        if flags & MAP_SHARED_VALIDATE != 0 {
            return error!(EOPNOTSUPP);
        }
        track_stub!(TODO("https://fxbug.dev/322873638"), "mmap check flags", flags);
        return error!(EINVAL);
    }

    // The fd is only consulted for file-backed mappings.
    let file = if flags & MAP_ANONYMOUS != 0 { None } else { Some(current_task.files.get(fd)?) };
    // Exactly one of MAP_PRIVATE / MAP_SHARED must be specified.
    // NOTE(review): MAP_SHARED_VALIDATE is numerically MAP_SHARED|MAP_PRIVATE on
    // Linux, so a well-formed MAP_SHARED_VALIDATE request appears to be rejected
    // here — confirm this is intended.
    if flags & (MAP_PRIVATE | MAP_SHARED) == 0
        || flags & (MAP_PRIVATE | MAP_SHARED) == MAP_PRIVATE | MAP_SHARED
    {
        return error!(EINVAL);
    }
    if length == 0 {
        return error!(EINVAL);
    }
    // File offsets must be page-aligned.
    if offset % *PAGE_SIZE != 0 {
        return error!(EINVAL);
    }

    // TODO(tbodt): should we consider MAP_NORESERVE?

    // Translate (addr, MAP_FIXED, MAP_FIXED_NOREPLACE) into a placement request.
    // Note MAP_FIXED_NOREPLACE wins when both fixed flags are set.
    let addr = match (addr, flags & MAP_FIXED != 0, flags & MAP_FIXED_NOREPLACE != 0) {
        (UserAddress::NULL, false, false) => DesiredAddress::Any,
        // A fixed mapping at address zero is not allowed.
        (UserAddress::NULL, true, _) | (UserAddress::NULL, _, true) => return error!(EINVAL),
        (addr, false, false) => DesiredAddress::Hint(addr),
        (addr, _, true) => DesiredAddress::Fixed(addr),
        (addr, true, false) => DesiredAddress::FixedOverwrite(addr),
    };

    // The offset argument is meaningless for anonymous mappings.
    let memory_offset = if flags & MAP_ANONYMOUS != 0 { 0 } else { offset };

    let mut options = MappingOptions::empty();
    if flags & MAP_SHARED != 0 {
        options |= MappingOptions::SHARED;
    }
    if flags & MAP_ANONYMOUS != 0 {
        options |= MappingOptions::ANONYMOUS;
    }
    // MAP_32BIT is ignored when MAP_FIXED is given (matching Linux).
    #[cfg(target_arch = "x86_64")]
    if flags & MAP_FIXED == 0 && flags & MAP_32BIT != 0 {
        options |= MappingOptions::LOWER_32BIT;
    }
    if flags & MAP_GROWSDOWN != 0 {
        options |= MappingOptions::GROWSDOWN;
    }
    if flags & MAP_POPULATE != 0 {
        options |= MappingOptions::POPULATE;
    }
    if flags & MAP_LOCKED != 0 {
        // The kernel isn't expected to return an error if locking fails with this flag, so for now
        // this implementation will always fail to lock memory even if mapping succeeds.
        track_stub!(TODO("https://fxbug.dev/406377606"), "MAP_LOCKED");
    }

    // LSM hook: may deny the mapping based on file, protection and options.
    security::mmap_file(current_task, file.as_ref(), prot_flags, options)?;

    if flags & MAP_ANONYMOUS != 0 {
        trace_duration!(CATEGORY_STARNIX_MM, "AnonymousMmap");
        current_task.mm()?.map_anonymous(addr, length, prot_flags, options, MappingName::None)
    } else {
        trace_duration!(CATEGORY_STARNIX_MM, "FileBackedMmap");
        // TODO(tbodt): maximize protection flags so that mprotect works
        let file = file.expect("file retrieved above for file-backed mapping");
        file.mmap(
            locked,
            current_task,
            addr,
            memory_offset,
            length,
            prot_flags,
            options,
            file.name.to_passive(),
        )
    }
}
183
184pub fn sys_mprotect(
185    _locked: &mut Locked<Unlocked>,
186    current_task: &CurrentTask,
187    addr: UserAddress,
188    length: usize,
189    prot: u32,
190) -> Result<(), Errno> {
191    let prot_flags = ProtectionFlags::from_bits(prot).ok_or_else(|| {
192        track_stub!(TODO("https://fxbug.dev/322874672"), "mprotect parse protection", prot);
193        errno!(EINVAL)
194    })?;
195    current_task.mm()?.protect(current_task, addr, length, prot_flags)?;
196    Ok(())
197}
198
199pub fn sys_mremap(
200    _locked: &mut Locked<Unlocked>,
201    current_task: &CurrentTask,
202    addr: UserAddress,
203    old_length: usize,
204    new_length: usize,
205    flags: u32,
206    new_addr: UserAddress,
207) -> Result<UserAddress, Errno> {
208    let flags = MremapFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
209    let addr =
210        current_task.mm()?.remap(current_task, addr, old_length, new_length, flags, new_addr)?;
211    Ok(addr)
212}
213
214pub fn sys_munmap(
215    _locked: &mut Locked<Unlocked>,
216    current_task: &CurrentTask,
217    addr: UserAddress,
218    length: usize,
219) -> Result<(), Errno> {
220    current_task.mm()?.unmap(addr, length)?;
221    Ok(())
222}
223
224pub fn sys_msync(
225    _locked: &mut Locked<Unlocked>,
226    current_task: &CurrentTask,
227    addr: UserAddress,
228    length: usize,
229    flags: u32,
230) -> Result<(), Errno> {
231    track_stub!(TODO("https://fxbug.dev/322874588"), "msync");
232
233    let mm = current_task.mm()?;
234
235    // Perform some basic validation of the address range given to satisfy gvisor tests that
236    // use msync as a way to probe whether a page is mapped or not.
237    mm.ensure_mapped(addr, length)?;
238
239    let addr_end = (addr + length).map_err(|_| errno!(ENOMEM))?;
240    if flags & MS_INVALIDATE != 0 && mm.state.read().num_locked_bytes(addr..addr_end) > 0 {
241        // gvisor mlock tests rely on returning EBUSY from msync on locked ranges.
242        return error!(EBUSY);
243    }
244
245    Ok(())
246}
247
248pub fn sys_madvise(
249    _locked: &mut Locked<Unlocked>,
250    current_task: &CurrentTask,
251    addr: UserAddress,
252    length: usize,
253    advice: u32,
254) -> Result<(), Errno> {
255    current_task.mm()?.madvise(current_task, addr, length, advice)?;
256    Ok(())
257}
258
/// Stub for `process_madvise`: always fails with ENOSYS.
///
/// All arguments are ignored until the syscall is implemented (tracked by the
/// referenced bug).
pub fn sys_process_madvise(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _pidfd: FdNumber,
    _iovec_addr: IOVecPtr,
    _iovec_count: UserValue<i32>,
    _advice: UserValue<i32>,
    _flags: UserValue<u32>,
) -> Result<usize, Errno> {
    track_stub!(TODO("https://fxbug.dev/409060664"), "process_madvise");
    error!(ENOSYS)
}
271
/// Sets the program break; delegates entirely to the memory manager, which
/// returns the (possibly unchanged) break address.
pub fn sys_brk(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
) -> Result<UserAddress, Errno> {
    current_task.mm()?.set_brk(locked, current_task, addr)
}
279
280pub fn sys_process_vm_readv(
281    locked: &mut Locked<Unlocked>,
282    current_task: &CurrentTask,
283    tid: tid_t,
284    local_iov_addr: IOVecPtr,
285    local_iov_count: UserValue<i32>,
286    remote_iov_addr: IOVecPtr,
287    remote_iov_count: UserValue<i32>,
288    flags: usize,
289) -> Result<usize, Errno> {
290    if flags != 0 {
291        return error!(EINVAL);
292    }
293
294    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
295    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
296    // make sure to return Ok(0) before doing any other validation/operations.
297    if (local_iov_count == 0 && local_iov_addr.is_null())
298        || (remote_iov_count == 0 && remote_iov_addr.is_null())
299    {
300        return Ok(0);
301    }
302
303    let weak_remote_task = current_task.get_task(tid);
304    let remote_task = Task::from_weak(&weak_remote_task)?;
305
306    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
307
308    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
309    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
310    log_trace!(
311        "process_vm_readv(tid={}, local_iov={:?}, remote_iov={:?})",
312        tid,
313        local_iov,
314        remote_iov
315    );
316
317    track_stub!(TODO("https://fxbug.dev/322874765"), "process_vm_readv single-copy");
318    // According to the man page, this syscall was added to Linux specifically to
319    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
320    // point.
321    let mut output = UserBuffersOutputBuffer::unified_new(current_task, local_iov)?;
322    let remote_mm = remote_task.mm().ok();
323    if current_task.has_same_address_space(remote_mm.as_ref()) {
324        let mut input = UserBuffersInputBuffer::unified_new(current_task, remote_iov)?;
325        output.write_buffer(&mut input)
326    } else {
327        let mut input = UserBuffersInputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
328        output.write_buffer(&mut input)
329    }
330}
331
332pub fn sys_process_vm_writev(
333    locked: &mut Locked<Unlocked>,
334    current_task: &CurrentTask,
335    tid: tid_t,
336    local_iov_addr: IOVecPtr,
337    local_iov_count: UserValue<i32>,
338    remote_iov_addr: IOVecPtr,
339    remote_iov_count: UserValue<i32>,
340    flags: usize,
341) -> Result<usize, Errno> {
342    if flags != 0 {
343        return error!(EINVAL);
344    }
345
346    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
347    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
348    // make sure to return Ok(0) before doing any other validation/operations.
349    if (local_iov_count == 0 && local_iov_addr.is_null())
350        || (remote_iov_count == 0 && remote_iov_addr.is_null())
351    {
352        return Ok(0);
353    }
354
355    let weak_remote_task = current_task.get_task(tid);
356    let remote_task = Task::from_weak(&weak_remote_task)?;
357
358    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
359
360    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
361    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
362    log_trace!(
363        "sys_process_vm_writev(tid={}, local_iov={:?}, remote_iov={:?})",
364        tid,
365        local_iov,
366        remote_iov
367    );
368
369    track_stub!(TODO("https://fxbug.dev/322874339"), "process_vm_writev single-copy");
370    // NB: According to the man page, this syscall was added to Linux specifically to
371    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
372    // point.
373    let mut input = UserBuffersInputBuffer::unified_new(current_task, local_iov)?;
374    let remote_mm = remote_task.mm().ok();
375    if current_task.has_same_address_space(remote_mm.as_ref()) {
376        let mut output = UserBuffersOutputBuffer::unified_new(current_task, remote_iov)?;
377        output.write_buffer(&mut input)
378    } else {
379        let mut output = UserBuffersOutputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
380        output.write_buffer(&mut input)
381    }
382}
383
384pub fn sys_process_mrelease(
385    _locked: &mut Locked<Unlocked>,
386    current_task: &CurrentTask,
387    pidfd: FdNumber,
388    flags: u32,
389) -> Result<(), Errno> {
390    if flags != 0 {
391        return error!(EINVAL);
392    }
393    let file = current_task.files.get(pidfd)?;
394    let task = current_task.get_task(file.as_thread_group_key()?.pid());
395    let task = task.upgrade().ok_or_else(|| errno!(ESRCH))?;
396    if !task.load_stopped().is_stopped() {
397        return error!(EINVAL);
398    }
399
400    let mm = task.mm()?;
401    let mm_state = mm.state.write();
402    mm_state.mrelease()
403}
404
/// Implements `membarrier(2)` for the supported command set.
///
/// `_flags` and `_cpu_id` are currently unused; the RSEQ command family is not
/// implemented and reports ENOSYS.
pub fn sys_membarrier(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    cmd: uapi::membarrier_cmd,
    _flags: u32,
    _cpu_id: i32,
) -> Result<u32, Errno> {
    match cmd {
        // This command returns a bit mask of all supported commands.
        // We support everything except for the RSEQ family.
        uapi::membarrier_cmd_MEMBARRIER_CMD_QUERY => Ok(uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
            | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE),
        // Global and global expedited barriers are treated identically. We don't track
        // registration for global expedited barriers currently.
        uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
        | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED => {
            system_barrier(BarrierType::DataMemory);
            Ok(0)
        }
        // Global registration commands are ignored.
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED => Ok(0),
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED => {
            // A private expedited barrier is only issued if the address space is registered
            // for these barriers.
            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::Memory) {
                // If a barrier is requested, issue a global barrier.
                system_barrier(BarrierType::DataMemory);
                Ok(0)
            } else {
                // Using the private command without prior registration is EPERM,
                // matching membarrier(2).
                error!(EPERM)
            }
        }
        // Private sync core barriers are treated as global instruction stream barriers.
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE => {
            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::SyncCore)
            {
                system_barrier(BarrierType::InstructionStream);
                Ok(0)
            } else {
                error!(EPERM)
            }
        }
        // Registration commands record intent on the address space; the returned
        // value (if any) is not meaningful to the caller.
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED => {
            let _ =
                current_task.mm()?.register_membarrier_private_expedited(MembarrierType::Memory)?;
            Ok(0)
        }

        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE => {
            let _ = current_task
                .mm()?
                .register_membarrier_private_expedited(MembarrierType::SyncCore)?;
            Ok(0)
        }
        // The RSEQ family is not implemented yet.
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ => {
            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
            error!(ENOSYS)
        }
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ => {
            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
            error!(ENOSYS)
        }
        _ => error!(EINVAL),
    }
}
475
/// Creates a userfaultfd file descriptor.
///
/// Accepts O_CLOEXEC, O_NONBLOCK and UFFD_USER_MODE_ONLY; any other flag bit is
/// EINVAL. Descriptors without O_CLOEXEC (i.e. that would survive exec) are not
/// supported yet and fail with ENOSYS.
pub fn sys_userfaultfd(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    raw_flags: u32,
) -> Result<FdNumber, Errno> {
    // Reject any flag outside the supported set.
    let unknown_flags = raw_flags & !(O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
    if unknown_flags != 0 {
        return error!(EINVAL, format!("unknown flags provided: {unknown_flags:x?}"));
    }
    // Translate the raw bits into open-file flags for the new file object.
    let mut open_flags = OpenFlags::empty();
    if raw_flags & O_NONBLOCK != 0 {
        open_flags |= OpenFlags::NONBLOCK;
    }
    if raw_flags & O_CLOEXEC != 0 {
        open_flags |= OpenFlags::CLOEXEC;
    }

    // A userfaultfd without O_CLOEXEC would have to survive exec(), which is not
    // implemented; bail out with ENOSYS in that case.
    let fd_flags = if raw_flags & O_CLOEXEC != 0 {
        FdFlags::CLOEXEC
    } else {
        track_stub!(TODO("https://fxbug.dev/297375964"), "userfaultfds that survive exec()");
        return error!(ENOSYS);
    };

    // NOTE(review): this is true when UFFD_USER_MODE_ONLY is *absent*, which reads as
    // inverted relative to the flag's name — confirm against the semantics of the
    // corresponding parameter of UserFaultFile::new.
    let user_mode_only = raw_flags & UFFD_USER_MODE_ONLY == 0;
    let uff_handle = UserFaultFile::new(locked, current_task, open_flags, user_mode_only)?;
    current_task.add_file(locked, uff_handle, fd_flags)
}
504
505pub fn sys_futex(
506    locked: &mut Locked<Unlocked>,
507    current_task: &mut CurrentTask,
508    addr: UserAddress,
509    op: u32,
510    value: u32,
511    timeout_or_value2: SyscallArg,
512    addr2: UserAddress,
513    value3: u32,
514) -> Result<usize, Errno> {
515    if op & FUTEX_PRIVATE_FLAG != 0 {
516        do_futex::<PrivateFutexKey>(
517            locked,
518            current_task,
519            addr,
520            op,
521            value,
522            timeout_or_value2,
523            addr2,
524            value3,
525        )
526    } else {
527        do_futex::<SharedFutexKey>(
528            locked,
529            current_task,
530            addr,
531            op,
532            value,
533            timeout_or_value2,
534            addr2,
535            value3,
536        )
537    }
538}
539
/// Shared implementation of the futex operations for a given key type (private or
/// shared futexes).
///
/// `timeout_or_value2` is reinterpreted per command: a timespec pointer for the
/// wait/lock families, or a requeue count for FUTEX_REQUEUE/FUTEX_CMP_REQUEUE.
fn do_futex<Key: FutexKey>(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    addr: UserAddress,
    op: u32,
    value: u32,
    timeout_or_value2: SyscallArg,
    addr2: UserAddress,
    value3: u32,
) -> Result<usize, Errno> {
    let futexes = Key::get_table_from_task(current_task)?;
    // Strip option bits (CLOCK_REALTIME, PRIVATE_FLAG) to get the base command.
    let cmd = op & (FUTEX_CMD_MASK as u32);

    // Decide whether timestamps are interpreted on the realtime clock. Note the arm
    // order matters: an unsupported CLOCK_REALTIME combination is EINVAL before the
    // FUTEX_LOCK_PI arm is considered.
    let is_realtime = match (cmd, op & FUTEX_CLOCK_REALTIME != 0) {
        // This option bit can be employed only with the FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
        // (since Linux 4.5) FUTEX_WAIT, and (since Linux 5.14) FUTEX_LOCK_PI2 operations.
        (FUTEX_WAIT_BITSET | FUTEX_WAIT_REQUEUE_PI | FUTEX_WAIT | FUTEX_LOCK_PI2, true) => true,
        (_, true) => return error!(EINVAL),

        // FUTEX_LOCK_PI always uses realtime.
        (FUTEX_LOCK_PI, _) => true,

        (_, false) => false,
    };

    // The timeout is interpreted differently by WAIT and WAIT_BITSET: WAIT takes a
    // timeout and WAIT_BITSET takes a deadline.
    //
    // A null timespec pointer means "wait forever".
    let read_timespec = |current_task: &CurrentTask| {
        let utime = TimeSpecPtr::new(current_task, timeout_or_value2);
        if utime.is_null() {
            Ok(timespec_from_time(zx::MonotonicInstant::INFINITE))
        } else {
            current_task.read_multi_arch_object(utime)
        }
    };
    // Relative timeout: converted to an absolute monotonic deadline from "now".
    let read_timeout = |current_task: &CurrentTask| {
        let timespec = read_timespec(current_task)?;
        let timeout = duration_from_timespec(timespec);
        let deadline = zx::MonotonicInstant::after(timeout?);
        if is_realtime {
            // Since this is a timeout, waiting on the monotonic timeline before it's paused is
            // just as good as actually estimating UTC here.
            track_stub!(TODO("https://fxbug.dev/356912301"), "FUTEX_CLOCK_REALTIME timeout");
        }
        Ok(deadline)
    };
    // Absolute deadline: on the UTC timeline when CLOCK_REALTIME is requested,
    // otherwise on the monotonic timeline.
    let read_deadline = |current_task: &CurrentTask| {
        let timespec = read_timespec(current_task)?;
        if is_realtime {
            Ok(TargetTime::RealTime(time_from_timespec::<UtcTimeline>(timespec)?))
        } else {
            Ok(TargetTime::Monotonic(time_from_timespec::<zx::MonotonicTimeline>(timespec)?))
        }
    };

    match cmd {
        FUTEX_WAIT => {
            // Plain WAIT matches any waiter bit and takes a relative timeout.
            let deadline = read_timeout(current_task)?;
            let bitset = FUTEX_BITSET_MATCH_ANY;
            do_futex_wait_with_restart::<Key>(
                locked,
                current_task,
                addr,
                value,
                bitset,
                TargetTime::Monotonic(deadline),
            )?;
            Ok(0)
        }
        FUTEX_WAKE => {
            futexes.wake(locked, current_task, addr, value as usize, FUTEX_BITSET_MATCH_ANY)
        }
        FUTEX_WAKE_OP => {
            track_stub!(TODO("https://fxbug.dev/361181940"), "FUTEX_WAKE_OP");
            error!(ENOSYS)
        }
        FUTEX_WAIT_BITSET => {
            // An empty bitset can never be matched, so it is rejected.
            if value3 == 0 {
                return error!(EINVAL);
            }
            let deadline = read_deadline(current_task)?;
            do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, value3, deadline)?;
            Ok(0)
        }
        FUTEX_WAKE_BITSET => {
            if value3 == 0 {
                return error!(EINVAL);
            }
            futexes.wake(locked, current_task, addr, value as usize, value3)
        }
        FUTEX_REQUEUE | FUTEX_CMP_REQUEUE => {
            // Here timeout_or_value2 carries the requeue count, not a timespec.
            let wake_count = value as usize;
            let requeue_count: usize = timeout_or_value2.into();
            if wake_count > std::i32::MAX as usize || requeue_count > std::i32::MAX as usize {
                return error!(EINVAL);
            }
            // CMP_REQUEUE additionally verifies the futex word still holds value3.
            let expected_value = if cmd == FUTEX_CMP_REQUEUE { Some(value3) } else { None };
            futexes.requeue(
                locked,
                current_task,
                addr,
                wake_count,
                requeue_count,
                addr2,
                expected_value,
            )
        }
        FUTEX_WAIT_REQUEUE_PI => {
            track_stub!(TODO("https://fxbug.dev/361181558"), "FUTEX_WAIT_REQUEUE_PI");
            error!(ENOSYS)
        }
        FUTEX_CMP_REQUEUE_PI => {
            track_stub!(TODO("https://fxbug.dev/361181773"), "FUTEX_CMP_REQUEUE_PI");
            error!(ENOSYS)
        }
        FUTEX_LOCK_PI | FUTEX_LOCK_PI2 => {
            futexes.lock_pi(locked, current_task, addr, read_timeout(current_task)?)?;
            Ok(0)
        }
        FUTEX_TRYLOCK_PI => {
            track_stub!(TODO("https://fxbug.dev/361175318"), "FUTEX_TRYLOCK_PI");
            error!(ENOSYS)
        }
        FUTEX_UNLOCK_PI => {
            futexes.unlock_pi(locked, current_task, addr)?;
            Ok(0)
        }
        _ => {
            track_stub!(TODO("https://fxbug.dev/322875124"), "futex unknown command", cmd);
            error!(ENOSYS)
        }
    }
}
673
/// Waits on a futex word and, if the wait is interrupted by a signal (EINTR),
/// registers a restart function so the syscall resumes with the *same absolute
/// deadline* after the signal is handled (reported as ERESTART_RESTARTBLOCK).
fn do_futex_wait_with_restart<Key: FutexKey>(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    addr: UserAddress,
    value: u32,
    mask: u32,
    deadline: TargetTime,
) -> Result<(), Errno> {
    let futexes = Key::get_table_from_task(current_task)?;
    // Dispatch on the timeline the deadline was expressed in.
    let result = match deadline {
        TargetTime::Monotonic(mono_deadline) => {
            futexes.wait(locked, current_task, addr, value, mask, mono_deadline)
        }
        TargetTime::BootInstant(boot_deadline) => {
            let timer_slack = current_task.read().get_timerslack();
            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
        }
        TargetTime::RealTime(utc_deadline) => {
            // We convert real time deadlines to boot time deadlines since we cannot wait using a UTC deadline.
            let (boot_deadline, _) = estimate_boot_deadline_from_utc(utc_deadline);
            let timer_slack = current_task.read().get_timerslack();
            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
        }
    };
    match result {
        Err(err) if err == EINTR => {
            // On signal interruption, re-run this same wait (same addr/value/mask and
            // the original absolute deadline) once the signal has been delivered.
            current_task.set_syscall_restart_func(move |locked, current_task| {
                do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, mask, deadline)
            });
            error!(ERESTART_RESTARTBLOCK)
        }
        result => result,
    }
}
708
709pub fn sys_get_robust_list(
710    _locked: &mut Locked<Unlocked>,
711    current_task: &CurrentTask,
712    tid: tid_t,
713    user_head_ptr: UserRef<UserAddress>,
714    user_len_ptr: UserRef<usize>,
715) -> Result<(), Errno> {
716    if tid < 0 {
717        return error!(EINVAL);
718    }
719    if user_head_ptr.is_null() || user_len_ptr.is_null() {
720        return error!(EFAULT);
721    }
722    if tid != 0 {
723        security::check_task_capable(current_task, CAP_SYS_PTRACE)?;
724    }
725    let task = if tid == 0 { current_task.weak_task() } else { current_task.get_task(tid) };
726    let task = Task::from_weak(&task)?;
727    current_task.write_object(user_head_ptr, &task.read().robust_list_head.addr())?;
728    current_task.write_object(user_len_ptr, &std::mem::size_of::<robust_list_head>())?;
729    Ok(())
730}
731
732pub fn sys_set_robust_list(
733    _locked: &mut Locked<Unlocked>,
734    current_task: &CurrentTask,
735    user_head: UserRef<robust_list_head>,
736    len: usize,
737) -> Result<(), Errno> {
738    if len != std::mem::size_of::<robust_list_head>() {
739        return error!(EINVAL);
740    }
741    current_task.write().robust_list_head = user_head.into();
742    Ok(())
743}
744
/// Locks the given range of memory, delegating to `sys_mlock2`.
pub fn sys_mlock(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
) -> Result<(), Errno> {
    // If flags is 0, mlock2() behaves exactly the same as mlock().
    sys_mlock2(locked, current_task, addr, length, 0)
}
754
755pub fn sys_mlock2(
756    locked: &mut Locked<Unlocked>,
757    current_task: &CurrentTask,
758    addr: UserAddress,
759    length: usize,
760    flags: u64,
761) -> Result<(), Errno> {
762    const KNOWN_FLAGS: u64 = MLOCK_ONFAULT as u64;
763    if (flags & !KNOWN_FLAGS) != 0 {
764        return error!(EINVAL);
765    }
766    let on_fault = flags & MLOCK_ONFAULT as u64 != 0;
767    current_task.mm()?.mlock(current_task, locked, addr, length, on_fault)
768}
769
770pub fn sys_munlock(
771    _locked: &mut Locked<Unlocked>,
772    current_task: &CurrentTask,
773    addr: UserAddress,
774    length: usize,
775) -> Result<(), Errno> {
776    current_task.mm()?.munlock(current_task, addr, length)
777}
778
/// Stub for `mlockall`: always fails with ENOSYS (tracked by the referenced bug).
pub fn sys_mlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "mlockall()");
    error!(ENOSYS)
}
787
/// Stub for `munlockall`: always fails with ENOSYS (tracked by the referenced bug).
pub fn sys_munlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "munlockall()");
    error!(ENOSYS)
}
796
/// Stub for `mincore`: always fails with ENOSYS (tracked by the referenced bug).
pub fn sys_mincore(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _addr: UserAddress,
    _length: usize,
    _out: UserRef<u8>,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297372240"), "mincore()");
    error!(ENOSYS)
}
807
808// Syscalls for arch32 usage
#[cfg(target_arch = "aarch64")]
mod arch32 {
    //! Compatibility shims for 32-bit (arch32) callers. Most syscalls are re-exported
    //! unchanged from the parent module; the functions below need 32-bit-specific
    //! argument handling.
    use crate::mm::PAGE_SIZE;
    use crate::mm::syscalls::{UserAddress, sys_mmap};
    use crate::task::{CurrentTask, RobustListHeadPtr};
    use crate::vfs::FdNumber;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::errors::Errno;
    use starnix_uapi::user_address::UserRef;
    use starnix_uapi::{error, uapi};

    /// 32-bit `set_robust_list`: validates against the arch32 layout of
    /// `robust_list_head` and stores a pointer wrapped for 32-bit access.
    pub fn sys_arch32_set_robust_list(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_head: UserRef<uapi::arch32::robust_list_head>,
        len: usize,
    ) -> Result<(), Errno> {
        if len != std::mem::size_of::<uapi::arch32::robust_list_head>() {
            return error!(EINVAL);
        }
        current_task.write().robust_list_head = RobustListHeadPtr::from_32(user_head);
        Ok(())
    }

    /// 32-bit `mmap2`: the offset argument is expressed in pages, so it is scaled
    /// by the page size before delegating to `sys_mmap`.
    pub fn sys_arch32_mmap2(
        locked: &mut Locked<Unlocked>,
        current_task: &mut CurrentTask,
        addr: UserAddress,
        length: usize,
        prot: u32,
        flags: u32,
        fd: FdNumber,
        offset: u64,
    ) -> Result<UserAddress, Errno> {
        sys_mmap(locked, current_task, addr, length, prot, flags, fd, offset * *PAGE_SIZE)
    }

    /// 32-bit `munmap`: the range must lie entirely within the lower 4 GiB.
    pub fn sys_arch32_munmap(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
    ) -> Result<(), Errno> {
        if !addr.is_lower_32bit() || length >= (1 << 32) {
            return error!(EINVAL);
        }
        current_task.mm()?.unmap(addr, length)?;
        Ok(())
    }

    // These syscalls need no 32-bit-specific handling and are re-exported directly.
    pub use super::{
        sys_futex as sys_arch32_futex, sys_madvise as sys_arch32_madvise,
        sys_membarrier as sys_arch32_membarrier, sys_mincore as sys_arch32_mincore,
        sys_mlock as sys_arch32_mlock, sys_mlock2 as sys_arch32_mlock2,
        sys_mlockall as sys_arch32_mlockall, sys_mremap as sys_arch32_mremap,
        sys_msync as sys_arch32_msync, sys_munlock as sys_arch32_munlock,
        sys_munlockall as sys_arch32_munlockall,
        sys_process_mrelease as sys_arch32_process_mrelease,
        sys_process_vm_readv as sys_arch32_process_vm_readv,
        sys_userfaultfd as sys_arch32_userfaultfd,
    };
}
871
872#[cfg(target_arch = "aarch64")]
873pub use arch32::*;
874
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::memory::MemoryObject;
    use crate::testing::*;
    use starnix_uapi::errors::EEXIST;
    use starnix_uapi::file_mode::Access;
    use starnix_uapi::{MREMAP_FIXED, MREMAP_MAYMOVE, PROT_READ};

    /// A hint address that collides with an existing mapping is ignored (when
    /// MAP_FIXED is absent) and the kernel picks a different address.
    #[::fuchsia::test]
    async fn test_mmap_with_colliding_hint() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    // Without MAP_FIXED the colliding hint must not be honored.
                    assert_ne!(address, mapped_address);
                }
                error => {
                    panic!("mmap with colliding hint failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// MAP_FIXED over an existing mapping succeeds and replaces it at exactly
    /// the requested address.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    // MAP_FIXED must place the mapping at the requested address.
                    assert_eq!(address, mapped_address);
                }
                error => {
                    panic!("mmap with fixed collision failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// MAP_FIXED_NOREPLACE over an existing mapping fails with EEXIST instead
    /// of clobbering the existing mapping.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_noreplace_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                FdNumber::from_raw(-1),
                0,
            ) {
                Err(errno) => {
                    assert_eq!(errno, EEXIST);
                }
                result => {
                    panic!("mmap with fixed_noreplace collision failed: {result:?}");
                }
            }
        })
        .await;
    }

    /// It is ok to call munmap with an address that is a multiple of the page size, and
    /// a non-zero length.
    #[::fuchsia::test]
    async fn test_munmap() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );

            // Verify that the memory is no longer readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
        })
        .await;
    }

    /// It is ok to call munmap on an unmapped range.
    #[::fuchsia::test]
    async fn test_munmap_not_mapped() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
            // Unmapping the same (now empty) range again is not an error.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
        })
        .await;
    }

    /// It is an error to call munmap with a length of 0.
    #[::fuchsia::test]
    async fn test_munmap_0_length() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(sys_munmap(locked, &current_task, mapped_address, 0), error!(EINVAL));
        })
        .await;
    }

    /// It is an error to call munmap with an address that is not a multiple of the page size.
    #[::fuchsia::test]
    async fn test_munmap_not_aligned() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + 1u64).unwrap(),
                    *PAGE_SIZE as usize
                ),
                error!(EINVAL)
            );

            // Verify that the memory is still readable.
            assert!(current_task.read_memory_to_array::<5>(mapped_address).is_ok());
        })
        .await;
    }

    /// The entire page should be unmapped, not just the range [address, address + length).
    #[::fuchsia::test]
    async fn test_munmap_unmap_partial() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) / 2),
                Ok(())
            );

            // Verify that memory can't be read in either half of the page.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE - 2)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// All pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_multiple_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // A length one byte past the first page must unmap both pages.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) + 1),
                Ok(())
            );

            // Verify that neither page is readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// Only the pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_one_of_many_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // A length one byte short of a page only intersects the first page.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) - 1),
                Ok(())
            );

            // Verify that the second page is still readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap())
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap the middle page of a mapping.
    #[::fuchsia::test]
    async fn test_munmap_middle_page() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Verify that the first and third pages are still readable.
            assert!(current_task.read_memory_to_vec(mapped_address, 5).is_ok());
            assert_eq!(
                current_task.read_memory_to_vec((mapped_address + *PAGE_SIZE).unwrap(), 5),
                error!(EFAULT)
            );
            assert!(
                current_task
                    .read_memory_to_vec((mapped_address + (*PAGE_SIZE * 2)).unwrap(), 5)
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap a range of pages that includes disjoint mappings.
    #[::fuchsia::test]
    async fn test_munmap_many_mappings() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_addresses: Vec<_> = std::iter::repeat_with(|| {
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE)
            })
            .take(3)
            .collect();
            // Compute one range that covers all three single-page mappings
            // (plus whatever unrelated address space lies between them).
            let min_address = *mapped_addresses.iter().min().unwrap();
            let max_address = *mapped_addresses.iter().max().unwrap();
            let unmap_length = (max_address - min_address) + *PAGE_SIZE as usize;

            assert_eq!(sys_munmap(locked, &current_task, min_address, unmap_length), Ok(()));

            // Verify that none of the mapped pages are readable.
            for mapped_address in mapped_addresses {
                assert_eq!(current_task.read_memory_to_vec(mapped_address, 5), error!(EFAULT));
            }
        })
        .await;
    }

    /// msync succeeds on fully-mapped ranges (even ones spanning multiple
    /// mappings) and returns ENOMEM for ranges containing an unmapped page.
    #[::fuchsia::test]
    async fn test_msync_validates_address_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages and test that ranges covering these pages return no error.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );

            // Unmap the middle page and test that ranges covering that page return ENOMEM.
            sys_munmap(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE as usize)
                .expect("unmap middle");
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize, 0), Ok(()));
            assert_eq!(
                sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + (*PAGE_SIZE * 2)).unwrap(),
                    *PAGE_SIZE as usize,
                    0
                ),
                Ok(())
            );

            // Map the middle page back and test that ranges covering the three pages
            // (spanning multiple ranges) return no error.
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );
        })
        .await;
    }

    /// Shrinks an entire range.
    #[::fuchsia::test]
    async fn test_mremap_shrink_whole_range_from_end() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            // Shrink the mapping from 2 to 1 pages.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            // The first page survives in place; the second is gone.
            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
        })
        .await;
    }

    /// Shrinks part of a range, introducing a hole in the middle.
    #[::fuchsia::test]
    async fn test_mremap_shrink_partial_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first 2 pages down to 1, creating a hole.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinking doesn't care if the range specified spans multiple mappings.
    #[::fuchsia::test]
    async fn test_mremap_shrink_across_ranges() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, then map the middle again. This will leave us with
            // 3 contiguous mappings.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );

            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Remap over all three mappings, shrinking to 1 page.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 3,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_unmapped(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap());
        })
        .await;
    }

    /// Grows a mapping in-place.
    #[::fuchsia::test]
    async fn test_mremap_grow_in_place() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, leaving a hole.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Grow the first page in-place into the middle.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE,
                    *PAGE_SIZE * 2,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');

            // The middle page should be new, and not just pointing to the original middle page filled
            // with 'b'.
            check_page_ne(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Tries to grow a set of pages that cannot fit, and forces a move.
    #[::fuchsia::test]
    async fn test_mremap_grow_maymove() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Grow the first two pages by 1, forcing a move.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE * 3,
                MREMAP_MAYMOVE,
                UserAddress::default(),
            )
            .expect("failed to mremap");

            assert_ne!(new_addr, addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The moved pages should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'b');

            // The newly grown page should not be the same as the original third page.
            check_page_ne(&current_task, (new_addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinks a set of pages and move them to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_shrink_fixed() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages which will act as the destination.
            let dst_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, dst_addr, 'y');
            fill_page(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'z');

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first two pages and move them to overwrite the mappings at `dst_addr`.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE,
                MREMAP_MAYMOVE | MREMAP_FIXED,
                dst_addr,
            )
            .expect("failed to mremap");

            assert_eq!(new_addr, dst_addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The first moved page should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');

            // The second page should be part of the original dst mapping.
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'z');
        })
        .await;
    }

    /// Clobbers the middle of an existing mapping with mremap to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_clobber_memory_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Build a two-page memory object pre-filled with 'x' / 'y' to act
            // as the destination mapping.
            let dst_memory = MemoryObject::from(zx::Vmo::create(2 * *PAGE_SIZE).unwrap());
            dst_memory.write(&['x' as u8].repeat(*PAGE_SIZE as usize), 0).unwrap();
            dst_memory.write(&['y' as u8].repeat(*PAGE_SIZE as usize), *PAGE_SIZE).unwrap();

            let dst_addr = current_task
                .mm()
                .unwrap()
                .map_memory(
                    DesiredAddress::Any,
                    dst_memory.into(),
                    0,
                    2 * (*PAGE_SIZE as usize),
                    ProtectionFlags::READ,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::None,
                )
                .unwrap();

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Overwrite the second page of the mapping with the second page of the anonymous mapping.
            let remapped_addr = sys_mremap(
                locked,
                &*current_task,
                (addr + *PAGE_SIZE).unwrap(),
                *PAGE_SIZE as usize,
                *PAGE_SIZE as usize,
                MREMAP_FIXED | MREMAP_MAYMOVE,
                (dst_addr + *PAGE_SIZE).unwrap(),
            )
            .unwrap();

            assert_eq!(remapped_addr, (dst_addr + *PAGE_SIZE).unwrap());

            // The source mapping keeps its outer pages; the moved page is gone.
            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (2 * *PAGE_SIZE)).unwrap(), 'c');

            // The destination keeps its first page but its second page was
            // clobbered by the moved 'b' page.
            check_page_eq(&current_task, dst_addr, 'x');
            check_page_eq(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'b');
        })
        .await;
    }

    /// MAP_32BIT mappings must land entirely below the 2GiB boundary.
    #[cfg(target_arch = "x86_64")]
    #[::fuchsia::test]
    async fn test_map_32_bit() {
        use starnix_uapi::PROT_WRITE;

        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            // Repeat to make it likely the allocator would exceed the limit if
            // MAP_32BIT were ignored.
            for _i in 0..256 {
                match do_mmap(
                    locked,
                    &current_task,
                    UserAddress::from(0),
                    page_size as usize,
                    PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT,
                    FdNumber::from_raw(-1),
                    0,
                ) {
                    Ok(address) => {
                        let memory_end = address.ptr() + page_size as usize;
                        assert!(memory_end <= 0x80000000);
                    }
                    error => {
                        panic!("mmap with MAP_32BIT failed: {error:?}");
                    }
                }
            }
        })
        .await;
    }
}