Skip to main content

starnix_core/mm/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::debugger::notify_debugger_of_module_list;
7use crate::mm::{
8    DesiredAddress, FutexKey, IOVecPtr, MappingName, MappingOptions, MembarrierType,
9    MemoryAccessorExt, MremapFlags, MsyncFlags, PAGE_SIZE, PrivateFutexKey, ProtectionFlags,
10    SharedFutexKey,
11};
12use crate::security;
13use crate::syscalls::time::TimeSpecPtr;
14use crate::task::{CurrentTask, Task};
15use crate::time::TargetTime;
16use crate::time::utc::estimate_boot_deadline_from_utc;
17use crate::vfs::buffers::{OutputBuffer, UserBuffersInputBuffer, UserBuffersOutputBuffer};
18use crate::vfs::{FdFlags, FdNumber, UserFaultFile};
19use fuchsia_runtime::UtcTimeline;
20use linux_uapi::MLOCK_ONFAULT;
21use starnix_logging::{CATEGORY_STARNIX_MM, log_trace, trace_duration, track_stub};
22use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
23use starnix_syscalls::SyscallArg;
24use starnix_types::time::{duration_from_timespec, time_from_timespec, timespec_from_time};
25use starnix_uapi::auth::{CAP_SYS_PTRACE, PTRACE_MODE_ATTACH_REALCREDS};
26use starnix_uapi::errors::{EINTR, Errno};
27use starnix_uapi::open_flags::OpenFlags;
28use starnix_uapi::user_address::{UserAddress, UserRef};
29use starnix_uapi::user_value::UserValue;
30use starnix_uapi::{
31    FUTEX_BITSET_MATCH_ANY, FUTEX_CLOCK_REALTIME, FUTEX_CMD_MASK, FUTEX_CMP_REQUEUE,
32    FUTEX_CMP_REQUEUE_PI, FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_PRIVATE_FLAG, FUTEX_REQUEUE,
33    FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI, FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
34    FUTEX_WAKE, FUTEX_WAKE_BITSET, FUTEX_WAKE_OP, MAP_ANONYMOUS, MAP_DENYWRITE, MAP_FIXED,
35    MAP_FIXED_NOREPLACE, MAP_GROWSDOWN, MAP_LOCKED, MAP_NORESERVE, MAP_POPULATE, MAP_PRIVATE,
36    MAP_SHARED, MAP_SHARED_VALIDATE, MAP_STACK, O_CLOEXEC, O_NONBLOCK, PROT_EXEC,
37    UFFD_USER_MODE_ONLY, errno, error, robust_list_head, tid_t, uapi,
38};
39use std::ops::Deref as _;
40use zx;
41
42#[cfg(target_arch = "x86_64")]
43use starnix_uapi::MAP_32BIT;
44
// Returns any platform-specific mmap flags. This is a separate function because as of this writing
// "attributes on expressions are experimental."
#[cfg(target_arch = "x86_64")]
fn get_valid_platform_mmap_flags() -> u32 {
    // x86-64 is the only architecture here with an extra flag: MAP_32BIT.
    MAP_32BIT
}
// All other architectures contribute no additional flag bits.
#[cfg(not(target_arch = "x86_64"))]
fn get_valid_platform_mmap_flags() -> u32 {
    0
}
55
56/// sys_mmap takes a mutable reference to current_task because it may modify the IP register.
57pub fn sys_mmap(
58    locked: &mut Locked<Unlocked>,
59    current_task: &mut CurrentTask,
60    addr: UserAddress,
61    length: usize,
62    prot: u32,
63    flags: u32,
64    fd: FdNumber,
65    offset: u64,
66) -> Result<UserAddress, Errno> {
67    let user_address = do_mmap(locked, current_task, addr, length, prot, flags, fd, offset)?;
68    if prot & PROT_EXEC != 0 {
69        // Possibly loads a new module. Notify debugger for the change.
70        // We only care about dynamic linker loading modules for now, which uses mmap. In the future
71        // we might want to support unloading modules in munmap or JIT compilation in mprotect.
72        notify_debugger_of_module_list(current_task)?;
73    }
74    Ok(user_address)
75}
76
/// Shared implementation of mmap(2), used by `sys_mmap` and the arch32 `mmap2`
/// wrapper. Validates the protection bits, flags, length and offset, then
/// creates either an anonymous or a file-backed mapping and returns the
/// address at which it was placed.
pub fn do_mmap<L>(
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
    prot: u32,
    flags: u32,
    fd: FdNumber,
    offset: u64,
) -> Result<UserAddress, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    let prot_flags = ProtectionFlags::from_access_bits(prot).ok_or_else(|| {
        track_stub!(TODO("https://fxbug.dev/322874211"), "mmap parse protection", prot);
        errno!(EINVAL)
    })?;

    // Full set of flag bits this implementation understands; anything outside
    // it is rejected below.
    let valid_flags: u32 = get_valid_platform_mmap_flags()
        | MAP_PRIVATE
        | MAP_SHARED
        | MAP_SHARED_VALIDATE
        | MAP_ANONYMOUS
        | MAP_FIXED
        | MAP_FIXED_NOREPLACE
        | MAP_POPULATE
        | MAP_NORESERVE
        | MAP_STACK
        | MAP_DENYWRITE
        | MAP_GROWSDOWN
        | MAP_LOCKED;
    if flags & !valid_flags != 0 {
        // MAP_SHARED_VALIDATE requests strict flag checking: unknown bits then
        // produce EOPNOTSUPP rather than EINVAL.
        if flags & MAP_SHARED_VALIDATE != 0 {
            return error!(EOPNOTSUPP);
        }
        track_stub!(TODO("https://fxbug.dev/322873638"), "mmap check flags", flags);
        return error!(EINVAL);
    }

    // Anonymous mappings have no backing file; otherwise resolve the fd now so
    // a bad descriptor fails before any address-space work.
    let file = if flags & MAP_ANONYMOUS != 0 { None } else { Some(current_task.files.get(fd)?) };
    // Exactly one of MAP_PRIVATE / MAP_SHARED must be supplied.
    if flags & (MAP_PRIVATE | MAP_SHARED) == 0
        || flags & (MAP_PRIVATE | MAP_SHARED) == MAP_PRIVATE | MAP_SHARED
    {
        return error!(EINVAL);
    }
    if length == 0 {
        return error!(EINVAL);
    }
    // The file offset must be page-aligned.
    if offset % *PAGE_SIZE != 0 {
        return error!(EINVAL);
    }

    // TODO(tbodt): should we consider MAP_NORESERVE?

    // Translate (addr, MAP_FIXED, MAP_FIXED_NOREPLACE) into a placement
    // request. A null address together with either FIXED flag is invalid.
    let addr = match (addr, flags & MAP_FIXED != 0, flags & MAP_FIXED_NOREPLACE != 0) {
        (UserAddress::NULL, false, false) => DesiredAddress::Any,
        (UserAddress::NULL, true, _) | (UserAddress::NULL, _, true) => return error!(EINVAL),
        (addr, false, false) => DesiredAddress::Hint(addr),
        (addr, _, true) => DesiredAddress::Fixed(addr),
        (addr, true, false) => DesiredAddress::FixedOverwrite(addr),
    };

    // Anonymous mappings ignore the offset argument.
    let memory_offset = if flags & MAP_ANONYMOUS != 0 { 0 } else { offset };

    let mut options = MappingOptions::empty();
    if flags & MAP_SHARED != 0 {
        options |= MappingOptions::SHARED;
    }
    if flags & MAP_ANONYMOUS != 0 {
        options |= MappingOptions::ANONYMOUS;
    }
    // MAP_32BIT exists only on x86-64 and is honored only without MAP_FIXED.
    #[cfg(target_arch = "x86_64")]
    if flags & MAP_FIXED == 0 && flags & MAP_32BIT != 0 {
        options |= MappingOptions::LOWER_32BIT;
    }
    if flags & MAP_GROWSDOWN != 0 {
        options |= MappingOptions::GROWSDOWN;
    }
    if flags & MAP_POPULATE != 0 {
        options |= MappingOptions::POPULATE;
    }
    if flags & MAP_LOCKED != 0 {
        // The kernel isn't expected to return an error if locking fails with this flag, so for now
        // this implementation will always fail to lock memory even if mapping succeeds.
        track_stub!(TODO("https://fxbug.dev/406377606"), "MAP_LOCKED");
    }

    // Security hook: may veto the mapping based on the file, protections, and options.
    security::mmap_file(current_task, file.as_ref(), prot_flags, options)?;

    if flags & MAP_ANONYMOUS != 0 {
        trace_duration!(CATEGORY_STARNIX_MM, "AnonymousMmap");
        current_task.mm()?.map_anonymous(addr, length, prot_flags, options, MappingName::None)
    } else {
        trace_duration!(CATEGORY_STARNIX_MM, "FileBackedMmap");
        // TODO(tbodt): maximize protection flags so that mprotect works
        let file = file.expect("file retrieved above for file-backed mapping");
        file.mmap(
            locked,
            current_task,
            addr,
            memory_offset,
            length,
            prot_flags,
            options,
            file.name.to_passive(),
        )
    }
}
185
186pub fn sys_mprotect(
187    _locked: &mut Locked<Unlocked>,
188    current_task: &CurrentTask,
189    addr: UserAddress,
190    length: usize,
191    prot: u32,
192) -> Result<(), Errno> {
193    let prot_flags = ProtectionFlags::from_bits(prot).ok_or_else(|| {
194        track_stub!(TODO("https://fxbug.dev/322874672"), "mprotect parse protection", prot);
195        errno!(EINVAL)
196    })?;
197    current_task.mm()?.protect(current_task, addr, length, prot_flags)?;
198    Ok(())
199}
200
201pub fn sys_mremap(
202    _locked: &mut Locked<Unlocked>,
203    current_task: &CurrentTask,
204    addr: UserAddress,
205    old_length: usize,
206    new_length: usize,
207    flags: u32,
208    new_addr: UserAddress,
209) -> Result<UserAddress, Errno> {
210    let flags = MremapFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
211    let addr =
212        current_task.mm()?.remap(current_task, addr, old_length, new_length, flags, new_addr)?;
213    Ok(addr)
214}
215
216pub fn sys_munmap(
217    _locked: &mut Locked<Unlocked>,
218    current_task: &CurrentTask,
219    addr: UserAddress,
220    length: usize,
221) -> Result<(), Errno> {
222    current_task.mm()?.unmap(addr, length)?;
223    Ok(())
224}
225
226pub fn sys_msync(
227    locked: &mut Locked<Unlocked>,
228    current_task: &CurrentTask,
229    addr: UserAddress,
230    length: usize,
231    flags: u32,
232) -> Result<(), Errno> {
233    let flags = MsyncFlags::from_bits_retain(flags);
234    current_task.mm()?.msync(locked, current_task, addr, length, flags)
235}
236
237pub fn sys_madvise(
238    _locked: &mut Locked<Unlocked>,
239    current_task: &CurrentTask,
240    addr: UserAddress,
241    length: usize,
242    advice: u32,
243) -> Result<(), Errno> {
244    current_task.mm()?.madvise(current_task, addr, length, advice)?;
245    Ok(())
246}
247
/// process_madvise(2): give memory advice about another process's address
/// ranges. Not implemented yet; always fails with ENOSYS.
pub fn sys_process_madvise(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _pidfd: FdNumber,
    _iovec_addr: IOVecPtr,
    _iovec_count: UserValue<i32>,
    _advice: UserValue<i32>,
    _flags: UserValue<u32>,
) -> Result<usize, Errno> {
    track_stub!(TODO("https://fxbug.dev/409060664"), "process_madvise");
    error!(ENOSYS)
}
260
/// brk(2): requests that the program break be moved to `addr`, delegating to
/// the memory manager. Returns the resulting program break address.
pub fn sys_brk(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
) -> Result<UserAddress, Errno> {
    current_task.mm()?.set_brk(locked, current_task, addr)
}
268
269pub fn sys_process_vm_readv(
270    locked: &mut Locked<Unlocked>,
271    current_task: &CurrentTask,
272    tid: tid_t,
273    local_iov_addr: IOVecPtr,
274    local_iov_count: UserValue<i32>,
275    remote_iov_addr: IOVecPtr,
276    remote_iov_count: UserValue<i32>,
277    flags: usize,
278) -> Result<usize, Errno> {
279    if flags != 0 {
280        return error!(EINVAL);
281    }
282
283    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
284    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
285    // make sure to return Ok(0) before doing any other validation/operations.
286    if (local_iov_count == 0 && local_iov_addr.is_null())
287        || (remote_iov_count == 0 && remote_iov_addr.is_null())
288    {
289        return Ok(0);
290    }
291
292    let weak_remote_task = current_task.get_task(tid);
293    let remote_task = Task::from_weak(&weak_remote_task)?;
294
295    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
296
297    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
298    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
299    log_trace!(
300        "process_vm_readv(tid={}, local_iov={:?}, remote_iov={:?})",
301        tid,
302        local_iov,
303        remote_iov
304    );
305
306    track_stub!(TODO("https://fxbug.dev/322874765"), "process_vm_readv single-copy");
307    // According to the man page, this syscall was added to Linux specifically to
308    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
309    // point.
310    let mut output = UserBuffersOutputBuffer::unified_new(current_task, local_iov)?;
311    let remote_mm = remote_task.mm().ok();
312    if current_task.has_same_address_space(remote_mm.as_ref()) {
313        let mut input = UserBuffersInputBuffer::unified_new(current_task, remote_iov)?;
314        output.write_buffer(&mut input)
315    } else {
316        let mut input = UserBuffersInputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
317        output.write_buffer(&mut input)
318    }
319}
320
321pub fn sys_process_vm_writev(
322    locked: &mut Locked<Unlocked>,
323    current_task: &CurrentTask,
324    tid: tid_t,
325    local_iov_addr: IOVecPtr,
326    local_iov_count: UserValue<i32>,
327    remote_iov_addr: IOVecPtr,
328    remote_iov_count: UserValue<i32>,
329    flags: usize,
330) -> Result<usize, Errno> {
331    if flags != 0 {
332        return error!(EINVAL);
333    }
334
335    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
336    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
337    // make sure to return Ok(0) before doing any other validation/operations.
338    if (local_iov_count == 0 && local_iov_addr.is_null())
339        || (remote_iov_count == 0 && remote_iov_addr.is_null())
340    {
341        return Ok(0);
342    }
343
344    let weak_remote_task = current_task.get_task(tid);
345    let remote_task = Task::from_weak(&weak_remote_task)?;
346
347    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
348
349    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
350    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
351    log_trace!(
352        "sys_process_vm_writev(tid={}, local_iov={:?}, remote_iov={:?})",
353        tid,
354        local_iov,
355        remote_iov
356    );
357
358    track_stub!(TODO("https://fxbug.dev/322874339"), "process_vm_writev single-copy");
359    // NB: According to the man page, this syscall was added to Linux specifically to
360    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
361    // point.
362    let mut input = UserBuffersInputBuffer::unified_new(current_task, local_iov)?;
363    let remote_mm = remote_task.mm().ok();
364    if current_task.has_same_address_space(remote_mm.as_ref()) {
365        let mut output = UserBuffersOutputBuffer::unified_new(current_task, remote_iov)?;
366        output.write_buffer(&mut input)
367    } else {
368        let mut output = UserBuffersOutputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
369        output.write_buffer(&mut input)
370    }
371}
372
373pub fn sys_process_mrelease(
374    _locked: &mut Locked<Unlocked>,
375    current_task: &CurrentTask,
376    pidfd: FdNumber,
377    flags: u32,
378) -> Result<(), Errno> {
379    if flags != 0 {
380        return error!(EINVAL);
381    }
382    let file = current_task.files.get(pidfd)?;
383    let task = current_task.get_task(file.as_thread_group_key()?.pid());
384    let task = task.upgrade().ok_or_else(|| errno!(ESRCH))?;
385    if !task.load_stopped().is_stopped() {
386        return error!(EINVAL);
387    }
388
389    let mm = task.mm()?;
390    let mm_state = mm.state.write();
391    mm_state.mrelease()
392}
393
/// membarrier(2): issues memory or instruction-stream barriers across threads,
/// or registers the address space for private expedited barriers.
///
/// The RSEQ command family is not implemented and returns ENOSYS.
pub fn sys_membarrier(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    cmd: uapi::membarrier_cmd,
    _flags: u32,
    _cpu_id: i32,
) -> Result<u32, Errno> {
    match cmd {
        // This command returns a bit mask of all supported commands.
        // We support everything except for the RSEQ family.
        uapi::membarrier_cmd_MEMBARRIER_CMD_QUERY => Ok(uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
            | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE),
        // Global and global expedited barriers are treated identically. We don't track
        // registration for global expedited barriers currently.
        uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
        | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED => {
            system_barrier(BarrierType::DataMemory);
            Ok(0)
        }
        // Global registration commands are ignored.
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED => Ok(0),
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED => {
            // A private expedited barrier is only issued if the address space is registered
            // for these barriers.
            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::Memory) {
                // If a barrier is requested, issue a global barrier.
                system_barrier(BarrierType::DataMemory);
                Ok(0)
            } else {
                // Using a private expedited command without prior registration is an error.
                error!(EPERM)
            }
        }
        // Private sync core barriers are treated as global instruction stream barriers.
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE => {
            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::SyncCore)
            {
                system_barrier(BarrierType::InstructionStream);
                Ok(0)
            } else {
                error!(EPERM)
            }
        }
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED => {
            let _ =
                current_task.mm()?.register_membarrier_private_expedited(MembarrierType::Memory)?;
            Ok(0)
        }

        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE => {
            let _ = current_task
                .mm()?
                .register_membarrier_private_expedited(MembarrierType::SyncCore)?;
            Ok(0)
        }
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ => {
            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
            error!(ENOSYS)
        }
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ => {
            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
            error!(ENOSYS)
        }
        _ => error!(EINVAL),
    }
}
464
/// userfaultfd(2): creates a file descriptor for handling page faults in
/// user space.
///
/// Accepts only O_CLOEXEC, O_NONBLOCK and UFFD_USER_MODE_ONLY in `raw_flags`;
/// anything else is EINVAL. Descriptors created without O_CLOEXEC (which
/// would have to survive exec()) are not yet supported and return ENOSYS.
pub fn sys_userfaultfd(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    raw_flags: u32,
) -> Result<FdNumber, Errno> {
    let unknown_flags = raw_flags & !(O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
    if unknown_flags != 0 {
        return error!(EINVAL, format!("unknown flags provided: {unknown_flags:x?}"));
    }
    // Translate the raw bits into OpenFlags for the new file object.
    let mut open_flags = OpenFlags::empty();
    if raw_flags & O_NONBLOCK != 0 {
        open_flags |= OpenFlags::NONBLOCK;
    }
    if raw_flags & O_CLOEXEC != 0 {
        open_flags |= OpenFlags::CLOEXEC;
    }

    let fd_flags = if raw_flags & O_CLOEXEC != 0 {
        FdFlags::CLOEXEC
    } else {
        track_stub!(TODO("https://fxbug.dev/297375964"), "userfaultfds that survive exec()");
        return error!(ENOSYS);
    };

    // NOTE(review): this is true when UFFD_USER_MODE_ONLY is NOT set, which
    // reads inverted relative to the variable name — confirm the expected
    // polarity of the corresponding `UserFaultFile::new` parameter.
    let user_mode_only = raw_flags & UFFD_USER_MODE_ONLY == 0;
    let uff_handle = UserFaultFile::new(locked, current_task, open_flags, user_mode_only)?;
    current_task.add_file(locked, uff_handle, fd_flags)
}
493
494pub fn sys_futex(
495    locked: &mut Locked<Unlocked>,
496    current_task: &mut CurrentTask,
497    addr: UserAddress,
498    op: u32,
499    value: u32,
500    timeout_or_value2: SyscallArg,
501    addr2: UserAddress,
502    value3: u32,
503) -> Result<usize, Errno> {
504    if op & FUTEX_PRIVATE_FLAG != 0 {
505        do_futex::<PrivateFutexKey>(
506            locked,
507            current_task,
508            addr,
509            op,
510            value,
511            timeout_or_value2,
512            addr2,
513            value3,
514        )
515    } else {
516        do_futex::<SharedFutexKey>(
517            locked,
518            current_task,
519            addr,
520            op,
521            value,
522            timeout_or_value2,
523            addr2,
524            value3,
525        )
526    }
527}
528
/// Executes one futex(2) command against the futex table selected by `Key`
/// (private to this address space, or shared).
///
/// `timeout_or_value2` is overloaded per the futex ABI: a timespec pointer
/// for waiting/locking commands, and a requeue count for
/// FUTEX_REQUEUE/FUTEX_CMP_REQUEUE.
fn do_futex<Key: FutexKey>(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    addr: UserAddress,
    op: u32,
    value: u32,
    timeout_or_value2: SyscallArg,
    addr2: UserAddress,
    value3: u32,
) -> Result<usize, Errno> {
    let futexes = Key::get_table_from_task(current_task)?;
    // Strip option bits (e.g. FUTEX_CLOCK_REALTIME) down to the command.
    let cmd = op & (FUTEX_CMD_MASK as u32);

    let is_realtime = match (cmd, op & FUTEX_CLOCK_REALTIME != 0) {
        // This option bit can be employed only with the FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
        // (since Linux 4.5) FUTEX_WAIT, and (since Linux 5.14) FUTEX_LOCK_PI2 operations.
        (FUTEX_WAIT_BITSET | FUTEX_WAIT_REQUEUE_PI | FUTEX_WAIT | FUTEX_LOCK_PI2, true) => true,
        (_, true) => return error!(EINVAL),

        // FUTEX_LOCK_PI always uses realtime.
        (FUTEX_LOCK_PI, _) => true,

        (_, false) => false,
    };

    // The timeout is interpreted differently by WAIT and WAIT_BITSET: WAIT takes a
    // timeout and WAIT_BITSET takes a deadline.
    // A null timespec pointer means "no timeout" (wait forever).
    let read_timespec = |current_task: &CurrentTask| {
        let utime = TimeSpecPtr::new(current_task, timeout_or_value2);
        if utime.is_null() {
            Ok(timespec_from_time(zx::MonotonicInstant::INFINITE))
        } else {
            current_task.read_multi_arch_object(utime)
        }
    };
    // Interprets the timespec as a relative timeout and converts it to an
    // absolute monotonic deadline.
    let read_timeout = |current_task: &CurrentTask| {
        let timespec = read_timespec(current_task)?;
        let timeout = duration_from_timespec(timespec);
        let deadline = zx::MonotonicInstant::after(timeout?);
        if is_realtime {
            // Since this is a timeout, waiting on the monotonic timeline before it's paused is
            // just as good as actually estimating UTC here.
            track_stub!(TODO("https://fxbug.dev/356912301"), "FUTEX_CLOCK_REALTIME timeout");
        }
        Ok(deadline)
    };
    // Interprets the timespec as an absolute deadline on either the UTC or
    // the monotonic timeline, per FUTEX_CLOCK_REALTIME.
    let read_deadline = |current_task: &CurrentTask| {
        let timespec = read_timespec(current_task)?;
        if is_realtime {
            Ok(TargetTime::RealTime(time_from_timespec::<UtcTimeline>(timespec)?))
        } else {
            Ok(TargetTime::Monotonic(time_from_timespec::<zx::MonotonicTimeline>(timespec)?))
        }
    };

    match cmd {
        FUTEX_WAIT => {
            let deadline = read_timeout(current_task)?;
            let bitset = FUTEX_BITSET_MATCH_ANY;
            do_futex_wait_with_restart::<Key>(
                locked,
                current_task,
                addr,
                value,
                bitset,
                TargetTime::Monotonic(deadline),
            )?;
            Ok(0)
        }
        FUTEX_WAKE => {
            futexes.wake(locked, current_task, addr, value as usize, FUTEX_BITSET_MATCH_ANY)
        }
        FUTEX_WAKE_OP => {
            track_stub!(TODO("https://fxbug.dev/361181940"), "FUTEX_WAKE_OP");
            error!(ENOSYS)
        }
        FUTEX_WAIT_BITSET => {
            // A zero bitset can never match and is rejected by Linux.
            if value3 == 0 {
                return error!(EINVAL);
            }
            let deadline = read_deadline(current_task)?;
            do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, value3, deadline)?;
            Ok(0)
        }
        FUTEX_WAKE_BITSET => {
            if value3 == 0 {
                return error!(EINVAL);
            }
            futexes.wake(locked, current_task, addr, value as usize, value3)
        }
        FUTEX_REQUEUE | FUTEX_CMP_REQUEUE => {
            let wake_count = value as usize;
            // For requeue commands the "timeout" argument carries the count.
            let requeue_count: usize = timeout_or_value2.into();
            if wake_count > std::i32::MAX as usize || requeue_count > std::i32::MAX as usize {
                return error!(EINVAL);
            }
            // CMP_REQUEUE additionally checks *addr against value3 first.
            let expected_value = if cmd == FUTEX_CMP_REQUEUE { Some(value3) } else { None };
            futexes.requeue(
                locked,
                current_task,
                addr,
                wake_count,
                requeue_count,
                addr2,
                expected_value,
            )
        }
        FUTEX_WAIT_REQUEUE_PI => {
            track_stub!(TODO("https://fxbug.dev/361181558"), "FUTEX_WAIT_REQUEUE_PI");
            error!(ENOSYS)
        }
        FUTEX_CMP_REQUEUE_PI => {
            track_stub!(TODO("https://fxbug.dev/361181773"), "FUTEX_CMP_REQUEUE_PI");
            error!(ENOSYS)
        }
        FUTEX_LOCK_PI | FUTEX_LOCK_PI2 => {
            futexes.lock_pi(locked, current_task, addr, read_timeout(current_task)?)?;
            Ok(0)
        }
        FUTEX_TRYLOCK_PI => {
            track_stub!(TODO("https://fxbug.dev/361175318"), "FUTEX_TRYLOCK_PI");
            error!(ENOSYS)
        }
        FUTEX_UNLOCK_PI => {
            futexes.unlock_pi(locked, current_task, addr)?;
            Ok(0)
        }
        _ => {
            track_stub!(TODO("https://fxbug.dev/322875124"), "futex unknown command", cmd);
            error!(ENOSYS)
        }
    }
}
662
/// Waits on the futex at `addr`, transparently restarting the wait when the
/// sleep is interrupted by a signal.
///
/// The deadline is interpreted on the timeline carried by `TargetTime`;
/// real-time deadlines are estimated onto the boot timeline before waiting,
/// since waits cannot be performed directly against UTC. On EINTR a restart
/// function re-invoking this wait with the same absolute deadline is
/// installed and ERESTART_RESTARTBLOCK is returned.
fn do_futex_wait_with_restart<Key: FutexKey>(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    addr: UserAddress,
    value: u32,
    mask: u32,
    deadline: TargetTime,
) -> Result<(), Errno> {
    let futexes = Key::get_table_from_task(current_task)?;
    let result = match deadline {
        TargetTime::Monotonic(mono_deadline) => {
            futexes.wait(locked, current_task, addr, value, mask, mono_deadline)
        }
        TargetTime::BootInstant(boot_deadline) => {
            let timer_slack = current_task.read().get_timerslack();
            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
        }
        TargetTime::RealTime(utc_deadline) => {
            // We convert real time deadlines to boot time deadlines since we cannot wait using a UTC deadline.
            let (boot_deadline, _) = estimate_boot_deadline_from_utc(utc_deadline);
            let timer_slack = current_task.read().get_timerslack();
            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
        }
    };
    match result {
        Err(err) if err == EINTR => {
            // The restart closure captures the original absolute deadline, so
            // a restarted wait does not extend the total waiting time.
            current_task.set_syscall_restart_func(move |locked, current_task| {
                do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, mask, deadline)
            });
            error!(ERESTART_RESTARTBLOCK)
        }
        result => result,
    }
}
697
698pub fn sys_get_robust_list(
699    _locked: &mut Locked<Unlocked>,
700    current_task: &CurrentTask,
701    tid: tid_t,
702    user_head_ptr: UserRef<UserAddress>,
703    user_len_ptr: UserRef<usize>,
704) -> Result<(), Errno> {
705    if tid < 0 {
706        return error!(EINVAL);
707    }
708    if user_head_ptr.is_null() || user_len_ptr.is_null() {
709        return error!(EFAULT);
710    }
711    if tid != 0 {
712        security::check_task_capable(current_task, CAP_SYS_PTRACE)?;
713    }
714    let task = if tid == 0 { current_task.weak_task() } else { current_task.get_task(tid) };
715    let task = Task::from_weak(&task)?;
716    current_task.write_object(user_head_ptr, &task.read().robust_list_head.addr())?;
717    current_task.write_object(user_len_ptr, &std::mem::size_of::<robust_list_head>())?;
718    Ok(())
719}
720
721pub fn sys_set_robust_list(
722    _locked: &mut Locked<Unlocked>,
723    current_task: &CurrentTask,
724    user_head: UserRef<robust_list_head>,
725    len: usize,
726) -> Result<(), Errno> {
727    if len != std::mem::size_of::<robust_list_head>() {
728        return error!(EINVAL);
729    }
730    current_task.write().robust_list_head = user_head.into();
731    Ok(())
732}
733
/// mlock(2): locks the pages covering `[addr, addr + length)` into memory.
/// Implemented as mlock2() with no flags.
pub fn sys_mlock(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
) -> Result<(), Errno> {
    // If flags is 0, mlock2() behaves exactly the same as mlock().
    sys_mlock2(locked, current_task, addr, length, 0)
}
743
744pub fn sys_mlock2(
745    locked: &mut Locked<Unlocked>,
746    current_task: &CurrentTask,
747    addr: UserAddress,
748    length: usize,
749    flags: u64,
750) -> Result<(), Errno> {
751    const KNOWN_FLAGS: u64 = MLOCK_ONFAULT as u64;
752    if (flags & !KNOWN_FLAGS) != 0 {
753        return error!(EINVAL);
754    }
755    let on_fault = flags & MLOCK_ONFAULT as u64 != 0;
756    current_task.mm()?.mlock(current_task, locked, addr, length, on_fault)
757}
758
/// munlock(2): unlocks the pages covering `[addr, addr + length)`.
pub fn sys_munlock(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
) -> Result<(), Errno> {
    current_task.mm()?.munlock(current_task, addr, length)
}
767
/// mlockall(2): lock the entire address space. Not implemented; always ENOSYS.
pub fn sys_mlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "mlockall()");
    error!(ENOSYS)
}
776
/// munlockall(2): unlock the entire address space. Not implemented; always ENOSYS.
pub fn sys_munlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "munlockall()");
    error!(ENOSYS)
}
785
/// mincore(2): report page residency for a range. Not implemented; always ENOSYS.
pub fn sys_mincore(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _addr: UserAddress,
    _length: usize,
    _out: UserRef<u8>,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297372240"), "mincore()");
    error!(ENOSYS)
}
796
// Syscalls for arch32 usage
#[cfg(target_arch = "aarch64")]
mod arch32 {
    use crate::mm::PAGE_SIZE;
    use crate::mm::syscalls::{UserAddress, sys_mmap};
    use crate::task::{CurrentTask, RobustListHeadPtr};
    use crate::vfs::FdNumber;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::errors::Errno;
    use starnix_uapi::user_address::UserRef;
    use starnix_uapi::{error, uapi};

    /// 32-bit set_robust_list: validates the 32-bit head size and stores a
    /// pointer to the 32-bit robust list head for the calling task.
    pub fn sys_arch32_set_robust_list(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_head: UserRef<uapi::arch32::robust_list_head>,
        len: usize,
    ) -> Result<(), Errno> {
        if len != std::mem::size_of::<uapi::arch32::robust_list_head>() {
            return error!(EINVAL);
        }
        current_task.write().robust_list_head = RobustListHeadPtr::from_32(user_head);
        Ok(())
    }

    /// 32-bit mmap2: like mmap but the offset argument is in pages, so it is
    /// scaled by the page size before delegating to `sys_mmap`.
    pub fn sys_arch32_mmap2(
        locked: &mut Locked<Unlocked>,
        current_task: &mut CurrentTask,
        addr: UserAddress,
        length: usize,
        prot: u32,
        flags: u32,
        fd: FdNumber,
        offset: u64,
    ) -> Result<UserAddress, Errno> {
        sys_mmap(locked, current_task, addr, length, prot, flags, fd, offset * *PAGE_SIZE)
    }

    /// 32-bit munmap: additionally rejects addresses/lengths that fall outside
    /// the 32-bit address range before unmapping.
    pub fn sys_arch32_munmap(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
    ) -> Result<(), Errno> {
        if !addr.is_lower_32bit() || length >= (1 << 32) {
            return error!(EINVAL);
        }
        current_task.mm()?.unmap(addr, length)?;
        Ok(())
    }

    // The remaining arch32 entry points are identical to their 64-bit
    // counterparts and are simply re-exported under arch32 names.
    pub use super::{
        sys_futex as sys_arch32_futex, sys_madvise as sys_arch32_madvise,
        sys_membarrier as sys_arch32_membarrier, sys_mincore as sys_arch32_mincore,
        sys_mlock as sys_arch32_mlock, sys_mlock2 as sys_arch32_mlock2,
        sys_mlockall as sys_arch32_mlockall, sys_mremap as sys_arch32_mremap,
        sys_msync as sys_arch32_msync, sys_munlock as sys_arch32_munlock,
        sys_munlockall as sys_arch32_munlockall,
        sys_process_mrelease as sys_arch32_process_mrelease,
        sys_process_vm_readv as sys_arch32_process_vm_readv,
        sys_userfaultfd as sys_arch32_userfaultfd,
    };
}
860
861#[cfg(target_arch = "aarch64")]
862pub use arch32::*;
863
#[cfg(test)]
mod tests {
    //! Unit tests for the mmap/munmap/msync/mremap syscall implementations.

    use super::*;
    use crate::mm::memory::MemoryObject;
    use crate::testing::*;
    use starnix_uapi::errors::EEXIST;
    use starnix_uapi::file_mode::Access;
    use starnix_uapi::{MREMAP_FIXED, MREMAP_MAYMOVE, PROT_READ};

    /// An address hint that collides with an existing mapping is only a hint:
    /// the new mapping is placed at a different address.
    #[::fuchsia::test]
    async fn test_mmap_with_colliding_hint() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    assert_ne!(address, mapped_address);
                }
                error => {
                    panic!("mmap with colliding hint failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// MAP_FIXED maps at the requested address even when it collides with an
    /// existing mapping.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    assert_eq!(address, mapped_address);
                }
                error => {
                    panic!("mmap with fixed collision failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// MAP_FIXED_NOREPLACE fails with EEXIST when the requested address is
    /// already mapped.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_noreplace_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                FdNumber::from_raw(-1),
                0,
            ) {
                Err(errno) => {
                    assert_eq!(errno, EEXIST);
                }
                result => {
                    panic!("mmap with fixed_noreplace collision failed: {result:?}");
                }
            }
        })
        .await;
    }

    /// It is ok to call munmap with an address that is a multiple of the page size, and
    /// a non-zero length.
    #[::fuchsia::test]
    async fn test_munmap() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );

            // Verify that the memory is no longer readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
        })
        .await;
    }

    /// It is ok to call munmap on an unmapped range.
    #[::fuchsia::test]
    async fn test_munmap_not_mapped() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
            // Second unmap of the same (now empty) range also succeeds.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
        })
        .await;
    }

    /// It is an error to call munmap with a length of 0.
    #[::fuchsia::test]
    async fn test_munmap_0_length() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(sys_munmap(locked, &current_task, mapped_address, 0), error!(EINVAL));
        })
        .await;
    }

    /// It is an error to call munmap with an address that is not a multiple of the page size.
    #[::fuchsia::test]
    async fn test_munmap_not_aligned() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + 1u64).unwrap(),
                    *PAGE_SIZE as usize
                ),
                error!(EINVAL)
            );

            // Verify that the memory is still readable.
            assert!(current_task.read_memory_to_array::<5>(mapped_address).is_ok());
        })
        .await;
    }

    /// The entire page should be unmapped, not just the range [address, address + length).
    #[::fuchsia::test]
    async fn test_munmap_unmap_partial() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) / 2),
                Ok(())
            );

            // Verify that memory can't be read in either half of the page.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE - 2)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// All pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_multiple_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) + 1),
                Ok(())
            );

            // Verify that neither page is readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// Only the pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_one_of_many_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) - 1),
                Ok(())
            );

            // Verify that the second page is still readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap())
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap the middle page of a mapping.
    #[::fuchsia::test]
    async fn test_munmap_middle_page() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Verify that the first and third pages are still readable.
            assert!(current_task.read_memory_to_vec(mapped_address, 5).is_ok());
            assert_eq!(
                current_task.read_memory_to_vec((mapped_address + *PAGE_SIZE).unwrap(), 5),
                error!(EFAULT)
            );
            assert!(
                current_task
                    .read_memory_to_vec((mapped_address + (*PAGE_SIZE * 2)).unwrap(), 5)
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap a range of pages that includes disjoint mappings.
    #[::fuchsia::test]
    async fn test_munmap_many_mappings() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_addresses: Vec<_> = std::iter::repeat_with(|| {
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE)
            })
            .take(3)
            .collect();
            let min_address = *mapped_addresses.iter().min().unwrap();
            let max_address = *mapped_addresses.iter().max().unwrap();
            let unmap_length = (max_address - min_address) + *PAGE_SIZE as usize;

            assert_eq!(sys_munmap(locked, &current_task, min_address, unmap_length), Ok(()));

            // Verify that none of the mapped pages are readable.
            for mapped_address in mapped_addresses {
                assert_eq!(current_task.read_memory_to_vec(mapped_address, 5), error!(EFAULT));
            }
        })
        .await;
    }

    /// msync succeeds on fully-mapped ranges and returns ENOMEM for ranges
    /// that cover an unmapped page.
    #[::fuchsia::test]
    async fn test_msync_validates_address_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages and test that ranges covering these pages return no error.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );

            // Unmap the middle page and test that ranges covering that page return ENOMEM.
            sys_munmap(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE as usize)
                .expect("unmap middle");
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE as usize * 3,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE as usize * 2,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + (*PAGE_SIZE * 2)).unwrap(),
                    *PAGE_SIZE as usize,
                    0
                ),
                Ok(())
            );

            // Map the middle page back and test that ranges covering the three pages
            // (spanning multiple ranges) return no error.
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );
        })
        .await;
    }

    /// Shrinks an entire range.
    #[::fuchsia::test]
    async fn test_mremap_shrink_whole_range_from_end() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            // Shrink the mapping from 2 to 1 pages.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
        })
        .await;
    }

    /// Shrinks part of a range, introducing a hole in the middle.
    #[::fuchsia::test]
    async fn test_mremap_shrink_partial_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first 2 pages down to 1, creating a hole.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinking doesn't care if the range specified spans multiple mappings.
    #[::fuchsia::test]
    async fn test_mremap_shrink_across_ranges() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, then map the middle again. This will leave us with
            // 3 contiguous mappings.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );

            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Remap over all three mappings, shrinking to 1 page.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 3,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_unmapped(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap());
        })
        .await;
    }

    /// Grows a mapping in-place.
    #[::fuchsia::test]
    async fn test_mremap_grow_in_place() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, leaving a hole.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Grow the first page in-place into the middle.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE,
                    *PAGE_SIZE * 2,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');

            // The middle page should be new, and not just pointing to the original middle page filled
            // with 'b'.
            check_page_ne(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Tries to grow a set of pages that cannot fit, and forces a move.
    #[::fuchsia::test]
    async fn test_mremap_grow_maymove() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Grow the first two pages by 1, forcing a move.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE * 3,
                MREMAP_MAYMOVE,
                UserAddress::default(),
            )
            .expect("failed to mremap");

            assert_ne!(new_addr, addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The moved pages should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'b');

            // The newly grown page should not be the same as the original third page.
            check_page_ne(&current_task, (new_addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinks a set of pages and move them to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_shrink_fixed() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages which will act as the destination.
            let dst_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, dst_addr, 'y');
            fill_page(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'z');

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first two pages and move them to overwrite the mappings at `dst_addr`.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE,
                MREMAP_MAYMOVE | MREMAP_FIXED,
                dst_addr,
            )
            .expect("failed to mremap");

            assert_eq!(new_addr, dst_addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The first moved page should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');

            // The second page should be part of the original dst mapping.
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'z');
        })
        .await;
    }

    /// Clobbers the middle of an existing mapping with mremap to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_clobber_memory_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let dst_memory = MemoryObject::from(zx::Vmo::create(2 * *PAGE_SIZE).unwrap());
            dst_memory.write(&['x' as u8].repeat(*PAGE_SIZE as usize), 0).unwrap();
            dst_memory.write(&['y' as u8].repeat(*PAGE_SIZE as usize), *PAGE_SIZE).unwrap();

            let dst_addr = current_task
                .mm()
                .unwrap()
                .map_memory(
                    DesiredAddress::Any,
                    dst_memory.into(),
                    0,
                    2 * (*PAGE_SIZE as usize),
                    ProtectionFlags::READ,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::None,
                )
                .unwrap();

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Overwrite the second page of the mapping with the second page of the anonymous mapping.
            let remapped_addr = sys_mremap(
                locked,
                &*current_task,
                (addr + *PAGE_SIZE).unwrap(),
                *PAGE_SIZE as usize,
                *PAGE_SIZE as usize,
                MREMAP_FIXED | MREMAP_MAYMOVE,
                (dst_addr + *PAGE_SIZE).unwrap(),
            )
            .unwrap();

            assert_eq!(remapped_addr, (dst_addr + *PAGE_SIZE).unwrap());

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (2 * *PAGE_SIZE)).unwrap(), 'c');

            check_page_eq(&current_task, dst_addr, 'x');
            check_page_eq(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'b');
        })
        .await;
    }

    /// MAP_32BIT mappings must end at or below 0x80000000.
    #[cfg(target_arch = "x86_64")]
    #[::fuchsia::test]
    async fn test_map_32_bit() {
        use starnix_uapi::PROT_WRITE;

        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            for _i in 0..256 {
                match do_mmap(
                    locked,
                    &current_task,
                    UserAddress::from(0),
                    page_size as usize,
                    PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT,
                    FdNumber::from_raw(-1),
                    0,
                ) {
                    Ok(address) => {
                        let memory_end = address.ptr() + page_size as usize;
                        assert!(memory_end <= 0x80000000);
                    }
                    error => {
                        panic!("mmap with MAP_32BIT failed: {error:?}");
                    }
                }
            }
        })
        .await;
    }
}