Skip to main content

starnix_core/mm/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::debugger::notify_debugger_of_module_list;
7use crate::mm::{
8    DesiredAddress, FutexKey, IOVecPtr, MappingName, MappingOptions, MembarrierType,
9    MemoryAccessorExt, MremapFlags, MsyncFlags, PAGE_SIZE, PrivateFutexKey, ProtectionFlags,
10    SharedFutexKey,
11};
12use crate::security;
13use crate::syscalls::time::TimeSpecPtr;
14use crate::task::CurrentTask;
15use crate::time::TargetTime;
16use crate::time::utc::estimate_boot_deadline_from_utc;
17use crate::vfs::buffers::{OutputBuffer, UserBuffersInputBuffer, UserBuffersOutputBuffer};
18use crate::vfs::{FdFlags, FdNumber, UserFaultFile};
19use fuchsia_runtime::UtcTimeline;
20use linux_uapi::MLOCK_ONFAULT;
21use starnix_logging::{CATEGORY_STARNIX_MM, log_trace, trace_duration, track_stub};
22use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
23use starnix_syscalls::SyscallArg;
24use starnix_types::time::{duration_from_timespec, time_from_timespec, timespec_from_time};
25use starnix_uapi::auth::{
26    CAP_SYS_PTRACE, PTRACE_MODE_ATTACH_REALCREDS, PTRACE_MODE_READ_REALCREDS,
27};
28use starnix_uapi::errors::{EINTR, Errno};
29use starnix_uapi::open_flags::OpenFlags;
30use starnix_uapi::user_address::{UserAddress, UserRef};
31use starnix_uapi::user_value::UserValue;
32use starnix_uapi::{
33    FUTEX_BITSET_MATCH_ANY, FUTEX_CLOCK_REALTIME, FUTEX_CMD_MASK, FUTEX_CMP_REQUEUE,
34    FUTEX_CMP_REQUEUE_PI, FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_PRIVATE_FLAG, FUTEX_REQUEUE,
35    FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI, FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
36    FUTEX_WAKE, FUTEX_WAKE_BITSET, FUTEX_WAKE_OP, MAP_ANONYMOUS, MAP_DENYWRITE, MAP_FIXED,
37    MAP_FIXED_NOREPLACE, MAP_GROWSDOWN, MAP_LOCKED, MAP_NORESERVE, MAP_POPULATE, MAP_PRIVATE,
38    MAP_SHARED, MAP_SHARED_VALIDATE, MAP_STACK, O_CLOEXEC, O_NONBLOCK, PROT_EXEC,
39    UFFD_USER_MODE_ONLY, errno, error, robust_list_head, tid_t, uapi,
40};
41use std::ops::Deref as _;
42use zx;
43
44#[cfg(target_arch = "x86_64")]
45use starnix_uapi::MAP_32BIT;
46
47// Returns any platform-specific mmap flags. This is a separate function because as of this writing
48// "attributes on expressions are experimental."
49#[cfg(target_arch = "x86_64")]
50fn get_valid_platform_mmap_flags() -> u32 {
51    MAP_32BIT
52}
53#[cfg(not(target_arch = "x86_64"))]
54fn get_valid_platform_mmap_flags() -> u32 {
55    0
56}
57
58/// sys_mmap takes a mutable reference to current_task because it may modify the IP register.
59pub fn sys_mmap(
60    locked: &mut Locked<Unlocked>,
61    current_task: &mut CurrentTask,
62    addr: UserAddress,
63    length: usize,
64    prot: u32,
65    flags: u32,
66    fd: FdNumber,
67    offset: u64,
68) -> Result<UserAddress, Errno> {
69    let user_address = do_mmap(locked, current_task, addr, length, prot, flags, fd, offset)?;
70    if prot & PROT_EXEC != 0 {
71        // Possibly loads a new module. Notify debugger for the change.
72        // We only care about dynamic linker loading modules for now, which uses mmap. In the future
73        // we might want to support unloading modules in munmap or JIT compilation in mprotect.
74        notify_debugger_of_module_list(current_task)?;
75    }
76    Ok(user_address)
77}
78
79pub fn do_mmap<L>(
80    locked: &mut Locked<L>,
81    current_task: &CurrentTask,
82    addr: UserAddress,
83    length: usize,
84    prot: u32,
85    flags: u32,
86    fd: FdNumber,
87    offset: u64,
88) -> Result<UserAddress, Errno>
89where
90    L: LockEqualOrBefore<FileOpsCore> + starnix_sync::LockBefore<starnix_sync::ThreadGroupLimits>,
91{
92    let prot_flags = ProtectionFlags::from_access_bits(prot).ok_or_else(|| {
93        track_stub!(TODO("https://fxbug.dev/322874211"), "mmap parse protection", prot);
94        errno!(EINVAL)
95    })?;
96
97    let valid_flags: u32 = get_valid_platform_mmap_flags()
98        | MAP_PRIVATE
99        | MAP_SHARED
100        | MAP_SHARED_VALIDATE
101        | MAP_ANONYMOUS
102        | MAP_FIXED
103        | MAP_FIXED_NOREPLACE
104        | MAP_POPULATE
105        | MAP_NORESERVE
106        | MAP_STACK
107        | MAP_DENYWRITE
108        | MAP_GROWSDOWN
109        | MAP_LOCKED;
110    if flags & !valid_flags != 0 {
111        if flags & MAP_SHARED_VALIDATE != 0 {
112            return error!(EOPNOTSUPP);
113        }
114        track_stub!(TODO("https://fxbug.dev/322873638"), "mmap check flags", flags);
115        return error!(EINVAL);
116    }
117
118    let file = if flags & MAP_ANONYMOUS != 0 { None } else { Some(current_task.get_file(fd)?) };
119    if flags & (MAP_PRIVATE | MAP_SHARED) == 0
120        || flags & (MAP_PRIVATE | MAP_SHARED) == MAP_PRIVATE | MAP_SHARED
121    {
122        return error!(EINVAL);
123    }
124    if length == 0 {
125        return error!(EINVAL);
126    }
127    if offset % *PAGE_SIZE != 0 {
128        return error!(EINVAL);
129    }
130
131    let page_size = *PAGE_SIZE as usize;
132    let length_aligned =
133        length.checked_add(page_size - 1).ok_or_else(|| errno!(ENOMEM))? & !(page_size - 1);
134    let rlimit_as =
135        current_task.thread_group().get_rlimit(locked, starnix_uapi::resource_limits::Resource::AS)
136            as usize;
137    let current_usage: usize = current_task.mm()?.get_total_usage();
138
139    if current_usage.saturating_add(length_aligned) > rlimit_as {
140        return error!(ENOMEM);
141    }
142
143    // TODO(tbodt): should we consider MAP_NORESERVE?
144
145    let addr = match (addr, flags & MAP_FIXED != 0, flags & MAP_FIXED_NOREPLACE != 0) {
146        (UserAddress::NULL, false, false) => DesiredAddress::Any,
147        (UserAddress::NULL, true, _) | (UserAddress::NULL, _, true) => return error!(EINVAL),
148        (addr, false, false) => DesiredAddress::Hint(addr),
149        (addr, _, true) => DesiredAddress::Fixed(addr),
150        (addr, true, false) => DesiredAddress::FixedOverwrite(addr),
151    };
152
153    let memory_offset = if flags & MAP_ANONYMOUS != 0 { 0 } else { offset };
154
155    let mut options = MappingOptions::empty();
156    if flags & MAP_SHARED != 0 {
157        options |= MappingOptions::SHARED;
158    }
159    if flags & MAP_ANONYMOUS != 0 {
160        options |= MappingOptions::ANONYMOUS;
161    }
162    #[cfg(target_arch = "x86_64")]
163    if flags & MAP_FIXED == 0 && flags & MAP_32BIT != 0 {
164        options |= MappingOptions::LOWER_32BIT;
165    }
166    if flags & MAP_GROWSDOWN != 0 {
167        options |= MappingOptions::GROWSDOWN;
168    }
169    if flags & MAP_POPULATE != 0 {
170        options |= MappingOptions::POPULATE;
171    }
172    if flags & MAP_LOCKED != 0 {
173        // The kernel isn't expected to return an error if locking fails with this flag, so for now
174        // this implementation will always fail to lock memory even if mapping succeeds.
175        track_stub!(TODO("https://fxbug.dev/406377606"), "MAP_LOCKED");
176    }
177
178    security::mmap_file(current_task, file.as_ref(), prot_flags, options)?;
179
180    if flags & MAP_ANONYMOUS != 0 {
181        trace_duration!(CATEGORY_STARNIX_MM, "AnonymousMmap");
182        current_task.mm()?.map_anonymous(addr, length, prot_flags, options, MappingName::None)
183    } else {
184        trace_duration!(CATEGORY_STARNIX_MM, "FileBackedMmap");
185        // TODO(tbodt): maximize protection flags so that mprotect works
186        let file = file.expect("file retrieved above for file-backed mapping");
187        file.mmap(
188            locked,
189            current_task,
190            addr,
191            memory_offset,
192            length,
193            prot_flags,
194            options,
195            file.name.to_passive(),
196        )
197    }
198}
199
200pub fn sys_mprotect(
201    _locked: &mut Locked<Unlocked>,
202    current_task: &CurrentTask,
203    addr: UserAddress,
204    length: usize,
205    prot: u32,
206) -> Result<(), Errno> {
207    let prot_flags = ProtectionFlags::from_bits(prot).ok_or_else(|| {
208        track_stub!(TODO("https://fxbug.dev/322874672"), "mprotect parse protection", prot);
209        errno!(EINVAL)
210    })?;
211    current_task.mm()?.protect(current_task, addr, length, prot_flags)?;
212    Ok(())
213}
214
215pub fn sys_mremap(
216    _locked: &mut Locked<Unlocked>,
217    current_task: &CurrentTask,
218    addr: UserAddress,
219    old_length: usize,
220    new_length: usize,
221    flags: u32,
222    new_addr: UserAddress,
223) -> Result<UserAddress, Errno> {
224    let flags = MremapFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
225    let addr =
226        current_task.mm()?.remap(current_task, addr, old_length, new_length, flags, new_addr)?;
227    Ok(addr)
228}
229
230pub fn sys_munmap(
231    _locked: &mut Locked<Unlocked>,
232    current_task: &CurrentTask,
233    addr: UserAddress,
234    length: usize,
235) -> Result<(), Errno> {
236    current_task.mm()?.unmap(addr, length)?;
237    Ok(())
238}
239
240pub fn sys_msync(
241    locked: &mut Locked<Unlocked>,
242    current_task: &CurrentTask,
243    addr: UserAddress,
244    length: usize,
245    flags: u32,
246) -> Result<(), Errno> {
247    let flags = MsyncFlags::from_bits_retain(flags);
248    current_task.mm()?.msync(locked, current_task, addr, length, flags)
249}
250
251pub fn sys_madvise(
252    _locked: &mut Locked<Unlocked>,
253    current_task: &CurrentTask,
254    addr: UserAddress,
255    length: usize,
256    advice: u32,
257) -> Result<(), Errno> {
258    current_task.mm()?.madvise(addr, length, advice)?;
259    Ok(())
260}
261
/// Stub for the `process_madvise` syscall.
///
/// None of the arguments are inspected: the call is recorded through `track_stub!` and
/// `ENOSYS` is returned unconditionally.
pub fn sys_process_madvise(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _pidfd: FdNumber,
    _iovec_addr: IOVecPtr,
    _iovec_count: UserValue<i32>,
    _advice: UserValue<i32>,
    _flags: UserValue<u32>,
) -> Result<usize, Errno> {
    track_stub!(TODO("https://fxbug.dev/409060664"), "process_madvise");
    error!(ENOSYS)
}
274
275pub fn sys_brk(
276    locked: &mut Locked<Unlocked>,
277    current_task: &CurrentTask,
278    addr: UserAddress,
279) -> Result<UserAddress, Errno> {
280    current_task.mm()?.set_brk(locked, current_task, addr)
281}
282
283pub fn sys_process_vm_readv(
284    locked: &mut Locked<Unlocked>,
285    current_task: &CurrentTask,
286    tid: tid_t,
287    local_iov_addr: IOVecPtr,
288    local_iov_count: UserValue<i32>,
289    remote_iov_addr: IOVecPtr,
290    remote_iov_count: UserValue<i32>,
291    flags: usize,
292) -> Result<usize, Errno> {
293    if flags != 0 {
294        return error!(EINVAL);
295    }
296
297    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
298    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
299    // make sure to return Ok(0) before doing any other validation/operations.
300    if (local_iov_count == 0 && local_iov_addr.is_null())
301        || (remote_iov_count == 0 && remote_iov_addr.is_null())
302    {
303        return Ok(0);
304    }
305
306    let remote_task = current_task.get_task(tid)?;
307
308    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
309
310    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
311    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
312    log_trace!(
313        "process_vm_readv(tid={}, local_iov={:?}, remote_iov={:?})",
314        tid,
315        local_iov,
316        remote_iov
317    );
318
319    track_stub!(TODO("https://fxbug.dev/322874765"), "process_vm_readv single-copy");
320    // According to the man page, this syscall was added to Linux specifically to
321    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
322    // point.
323    let mut output = UserBuffersOutputBuffer::unified_new(current_task, local_iov)?;
324    let remote_mm = remote_task.mm().ok();
325    if current_task.has_same_address_space(remote_mm.as_ref()) {
326        let mut input = UserBuffersInputBuffer::unified_new(current_task, remote_iov)?;
327        output.write_buffer(&mut input)
328    } else {
329        let mut input = UserBuffersInputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
330        output.write_buffer(&mut input)
331    }
332}
333
334pub fn sys_process_vm_writev(
335    locked: &mut Locked<Unlocked>,
336    current_task: &CurrentTask,
337    tid: tid_t,
338    local_iov_addr: IOVecPtr,
339    local_iov_count: UserValue<i32>,
340    remote_iov_addr: IOVecPtr,
341    remote_iov_count: UserValue<i32>,
342    flags: usize,
343) -> Result<usize, Errno> {
344    if flags != 0 {
345        return error!(EINVAL);
346    }
347
348    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
349    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
350    // make sure to return Ok(0) before doing any other validation/operations.
351    if (local_iov_count == 0 && local_iov_addr.is_null())
352        || (remote_iov_count == 0 && remote_iov_addr.is_null())
353    {
354        return Ok(0);
355    }
356
357    let remote_task = current_task.get_task(tid)?;
358
359    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
360
361    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
362    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
363    log_trace!(
364        "sys_process_vm_writev(tid={}, local_iov={:?}, remote_iov={:?})",
365        tid,
366        local_iov,
367        remote_iov
368    );
369
370    track_stub!(TODO("https://fxbug.dev/322874339"), "process_vm_writev single-copy");
371    // NB: According to the man page, this syscall was added to Linux specifically to
372    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
373    // point.
374    let mut input = UserBuffersInputBuffer::unified_new(current_task, local_iov)?;
375    let remote_mm = remote_task.mm().ok();
376    if current_task.has_same_address_space(remote_mm.as_ref()) {
377        let mut output = UserBuffersOutputBuffer::unified_new(current_task, remote_iov)?;
378        output.write_buffer(&mut input)
379    } else {
380        let mut output = UserBuffersOutputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
381        output.write_buffer(&mut input)
382    }
383}
384
385pub fn sys_process_mrelease(
386    _locked: &mut Locked<Unlocked>,
387    current_task: &CurrentTask,
388    pidfd: FdNumber,
389    flags: u32,
390) -> Result<(), Errno> {
391    if flags != 0 {
392        return error!(EINVAL);
393    }
394    let file = current_task.get_file(pidfd)?;
395    let task = current_task.get_task(file.as_thread_group_key()?.pid())?;
396    if !task.load_stopped().is_stopped() {
397        return error!(EINVAL);
398    }
399
400    task.mm()?.mrelease()
401}
402
403pub fn sys_membarrier(
404    _locked: &mut Locked<Unlocked>,
405    current_task: &CurrentTask,
406    cmd: uapi::membarrier_cmd,
407    _flags: u32,
408    _cpu_id: i32,
409) -> Result<u32, Errno> {
410    match cmd {
411        // This command returns a bit mask of all supported commands.
412        // We support everything except for the RSEQ family.
413        uapi::membarrier_cmd_MEMBARRIER_CMD_QUERY => Ok(uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
414            | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED
415            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED
416            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED
417            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED
418            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
419            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE),
420        // Global and global expedited barriers are treated identically. We don't track
421        // registration for global expedited barriers currently.
422        uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
423        | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED => {
424            system_barrier(BarrierType::DataMemory);
425            Ok(0)
426        }
427        // Global registration commands are ignored.
428        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED => Ok(0),
429        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED => {
430            // A private expedited barrier is only issued if the address space is registered
431            // for these barriers.
432            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::Memory) {
433                // If a barrier is requested, issue a global barrier.
434                system_barrier(BarrierType::DataMemory);
435                Ok(0)
436            } else {
437                error!(EPERM)
438            }
439        }
440        // Private sync core barriers are treated as global instruction stream barriers.
441        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE => {
442            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::SyncCore)
443            {
444                system_barrier(BarrierType::InstructionStream);
445                Ok(0)
446            } else {
447                error!(EPERM)
448            }
449        }
450        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED => {
451            let _ =
452                current_task.mm()?.register_membarrier_private_expedited(MembarrierType::Memory)?;
453            Ok(0)
454        }
455
456        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE => {
457            let _ = current_task
458                .mm()?
459                .register_membarrier_private_expedited(MembarrierType::SyncCore)?;
460            Ok(0)
461        }
462        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ => {
463            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
464            error!(ENOSYS)
465        }
466        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ => {
467            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
468            error!(ENOSYS)
469        }
470        _ => error!(EINVAL),
471    }
472}
473
474pub fn sys_userfaultfd(
475    locked: &mut Locked<Unlocked>,
476    current_task: &CurrentTask,
477    raw_flags: u32,
478) -> Result<FdNumber, Errno> {
479    let unknown_flags = raw_flags & !(O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
480    if unknown_flags != 0 {
481        return error!(EINVAL, format!("unknown flags provided: {unknown_flags:x?}"));
482    }
483    let mut open_flags = OpenFlags::empty();
484    if raw_flags & O_NONBLOCK != 0 {
485        open_flags |= OpenFlags::NONBLOCK;
486    }
487    if raw_flags & O_CLOEXEC != 0 {
488        open_flags |= OpenFlags::CLOEXEC;
489    }
490
491    let fd_flags = if raw_flags & O_CLOEXEC != 0 {
492        FdFlags::CLOEXEC
493    } else {
494        track_stub!(TODO("https://fxbug.dev/297375964"), "userfaultfds that survive exec()");
495        return error!(ENOSYS);
496    };
497
498    let user_mode_only = raw_flags & UFFD_USER_MODE_ONLY != 0;
499    if !user_mode_only {
500        security::check_task_capable(current_task, CAP_SYS_PTRACE)?;
501    }
502    let uff_handle = UserFaultFile::new(locked, current_task, open_flags, user_mode_only)?;
503    current_task.add_file(locked, uff_handle, fd_flags)
504}
505
506pub fn sys_futex(
507    locked: &mut Locked<Unlocked>,
508    current_task: &mut CurrentTask,
509    addr: UserAddress,
510    op: u32,
511    value: u32,
512    timeout_or_value2: SyscallArg,
513    addr2: UserAddress,
514    value3: u32,
515) -> Result<usize, Errno> {
516    if op & FUTEX_PRIVATE_FLAG != 0 {
517        do_futex::<PrivateFutexKey>(
518            locked,
519            current_task,
520            addr,
521            op,
522            value,
523            timeout_or_value2,
524            addr2,
525            value3,
526        )
527    } else {
528        do_futex::<SharedFutexKey>(
529            locked,
530            current_task,
531            addr,
532            op,
533            value,
534            timeout_or_value2,
535            addr2,
536            value3,
537        )
538    }
539}
540
541fn do_futex<Key: FutexKey>(
542    locked: &mut Locked<Unlocked>,
543    current_task: &mut CurrentTask,
544    addr: UserAddress,
545    op: u32,
546    value: u32,
547    timeout_or_value2: SyscallArg,
548    addr2: UserAddress,
549    value3: u32,
550) -> Result<usize, Errno> {
551    let futexes = Key::get_table_from_task(current_task)?;
552    let cmd = op & (FUTEX_CMD_MASK as u32);
553
554    let is_realtime = match (cmd, op & FUTEX_CLOCK_REALTIME != 0) {
555        // This option bit can be employed only with the FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
556        // (since Linux 4.5) FUTEX_WAIT, and (since Linux 5.14) FUTEX_LOCK_PI2 operations.
557        (FUTEX_WAIT_BITSET | FUTEX_WAIT_REQUEUE_PI | FUTEX_WAIT | FUTEX_LOCK_PI2, true) => true,
558        (_, true) => return error!(EINVAL),
559
560        // FUTEX_LOCK_PI always uses realtime.
561        (FUTEX_LOCK_PI, _) => true,
562
563        (_, false) => false,
564    };
565
566    // The timeout is interpreted differently by WAIT and WAIT_BITSET: WAIT takes a
567    // timeout and WAIT_BITSET takes a deadline.
568    let read_timespec = |current_task: &CurrentTask| {
569        let utime = TimeSpecPtr::new(current_task, timeout_or_value2);
570        if utime.is_null() {
571            Ok(timespec_from_time(zx::MonotonicInstant::INFINITE))
572        } else {
573            current_task.read_multi_arch_object(utime)
574        }
575    };
576    let read_timeout = |current_task: &CurrentTask| {
577        let timespec = read_timespec(current_task)?;
578        let timeout = duration_from_timespec(timespec);
579        let deadline = zx::MonotonicInstant::after(timeout?);
580        if is_realtime {
581            // Since this is a timeout, waiting on the monotonic timeline before it's paused is
582            // just as good as actually estimating UTC here.
583            track_stub!(TODO("https://fxbug.dev/356912301"), "FUTEX_CLOCK_REALTIME timeout");
584        }
585        Ok(deadline)
586    };
587    let read_deadline = |current_task: &CurrentTask| {
588        let timespec = read_timespec(current_task)?;
589        if is_realtime {
590            Ok(TargetTime::RealTime(time_from_timespec::<UtcTimeline>(timespec)?))
591        } else {
592            Ok(TargetTime::Monotonic(time_from_timespec::<zx::MonotonicTimeline>(timespec)?))
593        }
594    };
595
596    match cmd {
597        FUTEX_WAIT => {
598            let deadline = read_timeout(current_task)?;
599            let bitset = FUTEX_BITSET_MATCH_ANY;
600            do_futex_wait_with_restart::<Key>(
601                locked,
602                current_task,
603                addr,
604                value,
605                bitset,
606                TargetTime::Monotonic(deadline),
607            )?;
608            Ok(0)
609        }
610        FUTEX_WAKE => {
611            futexes.wake(locked, current_task, addr, value as usize, FUTEX_BITSET_MATCH_ANY)
612        }
613        FUTEX_WAKE_OP => {
614            track_stub!(TODO("https://fxbug.dev/361181940"), "FUTEX_WAKE_OP");
615            error!(ENOSYS)
616        }
617        FUTEX_WAIT_BITSET => {
618            if value3 == 0 {
619                return error!(EINVAL);
620            }
621            let deadline = read_deadline(current_task)?;
622            do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, value3, deadline)?;
623            Ok(0)
624        }
625        FUTEX_WAKE_BITSET => {
626            if value3 == 0 {
627                return error!(EINVAL);
628            }
629            futexes.wake(locked, current_task, addr, value as usize, value3)
630        }
631        FUTEX_REQUEUE | FUTEX_CMP_REQUEUE => {
632            let wake_count = value as usize;
633            let requeue_count: usize = timeout_or_value2.into();
634            if wake_count > std::i32::MAX as usize || requeue_count > std::i32::MAX as usize {
635                return error!(EINVAL);
636            }
637            let expected_value = if cmd == FUTEX_CMP_REQUEUE { Some(value3) } else { None };
638            futexes.requeue(
639                locked,
640                current_task,
641                addr,
642                wake_count,
643                requeue_count,
644                addr2,
645                expected_value,
646            )
647        }
648        FUTEX_WAIT_REQUEUE_PI => {
649            track_stub!(TODO("https://fxbug.dev/361181558"), "FUTEX_WAIT_REQUEUE_PI");
650            error!(ENOSYS)
651        }
652        FUTEX_CMP_REQUEUE_PI => {
653            track_stub!(TODO("https://fxbug.dev/361181773"), "FUTEX_CMP_REQUEUE_PI");
654            error!(ENOSYS)
655        }
656        FUTEX_LOCK_PI | FUTEX_LOCK_PI2 => {
657            futexes.lock_pi(locked, current_task, addr, read_timeout(current_task)?)?;
658            Ok(0)
659        }
660        FUTEX_TRYLOCK_PI => {
661            track_stub!(TODO("https://fxbug.dev/361175318"), "FUTEX_TRYLOCK_PI");
662            error!(ENOSYS)
663        }
664        FUTEX_UNLOCK_PI => {
665            futexes.unlock_pi(locked, current_task, addr)?;
666            Ok(0)
667        }
668        _ => {
669            track_stub!(TODO("https://fxbug.dev/322875124"), "futex unknown command", cmd);
670            error!(ENOSYS)
671        }
672    }
673}
674
675fn do_futex_wait_with_restart<Key: FutexKey>(
676    locked: &mut Locked<Unlocked>,
677    current_task: &mut CurrentTask,
678    addr: UserAddress,
679    value: u32,
680    mask: u32,
681    deadline: TargetTime,
682) -> Result<(), Errno> {
683    let futexes = Key::get_table_from_task(current_task)?;
684    let result = match deadline {
685        TargetTime::Monotonic(mono_deadline) => {
686            futexes.wait(locked, current_task, addr, value, mask, mono_deadline)
687        }
688        TargetTime::BootInstant(boot_deadline) => {
689            let timer_slack = current_task.read().get_timerslack();
690            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
691        }
692        TargetTime::RealTime(utc_deadline) => {
693            // We convert real time deadlines to boot time deadlines since we cannot wait using a UTC deadline.
694            let (boot_deadline, _) = estimate_boot_deadline_from_utc(utc_deadline);
695            let timer_slack = current_task.read().get_timerslack();
696            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
697        }
698    };
699    match result {
700        Err(err) if err == EINTR => {
701            current_task.set_syscall_restart_func(move |locked, current_task| {
702                do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, mask, deadline)
703            });
704            error!(ERESTART_RESTARTBLOCK)
705        }
706        result => result,
707    }
708}
709
710pub fn sys_get_robust_list(
711    locked: &mut Locked<Unlocked>,
712    current_task: &CurrentTask,
713    tid: tid_t,
714    user_head_ptr: UserRef<UserAddress>,
715    user_len_ptr: UserRef<usize>,
716) -> Result<(), Errno> {
717    if tid < 0 {
718        return error!(EINVAL);
719    }
720    if user_head_ptr.is_null() || user_len_ptr.is_null() {
721        return error!(EFAULT);
722    }
723    let task = if tid == 0 {
724        current_task.task.clone()
725    } else {
726        let task = current_task.get_task(tid)?;
727        current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task)?;
728        task
729    };
730    current_task.write_object(user_head_ptr, &task.read().robust_list_head.addr())?;
731    current_task.write_object(user_len_ptr, &std::mem::size_of::<robust_list_head>())?;
732    Ok(())
733}
734
735pub fn sys_set_robust_list(
736    _locked: &mut Locked<Unlocked>,
737    current_task: &CurrentTask,
738    user_head: UserRef<robust_list_head>,
739    len: usize,
740) -> Result<(), Errno> {
741    if len != std::mem::size_of::<robust_list_head>() {
742        return error!(EINVAL);
743    }
744    current_task.write().robust_list_head = user_head.into();
745    Ok(())
746}
747
748pub fn sys_mlock(
749    locked: &mut Locked<Unlocked>,
750    current_task: &CurrentTask,
751    addr: UserAddress,
752    length: usize,
753) -> Result<(), Errno> {
754    // If flags is 0, mlock2() behaves exactly the same as mlock().
755    sys_mlock2(locked, current_task, addr, length, 0)
756}
757
758pub fn sys_mlock2(
759    locked: &mut Locked<Unlocked>,
760    current_task: &CurrentTask,
761    addr: UserAddress,
762    length: usize,
763    flags: u64,
764) -> Result<(), Errno> {
765    const KNOWN_FLAGS: u64 = MLOCK_ONFAULT as u64;
766    if (flags & !KNOWN_FLAGS) != 0 {
767        return error!(EINVAL);
768    }
769    let on_fault = flags & MLOCK_ONFAULT as u64 != 0;
770    current_task.mm()?.mlock(current_task, locked, addr, length, on_fault)
771}
772
/// Implements the `munlock` syscall by forwarding `addr` and `length` to the memory
/// manager's `munlock` and returning its result.
pub fn sys_munlock(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
) -> Result<(), Errno> {
    current_task.mm()?.munlock(current_task, addr, length)
}
781
/// Stub for the `mlockall` syscall: records the call through `track_stub!` and returns
/// `ENOSYS` without inspecting `_flags`.
pub fn sys_mlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "mlockall()");
    error!(ENOSYS)
}
790
/// Stub for the `munlockall` syscall: records the call through `track_stub!` and returns
/// `ENOSYS` without inspecting `_flags`.
pub fn sys_munlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "munlockall()");
    error!(ENOSYS)
}
799
/// Stub for the `mincore` syscall: records the call through `track_stub!` and returns
/// `ENOSYS`. None of the arguments are inspected and `_out` is never written.
pub fn sys_mincore(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _addr: UserAddress,
    _length: usize,
    _out: UserRef<u8>,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297372240"), "mincore()");
    error!(ENOSYS)
}
810
811// Syscalls for arch32 usage
812#[cfg(target_arch = "aarch64")]
813mod arch32 {
814    use crate::mm::PAGE_SIZE;
815    use crate::mm::memory_accessor::MemoryAccessorExt;
816    use crate::mm::syscalls::{UserAddress, sys_mmap};
817    use crate::task::{CurrentTask, RobustListHeadPtr};
818    use crate::vfs::FdNumber;
819    use starnix_sync::{Locked, Unlocked};
820    use starnix_uapi::auth::PTRACE_MODE_READ_REALCREDS;
821    use starnix_uapi::errors::Errno;
822    use starnix_uapi::user_address::UserRef;
823    use starnix_uapi::{error, uapi};
824
825    pub fn sys_arch32_set_robust_list(
826        _locked: &mut Locked<Unlocked>,
827        current_task: &CurrentTask,
828        user_head: UserRef<uapi::arch32::robust_list_head>,
829        len: usize,
830    ) -> Result<(), Errno> {
831        if len != std::mem::size_of::<uapi::arch32::robust_list_head>() {
832            return error!(EINVAL);
833        }
834        current_task.write().robust_list_head = RobustListHeadPtr::from_32(user_head);
835        Ok(())
836    }
837
838    pub fn sys_arch32_get_robust_list(
839        locked: &mut Locked<Unlocked>,
840        current_task: &CurrentTask,
841        tid: starnix_uapi::tid_t,
842        user_head_ptr: UserRef<u32>,
843        user_len_ptr: UserRef<u32>,
844    ) -> Result<(), Errno> {
845        if tid < 0 {
846            return error!(EINVAL);
847        }
848        if user_head_ptr.is_null() || user_len_ptr.is_null() {
849            return error!(EFAULT);
850        }
851        let task = if tid == 0 {
852            current_task.task.clone()
853        } else {
854            let task = current_task.get_task(tid)?;
855            current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task)?;
856            task
857        };
858
859        let addr = task.read().robust_list_head.addr().ptr() as u32;
860        current_task.write_object(user_head_ptr, &addr)?;
861        current_task.write_object(
862            user_len_ptr,
863            &(std::mem::size_of::<uapi::arch32::robust_list_head>() as u32),
864        )?;
865        Ok(())
866    }
867
868    pub fn sys_arch32_mmap2(
869        locked: &mut Locked<Unlocked>,
870        current_task: &mut CurrentTask,
871        addr: UserAddress,
872        length: usize,
873        prot: u32,
874        flags: u32,
875        fd: FdNumber,
876        offset: u64,
877    ) -> Result<UserAddress, Errno> {
878        sys_mmap(locked, current_task, addr, length, prot, flags, fd, offset * *PAGE_SIZE)
879    }
880
881    pub fn sys_arch32_munmap(
882        _locked: &mut Locked<Unlocked>,
883        current_task: &CurrentTask,
884        addr: UserAddress,
885        length: usize,
886    ) -> Result<(), Errno> {
887        if !addr.is_lower_32bit() || length >= (1 << 32) {
888            return error!(EINVAL);
889        }
890        current_task.mm()?.unmap(addr, length)?;
891        Ok(())
892    }
893
894    pub use super::{
895        sys_futex as sys_arch32_futex, sys_madvise as sys_arch32_madvise,
896        sys_membarrier as sys_arch32_membarrier, sys_mincore as sys_arch32_mincore,
897        sys_mlock as sys_arch32_mlock, sys_mlock2 as sys_arch32_mlock2,
898        sys_mlockall as sys_arch32_mlockall, sys_mremap as sys_arch32_mremap,
899        sys_msync as sys_arch32_msync, sys_munlock as sys_arch32_munlock,
900        sys_munlockall as sys_arch32_munlockall,
901        sys_process_mrelease as sys_arch32_process_mrelease,
902        sys_process_vm_readv as sys_arch32_process_vm_readv,
903        sys_userfaultfd as sys_arch32_userfaultfd,
904    };
905}
906
907#[cfg(target_arch = "aarch64")]
908pub use arch32::*;
909
910#[cfg(test)]
911mod tests {
912    use super::*;
913    use crate::mm::memory::MemoryObject;
914    use crate::testing::*;
915    use starnix_uapi::errors::EEXIST;
916    use starnix_uapi::file_mode::Access;
917    use starnix_uapi::{MREMAP_FIXED, MREMAP_MAYMOVE, PROT_READ};
918
919    #[::fuchsia::test]
920    async fn test_mmap_with_colliding_hint() {
921        spawn_kernel_and_run(async |locked, current_task| {
922            let page_size = *PAGE_SIZE;
923
924            let mapped_address =
925                map_memory(locked, &current_task, UserAddress::default(), page_size);
926            match do_mmap(
927                locked,
928                &current_task,
929                mapped_address,
930                page_size as usize,
931                PROT_READ,
932                MAP_PRIVATE | MAP_ANONYMOUS,
933                FdNumber::from_raw(-1),
934                0,
935            ) {
936                Ok(address) => {
937                    assert_ne!(address, mapped_address);
938                }
939                error => {
940                    panic!("mmap with colliding hint failed: {error:?}");
941                }
942            }
943        })
944        .await;
945    }
946
947    #[::fuchsia::test]
948    async fn test_mmap_with_fixed_collision() {
949        spawn_kernel_and_run(async |locked, current_task| {
950            let page_size = *PAGE_SIZE;
951
952            let mapped_address =
953                map_memory(locked, &current_task, UserAddress::default(), page_size);
954            match do_mmap(
955                locked,
956                &current_task,
957                mapped_address,
958                page_size as usize,
959                PROT_READ,
960                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
961                FdNumber::from_raw(-1),
962                0,
963            ) {
964                Ok(address) => {
965                    assert_eq!(address, mapped_address);
966                }
967                error => {
968                    panic!("mmap with fixed collision failed: {error:?}");
969                }
970            }
971        })
972        .await;
973    }
974
975    #[::fuchsia::test]
976    async fn test_mmap_with_fixed_noreplace_collision() {
977        spawn_kernel_and_run(async |locked, current_task| {
978            let page_size = *PAGE_SIZE;
979
980            let mapped_address =
981                map_memory(locked, &current_task, UserAddress::default(), page_size);
982            match do_mmap(
983                locked,
984                &current_task,
985                mapped_address,
986                page_size as usize,
987                PROT_READ,
988                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
989                FdNumber::from_raw(-1),
990                0,
991            ) {
992                Err(errno) => {
993                    assert_eq!(errno, EEXIST);
994                }
995                result => {
996                    panic!("mmap with fixed_noreplace collision failed: {result:?}");
997                }
998            }
999        })
1000        .await;
1001    }
1002
1003    /// It is ok to call munmap with an address that is a multiple of the page size, and
1004    /// a non-zero length.
1005    #[::fuchsia::test]
1006    async fn test_munmap() {
1007        spawn_kernel_and_run(async |locked, current_task| {
1008            let mapped_address =
1009                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
1010            assert_eq!(
1011                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
1012                Ok(())
1013            );
1014
1015            // Verify that the memory is no longer readable.
1016            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
1017        })
1018        .await;
1019    }
1020
1021    /// It is ok to call munmap on an unmapped range.
1022    #[::fuchsia::test]
1023    async fn test_munmap_not_mapped() {
1024        spawn_kernel_and_run(async |locked, current_task| {
1025            let mapped_address =
1026                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
1027            assert_eq!(
1028                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
1029                Ok(())
1030            );
1031            assert_eq!(
1032                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
1033                Ok(())
1034            );
1035        })
1036        .await;
1037    }
1038
1039    /// It is an error to call munmap with a length of 0.
1040    #[::fuchsia::test]
1041    async fn test_munmap_0_length() {
1042        spawn_kernel_and_run(async |locked, current_task| {
1043            let mapped_address =
1044                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
1045            assert_eq!(sys_munmap(locked, &current_task, mapped_address, 0), error!(EINVAL));
1046        })
1047        .await;
1048    }
1049
1050    /// It is an error to call munmap with an address that is not a multiple of the page size.
1051    #[::fuchsia::test]
1052    async fn test_munmap_not_aligned() {
1053        spawn_kernel_and_run(async |locked, current_task| {
1054            let mapped_address =
1055                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
1056            assert_eq!(
1057                sys_munmap(
1058                    locked,
1059                    &current_task,
1060                    (mapped_address + 1u64).unwrap(),
1061                    *PAGE_SIZE as usize
1062                ),
1063                error!(EINVAL)
1064            );
1065
1066            // Verify that the memory is still readable.
1067            assert!(current_task.read_memory_to_array::<5>(mapped_address).is_ok());
1068        })
1069        .await;
1070    }
1071
1072    /// The entire page should be unmapped, not just the range [address, address + length).
1073    #[::fuchsia::test]
1074    async fn test_munmap_unmap_partial() {
1075        spawn_kernel_and_run(async |locked, current_task| {
1076            let mapped_address =
1077                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
1078            assert_eq!(
1079                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) / 2),
1080                Ok(())
1081            );
1082
1083            // Verify that memory can't be read in either half of the page.
1084            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
1085            assert_eq!(
1086                current_task
1087                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE - 2)).unwrap()),
1088                error!(EFAULT)
1089            );
1090        })
1091        .await;
1092    }
1093
1094    /// All pages that intersect the munmap range should be unmapped.
1095    #[::fuchsia::test]
1096    async fn test_munmap_multiple_pages() {
1097        spawn_kernel_and_run(async |locked, current_task| {
1098            let mapped_address =
1099                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
1100            assert_eq!(
1101                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) + 1),
1102                Ok(())
1103            );
1104
1105            // Verify that neither page is readable.
1106            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
1107            assert_eq!(
1108                current_task
1109                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap()),
1110                error!(EFAULT)
1111            );
1112        })
1113        .await;
1114    }
1115
1116    /// Only the pages that intersect the munmap range should be unmapped.
1117    #[::fuchsia::test]
1118    async fn test_munmap_one_of_many_pages() {
1119        spawn_kernel_and_run(async |locked, current_task| {
1120            let mapped_address =
1121                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
1122            assert_eq!(
1123                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) - 1),
1124                Ok(())
1125            );
1126
1127            // Verify that the second page is still readable.
1128            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
1129            assert!(
1130                current_task
1131                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap())
1132                    .is_ok()
1133            );
1134        })
1135        .await;
1136    }
1137
1138    /// Unmap the middle page of a mapping.
1139    #[::fuchsia::test]
1140    async fn test_munmap_middle_page() {
1141        spawn_kernel_and_run(async |locked, current_task| {
1142            let mapped_address =
1143                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1144            assert_eq!(
1145                sys_munmap(
1146                    locked,
1147                    &current_task,
1148                    (mapped_address + *PAGE_SIZE).unwrap(),
1149                    *PAGE_SIZE as usize
1150                ),
1151                Ok(())
1152            );
1153
1154            // Verify that the first and third pages are still readable.
1155            assert!(current_task.read_memory_to_vec(mapped_address, 5).is_ok());
1156            assert_eq!(
1157                current_task.read_memory_to_vec((mapped_address + *PAGE_SIZE).unwrap(), 5),
1158                error!(EFAULT)
1159            );
1160            assert!(
1161                current_task
1162                    .read_memory_to_vec((mapped_address + (*PAGE_SIZE * 2)).unwrap(), 5)
1163                    .is_ok()
1164            );
1165        })
1166        .await;
1167    }
1168
1169    /// Unmap a range of pages that includes disjoint mappings.
1170    #[::fuchsia::test]
1171    async fn test_munmap_many_mappings() {
1172        spawn_kernel_and_run(async |locked, current_task| {
1173            let mapped_addresses: Vec<_> = std::iter::repeat_with(|| {
1174                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE)
1175            })
1176            .take(3)
1177            .collect();
1178            let min_address = *mapped_addresses.iter().min().unwrap();
1179            let max_address = *mapped_addresses.iter().max().unwrap();
1180            let unmap_length = (max_address - min_address) + *PAGE_SIZE as usize;
1181
1182            assert_eq!(sys_munmap(locked, &current_task, min_address, unmap_length), Ok(()));
1183
1184            // Verify that none of the mapped pages are readable.
1185            for mapped_address in mapped_addresses {
1186                assert_eq!(current_task.read_memory_to_vec(mapped_address, 5), error!(EFAULT));
1187            }
1188        })
1189        .await;
1190    }
1191
1192    #[::fuchsia::test]
1193    async fn test_msync_validates_address_range() {
1194        spawn_kernel_and_run(async |locked, current_task| {
1195            // Map 3 pages and test that ranges covering these pages return no error.
1196            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1197            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
1198            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
1199            assert_eq!(
1200                sys_msync(
1201                    locked,
1202                    &current_task,
1203                    (addr + *PAGE_SIZE).unwrap(),
1204                    *PAGE_SIZE as usize * 2,
1205                    0
1206                ),
1207                Ok(())
1208            );
1209
1210            // Unmap the middle page and test that ranges covering that page return ENOMEM.
1211            sys_munmap(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE as usize)
1212                .expect("unmap middle");
1213            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize, 0), Ok(()));
1214            assert_eq!(
1215                sys_msync(
1216                    locked,
1217                    &current_task,
1218                    addr,
1219                    *PAGE_SIZE as usize * 3,
1220                    starnix_uapi::MS_SYNC
1221                ),
1222                error!(ENOMEM)
1223            );
1224            assert_eq!(
1225                sys_msync(
1226                    locked,
1227                    &current_task,
1228                    addr,
1229                    *PAGE_SIZE as usize * 2,
1230                    starnix_uapi::MS_SYNC
1231                ),
1232                error!(ENOMEM)
1233            );
1234            assert_eq!(
1235                sys_msync(
1236                    locked,
1237                    &current_task,
1238                    (addr + *PAGE_SIZE).unwrap(),
1239                    *PAGE_SIZE as usize * 2,
1240                    starnix_uapi::MS_SYNC
1241                ),
1242                error!(ENOMEM)
1243            );
1244            assert_eq!(
1245                sys_msync(
1246                    locked,
1247                    &current_task,
1248                    (addr + (*PAGE_SIZE * 2)).unwrap(),
1249                    *PAGE_SIZE as usize,
1250                    0
1251                ),
1252                Ok(())
1253            );
1254
1255            // Map the middle page back and test that ranges covering the three pages
1256            // (spanning multiple ranges) return no error.
1257            assert_eq!(
1258                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
1259                (addr + *PAGE_SIZE).unwrap()
1260            );
1261            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
1262            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
1263            assert_eq!(
1264                sys_msync(
1265                    locked,
1266                    &current_task,
1267                    (addr + *PAGE_SIZE).unwrap(),
1268                    *PAGE_SIZE as usize * 2,
1269                    0
1270                ),
1271                Ok(())
1272            );
1273        })
1274        .await;
1275    }
1276
1277    /// Shrinks an entire range.
1278    #[::fuchsia::test]
1279    async fn test_mremap_shrink_whole_range_from_end() {
1280        spawn_kernel_and_run(async |locked, current_task| {
1281            // Map 2 pages.
1282            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
1283            fill_page(&current_task, addr, 'a');
1284            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1285
1286            // Shrink the mapping from 2 to 1 pages.
1287            assert_eq!(
1288                remap_memory(
1289                    locked,
1290                    &current_task,
1291                    addr,
1292                    *PAGE_SIZE * 2,
1293                    *PAGE_SIZE,
1294                    0,
1295                    UserAddress::default()
1296                ),
1297                Ok(addr)
1298            );
1299
1300            check_page_eq(&current_task, addr, 'a');
1301            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1302        })
1303        .await;
1304    }
1305
1306    /// Shrinks part of a range, introducing a hole in the middle.
1307    #[::fuchsia::test]
1308    async fn test_mremap_shrink_partial_range() {
1309        spawn_kernel_and_run(async |locked, current_task| {
1310            // Map 3 pages.
1311            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1312            fill_page(&current_task, addr, 'a');
1313            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1314            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1315
1316            // Shrink the first 2 pages down to 1, creating a hole.
1317            assert_eq!(
1318                remap_memory(
1319                    locked,
1320                    &current_task,
1321                    addr,
1322                    *PAGE_SIZE * 2,
1323                    *PAGE_SIZE,
1324                    0,
1325                    UserAddress::default()
1326                ),
1327                Ok(addr)
1328            );
1329
1330            check_page_eq(&current_task, addr, 'a');
1331            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1332            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1333        })
1334        .await;
1335    }
1336
1337    /// Shrinking doesn't care if the range specified spans multiple mappings.
1338    #[::fuchsia::test]
1339    async fn test_mremap_shrink_across_ranges() {
1340        spawn_kernel_and_run(async |locked, current_task| {
1341            // Map 3 pages, unmap the middle, then map the middle again. This will leave us with
1342            // 3 contiguous mappings.
1343            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1344            assert_eq!(
1345                sys_munmap(
1346                    locked,
1347                    &current_task,
1348                    (addr + *PAGE_SIZE).unwrap(),
1349                    *PAGE_SIZE as usize
1350                ),
1351                Ok(())
1352            );
1353            assert_eq!(
1354                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
1355                (addr + *PAGE_SIZE).unwrap()
1356            );
1357
1358            fill_page(&current_task, addr, 'a');
1359            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1360            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1361
1362            // Remap over all three mappings, shrinking to 1 page.
1363            assert_eq!(
1364                remap_memory(
1365                    locked,
1366                    &current_task,
1367                    addr,
1368                    *PAGE_SIZE * 3,
1369                    *PAGE_SIZE,
1370                    0,
1371                    UserAddress::default()
1372                ),
1373                Ok(addr)
1374            );
1375
1376            check_page_eq(&current_task, addr, 'a');
1377            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1378            check_unmapped(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap());
1379        })
1380        .await;
1381    }
1382
1383    /// Grows a mapping in-place.
1384    #[::fuchsia::test]
1385    async fn test_mremap_grow_in_place() {
1386        spawn_kernel_and_run(async |locked, current_task| {
1387            // Map 3 pages, unmap the middle, leaving a hole.
1388            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1389            fill_page(&current_task, addr, 'a');
1390            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1391            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1392            assert_eq!(
1393                sys_munmap(
1394                    locked,
1395                    &current_task,
1396                    (addr + *PAGE_SIZE).unwrap(),
1397                    *PAGE_SIZE as usize
1398                ),
1399                Ok(())
1400            );
1401
1402            // Grow the first page in-place into the middle.
1403            assert_eq!(
1404                remap_memory(
1405                    locked,
1406                    &current_task,
1407                    addr,
1408                    *PAGE_SIZE,
1409                    *PAGE_SIZE * 2,
1410                    0,
1411                    UserAddress::default()
1412                ),
1413                Ok(addr)
1414            );
1415
1416            check_page_eq(&current_task, addr, 'a');
1417
1418            // The middle page should be new, and not just pointing to the original middle page filled
1419            // with 'b'.
1420            check_page_ne(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1421
1422            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1423        })
1424        .await;
1425    }
1426
1427    /// Tries to grow a set of pages that cannot fit, and forces a move.
1428    #[::fuchsia::test]
1429    async fn test_mremap_grow_maymove() {
1430        spawn_kernel_and_run(async |locked, current_task| {
1431            // Map 3 pages.
1432            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1433            fill_page(&current_task, addr, 'a');
1434            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1435            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1436
1437            // Grow the first two pages by 1, forcing a move.
1438            let new_addr = remap_memory(
1439                locked,
1440                &current_task,
1441                addr,
1442                *PAGE_SIZE * 2,
1443                *PAGE_SIZE * 3,
1444                MREMAP_MAYMOVE,
1445                UserAddress::default(),
1446            )
1447            .expect("failed to mremap");
1448
1449            assert_ne!(new_addr, addr, "mremap did not move the mapping");
1450
1451            // The first two pages should have been moved.
1452            check_unmapped(&current_task, addr);
1453            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1454
1455            // The third page should still be present.
1456            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1457
1458            // The moved pages should have the same contents.
1459            check_page_eq(&current_task, new_addr, 'a');
1460            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'b');
1461
1462            // The newly grown page should not be the same as the original third page.
1463            check_page_ne(&current_task, (new_addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1464        })
1465        .await;
1466    }
1467
1468    /// Shrinks a set of pages and move them to a fixed location.
1469    #[::fuchsia::test]
1470    async fn test_mremap_shrink_fixed() {
1471        spawn_kernel_and_run(async |locked, current_task| {
1472            // Map 2 pages which will act as the destination.
1473            let dst_addr =
1474                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
1475            fill_page(&current_task, dst_addr, 'y');
1476            fill_page(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'z');
1477
1478            // Map 3 pages.
1479            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1480            fill_page(&current_task, addr, 'a');
1481            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1482            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1483
1484            // Shrink the first two pages and move them to overwrite the mappings at `dst_addr`.
1485            let new_addr = remap_memory(
1486                locked,
1487                &current_task,
1488                addr,
1489                *PAGE_SIZE * 2,
1490                *PAGE_SIZE,
1491                MREMAP_MAYMOVE | MREMAP_FIXED,
1492                dst_addr,
1493            )
1494            .expect("failed to mremap");
1495
1496            assert_eq!(new_addr, dst_addr, "mremap did not move the mapping");
1497
1498            // The first two pages should have been moved.
1499            check_unmapped(&current_task, addr);
1500            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1501
1502            // The third page should still be present.
1503            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1504
1505            // The first moved page should have the same contents.
1506            check_page_eq(&current_task, new_addr, 'a');
1507
1508            // The second page should be part of the original dst mapping.
1509            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'z');
1510        })
1511        .await;
1512    }
1513
1514    /// Clobbers the middle of an existing mapping with mremap to a fixed location.
1515    #[::fuchsia::test]
1516    async fn test_mremap_clobber_memory_mapping() {
1517        spawn_kernel_and_run(async |locked, current_task| {
1518            let dst_memory = MemoryObject::from(zx::Vmo::create(2 * *PAGE_SIZE).unwrap());
1519            dst_memory.write(&['x' as u8].repeat(*PAGE_SIZE as usize), 0).unwrap();
1520            dst_memory.write(&['y' as u8].repeat(*PAGE_SIZE as usize), *PAGE_SIZE).unwrap();
1521
1522            let dst_addr = current_task
1523                .mm()
1524                .unwrap()
1525                .map_memory(
1526                    DesiredAddress::Any,
1527                    dst_memory.into(),
1528                    0,
1529                    2 * (*PAGE_SIZE as usize),
1530                    ProtectionFlags::READ,
1531                    Access::rwx(),
1532                    MappingOptions::empty(),
1533                    MappingName::None,
1534                )
1535                .unwrap();
1536
1537            // Map 3 pages.
1538            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1539            fill_page(&current_task, addr, 'a');
1540            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1541            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1542
1543            // Overwrite the second page of the mapping with the second page of the anonymous mapping.
1544            let remapped_addr = sys_mremap(
1545                locked,
1546                &*current_task,
1547                (addr + *PAGE_SIZE).unwrap(),
1548                *PAGE_SIZE as usize,
1549                *PAGE_SIZE as usize,
1550                MREMAP_FIXED | MREMAP_MAYMOVE,
1551                (dst_addr + *PAGE_SIZE).unwrap(),
1552            )
1553            .unwrap();
1554
1555            assert_eq!(remapped_addr, (dst_addr + *PAGE_SIZE).unwrap());
1556
1557            check_page_eq(&current_task, addr, 'a');
1558            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1559            check_page_eq(&current_task, (addr + (2 * *PAGE_SIZE)).unwrap(), 'c');
1560
1561            check_page_eq(&current_task, dst_addr, 'x');
1562            check_page_eq(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'b');
1563        })
1564        .await;
1565    }
1566
1567    #[cfg(target_arch = "x86_64")]
1568    #[::fuchsia::test]
1569    async fn test_map_32_bit() {
1570        use starnix_uapi::PROT_WRITE;
1571
1572        spawn_kernel_and_run(async |locked, current_task| {
1573            let page_size = *PAGE_SIZE;
1574
1575            for _i in 0..256 {
1576                match do_mmap(
1577                    locked,
1578                    &current_task,
1579                    UserAddress::from(0),
1580                    page_size as usize,
1581                    PROT_READ | PROT_WRITE,
1582                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT,
1583                    FdNumber::from_raw(-1),
1584                    0,
1585                ) {
1586                    Ok(address) => {
1587                        let memory_end = address.ptr() + page_size as usize;
1588                        assert!(memory_end <= 0x80000000);
1589                    }
1590                    error => {
1591                        panic!("mmap with MAP_32BIT failed: {error:?}");
1592                    }
1593                }
1594            }
1595        })
1596        .await;
1597    }
1598}