// starnix_core/mm/syscalls.rs
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::debugger::notify_debugger_of_module_list;
7use crate::mm::{
8    DesiredAddress, FutexKey, IOVecPtr, MappingName, MappingOptions, MembarrierType,
9    MemoryAccessorExt, MremapFlags, MsyncFlags, PAGE_SIZE, PrivateFutexKey, ProtectionFlags,
10    SharedFutexKey,
11};
12use crate::security;
13use crate::syscalls::time::TimeSpecPtr;
14use crate::task::{CurrentTask, Task};
15use crate::time::TargetTime;
16use crate::time::utc::estimate_boot_deadline_from_utc;
17use crate::vfs::buffers::{OutputBuffer, UserBuffersInputBuffer, UserBuffersOutputBuffer};
18use crate::vfs::{FdFlags, FdNumber, UserFaultFile};
19use fuchsia_runtime::UtcTimeline;
20use linux_uapi::MLOCK_ONFAULT;
21use starnix_logging::{CATEGORY_STARNIX_MM, log_trace, trace_duration, track_stub};
22use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
23use starnix_syscalls::SyscallArg;
24use starnix_types::time::{duration_from_timespec, time_from_timespec, timespec_from_time};
25use starnix_uapi::auth::{CAP_SYS_PTRACE, PTRACE_MODE_ATTACH_REALCREDS};
26use starnix_uapi::errors::{EINTR, Errno};
27use starnix_uapi::open_flags::OpenFlags;
28use starnix_uapi::user_address::{UserAddress, UserRef};
29use starnix_uapi::user_value::UserValue;
30use starnix_uapi::{
31    FUTEX_BITSET_MATCH_ANY, FUTEX_CLOCK_REALTIME, FUTEX_CMD_MASK, FUTEX_CMP_REQUEUE,
32    FUTEX_CMP_REQUEUE_PI, FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_PRIVATE_FLAG, FUTEX_REQUEUE,
33    FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI, FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
34    FUTEX_WAKE, FUTEX_WAKE_BITSET, FUTEX_WAKE_OP, MAP_ANONYMOUS, MAP_DENYWRITE, MAP_FIXED,
35    MAP_FIXED_NOREPLACE, MAP_GROWSDOWN, MAP_LOCKED, MAP_NORESERVE, MAP_POPULATE, MAP_PRIVATE,
36    MAP_SHARED, MAP_SHARED_VALIDATE, MAP_STACK, O_CLOEXEC, O_NONBLOCK, PROT_EXEC,
37    UFFD_USER_MODE_ONLY, errno, error, robust_list_head, tid_t, uapi,
38};
39use std::ops::Deref as _;
40use zx;
41
42#[cfg(target_arch = "x86_64")]
43use starnix_uapi::MAP_32BIT;
44
// Returns any platform-specific mmap flags. This is a separate function because as of this writing
// "attributes on expressions are experimental."
#[cfg(target_arch = "x86_64")]
fn get_valid_platform_mmap_flags() -> u32 {
    // MAP_32BIT is only defined (and only meaningful) on x86_64.
    MAP_32BIT
}
#[cfg(not(target_arch = "x86_64"))]
fn get_valid_platform_mmap_flags() -> u32 {
    // No platform-specific mmap flags on other architectures.
    0
}
55
56/// sys_mmap takes a mutable reference to current_task because it may modify the IP register.
57pub fn sys_mmap(
58    locked: &mut Locked<Unlocked>,
59    current_task: &mut CurrentTask,
60    addr: UserAddress,
61    length: usize,
62    prot: u32,
63    flags: u32,
64    fd: FdNumber,
65    offset: u64,
66) -> Result<UserAddress, Errno> {
67    let user_address = do_mmap(locked, current_task, addr, length, prot, flags, fd, offset)?;
68    if prot & PROT_EXEC != 0 {
69        // Possibly loads a new module. Notify debugger for the change.
70        // We only care about dynamic linker loading modules for now, which uses mmap. In the future
71        // we might want to support unloading modules in munmap or JIT compilation in mprotect.
72        notify_debugger_of_module_list(current_task)?;
73    }
74    Ok(user_address)
75}
76
/// Shared mmap implementation used by `sys_mmap` and the arch32 `mmap2` wrapper.
///
/// Validates the protection and flag bits, resolves `(addr, MAP_FIXED, MAP_FIXED_NOREPLACE)`
/// into a `DesiredAddress`, runs the security hook, and then dispatches to either an anonymous
/// or a file-backed mapping. Returns the address of the new mapping on success.
pub fn do_mmap<L>(
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
    prot: u32,
    flags: u32,
    fd: FdNumber,
    offset: u64,
) -> Result<UserAddress, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Unknown protection bits fail with EINVAL.
    let prot_flags = ProtectionFlags::from_access_bits(prot).ok_or_else(|| {
        track_stub!(TODO("https://fxbug.dev/322874211"), "mmap parse protection", prot);
        errno!(EINVAL)
    })?;

    // The full set of flags this implementation understands; any other bit is an error below.
    let valid_flags: u32 = get_valid_platform_mmap_flags()
        | MAP_PRIVATE
        | MAP_SHARED
        | MAP_SHARED_VALIDATE
        | MAP_ANONYMOUS
        | MAP_FIXED
        | MAP_FIXED_NOREPLACE
        | MAP_POPULATE
        | MAP_NORESERVE
        | MAP_STACK
        | MAP_DENYWRITE
        | MAP_GROWSDOWN
        | MAP_LOCKED;
    if flags & !valid_flags != 0 {
        // MAP_SHARED_VALIDATE requires rejecting unknown flags with EOPNOTSUPP instead of EINVAL.
        if flags & MAP_SHARED_VALIDATE != 0 {
            return error!(EOPNOTSUPP);
        }
        track_stub!(TODO("https://fxbug.dev/322873638"), "mmap check flags", flags);
        return error!(EINVAL);
    }

    // Resolve the backing file first so a bad fd reports EBADF even if other args are invalid.
    let file = if flags & MAP_ANONYMOUS != 0 { None } else { Some(current_task.get_file(fd)?) };
    // Exactly one of MAP_PRIVATE / MAP_SHARED must be set.
    if flags & (MAP_PRIVATE | MAP_SHARED) == 0
        || flags & (MAP_PRIVATE | MAP_SHARED) == MAP_PRIVATE | MAP_SHARED
    {
        return error!(EINVAL);
    }
    if length == 0 {
        return error!(EINVAL);
    }
    // The file offset must be page-aligned.
    if offset % *PAGE_SIZE != 0 {
        return error!(EINVAL);
    }

    // TODO(tbodt): should we consider MAP_NORESERVE?

    // Map (addr, MAP_FIXED, MAP_FIXED_NOREPLACE) to the placement policy. A null addr with
    // either fixed flag is invalid; MAP_FIXED_NOREPLACE wins over MAP_FIXED when both are set.
    let addr = match (addr, flags & MAP_FIXED != 0, flags & MAP_FIXED_NOREPLACE != 0) {
        (UserAddress::NULL, false, false) => DesiredAddress::Any,
        (UserAddress::NULL, true, _) | (UserAddress::NULL, _, true) => return error!(EINVAL),
        (addr, false, false) => DesiredAddress::Hint(addr),
        (addr, _, true) => DesiredAddress::Fixed(addr),
        (addr, true, false) => DesiredAddress::FixedOverwrite(addr),
    };

    // Anonymous mappings ignore the offset argument.
    let memory_offset = if flags & MAP_ANONYMOUS != 0 { 0 } else { offset };

    let mut options = MappingOptions::empty();
    if flags & MAP_SHARED != 0 {
        options |= MappingOptions::SHARED;
    }
    if flags & MAP_ANONYMOUS != 0 {
        options |= MappingOptions::ANONYMOUS;
    }
    // MAP_32BIT is ignored when MAP_FIXED is set (the caller already chose the address).
    #[cfg(target_arch = "x86_64")]
    if flags & MAP_FIXED == 0 && flags & MAP_32BIT != 0 {
        options |= MappingOptions::LOWER_32BIT;
    }
    if flags & MAP_GROWSDOWN != 0 {
        options |= MappingOptions::GROWSDOWN;
    }
    if flags & MAP_POPULATE != 0 {
        options |= MappingOptions::POPULATE;
    }
    if flags & MAP_LOCKED != 0 {
        // The kernel isn't expected to return an error if locking fails with this flag, so for now
        // this implementation will always fail to lock memory even if mapping succeeds.
        track_stub!(TODO("https://fxbug.dev/406377606"), "MAP_LOCKED");
    }

    // LSM hook: may deny the mapping based on the file and requested protection.
    security::mmap_file(current_task, file.as_ref(), prot_flags, options)?;

    if flags & MAP_ANONYMOUS != 0 {
        trace_duration!(CATEGORY_STARNIX_MM, "AnonymousMmap");
        current_task.mm()?.map_anonymous(addr, length, prot_flags, options, MappingName::None)
    } else {
        trace_duration!(CATEGORY_STARNIX_MM, "FileBackedMmap");
        // TODO(tbodt): maximize protection flags so that mprotect works
        let file = file.expect("file retrieved above for file-backed mapping");
        file.mmap(
            locked,
            current_task,
            addr,
            memory_offset,
            length,
            prot_flags,
            options,
            file.name.to_passive(),
        )
    }
}
185
186pub fn sys_mprotect(
187    _locked: &mut Locked<Unlocked>,
188    current_task: &CurrentTask,
189    addr: UserAddress,
190    length: usize,
191    prot: u32,
192) -> Result<(), Errno> {
193    let prot_flags = ProtectionFlags::from_bits(prot).ok_or_else(|| {
194        track_stub!(TODO("https://fxbug.dev/322874672"), "mprotect parse protection", prot);
195        errno!(EINVAL)
196    })?;
197    current_task.mm()?.protect(current_task, addr, length, prot_flags)?;
198    Ok(())
199}
200
201pub fn sys_mremap(
202    _locked: &mut Locked<Unlocked>,
203    current_task: &CurrentTask,
204    addr: UserAddress,
205    old_length: usize,
206    new_length: usize,
207    flags: u32,
208    new_addr: UserAddress,
209) -> Result<UserAddress, Errno> {
210    let flags = MremapFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
211    let addr =
212        current_task.mm()?.remap(current_task, addr, old_length, new_length, flags, new_addr)?;
213    Ok(addr)
214}
215
216pub fn sys_munmap(
217    _locked: &mut Locked<Unlocked>,
218    current_task: &CurrentTask,
219    addr: UserAddress,
220    length: usize,
221) -> Result<(), Errno> {
222    current_task.mm()?.unmap(addr, length)?;
223    Ok(())
224}
225
226pub fn sys_msync(
227    locked: &mut Locked<Unlocked>,
228    current_task: &CurrentTask,
229    addr: UserAddress,
230    length: usize,
231    flags: u32,
232) -> Result<(), Errno> {
233    let flags = MsyncFlags::from_bits_retain(flags);
234    current_task.mm()?.msync(locked, current_task, addr, length, flags)
235}
236
237pub fn sys_madvise(
238    _locked: &mut Locked<Unlocked>,
239    current_task: &CurrentTask,
240    addr: UserAddress,
241    length: usize,
242    advice: u32,
243) -> Result<(), Errno> {
244    current_task.mm()?.madvise(addr, length, advice)?;
245    Ok(())
246}
247
/// Stub for process_madvise(2): applying madvise to another process identified by a pidfd.
/// Not implemented yet; always returns ENOSYS.
pub fn sys_process_madvise(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _pidfd: FdNumber,
    _iovec_addr: IOVecPtr,
    _iovec_count: UserValue<i32>,
    _advice: UserValue<i32>,
    _flags: UserValue<u32>,
) -> Result<usize, Errno> {
    track_stub!(TODO("https://fxbug.dev/409060664"), "process_madvise");
    error!(ENOSYS)
}
260
/// brk(2): sets the program break to `addr`; delegates entirely to the memory manager,
/// which returns the resulting break address.
pub fn sys_brk(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
) -> Result<UserAddress, Errno> {
    current_task.mm()?.set_brk(locked, current_task, addr)
}
268
/// process_vm_readv(2): reads memory from the task identified by `tid` into local iovecs.
///
/// Requires PTRACE_MODE_ATTACH_REALCREDS access to the remote task. When the remote task
/// shares the caller's address space, the faster unified-buffer path is used.
pub fn sys_process_vm_readv(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    tid: tid_t,
    local_iov_addr: IOVecPtr,
    local_iov_count: UserValue<i32>,
    remote_iov_addr: IOVecPtr,
    remote_iov_count: UserValue<i32>,
    flags: usize,
) -> Result<usize, Errno> {
    // No flags are currently defined for this syscall.
    if flags != 0 {
        return error!(EINVAL);
    }

    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
    // make sure to return Ok(0) before doing any other validation/operations.
    if (local_iov_count == 0 && local_iov_addr.is_null())
        || (remote_iov_count == 0 && remote_iov_addr.is_null())
    {
        return Ok(0);
    }

    let weak_remote_task = current_task.get_task(tid);
    let remote_task = Task::from_weak(&weak_remote_task)?;

    // The ptrace access check must happen before touching the remote address space.
    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;

    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
    log_trace!(
        "process_vm_readv(tid={}, local_iov={:?}, remote_iov={:?})",
        tid,
        local_iov,
        remote_iov
    );

    track_stub!(TODO("https://fxbug.dev/322874765"), "process_vm_readv single-copy");
    // According to the man page, this syscall was added to Linux specifically to
    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
    // point.
    let mut output = UserBuffersOutputBuffer::unified_new(current_task, local_iov)?;
    let remote_mm = remote_task.mm().ok();
    if current_task.has_same_address_space(remote_mm.as_ref()) {
        // Same address space: both sides can be accessed directly.
        let mut input = UserBuffersInputBuffer::unified_new(current_task, remote_iov)?;
        output.write_buffer(&mut input)
    } else {
        // Different address space: read the remote side through the syscall-access path.
        let mut input = UserBuffersInputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
        output.write_buffer(&mut input)
    }
}
320
/// process_vm_writev(2): writes memory from local iovecs into the task identified by `tid`.
///
/// Mirrors `sys_process_vm_readv` with the transfer direction reversed; requires
/// PTRACE_MODE_ATTACH_REALCREDS access to the remote task.
pub fn sys_process_vm_writev(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    tid: tid_t,
    local_iov_addr: IOVecPtr,
    local_iov_count: UserValue<i32>,
    remote_iov_addr: IOVecPtr,
    remote_iov_count: UserValue<i32>,
    flags: usize,
) -> Result<usize, Errno> {
    // No flags are currently defined for this syscall.
    if flags != 0 {
        return error!(EINVAL);
    }

    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
    // make sure to return Ok(0) before doing any other validation/operations.
    if (local_iov_count == 0 && local_iov_addr.is_null())
        || (remote_iov_count == 0 && remote_iov_addr.is_null())
    {
        return Ok(0);
    }

    let weak_remote_task = current_task.get_task(tid);
    let remote_task = Task::from_weak(&weak_remote_task)?;

    // The ptrace access check must happen before touching the remote address space.
    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;

    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
    log_trace!(
        "sys_process_vm_writev(tid={}, local_iov={:?}, remote_iov={:?})",
        tid,
        local_iov,
        remote_iov
    );

    track_stub!(TODO("https://fxbug.dev/322874339"), "process_vm_writev single-copy");
    // NB: According to the man page, this syscall was added to Linux specifically to
    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
    // point.
    let mut input = UserBuffersInputBuffer::unified_new(current_task, local_iov)?;
    let remote_mm = remote_task.mm().ok();
    if current_task.has_same_address_space(remote_mm.as_ref()) {
        // Same address space: both sides can be accessed directly.
        let mut output = UserBuffersOutputBuffer::unified_new(current_task, remote_iov)?;
        output.write_buffer(&mut input)
    } else {
        // Different address space: write the remote side through the syscall-access path.
        let mut output = UserBuffersOutputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
        output.write_buffer(&mut input)
    }
}
372
373pub fn sys_process_mrelease(
374    _locked: &mut Locked<Unlocked>,
375    current_task: &CurrentTask,
376    pidfd: FdNumber,
377    flags: u32,
378) -> Result<(), Errno> {
379    if flags != 0 {
380        return error!(EINVAL);
381    }
382    let file = current_task.get_file(pidfd)?;
383    let task = current_task.get_task(file.as_thread_group_key()?.pid());
384    let task = task.upgrade().ok_or_else(|| errno!(ESRCH))?;
385    if !task.load_stopped().is_stopped() {
386        return error!(EINVAL);
387    }
388
389    task.mm()?.mrelease()
390}
391
/// membarrier(2): issues memory barriers across threads/processes.
///
/// Supports the global and private-expedited families (including SYNC_CORE); the RSEQ
/// family is not implemented. Private-expedited commands require prior registration of
/// the calling address space, matching Linux semantics.
pub fn sys_membarrier(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    cmd: uapi::membarrier_cmd,
    _flags: u32,
    _cpu_id: i32,
) -> Result<u32, Errno> {
    match cmd {
        // This command returns a bit mask of all supported commands.
        // We support everything except for the RSEQ family.
        uapi::membarrier_cmd_MEMBARRIER_CMD_QUERY => Ok(uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
            | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED
            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE),
        // Global and global expedited barriers are treated identically. We don't track
        // registration for global expedited barriers currently.
        uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
        | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED => {
            system_barrier(BarrierType::DataMemory);
            Ok(0)
        }
        // Global registration commands are ignored.
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED => Ok(0),
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED => {
            // A private expedited barrier is only issued if the address space is registered
            // for these barriers.
            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::Memory) {
                // If a barrier is requested, issue a global barrier.
                system_barrier(BarrierType::DataMemory);
                Ok(0)
            } else {
                // Unregistered address space: Linux returns EPERM here.
                error!(EPERM)
            }
        }
        // Private sync core barriers are treated as global instruction stream barriers.
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE => {
            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::SyncCore)
            {
                system_barrier(BarrierType::InstructionStream);
                Ok(0)
            } else {
                // Unregistered address space: Linux returns EPERM here.
                error!(EPERM)
            }
        }
        // Registration commands record the intent on the address space; the returned
        // value of the registration call is intentionally discarded.
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED => {
            let _ =
                current_task.mm()?.register_membarrier_private_expedited(MembarrierType::Memory)?;
            Ok(0)
        }

        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE => {
            let _ = current_task
                .mm()?
                .register_membarrier_private_expedited(MembarrierType::SyncCore)?;
            Ok(0)
        }
        // The RSEQ family is not implemented.
        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ => {
            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
            error!(ENOSYS)
        }
        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ => {
            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
            error!(ENOSYS)
        }
        _ => error!(EINVAL),
    }
}
462
/// userfaultfd(2): creates a file descriptor for handling page faults in user space.
///
/// Accepts O_CLOEXEC, O_NONBLOCK and UFFD_USER_MODE_ONLY; any other flag bit is EINVAL.
/// userfaultfds that survive exec() (i.e. created without O_CLOEXEC) are not supported yet.
pub fn sys_userfaultfd(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    raw_flags: u32,
) -> Result<FdNumber, Errno> {
    let unknown_flags = raw_flags & !(O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
    if unknown_flags != 0 {
        return error!(EINVAL, format!("unknown flags provided: {unknown_flags:x?}"));
    }
    // Translate the raw bits into OpenFlags for the file object.
    let mut open_flags = OpenFlags::empty();
    if raw_flags & O_NONBLOCK != 0 {
        open_flags |= OpenFlags::NONBLOCK;
    }
    if raw_flags & O_CLOEXEC != 0 {
        open_flags |= OpenFlags::CLOEXEC;
    }

    // Only close-on-exec userfaultfds are supported for now.
    let fd_flags = if raw_flags & O_CLOEXEC != 0 {
        FdFlags::CLOEXEC
    } else {
        track_stub!(TODO("https://fxbug.dev/297375964"), "userfaultfds that survive exec()");
        return error!(ENOSYS);
    };

    // NOTE(review): `user_mode_only` is computed as true when UFFD_USER_MODE_ONLY is NOT set,
    // which looks inverted relative to the flag's name — confirm the meaning of the
    // corresponding parameter of UserFaultFile::new before relying on this.
    let user_mode_only = raw_flags & UFFD_USER_MODE_ONLY == 0;
    let uff_handle = UserFaultFile::new(locked, current_task, open_flags, user_mode_only)?;
    current_task.add_file(locked, uff_handle, fd_flags)
}
491
492pub fn sys_futex(
493    locked: &mut Locked<Unlocked>,
494    current_task: &mut CurrentTask,
495    addr: UserAddress,
496    op: u32,
497    value: u32,
498    timeout_or_value2: SyscallArg,
499    addr2: UserAddress,
500    value3: u32,
501) -> Result<usize, Errno> {
502    if op & FUTEX_PRIVATE_FLAG != 0 {
503        do_futex::<PrivateFutexKey>(
504            locked,
505            current_task,
506            addr,
507            op,
508            value,
509            timeout_or_value2,
510            addr2,
511            value3,
512        )
513    } else {
514        do_futex::<SharedFutexKey>(
515            locked,
516            current_task,
517            addr,
518            op,
519            value,
520            timeout_or_value2,
521            addr2,
522            value3,
523        )
524    }
525}
526
/// Dispatches a futex command for the key space selected by `Key` (private or shared).
///
/// `timeout_or_value2` is either a `timespec` pointer (WAIT family, LOCK_PI) or a second
/// count (REQUEUE family), depending on the command — hence the untyped `SyscallArg`.
fn do_futex<Key: FutexKey>(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    addr: UserAddress,
    op: u32,
    value: u32,
    timeout_or_value2: SyscallArg,
    addr2: UserAddress,
    value3: u32,
) -> Result<usize, Errno> {
    let futexes = Key::get_table_from_task(current_task)?;
    // Strip the option bits (PRIVATE, CLOCK_REALTIME) to get the base command.
    let cmd = op & (FUTEX_CMD_MASK as u32);

    let is_realtime = match (cmd, op & FUTEX_CLOCK_REALTIME != 0) {
        // This option bit can be employed only with the FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
        // (since Linux 4.5) FUTEX_WAIT, and (since Linux 5.14) FUTEX_LOCK_PI2 operations.
        (FUTEX_WAIT_BITSET | FUTEX_WAIT_REQUEUE_PI | FUTEX_WAIT | FUTEX_LOCK_PI2, true) => true,
        (_, true) => return error!(EINVAL),

        // FUTEX_LOCK_PI always uses realtime.
        (FUTEX_LOCK_PI, _) => true,

        (_, false) => false,
    };

    // The timeout is interpreted differently by WAIT and WAIT_BITSET: WAIT takes a
    // timeout and WAIT_BITSET takes a deadline.
    // A null timespec pointer means "wait forever".
    let read_timespec = |current_task: &CurrentTask| {
        let utime = TimeSpecPtr::new(current_task, timeout_or_value2);
        if utime.is_null() {
            Ok(timespec_from_time(zx::MonotonicInstant::INFINITE))
        } else {
            current_task.read_multi_arch_object(utime)
        }
    };
    // Relative timeout -> absolute monotonic deadline.
    let read_timeout = |current_task: &CurrentTask| {
        let timespec = read_timespec(current_task)?;
        let timeout = duration_from_timespec(timespec);
        let deadline = zx::MonotonicInstant::after(timeout?);
        if is_realtime {
            // Since this is a timeout, waiting on the monotonic timeline before it's paused is
            // just as good as actually estimating UTC here.
            track_stub!(TODO("https://fxbug.dev/356912301"), "FUTEX_CLOCK_REALTIME timeout");
        }
        Ok(deadline)
    };
    // Absolute deadline on either the UTC or the monotonic timeline.
    let read_deadline = |current_task: &CurrentTask| {
        let timespec = read_timespec(current_task)?;
        if is_realtime {
            Ok(TargetTime::RealTime(time_from_timespec::<UtcTimeline>(timespec)?))
        } else {
            Ok(TargetTime::Monotonic(time_from_timespec::<zx::MonotonicTimeline>(timespec)?))
        }
    };

    match cmd {
        FUTEX_WAIT => {
            // Plain WAIT matches any waiter bitset.
            let deadline = read_timeout(current_task)?;
            let bitset = FUTEX_BITSET_MATCH_ANY;
            do_futex_wait_with_restart::<Key>(
                locked,
                current_task,
                addr,
                value,
                bitset,
                TargetTime::Monotonic(deadline),
            )?;
            Ok(0)
        }
        FUTEX_WAKE => {
            futexes.wake(locked, current_task, addr, value as usize, FUTEX_BITSET_MATCH_ANY)
        }
        FUTEX_WAKE_OP => {
            track_stub!(TODO("https://fxbug.dev/361181940"), "FUTEX_WAKE_OP");
            error!(ENOSYS)
        }
        FUTEX_WAIT_BITSET => {
            // A zero bitset can never match and is therefore invalid.
            if value3 == 0 {
                return error!(EINVAL);
            }
            let deadline = read_deadline(current_task)?;
            do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, value3, deadline)?;
            Ok(0)
        }
        FUTEX_WAKE_BITSET => {
            if value3 == 0 {
                return error!(EINVAL);
            }
            futexes.wake(locked, current_task, addr, value as usize, value3)
        }
        FUTEX_REQUEUE | FUTEX_CMP_REQUEUE => {
            // For the REQUEUE family the "timeout" argument is actually the requeue count.
            let wake_count = value as usize;
            let requeue_count: usize = timeout_or_value2.into();
            if wake_count > std::i32::MAX as usize || requeue_count > std::i32::MAX as usize {
                return error!(EINVAL);
            }
            // CMP_REQUEUE additionally verifies the futex word still holds value3.
            let expected_value = if cmd == FUTEX_CMP_REQUEUE { Some(value3) } else { None };
            futexes.requeue(
                locked,
                current_task,
                addr,
                wake_count,
                requeue_count,
                addr2,
                expected_value,
            )
        }
        FUTEX_WAIT_REQUEUE_PI => {
            track_stub!(TODO("https://fxbug.dev/361181558"), "FUTEX_WAIT_REQUEUE_PI");
            error!(ENOSYS)
        }
        FUTEX_CMP_REQUEUE_PI => {
            track_stub!(TODO("https://fxbug.dev/361181773"), "FUTEX_CMP_REQUEUE_PI");
            error!(ENOSYS)
        }
        FUTEX_LOCK_PI | FUTEX_LOCK_PI2 => {
            futexes.lock_pi(locked, current_task, addr, read_timeout(current_task)?)?;
            Ok(0)
        }
        FUTEX_TRYLOCK_PI => {
            track_stub!(TODO("https://fxbug.dev/361175318"), "FUTEX_TRYLOCK_PI");
            error!(ENOSYS)
        }
        FUTEX_UNLOCK_PI => {
            futexes.unlock_pi(locked, current_task, addr)?;
            Ok(0)
        }
        _ => {
            track_stub!(TODO("https://fxbug.dev/322875124"), "futex unknown command", cmd);
            error!(ENOSYS)
        }
    }
}
660
/// Waits on a futex, arranging for the wait to be transparently restarted after a signal.
///
/// If the wait is interrupted (EINTR), installs a restart function carrying the original
/// absolute deadline and returns ERESTART_RESTARTBLOCK, so the timeout is not extended by
/// the restart.
fn do_futex_wait_with_restart<Key: FutexKey>(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    addr: UserAddress,
    value: u32,
    mask: u32,
    deadline: TargetTime,
) -> Result<(), Errno> {
    let futexes = Key::get_table_from_task(current_task)?;
    // Pick the wait primitive matching the timeline of the deadline.
    let result = match deadline {
        TargetTime::Monotonic(mono_deadline) => {
            futexes.wait(locked, current_task, addr, value, mask, mono_deadline)
        }
        TargetTime::BootInstant(boot_deadline) => {
            let timer_slack = current_task.read().get_timerslack();
            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
        }
        TargetTime::RealTime(utc_deadline) => {
            // We convert real time deadlines to boot time deadlines since we cannot wait using a UTC deadline.
            let (boot_deadline, _) = estimate_boot_deadline_from_utc(utc_deadline);
            let timer_slack = current_task.read().get_timerslack();
            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
        }
    };
    match result {
        Err(err) if err == EINTR => {
            // Restart with the same absolute deadline after the signal is handled.
            current_task.set_syscall_restart_func(move |locked, current_task| {
                do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, mask, deadline)
            });
            error!(ERESTART_RESTARTBLOCK)
        }
        result => result,
    }
}
695
696pub fn sys_get_robust_list(
697    _locked: &mut Locked<Unlocked>,
698    current_task: &CurrentTask,
699    tid: tid_t,
700    user_head_ptr: UserRef<UserAddress>,
701    user_len_ptr: UserRef<usize>,
702) -> Result<(), Errno> {
703    if tid < 0 {
704        return error!(EINVAL);
705    }
706    if user_head_ptr.is_null() || user_len_ptr.is_null() {
707        return error!(EFAULT);
708    }
709    if tid != 0 {
710        security::check_task_capable(current_task, CAP_SYS_PTRACE)?;
711    }
712    let task = if tid == 0 { current_task.weak_task() } else { current_task.get_task(tid) };
713    let task = Task::from_weak(&task)?;
714    current_task.write_object(user_head_ptr, &task.read().robust_list_head.addr())?;
715    current_task.write_object(user_len_ptr, &std::mem::size_of::<robust_list_head>())?;
716    Ok(())
717}
718
719pub fn sys_set_robust_list(
720    _locked: &mut Locked<Unlocked>,
721    current_task: &CurrentTask,
722    user_head: UserRef<robust_list_head>,
723    len: usize,
724) -> Result<(), Errno> {
725    if len != std::mem::size_of::<robust_list_head>() {
726        return error!(EINVAL);
727    }
728    current_task.write().robust_list_head = user_head.into();
729    Ok(())
730}
731
732pub fn sys_mlock(
733    locked: &mut Locked<Unlocked>,
734    current_task: &CurrentTask,
735    addr: UserAddress,
736    length: usize,
737) -> Result<(), Errno> {
738    // If flags is 0, mlock2() behaves exactly the same as mlock().
739    sys_mlock2(locked, current_task, addr, length, 0)
740}
741
742pub fn sys_mlock2(
743    locked: &mut Locked<Unlocked>,
744    current_task: &CurrentTask,
745    addr: UserAddress,
746    length: usize,
747    flags: u64,
748) -> Result<(), Errno> {
749    const KNOWN_FLAGS: u64 = MLOCK_ONFAULT as u64;
750    if (flags & !KNOWN_FLAGS) != 0 {
751        return error!(EINVAL);
752    }
753    let on_fault = flags & MLOCK_ONFAULT as u64 != 0;
754    current_task.mm()?.mlock(current_task, locked, addr, length, on_fault)
755}
756
757pub fn sys_munlock(
758    _locked: &mut Locked<Unlocked>,
759    current_task: &CurrentTask,
760    addr: UserAddress,
761    length: usize,
762) -> Result<(), Errno> {
763    current_task.mm()?.munlock(current_task, addr, length)
764}
765
/// Stub for mlockall(2); not implemented, always returns ENOSYS.
pub fn sys_mlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "mlockall()");
    error!(ENOSYS)
}
774
/// Stub for munlockall(2); not implemented, always returns ENOSYS.
pub fn sys_munlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "munlockall()");
    error!(ENOSYS)
}
783
/// Stub for mincore(2); not implemented, always returns ENOSYS.
pub fn sys_mincore(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _addr: UserAddress,
    _length: usize,
    _out: UserRef<u8>,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297372240"), "mincore()");
    error!(ENOSYS)
}
794
// Syscalls for arch32 usage
#[cfg(target_arch = "aarch64")]
mod arch32 {
    use crate::mm::PAGE_SIZE;
    use crate::mm::syscalls::{UserAddress, sys_mmap};
    use crate::task::{CurrentTask, RobustListHeadPtr};
    use crate::vfs::FdNumber;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::errors::Errno;
    use starnix_uapi::user_address::UserRef;
    use starnix_uapi::{error, uapi};

    /// 32-bit set_robust_list: same as the 64-bit version but validates against the
    /// arch32 layout of robust_list_head and stores a 32-bit-aware pointer.
    pub fn sys_arch32_set_robust_list(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_head: UserRef<uapi::arch32::robust_list_head>,
        len: usize,
    ) -> Result<(), Errno> {
        // The caller must pass the exact size of the arch32 robust_list_head.
        if len != std::mem::size_of::<uapi::arch32::robust_list_head>() {
            return error!(EINVAL);
        }
        current_task.write().robust_list_head = RobustListHeadPtr::from_32(user_head);
        Ok(())
    }

    /// mmap2(2): like mmap but the offset argument is in pages, not bytes.
    pub fn sys_arch32_mmap2(
        locked: &mut Locked<Unlocked>,
        current_task: &mut CurrentTask,
        addr: UserAddress,
        length: usize,
        prot: u32,
        flags: u32,
        fd: FdNumber,
        offset: u64,
    ) -> Result<UserAddress, Errno> {
        // Scale the page-granular offset into bytes before delegating.
        sys_mmap(locked, current_task, addr, length, prot, flags, fd, offset * *PAGE_SIZE)
    }

    /// 32-bit munmap: additionally rejects ranges that do not fit in the 32-bit
    /// address space.
    pub fn sys_arch32_munmap(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
    ) -> Result<(), Errno> {
        if !addr.is_lower_32bit() || length >= (1 << 32) {
            return error!(EINVAL);
        }
        current_task.mm()?.unmap(addr, length)?;
        Ok(())
    }

    // The remaining arch32 syscalls are identical to their 64-bit counterparts and are
    // re-exported under the arch32 naming convention.
    pub use super::{
        sys_futex as sys_arch32_futex, sys_madvise as sys_arch32_madvise,
        sys_membarrier as sys_arch32_membarrier, sys_mincore as sys_arch32_mincore,
        sys_mlock as sys_arch32_mlock, sys_mlock2 as sys_arch32_mlock2,
        sys_mlockall as sys_arch32_mlockall, sys_mremap as sys_arch32_mremap,
        sys_msync as sys_arch32_msync, sys_munlock as sys_arch32_munlock,
        sys_munlockall as sys_arch32_munlockall,
        sys_process_mrelease as sys_arch32_process_mrelease,
        sys_process_vm_readv as sys_arch32_process_vm_readv,
        sys_userfaultfd as sys_arch32_userfaultfd,
    };
}
858
// Surface the arch32 wrappers at module scope so the syscall dispatch table
// can reference them by their `sys_arch32_*` names directly.
#[cfg(target_arch = "aarch64")]
pub use arch32::*;
861
// Unit tests for the mmap/munmap/msync/mremap syscall implementations. Each
// test boots a test kernel via `spawn_kernel_and_run` and drives the syscalls
// against a real task address space.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::memory::MemoryObject;
    use crate::testing::*;
    use starnix_uapi::errors::EEXIST;
    use starnix_uapi::file_mode::Access;
    use starnix_uapi::{MREMAP_FIXED, MREMAP_MAYMOVE, PROT_READ};

    /// A non-fixed address that collides with an existing mapping is only a
    /// hint: the kernel must place the new mapping somewhere else.
    #[::fuchsia::test]
    async fn test_mmap_with_colliding_hint() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    // The hint collided, so a different address was chosen.
                    assert_ne!(address, mapped_address);
                }
                error => {
                    panic!("mmap with colliding hint failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// MAP_FIXED silently replaces an existing mapping at the requested
    /// address, returning exactly that address.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    // MAP_FIXED must clobber in place rather than relocate.
                    assert_eq!(address, mapped_address);
                }
                error => {
                    panic!("mmap with fixed collision failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// MAP_FIXED_NOREPLACE fails with EEXIST instead of clobbering an
    /// existing mapping at the requested address.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_noreplace_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                FdNumber::from_raw(-1),
                0,
            ) {
                Err(errno) => {
                    assert_eq!(errno, EEXIST);
                }
                result => {
                    panic!("mmap with fixed_noreplace collision failed: {result:?}");
                }
            }
        })
        .await;
    }

    /// It is ok to call munmap with an address that is a multiple of the page size, and
    /// a non-zero length.
    #[::fuchsia::test]
    async fn test_munmap() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );

            // Verify that the memory is no longer readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
        })
        .await;
    }

    /// It is ok to call munmap on an unmapped range.
    #[::fuchsia::test]
    async fn test_munmap_not_mapped() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
            // Unmapping the same (now empty) range again must still succeed.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
        })
        .await;
    }

    /// It is an error to call munmap with a length of 0.
    #[::fuchsia::test]
    async fn test_munmap_0_length() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(sys_munmap(locked, &current_task, mapped_address, 0), error!(EINVAL));
        })
        .await;
    }

    /// It is an error to call munmap with an address that is not a multiple of the page size.
    #[::fuchsia::test]
    async fn test_munmap_not_aligned() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + 1u64).unwrap(),
                    *PAGE_SIZE as usize
                ),
                error!(EINVAL)
            );

            // Verify that the memory is still readable.
            assert!(current_task.read_memory_to_array::<5>(mapped_address).is_ok());
        })
        .await;
    }

    /// The entire page should be unmapped, not just the range [address, address + length).
    #[::fuchsia::test]
    async fn test_munmap_unmap_partial() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            // Unmap only the first half of the page.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) / 2),
                Ok(())
            );

            // Verify that memory can't be read in either half of the page.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE - 2)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// All pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_multiple_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // A length of one page + 1 byte touches the second page, so both
            // pages must be unmapped.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) + 1),
                Ok(())
            );

            // Verify that neither page is readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// Only the pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_one_of_many_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // A length just under one page only intersects the first page.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) - 1),
                Ok(())
            );

            // Verify that the first page is unmapped but the second page is
            // still readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap())
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap the middle page of a mapping.
    #[::fuchsia::test]
    async fn test_munmap_middle_page() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Verify that the first and third pages are still readable.
            assert!(current_task.read_memory_to_vec(mapped_address, 5).is_ok());
            assert_eq!(
                current_task.read_memory_to_vec((mapped_address + *PAGE_SIZE).unwrap(), 5),
                error!(EFAULT)
            );
            assert!(
                current_task
                    .read_memory_to_vec((mapped_address + (*PAGE_SIZE * 2)).unwrap(), 5)
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap a range of pages that includes disjoint mappings.
    #[::fuchsia::test]
    async fn test_munmap_many_mappings() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Create three independent one-page mappings; their addresses are
            // chosen by the kernel and need not be contiguous.
            let mapped_addresses: Vec<_> = std::iter::repeat_with(|| {
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE)
            })
            .take(3)
            .collect();
            let min_address = *mapped_addresses.iter().min().unwrap();
            let max_address = *mapped_addresses.iter().max().unwrap();
            // One munmap spanning from the lowest mapping through the end of
            // the highest one.
            let unmap_length = (max_address - min_address) + *PAGE_SIZE as usize;

            assert_eq!(sys_munmap(locked, &current_task, min_address, unmap_length), Ok(()));

            // Verify that none of the mapped pages are readable.
            for mapped_address in mapped_addresses {
                assert_eq!(current_task.read_memory_to_vec(mapped_address, 5), error!(EFAULT));
            }
        })
        .await;
    }

    /// msync succeeds on fully-mapped ranges and returns ENOMEM for ranges
    /// containing an unmapped hole, including ranges spanning multiple
    /// contiguous mappings.
    #[::fuchsia::test]
    async fn test_msync_validates_address_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages and test that ranges covering these pages return no error.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );

            // Unmap the middle page and test that ranges covering that page return ENOMEM.
            sys_munmap(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE as usize)
                .expect("unmap middle");
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE as usize * 3,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE as usize * 2,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + (*PAGE_SIZE * 2)).unwrap(),
                    *PAGE_SIZE as usize,
                    0
                ),
                Ok(())
            );

            // Map the middle page back and test that ranges covering the three pages
            // (spanning multiple ranges) return no error.
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );
        })
        .await;
    }

    /// Shrinks an entire range.
    #[::fuchsia::test]
    async fn test_mremap_shrink_whole_range_from_end() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            // Shrink the mapping from 2 to 1 pages.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
        })
        .await;
    }

    /// Shrinks part of a range, introducing a hole in the middle.
    #[::fuchsia::test]
    async fn test_mremap_shrink_partial_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first 2 pages down to 1, creating a hole.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinking doesn't care if the range specified spans multiple mappings.
    #[::fuchsia::test]
    async fn test_mremap_shrink_across_ranges() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, then map the middle again. This will leave us with
            // 3 contiguous mappings.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );

            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Remap over all three mappings, shrinking to 1 page.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 3,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_unmapped(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap());
        })
        .await;
    }

    /// Grows a mapping in-place.
    #[::fuchsia::test]
    async fn test_mremap_grow_in_place() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, leaving a hole.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Grow the first page in-place into the middle.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE,
                    *PAGE_SIZE * 2,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');

            // The middle page should be new, and not just pointing to the original middle page filled
            // with 'b'.
            check_page_ne(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Tries to grow a set of pages that cannot fit, and forces a move.
    #[::fuchsia::test]
    async fn test_mremap_grow_maymove() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Grow the first two pages by 1, forcing a move.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE * 3,
                MREMAP_MAYMOVE,
                UserAddress::default(),
            )
            .expect("failed to mremap");

            assert_ne!(new_addr, addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The moved pages should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'b');

            // The newly grown page should not be the same as the original third page.
            check_page_ne(&current_task, (new_addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinks a set of pages and move them to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_shrink_fixed() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages which will act as the destination.
            let dst_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, dst_addr, 'y');
            fill_page(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'z');

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first two pages and move them to overwrite the mappings at `dst_addr`.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE,
                MREMAP_MAYMOVE | MREMAP_FIXED,
                dst_addr,
            )
            .expect("failed to mremap");

            assert_eq!(new_addr, dst_addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The first moved page should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');

            // The second page should be part of the original dst mapping.
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'z');
        })
        .await;
    }

    /// Clobbers the middle of an existing mapping with mremap to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_clobber_memory_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Build a 2-page VMO-backed destination filled with 'x' and 'y'.
            let dst_memory = MemoryObject::from(zx::Vmo::create(2 * *PAGE_SIZE).unwrap());
            dst_memory.write(&['x' as u8].repeat(*PAGE_SIZE as usize), 0).unwrap();
            dst_memory.write(&['y' as u8].repeat(*PAGE_SIZE as usize), *PAGE_SIZE).unwrap();

            let dst_addr = current_task
                .mm()
                .unwrap()
                .map_memory(
                    DesiredAddress::Any,
                    dst_memory.into(),
                    0,
                    2 * (*PAGE_SIZE as usize),
                    ProtectionFlags::READ,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::None,
                )
                .unwrap();

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Overwrite the second page of the mapping with the second page of the anonymous mapping.
            let remapped_addr = sys_mremap(
                locked,
                &*current_task,
                (addr + *PAGE_SIZE).unwrap(),
                *PAGE_SIZE as usize,
                *PAGE_SIZE as usize,
                MREMAP_FIXED | MREMAP_MAYMOVE,
                (dst_addr + *PAGE_SIZE).unwrap(),
            )
            .unwrap();

            assert_eq!(remapped_addr, (dst_addr + *PAGE_SIZE).unwrap());

            // Source mapping: hole where the moved page was, neighbors intact.
            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (2 * *PAGE_SIZE)).unwrap(), 'c');

            // Destination: first page untouched, second page replaced by 'b'.
            check_page_eq(&current_task, dst_addr, 'x');
            check_page_eq(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'b');
        })
        .await;
    }

    /// MAP_32BIT (x86-64 only) must keep every mapping entirely below the
    /// 2 GiB boundary, even after many allocations.
    #[cfg(target_arch = "x86_64")]
    #[::fuchsia::test]
    async fn test_map_32_bit() {
        use starnix_uapi::PROT_WRITE;

        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            for _i in 0..256 {
                match do_mmap(
                    locked,
                    &current_task,
                    UserAddress::from(0),
                    page_size as usize,
                    PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT,
                    FdNumber::from_raw(-1),
                    0,
                ) {
                    Ok(address) => {
                        let memory_end = address.ptr() + page_size as usize;
                        // End of the mapping must not cross 2 GiB.
                        assert!(memory_end <= 0x80000000);
                    }
                    error => {
                        panic!("mmap with MAP_32BIT failed: {error:?}");
                    }
                }
            }
        })
        .await;
    }
}