// starnix_core/mm/syscalls.rs
1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::debugger::notify_debugger_of_module_list;
7use crate::mm::{
8    DesiredAddress, FutexKey, IOVecPtr, MappingName, MappingOptions, MembarrierType,
9    MemoryAccessorExt, MremapFlags, MsyncFlags, PAGE_SIZE, PrivateFutexKey, ProtectionFlags,
10    SharedFutexKey,
11};
12use crate::security;
13use crate::syscalls::time::TimeSpecPtr;
14use crate::task::{CurrentTask, Task};
15use crate::time::TargetTime;
16use crate::time::utc::estimate_boot_deadline_from_utc;
17use crate::vfs::buffers::{OutputBuffer, UserBuffersInputBuffer, UserBuffersOutputBuffer};
18use crate::vfs::{FdFlags, FdNumber, UserFaultFile};
19use fuchsia_runtime::UtcTimeline;
20use linux_uapi::MLOCK_ONFAULT;
21use starnix_logging::{CATEGORY_STARNIX_MM, log_trace, trace_duration, track_stub};
22use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
23use starnix_syscalls::SyscallArg;
24use starnix_types::time::{duration_from_timespec, time_from_timespec, timespec_from_time};
25use starnix_uapi::auth::{CAP_SYS_PTRACE, PTRACE_MODE_ATTACH_REALCREDS};
26use starnix_uapi::errors::{EINTR, Errno};
27use starnix_uapi::open_flags::OpenFlags;
28use starnix_uapi::user_address::{UserAddress, UserRef};
29use starnix_uapi::user_value::UserValue;
30use starnix_uapi::{
31    FUTEX_BITSET_MATCH_ANY, FUTEX_CLOCK_REALTIME, FUTEX_CMD_MASK, FUTEX_CMP_REQUEUE,
32    FUTEX_CMP_REQUEUE_PI, FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_PRIVATE_FLAG, FUTEX_REQUEUE,
33    FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI, FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
34    FUTEX_WAKE, FUTEX_WAKE_BITSET, FUTEX_WAKE_OP, MAP_ANONYMOUS, MAP_DENYWRITE, MAP_FIXED,
35    MAP_FIXED_NOREPLACE, MAP_GROWSDOWN, MAP_LOCKED, MAP_NORESERVE, MAP_POPULATE, MAP_PRIVATE,
36    MAP_SHARED, MAP_SHARED_VALIDATE, MAP_STACK, O_CLOEXEC, O_NONBLOCK, PROT_EXEC,
37    UFFD_USER_MODE_ONLY, errno, error, robust_list_head, tid_t, uapi,
38};
39use std::ops::Deref as _;
40use zx;
41
42#[cfg(target_arch = "x86_64")]
43use starnix_uapi::MAP_32BIT;
44
// Returns any platform-specific mmap flags. This is a separate function because as of this writing
// "attributes on expressions are experimental."
//
// On x86-64 the only platform-specific flag is MAP_32BIT (request a mapping at a low
// 32-bit-addressable address).
#[cfg(target_arch = "x86_64")]
fn get_valid_platform_mmap_flags() -> u32 {
    MAP_32BIT
}
// Non-x86-64 architectures define no extra platform-specific mmap flags.
#[cfg(not(target_arch = "x86_64"))]
fn get_valid_platform_mmap_flags() -> u32 {
    0
}
55
56/// sys_mmap takes a mutable reference to current_task because it may modify the IP register.
57pub fn sys_mmap(
58    locked: &mut Locked<Unlocked>,
59    current_task: &mut CurrentTask,
60    addr: UserAddress,
61    length: usize,
62    prot: u32,
63    flags: u32,
64    fd: FdNumber,
65    offset: u64,
66) -> Result<UserAddress, Errno> {
67    let user_address = do_mmap(locked, current_task, addr, length, prot, flags, fd, offset)?;
68    if prot & PROT_EXEC != 0 {
69        // Possibly loads a new module. Notify debugger for the change.
70        // We only care about dynamic linker loading modules for now, which uses mmap. In the future
71        // we might want to support unloading modules in munmap or JIT compilation in mprotect.
72        notify_debugger_of_module_list(current_task)?;
73    }
74    Ok(user_address)
75}
76
/// Shared implementation backing mmap(2) (and the arch32 mmap2 wrapper): validates the
/// protection, flag, length and offset arguments, then dispatches to either an anonymous
/// or a file-backed mapping.
pub fn do_mmap<L>(
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
    prot: u32,
    flags: u32,
    fd: FdNumber,
    offset: u64,
) -> Result<UserAddress, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Unknown protection bits fail with EINVAL.
    let prot_flags = ProtectionFlags::from_access_bits(prot).ok_or_else(|| {
        track_stub!(TODO("https://fxbug.dev/322874211"), "mmap parse protection", prot);
        errno!(EINVAL)
    })?;

    // The set of mmap flags this implementation understands, plus any platform-specific
    // ones (e.g. MAP_32BIT on x86-64).
    let valid_flags: u32 = get_valid_platform_mmap_flags()
        | MAP_PRIVATE
        | MAP_SHARED
        | MAP_SHARED_VALIDATE
        | MAP_ANONYMOUS
        | MAP_FIXED
        | MAP_FIXED_NOREPLACE
        | MAP_POPULATE
        | MAP_NORESERVE
        | MAP_STACK
        | MAP_DENYWRITE
        | MAP_GROWSDOWN
        | MAP_LOCKED;
    if flags & !valid_flags != 0 {
        // With MAP_SHARED_VALIDATE, unknown flags are reported as EOPNOTSUPP rather than
        // EINVAL, per mmap(2).
        if flags & MAP_SHARED_VALIDATE != 0 {
            return error!(EOPNOTSUPP);
        }
        track_stub!(TODO("https://fxbug.dev/322873638"), "mmap check flags", flags);
        return error!(EINVAL);
    }

    // Anonymous mappings ignore `fd`; file-backed mappings need a valid open file.
    let file =
        if flags & MAP_ANONYMOUS != 0 { None } else { Some(current_task.get_file(fd)?) };
    // Exactly one of MAP_PRIVATE / MAP_SHARED must be given.
    if flags & (MAP_PRIVATE | MAP_SHARED) == 0
        || flags & (MAP_PRIVATE | MAP_SHARED) == MAP_PRIVATE | MAP_SHARED
    {
        return error!(EINVAL);
    }
    if length == 0 {
        return error!(EINVAL);
    }
    // The file offset must be page-aligned.
    if offset % *PAGE_SIZE != 0 {
        return error!(EINVAL);
    }

    // TODO(tbodt): should we consider MAP_NORESERVE?

    // Translate (addr, MAP_FIXED, MAP_FIXED_NOREPLACE) into a placement policy: no
    // preference, a movable hint, a fixed address that must currently be free
    // (NOREPLACE), or a fixed address that may clobber existing mappings (MAP_FIXED).
    let addr = match (addr, flags & MAP_FIXED != 0, flags & MAP_FIXED_NOREPLACE != 0) {
        (UserAddress::NULL, false, false) => DesiredAddress::Any,
        (UserAddress::NULL, true, _) | (UserAddress::NULL, _, true) => return error!(EINVAL),
        (addr, false, false) => DesiredAddress::Hint(addr),
        (addr, _, true) => DesiredAddress::Fixed(addr),
        (addr, true, false) => DesiredAddress::FixedOverwrite(addr),
    };

    // Anonymous mappings always start at offset zero in their backing memory object.
    let memory_offset = if flags & MAP_ANONYMOUS != 0 { 0 } else { offset };

    let mut options = MappingOptions::empty();
    if flags & MAP_SHARED != 0 {
        options |= MappingOptions::SHARED;
    }
    if flags & MAP_ANONYMOUS != 0 {
        options |= MappingOptions::ANONYMOUS;
    }
    // MAP_32BIT only applies when the caller has not pinned the address with MAP_FIXED.
    #[cfg(target_arch = "x86_64")]
    if flags & MAP_FIXED == 0 && flags & MAP_32BIT != 0 {
        options |= MappingOptions::LOWER_32BIT;
    }
    if flags & MAP_GROWSDOWN != 0 {
        options |= MappingOptions::GROWSDOWN;
    }
    if flags & MAP_POPULATE != 0 {
        options |= MappingOptions::POPULATE;
    }
    if flags & MAP_LOCKED != 0 {
        // The kernel isn't expected to return an error if locking fails with this flag, so for now
        // this implementation will always fail to lock memory even if mapping succeeds.
        track_stub!(TODO("https://fxbug.dev/406377606"), "MAP_LOCKED");
    }

    // Give the security module a chance to veto the mapping.
    security::mmap_file(current_task, file.as_ref(), prot_flags, options)?;

    if flags & MAP_ANONYMOUS != 0 {
        trace_duration!(CATEGORY_STARNIX_MM, "AnonymousMmap");
        current_task.mm()?.map_anonymous(addr, length, prot_flags, options, MappingName::None)
    } else {
        trace_duration!(CATEGORY_STARNIX_MM, "FileBackedMmap");
        // TODO(tbodt): maximize protection flags so that mprotect works
        let file = file.expect("file retrieved above for file-backed mapping");
        file.mmap(
            locked,
            current_task,
            addr,
            memory_offset,
            length,
            prot_flags,
            options,
            file.name.to_passive(),
        )
    }
}
186
187pub fn sys_mprotect(
188    _locked: &mut Locked<Unlocked>,
189    current_task: &CurrentTask,
190    addr: UserAddress,
191    length: usize,
192    prot: u32,
193) -> Result<(), Errno> {
194    let prot_flags = ProtectionFlags::from_bits(prot).ok_or_else(|| {
195        track_stub!(TODO("https://fxbug.dev/322874672"), "mprotect parse protection", prot);
196        errno!(EINVAL)
197    })?;
198    current_task.mm()?.protect(current_task, addr, length, prot_flags)?;
199    Ok(())
200}
201
202pub fn sys_mremap(
203    _locked: &mut Locked<Unlocked>,
204    current_task: &CurrentTask,
205    addr: UserAddress,
206    old_length: usize,
207    new_length: usize,
208    flags: u32,
209    new_addr: UserAddress,
210) -> Result<UserAddress, Errno> {
211    let flags = MremapFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
212    let addr =
213        current_task.mm()?.remap(current_task, addr, old_length, new_length, flags, new_addr)?;
214    Ok(addr)
215}
216
217pub fn sys_munmap(
218    _locked: &mut Locked<Unlocked>,
219    current_task: &CurrentTask,
220    addr: UserAddress,
221    length: usize,
222) -> Result<(), Errno> {
223    current_task.mm()?.unmap(addr, length)?;
224    Ok(())
225}
226
227pub fn sys_msync(
228    locked: &mut Locked<Unlocked>,
229    current_task: &CurrentTask,
230    addr: UserAddress,
231    length: usize,
232    flags: u32,
233) -> Result<(), Errno> {
234    let flags = MsyncFlags::from_bits_retain(flags);
235    current_task.mm()?.msync(locked, current_task, addr, length, flags)
236}
237
238pub fn sys_madvise(
239    _locked: &mut Locked<Unlocked>,
240    current_task: &CurrentTask,
241    addr: UserAddress,
242    length: usize,
243    advice: u32,
244) -> Result<(), Errno> {
245    current_task.mm()?.madvise(current_task, addr, length, advice)?;
246    Ok(())
247}
248
/// process_madvise(2) is not implemented yet; always fails with ENOSYS.
pub fn sys_process_madvise(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _pidfd: FdNumber,
    _iovec_addr: IOVecPtr,
    _iovec_count: UserValue<i32>,
    _advice: UserValue<i32>,
    _flags: UserValue<u32>,
) -> Result<usize, Errno> {
    track_stub!(TODO("https://fxbug.dev/409060664"), "process_madvise");
    error!(ENOSYS)
}
261
262pub fn sys_brk(
263    locked: &mut Locked<Unlocked>,
264    current_task: &CurrentTask,
265    addr: UserAddress,
266) -> Result<UserAddress, Errno> {
267    current_task.mm()?.set_brk(locked, current_task, addr)
268}
269
270pub fn sys_process_vm_readv(
271    locked: &mut Locked<Unlocked>,
272    current_task: &CurrentTask,
273    tid: tid_t,
274    local_iov_addr: IOVecPtr,
275    local_iov_count: UserValue<i32>,
276    remote_iov_addr: IOVecPtr,
277    remote_iov_count: UserValue<i32>,
278    flags: usize,
279) -> Result<usize, Errno> {
280    if flags != 0 {
281        return error!(EINVAL);
282    }
283
284    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
285    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
286    // make sure to return Ok(0) before doing any other validation/operations.
287    if (local_iov_count == 0 && local_iov_addr.is_null())
288        || (remote_iov_count == 0 && remote_iov_addr.is_null())
289    {
290        return Ok(0);
291    }
292
293    let weak_remote_task = current_task.get_task(tid);
294    let remote_task = Task::from_weak(&weak_remote_task)?;
295
296    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
297
298    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
299    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
300    log_trace!(
301        "process_vm_readv(tid={}, local_iov={:?}, remote_iov={:?})",
302        tid,
303        local_iov,
304        remote_iov
305    );
306
307    track_stub!(TODO("https://fxbug.dev/322874765"), "process_vm_readv single-copy");
308    // According to the man page, this syscall was added to Linux specifically to
309    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
310    // point.
311    let mut output = UserBuffersOutputBuffer::unified_new(current_task, local_iov)?;
312    let remote_mm = remote_task.mm().ok();
313    if current_task.has_same_address_space(remote_mm.as_ref()) {
314        let mut input = UserBuffersInputBuffer::unified_new(current_task, remote_iov)?;
315        output.write_buffer(&mut input)
316    } else {
317        let mut input = UserBuffersInputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
318        output.write_buffer(&mut input)
319    }
320}
321
322pub fn sys_process_vm_writev(
323    locked: &mut Locked<Unlocked>,
324    current_task: &CurrentTask,
325    tid: tid_t,
326    local_iov_addr: IOVecPtr,
327    local_iov_count: UserValue<i32>,
328    remote_iov_addr: IOVecPtr,
329    remote_iov_count: UserValue<i32>,
330    flags: usize,
331) -> Result<usize, Errno> {
332    if flags != 0 {
333        return error!(EINVAL);
334    }
335
336    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
337    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
338    // make sure to return Ok(0) before doing any other validation/operations.
339    if (local_iov_count == 0 && local_iov_addr.is_null())
340        || (remote_iov_count == 0 && remote_iov_addr.is_null())
341    {
342        return Ok(0);
343    }
344
345    let weak_remote_task = current_task.get_task(tid);
346    let remote_task = Task::from_weak(&weak_remote_task)?;
347
348    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
349
350    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
351    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
352    log_trace!(
353        "sys_process_vm_writev(tid={}, local_iov={:?}, remote_iov={:?})",
354        tid,
355        local_iov,
356        remote_iov
357    );
358
359    track_stub!(TODO("https://fxbug.dev/322874339"), "process_vm_writev single-copy");
360    // NB: According to the man page, this syscall was added to Linux specifically to
361    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
362    // point.
363    let mut input = UserBuffersInputBuffer::unified_new(current_task, local_iov)?;
364    let remote_mm = remote_task.mm().ok();
365    if current_task.has_same_address_space(remote_mm.as_ref()) {
366        let mut output = UserBuffersOutputBuffer::unified_new(current_task, remote_iov)?;
367        output.write_buffer(&mut input)
368    } else {
369        let mut output = UserBuffersOutputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
370        output.write_buffer(&mut input)
371    }
372}
373
374pub fn sys_process_mrelease(
375    _locked: &mut Locked<Unlocked>,
376    current_task: &CurrentTask,
377    pidfd: FdNumber,
378    flags: u32,
379) -> Result<(), Errno> {
380    if flags != 0 {
381        return error!(EINVAL);
382    }
383    let file = current_task.get_file(pidfd)?;
384    let task = current_task.get_task(file.as_thread_group_key()?.pid());
385    let task = task.upgrade().ok_or_else(|| errno!(ESRCH))?;
386    if !task.load_stopped().is_stopped() {
387        return error!(EINVAL);
388    }
389
390    let mm = task.mm()?;
391    let mm_state = mm.state.write();
392    mm_state.mrelease()
393}
394
395pub fn sys_membarrier(
396    _locked: &mut Locked<Unlocked>,
397    current_task: &CurrentTask,
398    cmd: uapi::membarrier_cmd,
399    _flags: u32,
400    _cpu_id: i32,
401) -> Result<u32, Errno> {
402    match cmd {
403        // This command returns a bit mask of all supported commands.
404        // We support everything except for the RSEQ family.
405        uapi::membarrier_cmd_MEMBARRIER_CMD_QUERY => Ok(uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
406            | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED
407            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED
408            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED
409            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED
410            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
411            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE),
412        // Global and global expedited barriers are treated identically. We don't track
413        // registration for global expedited barriers currently.
414        uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
415        | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED => {
416            system_barrier(BarrierType::DataMemory);
417            Ok(0)
418        }
419        // Global registration commands are ignored.
420        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED => Ok(0),
421        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED => {
422            // A private expedited barrier is only issued if the address space is registered
423            // for these barriers.
424            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::Memory) {
425                // If a barrier is requested, issue a global barrier.
426                system_barrier(BarrierType::DataMemory);
427                Ok(0)
428            } else {
429                error!(EPERM)
430            }
431        }
432        // Private sync core barriers are treated as global instruction stream barriers.
433        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE => {
434            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::SyncCore)
435            {
436                system_barrier(BarrierType::InstructionStream);
437                Ok(0)
438            } else {
439                error!(EPERM)
440            }
441        }
442        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED => {
443            let _ =
444                current_task.mm()?.register_membarrier_private_expedited(MembarrierType::Memory)?;
445            Ok(0)
446        }
447
448        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE => {
449            let _ = current_task
450                .mm()?
451                .register_membarrier_private_expedited(MembarrierType::SyncCore)?;
452            Ok(0)
453        }
454        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ => {
455            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
456            error!(ENOSYS)
457        }
458        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ => {
459            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
460            error!(ENOSYS)
461        }
462        _ => error!(EINVAL),
463    }
464}
465
466pub fn sys_userfaultfd(
467    locked: &mut Locked<Unlocked>,
468    current_task: &CurrentTask,
469    raw_flags: u32,
470) -> Result<FdNumber, Errno> {
471    let unknown_flags = raw_flags & !(O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
472    if unknown_flags != 0 {
473        return error!(EINVAL, format!("unknown flags provided: {unknown_flags:x?}"));
474    }
475    let mut open_flags = OpenFlags::empty();
476    if raw_flags & O_NONBLOCK != 0 {
477        open_flags |= OpenFlags::NONBLOCK;
478    }
479    if raw_flags & O_CLOEXEC != 0 {
480        open_flags |= OpenFlags::CLOEXEC;
481    }
482
483    let fd_flags = if raw_flags & O_CLOEXEC != 0 {
484        FdFlags::CLOEXEC
485    } else {
486        track_stub!(TODO("https://fxbug.dev/297375964"), "userfaultfds that survive exec()");
487        return error!(ENOSYS);
488    };
489
490    let user_mode_only = raw_flags & UFFD_USER_MODE_ONLY == 0;
491    let uff_handle = UserFaultFile::new(locked, current_task, open_flags, user_mode_only)?;
492    current_task.add_file(locked, uff_handle, fd_flags)
493}
494
495pub fn sys_futex(
496    locked: &mut Locked<Unlocked>,
497    current_task: &mut CurrentTask,
498    addr: UserAddress,
499    op: u32,
500    value: u32,
501    timeout_or_value2: SyscallArg,
502    addr2: UserAddress,
503    value3: u32,
504) -> Result<usize, Errno> {
505    if op & FUTEX_PRIVATE_FLAG != 0 {
506        do_futex::<PrivateFutexKey>(
507            locked,
508            current_task,
509            addr,
510            op,
511            value,
512            timeout_or_value2,
513            addr2,
514            value3,
515        )
516    } else {
517        do_futex::<SharedFutexKey>(
518            locked,
519            current_task,
520            addr,
521            op,
522            value,
523            timeout_or_value2,
524            addr2,
525            value3,
526        )
527    }
528}
529
/// Dispatches a single futex command against the futex table selected by `Key`
/// (process-private or cross-process shared).
///
/// `timeout_or_value2` is command-dependent: a userspace timespec pointer for the
/// wait/lock commands, or the requeue count for FUTEX_REQUEUE/FUTEX_CMP_REQUEUE.
fn do_futex<Key: FutexKey>(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    addr: UserAddress,
    op: u32,
    value: u32,
    timeout_or_value2: SyscallArg,
    addr2: UserAddress,
    value3: u32,
) -> Result<usize, Errno> {
    let futexes = Key::get_table_from_task(current_task)?;
    // The low bits of `op` select the command; the remaining bits are option flags.
    let cmd = op & (FUTEX_CMD_MASK as u32);

    let is_realtime = match (cmd, op & FUTEX_CLOCK_REALTIME != 0) {
        // This option bit can be employed only with the FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
        // (since Linux 4.5) FUTEX_WAIT, and (since Linux 5.14) FUTEX_LOCK_PI2 operations.
        (FUTEX_WAIT_BITSET | FUTEX_WAIT_REQUEUE_PI | FUTEX_WAIT | FUTEX_LOCK_PI2, true) => true,
        (_, true) => return error!(EINVAL),

        // FUTEX_LOCK_PI always uses realtime.
        (FUTEX_LOCK_PI, _) => true,

        (_, false) => false,
    };

    // The timeout is interpreted differently by WAIT and WAIT_BITSET: WAIT takes a
    // timeout and WAIT_BITSET takes a deadline.
    // A null timespec pointer means "no timeout", i.e. wait forever.
    let read_timespec = |current_task: &CurrentTask| {
        let utime = TimeSpecPtr::new(current_task, timeout_or_value2);
        if utime.is_null() {
            Ok(timespec_from_time(zx::MonotonicInstant::INFINITE))
        } else {
            current_task.read_multi_arch_object(utime)
        }
    };
    // Interprets the timespec as a *relative* timeout, converted to a monotonic deadline.
    let read_timeout = |current_task: &CurrentTask| {
        let timespec = read_timespec(current_task)?;
        let timeout = duration_from_timespec(timespec);
        let deadline = zx::MonotonicInstant::after(timeout?);
        if is_realtime {
            // Since this is a timeout, waiting on the monotonic timeline before it's paused is
            // just as good as actually estimating UTC here.
            track_stub!(TODO("https://fxbug.dev/356912301"), "FUTEX_CLOCK_REALTIME timeout");
        }
        Ok(deadline)
    };
    // Interprets the timespec as an *absolute* deadline on the realtime or monotonic clock.
    let read_deadline = |current_task: &CurrentTask| {
        let timespec = read_timespec(current_task)?;
        if is_realtime {
            Ok(TargetTime::RealTime(time_from_timespec::<UtcTimeline>(timespec)?))
        } else {
            Ok(TargetTime::Monotonic(time_from_timespec::<zx::MonotonicTimeline>(timespec)?))
        }
    };

    match cmd {
        FUTEX_WAIT => {
            // FUTEX_WAIT takes a relative timeout and matches any wake bitset.
            let deadline = read_timeout(current_task)?;
            let bitset = FUTEX_BITSET_MATCH_ANY;
            do_futex_wait_with_restart::<Key>(
                locked,
                current_task,
                addr,
                value,
                bitset,
                TargetTime::Monotonic(deadline),
            )?;
            Ok(0)
        }
        FUTEX_WAKE => {
            futexes.wake(locked, current_task, addr, value as usize, FUTEX_BITSET_MATCH_ANY)
        }
        FUTEX_WAKE_OP => {
            track_stub!(TODO("https://fxbug.dev/361181940"), "FUTEX_WAKE_OP");
            error!(ENOSYS)
        }
        FUTEX_WAIT_BITSET => {
            // A zero bitset can never be matched by a waker and is rejected.
            if value3 == 0 {
                return error!(EINVAL);
            }
            let deadline = read_deadline(current_task)?;
            do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, value3, deadline)?;
            Ok(0)
        }
        FUTEX_WAKE_BITSET => {
            if value3 == 0 {
                return error!(EINVAL);
            }
            futexes.wake(locked, current_task, addr, value as usize, value3)
        }
        FUTEX_REQUEUE | FUTEX_CMP_REQUEUE => {
            // For the requeue commands the "timeout" argument slot carries the requeue count.
            let wake_count = value as usize;
            let requeue_count: usize = timeout_or_value2.into();
            if wake_count > std::i32::MAX as usize || requeue_count > std::i32::MAX as usize {
                return error!(EINVAL);
            }
            // CMP_REQUEUE additionally verifies the futex word still holds `value3`.
            let expected_value = if cmd == FUTEX_CMP_REQUEUE { Some(value3) } else { None };
            futexes.requeue(
                locked,
                current_task,
                addr,
                wake_count,
                requeue_count,
                addr2,
                expected_value,
            )
        }
        FUTEX_WAIT_REQUEUE_PI => {
            track_stub!(TODO("https://fxbug.dev/361181558"), "FUTEX_WAIT_REQUEUE_PI");
            error!(ENOSYS)
        }
        FUTEX_CMP_REQUEUE_PI => {
            track_stub!(TODO("https://fxbug.dev/361181773"), "FUTEX_CMP_REQUEUE_PI");
            error!(ENOSYS)
        }
        FUTEX_LOCK_PI | FUTEX_LOCK_PI2 => {
            futexes.lock_pi(locked, current_task, addr, read_timeout(current_task)?)?;
            Ok(0)
        }
        FUTEX_TRYLOCK_PI => {
            track_stub!(TODO("https://fxbug.dev/361175318"), "FUTEX_TRYLOCK_PI");
            error!(ENOSYS)
        }
        FUTEX_UNLOCK_PI => {
            futexes.unlock_pi(locked, current_task, addr)?;
            Ok(0)
        }
        _ => {
            track_stub!(TODO("https://fxbug.dev/322875124"), "futex unknown command", cmd);
            error!(ENOSYS)
        }
    }
}
663
/// Waits on a futex; if the wait is interrupted by a signal (EINTR), registers a restart
/// function so the syscall is transparently retried with the same absolute deadline, and
/// surfaces ERESTART_RESTARTBLOCK to the syscall machinery.
fn do_futex_wait_with_restart<Key: FutexKey>(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    addr: UserAddress,
    value: u32,
    mask: u32,
    deadline: TargetTime,
) -> Result<(), Errno> {
    let futexes = Key::get_table_from_task(current_task)?;
    // Select the wait primitive matching the deadline's timeline.
    let result = match deadline {
        TargetTime::Monotonic(mono_deadline) => {
            futexes.wait(locked, current_task, addr, value, mask, mono_deadline)
        }
        TargetTime::BootInstant(boot_deadline) => {
            let timer_slack = current_task.read().get_timerslack();
            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
        }
        TargetTime::RealTime(utc_deadline) => {
            // We convert real time deadlines to boot time deadlines since we cannot wait using a UTC deadline.
            let (boot_deadline, _) = estimate_boot_deadline_from_utc(utc_deadline);
            let timer_slack = current_task.read().get_timerslack();
            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
        }
    };
    match result {
        Err(err) if err == EINTR => {
            // Capture the original arguments so the restarted wait uses the same absolute
            // deadline rather than a fresh relative timeout.
            current_task.set_syscall_restart_func(move |locked, current_task| {
                do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, mask, deadline)
            });
            error!(ERESTART_RESTARTBLOCK)
        }
        result => result,
    }
}
698
699pub fn sys_get_robust_list(
700    _locked: &mut Locked<Unlocked>,
701    current_task: &CurrentTask,
702    tid: tid_t,
703    user_head_ptr: UserRef<UserAddress>,
704    user_len_ptr: UserRef<usize>,
705) -> Result<(), Errno> {
706    if tid < 0 {
707        return error!(EINVAL);
708    }
709    if user_head_ptr.is_null() || user_len_ptr.is_null() {
710        return error!(EFAULT);
711    }
712    if tid != 0 {
713        security::check_task_capable(current_task, CAP_SYS_PTRACE)?;
714    }
715    let task = if tid == 0 { current_task.weak_task() } else { current_task.get_task(tid) };
716    let task = Task::from_weak(&task)?;
717    current_task.write_object(user_head_ptr, &task.read().robust_list_head.addr())?;
718    current_task.write_object(user_len_ptr, &std::mem::size_of::<robust_list_head>())?;
719    Ok(())
720}
721
722pub fn sys_set_robust_list(
723    _locked: &mut Locked<Unlocked>,
724    current_task: &CurrentTask,
725    user_head: UserRef<robust_list_head>,
726    len: usize,
727) -> Result<(), Errno> {
728    if len != std::mem::size_of::<robust_list_head>() {
729        return error!(EINVAL);
730    }
731    current_task.write().robust_list_head = user_head.into();
732    Ok(())
733}
734
/// mlock(2): locks the given address range into memory.
///
/// Forwards to mlock2() with no flags, which is how Linux defines mlock.
pub fn sys_mlock(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    addr: UserAddress,
    length: usize,
) -> Result<(), Errno> {
    // If flags is 0, mlock2() behaves exactly the same as mlock().
    sys_mlock2(locked, current_task, addr, length, 0)
}
744
745pub fn sys_mlock2(
746    locked: &mut Locked<Unlocked>,
747    current_task: &CurrentTask,
748    addr: UserAddress,
749    length: usize,
750    flags: u64,
751) -> Result<(), Errno> {
752    const KNOWN_FLAGS: u64 = MLOCK_ONFAULT as u64;
753    if (flags & !KNOWN_FLAGS) != 0 {
754        return error!(EINVAL);
755    }
756    let on_fault = flags & MLOCK_ONFAULT as u64 != 0;
757    current_task.mm()?.mlock(current_task, locked, addr, length, on_fault)
758}
759
760pub fn sys_munlock(
761    _locked: &mut Locked<Unlocked>,
762    current_task: &CurrentTask,
763    addr: UserAddress,
764    length: usize,
765) -> Result<(), Errno> {
766    current_task.mm()?.munlock(current_task, addr, length)
767}
768
/// mlockall(2) is not implemented yet; always fails with ENOSYS.
pub fn sys_mlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "mlockall()");
    error!(ENOSYS)
}
777
/// munlockall(2) is not implemented yet; always fails with ENOSYS.
pub fn sys_munlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "munlockall()");
    error!(ENOSYS)
}
786
/// mincore(2) is not implemented yet; always fails with ENOSYS.
pub fn sys_mincore(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _addr: UserAddress,
    _length: usize,
    _out: UserRef<u8>,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297372240"), "mincore()");
    error!(ENOSYS)
}
797
// Syscalls for arch32 usage
//
// 32-bit compatibility entry points, compiled only for aarch64. Most simply re-export
// the 64-bit implementations; the functions defined here need 32-bit-specific argument
// handling.
#[cfg(target_arch = "aarch64")]
mod arch32 {
    use crate::mm::PAGE_SIZE;
    use crate::mm::syscalls::{UserAddress, sys_mmap};
    use crate::task::{CurrentTask, RobustListHeadPtr};
    use crate::vfs::FdNumber;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::errors::Errno;
    use starnix_uapi::user_address::UserRef;
    use starnix_uapi::{error, uapi};

    /// arch32 set_robust_list: like the 64-bit version, but validates against and stores
    /// the 32-bit robust_list_head layout.
    pub fn sys_arch32_set_robust_list(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_head: UserRef<uapi::arch32::robust_list_head>,
        len: usize,
    ) -> Result<(), Errno> {
        // Only the exact size of the 32-bit robust_list_head is accepted.
        if len != std::mem::size_of::<uapi::arch32::robust_list_head>() {
            return error!(EINVAL);
        }
        current_task.write().robust_list_head = RobustListHeadPtr::from_32(user_head);
        Ok(())
    }

    /// arch32 mmap2: `offset` arrives in page units and is converted to a byte offset
    /// before delegating to the 64-bit mmap implementation.
    pub fn sys_arch32_mmap2(
        locked: &mut Locked<Unlocked>,
        current_task: &mut CurrentTask,
        addr: UserAddress,
        length: usize,
        prot: u32,
        flags: u32,
        fd: FdNumber,
        offset: u64,
    ) -> Result<UserAddress, Errno> {
        sys_mmap(locked, current_task, addr, length, prot, flags, fd, offset * *PAGE_SIZE)
    }

    /// arch32 munmap: rejects ranges not representable in a 32-bit address space before
    /// unmapping.
    pub fn sys_arch32_munmap(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
    ) -> Result<(), Errno> {
        if !addr.is_lower_32bit() || length >= (1 << 32) {
            return error!(EINVAL);
        }
        current_task.mm()?.unmap(addr, length)?;
        Ok(())
    }

    // The remaining arch32 syscalls share the 64-bit implementations directly.
    pub use super::{
        sys_futex as sys_arch32_futex, sys_madvise as sys_arch32_madvise,
        sys_membarrier as sys_arch32_membarrier, sys_mincore as sys_arch32_mincore,
        sys_mlock as sys_arch32_mlock, sys_mlock2 as sys_arch32_mlock2,
        sys_mlockall as sys_arch32_mlockall, sys_mremap as sys_arch32_mremap,
        sys_msync as sys_arch32_msync, sys_munlock as sys_arch32_munlock,
        sys_munlockall as sys_arch32_munlockall,
        sys_process_mrelease as sys_arch32_process_mrelease,
        sys_process_vm_readv as sys_arch32_process_vm_readv,
        sys_userfaultfd as sys_arch32_userfaultfd,
    };
}
861
862#[cfg(target_arch = "aarch64")]
863pub use arch32::*;
864
// Unit tests exercising the mm syscall surface (mmap/munmap/msync/mremap)
// against an in-process kernel instance spawned per test.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::memory::MemoryObject;
    use crate::testing::*;
    use starnix_uapi::errors::EEXIST;
    use starnix_uapi::file_mode::Access;
    use starnix_uapi::{MREMAP_FIXED, MREMAP_MAYMOVE, PROT_READ};

    /// A hint address that collides with an existing mapping is not binding:
    /// the mapping should succeed at some other address, not fail.
    #[::fuchsia::test]
    async fn test_mmap_with_colliding_hint() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    assert_ne!(address, mapped_address);
                }
                error => {
                    panic!("mmap with colliding hint failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// MAP_FIXED over an existing mapping clobbers it: the mapping succeeds
    /// at exactly the requested address.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
                FdNumber::from_raw(-1),
                0,
            ) {
                Ok(address) => {
                    assert_eq!(address, mapped_address);
                }
                error => {
                    panic!("mmap with fixed collision failed: {error:?}");
                }
            }
        })
        .await;
    }

    /// MAP_FIXED_NOREPLACE over an existing mapping must fail with EEXIST
    /// instead of clobbering or relocating.
    #[::fuchsia::test]
    async fn test_mmap_with_fixed_noreplace_collision() {
        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), page_size);
            match do_mmap(
                locked,
                &current_task,
                mapped_address,
                page_size as usize,
                PROT_READ,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                FdNumber::from_raw(-1),
                0,
            ) {
                Err(errno) => {
                    assert_eq!(errno, EEXIST);
                }
                result => {
                    panic!("mmap with fixed_noreplace collision failed: {result:?}");
                }
            }
        })
        .await;
    }

    /// It is ok to call munmap with an address that is a multiple of the page size, and
    /// a non-zero length.
    #[::fuchsia::test]
    async fn test_munmap() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );

            // Verify that the memory is no longer readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
        })
        .await;
    }

    /// It is ok to call munmap on an unmapped range.
    #[::fuchsia::test]
    async fn test_munmap_not_mapped() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
            // Unmapping the same range a second time is also not an error.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
                Ok(())
            );
        })
        .await;
    }

    /// It is an error to call munmap with a length of 0.
    #[::fuchsia::test]
    async fn test_munmap_0_length() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(sys_munmap(locked, &current_task, mapped_address, 0), error!(EINVAL));
        })
        .await;
    }

    /// It is an error to call munmap with an address that is not a multiple of the page size.
    #[::fuchsia::test]
    async fn test_munmap_not_aligned() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + 1u64).unwrap(),
                    *PAGE_SIZE as usize
                ),
                error!(EINVAL)
            );

            // Verify that the memory is still readable.
            assert!(current_task.read_memory_to_array::<5>(mapped_address).is_ok());
        })
        .await;
    }

    /// The entire page should be unmapped, not just the range [address, address + length).
    #[::fuchsia::test]
    async fn test_munmap_unmap_partial() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) / 2),
                Ok(())
            );

            // Verify that memory can't be read in either half of the page.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE - 2)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// All pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_multiple_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // A length of one page plus one byte rounds up to two pages.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) + 1),
                Ok(())
            );

            // Verify that neither page is readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert_eq!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap()),
                error!(EFAULT)
            );
        })
        .await;
    }

    /// Only the pages that intersect the munmap range should be unmapped.
    #[::fuchsia::test]
    async fn test_munmap_one_of_many_pages() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // A length just under one page rounds up to exactly one page.
            assert_eq!(
                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) - 1),
                Ok(())
            );

            // Verify that the second page is still readable.
            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
            assert!(
                current_task
                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap())
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap the middle page of a mapping.
    #[::fuchsia::test]
    async fn test_munmap_middle_page() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_address =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (mapped_address + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Verify that the first and third pages are still readable.
            assert!(current_task.read_memory_to_vec(mapped_address, 5).is_ok());
            assert_eq!(
                current_task.read_memory_to_vec((mapped_address + *PAGE_SIZE).unwrap(), 5),
                error!(EFAULT)
            );
            assert!(
                current_task
                    .read_memory_to_vec((mapped_address + (*PAGE_SIZE * 2)).unwrap(), 5)
                    .is_ok()
            );
        })
        .await;
    }

    /// Unmap a range of pages that includes disjoint mappings.
    #[::fuchsia::test]
    async fn test_munmap_many_mappings() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mapped_addresses: Vec<_> = std::iter::repeat_with(|| {
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE)
            })
            .take(3)
            .collect();
            // Unmap the span covering the lowest through the highest mapping.
            let min_address = *mapped_addresses.iter().min().unwrap();
            let max_address = *mapped_addresses.iter().max().unwrap();
            let unmap_length = (max_address - min_address) + *PAGE_SIZE as usize;

            assert_eq!(sys_munmap(locked, &current_task, min_address, unmap_length), Ok(()));

            // Verify that none of the mapped pages are readable.
            for mapped_address in mapped_addresses {
                assert_eq!(current_task.read_memory_to_vec(mapped_address, 5), error!(EFAULT));
            }
        })
        .await;
    }

    /// msync succeeds on fully mapped ranges (even ones spanning multiple
    /// mappings) and returns ENOMEM for ranges containing an unmapped hole.
    #[::fuchsia::test]
    async fn test_msync_validates_address_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages and test that ranges covering these pages return no error.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );

            // Unmap the middle page and test that ranges covering that page return ENOMEM.
            sys_munmap(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE as usize)
                .expect("unmap middle");
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE as usize * 3,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE as usize * 2,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    starnix_uapi::MS_SYNC
                ),
                error!(ENOMEM)
            );
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + (*PAGE_SIZE * 2)).unwrap(),
                    *PAGE_SIZE as usize,
                    0
                ),
                Ok(())
            );

            // Map the middle page back and test that ranges covering the three pages
            // (spanning multiple ranges) return no error.
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
            assert_eq!(
                sys_msync(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize * 2,
                    0
                ),
                Ok(())
            );
        })
        .await;
    }

    /// Shrinks an entire range.
    #[::fuchsia::test]
    async fn test_mremap_shrink_whole_range_from_end() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            // Shrink the mapping from 2 to 1 pages.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
        })
        .await;
    }

    /// Shrinks part of a range, introducing a hole in the middle.
    #[::fuchsia::test]
    async fn test_mremap_shrink_partial_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first 2 pages down to 1, creating a hole.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 2,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinking doesn't care if the range specified spans multiple mappings.
    #[::fuchsia::test]
    async fn test_mremap_shrink_across_ranges() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, then map the middle again. This will leave us with
            // 3 contiguous mappings.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );
            assert_eq!(
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
                (addr + *PAGE_SIZE).unwrap()
            );

            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Remap over all three mappings, shrinking to 1 page.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE * 3,
                    *PAGE_SIZE,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_unmapped(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap());
        })
        .await;
    }

    /// Grows a mapping in-place.
    #[::fuchsia::test]
    async fn test_mremap_grow_in_place() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages, unmap the middle, leaving a hole.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
            assert_eq!(
                sys_munmap(
                    locked,
                    &current_task,
                    (addr + *PAGE_SIZE).unwrap(),
                    *PAGE_SIZE as usize
                ),
                Ok(())
            );

            // Grow the first page in-place into the middle.
            assert_eq!(
                remap_memory(
                    locked,
                    &current_task,
                    addr,
                    *PAGE_SIZE,
                    *PAGE_SIZE * 2,
                    0,
                    UserAddress::default()
                ),
                Ok(addr)
            );

            check_page_eq(&current_task, addr, 'a');

            // The middle page should be new, and not just pointing to the original middle page filled
            // with 'b'.
            check_page_ne(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');

            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Tries to grow a set of pages that cannot fit, and forces a move.
    #[::fuchsia::test]
    async fn test_mremap_grow_maymove() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Grow the first two pages by 1, forcing a move.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE * 3,
                MREMAP_MAYMOVE,
                UserAddress::default(),
            )
            .expect("failed to mremap");

            assert_ne!(new_addr, addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The moved pages should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'b');

            // The newly grown page should not be the same as the original third page.
            check_page_ne(&current_task, (new_addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
        })
        .await;
    }

    /// Shrinks a set of pages and move them to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_shrink_fixed() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Map 2 pages which will act as the destination.
            let dst_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            fill_page(&current_task, dst_addr, 'y');
            fill_page(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'z');

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Shrink the first two pages and move them to overwrite the mappings at `dst_addr`.
            let new_addr = remap_memory(
                locked,
                &current_task,
                addr,
                *PAGE_SIZE * 2,
                *PAGE_SIZE,
                MREMAP_MAYMOVE | MREMAP_FIXED,
                dst_addr,
            )
            .expect("failed to mremap");

            assert_eq!(new_addr, dst_addr, "mremap did not move the mapping");

            // The first two pages should have been moved.
            check_unmapped(&current_task, addr);
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());

            // The third page should still be present.
            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // The first moved page should have the same contents.
            check_page_eq(&current_task, new_addr, 'a');

            // The second page should be part of the original dst mapping.
            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'z');
        })
        .await;
    }

    /// Clobbers the middle of an existing mapping with mremap to a fixed location.
    #[::fuchsia::test]
    async fn test_mremap_clobber_memory_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Build a 2-page VMO-backed mapping ('x' page then 'y' page) as
            // the destination for the fixed-address remap.
            let dst_memory = MemoryObject::from(zx::Vmo::create(2 * *PAGE_SIZE).unwrap());
            dst_memory.write(&['x' as u8].repeat(*PAGE_SIZE as usize), 0).unwrap();
            dst_memory.write(&['y' as u8].repeat(*PAGE_SIZE as usize), *PAGE_SIZE).unwrap();

            let dst_addr = current_task
                .mm()
                .unwrap()
                .map_memory(
                    DesiredAddress::Any,
                    dst_memory.into(),
                    0,
                    2 * (*PAGE_SIZE as usize),
                    ProtectionFlags::READ,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::None,
                )
                .unwrap();

            // Map 3 pages.
            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
            fill_page(&current_task, addr, 'a');
            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');

            // Overwrite the second page of the mapping with the second page of the anonymous mapping.
            let remapped_addr = sys_mremap(
                locked,
                &*current_task,
                (addr + *PAGE_SIZE).unwrap(),
                *PAGE_SIZE as usize,
                *PAGE_SIZE as usize,
                MREMAP_FIXED | MREMAP_MAYMOVE,
                (dst_addr + *PAGE_SIZE).unwrap(),
            )
            .unwrap();

            assert_eq!(remapped_addr, (dst_addr + *PAGE_SIZE).unwrap());

            check_page_eq(&current_task, addr, 'a');
            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
            check_page_eq(&current_task, (addr + (2 * *PAGE_SIZE)).unwrap(), 'c');

            check_page_eq(&current_task, dst_addr, 'x');
            check_page_eq(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'b');
        })
        .await;
    }

    /// MAP_32BIT mappings must land entirely below the 2 GiB boundary.
    /// Repeats the mapping many times to exercise a spread of placements.
    #[cfg(target_arch = "x86_64")]
    #[::fuchsia::test]
    async fn test_map_32_bit() {
        use starnix_uapi::PROT_WRITE;

        spawn_kernel_and_run(async |locked, current_task| {
            let page_size = *PAGE_SIZE;

            for _i in 0..256 {
                match do_mmap(
                    locked,
                    &current_task,
                    UserAddress::from(0),
                    page_size as usize,
                    PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT,
                    FdNumber::from_raw(-1),
                    0,
                ) {
                    Ok(address) => {
                        let memory_end = address.ptr() + page_size as usize;
                        assert!(memory_end <= 0x80000000);
                    }
                    error => {
                        panic!("mmap with MAP_32BIT failed: {error:?}");
                    }
                }
            }
        })
        .await;
    }
}