Skip to main content

starnix_core/mm/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::debugger::notify_debugger_of_module_list;
7use crate::mm::{
8    DesiredAddress, FutexKey, IOVecPtr, MappingName, MappingOptions, MembarrierType,
9    MemoryAccessorExt, MremapFlags, MsyncFlags, PAGE_SIZE, PrivateFutexKey, ProtectionFlags,
10    SharedFutexKey,
11};
12use crate::security;
13use crate::syscalls::time::TimeSpecPtr;
14use crate::task::CurrentTask;
15use crate::time::TargetTime;
16use crate::time::utc::estimate_boot_deadline_from_utc;
17use crate::vfs::FdNumber;
18use crate::vfs::buffers::{OutputBuffer, UserBuffersInputBuffer, UserBuffersOutputBuffer};
19use fuchsia_runtime::UtcTimeline;
20use linux_uapi::MLOCK_ONFAULT;
21use starnix_logging::{CATEGORY_STARNIX_MM, log_trace, track_stub};
22use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
23use starnix_syscalls::SyscallArg;
24use starnix_types::time::{duration_from_timespec, time_from_timespec, timespec_from_time};
25use starnix_uapi::auth::{PTRACE_MODE_ATTACH_REALCREDS, PTRACE_MODE_READ_REALCREDS};
26use starnix_uapi::errors::{EINTR, Errno};
27use starnix_uapi::user_address::{UserAddress, UserRef};
28use starnix_uapi::user_value::UserValue;
29use starnix_uapi::{
30    FUTEX_BITSET_MATCH_ANY, FUTEX_CLOCK_REALTIME, FUTEX_CMD_MASK, FUTEX_CMP_REQUEUE,
31    FUTEX_CMP_REQUEUE_PI, FUTEX_LOCK_PI, FUTEX_LOCK_PI2, FUTEX_PRIVATE_FLAG, FUTEX_REQUEUE,
32    FUTEX_TRYLOCK_PI, FUTEX_UNLOCK_PI, FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
33    FUTEX_WAKE, FUTEX_WAKE_BITSET, FUTEX_WAKE_OP, MAP_ANONYMOUS, MAP_DENYWRITE, MAP_FIXED,
34    MAP_FIXED_NOREPLACE, MAP_GROWSDOWN, MAP_LOCKED, MAP_NORESERVE, MAP_POPULATE, MAP_PRIVATE,
35    MAP_SHARED, MAP_SHARED_VALIDATE, MAP_STACK, PROT_EXEC, errno, error, robust_list_head, tid_t,
36    uapi,
37};
38use std::ops::Deref as _;
39use zx;
40
41#[cfg(target_arch = "x86_64")]
42use starnix_uapi::MAP_32BIT;
43
// Returns any platform-specific mmap flags. This is a separate function because as of this writing
// "attributes on expressions are experimental."
//
// The returned bits are OR-ed into the set of flags that `do_mmap` accepts.
// x86_64 variant: `MAP_32BIT` is the only platform-specific flag.
#[cfg(target_arch = "x86_64")]
fn get_valid_platform_mmap_flags() -> u32 {
    MAP_32BIT
}
// Variant for every other architecture: no platform-specific flags are accepted.
#[cfg(not(target_arch = "x86_64"))]
fn get_valid_platform_mmap_flags() -> u32 {
    0
}
54
55/// sys_mmap takes a mutable reference to current_task because it may modify the IP register.
56pub fn sys_mmap(
57    locked: &mut Locked<Unlocked>,
58    current_task: &mut CurrentTask,
59    addr: UserAddress,
60    length: usize,
61    prot: u32,
62    flags: u32,
63    fd: FdNumber,
64    offset: u64,
65) -> Result<UserAddress, Errno> {
66    let user_address = do_mmap(locked, current_task, addr, length, prot, flags, fd, offset)?;
67    if prot & PROT_EXEC != 0 {
68        // Possibly loads a new module. Notify debugger for the change.
69        // We only care about dynamic linker loading modules for now, which uses mmap. In the future
70        // we might want to support unloading modules in munmap or JIT compilation in mprotect.
71        notify_debugger_of_module_list(current_task)?;
72    }
73    Ok(user_address)
74}
75
76pub fn do_mmap<L>(
77    locked: &mut Locked<L>,
78    current_task: &CurrentTask,
79    addr: UserAddress,
80    length: usize,
81    prot: u32,
82    flags: u32,
83    fd: FdNumber,
84    offset: u64,
85) -> Result<UserAddress, Errno>
86where
87    L: LockEqualOrBefore<FileOpsCore> + starnix_sync::LockBefore<starnix_sync::ThreadGroupLimits>,
88{
89    let prot_flags = ProtectionFlags::from_access_bits(prot).ok_or_else(|| {
90        track_stub!(TODO("https://fxbug.dev/322874211"), "mmap parse protection", prot);
91        errno!(EINVAL)
92    })?;
93
94    let valid_flags: u32 = get_valid_platform_mmap_flags()
95        | MAP_PRIVATE
96        | MAP_SHARED
97        | MAP_SHARED_VALIDATE
98        | MAP_ANONYMOUS
99        | MAP_FIXED
100        | MAP_FIXED_NOREPLACE
101        | MAP_POPULATE
102        | MAP_NORESERVE
103        | MAP_STACK
104        | MAP_DENYWRITE
105        | MAP_GROWSDOWN
106        | MAP_LOCKED;
107    if flags & !valid_flags != 0 {
108        if flags & MAP_SHARED_VALIDATE != 0 {
109            return error!(EOPNOTSUPP);
110        }
111        track_stub!(TODO("https://fxbug.dev/322873638"), "mmap check flags", flags);
112        return error!(EINVAL);
113    }
114
115    let file = if flags & MAP_ANONYMOUS != 0 { None } else { Some(current_task.get_file(fd)?) };
116    if flags & (MAP_PRIVATE | MAP_SHARED) == 0
117        || flags & (MAP_PRIVATE | MAP_SHARED) == MAP_PRIVATE | MAP_SHARED
118    {
119        return error!(EINVAL);
120    }
121    if length == 0 {
122        return error!(EINVAL);
123    }
124    if offset % *PAGE_SIZE != 0 {
125        return error!(EINVAL);
126    }
127
128    let page_size = *PAGE_SIZE as usize;
129    let length_aligned =
130        length.checked_add(page_size - 1).ok_or_else(|| errno!(ENOMEM))? & !(page_size - 1);
131    let rlimit_as =
132        current_task.thread_group().get_rlimit(locked, starnix_uapi::resource_limits::Resource::AS)
133            as usize;
134    let current_usage: usize = current_task.mm()?.get_total_usage();
135
136    if current_usage.saturating_add(length_aligned) > rlimit_as {
137        return error!(ENOMEM);
138    }
139
140    // TODO(tbodt): should we consider MAP_NORESERVE?
141
142    let addr = match (addr, flags & MAP_FIXED != 0, flags & MAP_FIXED_NOREPLACE != 0) {
143        (UserAddress::NULL, false, false) => DesiredAddress::Any,
144        (UserAddress::NULL, true, _) | (UserAddress::NULL, _, true) => return error!(EINVAL),
145        (addr, false, false) => DesiredAddress::Hint(addr),
146        (addr, _, true) => DesiredAddress::Fixed(addr),
147        (addr, true, false) => DesiredAddress::FixedOverwrite(addr),
148    };
149
150    let memory_offset = if flags & MAP_ANONYMOUS != 0 { 0 } else { offset };
151
152    let mut options = MappingOptions::empty();
153    if flags & MAP_SHARED != 0 {
154        options |= MappingOptions::SHARED;
155    }
156    if flags & MAP_ANONYMOUS != 0 {
157        options |= MappingOptions::ANONYMOUS;
158    }
159    #[cfg(target_arch = "x86_64")]
160    if flags & MAP_FIXED == 0 && flags & MAP_32BIT != 0 {
161        options |= MappingOptions::LOWER_32BIT;
162    }
163    if flags & MAP_GROWSDOWN != 0 {
164        options |= MappingOptions::GROWSDOWN;
165    }
166    if flags & MAP_POPULATE != 0 {
167        options |= MappingOptions::POPULATE;
168    }
169    if flags & MAP_LOCKED != 0 {
170        // The kernel isn't expected to return an error if locking fails with this flag, so for now
171        // this implementation will always fail to lock memory even if mapping succeeds.
172        track_stub!(TODO("https://fxbug.dev/406377606"), "MAP_LOCKED");
173    }
174
175    security::mmap_file(current_task, file.as_ref(), prot_flags, options)?;
176
177    if flags & MAP_ANONYMOUS != 0 {
178        fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "AnonymousMmap");
179        current_task.mm()?.map_anonymous(addr, length, prot_flags, options, MappingName::None)
180    } else {
181        fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "FileBackedMmap");
182        // TODO(tbodt): maximize protection flags so that mprotect works
183        let file = file.expect("file retrieved above for file-backed mapping");
184        file.mmap(
185            locked,
186            current_task,
187            addr,
188            memory_offset,
189            length,
190            prot_flags,
191            options,
192            file.name.to_passive(),
193        )
194    }
195}
196
197pub fn sys_mprotect(
198    _locked: &mut Locked<Unlocked>,
199    current_task: &CurrentTask,
200    addr: UserAddress,
201    length: usize,
202    prot: u32,
203) -> Result<(), Errno> {
204    let prot_flags = ProtectionFlags::from_bits(prot).ok_or_else(|| {
205        track_stub!(TODO("https://fxbug.dev/322874672"), "mprotect parse protection", prot);
206        errno!(EINVAL)
207    })?;
208    current_task.mm()?.protect(current_task, addr, length, prot_flags)?;
209    Ok(())
210}
211
212pub fn sys_mremap(
213    _locked: &mut Locked<Unlocked>,
214    current_task: &CurrentTask,
215    addr: UserAddress,
216    old_length: usize,
217    new_length: usize,
218    flags: u32,
219    new_addr: UserAddress,
220) -> Result<UserAddress, Errno> {
221    let flags = MremapFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
222    let addr =
223        current_task.mm()?.remap(current_task, addr, old_length, new_length, flags, new_addr)?;
224    Ok(addr)
225}
226
227pub fn sys_munmap(
228    _locked: &mut Locked<Unlocked>,
229    current_task: &CurrentTask,
230    addr: UserAddress,
231    length: usize,
232) -> Result<(), Errno> {
233    current_task.mm()?.unmap(addr, length)?;
234    Ok(())
235}
236
237pub fn sys_msync(
238    locked: &mut Locked<Unlocked>,
239    current_task: &CurrentTask,
240    addr: UserAddress,
241    length: usize,
242    flags: u32,
243) -> Result<(), Errno> {
244    let flags = MsyncFlags::from_bits_retain(flags);
245    current_task.mm()?.msync(locked, current_task, addr, length, flags)
246}
247
248pub fn sys_madvise(
249    _locked: &mut Locked<Unlocked>,
250    current_task: &CurrentTask,
251    addr: UserAddress,
252    length: usize,
253    advice: u32,
254) -> Result<(), Errno> {
255    current_task.mm()?.madvise(addr, length, advice)?;
256    Ok(())
257}
258
/// Entry point for the `process_madvise` syscall.
///
/// Not implemented: every argument is ignored, a stub is recorded, and `ENOSYS` is returned.
pub fn sys_process_madvise(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _pidfd: FdNumber,
    _iovec_addr: IOVecPtr,
    _iovec_count: UserValue<i32>,
    _advice: UserValue<i32>,
    _flags: UserValue<u32>,
) -> Result<usize, Errno> {
    track_stub!(TODO("https://fxbug.dev/409060664"), "process_madvise");
    error!(ENOSYS)
}
271
272pub fn sys_brk(
273    locked: &mut Locked<Unlocked>,
274    current_task: &CurrentTask,
275    addr: UserAddress,
276) -> Result<UserAddress, Errno> {
277    current_task.mm()?.set_brk(locked, current_task, addr)
278}
279
280pub fn sys_process_vm_readv(
281    locked: &mut Locked<Unlocked>,
282    current_task: &CurrentTask,
283    tid: tid_t,
284    local_iov_addr: IOVecPtr,
285    local_iov_count: UserValue<i32>,
286    remote_iov_addr: IOVecPtr,
287    remote_iov_count: UserValue<i32>,
288    flags: usize,
289) -> Result<usize, Errno> {
290    if flags != 0 {
291        return error!(EINVAL);
292    }
293
294    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
295    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
296    // make sure to return Ok(0) before doing any other validation/operations.
297    if (local_iov_count == 0 && local_iov_addr.is_null())
298        || (remote_iov_count == 0 && remote_iov_addr.is_null())
299    {
300        return Ok(0);
301    }
302
303    let remote_task = current_task.get_task(tid)?;
304
305    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
306
307    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
308    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
309    log_trace!(
310        "process_vm_readv(tid={}, local_iov={:?}, remote_iov={:?})",
311        tid,
312        local_iov,
313        remote_iov
314    );
315
316    track_stub!(TODO("https://fxbug.dev/322874765"), "process_vm_readv single-copy");
317    // According to the man page, this syscall was added to Linux specifically to
318    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
319    // point.
320    let mut output = UserBuffersOutputBuffer::unified_new(current_task, local_iov)?;
321    let remote_mm = remote_task.mm().ok();
322    if current_task.has_same_address_space(remote_mm.as_ref()) {
323        let mut input = UserBuffersInputBuffer::unified_new(current_task, remote_iov)?;
324        output.write_buffer(&mut input)
325    } else {
326        let mut input = UserBuffersInputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
327        output.write_buffer(&mut input)
328    }
329}
330
331pub fn sys_process_vm_writev(
332    locked: &mut Locked<Unlocked>,
333    current_task: &CurrentTask,
334    tid: tid_t,
335    local_iov_addr: IOVecPtr,
336    local_iov_count: UserValue<i32>,
337    remote_iov_addr: IOVecPtr,
338    remote_iov_count: UserValue<i32>,
339    flags: usize,
340) -> Result<usize, Errno> {
341    if flags != 0 {
342        return error!(EINVAL);
343    }
344
345    // Source and destination are allowed to be of different length. It is valid to use a nullptr if
346    // the associated length is 0. Thus, if either source or destination length is 0 and nullptr,
347    // make sure to return Ok(0) before doing any other validation/operations.
348    if (local_iov_count == 0 && local_iov_addr.is_null())
349        || (remote_iov_count == 0 && remote_iov_addr.is_null())
350    {
351        return Ok(0);
352    }
353
354    let remote_task = current_task.get_task(tid)?;
355
356    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &remote_task)?;
357
358    let local_iov = current_task.read_iovec(local_iov_addr, local_iov_count)?;
359    let remote_iov = current_task.read_iovec(remote_iov_addr, remote_iov_count)?;
360    log_trace!(
361        "sys_process_vm_writev(tid={}, local_iov={:?}, remote_iov={:?})",
362        tid,
363        local_iov,
364        remote_iov
365    );
366
367    track_stub!(TODO("https://fxbug.dev/322874339"), "process_vm_writev single-copy");
368    // NB: According to the man page, this syscall was added to Linux specifically to
369    // avoid doing two copies like other IPC mechanisms require. We should avoid this too at some
370    // point.
371    let mut input = UserBuffersInputBuffer::unified_new(current_task, local_iov)?;
372    let remote_mm = remote_task.mm().ok();
373    if current_task.has_same_address_space(remote_mm.as_ref()) {
374        let mut output = UserBuffersOutputBuffer::unified_new(current_task, remote_iov)?;
375        output.write_buffer(&mut input)
376    } else {
377        let mut output = UserBuffersOutputBuffer::syscall_new(remote_task.deref(), remote_iov)?;
378        output.write_buffer(&mut input)
379    }
380}
381
382pub fn sys_process_mrelease(
383    _locked: &mut Locked<Unlocked>,
384    current_task: &CurrentTask,
385    pidfd: FdNumber,
386    flags: u32,
387) -> Result<(), Errno> {
388    if flags != 0 {
389        return error!(EINVAL);
390    }
391    let file = current_task.get_file(pidfd)?;
392    let task = current_task.get_task(file.as_thread_group_key()?.pid())?;
393    if !task.load_stopped().is_stopped() {
394        return error!(EINVAL);
395    }
396
397    task.mm()?.mrelease()
398}
399
400pub fn sys_membarrier(
401    _locked: &mut Locked<Unlocked>,
402    current_task: &CurrentTask,
403    cmd: uapi::membarrier_cmd,
404    _flags: u32,
405    _cpu_id: i32,
406) -> Result<u32, Errno> {
407    match cmd {
408        // This command returns a bit mask of all supported commands.
409        // We support everything except for the RSEQ family.
410        uapi::membarrier_cmd_MEMBARRIER_CMD_QUERY => Ok(uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
411            | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED
412            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED
413            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED
414            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED
415            | uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
416            | uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE),
417        // Global and global expedited barriers are treated identically. We don't track
418        // registration for global expedited barriers currently.
419        uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL
420        | uapi::membarrier_cmd_MEMBARRIER_CMD_GLOBAL_EXPEDITED => {
421            system_barrier(BarrierType::DataMemory);
422            Ok(0)
423        }
424        // Global registration commands are ignored.
425        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED => Ok(0),
426        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED => {
427            // A private expedited barrier is only issued if the address space is registered
428            // for these barriers.
429            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::Memory) {
430                // If a barrier is requested, issue a global barrier.
431                system_barrier(BarrierType::DataMemory);
432                Ok(0)
433            } else {
434                error!(EPERM)
435            }
436        }
437        // Private sync core barriers are treated as global instruction stream barriers.
438        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE => {
439            if current_task.mm()?.membarrier_private_expedited_registered(MembarrierType::SyncCore)
440            {
441                system_barrier(BarrierType::InstructionStream);
442                Ok(0)
443            } else {
444                error!(EPERM)
445            }
446        }
447        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED => {
448            let _ =
449                current_task.mm()?.register_membarrier_private_expedited(MembarrierType::Memory)?;
450            Ok(0)
451        }
452
453        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE => {
454            let _ = current_task
455                .mm()?
456                .register_membarrier_private_expedited(MembarrierType::SyncCore)?;
457            Ok(0)
458        }
459        uapi::membarrier_cmd_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ => {
460            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
461            error!(ENOSYS)
462        }
463        uapi::membarrier_cmd_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ => {
464            track_stub!(TODO("https://fxbug.dev/447158570"), "membarrier rseq");
465            error!(ENOSYS)
466        }
467        _ => error!(EINVAL),
468    }
469}
470
471pub fn sys_futex(
472    locked: &mut Locked<Unlocked>,
473    current_task: &mut CurrentTask,
474    addr: UserAddress,
475    op: u32,
476    value: u32,
477    timeout_or_value2: SyscallArg,
478    addr2: UserAddress,
479    value3: u32,
480) -> Result<usize, Errno> {
481    if op & FUTEX_PRIVATE_FLAG != 0 {
482        do_futex::<PrivateFutexKey>(
483            locked,
484            current_task,
485            addr,
486            op,
487            value,
488            timeout_or_value2,
489            addr2,
490            value3,
491        )
492    } else {
493        do_futex::<SharedFutexKey>(
494            locked,
495            current_task,
496            addr,
497            op,
498            value,
499            timeout_or_value2,
500            addr2,
501            value3,
502        )
503    }
504}
505
506fn do_futex<Key: FutexKey>(
507    locked: &mut Locked<Unlocked>,
508    current_task: &mut CurrentTask,
509    addr: UserAddress,
510    op: u32,
511    value: u32,
512    timeout_or_value2: SyscallArg,
513    addr2: UserAddress,
514    value3: u32,
515) -> Result<usize, Errno> {
516    let futexes = Key::get_table_from_task(current_task)?;
517    let cmd = op & (FUTEX_CMD_MASK as u32);
518
519    let is_realtime = match (cmd, op & FUTEX_CLOCK_REALTIME != 0) {
520        // This option bit can be employed only with the FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI,
521        // (since Linux 4.5) FUTEX_WAIT, and (since Linux 5.14) FUTEX_LOCK_PI2 operations.
522        (FUTEX_WAIT_BITSET | FUTEX_WAIT_REQUEUE_PI | FUTEX_WAIT | FUTEX_LOCK_PI2, true) => true,
523        (_, true) => return error!(EINVAL),
524
525        // FUTEX_LOCK_PI always uses realtime.
526        (FUTEX_LOCK_PI, _) => true,
527
528        (_, false) => false,
529    };
530
531    // The timeout is interpreted differently by WAIT and WAIT_BITSET: WAIT takes a
532    // timeout and WAIT_BITSET takes a deadline.
533    let read_timespec = |current_task: &CurrentTask| {
534        let utime = TimeSpecPtr::new(current_task, timeout_or_value2);
535        if utime.is_null() {
536            Ok(timespec_from_time(zx::MonotonicInstant::INFINITE))
537        } else {
538            current_task.read_multi_arch_object(utime)
539        }
540    };
541    let read_timeout = |current_task: &CurrentTask| {
542        let timespec = read_timespec(current_task)?;
543        let timeout = duration_from_timespec(timespec);
544        let deadline = zx::MonotonicInstant::after(timeout?);
545        if is_realtime {
546            // Since this is a timeout, waiting on the monotonic timeline before it's paused is
547            // just as good as actually estimating UTC here.
548            track_stub!(TODO("https://fxbug.dev/356912301"), "FUTEX_CLOCK_REALTIME timeout");
549        }
550        Ok(deadline)
551    };
552    let read_deadline = |current_task: &CurrentTask| {
553        let timespec = read_timespec(current_task)?;
554        if is_realtime {
555            Ok(TargetTime::RealTime(time_from_timespec::<UtcTimeline>(timespec)?))
556        } else {
557            Ok(TargetTime::Monotonic(time_from_timespec::<zx::MonotonicTimeline>(timespec)?))
558        }
559    };
560
561    match cmd {
562        FUTEX_WAIT => {
563            let deadline = read_timeout(current_task)?;
564            let bitset = FUTEX_BITSET_MATCH_ANY;
565            do_futex_wait_with_restart::<Key>(
566                locked,
567                current_task,
568                addr,
569                value,
570                bitset,
571                TargetTime::Monotonic(deadline),
572            )?;
573            Ok(0)
574        }
575        FUTEX_WAKE => {
576            futexes.wake(locked, current_task, addr, value as usize, FUTEX_BITSET_MATCH_ANY)
577        }
578        FUTEX_WAKE_OP => {
579            track_stub!(TODO("https://fxbug.dev/361181940"), "FUTEX_WAKE_OP");
580            error!(ENOSYS)
581        }
582        FUTEX_WAIT_BITSET => {
583            if value3 == 0 {
584                return error!(EINVAL);
585            }
586            let deadline = read_deadline(current_task)?;
587            do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, value3, deadline)?;
588            Ok(0)
589        }
590        FUTEX_WAKE_BITSET => {
591            if value3 == 0 {
592                return error!(EINVAL);
593            }
594            futexes.wake(locked, current_task, addr, value as usize, value3)
595        }
596        FUTEX_REQUEUE | FUTEX_CMP_REQUEUE => {
597            let wake_count = value as usize;
598            let requeue_count: usize = timeout_or_value2.into();
599            if wake_count > std::i32::MAX as usize || requeue_count > std::i32::MAX as usize {
600                return error!(EINVAL);
601            }
602            let expected_value = if cmd == FUTEX_CMP_REQUEUE { Some(value3) } else { None };
603            futexes.requeue(
604                locked,
605                current_task,
606                addr,
607                wake_count,
608                requeue_count,
609                addr2,
610                expected_value,
611            )
612        }
613        FUTEX_WAIT_REQUEUE_PI => {
614            track_stub!(TODO("https://fxbug.dev/361181558"), "FUTEX_WAIT_REQUEUE_PI");
615            error!(ENOSYS)
616        }
617        FUTEX_CMP_REQUEUE_PI => {
618            track_stub!(TODO("https://fxbug.dev/361181773"), "FUTEX_CMP_REQUEUE_PI");
619            error!(ENOSYS)
620        }
621        FUTEX_LOCK_PI | FUTEX_LOCK_PI2 => {
622            futexes.lock_pi(locked, current_task, addr, read_timeout(current_task)?)?;
623            Ok(0)
624        }
625        FUTEX_TRYLOCK_PI => {
626            track_stub!(TODO("https://fxbug.dev/361175318"), "FUTEX_TRYLOCK_PI");
627            error!(ENOSYS)
628        }
629        FUTEX_UNLOCK_PI => {
630            futexes.unlock_pi(locked, current_task, addr)?;
631            Ok(0)
632        }
633        _ => {
634            track_stub!(TODO("https://fxbug.dev/322875124"), "futex unknown command", cmd);
635            error!(ENOSYS)
636        }
637    }
638}
639
640fn do_futex_wait_with_restart<Key: FutexKey>(
641    locked: &mut Locked<Unlocked>,
642    current_task: &mut CurrentTask,
643    addr: UserAddress,
644    value: u32,
645    mask: u32,
646    deadline: TargetTime,
647) -> Result<(), Errno> {
648    let futexes = Key::get_table_from_task(current_task)?;
649    let result = match deadline {
650        TargetTime::Monotonic(mono_deadline) => {
651            futexes.wait(locked, current_task, addr, value, mask, mono_deadline)
652        }
653        TargetTime::BootInstant(boot_deadline) => {
654            let timer_slack = current_task.read().get_timerslack();
655            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
656        }
657        TargetTime::RealTime(utc_deadline) => {
658            // We convert real time deadlines to boot time deadlines since we cannot wait using a UTC deadline.
659            let (boot_deadline, _) = estimate_boot_deadline_from_utc(utc_deadline);
660            let timer_slack = current_task.read().get_timerslack();
661            futexes.wait_boot(locked, current_task, addr, value, mask, boot_deadline, timer_slack)
662        }
663    };
664    match result {
665        Err(err) if err == EINTR => {
666            current_task.set_syscall_restart_func(move |locked, current_task| {
667                do_futex_wait_with_restart::<Key>(locked, current_task, addr, value, mask, deadline)
668            });
669            error!(ERESTART_RESTARTBLOCK)
670        }
671        result => result,
672    }
673}
674
675pub fn sys_get_robust_list(
676    locked: &mut Locked<Unlocked>,
677    current_task: &CurrentTask,
678    tid: tid_t,
679    user_head_ptr: UserRef<UserAddress>,
680    user_len_ptr: UserRef<usize>,
681) -> Result<(), Errno> {
682    if tid < 0 {
683        return error!(EINVAL);
684    }
685    if user_head_ptr.is_null() || user_len_ptr.is_null() {
686        return error!(EFAULT);
687    }
688    let task = if tid == 0 {
689        current_task.task.clone()
690    } else {
691        let task = current_task.get_task(tid)?;
692        current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task)?;
693        task
694    };
695    current_task.write_object(user_head_ptr, &task.read().robust_list_head.addr())?;
696    current_task.write_object(user_len_ptr, &std::mem::size_of::<robust_list_head>())?;
697    Ok(())
698}
699
700pub fn sys_set_robust_list(
701    _locked: &mut Locked<Unlocked>,
702    current_task: &CurrentTask,
703    user_head: UserRef<robust_list_head>,
704    len: usize,
705) -> Result<(), Errno> {
706    if len != std::mem::size_of::<robust_list_head>() {
707        return error!(EINVAL);
708    }
709    current_task.write().robust_list_head = user_head.into();
710    Ok(())
711}
712
713pub fn sys_mlock(
714    locked: &mut Locked<Unlocked>,
715    current_task: &CurrentTask,
716    addr: UserAddress,
717    length: usize,
718) -> Result<(), Errno> {
719    // If flags is 0, mlock2() behaves exactly the same as mlock().
720    sys_mlock2(locked, current_task, addr, length, 0)
721}
722
723pub fn sys_mlock2(
724    locked: &mut Locked<Unlocked>,
725    current_task: &CurrentTask,
726    addr: UserAddress,
727    length: usize,
728    flags: u64,
729) -> Result<(), Errno> {
730    const KNOWN_FLAGS: u64 = MLOCK_ONFAULT as u64;
731    if (flags & !KNOWN_FLAGS) != 0 {
732        return error!(EINVAL);
733    }
734    let on_fault = flags & MLOCK_ONFAULT as u64 != 0;
735    current_task.mm()?.mlock(current_task, locked, addr, length, on_fault)
736}
737
738pub fn sys_munlock(
739    _locked: &mut Locked<Unlocked>,
740    current_task: &CurrentTask,
741    addr: UserAddress,
742    length: usize,
743) -> Result<(), Errno> {
744    current_task.mm()?.munlock(current_task, addr, length)
745}
746
/// Entry point for the `mlockall` syscall.
///
/// Not implemented: the arguments are ignored, a stub is recorded, and `ENOSYS` is returned.
pub fn sys_mlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "mlockall()");
    error!(ENOSYS)
}
755
/// Entry point for the `munlockall` syscall.
///
/// Not implemented: the arguments are ignored, a stub is recorded, and `ENOSYS` is returned.
pub fn sys_munlockall(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _flags: u64,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297292097"), "munlockall()");
    error!(ENOSYS)
}
764
/// Entry point for the `mincore` syscall.
///
/// Not implemented: the arguments are ignored, a stub is recorded, and `ENOSYS` is returned.
pub fn sys_mincore(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _addr: UserAddress,
    _length: usize,
    _out: UserRef<u8>,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297372240"), "mincore()");
    error!(ENOSYS)
}
775
// Syscalls for arch32 usage
//
// This module holds the syscall entry points that need 32-bit specific argument handling and
// re-exports the remaining memory syscalls under their `sys_arch32_*` names.
#[cfg(target_arch = "aarch64")]
mod arch32 {
    use crate::mm::PAGE_SIZE;
    use crate::mm::memory_accessor::MemoryAccessorExt;
    use crate::mm::syscalls::{UserAddress, sys_mmap};
    use crate::task::{CurrentTask, RobustListHeadPtr};
    use crate::vfs::FdNumber;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::auth::PTRACE_MODE_READ_REALCREDS;
    use starnix_uapi::errors::Errno;
    use starnix_uapi::user_address::UserRef;
    use starnix_uapi::{error, uapi};

    /// 32-bit variant of `set_robust_list`.
    ///
    /// Stores `user_head` as the calling task's robust list head. `len` must equal the size of
    /// the 32-bit `robust_list_head`; otherwise `EINVAL` is returned and nothing is stored.
    pub fn sys_arch32_set_robust_list(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_head: UserRef<uapi::arch32::robust_list_head>,
        len: usize,
    ) -> Result<(), Errno> {
        if len != std::mem::size_of::<uapi::arch32::robust_list_head>() {
            return error!(EINVAL);
        }
        current_task.write().robust_list_head = RobustListHeadPtr::from_32(user_head);
        Ok(())
    }

    /// 32-bit variant of `get_robust_list`.
    ///
    /// Writes the selected task's robust list head address and the size of the 32-bit
    /// `robust_list_head`, both as `u32`, to the given user pointers. A `tid` of 0 selects the
    /// calling task; any other `tid` requires a `PTRACE_MODE_READ_REALCREDS` access check.
    ///
    /// # Errors
    /// - `EINVAL` if `tid` is negative.
    /// - `EFAULT` if either output pointer is null.
    /// - Any error from the task lookup, the access check, or the writes to user memory.
    pub fn sys_arch32_get_robust_list(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        tid: starnix_uapi::tid_t,
        user_head_ptr: UserRef<u32>,
        user_len_ptr: UserRef<u32>,
    ) -> Result<(), Errno> {
        if tid < 0 {
            return error!(EINVAL);
        }
        if user_head_ptr.is_null() || user_len_ptr.is_null() {
            return error!(EFAULT);
        }
        let task = if tid == 0 {
            current_task.task.clone()
        } else {
            let task = current_task.get_task(tid)?;
            current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task)?;
            task
        };

        // The stored address is narrowed to 32 bits for the 32-bit caller.
        let addr = task.read().robust_list_head.addr().ptr() as u32;
        current_task.write_object(user_head_ptr, &addr)?;
        current_task.write_object(
            user_len_ptr,
            &(std::mem::size_of::<uapi::arch32::robust_list_head>() as u32),
        )?;
        Ok(())
    }

    /// 32-bit `mmap2`: like `mmap`, except that `offset` is multiplied by `PAGE_SIZE` to obtain
    /// the byte offset passed on to `sys_mmap`.
    ///
    /// # Errors
    /// - `EOVERFLOW` if the byte offset does not fit in a `u64`.
    /// - Any error returned by `sys_mmap`.
    pub fn sys_arch32_mmap2(
        locked: &mut Locked<Unlocked>,
        current_task: &mut CurrentTask,
        addr: UserAddress,
        length: usize,
        prot: u32,
        flags: u32,
        fd: FdNumber,
        offset: u64,
    ) -> Result<UserAddress, Errno> {
        // `offset` comes from userspace, so the scaling must not be allowed to overflow
        // (which would panic or silently wrap depending on the build).
        let Some(byte_offset) = offset.checked_mul(*PAGE_SIZE) else {
            return error!(EOVERFLOW);
        };
        sys_mmap(locked, current_task, addr, length, prot, flags, fd, byte_offset)
    }

    /// 32-bit `munmap`.
    ///
    /// Rejects with `EINVAL` any `addr` that is not in the lower 32 bits or any `length` of
    /// 4 GiB or more, then forwards to `unmap` on the value returned by `current_task.mm()`.
    pub fn sys_arch32_munmap(
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
    ) -> Result<(), Errno> {
        if !addr.is_lower_32bit() || length >= (1 << 32) {
            return error!(EINVAL);
        }
        current_task.mm()?.unmap(addr, length)?;
        Ok(())
    }

    // These syscalls are exposed to 32-bit callers under their arch32 names without a
    // dedicated wrapper.
    pub use super::{
        sys_futex as sys_arch32_futex, sys_madvise as sys_arch32_madvise,
        sys_membarrier as sys_arch32_membarrier, sys_mincore as sys_arch32_mincore,
        sys_mlock as sys_arch32_mlock, sys_mlock2 as sys_arch32_mlock2,
        sys_mlockall as sys_arch32_mlockall, sys_mremap as sys_arch32_mremap,
        sys_msync as sys_arch32_msync, sys_munlock as sys_arch32_munlock,
        sys_munlockall as sys_arch32_munlockall,
        sys_process_mrelease as sys_arch32_process_mrelease,
        sys_process_vm_readv as sys_arch32_process_vm_readv,
    };
}
870
871#[cfg(target_arch = "aarch64")]
872pub use arch32::*;
873
874#[cfg(test)]
875mod tests {
876    use super::*;
877    use crate::mm::memory::MemoryObject;
878    use crate::testing::*;
879    use starnix_uapi::errors::EEXIST;
880    use starnix_uapi::file_mode::Access;
881    use starnix_uapi::{MREMAP_FIXED, MREMAP_MAYMOVE, PROT_READ};
882
883    #[::fuchsia::test]
884    async fn test_mmap_with_colliding_hint() {
885        spawn_kernel_and_run(async |locked, current_task| {
886            let page_size = *PAGE_SIZE;
887
888            let mapped_address =
889                map_memory(locked, &current_task, UserAddress::default(), page_size);
890            match do_mmap(
891                locked,
892                &current_task,
893                mapped_address,
894                page_size as usize,
895                PROT_READ,
896                MAP_PRIVATE | MAP_ANONYMOUS,
897                FdNumber::from_raw(-1),
898                0,
899            ) {
900                Ok(address) => {
901                    assert_ne!(address, mapped_address);
902                }
903                error => {
904                    panic!("mmap with colliding hint failed: {error:?}");
905                }
906            }
907        })
908        .await;
909    }
910
911    #[::fuchsia::test]
912    async fn test_mmap_with_fixed_collision() {
913        spawn_kernel_and_run(async |locked, current_task| {
914            let page_size = *PAGE_SIZE;
915
916            let mapped_address =
917                map_memory(locked, &current_task, UserAddress::default(), page_size);
918            match do_mmap(
919                locked,
920                &current_task,
921                mapped_address,
922                page_size as usize,
923                PROT_READ,
924                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
925                FdNumber::from_raw(-1),
926                0,
927            ) {
928                Ok(address) => {
929                    assert_eq!(address, mapped_address);
930                }
931                error => {
932                    panic!("mmap with fixed collision failed: {error:?}");
933                }
934            }
935        })
936        .await;
937    }
938
939    #[::fuchsia::test]
940    async fn test_mmap_with_fixed_noreplace_collision() {
941        spawn_kernel_and_run(async |locked, current_task| {
942            let page_size = *PAGE_SIZE;
943
944            let mapped_address =
945                map_memory(locked, &current_task, UserAddress::default(), page_size);
946            match do_mmap(
947                locked,
948                &current_task,
949                mapped_address,
950                page_size as usize,
951                PROT_READ,
952                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
953                FdNumber::from_raw(-1),
954                0,
955            ) {
956                Err(errno) => {
957                    assert_eq!(errno, EEXIST);
958                }
959                result => {
960                    panic!("mmap with fixed_noreplace collision failed: {result:?}");
961                }
962            }
963        })
964        .await;
965    }
966
967    /// It is ok to call munmap with an address that is a multiple of the page size, and
968    /// a non-zero length.
969    #[::fuchsia::test]
970    async fn test_munmap() {
971        spawn_kernel_and_run(async |locked, current_task| {
972            let mapped_address =
973                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
974            assert_eq!(
975                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
976                Ok(())
977            );
978
979            // Verify that the memory is no longer readable.
980            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
981        })
982        .await;
983    }
984
985    /// It is ok to call munmap on an unmapped range.
986    #[::fuchsia::test]
987    async fn test_munmap_not_mapped() {
988        spawn_kernel_and_run(async |locked, current_task| {
989            let mapped_address =
990                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
991            assert_eq!(
992                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
993                Ok(())
994            );
995            assert_eq!(
996                sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize),
997                Ok(())
998            );
999        })
1000        .await;
1001    }
1002
1003    /// It is an error to call munmap with a length of 0.
1004    #[::fuchsia::test]
1005    async fn test_munmap_0_length() {
1006        spawn_kernel_and_run(async |locked, current_task| {
1007            let mapped_address =
1008                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
1009            assert_eq!(sys_munmap(locked, &current_task, mapped_address, 0), error!(EINVAL));
1010        })
1011        .await;
1012    }
1013
1014    /// It is an error to call munmap with an address that is not a multiple of the page size.
1015    #[::fuchsia::test]
1016    async fn test_munmap_not_aligned() {
1017        spawn_kernel_and_run(async |locked, current_task| {
1018            let mapped_address =
1019                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
1020            assert_eq!(
1021                sys_munmap(
1022                    locked,
1023                    &current_task,
1024                    (mapped_address + 1u64).unwrap(),
1025                    *PAGE_SIZE as usize
1026                ),
1027                error!(EINVAL)
1028            );
1029
1030            // Verify that the memory is still readable.
1031            assert!(current_task.read_memory_to_array::<5>(mapped_address).is_ok());
1032        })
1033        .await;
1034    }
1035
1036    /// The entire page should be unmapped, not just the range [address, address + length).
1037    #[::fuchsia::test]
1038    async fn test_munmap_unmap_partial() {
1039        spawn_kernel_and_run(async |locked, current_task| {
1040            let mapped_address =
1041                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
1042            assert_eq!(
1043                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) / 2),
1044                Ok(())
1045            );
1046
1047            // Verify that memory can't be read in either half of the page.
1048            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
1049            assert_eq!(
1050                current_task
1051                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE - 2)).unwrap()),
1052                error!(EFAULT)
1053            );
1054        })
1055        .await;
1056    }
1057
1058    /// All pages that intersect the munmap range should be unmapped.
1059    #[::fuchsia::test]
1060    async fn test_munmap_multiple_pages() {
1061        spawn_kernel_and_run(async |locked, current_task| {
1062            let mapped_address =
1063                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
1064            assert_eq!(
1065                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) + 1),
1066                Ok(())
1067            );
1068
1069            // Verify that neither page is readable.
1070            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
1071            assert_eq!(
1072                current_task
1073                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap()),
1074                error!(EFAULT)
1075            );
1076        })
1077        .await;
1078    }
1079
1080    /// Only the pages that intersect the munmap range should be unmapped.
1081    #[::fuchsia::test]
1082    async fn test_munmap_one_of_many_pages() {
1083        spawn_kernel_and_run(async |locked, current_task| {
1084            let mapped_address =
1085                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
1086            assert_eq!(
1087                sys_munmap(locked, &current_task, mapped_address, (*PAGE_SIZE as usize) - 1),
1088                Ok(())
1089            );
1090
1091            // Verify that the second page is still readable.
1092            assert_eq!(current_task.read_memory_to_array::<5>(mapped_address), error!(EFAULT));
1093            assert!(
1094                current_task
1095                    .read_memory_to_array::<5>((mapped_address + (*PAGE_SIZE + 1u64)).unwrap())
1096                    .is_ok()
1097            );
1098        })
1099        .await;
1100    }
1101
1102    /// Unmap the middle page of a mapping.
1103    #[::fuchsia::test]
1104    async fn test_munmap_middle_page() {
1105        spawn_kernel_and_run(async |locked, current_task| {
1106            let mapped_address =
1107                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1108            assert_eq!(
1109                sys_munmap(
1110                    locked,
1111                    &current_task,
1112                    (mapped_address + *PAGE_SIZE).unwrap(),
1113                    *PAGE_SIZE as usize
1114                ),
1115                Ok(())
1116            );
1117
1118            // Verify that the first and third pages are still readable.
1119            assert!(current_task.read_memory_to_vec(mapped_address, 5).is_ok());
1120            assert_eq!(
1121                current_task.read_memory_to_vec((mapped_address + *PAGE_SIZE).unwrap(), 5),
1122                error!(EFAULT)
1123            );
1124            assert!(
1125                current_task
1126                    .read_memory_to_vec((mapped_address + (*PAGE_SIZE * 2)).unwrap(), 5)
1127                    .is_ok()
1128            );
1129        })
1130        .await;
1131    }
1132
1133    /// Unmap a range of pages that includes disjoint mappings.
1134    #[::fuchsia::test]
1135    async fn test_munmap_many_mappings() {
1136        spawn_kernel_and_run(async |locked, current_task| {
1137            let mapped_addresses: Vec<_> = std::iter::repeat_with(|| {
1138                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE)
1139            })
1140            .take(3)
1141            .collect();
1142            let min_address = *mapped_addresses.iter().min().unwrap();
1143            let max_address = *mapped_addresses.iter().max().unwrap();
1144            let unmap_length = (max_address - min_address) + *PAGE_SIZE as usize;
1145
1146            assert_eq!(sys_munmap(locked, &current_task, min_address, unmap_length), Ok(()));
1147
1148            // Verify that none of the mapped pages are readable.
1149            for mapped_address in mapped_addresses {
1150                assert_eq!(current_task.read_memory_to_vec(mapped_address, 5), error!(EFAULT));
1151            }
1152        })
1153        .await;
1154    }
1155
1156    #[::fuchsia::test]
1157    async fn test_msync_validates_address_range() {
1158        spawn_kernel_and_run(async |locked, current_task| {
1159            // Map 3 pages and test that ranges covering these pages return no error.
1160            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1161            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
1162            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
1163            assert_eq!(
1164                sys_msync(
1165                    locked,
1166                    &current_task,
1167                    (addr + *PAGE_SIZE).unwrap(),
1168                    *PAGE_SIZE as usize * 2,
1169                    0
1170                ),
1171                Ok(())
1172            );
1173
1174            // Unmap the middle page and test that ranges covering that page return ENOMEM.
1175            sys_munmap(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE as usize)
1176                .expect("unmap middle");
1177            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize, 0), Ok(()));
1178            assert_eq!(
1179                sys_msync(
1180                    locked,
1181                    &current_task,
1182                    addr,
1183                    *PAGE_SIZE as usize * 3,
1184                    starnix_uapi::MS_SYNC
1185                ),
1186                error!(ENOMEM)
1187            );
1188            assert_eq!(
1189                sys_msync(
1190                    locked,
1191                    &current_task,
1192                    addr,
1193                    *PAGE_SIZE as usize * 2,
1194                    starnix_uapi::MS_SYNC
1195                ),
1196                error!(ENOMEM)
1197            );
1198            assert_eq!(
1199                sys_msync(
1200                    locked,
1201                    &current_task,
1202                    (addr + *PAGE_SIZE).unwrap(),
1203                    *PAGE_SIZE as usize * 2,
1204                    starnix_uapi::MS_SYNC
1205                ),
1206                error!(ENOMEM)
1207            );
1208            assert_eq!(
1209                sys_msync(
1210                    locked,
1211                    &current_task,
1212                    (addr + (*PAGE_SIZE * 2)).unwrap(),
1213                    *PAGE_SIZE as usize,
1214                    0
1215                ),
1216                Ok(())
1217            );
1218
1219            // Map the middle page back and test that ranges covering the three pages
1220            // (spanning multiple ranges) return no error.
1221            assert_eq!(
1222                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
1223                (addr + *PAGE_SIZE).unwrap()
1224            );
1225            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 3, 0), Ok(()));
1226            assert_eq!(sys_msync(locked, &current_task, addr, *PAGE_SIZE as usize * 2, 0), Ok(()));
1227            assert_eq!(
1228                sys_msync(
1229                    locked,
1230                    &current_task,
1231                    (addr + *PAGE_SIZE).unwrap(),
1232                    *PAGE_SIZE as usize * 2,
1233                    0
1234                ),
1235                Ok(())
1236            );
1237        })
1238        .await;
1239    }
1240
1241    /// Shrinks an entire range.
1242    #[::fuchsia::test]
1243    async fn test_mremap_shrink_whole_range_from_end() {
1244        spawn_kernel_and_run(async |locked, current_task| {
1245            // Map 2 pages.
1246            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
1247            fill_page(&current_task, addr, 'a');
1248            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1249
1250            // Shrink the mapping from 2 to 1 pages.
1251            assert_eq!(
1252                remap_memory(
1253                    locked,
1254                    &current_task,
1255                    addr,
1256                    *PAGE_SIZE * 2,
1257                    *PAGE_SIZE,
1258                    0,
1259                    UserAddress::default()
1260                ),
1261                Ok(addr)
1262            );
1263
1264            check_page_eq(&current_task, addr, 'a');
1265            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1266        })
1267        .await;
1268    }
1269
1270    /// Shrinks part of a range, introducing a hole in the middle.
1271    #[::fuchsia::test]
1272    async fn test_mremap_shrink_partial_range() {
1273        spawn_kernel_and_run(async |locked, current_task| {
1274            // Map 3 pages.
1275            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1276            fill_page(&current_task, addr, 'a');
1277            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1278            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1279
1280            // Shrink the first 2 pages down to 1, creating a hole.
1281            assert_eq!(
1282                remap_memory(
1283                    locked,
1284                    &current_task,
1285                    addr,
1286                    *PAGE_SIZE * 2,
1287                    *PAGE_SIZE,
1288                    0,
1289                    UserAddress::default()
1290                ),
1291                Ok(addr)
1292            );
1293
1294            check_page_eq(&current_task, addr, 'a');
1295            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1296            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1297        })
1298        .await;
1299    }
1300
1301    /// Shrinking doesn't care if the range specified spans multiple mappings.
1302    #[::fuchsia::test]
1303    async fn test_mremap_shrink_across_ranges() {
1304        spawn_kernel_and_run(async |locked, current_task| {
1305            // Map 3 pages, unmap the middle, then map the middle again. This will leave us with
1306            // 3 contiguous mappings.
1307            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1308            assert_eq!(
1309                sys_munmap(
1310                    locked,
1311                    &current_task,
1312                    (addr + *PAGE_SIZE).unwrap(),
1313                    *PAGE_SIZE as usize
1314                ),
1315                Ok(())
1316            );
1317            assert_eq!(
1318                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE),
1319                (addr + *PAGE_SIZE).unwrap()
1320            );
1321
1322            fill_page(&current_task, addr, 'a');
1323            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1324            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1325
1326            // Remap over all three mappings, shrinking to 1 page.
1327            assert_eq!(
1328                remap_memory(
1329                    locked,
1330                    &current_task,
1331                    addr,
1332                    *PAGE_SIZE * 3,
1333                    *PAGE_SIZE,
1334                    0,
1335                    UserAddress::default()
1336                ),
1337                Ok(addr)
1338            );
1339
1340            check_page_eq(&current_task, addr, 'a');
1341            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1342            check_unmapped(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap());
1343        })
1344        .await;
1345    }
1346
1347    /// Grows a mapping in-place.
1348    #[::fuchsia::test]
1349    async fn test_mremap_grow_in_place() {
1350        spawn_kernel_and_run(async |locked, current_task| {
1351            // Map 3 pages, unmap the middle, leaving a hole.
1352            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1353            fill_page(&current_task, addr, 'a');
1354            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1355            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1356            assert_eq!(
1357                sys_munmap(
1358                    locked,
1359                    &current_task,
1360                    (addr + *PAGE_SIZE).unwrap(),
1361                    *PAGE_SIZE as usize
1362                ),
1363                Ok(())
1364            );
1365
1366            // Grow the first page in-place into the middle.
1367            assert_eq!(
1368                remap_memory(
1369                    locked,
1370                    &current_task,
1371                    addr,
1372                    *PAGE_SIZE,
1373                    *PAGE_SIZE * 2,
1374                    0,
1375                    UserAddress::default()
1376                ),
1377                Ok(addr)
1378            );
1379
1380            check_page_eq(&current_task, addr, 'a');
1381
1382            // The middle page should be new, and not just pointing to the original middle page filled
1383            // with 'b'.
1384            check_page_ne(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1385
1386            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1387        })
1388        .await;
1389    }
1390
1391    /// Tries to grow a set of pages that cannot fit, and forces a move.
1392    #[::fuchsia::test]
1393    async fn test_mremap_grow_maymove() {
1394        spawn_kernel_and_run(async |locked, current_task| {
1395            // Map 3 pages.
1396            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1397            fill_page(&current_task, addr, 'a');
1398            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1399            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1400
1401            // Grow the first two pages by 1, forcing a move.
1402            let new_addr = remap_memory(
1403                locked,
1404                &current_task,
1405                addr,
1406                *PAGE_SIZE * 2,
1407                *PAGE_SIZE * 3,
1408                MREMAP_MAYMOVE,
1409                UserAddress::default(),
1410            )
1411            .expect("failed to mremap");
1412
1413            assert_ne!(new_addr, addr, "mremap did not move the mapping");
1414
1415            // The first two pages should have been moved.
1416            check_unmapped(&current_task, addr);
1417            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1418
1419            // The third page should still be present.
1420            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1421
1422            // The moved pages should have the same contents.
1423            check_page_eq(&current_task, new_addr, 'a');
1424            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'b');
1425
1426            // The newly grown page should not be the same as the original third page.
1427            check_page_ne(&current_task, (new_addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1428        })
1429        .await;
1430    }
1431
1432    /// Shrinks a set of pages and move them to a fixed location.
1433    #[::fuchsia::test]
1434    async fn test_mremap_shrink_fixed() {
1435        spawn_kernel_and_run(async |locked, current_task| {
1436            // Map 2 pages which will act as the destination.
1437            let dst_addr =
1438                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
1439            fill_page(&current_task, dst_addr, 'y');
1440            fill_page(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'z');
1441
1442            // Map 3 pages.
1443            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1444            fill_page(&current_task, addr, 'a');
1445            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1446            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1447
1448            // Shrink the first two pages and move them to overwrite the mappings at `dst_addr`.
1449            let new_addr = remap_memory(
1450                locked,
1451                &current_task,
1452                addr,
1453                *PAGE_SIZE * 2,
1454                *PAGE_SIZE,
1455                MREMAP_MAYMOVE | MREMAP_FIXED,
1456                dst_addr,
1457            )
1458            .expect("failed to mremap");
1459
1460            assert_eq!(new_addr, dst_addr, "mremap did not move the mapping");
1461
1462            // The first two pages should have been moved.
1463            check_unmapped(&current_task, addr);
1464            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1465
1466            // The third page should still be present.
1467            check_page_eq(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1468
1469            // The first moved page should have the same contents.
1470            check_page_eq(&current_task, new_addr, 'a');
1471
1472            // The second page should be part of the original dst mapping.
1473            check_page_eq(&current_task, (new_addr + *PAGE_SIZE).unwrap(), 'z');
1474        })
1475        .await;
1476    }
1477
1478    /// Clobbers the middle of an existing mapping with mremap to a fixed location.
1479    #[::fuchsia::test]
1480    async fn test_mremap_clobber_memory_mapping() {
1481        spawn_kernel_and_run(async |locked, current_task| {
1482            let dst_memory = MemoryObject::from(zx::Vmo::create(2 * *PAGE_SIZE).unwrap());
1483            dst_memory.write(&['x' as u8].repeat(*PAGE_SIZE as usize), 0).unwrap();
1484            dst_memory.write(&['y' as u8].repeat(*PAGE_SIZE as usize), *PAGE_SIZE).unwrap();
1485
1486            let dst_addr = current_task
1487                .mm()
1488                .unwrap()
1489                .map_memory(
1490                    DesiredAddress::Any,
1491                    dst_memory.into(),
1492                    0,
1493                    2 * (*PAGE_SIZE as usize),
1494                    ProtectionFlags::READ,
1495                    Access::rwx(),
1496                    MappingOptions::empty(),
1497                    MappingName::None,
1498                )
1499                .unwrap();
1500
1501            // Map 3 pages.
1502            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 3);
1503            fill_page(&current_task, addr, 'a');
1504            fill_page(&current_task, (addr + *PAGE_SIZE).unwrap(), 'b');
1505            fill_page(&current_task, (addr + (*PAGE_SIZE * 2)).unwrap(), 'c');
1506
1507            // Overwrite the second page of the mapping with the second page of the anonymous mapping.
1508            let remapped_addr = sys_mremap(
1509                locked,
1510                &*current_task,
1511                (addr + *PAGE_SIZE).unwrap(),
1512                *PAGE_SIZE as usize,
1513                *PAGE_SIZE as usize,
1514                MREMAP_FIXED | MREMAP_MAYMOVE,
1515                (dst_addr + *PAGE_SIZE).unwrap(),
1516            )
1517            .unwrap();
1518
1519            assert_eq!(remapped_addr, (dst_addr + *PAGE_SIZE).unwrap());
1520
1521            check_page_eq(&current_task, addr, 'a');
1522            check_unmapped(&current_task, (addr + *PAGE_SIZE).unwrap());
1523            check_page_eq(&current_task, (addr + (2 * *PAGE_SIZE)).unwrap(), 'c');
1524
1525            check_page_eq(&current_task, dst_addr, 'x');
1526            check_page_eq(&current_task, (dst_addr + *PAGE_SIZE).unwrap(), 'b');
1527        })
1528        .await;
1529    }
1530
1531    #[cfg(target_arch = "x86_64")]
1532    #[::fuchsia::test]
1533    async fn test_map_32_bit() {
1534        use starnix_uapi::PROT_WRITE;
1535
1536        spawn_kernel_and_run(async |locked, current_task| {
1537            let page_size = *PAGE_SIZE;
1538
1539            for _i in 0..256 {
1540                match do_mmap(
1541                    locked,
1542                    &current_task,
1543                    UserAddress::from(0),
1544                    page_size as usize,
1545                    PROT_READ | PROT_WRITE,
1546                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT,
1547                    FdNumber::from_raw(-1),
1548                    0,
1549                ) {
1550                    Ok(address) => {
1551                        let memory_end = address.ptr() + page_size as usize;
1552                        assert!(memory_end <= 0x80000000);
1553                    }
1554                    error => {
1555                        panic!("mmap with MAP_32BIT failed: {error:?}");
1556                    }
1557                }
1558            }
1559        })
1560        .await;
1561    }
1562}