starnix_core/mm/memory_manager.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::mapping::MappingBackingMemory;
7use crate::mm::memory::MemoryObject;
8use crate::mm::memory_accessor::{MemoryAccessor, TaskMemoryAccessor};
9use crate::mm::private_anonymous_memory_manager::PrivateAnonymousMemoryManager;
10use crate::mm::{
11    FaultRegisterMode, FutexTable, InflightVmsplicedPayloads, MapInfoCache, Mapping,
12    MappingBacking, MappingFlags, MappingName, MlockPinFlavor, PrivateFutexKey, ProtectionFlags,
13    UserFault, VMEX_RESOURCE, VmsplicePayload, VmsplicePayloadSegment, read_to_array,
14};
15use crate::security;
16use crate::signals::{SignalDetail, SignalInfo};
17use crate::task::{CurrentTask, ExceptionResult, PageFaultExceptionReport, Task};
18use crate::vfs::aio::AioContext;
19use crate::vfs::pseudo::dynamic_file::{
20    DynamicFile, DynamicFileBuf, DynamicFileSource, SequenceFileSource,
21};
22use crate::vfs::{FsNodeOps, FsString, NamespaceNode};
23use anyhow::{Error, anyhow};
24use bitflags::bitflags;
25use flyweights::FlyByteStr;
26use linux_uapi::BUS_ADRERR;
27use memory_pinning::PinnedMapping;
28use range_map::RangeMap;
29use starnix_ext::map_ext::EntryExt;
30use starnix_lifecycle::DropNotifier;
31use starnix_logging::{
32    CATEGORY_STARNIX_MM, impossible_error, log_warn, trace_duration, track_stub,
33};
34use starnix_sync::{
35    LockBefore, Locked, MmDumpable, OrderedMutex, RwLock, RwLockWriteGuard, ThreadGroupLimits,
36    Unlocked, UserFaultInner,
37};
38use starnix_types::arch::ArchWidth;
39use starnix_types::futex_address::FutexAddress;
40use starnix_types::math::{round_down_to_system_page_size, round_up_to_system_page_size};
41use starnix_types::ownership::WeakRef;
42use starnix_types::user_buffer::{UserBuffer, UserBuffers};
43use starnix_uapi::auth::CAP_IPC_LOCK;
44use starnix_uapi::errors::Errno;
45use starnix_uapi::file_mode::Access;
46use starnix_uapi::range_ext::RangeExt;
47use starnix_uapi::resource_limits::Resource;
48use starnix_uapi::restricted_aspace::{
49    RESTRICTED_ASPACE_BASE, RESTRICTED_ASPACE_HIGHEST_ADDRESS, RESTRICTED_ASPACE_RANGE,
50    RESTRICTED_ASPACE_SIZE,
51};
52use starnix_uapi::signals::{SIGBUS, SIGSEGV};
53use starnix_uapi::user_address::{ArchSpecific, UserAddress};
54use starnix_uapi::{
55    MADV_COLD, MADV_COLLAPSE, MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK,
56    MADV_DONTNEED, MADV_DONTNEED_LOCKED, MADV_FREE, MADV_HUGEPAGE, MADV_HWPOISON, MADV_KEEPONFORK,
57    MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_NORMAL, MADV_PAGEOUT, MADV_POPULATE_READ, MADV_RANDOM,
58    MADV_REMOVE, MADV_SEQUENTIAL, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, MADV_WILLNEED,
59    MADV_WIPEONFORK, MREMAP_DONTUNMAP, MREMAP_FIXED, MREMAP_MAYMOVE, SI_KERNEL, errno, error,
60};
61use std::collections::HashMap;
62use std::mem::MaybeUninit;
63use std::ops::{Deref, DerefMut, Range, RangeBounds};
64use std::sync::{Arc, LazyLock, Weak};
65use syncio::zxio::zxio_default_maybe_faultable_copy;
66use zerocopy::IntoBytes;
67use zx::{AsHandleRef, HandleBased, Rights, VmarInfo, VmoChildOptions};
68
69pub const ZX_VM_SPECIFIC_OVERWRITE: zx::VmarFlags =
70    zx::VmarFlags::from_bits_retain(zx::VmarFlagsExtended::SPECIFIC_OVERWRITE.bits());
71
72// We do not create shared processes in unit tests.
73pub(crate) const UNIFIED_ASPACES_ENABLED: bool = cfg!(not(test));
74
75/// Initializes the usercopy utilities.
76///
77/// It is useful to explicitly call this so that the usercopy is initialized
78/// at a known instant. For example, Starnix may want to make sure the usercopy
79/// thread created to support user copying is associated to the Starnix process
80/// and not a restricted-mode process.
81pub fn init_usercopy() {
82    // This call lazily initializes the `Usercopy` instance.
83    let _ = usercopy();
84}
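// Illustrative sketch (not part of the original source): the intent is to call
// `init_usercopy()` once, early during Starnix startup, so that the usercopy helper
// thread is created while still on the Starnix process rather than lazily from a
// restricted-mode context. The startup function and module path below are assumptions.
//
//     fn start_kernel() {
//         crate::mm::init_usercopy();
//         // ... only then start running Linux tasks ...
//     }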
85
86pub const GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS: usize = 256;
87
88#[cfg(target_arch = "x86_64")]
89const ASLR_RANDOM_BITS: usize = 27;
90
91#[cfg(target_arch = "aarch64")]
92const ASLR_RANDOM_BITS: usize = 28;
93
94#[cfg(target_arch = "riscv64")]
95const ASLR_RANDOM_BITS: usize = 18;
96
97/// Number of bits of entropy for processes running in 32-bit mode.
98const ASLR_32_RANDOM_BITS: usize = 8;
99
100// The biggest we expect the stack to be; increase as needed
101// TODO(https://fxbug.dev/322874791): Once setting RLIMIT_STACK is implemented, we should use it.
102const MAX_STACK_SIZE: usize = 512 * 1024 * 1024;
103
104// Value to report temporarily as the VM RSS HWM.
105// TODO(https://fxbug.dev/396221597): Need support from the kernel to track the committed bytes high
106// water mark.
107const STUB_VM_RSS_HWM: usize = 2 * 1024 * 1024;
108
109fn usercopy() -> Option<&'static usercopy::Usercopy> {
110    static USERCOPY: LazyLock<Option<usercopy::Usercopy>> = LazyLock::new(|| {
111        // We do not create shared processes in unit tests.
112        if UNIFIED_ASPACES_ENABLED {
113            // ASSUMPTION: All Starnix-managed Linux processes have the same
114            // restricted mode address range.
115            Some(usercopy::Usercopy::new(RESTRICTED_ASPACE_RANGE).unwrap())
116        } else {
117            None
118        }
119    });
120
121    LazyLock::force(&USERCOPY).as_ref()
122}
123
124/// Provides an implementation for zxio's `zxio_maybe_faultable_copy` that supports
125/// catching faults.
126///
127/// See zxio's `zxio_maybe_faultable_copy` documentation for more details.
128///
129/// # Safety
130///
131/// Only one of `src`/`dest` may be an address to a buffer owned by user/restricted-mode
132/// (`ret_dest` indicates whether the user-owned buffer is `dest` when `true`).
133/// The other must be a valid Starnix/normal-mode buffer that will never cause a fault
134/// when the first `count` bytes are read/written.
135#[unsafe(no_mangle)]
136pub unsafe fn zxio_maybe_faultable_copy_impl(
137    dest: *mut u8,
138    src: *const u8,
139    count: usize,
140    ret_dest: bool,
141) -> bool {
142    if let Some(usercopy) = usercopy() {
143        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
144        let ret = unsafe { usercopy.raw_hermetic_copy(dest, src, count, ret_dest) };
145        ret == count
146    } else {
147        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
148        unsafe {
149            zxio_default_maybe_faultable_copy(dest, src, count, ret_dest)
150        }
151    }
152}
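// Illustrative sketch (not part of the original source): copying a Starnix-owned buffer
// into user memory through this hook. `user_ptr` and `data` are hypothetical values;
// `ret_dest == true` marks `dest` as the side that is allowed to fault.
//
//     // SAFETY: `data` is a valid Starnix buffer of at least `data.len()` bytes.
//     let ok = unsafe {
//         zxio_maybe_faultable_copy_impl(user_ptr, data.as_ptr(), data.len(), true)
//     };
//     if !ok {
//         // The user address faulted; the caller should report EFAULT.
//     }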
153
154pub static PAGE_SIZE: LazyLock<u64> = LazyLock::new(|| zx::system_get_page_size() as u64);
155
156bitflags! {
157    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
158    pub struct MappingOptions: u16 {
159      const SHARED      = 1 << 0;
160      const ANONYMOUS   = 1 << 1;
161      const LOWER_32BIT = 1 << 2;
162      const GROWSDOWN   = 1 << 3;
163      const ELF_BINARY  = 1 << 4;
164      const DONTFORK    = 1 << 5;
165      const WIPEONFORK  = 1 << 6;
166      const DONT_SPLIT  = 1 << 7;
167      const DONT_EXPAND = 1 << 8;
168      const POPULATE    = 1 << 9;
169    }
170}
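// Illustrative sketch (not part of the original source): the options for a typical
// `mmap(MAP_PRIVATE | MAP_ANONYMOUS)` request compose as plain bitflags, with SHARED
// left unset for private mappings.
//
//     let private_anon = MappingOptions::ANONYMOUS;
//     assert!(!private_anon.contains(MappingOptions::SHARED));
//     let shared_anon = MappingOptions::ANONYMOUS | MappingOptions::SHARED;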
171
172bitflags! {
173    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
174    pub struct MremapFlags: u32 {
175        const MAYMOVE = MREMAP_MAYMOVE;
176        const FIXED = MREMAP_FIXED;
177        const DONTUNMAP = MREMAP_DONTUNMAP;
178    }
179}
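// Illustrative sketch (not part of the original source): a syscall handler can validate
// raw `mremap` flag bits with `from_bits`, which returns `None` when unknown bits are
// set, matching the EINVAL behavior expected from `mremap(2)`.
//
//     let flags = MremapFlags::from_bits(MREMAP_MAYMOVE | MREMAP_FIXED)
//         .ok_or_else(|| errno!(EINVAL))?;
//     assert!(flags.contains(MremapFlags::FIXED));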
180
181const PROGRAM_BREAK_LIMIT: u64 = 64 * 1024 * 1024;
182
183#[derive(Debug, Clone, Eq, PartialEq)]
184struct ProgramBreak {
185    // The base address at which the data segment is mapped.
186    base: UserAddress,
187
188    // The current program break.
189    //
190    // The addresses from [base, current.round_up(*PAGE_SIZE)) are mapped into the
191    // client address space from the underlying |memory|.
192    current: UserAddress,
193}
194
195/// The policy about whether the address space can be dumped.
196#[derive(Debug, Clone, Copy, Eq, PartialEq)]
197pub enum DumpPolicy {
198    /// The address space cannot be dumped.
199    ///
200    /// Corresponds to SUID_DUMP_DISABLE.
201    Disable,
202
203    /// The address space can be dumped.
204    ///
205    /// Corresponds to SUID_DUMP_USER.
206    User,
207}
208
209// Supported types of membarriers.
210pub enum MembarrierType {
211    Memory,   // MEMBARRIER_CMD_GLOBAL, etc
212    SyncCore, // MEMBARRIER_CMD_..._SYNC_CORE
213}
214
215// Tracks the types of membarriers this address space is registered to receive.
216#[derive(Default, Clone)]
217struct MembarrierRegistrations {
218    memory: bool,
219    sync_core: bool,
220}
221
222pub struct MemoryManagerState {
223    /// The VMAR in which userspace mappings occur.
224    ///
225    /// We map userspace memory in this child VMAR so that we can destroy the
226    /// entire VMAR during exec.
227    /// For 32-bit tasks, we limit the user_vmar to correspond to the available memory.
228    ///
229    /// This field is set to `ZX_HANDLE_INVALID` when the address space has been destroyed (e.g. on
230    /// `exec()`). Call-sites can either check for this proactively or handle the `ZX_ERR_BAD_HANDLE`
231    /// status returned by Zircon operations, whichever suits them.
232    user_vmar: zx::Vmar,
233
234    /// Cached VmarInfo for user_vmar.
235    user_vmar_info: zx::VmarInfo,
236
237    /// The memory mappings currently used by this address space.
238    ///
239    /// The mappings record which object backs each address.
240    mappings: RangeMap<UserAddress, Mapping>,
241
242    /// Memory object backing private, anonymous memory allocations in this address space.
243    private_anonymous: PrivateAnonymousMemoryManager,
244
245    /// UserFaults registered with this memory manager.
246    userfaultfds: Vec<Weak<UserFault>>,
247
248    /// Shadow mappings for mlock()'d pages.
249    ///
250    /// Used for MlockPinFlavor::ShadowProcess to keep track of when we need to unmap
251    /// memory from the shadow process.
252    shadow_mappings_for_mlock: RangeMap<UserAddress, Arc<PinnedMapping>>,
253
254    forkable_state: MemoryManagerForkableState,
255}
256
257// 64 KiB below the 4 GiB boundary.
258const LOWER_4GB_LIMIT: UserAddress = UserAddress::const_from(0xffff_0000);
259
260#[derive(Default, Clone)]
261pub struct MemoryManagerForkableState {
262    /// State for the brk and sbrk syscalls.
263    brk: Option<ProgramBreak>,
264
265    /// The namespace node that represents the executable associated with this task.
266    executable_node: Option<NamespaceNode>,
267
268    pub stack_size: usize,
269    pub stack_start: UserAddress,
270    pub auxv_start: UserAddress,
271    pub auxv_end: UserAddress,
272    pub argv_start: UserAddress,
273    pub argv_end: UserAddress,
274    pub environ_start: UserAddress,
275    pub environ_end: UserAddress,
276
277    /// vDSO location
278    pub vdso_base: UserAddress,
279
280    /// Randomized regions:
281    pub mmap_top: UserAddress,
282    pub stack_origin: UserAddress,
283    pub brk_origin: UserAddress,
284
285    // Membarrier registrations
286    membarrier_registrations: MembarrierRegistrations,
287}
288
289impl Deref for MemoryManagerState {
290    type Target = MemoryManagerForkableState;
291    fn deref(&self) -> &Self::Target {
292        &self.forkable_state
293    }
294}
295
296impl DerefMut for MemoryManagerState {
297    fn deref_mut(&mut self) -> &mut Self::Target {
298        &mut self.forkable_state
299    }
300}
301
302#[derive(Debug, Default)]
303struct ReleasedMappings {
304    doomed: Vec<Mapping>,
305    doomed_pins: Vec<Arc<PinnedMapping>>,
306}
307
308impl ReleasedMappings {
309    fn extend(&mut self, mappings: impl IntoIterator<Item = Mapping>) {
310        self.doomed.extend(mappings);
311    }
312
313    fn extend_pins(&mut self, mappings: impl IntoIterator<Item = Arc<PinnedMapping>>) {
314        self.doomed_pins.extend(mappings);
315    }
316
317    fn is_empty(&self) -> bool {
318        self.doomed.is_empty() && self.doomed_pins.is_empty()
319    }
320
321    #[cfg(test)]
322    fn len(&self) -> usize {
323        self.doomed.len() + self.doomed_pins.len()
324    }
325
326    fn finalize(&mut self, mm_state: RwLockWriteGuard<'_, MemoryManagerState>) {
327        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
328        // in `DirEntry`'s `drop`.
329        std::mem::drop(mm_state);
330        std::mem::take(&mut self.doomed);
331        std::mem::take(&mut self.doomed_pins);
332    }
333}
334
335impl Drop for ReleasedMappings {
336    fn drop(&mut self) {
337        assert!(self.is_empty(), "ReleasedMappings::finalize() must be called before drop");
338    }
339}
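// Illustrative sketch (not part of the original source) of the intended lifecycle:
// callers collect doomed mappings while holding the state write lock and must call
// `finalize`, which drops the lock before the mappings themselves are dropped; the
// `Drop` impl above asserts that `finalize` was called. The `state` field name is an
// assumption here.
//
//     let mut released_mappings = ReleasedMappings::default();
//     let mut state = mm.state.write();
//     state.unmap(&mm, addr, length, &mut released_mappings)?;
//     released_mappings.finalize(state); // releases the lock, then drops the mappings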
340
341fn map_in_vmar(
342    vmar: &zx::Vmar,
343    vmar_info: &zx::VmarInfo,
344    addr: SelectedAddress,
345    memory: &MemoryObject,
346    memory_offset: u64,
347    length: usize,
348    flags: MappingFlags,
349    populate: bool,
350) -> Result<UserAddress, Errno> {
351    let vmar_offset = addr.addr().checked_sub(vmar_info.base).ok_or_else(|| errno!(ENOMEM))?;
352    let vmar_extra_flags = match addr {
353        SelectedAddress::Fixed(_) => zx::VmarFlags::SPECIFIC,
354        SelectedAddress::FixedOverwrite(_) => ZX_VM_SPECIFIC_OVERWRITE,
355    };
356
357    if populate {
358        let op = if flags.contains(MappingFlags::WRITE) {
359            // Requires ZX_RIGHT_WRITE, which we should expect when the mapping is writable.
360            zx::VmoOp::COMMIT
361        } else {
362            // When we don't expect to have ZX_RIGHT_WRITEABLE, fall back to a VMO op that doesn't
363            // need it.
364            zx::VmoOp::PREFETCH
365        };
366        trace_duration!(CATEGORY_STARNIX_MM, "MmapCommitPages");
367        let _ = memory.op_range(op, memory_offset, length as u64);
368        // "The mmap() call doesn't fail if the mapping cannot be populated."
369    }
370
371    let vmar_maybe_map_range = if populate && !vmar_extra_flags.contains(ZX_VM_SPECIFIC_OVERWRITE) {
372        zx::VmarFlags::MAP_RANGE
373    } else {
374        zx::VmarFlags::empty()
375    };
376    let vmar_flags = flags.access_flags().to_vmar_flags()
377        | zx::VmarFlags::ALLOW_FAULTS
378        | vmar_extra_flags
379        | vmar_maybe_map_range;
380
381    let map_result = memory.map_in_vmar(vmar, vmar_offset.ptr(), memory_offset, length, vmar_flags);
382    let mapped_addr = map_result.map_err(MemoryManager::get_errno_for_map_err)?;
383
384    Ok(UserAddress::from_ptr(mapped_addr))
385}
386
387impl MemoryManagerState {
388    /// Returns occupied address ranges that intersect with the given range.
389    ///
390    /// An address range is "occupied" if (a) there is already a mapping in that range or (b) there
391    /// is a GROWSDOWN mapping <= 256 pages above that range. The 256 pages below a GROWSDOWN
392    /// mapping is the "guard region." The memory manager avoids mapping memory in the guard region
393    /// in some circumstances to preserve space for the GROWSDOWN mapping to grow down.
394    fn get_occupied_address_ranges<'a>(
395        &'a self,
396        subrange: &'a Range<UserAddress>,
397    ) -> impl Iterator<Item = Range<UserAddress>> + 'a {
398        let query_range = subrange.start
399            ..(subrange
400                .end
401                .saturating_add(*PAGE_SIZE as usize * GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS));
402        self.mappings.range(query_range).filter_map(|(range, mapping)| {
403            let occupied_range = mapping.inflate_to_include_guard_pages(range);
404            if occupied_range.start < subrange.end && subrange.start < occupied_range.end {
405                Some(occupied_range)
406            } else {
407                None
408            }
409        })
410    }
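    // Illustrative example (not part of the original source): with 4 KiB pages the guard
    // region is 256 * 4 KiB = 1 MiB. A GROWSDOWN mapping at [0x2010_0000, 0x2020_0000)
    // therefore causes the whole of [0x2000_0000, 0x2020_0000) to be reported as occupied,
    // keeping the 1 MiB below the mapping free for the stack to grow into.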
411
412    fn count_possible_placements(
413        &self,
414        length: usize,
415        subrange: &Range<UserAddress>,
416    ) -> Option<usize> {
417        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
418        let mut possible_placements = 0;
419        // If the allocation were placed at the first available address, every page left before
420        // the next mapping (or the end of the subrange) would add one more potential placement.
421        let mut first_fill_end = subrange.start.checked_add(length)?;
422        while first_fill_end <= subrange.end {
423            let Some(mapping) = occupied_ranges.next() else {
424                possible_placements += (subrange.end - first_fill_end) / (*PAGE_SIZE as usize) + 1;
425                break;
426            };
427            if mapping.start >= first_fill_end {
428                possible_placements += (mapping.start - first_fill_end) / (*PAGE_SIZE as usize) + 1;
429            }
430            first_fill_end = mapping.end.checked_add(length)?;
431        }
432        Some(possible_placements)
433    }
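    // Illustrative example (not part of the original source), assuming 4 KiB pages: for a
    // subrange [0x1_0000, 0x2_0000) (16 pages), one occupied range [0x1_6000, 0x1_8000)
    // (2 pages), and length = 2 pages:
    //   * left of the occupied range: (0x1_6000 - 0x1_2000) / 0x1000 + 1 = 5 placements;
    //   * right of it:                (0x2_0000 - 0x1_A000) / 0x1000 + 1 = 7 placements;
    // so `count_possible_placements` returns Some(12).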
434
435    fn pick_placement(
436        &self,
437        length: usize,
438        mut chosen_placement_idx: usize,
439        subrange: &Range<UserAddress>,
440    ) -> Option<UserAddress> {
441        let mut candidate =
442            Range { start: subrange.start, end: subrange.start.checked_add(length)? };
443        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
444        loop {
445            let Some(mapping) = occupied_ranges.next() else {
446                // No more mappings: treat the rest of the index as an offset.
447                let res =
448                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
449                debug_assert!(res.checked_add(length)? <= subrange.end);
450                return Some(res);
451            };
452            if mapping.start < candidate.end {
453                // doesn't fit, skip
454                candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
455                continue;
456            }
457            let unused_space =
458                (mapping.start.ptr() - candidate.end.ptr()) / (*PAGE_SIZE as usize) + 1;
459            if unused_space > chosen_placement_idx {
460                // Chosen placement is within the range; treat the rest of the index as an offset.
461                let res =
462                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
463                return Some(res);
464            }
465
466            // chosen address is further up, skip
467            chosen_placement_idx -= unused_space;
468            candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
469        }
470    }
471
472    fn find_random_unused_range(
473        &self,
474        length: usize,
475        subrange: &Range<UserAddress>,
476    ) -> Option<UserAddress> {
477        let possible_placements = self.count_possible_placements(length, subrange)?;
478        if possible_placements == 0 {
479            return None;
480        }
481        let chosen_placement_idx = rand::random_range(0..possible_placements);
482        self.pick_placement(length, chosen_placement_idx, subrange)
483    }
484
485    // Find the first unused range of addresses that fits a mapping of `length` bytes, searching
486    // from `mmap_top` downwards.
487    pub fn find_next_unused_range(&self, length: usize) -> Option<UserAddress> {
488        let gap_size = length as u64;
489        let mut upper_bound = self.mmap_top;
490
491        loop {
492            let gap_end = self.mappings.find_gap_end(gap_size, &upper_bound);
493            let candidate = gap_end.checked_sub(length)?;
494
495            // Is there a next mapping? If not, the candidate is already good.
496            let Some((occupied_range, mapping)) = self.mappings.get(gap_end) else {
497                return Some(candidate);
498            };
499            let occupied_range = mapping.inflate_to_include_guard_pages(occupied_range);
500            // If it doesn't overlap, the gap is big enough to fit.
501            if occupied_range.start >= gap_end {
502                return Some(candidate);
503            }
504            // If there was a mapping in the way, use the start of that range as the upper bound.
505            upper_bound = occupied_range.start;
506        }
507    }
508
509    // Accept the hint if the range is unused and within the range available for mapping.
510    fn is_hint_acceptable(&self, hint_addr: UserAddress, length: usize) -> bool {
511        let Some(hint_end) = hint_addr.checked_add(length) else {
512            return false;
513        };
514        if !RESTRICTED_ASPACE_RANGE.contains(&hint_addr.ptr())
515            || !RESTRICTED_ASPACE_RANGE.contains(&hint_end.ptr())
516        {
517            return false;
518        };
519        self.get_occupied_address_ranges(&(hint_addr..hint_end)).next().is_none()
520    }
521
522    fn select_address(
523        &self,
524        addr: DesiredAddress,
525        length: usize,
526        flags: MappingFlags,
527    ) -> Result<SelectedAddress, Errno> {
528        let adjusted_length = round_up_to_system_page_size(length).or_else(|_| error!(ENOMEM))?;
529
530        let find_address = || -> Result<SelectedAddress, Errno> {
531            let new_addr = if flags.contains(MappingFlags::LOWER_32BIT) {
532                // MAP_32BIT specifies that the memory allocated will
533                // be within the first 2 GB of the process address space.
534                self.find_random_unused_range(
535                    adjusted_length,
536                    &(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
537                        ..UserAddress::from_ptr(0x80000000)),
538                )
539                .ok_or_else(|| errno!(ENOMEM))?
540            } else {
541                self.find_next_unused_range(adjusted_length).ok_or_else(|| errno!(ENOMEM))?
542            };
543
544            Ok(SelectedAddress::Fixed(new_addr))
545        };
546
547        Ok(match addr {
548            DesiredAddress::Any => find_address()?,
549            DesiredAddress::Hint(hint_addr) => {
550                // Round down to page size
551                let hint_addr =
552                    UserAddress::from_ptr(hint_addr.ptr() - hint_addr.ptr() % *PAGE_SIZE as usize);
553                if self.is_hint_acceptable(hint_addr, adjusted_length) {
554                    SelectedAddress::Fixed(hint_addr)
555                } else {
556                    find_address()?
557                }
558            }
559            DesiredAddress::Fixed(addr) => SelectedAddress::Fixed(addr),
560            DesiredAddress::FixedOverwrite(addr) => SelectedAddress::FixedOverwrite(addr),
561        })
562    }
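    // Illustrative example (not part of the original source): a `DesiredAddress::Hint(addr)`
    // with addr = 0x2000_1234 is first rounded down to 0x2000_1000; if that page range is
    // inside the restricted aspace and does not collide with any occupied range (including
    // guard regions), the hint is honored, otherwise the allocator falls back to
    // `find_next_unused_range` (or the randomized low range for LOWER_32BIT mappings).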
563
564    // Map the memory without updating `self.mappings`.
565    fn map_in_user_vmar(
566        &self,
567        addr: SelectedAddress,
568        memory: &MemoryObject,
569        memory_offset: u64,
570        length: usize,
571        flags: MappingFlags,
572        populate: bool,
573    ) -> Result<UserAddress, Errno> {
574        map_in_vmar(
575            &self.user_vmar,
576            &self.user_vmar_info,
577            addr,
578            memory,
579            memory_offset,
580            length,
581            flags,
582            populate,
583        )
584    }
585
586    fn validate_addr(&self, addr: DesiredAddress, length: usize) -> Result<(), Errno> {
587        if let DesiredAddress::FixedOverwrite(addr) = addr {
588            if self.check_has_unauthorized_splits(addr, length) {
589                return error!(ENOMEM);
590            }
591        }
592        Ok(())
593    }
594
595    fn map_memory(
596        &mut self,
597        mm: &Arc<MemoryManager>,
598        addr: DesiredAddress,
599        memory: Arc<MemoryObject>,
600        memory_offset: u64,
601        length: usize,
602        flags: MappingFlags,
603        max_access: Access,
604        populate: bool,
605        name: MappingName,
606        released_mappings: &mut ReleasedMappings,
607    ) -> Result<UserAddress, Errno> {
608        self.validate_addr(addr, length)?;
609
610        let selected_address = self.select_address(addr, length, flags)?;
611        let mapped_addr = self.map_in_user_vmar(
612            selected_address,
613            &memory,
614            memory_offset,
615            length,
616            flags,
617            populate,
618        )?;
619
620        #[cfg(any(test, debug_assertions))]
621        {
622            // Take the lock on directory entry while holding the one on the mm state to ensure any
623            // wrong ordering will trigger the tracing-mutex at the right call site.
624            if let MappingName::File(file) = &name {
625                let _l1 = file.name.entry.read();
626            }
627        }
628
629        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
630
631        if let DesiredAddress::FixedOverwrite(addr) = addr {
632            assert_eq!(addr, mapped_addr);
633            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
634        }
635
636        let mapping = Mapping::with_name(
637            self.create_memory_backing(mapped_addr, memory, memory_offset),
638            flags,
639            max_access,
640            name,
641        );
642        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
643
644        Ok(mapped_addr)
645    }
646
647    fn map_private_anonymous(
648        &mut self,
649        mm: &Arc<MemoryManager>,
650        addr: DesiredAddress,
651        length: usize,
652        prot_flags: ProtectionFlags,
653        options: MappingOptions,
654        populate: bool,
655        name: MappingName,
656        released_mappings: &mut ReleasedMappings,
657    ) -> Result<UserAddress, Errno> {
658        self.validate_addr(addr, length)?;
659
660        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
661        let selected_addr = self.select_address(addr, length, flags)?;
662        let backing_memory_offset = selected_addr.addr().ptr();
663
664        let mapped_addr = self.map_in_user_vmar(
665            selected_addr,
666            &self.private_anonymous.backing,
667            backing_memory_offset as u64,
668            length,
669            flags,
670            populate,
671        )?;
672
673        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
674        if let DesiredAddress::FixedOverwrite(addr) = addr {
675            assert_eq!(addr, mapped_addr);
676            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
677        }
678
679        let mapping = Mapping::new_private_anonymous(flags, name);
680        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
681
682        Ok(mapped_addr)
683    }
684
685    fn map_anonymous(
686        &mut self,
687        mm: &Arc<MemoryManager>,
688        addr: DesiredAddress,
689        length: usize,
690        prot_flags: ProtectionFlags,
691        options: MappingOptions,
692        name: MappingName,
693        released_mappings: &mut ReleasedMappings,
694    ) -> Result<UserAddress, Errno> {
695        if !options.contains(MappingOptions::SHARED) {
696            return self.map_private_anonymous(
697                mm,
698                addr,
699                length,
700                prot_flags,
701                options,
702                options.contains(MappingOptions::POPULATE),
703                name,
704                released_mappings,
705            );
706        }
707        let memory = create_anonymous_mapping_memory(length as u64)?;
708        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
709        self.map_memory(
710            mm,
711            addr,
712            memory,
713            0,
714            length,
715            flags,
716            Access::rwx(),
717            options.contains(MappingOptions::POPULATE),
718            name,
719            released_mappings,
720        )
721    }
722
723    fn remap(
724        &mut self,
725        _current_task: &CurrentTask,
726        mm: &Arc<MemoryManager>,
727        old_addr: UserAddress,
728        old_length: usize,
729        new_length: usize,
730        flags: MremapFlags,
731        new_addr: UserAddress,
732        released_mappings: &mut ReleasedMappings,
733    ) -> Result<UserAddress, Errno> {
734        // MREMAP_FIXED moves a mapping, which requires MREMAP_MAYMOVE.
735        if flags.contains(MremapFlags::FIXED) && !flags.contains(MremapFlags::MAYMOVE) {
736            return error!(EINVAL);
737        }
738
739        // MREMAP_DONTUNMAP is always a move, so it requires MREMAP_MAYMOVE.
740        // There is no resizing allowed either.
741        if flags.contains(MremapFlags::DONTUNMAP)
742            && (!flags.contains(MremapFlags::MAYMOVE) || old_length != new_length)
743        {
744            return error!(EINVAL);
745        }
746
747        // In-place copies are invalid.
748        if !flags.contains(MremapFlags::MAYMOVE) && old_length == 0 {
749            return error!(ENOMEM);
750        }
751
752        if new_length == 0 {
753            return error!(EINVAL);
754        }
755
756        // Make sure old_addr is page-aligned.
757        if !old_addr.is_aligned(*PAGE_SIZE) {
758            return error!(EINVAL);
759        }
760
761        let old_length = round_up_to_system_page_size(old_length)?;
762        let new_length = round_up_to_system_page_size(new_length)?;
763
764        if self.check_has_unauthorized_splits(old_addr, old_length) {
765            return error!(EINVAL);
766        }
767
768        if self.check_has_unauthorized_splits(new_addr, new_length) {
769            return error!(EINVAL);
770        }
771
772        if !flags.contains(MremapFlags::DONTUNMAP)
773            && !flags.contains(MremapFlags::FIXED)
774            && old_length != 0
775        {
776            // We are not requested to remap to a specific address, so first we see if we can remap
777            // in-place. In-place copies (old_length == 0) are not allowed.
778            if let Some(new_addr) =
779                self.try_remap_in_place(mm, old_addr, old_length, new_length, released_mappings)?
780            {
781                return Ok(new_addr);
782            }
783        }
784
785        // There is no space to grow in place, or there is an explicit request to move.
786        if flags.contains(MremapFlags::MAYMOVE) {
787            let dst_address =
788                if flags.contains(MremapFlags::FIXED) { Some(new_addr) } else { None };
789            self.remap_move(
790                mm,
791                old_addr,
792                old_length,
793                dst_address,
794                new_length,
795                flags.contains(MremapFlags::DONTUNMAP),
796                released_mappings,
797            )
798        } else {
799            error!(ENOMEM)
800        }
801    }
802
803    /// Attempts to grow or shrink the mapping in-place. Returns `Ok(Some(addr))` if the remap was
804    /// successful. Returns `Ok(None)` if there was no space to grow.
805    fn try_remap_in_place(
806        &mut self,
807        mm: &Arc<MemoryManager>,
808        old_addr: UserAddress,
809        old_length: usize,
810        new_length: usize,
811        released_mappings: &mut ReleasedMappings,
812    ) -> Result<Option<UserAddress>, Errno> {
813        let old_range = old_addr..old_addr.checked_add(old_length).ok_or_else(|| errno!(EINVAL))?;
814        let new_range_in_place =
815            old_addr..old_addr.checked_add(new_length).ok_or_else(|| errno!(EINVAL))?;
816
817        if new_length <= old_length {
818            // Shrink the mapping in-place, which should always succeed.
819            // This is done by unmapping the extraneous region.
820            if new_length != old_length {
821                self.unmap(mm, new_range_in_place.end, old_length - new_length, released_mappings)?;
822            }
823            return Ok(Some(old_addr));
824        }
825
826        if self.mappings.range(old_range.end..new_range_in_place.end).next().is_some() {
827            // There is some mapping in the growth range preventing an in-place growth.
828            return Ok(None);
829        }
830
831        // There is space to grow in-place. The old range must be one contiguous mapping.
832        let (original_range, mapping) =
833            self.mappings.get(old_addr).ok_or_else(|| errno!(EINVAL))?;
834
835        if old_range.end > original_range.end {
836            return error!(EFAULT);
837        }
838        let original_range = original_range.clone();
839        let original_mapping = mapping.clone();
840
841        // Compute the new length of the entire mapping once it has grown.
842        let final_length = (original_range.end - original_range.start) + (new_length - old_length);
843
844        match self.get_mapping_backing(&original_mapping) {
845            MappingBacking::Memory(backing) => {
846                // Re-map the original range, which may include pages before the requested range.
847                Ok(Some(self.map_memory(
848                    mm,
849                    DesiredAddress::FixedOverwrite(original_range.start),
850                    backing.memory().clone(),
851                    backing.address_to_offset(original_range.start),
852                    final_length,
853                    original_mapping.flags(),
854                    original_mapping.max_access(),
855                    false,
856                    original_mapping.name(),
857                    released_mappings,
858                )?))
859            }
860            MappingBacking::PrivateAnonymous => {
861                let growth_start = original_range.end;
862                let growth_length = new_length - old_length;
863                let final_end = (original_range.start + final_length)?;
864                // Map new pages to back the growth.
865                self.map_in_user_vmar(
866                    SelectedAddress::FixedOverwrite(growth_start),
867                    &self.private_anonymous.backing,
868                    growth_start.ptr() as u64,
869                    growth_length,
870                    original_mapping.flags(),
871                    false,
872                )?;
873                // Overwrite the mapping entry with the new larger size.
874                released_mappings.extend(
875                    self.mappings.insert(original_range.start..final_end, original_mapping.clone()),
876                );
877                Ok(Some(original_range.start))
878            }
879        }
880    }
881
882    /// Grows or shrinks the mapping while moving it to a new destination.
883    fn remap_move(
884        &mut self,
885        mm: &Arc<MemoryManager>,
886        src_addr: UserAddress,
887        src_length: usize,
888        dst_addr: Option<UserAddress>,
889        dst_length: usize,
890        keep_source: bool,
891        released_mappings: &mut ReleasedMappings,
892    ) -> Result<UserAddress, Errno> {
893        let src_range = src_addr..src_addr.checked_add(src_length).ok_or_else(|| errno!(EINVAL))?;
894        let (original_range, src_mapping) =
895            self.mappings.get(src_addr).ok_or_else(|| errno!(EINVAL))?;
896        let original_range = original_range.clone();
897        let src_mapping = src_mapping.clone();
898
899        if src_length == 0 && !src_mapping.flags().contains(MappingFlags::SHARED) {
900            // src_length == 0 means that the mapping is to be copied. This behavior is only valid
901            // with MAP_SHARED mappings.
902            return error!(EINVAL);
903        }
904
905        // If the destination range is smaller than the source range, we must first shrink
906        // the source range in place. This must be done now and visible to processes, even if
907        // a later failure causes the remap operation to fail.
908        if src_length != 0 && src_length > dst_length {
909            self.unmap(mm, (src_addr + dst_length)?, src_length - dst_length, released_mappings)?;
910        }
911
912        let dst_addr_for_map = match dst_addr {
913            None => DesiredAddress::Any,
914            Some(dst_addr) => {
915                // The mapping is being moved to a specific address.
916                let dst_range =
917                    dst_addr..(dst_addr.checked_add(dst_length).ok_or_else(|| errno!(EINVAL))?);
918                if !src_range.intersect(&dst_range).is_empty() {
919                    return error!(EINVAL);
920                }
921
922                // The destination range must be unmapped. This must be done now and visible to
923                // processes, even if a later failure causes the remap operation to fail.
924                self.unmap(mm, dst_addr, dst_length, released_mappings)?;
925
926                DesiredAddress::Fixed(dst_addr)
927            }
928        };
929
930        // According to gVisor's aio_test, Linux checks for DONT_EXPAND after unmapping the dst
931        // range.
932        if dst_length > src_length && src_mapping.flags().contains(MappingFlags::DONT_EXPAND) {
933            return error!(EFAULT);
934        }
935
936        if src_range.end > original_range.end {
937            // The source range is not one contiguous mapping. This check must be done only after
938            // the source range is shrunk and the destination unmapped.
939            return error!(EFAULT);
940        }
941
942        match self.get_mapping_backing(&src_mapping) {
943            MappingBacking::PrivateAnonymous => {
944                let dst_addr =
945                    self.select_address(dst_addr_for_map, dst_length, src_mapping.flags())?.addr();
946                let dst_end = (dst_addr + dst_length)?;
947
948                let length_to_move = std::cmp::min(dst_length, src_length) as u64;
949                let growth_start_addr = (dst_addr + length_to_move)?;
950
951                if dst_addr != src_addr {
952                    let src_move_end = (src_range.start + length_to_move)?;
953                    let range_to_move = src_range.start..src_move_end;
954                    // Move the previously mapped pages into their new location.
955                    self.private_anonymous.move_pages(&range_to_move, dst_addr)?;
956                }
957
958                self.map_in_user_vmar(
959                    SelectedAddress::FixedOverwrite(dst_addr),
960                    &self.private_anonymous.backing,
961                    dst_addr.ptr() as u64,
962                    dst_length,
963                    src_mapping.flags(),
964                    false,
965                )?;
966
967                if dst_length > src_length {
968                    // The mapping has grown, map new pages in to cover the growth.
969                    let growth_length = dst_length - src_length;
970
971                    self.map_private_anonymous(
972                        mm,
973                        DesiredAddress::FixedOverwrite(growth_start_addr),
974                        growth_length,
975                        src_mapping.flags().access_flags(),
976                        src_mapping.flags().options(),
977                        false,
978                        src_mapping.name(),
979                        released_mappings,
980                    )?;
981                }
982
983                released_mappings.extend(self.mappings.insert(
984                    dst_addr..dst_end,
985                    Mapping::new_private_anonymous(src_mapping.flags(), src_mapping.name()),
986                ));
987
988                if dst_addr != src_addr && src_length != 0 && !keep_source {
989                    self.unmap(mm, src_addr, src_length, released_mappings)?;
990                }
991
992                return Ok(dst_addr);
993            }
994            MappingBacking::Memory(backing) => {
995                // This mapping is backed by an FD or is a shared anonymous mapping. Just map the
996                // range of the memory object covering the moved pages. If the memory object already
997                // had COW semantics, this preserves them.
998                let (dst_memory_offset, memory) =
999                    (backing.address_to_offset(src_addr), backing.memory().clone());
1000
1001                let new_address = self.map_memory(
1002                    mm,
1003                    dst_addr_for_map,
1004                    memory,
1005                    dst_memory_offset,
1006                    dst_length,
1007                    src_mapping.flags(),
1008                    src_mapping.max_access(),
1009                    false,
1010                    src_mapping.name(),
1011                    released_mappings,
1012                )?;
1013
1014                if src_length != 0 && !keep_source {
1015                    // Only unmap the source range if this is not a copy and if there was not a specific
1016                    // request to not unmap. It was checked earlier that in case of src_length == 0
1017                    // this mapping is MAP_SHARED.
1018                    self.unmap(mm, src_addr, src_length, released_mappings)?;
1019                }
1020
1021                return Ok(new_address);
1022            }
1023        };
1024    }
1025
1026    // Checks if an operation may be performed over the target mapping that may
1027    // result in a split mapping.
1028    //
1029    // An operation may be forbidden if the target mapping only partially covers
1030    // an existing mapping with the `MappingOptions::DONT_SPLIT` flag set.
1031    fn check_has_unauthorized_splits(&self, addr: UserAddress, length: usize) -> bool {
1032        let query_range = addr..addr.saturating_add(length);
1033        let mut intersection = self.mappings.range(query_range.clone());
1034
1035        // A mapping is not OK if it disallows splitting and the target range
1036        // does not fully cover the mapping range.
1037        let check_if_mapping_has_unauthorized_split =
1038            |mapping: Option<(&Range<UserAddress>, &Mapping)>| {
1039                mapping.is_some_and(|(mapping_range, mapping)| {
1040                    mapping.flags().contains(MappingFlags::DONT_SPLIT)
1041                        && (mapping_range.start < query_range.start
1042                            || query_range.end < mapping_range.end)
1043                })
1044            };
1045
1046        // We only check the first and last mappings in the range because naturally,
1047        // the mappings in the middle are fully covered by the target mapping and
1048        // won't be split.
1049        check_if_mapping_has_unauthorized_split(intersection.next())
1050            || check_if_mapping_has_unauthorized_split(intersection.next_back())
1051    }
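    // Illustrative example (not part of the original source): if [0x3000_0000, 0x3001_0000)
    // is mapped with `MappingOptions::DONT_SPLIT`, a request over [0x3000_8000, 0x3001_0000)
    // only partially covers that mapping and is reported as an unauthorized split, whereas a
    // request covering the full [0x3000_0000, 0x3001_0000) range is not.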
1052
1053    /// Unmaps the specified range. Unmapped mappings are placed in `released_mappings`.
1054    fn unmap(
1055        &mut self,
1056        mm: &Arc<MemoryManager>,
1057        addr: UserAddress,
1058        length: usize,
1059        released_mappings: &mut ReleasedMappings,
1060    ) -> Result<(), Errno> {
1061        if !addr.is_aligned(*PAGE_SIZE) {
1062            return error!(EINVAL);
1063        }
1064        let length = round_up_to_system_page_size(length)?;
1065        if length == 0 {
1066            return error!(EINVAL);
1067        }
1068
1069        if self.check_has_unauthorized_splits(addr, length) {
1070            return error!(EINVAL);
1071        }
1072
1073        // Unmap the range, including the tail of any range that would have been split. This
1074        // operation is safe because we're operating on another process.
1075        #[allow(
1076            clippy::undocumented_unsafe_blocks,
1077            reason = "Force documented unsafe blocks in Starnix"
1078        )]
1079        match unsafe { self.user_vmar.unmap(addr.ptr(), length) } {
1080            Ok(_) => (),
1081            Err(zx::Status::NOT_FOUND) => (),
1082            Err(zx::Status::INVALID_ARGS) => return error!(EINVAL),
1083            Err(status) => {
1084                impossible_error(status);
1085            }
1086        };
1087
1088        self.update_after_unmap(mm, addr, length, released_mappings)?;
1089
1090        Ok(())
1091    }
1092
1093    // Updates `self.mappings` after the specified range was unmapped.
1094    //
1095    // The range to unmap can span multiple mappings, and can split mappings if
1096    // the range start or end falls in the middle of a mapping.
1097    //
1098    // Private anonymous memory is contained in the same memory object; the pages of that object
1099    // that are no longer reachable should be released.
1100    //
1101    // File-backed mappings don't need to have their memory object modified.
1102    //
1103    // Unmapped mappings are placed in `released_mappings`.
1104    fn update_after_unmap(
1105        &mut self,
1106        mm: &Arc<MemoryManager>,
1107        addr: UserAddress,
1108        length: usize,
1109        released_mappings: &mut ReleasedMappings,
1110    ) -> Result<(), Errno> {
1111        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
1112        let unmap_range = addr..end_addr;
1113
1114        // Remove any shadow mappings for mlock()'d pages that are now unmapped.
1115        released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(unmap_range.clone()));
1116
1117        for (range, mapping) in self.mappings.range(unmap_range.clone()) {
1118            // Deallocate any pages in the private, anonymous backing that are now unreachable.
1119            if let MappingBacking::PrivateAnonymous = self.get_mapping_backing(mapping) {
1120                let unmapped_range = &unmap_range.intersect(range);
1121
1122                mm.inflight_vmspliced_payloads
1123                    .handle_unmapping(&self.private_anonymous.backing, unmapped_range)?;
1124
1125                self.private_anonymous
1126                    .zero(unmapped_range.start, unmapped_range.end - unmapped_range.start)?;
1127            }
1128        }
1129        released_mappings.extend(self.mappings.remove(unmap_range));
1130        return Ok(());
1131    }
1132
1133    fn protect_vmar_range(
1134        &self,
1135        addr: UserAddress,
1136        length: usize,
1137        prot_flags: ProtectionFlags,
1138    ) -> Result<(), Errno> {
1139        let vmar_flags = prot_flags.to_vmar_flags();
1140        // SAFETY: Modifying user vmar
1141        unsafe { self.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(|s| match s {
1142            zx::Status::INVALID_ARGS => errno!(EINVAL),
1143            zx::Status::NOT_FOUND => errno!(ENOMEM),
1144            zx::Status::ACCESS_DENIED => errno!(EACCES),
1145            _ => impossible_error(s),
1146        })
1147    }
1148
1149    fn protect(
1150        &mut self,
1151        current_task: &CurrentTask,
1152        addr: UserAddress,
1153        length: usize,
1154        prot_flags: ProtectionFlags,
1155        released_mappings: &mut ReleasedMappings,
1156    ) -> Result<(), Errno> {
1157        let vmar_flags = prot_flags.to_vmar_flags();
1158        let page_size = *PAGE_SIZE;
1159        let end = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(page_size)?;
1160
1161        if self.check_has_unauthorized_splits(addr, length) {
1162            return error!(EINVAL);
1163        }
1164
1165        let prot_range = if prot_flags.contains(ProtectionFlags::GROWSDOWN) {
1166            let mut start = addr;
1167            let Some((range, mapping)) = self.mappings.get(start) else {
1168                return error!(EINVAL);
1169            };
1170            // Ensure that the mapping has GROWSDOWN if PROT_GROWSDOWN was specified.
1171            if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1172                return error!(EINVAL);
1173            }
1174            let access_flags = mapping.flags().access_flags();
1175            // From <https://man7.org/linux/man-pages/man2/mprotect.2.html>:
1176            //
1177            //   PROT_GROWSDOWN
1178            //     Apply the protection mode down to the beginning of a
1179            //     mapping that grows downward (which should be a stack
1180            //     segment or a segment mapped with the MAP_GROWSDOWN flag
1181            //     set).
1182            start = range.start;
1183            while let Some((range, mapping)) =
1184                self.mappings.get(start.saturating_sub(page_size as usize))
1185            {
1186                if !mapping.flags().contains(MappingFlags::GROWSDOWN)
1187                    || mapping.flags().access_flags() != access_flags
1188                {
1189                    break;
1190                }
1191                start = range.start;
1192            }
1193            start..end
1194        } else {
1195            addr..end
1196        };
1197
1198        let addr = prot_range.start;
1199        let length = prot_range.end - prot_range.start;
1200
1201        // TODO: We should check the max_access flags on all the mappings in this range.
1202        //       There are cases where max_access is more restrictive than the Zircon rights
1203        //       we hold on the underlying VMOs.
1204
1205        // TODO(https://fxbug.dev/411617451): `mprotect` should apply the protection flags
1206        // until it encounters a mapping that doesn't allow it, rather than not apply the protection
1207        // flags at all if a single mapping doesn't allow it.
1208        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1209            security::file_mprotect(current_task, range, mapping, prot_flags)?;
1210        }
1211
1212        // Make one call to mprotect to update all the zircon protections.
1213        // SAFETY: This is safe because the vmar belongs to a different process.
1214        unsafe { self.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(|s| match s {
1215            zx::Status::INVALID_ARGS => errno!(EINVAL),
1216            zx::Status::NOT_FOUND => {
1217                track_stub!(
1218                    TODO("https://fxbug.dev/322875024"),
1219                    "mprotect: succeed and update prot after NOT_FOUND"
1220                );
1221                errno!(EINVAL)
1222            }
1223            zx::Status::ACCESS_DENIED => errno!(EACCES),
1224            _ => impossible_error(s),
1225        })?;
1226
1227        // Update the flags on each mapping in the range.
1228        let mut updates = vec![];
1229        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1230            if mapping.flags().contains(MappingFlags::UFFD) {
1231                track_stub!(
1232                    TODO("https://fxbug.dev/297375964"),
1233                    "mprotect on uffd-registered range should not alter protections"
1234                );
1235                return error!(EINVAL);
1236            }
1237            let range = range.intersect(&prot_range);
1238            let mut mapping = mapping.clone();
1239            mapping.set_flags(mapping.flags().with_access_flags(prot_flags));
1240            updates.push((range, mapping));
1241        }
1242        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1243        for (range, mapping) in updates {
1244            released_mappings.extend(self.mappings.insert(range, mapping));
1245        }
1246        Ok(())
1247    }
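    // Illustrative example (not part of the original source): with PROT_GROWSDOWN, if the
    // stack consists of two adjacent GROWSDOWN mappings [a, b) and [b, c) with identical
    // access flags and mprotect() targets a page inside [b, c), the walk above extends the
    // protection change downwards so that it is applied from `a` up to the requested end.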
1248
1249    fn madvise(
1250        &mut self,
1251        _current_task: &CurrentTask,
1252        addr: UserAddress,
1253        length: usize,
1254        advice: u32,
1255        released_mappings: &mut ReleasedMappings,
1256    ) -> Result<(), Errno> {
1257        if !addr.is_aligned(*PAGE_SIZE) {
1258            return error!(EINVAL);
1259        }
1260
1261        let end_addr =
1262            addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(*PAGE_SIZE)?;
1263        if end_addr > self.max_address() {
1264            return error!(EFAULT);
1265        }
1266
1267        if advice == MADV_NORMAL {
1268            track_stub!(TODO("https://fxbug.dev/322874202"), "madvise undo hints for MADV_NORMAL");
1269            return Ok(());
1270        }
1271
1272        let mut updates = vec![];
1273        let range_for_op = addr..end_addr;
1274        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
1275            let range_to_zero = range.intersect(&range_for_op);
1276            if range_to_zero.is_empty() {
1277                continue;
1278            }
1279            let start_offset = mapping.address_to_offset(range_to_zero.start);
1280            let end_offset = mapping.address_to_offset(range_to_zero.end);
1281            if advice == MADV_DONTFORK
1282                || advice == MADV_DOFORK
1283                || advice == MADV_WIPEONFORK
1284                || advice == MADV_KEEPONFORK
1285                || advice == MADV_DONTDUMP
1286                || advice == MADV_DODUMP
1287                || advice == MADV_MERGEABLE
1288                || advice == MADV_UNMERGEABLE
1289            {
1290                // WIPEONFORK is only supported on private anonymous mappings per madvise(2).
1291                // KEEPONFORK can be specified on ranges that cover other sorts of mappings. It should
1292                // have no effect on mappings that are not private and anonymous as such mappings cannot
1293                // have the WIPEONFORK option set.
1294                if advice == MADV_WIPEONFORK && !mapping.private_anonymous() {
1295                    return error!(EINVAL);
1296                }
1297                let new_flags = match advice {
1298                    MADV_DONTFORK => mapping.flags() | MappingFlags::DONTFORK,
1299                    MADV_DOFORK => mapping.flags() & MappingFlags::DONTFORK.complement(),
1300                    MADV_WIPEONFORK => mapping.flags() | MappingFlags::WIPEONFORK,
1301                    MADV_KEEPONFORK => mapping.flags() & MappingFlags::WIPEONFORK.complement(),
1302                    MADV_DONTDUMP => {
1303                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTDUMP");
1304                        mapping.flags()
1305                    }
1306                    MADV_DODUMP => {
1307                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DODUMP");
1308                        mapping.flags()
1309                    }
1310                    MADV_MERGEABLE => {
1311                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_MERGEABLE");
1312                        mapping.flags()
1313                    }
1314                    MADV_UNMERGEABLE => {
1315                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_UNMERGEABLE");
1316                        mapping.flags()
1317                    }
1318                    // Only the variants in this match should be reachable given the condition for
1319                    // the containing branch.
1320                    unknown_advice => unreachable!("unknown advice {unknown_advice}"),
1321                };
1322                let mut new_mapping = mapping.clone();
1323                new_mapping.set_flags(new_flags);
1324                updates.push((range_to_zero, new_mapping));
1325            } else {
1326                if mapping.flags().contains(MappingFlags::SHARED) {
1327                    continue;
1328                }
1329                let op = match advice {
1330                    MADV_DONTNEED if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1331                        // Note, we cannot simply implement MADV_DONTNEED with
1332                        // zx::VmoOp::DONT_NEED because they have different
1333                        // semantics.
1334                        track_stub!(
1335                            TODO("https://fxbug.dev/322874496"),
1336                            "MADV_DONTNEED with file-backed mapping"
1337                        );
1338                        return error!(EINVAL);
1339                    }
1340                    MADV_DONTNEED if mapping.flags().contains(MappingFlags::LOCKED) => {
1341                        return error!(EINVAL);
1342                    }
1343                    MADV_DONTNEED => zx::VmoOp::ZERO,
1344                    MADV_DONTNEED_LOCKED => {
1345                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTNEED_LOCKED");
1346                        return error!(EINVAL);
1347                    }
1348                    MADV_WILLNEED => {
1349                        if mapping.flags().contains(MappingFlags::WRITE) {
1350                            zx::VmoOp::COMMIT
1351                        } else {
1352                            zx::VmoOp::PREFETCH
1353                        }
1354                    }
1355                    MADV_COLD => {
1356                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLD");
1357                        return error!(EINVAL);
1358                    }
1359                    MADV_PAGEOUT => {
1360                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_PAGEOUT");
1361                        return error!(EINVAL);
1362                    }
1363                    MADV_POPULATE_READ => {
1364                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_POPULATE_READ");
1365                        return error!(EINVAL);
1366                    }
1367                    MADV_RANDOM => {
1368                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_RANDOM");
1369                        return error!(EINVAL);
1370                    }
1371                    MADV_SEQUENTIAL => {
1372                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SEQUENTIAL");
1373                        return error!(EINVAL);
1374                    }
1375                    MADV_FREE if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1376                        track_stub!(
1377                            TODO("https://fxbug.dev/411748419"),
1378                            "MADV_FREE with file-backed mapping"
1379                        );
1380                        return error!(EINVAL);
1381                    }
1382                    MADV_FREE if mapping.flags().contains(MappingFlags::LOCKED) => {
1383                        return error!(EINVAL);
1384                    }
1385                    MADV_FREE => {
1386                        track_stub!(TODO("https://fxbug.dev/411748419"), "MADV_FREE");
1387                        // TODO(https://fxbug.dev/411748419) For now, treat MADV_FREE like
1388                        // MADV_DONTNEED as a stopgap until we have proper support.
1389                        zx::VmoOp::ZERO
1390                    }
1391                    MADV_REMOVE => {
1392                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_REMOVE");
1393                        return error!(EINVAL);
1394                    }
1395                    MADV_HWPOISON => {
1396                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HWPOISON");
1397                        return error!(EINVAL);
1398                    }
1399                    MADV_SOFT_OFFLINE => {
1400                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SOFT_OFFLINE");
1401                        return error!(EINVAL);
1402                    }
1403                    MADV_HUGEPAGE => {
1404                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HUGEPAGE");
1405                        return error!(EINVAL);
1406                    }
1407                    MADV_COLLAPSE => {
1408                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLLAPSE");
1409                        return error!(EINVAL);
1410                    }
1411                    MADV_NOHUGEPAGE => return Ok(()),
1412                    advice => {
1413                        track_stub!(TODO("https://fxbug.dev/322874202"), "madvise", advice);
1414                        return error!(EINVAL);
1415                    }
1416                };
1417
1418                let memory = match self.get_mapping_backing(mapping) {
1419                    MappingBacking::Memory(backing) => backing.memory(),
1420                    MappingBacking::PrivateAnonymous => &self.private_anonymous.backing,
1421                };
1422                memory.op_range(op, start_offset, end_offset - start_offset).map_err(
1423                    |s| match s {
1424                        zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1425                        zx::Status::NO_MEMORY => errno!(ENOMEM),
1426                        zx::Status::INVALID_ARGS => errno!(EINVAL),
1427                        zx::Status::ACCESS_DENIED => errno!(EACCES),
1428                        _ => impossible_error(s),
1429                    },
1430                )?;
1431            }
1432        }
1433        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1434        for (range, mapping) in updates {
1435            released_mappings.extend(self.mappings.insert(range, mapping));
1436        }
1437        Ok(())
1438    }
1439
1440    fn mlock<L>(
1441        &mut self,
1442        current_task: &CurrentTask,
1443        locked: &mut Locked<L>,
1444        desired_addr: UserAddress,
1445        desired_length: usize,
1446        on_fault: bool,
1447        released_mappings: &mut ReleasedMappings,
1448    ) -> Result<(), Errno>
1449    where
1450        L: LockBefore<ThreadGroupLimits>,
1451    {
1452        let desired_end_addr =
1453            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1454        let start_addr = round_down_to_system_page_size(desired_addr)?;
1455        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
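            // Illustrative sketch with hypothetical values (not from the source), assuming a
            // 0x1000-byte system page: a request with desired_addr = 0x3005 and
            // desired_length = 0x1000 covers 0x3005..0x4005, which rounds out to the
            // page-aligned range 0x3000..0x5000, so two full pages are considered below.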
1456
1457        let mut updates = vec![];
1458        let mut bytes_mapped_in_range = 0;
1459        let mut num_new_locked_bytes = 0;
1460        let mut failed_to_lock = false;
1461        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1462            let mut range = range.clone();
1463            let mut mapping = mapping.clone();
1464
1465            // Handle mappings that start before the region to be locked.
1466            range.start = std::cmp::max(range.start, start_addr);
1467            // Handle mappings that extend past the region to be locked.
1468            range.end = std::cmp::min(range.end, end_addr);
1469
1470            bytes_mapped_in_range += (range.end - range.start) as u64;
1471
1472            // PROT_NONE mappings generate ENOMEM but are left locked.
1473            if !mapping
1474                .flags()
1475                .intersects(MappingFlags::READ | MappingFlags::WRITE | MappingFlags::EXEC)
1476            {
1477                failed_to_lock = true;
1478            }
1479
1480            if !mapping.flags().contains(MappingFlags::LOCKED) {
1481                num_new_locked_bytes += (range.end - range.start) as u64;
1482                let shadow_mapping = match current_task.kernel().features.mlock_pin_flavor {
1483                    // Pin the memory by mapping the backing memory into the high priority vmar.
1484                    MlockPinFlavor::ShadowProcess => {
1485                        // Keep different shadow processes distinct for accounting purposes.
1486                        struct MlockShadowProcess(memory_pinning::ShadowProcess);
1487                        let shadow_process =
1488                            current_task.kernel().expando.get_or_try_init(|| {
1489                                memory_pinning::ShadowProcess::new(zx::Name::new_lossy(
1490                                    "starnix_mlock_pins",
1491                                ))
1492                                .map(MlockShadowProcess)
1493                                .map_err(|_| errno!(EPERM))
1494                            })?;
1495
1496                        let (vmo, offset) = match self.get_mapping_backing(&mapping) {
1497                            MappingBacking::Memory(m) => (
1498                                m.memory().as_vmo().ok_or_else(|| errno!(ENOMEM))?,
1499                                m.address_to_offset(range.start),
1500                            ),
1501                            MappingBacking::PrivateAnonymous => (
1502                                self.private_anonymous
1503                                    .backing
1504                                    .as_vmo()
1505                                    .ok_or_else(|| errno!(ENOMEM))?,
1506                                range.start.ptr() as u64,
1507                            ),
1508                        };
1509                        Some(shadow_process.0.pin_pages(vmo, offset, range.end - range.start)?)
1510                    }
1511
1512                    // Relying on VMAR-level operations means only the per-mapping flag is set here.
1513                    MlockPinFlavor::Noop | MlockPinFlavor::VmarAlwaysNeed => None,
1514                };
1515                mapping.set_mlock();
1516                updates.push((range, mapping, shadow_mapping));
1517            }
1518        }
1519
1520        if bytes_mapped_in_range as usize != end_addr - start_addr {
1521            return error!(ENOMEM);
1522        }
1523
1524        let memlock_rlimit = current_task.thread_group().get_rlimit(locked, Resource::MEMLOCK);
1525        if self.total_locked_bytes() + num_new_locked_bytes > memlock_rlimit {
1526            if crate::security::check_task_capable(current_task, CAP_IPC_LOCK).is_err() {
1527                let code = if memlock_rlimit > 0 { errno!(ENOMEM) } else { errno!(EPERM) };
1528                return Err(code);
1529            }
1530        }
1531
1532        let op_range_status_to_errno = |e| match e {
1533            zx::Status::BAD_STATE | zx::Status::NOT_SUPPORTED => errno!(ENOMEM),
1534            zx::Status::INVALID_ARGS | zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1535            zx::Status::ACCESS_DENIED => {
1536                unreachable!("user vmar should always have needed rights")
1537            }
1538            zx::Status::BAD_HANDLE => {
1539                unreachable!("user vmar should always be a valid handle")
1540            }
1541            zx::Status::WRONG_TYPE => unreachable!("user vmar handle should be a vmar"),
1542            _ => unreachable!("unknown error from op_range on user vmar for mlock: {e}"),
1543        };
1544
1545        if !on_fault && !current_task.kernel().features.mlock_always_onfault {
1546            self.user_vmar
1547                .op_range(zx::VmarOp::PREFETCH, start_addr.ptr(), end_addr - start_addr)
1548                .map_err(op_range_status_to_errno)?;
1549        }
1550
1551        match current_task.kernel().features.mlock_pin_flavor {
1552            MlockPinFlavor::VmarAlwaysNeed => {
1553                self.user_vmar
1554                    .op_range(zx::VmarOp::ALWAYS_NEED, start_addr.ptr(), end_addr - start_addr)
1555                    .map_err(op_range_status_to_errno)?;
1556            }
1557            // The shadow process doesn't use any vmar-level operations to pin memory.
1558            MlockPinFlavor::Noop | MlockPinFlavor::ShadowProcess => (),
1559        }
1560
1561        for (range, mapping, shadow_mapping) in updates {
1562            if let Some(shadow_mapping) = shadow_mapping {
1563                released_mappings.extend_pins(
1564                    self.shadow_mappings_for_mlock.insert(range.clone(), shadow_mapping),
1565                );
1566            }
1567            released_mappings.extend(self.mappings.insert(range, mapping));
1568        }
1569
1570        if failed_to_lock { error!(ENOMEM) } else { Ok(()) }
1571    }
1572
1573    fn munlock(
1574        &mut self,
1575        _current_task: &CurrentTask,
1576        desired_addr: UserAddress,
1577        desired_length: usize,
1578        released_mappings: &mut ReleasedMappings,
1579    ) -> Result<(), Errno> {
1580        let desired_end_addr =
1581            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1582        let start_addr = round_down_to_system_page_size(desired_addr)?;
1583        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1584
1585        let mut updates = vec![];
1586        let mut bytes_mapped_in_range = 0;
1587        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1588            let mut range = range.clone();
1589            let mut mapping = mapping.clone();
1590
1591            // Handle mappings that start before the region to be unlocked.
1592            range.start = std::cmp::max(range.start, start_addr);
1593            // Handle mappings that extend past the region to be unlocked.
1594            range.end = std::cmp::min(range.end, end_addr);
1595
1596            bytes_mapped_in_range += (range.end - range.start) as u64;
1597
1598            if mapping.flags().contains(MappingFlags::LOCKED) {
1599                // This clears the locking for the shadow process pin flavor. It's not currently
1600                // possible to actually unlock pages that were locked with the
1601                // ZX_VMAR_OP_ALWAYS_NEED pin flavor.
1602                mapping.clear_mlock();
1603                updates.push((range, mapping));
1604            }
1605        }
1606
1607        if bytes_mapped_in_range as usize != end_addr - start_addr {
1608            return error!(ENOMEM);
1609        }
1610
1611        for (range, mapping) in updates {
1612            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
1613            released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(range));
1614        }
1615
1616        Ok(())
1617    }
1618
1619    pub fn total_locked_bytes(&self) -> u64 {
1620        self.num_locked_bytes(
1621            UserAddress::from(self.user_vmar_info.base as u64)
1622                ..UserAddress::from((self.user_vmar_info.base + self.user_vmar_info.len) as u64),
1623        )
1624    }
1625
1626    pub fn num_locked_bytes(&self, range: impl RangeBounds<UserAddress>) -> u64 {
1627        self.mappings
1628            .range(range)
1629            .filter(|(_, mapping)| mapping.flags().contains(MappingFlags::LOCKED))
1630            .map(|(range, _)| (range.end - range.start) as u64)
1631            .sum()
1632    }
1633
1634    fn max_address(&self) -> UserAddress {
1635        UserAddress::from_ptr(self.user_vmar_info.base + self.user_vmar_info.len)
1636    }
1637
1638    fn get_mappings_for_vmsplice(
1639        &self,
1640        mm: &Arc<MemoryManager>,
1641        buffers: &UserBuffers,
1642    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
1643        let mut vmsplice_mappings = Vec::new();
1644
1645        for UserBuffer { mut address, length } in buffers.iter().copied() {
1646            let mappings = self.get_contiguous_mappings_at(address, length)?;
1647            for (mapping, length) in mappings {
1648                let vmsplice_payload = match self.get_mapping_backing(mapping) {
1649                    MappingBacking::Memory(m) => VmsplicePayloadSegment {
1650                        addr_offset: address,
1651                        length,
1652                        memory: m.memory().clone(),
1653                        memory_offset: m.address_to_offset(address),
1654                    },
1655                    MappingBacking::PrivateAnonymous => VmsplicePayloadSegment {
1656                        addr_offset: address,
1657                        length,
1658                        memory: self.private_anonymous.backing.clone(),
1659                        memory_offset: address.ptr() as u64,
1660                    },
1661                };
1662                vmsplice_mappings.push(VmsplicePayload::new(Arc::downgrade(mm), vmsplice_payload));
1663
1664                address = (address + length)?;
1665            }
1666        }
1667
1668        Ok(vmsplice_mappings)
1669    }
1670
1671    /// Returns all the mappings starting at `addr`, and continuing until either `length` bytes have
1672    /// been covered or an unmapped page is reached.
1673    ///
1674    /// Mappings are returned in ascending order along with the number of bytes that intersect the
1675    /// requested range. The returned mappings are guaranteed to be contiguous and the total length
1676    /// corresponds to the number of contiguous mapped bytes starting from `addr`, i.e.:
1677    /// - 0 (empty iterator) if `addr` is not mapped.
1678    /// - exactly `length` if the requested range is fully mapped.
1679    /// - the offset of the first unmapped page (between 0 and `length`) if the requested range is
1680    ///   only partially mapped.
1681    ///
1682    /// Returns EFAULT if the requested range overflows or extends past the end of the vmar.
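        ///
        /// As an illustrative sketch (hypothetical layout, assuming 0x1000-byte pages): with
        /// mappings covering 0x10000..0x12000 and 0x12000..0x13000 and nothing mapped at
        /// 0x13000, a call with `addr = 0x11000` and `length = 0x3000` yields the first
        /// mapping with 0x1000 bytes and the second with 0x1000 bytes, i.e. 0x2000 contiguous
        /// mapped bytes in total.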
1683    fn get_contiguous_mappings_at(
1684        &self,
1685        addr: UserAddress,
1686        length: usize,
1687    ) -> Result<impl Iterator<Item = (&Mapping, usize)>, Errno> {
1688        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EFAULT))?;
1689        if end_addr > self.max_address() {
1690            return error!(EFAULT);
1691        }
1692
1693        // Iterate over all contiguous mappings intersecting the requested range.
1694        let mut mappings = self.mappings.range(addr..end_addr);
1695        let mut prev_range_end = None;
1696        let mut offset = 0;
1697        let result = std::iter::from_fn(move || {
1698            if offset != length {
1699                if let Some((range, mapping)) = mappings.next() {
1700                    return match prev_range_end {
1701                        // If this is the first mapping that we are considering, it may not actually
1702                        // contain `addr` at all.
1703                        None if range.start > addr => None,
1704
1705                        // Subsequent mappings may not be contiguous.
1706                        Some(prev_range_end) if range.start != prev_range_end => None,
1707
1708                        // This mapping can be returned.
1709                        _ => {
1710                            let mapping_length = std::cmp::min(length, range.end - addr) - offset;
1711                            offset += mapping_length;
1712                            prev_range_end = Some(range.end);
1713                            Some((mapping, mapping_length))
1714                        }
1715                    };
1716                }
1717            }
1718
1719            None
1720        });
1721
1722        Ok(result)
1723    }
1724
1725    /// Determines whether a fault at the given address could be covered by extending a growsdown
1726    /// mapping.
1727    ///
1728    /// If the address already belongs to a mapping, this function returns `None`. If the next
1729    /// mapping above the given address has the `MappingFlags::GROWSDOWN` flag, this function
1730    /// returns the address at which that mapping starts and the mapping itself. Otherwise, this
1731    /// function returns `None`.
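        ///
        /// As an illustrative sketch (hypothetical layout): if the next mapping at or above an
        /// unmapped `addr` is a `GROWSDOWN` stack mapping starting at address S, this returns
        /// `Some((S, mapping))`; if `addr` already falls inside a mapping, or that next mapping
        /// is not `GROWSDOWN`, it returns `None`.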
1732    fn find_growsdown_mapping(&self, addr: UserAddress) -> Option<(UserAddress, &Mapping)> {
1733        match self.mappings.range(addr..).next() {
1734            Some((range, mapping)) => {
1735                if range.contains(&addr) {
1736                    // |addr| is already contained within a mapping, nothing to grow.
1737                    return None;
1738                } else if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1739                    // The next mapping above the given address does not have the
1740                    // `MappingFlags::GROWSDOWN` flag.
1741                    None
1742                } else {
1743                    Some((range.start, mapping))
1744                }
1745            }
1746            None => None,
1747        }
1748    }
1749
1750    /// Determines if an access at a given address could be covered by extending a growsdown mapping
1751    /// and extends it if possible. Returns true if the given address is covered by a mapping.
1752    fn extend_growsdown_mapping_to_address(
1753        &mut self,
1754        mm: &Arc<MemoryManager>,
1755        addr: UserAddress,
1756        is_write: bool,
1757    ) -> Result<bool, Error> {
1758        let Some((mapping_low_addr, mapping_to_grow)) = self.find_growsdown_mapping(addr) else {
1759            return Ok(false);
1760        };
1761        if is_write && !mapping_to_grow.can_write() {
1762            // Don't grow a read-only GROWSDOWN mapping for a write fault; it won't work.
1763            return Ok(false);
1764        }
1765        if !mapping_to_grow.flags().contains(MappingFlags::ANONYMOUS) {
1766            // Currently, we only grow anonymous mappings.
1767            return Ok(false);
1768        }
1769        let low_addr = (addr - (addr.ptr() as u64 % *PAGE_SIZE))?;
1770        let high_addr = mapping_low_addr;
1771
1772        let length = high_addr
1773            .ptr()
1774            .checked_sub(low_addr.ptr())
1775            .ok_or_else(|| anyhow!("Invalid growth range"))?;
1776
1777        let mut released_mappings = ReleasedMappings::default();
1778        self.map_anonymous(
1779            mm,
1780            DesiredAddress::FixedOverwrite(low_addr),
1781            length,
1782            mapping_to_grow.flags().access_flags(),
1783            mapping_to_grow.flags().options(),
1784            mapping_to_grow.name(),
1785            &mut released_mappings,
1786        )?;
1787        // We can't have any released mappings because `find_growsdown_mapping` will return None if
1788        // a mapping already exists in this range.
1789        assert!(
1790            released_mappings.is_empty(),
1791            "expected to not remove mappings by inserting, got {released_mappings:#?}"
1792        );
1793        Ok(true)
1794    }
1795
1796    /// Reads exactly `bytes.len()` bytes of memory.
1797    ///
1798    /// # Parameters
1799    /// - `addr`: The address to read data from.
1800    /// - `bytes`: The byte array to read into.
1801    fn read_memory<'a>(
1802        &self,
1803        addr: UserAddress,
1804        bytes: &'a mut [MaybeUninit<u8>],
1805    ) -> Result<&'a mut [u8], Errno> {
1806        let mut bytes_read = 0;
1807        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1808            let next_offset = bytes_read + len;
1809            self.read_mapping_memory(
1810                (addr + bytes_read)?,
1811                mapping,
1812                &mut bytes[bytes_read..next_offset],
1813            )?;
1814            bytes_read = next_offset;
1815        }
1816
1817        if bytes_read != bytes.len() {
1818            error!(EFAULT)
1819        } else {
1820            // SAFETY: The created slice is properly aligned/sized since it
1821            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1822            // the same layout as `T`. Also note that `bytes_read` bytes have
1823            // been properly initialized.
1824            let bytes = unsafe {
1825                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1826            };
1827            Ok(bytes)
1828        }
1829    }
1830
1831    /// Reads exactly `bytes.len()` bytes of memory from `addr`.
1832    ///
1833    /// # Parameters
1834    /// - `addr`: The address to read data from.
1835    /// - `bytes`: The byte array to read into.
1836    fn read_mapping_memory<'a>(
1837        &self,
1838        addr: UserAddress,
1839        mapping: &Mapping,
1840        bytes: &'a mut [MaybeUninit<u8>],
1841    ) -> Result<&'a mut [u8], Errno> {
1842        if !mapping.can_read() {
1843            return error!(EFAULT, "read_mapping_memory called on unreadable mapping");
1844        }
1845        match self.get_mapping_backing(mapping) {
1846            MappingBacking::Memory(backing) => backing.read_memory(addr, bytes),
1847            MappingBacking::PrivateAnonymous => self.private_anonymous.read_memory(addr, bytes),
1848        }
1849    }
1850
1851    /// Reads bytes starting at `addr`, continuing until either `bytes.len()` bytes have been read
1852    /// or no more bytes can be read.
1853    ///
1854    /// This is used, for example, to read null-terminated strings where the exact length is not
1855    /// known, only the maximum length is.
1856    ///
1857    /// # Parameters
1858    /// - `addr`: The address to read data from.
1859    /// - `bytes`: The byte array to read into.
1860    fn read_memory_partial<'a>(
1861        &self,
1862        addr: UserAddress,
1863        bytes: &'a mut [MaybeUninit<u8>],
1864    ) -> Result<&'a mut [u8], Errno> {
1865        let mut bytes_read = 0;
1866        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1867            let next_offset = bytes_read + len;
1868            if self
1869                .read_mapping_memory(
1870                    (addr + bytes_read)?,
1871                    mapping,
1872                    &mut bytes[bytes_read..next_offset],
1873                )
1874                .is_err()
1875            {
1876                break;
1877            }
1878            bytes_read = next_offset;
1879        }
1880
1881        // If at least one byte was requested but we got none, it means that `addr` was invalid.
1882        if !bytes.is_empty() && bytes_read == 0 {
1883            error!(EFAULT)
1884        } else {
1885            // SAFETY: The created slice is properly aligned/sized since it
1886            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1887            // the same layout as `T`. Also note that `bytes_read` bytes have
1888            // been properly initialized.
1889            let bytes = unsafe {
1890                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1891            };
1892            Ok(bytes)
1893        }
1894    }
1895
1896    /// Like `read_memory_partial` but only returns the bytes up to and including
1897    /// a null (zero) byte.
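        ///
        /// As an illustrative sketch (hypothetical contents): if the memory at `addr` holds
        /// `b"abc\0def"` and `bytes` is at least 4 bytes long, the returned slice is
        /// `b"abc\0"`; if no null byte is found, the whole partially-read slice is returned.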
1898    fn read_memory_partial_until_null_byte<'a>(
1899        &self,
1900        addr: UserAddress,
1901        bytes: &'a mut [MaybeUninit<u8>],
1902    ) -> Result<&'a mut [u8], Errno> {
1903        let read_bytes = self.read_memory_partial(addr, bytes)?;
1904        let max_len = memchr::memchr(b'\0', read_bytes)
1905            .map_or_else(|| read_bytes.len(), |null_index| null_index + 1);
1906        Ok(&mut read_bytes[..max_len])
1907    }
1908
1909    /// Writes the provided bytes.
1910    ///
1911    /// In case of success, the number of bytes written will always be `bytes.len()`.
1912    ///
1913    /// # Parameters
1914    /// - `addr`: The address to write to.
1915    /// - `bytes`: The bytes to write.
1916    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1917        let mut bytes_written = 0;
1918        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1919            let next_offset = bytes_written + len;
1920            self.write_mapping_memory(
1921                (addr + bytes_written)?,
1922                mapping,
1923                &bytes[bytes_written..next_offset],
1924            )?;
1925            bytes_written = next_offset;
1926        }
1927
1928        if bytes_written != bytes.len() { error!(EFAULT) } else { Ok(bytes.len()) }
1929    }
1930
1931    /// Writes the provided bytes to `addr`.
1932    ///
1933    /// # Parameters
1934    /// - `addr`: The address to write to.
1935    /// - `bytes`: The bytes to write to the memory object.
1936    fn write_mapping_memory(
1937        &self,
1938        addr: UserAddress,
1939        mapping: &Mapping,
1940        bytes: &[u8],
1941    ) -> Result<(), Errno> {
1942        if !mapping.can_write() {
1943            return error!(EFAULT, "write_mapping_memory called on unwritable memory");
1944        }
1945        match self.get_mapping_backing(mapping) {
1946            MappingBacking::Memory(backing) => backing.write_memory(addr, bytes),
1947            MappingBacking::PrivateAnonymous => self.private_anonymous.write_memory(addr, bytes),
1948        }
1949    }
1950
1951    /// Writes bytes starting at `addr`, continuing until either `bytes.len()` bytes have been
1952    /// written or no more bytes can be written.
1953    ///
1954    /// # Parameters
1955    /// - `addr`: The address to write to.
1956    /// - `bytes`: The byte array to write from.
1957    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1958        let mut bytes_written = 0;
1959        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1960            let next_offset = bytes_written + len;
1961            if self
1962                .write_mapping_memory(
1963                    (addr + bytes_written)?,
1964                    mapping,
1965                    &bytes[bytes_written..next_offset],
1966                )
1967                .is_err()
1968            {
1969                break;
1970            }
1971            bytes_written = next_offset;
1972        }
1973
1974        if !bytes.is_empty() && bytes_written == 0 { error!(EFAULT) } else { Ok(bytes_written) }
1975    }
1976
1977    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
1978        let mut bytes_written = 0;
1979        for (mapping, len) in self.get_contiguous_mappings_at(addr, length)? {
1980            let next_offset = bytes_written + len;
1981            if self.zero_mapping((addr + bytes_written)?, mapping, len).is_err() {
1982                break;
1983            }
1984            bytes_written = next_offset;
1985        }
1986
1987        if length != bytes_written { error!(EFAULT) } else { Ok(length) }
1988    }
1989
1990    fn zero_mapping(
1991        &self,
1992        addr: UserAddress,
1993        mapping: &Mapping,
1994        length: usize,
1995    ) -> Result<usize, Errno> {
1996        if !mapping.can_write() {
1997            return error!(EFAULT);
1998        }
1999
2000        match self.get_mapping_backing(mapping) {
2001            MappingBacking::Memory(backing) => backing.zero(addr, length),
2002            MappingBacking::PrivateAnonymous => self.private_anonymous.zero(addr, length),
2003        }
2004    }
2005
2006    pub fn create_memory_backing(
2007        &self,
2008        base: UserAddress,
2009        memory: Arc<MemoryObject>,
2010        memory_offset: u64,
2011    ) -> MappingBacking {
2012        MappingBacking::Memory(Box::new(MappingBackingMemory::new(base, memory, memory_offset)))
2013    }
2014
2015    pub fn get_mapping_backing<'a>(&self, mapping: &'a Mapping) -> &'a MappingBacking {
2016        mapping.get_backing_internal()
2017    }
2018
2019    fn get_aio_context(&self, addr: UserAddress) -> Option<(Range<UserAddress>, Arc<AioContext>)> {
2020        let Some((range, mapping)) = self.mappings.get(addr) else {
2021            return None;
2022        };
2023        let MappingName::AioContext(ref aio_context) = mapping.name() else {
2024            return None;
2025        };
2026        if !mapping.can_read() {
2027            return None;
2028        }
2029        Some((range.clone(), aio_context.clone()))
2030    }
2031
2032    fn find_uffd<L>(&self, locked: &mut Locked<L>, addr: UserAddress) -> Option<Arc<UserFault>>
2033    where
2034        L: LockBefore<UserFaultInner>,
2035    {
2036        for userfault in self.userfaultfds.iter() {
2037            if let Some(userfault) = userfault.upgrade() {
2038                if userfault.contains_addr(locked, addr) {
2039                    return Some(userfault);
2040                }
2041            }
2042        }
2043        None
2044    }
2045
2046    pub fn mrelease(&self) -> Result<(), Errno> {
2047        self.private_anonymous
2048            .zero(UserAddress::from_ptr(self.user_vmar_info.base), self.user_vmar_info.len)?;
2049        Ok(())
2050    }
2051
2052    fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
2053        let mut addr = range.start;
2054        let size = range.end - range.start;
2055        for (mapping, len) in self.get_contiguous_mappings_at(addr, size)? {
2056            if !mapping.can_read() {
2057                return error!(EFAULT);
2058            }
2059            // SAFETY: This is operating on a readable restricted mode mapping and will not fault.
2060            zx::Status::ok(unsafe {
2061                zx::sys::zx_cache_flush(
2062                    addr.ptr() as *const u8,
2063                    len,
2064                    zx::sys::ZX_CACHE_FLUSH_DATA | zx::sys::ZX_CACHE_FLUSH_INSN,
2065                )
2066            })
2067            .map_err(impossible_error)?;
2068
2069            addr = (addr + len).unwrap(); // unwrap since we're iterating within the address space.
2070        }
2071        // Did we flush the entire range?
2072        if addr != range.end { error!(EFAULT) } else { Ok(()) }
2073    }
2074
2075    // Returns details of mappings in the `user_vmar`, or an empty vector if the `user_vmar` has
2076    // been destroyed.
2077    fn with_zx_mappings<R>(
2078        &self,
2079        current_task: &CurrentTask,
2080        op: impl FnOnce(&[zx::MapInfo]) -> R,
2081    ) -> R {
2082        if self.user_vmar.is_invalid_handle() {
2083            return op(&[]);
2084        };
2085
2086        MapInfoCache::get_or_init(current_task)
2087            .expect("must be able to retrieve map info cache")
2088            .with_map_infos(&self.user_vmar, |infos| {
2089                // No other https://fuchsia.dev/reference/syscalls/object_get_info?hl=en#errors
2090                // are possible, because we created the VMAR and the `zx` crate ensures that the
2091                // info query is well-formed.
2092                op(infos.expect("must be able to query mappings for private user VMAR"))
2093            })
2094    }
2095
2096    /// Register the address space managed by this memory manager for interest in
2097    /// receiving private expedited memory barriers of the given kind.
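        ///
        /// As an illustrative, hedged note (assumption, not stated in the source): this
        /// presumably backs `membarrier(2)` registration commands such as
        /// `MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED` (memory) and
        /// `MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE` (sync-core).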
2098    pub fn register_membarrier_private_expedited(
2099        &mut self,
2100        mtype: MembarrierType,
2101    ) -> Result<(), Errno> {
2102        let registrations = &mut self.forkable_state.membarrier_registrations;
2103        match mtype {
2104            MembarrierType::Memory => {
2105                registrations.memory = true;
2106            }
2107            MembarrierType::SyncCore => {
2108                registrations.sync_core = true;
2109            }
2110        }
2111        Ok(())
2112    }
2113
2114    /// Checks if the address space managed by this memory manager is registered
2115    /// for interest in private expedited barriers of the given kind.
2116    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2117        let registrations = &self.forkable_state.membarrier_registrations;
2118        match mtype {
2119            MembarrierType::Memory => registrations.memory,
2120            MembarrierType::SyncCore => registrations.sync_core,
2121        }
2122    }
2123
2124    fn force_write_memory(
2125        &mut self,
2126        addr: UserAddress,
2127        bytes: &[u8],
2128        released_mappings: &mut ReleasedMappings,
2129    ) -> Result<(), Errno> {
2130        let (range, mapping) = self.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
2131        if range.end < addr.saturating_add(bytes.len()) {
2132            track_stub!(
2133                TODO("https://fxbug.dev/445790710"),
2134                "ptrace poke across multiple mappings"
2135            );
2136            return error!(EFAULT);
2137        }
2138
2139        // Don't create a CoW copy of shared memory; go through the regular write path instead.
2140        if mapping.flags().contains(MappingFlags::SHARED) {
2141            if !mapping.can_write() {
2142                // Linux returns EIO here instead of EFAULT.
2143                return error!(EIO);
2144            }
2145            return self.write_mapping_memory(addr, mapping, bytes);
2146        }
2147
2148        let backing = match self.get_mapping_backing(mapping) {
2149            MappingBacking::PrivateAnonymous => {
2150                // Starnix has a writable handle to private anonymous memory.
2151                return self.private_anonymous.write_memory(addr, bytes);
2152            }
2153            MappingBacking::Memory(backing) => backing,
2154        };
2155
2156        let vmo = backing.memory().as_vmo().ok_or_else(|| errno!(EFAULT))?;
2157        let addr_offset = backing.address_to_offset(addr);
2158        let can_exec =
2159            vmo.basic_info().expect("get VMO handle info").rights.contains(Rights::EXECUTE);
2160
2161        // Attempt to write to the existing VMO in place.
2162        match vmo.write(bytes, addr_offset) {
2163            Ok(()) => {
2164                if can_exec {
2165                    // Issue a barrier to avoid executing stale instructions.
2166                    system_barrier(BarrierType::InstructionStream);
2167                }
2168                return Ok(());
2169            }
2170
2171            Err(zx::Status::ACCESS_DENIED) => { /* Fall through */ }
2172
2173            Err(status) => {
2174                return Err(MemoryManager::get_errno_for_vmo_err(status));
2175            }
2176        }
2177
2178        // Create a CoW child of the entire VMO and swap it in as the mapping's backing.
2179        let mapping_offset = backing.address_to_offset(range.start);
2180        let len = range.end - range.start;
2181
2182        // 1. Obtain a writable child of the VMO.
2183        let size = vmo.get_size().map_err(MemoryManager::get_errno_for_vmo_err)?;
2184        let child_vmo = vmo
2185            .create_child(VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, size)
2186            .map_err(MemoryManager::get_errno_for_vmo_err)?;
2187
2188        // 2. Modify the memory.
2189        child_vmo.write(bytes, addr_offset).map_err(MemoryManager::get_errno_for_vmo_err)?;
2190
2191        // 3. If needed, remint the VMO as executable. Zircon flushes instruction caches when
2192        // mapping executable memory below, so a barrier isn't necessary here.
2193        let child_vmo = if can_exec {
2194            child_vmo
2195                .replace_as_executable(&VMEX_RESOURCE)
2196                .map_err(MemoryManager::get_errno_for_vmo_err)?
2197        } else {
2198            child_vmo
2199        };
2200
2201        // 4. Map the new VMO into user VMAR
2202        let memory = Arc::new(MemoryObject::from(child_vmo));
2203        let mapped_addr = self.map_in_user_vmar(
2204            SelectedAddress::FixedOverwrite(range.start),
2205            &memory,
2206            mapping_offset,
2207            len,
2208            mapping.flags(),
2209            false,
2210        )?;
2211        assert_eq!(mapped_addr, range.start);
2212
2213        // 5. Update mappings
2214        let new_backing = MappingBackingMemory::new(range.start, memory, mapping_offset);
2215
2216        let mut new_mapping = mapping.clone();
2217        new_mapping.set_backing_internal(MappingBacking::Memory(Box::new(new_backing)));
2218
2219        let range = range.clone();
2220        released_mappings.extend(self.mappings.insert(range, new_mapping));
2221
2222        Ok(())
2223    }
2224
2225    fn set_brk<L>(
2226        &mut self,
2227        locked: &mut Locked<L>,
2228        current_task: &CurrentTask,
2229        mm: &Arc<MemoryManager>,
2230        addr: UserAddress,
2231        released_mappings: &mut ReleasedMappings,
2232    ) -> Result<UserAddress, Errno>
2233    where
2234        L: LockBefore<ThreadGroupLimits>,
2235    {
2236        let rlimit_data = std::cmp::min(
2237            PROGRAM_BREAK_LIMIT,
2238            current_task.thread_group().get_rlimit(locked, Resource::DATA),
2239        );
2240
2241        let brk = match self.brk.clone() {
2242            None => {
2243                let brk = ProgramBreak { base: self.brk_origin, current: self.brk_origin };
2244                self.brk = Some(brk.clone());
2245                brk
2246            }
2247            Some(brk) => brk,
2248        };
2249
2250        let Ok(last_address) = brk.base + rlimit_data else {
2251            // The requested program break is out-of-range. We're supposed to simply
2252            // return the current program break.
2253            return Ok(brk.current);
2254        };
2255
2256        if addr < brk.base || addr > last_address {
2257            // The requested program break is out-of-range. We're supposed to simply
2258            // return the current program break.
2259            return Ok(brk.current);
2260        }
2261
2262        let old_end = brk.current.round_up(*PAGE_SIZE).unwrap();
2263        let new_end = addr.round_up(*PAGE_SIZE).unwrap();
2264
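            // Illustrative sketch with hypothetical values (not from the source), assuming
            // 0x1000-byte pages: with brk.current at 0x2000_0800 and addr = 0x2000_3000,
            // old_end rounds up to 0x2000_1000 and new_end to 0x2000_3000, so the Greater arm
            // below maps 0x2000 bytes of new heap pages at old_end.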
2265        match new_end.cmp(&old_end) {
2266            std::cmp::Ordering::Less => {
2267                // Shrinking the program break removes any mapped pages in the
2268                // affected range, regardless of whether they were actually program
2269                // break pages or belonged to other mappings.
2270                let delta = old_end - new_end;
2271
2272                if self.unmap(mm, new_end, delta, released_mappings).is_err() {
2273                    return Ok(brk.current);
2274                }
2275            }
2276            std::cmp::Ordering::Greater => {
2277                let range = old_end..new_end;
2278                let delta = new_end - old_end;
2279
2280                // Check for mappings over the program break region.
2281                if self.mappings.range(range).next().is_some() {
2282                    return Ok(brk.current);
2283                }
2284
2285                if self
2286                    .map_anonymous(
2287                        mm,
2288                        DesiredAddress::FixedOverwrite(old_end),
2289                        delta,
2290                        ProtectionFlags::READ | ProtectionFlags::WRITE,
2291                        MappingOptions::ANONYMOUS,
2292                        MappingName::Heap,
2293                        released_mappings,
2294                    )
2295                    .is_err()
2296                {
2297                    return Ok(brk.current);
2298                }
2299            }
2300            _ => {}
2301        };
2302
2303        // Any required updates to the program break succeeded, so update internal state.
2304        let mut new_brk = brk;
2305        new_brk.current = addr;
2306        self.brk = Some(new_brk);
2307
2308        Ok(addr)
2309    }
2310
2311    fn register_with_uffd<L>(
2312        &mut self,
2313        locked: &mut Locked<L>,
2314        addr: UserAddress,
2315        length: usize,
2316        userfault: &Arc<UserFault>,
2317        mode: FaultRegisterMode,
2318        released_mappings: &mut ReleasedMappings,
2319    ) -> Result<(), Errno>
2320    where
2321        L: LockBefore<UserFaultInner>,
2322    {
2323        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2324        let range_for_op = addr..end_addr;
2325        let mut updates = vec![];
2326
2327        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2328            if !mapping.private_anonymous() {
2329                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2330                return error!(EINVAL);
2331            }
2332            if mapping.flags().contains(MappingFlags::UFFD) {
2333                return error!(EBUSY);
2334            }
2335            let range = range.intersect(&range_for_op);
2336            let mut mapping = mapping.clone();
2337            mapping.set_uffd(mode);
2338            updates.push((range, mapping));
2339        }
2340        if updates.is_empty() {
2341            return error!(EINVAL);
2342        }
2343
2344        self.protect_vmar_range(addr, length, ProtectionFlags::empty())
2345            .expect("Failed to remove protections on uffd-registered range");
2346
2347        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2348        for (range, mapping) in updates {
2349            released_mappings.extend(self.mappings.insert(range, mapping));
2350        }
2351
2352        userfault.insert_pages(locked, range_for_op, false);
2353
2354        Ok(())
2355    }
2356
2357    fn unregister_range_from_uffd<L>(
2358        &mut self,
2359        locked: &mut Locked<L>,
2360        userfault: &Arc<UserFault>,
2361        addr: UserAddress,
2362        length: usize,
2363        released_mappings: &mut ReleasedMappings,
2364    ) -> Result<(), Errno>
2365    where
2366        L: LockBefore<UserFaultInner>,
2367    {
2368        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2369        let range_for_op = addr..end_addr;
2370        let mut updates = vec![];
2371
2372        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2373            if !mapping.private_anonymous() {
2374                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2375                return error!(EINVAL);
2376            }
2377            if mapping.flags().contains(MappingFlags::UFFD) {
2378                let range = range.intersect(&range_for_op);
2379                if userfault.remove_pages(locked, range.clone()) {
2380                    let mut mapping = mapping.clone();
2381                    mapping.clear_uffd();
2382                    updates.push((range, mapping));
2383                }
2384            }
2385        }
2386        for (range, mapping) in updates {
2387            let length = range.end - range.start;
2388            let restored_flags = mapping.flags().access_flags();
2389
2390            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2391
2392            self.protect_vmar_range(range.start, length, restored_flags)
2393                .expect("Failed to restore original protection bits on uffd-registered range");
2394        }
2395        Ok(())
2396    }
2397
2398    fn unregister_uffd<L>(
2399        &mut self,
2400        locked: &mut Locked<L>,
2401        userfault: &Arc<UserFault>,
2402        released_mappings: &mut ReleasedMappings,
2403    ) where
2404        L: LockBefore<UserFaultInner>,
2405    {
2406        let mut updates = vec![];
2407
2408        for (range, mapping) in self.mappings.iter() {
2409            if mapping.flags().contains(MappingFlags::UFFD) {
2410                for range in userfault.get_registered_pages_overlapping_range(locked, range.clone())
2411                {
2412                    let mut mapping = mapping.clone();
2413                    mapping.clear_uffd();
2414                    updates.push((range.clone(), mapping));
2415                }
2416            }
2417        }
2418        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2419        for (range, mapping) in updates {
2420            let length = range.end - range.start;
2421            let restored_flags = mapping.flags().access_flags();
2422            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2423            // We can't recover from an error here, as this runs during cleanup.
2424            self.protect_vmar_range(range.start, length, restored_flags)
2425                .expect("Failed to restore original protection bits on uffd-registered range");
2426        }
2427
2428        userfault.remove_pages(
2429            locked,
2430            UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
2431                ..UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS),
2432        );
2433
2434        let weak_userfault = Arc::downgrade(userfault);
2435        self.userfaultfds.retain(|uf| !Weak::ptr_eq(uf, &weak_userfault));
2436    }
2437
2438    fn set_mapping_name(
2439        &mut self,
2440        addr: UserAddress,
2441        length: usize,
2442        name: Option<FsString>,
2443        released_mappings: &mut ReleasedMappings,
2444    ) -> Result<(), Errno> {
2445        if addr.ptr() % *PAGE_SIZE as usize != 0 {
2446            return error!(EINVAL);
2447        }
2448        let end = match addr.checked_add(length) {
2449            Some(addr) => addr.round_up(*PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
2450            None => return error!(EINVAL),
2451        };
2452
2453        let mappings_in_range =
2454            self.mappings.range(addr..end).map(|(r, m)| (r.clone(), m.clone())).collect::<Vec<_>>();
2455
2456        if mappings_in_range.is_empty() {
2457            return error!(EINVAL);
2458        }
2459        if !mappings_in_range.first().unwrap().0.contains(&addr) {
2460            return error!(ENOMEM);
2461        }
2462
2463        let mut last_range_end = None;
2464        // There's no get_mut on RangeMap, because it would be hard to implement correctly in
2465        // combination with merging of adjacent mappings. Instead, make a copy, change the copy,
2466        // and insert the copy.
2467        for (mut range, mut mapping) in mappings_in_range {
2468            if let MappingName::File(_) = mapping.name() {
2469                // It's invalid to assign a name to a file-backed mapping.
2470                return error!(EBADF);
2471            }
2472            // Handle mappings that start before the region to be named.
2473            range.start = std::cmp::max(range.start, addr);
2474            // Handle mappings that extend past the region to be named.
2475            range.end = std::cmp::min(range.end, end);
2476
2477            if let Some(last_range_end) = last_range_end {
2478                if last_range_end != range.start {
2479                    // The name must apply to a contiguous range of mapped pages.
2480                    return error!(ENOMEM);
2481                }
2482            }
2483            last_range_end = Some(range.end.round_up(*PAGE_SIZE)?);
2484            // TODO(b/310255065): We have no place to store names in a way visible to programs
2485            // outside of Starnix, such as memory analysis tools.
2486            if let MappingBacking::Memory(backing) = self.get_mapping_backing(&mapping) {
2487                match &name {
2488                    Some(memory_name) => {
2489                        backing.memory().set_zx_name(memory_name);
2490                    }
2491                    None => {
2492                        backing.memory().set_zx_name(b"");
2493                    }
2494                }
2495            }
2496            mapping.set_name(match &name {
2497                Some(name) => MappingName::Vma(FlyByteStr::new(name.as_bytes())),
2498                None => MappingName::None,
2499            });
2500            released_mappings.extend(self.mappings.insert(range, mapping));
2501        }
2502        if let Some(last_range_end) = last_range_end {
2503            if last_range_end < end {
2504                // The name must apply to a contiguous range of mapped pages.
2505                return error!(ENOMEM);
2506            }
2507        }
2508        Ok(())
2509    }
2510}
2511
2512fn create_user_vmar(vmar: &zx::Vmar, vmar_info: &zx::VmarInfo) -> Result<zx::Vmar, zx::Status> {
2513    let (vmar, ptr) = vmar.allocate(
2514        0,
2515        vmar_info.len,
2516        zx::VmarFlags::SPECIFIC
2517            | zx::VmarFlags::CAN_MAP_SPECIFIC
2518            | zx::VmarFlags::CAN_MAP_READ
2519            | zx::VmarFlags::CAN_MAP_WRITE
2520            | zx::VmarFlags::CAN_MAP_EXECUTE,
2521    )?;
2522    assert_eq!(ptr, vmar_info.base);
2523    Ok(vmar)
2524}
2525
2526/// A memory manager for another thread.
2527///
2528/// When accessing memory through this object, we use less efficient codepaths that work across
2529/// address spaces.
2530pub struct RemoteMemoryManager {
2531    mm: Arc<MemoryManager>,
2532}
2533
2534impl RemoteMemoryManager {
2535    fn new(mm: Arc<MemoryManager>) -> Self {
2536        Self { mm }
2537    }
2538}
2539
2540// If we just have a MemoryManager, we cannot assume that its address space is current, which means
2541// we need to use the slower "syscall" mechanism to access its memory.
2542impl MemoryAccessor for RemoteMemoryManager {
2543    fn read_memory<'a>(
2544        &self,
2545        addr: UserAddress,
2546        bytes: &'a mut [MaybeUninit<u8>],
2547    ) -> Result<&'a mut [u8], Errno> {
2548        self.mm.syscall_read_memory(addr, bytes)
2549    }
2550
2551    fn read_memory_partial_until_null_byte<'a>(
2552        &self,
2553        addr: UserAddress,
2554        bytes: &'a mut [MaybeUninit<u8>],
2555    ) -> Result<&'a mut [u8], Errno> {
2556        self.mm.syscall_read_memory_partial_until_null_byte(addr, bytes)
2557    }
2558
2559    fn read_memory_partial<'a>(
2560        &self,
2561        addr: UserAddress,
2562        bytes: &'a mut [MaybeUninit<u8>],
2563    ) -> Result<&'a mut [u8], Errno> {
2564        self.mm.syscall_read_memory_partial(addr, bytes)
2565    }
2566
2567    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2568        self.mm.syscall_write_memory(addr, bytes)
2569    }
2570
2571    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2572        self.mm.syscall_write_memory_partial(addr, bytes)
2573    }
2574
2575    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
2576        self.mm.syscall_zero(addr, length)
2577    }
2578}
2579
2580impl TaskMemoryAccessor for RemoteMemoryManager {
2581    fn maximum_valid_address(&self) -> Option<UserAddress> {
2582        Some(self.mm.maximum_valid_user_address)
2583    }
2584}
2585
2586impl MemoryManager {
2587    pub fn summarize(&self, summary: &mut crate::mm::MappingSummary) {
2588        let state = self.state.read();
2589        for (_, mapping) in state.mappings.iter() {
2590            summary.add(&state, mapping);
2591        }
2592    }
2593
2594    pub fn get_mappings_for_vmsplice(
2595        self: &Arc<MemoryManager>,
2596        buffers: &UserBuffers,
2597    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
2598        self.state.read().get_mappings_for_vmsplice(self, buffers)
2599    }
2600
2601    pub fn has_same_address_space(&self, other: &Self) -> bool {
2602        self.root_vmar == other.root_vmar
2603    }
2604
2605    pub fn unified_read_memory<'a>(
2606        &self,
2607        current_task: &CurrentTask,
2608        addr: UserAddress,
2609        bytes: &'a mut [MaybeUninit<u8>],
2610    ) -> Result<&'a mut [u8], Errno> {
2611        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2612
2613        if let Some(usercopy) = usercopy() {
2614            let (read_bytes, unread_bytes) = usercopy.copyin(addr.ptr(), bytes);
2615            if unread_bytes.is_empty() { Ok(read_bytes) } else { error!(EFAULT) }
2616        } else {
2617            self.syscall_read_memory(addr, bytes)
2618        }
2619    }
2620
2621    pub fn syscall_read_memory<'a>(
2622        &self,
2623        addr: UserAddress,
2624        bytes: &'a mut [MaybeUninit<u8>],
2625    ) -> Result<&'a mut [u8], Errno> {
2626        self.state.read().read_memory(addr, bytes)
2627    }
2628
2629    pub fn unified_read_memory_partial_until_null_byte<'a>(
2630        &self,
2631        current_task: &CurrentTask,
2632        addr: UserAddress,
2633        bytes: &'a mut [MaybeUninit<u8>],
2634    ) -> Result<&'a mut [u8], Errno> {
2635        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2636
2637        if let Some(usercopy) = usercopy() {
2638            let (read_bytes, unread_bytes) = usercopy.copyin_until_null_byte(addr.ptr(), bytes);
2639            if read_bytes.is_empty() && !unread_bytes.is_empty() {
2640                error!(EFAULT)
2641            } else {
2642                Ok(read_bytes)
2643            }
2644        } else {
2645            self.syscall_read_memory_partial_until_null_byte(addr, bytes)
2646        }
2647    }
2648
2649    pub fn syscall_read_memory_partial_until_null_byte<'a>(
2650        &self,
2651        addr: UserAddress,
2652        bytes: &'a mut [MaybeUninit<u8>],
2653    ) -> Result<&'a mut [u8], Errno> {
2654        self.state.read().read_memory_partial_until_null_byte(addr, bytes)
2655    }
2656
2657    pub fn unified_read_memory_partial<'a>(
2658        &self,
2659        current_task: &CurrentTask,
2660        addr: UserAddress,
2661        bytes: &'a mut [MaybeUninit<u8>],
2662    ) -> Result<&'a mut [u8], Errno> {
2663        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2664
2665        if let Some(usercopy) = usercopy() {
2666            let (read_bytes, unread_bytes) = usercopy.copyin(addr.ptr(), bytes);
2667            if read_bytes.is_empty() && !unread_bytes.is_empty() {
2668                error!(EFAULT)
2669            } else {
2670                Ok(read_bytes)
2671            }
2672        } else {
2673            self.syscall_read_memory_partial(addr, bytes)
2674        }
2675    }
2676
2677    pub fn syscall_read_memory_partial<'a>(
2678        &self,
2679        addr: UserAddress,
2680        bytes: &'a mut [MaybeUninit<u8>],
2681    ) -> Result<&'a mut [u8], Errno> {
2682        self.state.read().read_memory_partial(addr, bytes)
2683    }
2684
2685    pub fn unified_write_memory(
2686        &self,
2687        current_task: &CurrentTask,
2688        addr: UserAddress,
2689        bytes: &[u8],
2690    ) -> Result<usize, Errno> {
2691        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2692
2693        if let Some(usercopy) = usercopy() {
2694            let num_copied = usercopy.copyout(bytes, addr.ptr());
2695            if num_copied != bytes.len() {
2696                error!(
2697                    EFAULT,
2698                    format!("expected {:?} bytes, copied {:?} bytes", bytes.len(), num_copied)
2699                )
2700            } else {
2701                Ok(num_copied)
2702            }
2703        } else {
2704            self.syscall_write_memory(addr, bytes)
2705        }
2706    }
2707
2708    /// Write `bytes` to memory address `addr`, making a copy-on-write child of the VMO backing and
2709    /// replacing the mapping if necessary.
2710    ///
2711    /// NOTE: this bypasses userspace's memory protection configuration and should only be called
2712    /// from codepaths, such as ptrace, that are explicitly allowed to do so.
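    ///
    /// A minimal illustrative sketch; `tracee_mm`, `instruction_addr`, and the breakpoint byte are
    /// hypothetical, not taken from a real call site:
    ///
    /// ```ignore
    /// // Patch a software-breakpoint byte into a tracee regardless of its PROT_* settings, the
    /// // way a ptrace-style debugger would.
    /// let breakpoint = [0xCCu8];
    /// tracee_mm.force_write_memory(instruction_addr, &breakpoint)?;
    /// ```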
2713    pub fn force_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<(), Errno> {
2714        let mut state = self.state.write();
2715        let mut released_mappings = ReleasedMappings::default();
2716        let result = state.force_write_memory(addr, bytes, &mut released_mappings);
2717        released_mappings.finalize(state);
2718        result
2719    }
2720
2721    pub fn syscall_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2722        self.state.read().write_memory(addr, bytes)
2723    }
2724
2725    pub fn unified_write_memory_partial(
2726        &self,
2727        current_task: &CurrentTask,
2728        addr: UserAddress,
2729        bytes: &[u8],
2730    ) -> Result<usize, Errno> {
2731        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2732
2733        if let Some(usercopy) = usercopy() {
2734            let num_copied = usercopy.copyout(bytes, addr.ptr());
2735            if num_copied == 0 && !bytes.is_empty() { error!(EFAULT) } else { Ok(num_copied) }
2736        } else {
2737            self.syscall_write_memory_partial(addr, bytes)
2738        }
2739    }
2740
2741    pub fn syscall_write_memory_partial(
2742        &self,
2743        addr: UserAddress,
2744        bytes: &[u8],
2745    ) -> Result<usize, Errno> {
2746        self.state.read().write_memory_partial(addr, bytes)
2747    }
2748
2749    pub fn unified_zero(
2750        &self,
2751        current_task: &CurrentTask,
2752        addr: UserAddress,
2753        length: usize,
2754    ) -> Result<usize, Errno> {
2755        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2756
2757        {
2758            let page_size = *PAGE_SIZE as usize;
2759            // Get the page boundary immediately following `addr` (or `addr` itself if it is
2760            // already page-aligned).
2761            let next_page_boundary = round_up_to_system_page_size(addr.ptr())?;
2762            // The number of bytes needed to zero at least one full page (not just
2763            // a page's worth of bytes) starting at `addr`.
2764            let length_with_atleast_one_full_page = page_size + (next_page_boundary - addr.ptr());
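            // For example, with 4 KiB pages and `addr` = 0x1010:
            //   next_page_boundary = 0x2000, so
            //   length_with_atleast_one_full_page = 0x1000 + 0xFF0 = 0x1FF0 (8176) bytes,
            // i.e. the smallest length starting at `addr` that still covers one whole page.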
2765            // If at least one full page is being zeroed, go through the memory object since Zircon
2766            // can swap the mapped pages with the zero page, which should be cheaper than zeroing
2767            // out a page's worth of bytes manually.
2768            //
2769            // If we are not zeroing out a full page, then go through usercopy
2770            // if unified aspaces is enabled.
2771            if length >= length_with_atleast_one_full_page {
2772                return self.syscall_zero(addr, length);
2773            }
2774        }
2775
2776        if let Some(usercopy) = usercopy() {
2777            if usercopy.zero(addr.ptr(), length) == length { Ok(length) } else { error!(EFAULT) }
2778        } else {
2779            self.syscall_zero(addr, length)
2780        }
2781    }
2782
2783    pub fn syscall_zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
2784        self.state.read().zero(addr, length)
2785    }
2786
2787    /// Obtain a reference to this memory manager that can be used from another thread.
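    ///
    /// A rough usage sketch (the caller, `addr`, and `len` are hypothetical):
    ///
    /// ```ignore
    /// let remote = mm.as_remote();
    /// std::thread::spawn(move || {
    ///     // `RemoteMemoryManager` implements `MemoryAccessor`, so the worker thread can use the
    ///     // syscall-based accessors such as `zero` without the task context.
    ///     let _ = remote.zero(addr, len);
    /// });
    /// ```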
2788    pub fn as_remote(self: &Arc<Self>) -> RemoteMemoryManager {
2789        RemoteMemoryManager::new(self.clone())
2790    }
2791
2792    /// Performs a data and instruction cache flush over the given address range.
2793    pub fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
2794        self.state.read().cache_flush(range)
2795    }
2796
2797    /// Register the address space managed by this memory manager for interest in
2798    /// receiving private expedited memory barriers of the given type.
2799    pub fn register_membarrier_private_expedited(
2800        &self,
2801        mtype: MembarrierType,
2802    ) -> Result<(), Errno> {
2803        self.state.write().register_membarrier_private_expedited(mtype)
2804    }
2805
2806    /// Checks if the address space managed by this memory manager is registered
2807    /// for interest in private expedited barriers of the given kind.
2808    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2809        self.state.read().membarrier_private_expedited_registered(mtype)
2810    }
2811}
2812
2813pub struct MemoryManager {
2814    /// The root VMAR for the child process.
2815    ///
2816    /// Instead of mapping memory directly in this VMAR, we map the memory in
2817    /// `state.user_vmar`.
2818    root_vmar: zx::Vmar,
2819
2820    /// The base address of the root_vmar.
2821    pub base_addr: UserAddress,
2822
2823    /// The futexes in this address space.
2824    pub futex: Arc<FutexTable<PrivateFutexKey>>,
2825
2826    /// Mutable state for the memory manager.
2827    pub state: RwLock<MemoryManagerState>,
2828
2829    /// Whether this address space is dumpable.
2830    pub dumpable: OrderedMutex<DumpPolicy, MmDumpable>,
2831
2832    /// Maximum valid user address for this vmar.
2833    pub maximum_valid_user_address: UserAddress,
2834
2835    /// In-flight payloads enqueued to a pipe as a consequence of a `vmsplice(2)`
2836    /// operation.
2837    ///
2838    /// For details on why we need to keep track of in-flight vmspliced payloads,
2839    /// see [`VmsplicePayload`].
2840    ///
2841    /// For details on why this isn't under the `RwLock` protected `MemoryManagerState`,
2842    /// see [`InflightVmsplicedPayloads::payloads`].
2843    pub inflight_vmspliced_payloads: InflightVmsplicedPayloads,
2844
2845    /// A mechanism to be notified when this `MemoryManager` is destroyed.
2846    pub drop_notifier: DropNotifier,
2847}
2848
2849impl MemoryManager {
2850    pub fn new(root_vmar: zx::Vmar) -> Result<Self, zx::Status> {
2851        let info = root_vmar.info()?;
2852        let user_vmar = create_user_vmar(&root_vmar, &info)?;
2853        let user_vmar_info = user_vmar.info()?;
2854
2855        debug_assert_eq!(RESTRICTED_ASPACE_BASE, user_vmar_info.base);
2856        debug_assert_eq!(RESTRICTED_ASPACE_SIZE, user_vmar_info.len);
2857
2858        Ok(Self::from_vmar(root_vmar, user_vmar, user_vmar_info))
2859    }
2860
2861    fn from_vmar(root_vmar: zx::Vmar, user_vmar: zx::Vmar, user_vmar_info: zx::VmarInfo) -> Self {
2862        // The private anonymous backing memory object extends from user address 0 up to the
2863        // highest mappable address. The pages below `user_vmar_info.base` are never mapped, but
2864        // including them in the memory object makes the math for mapping address to memory object
2865        // offsets simpler.
2866        let backing_size = (user_vmar_info.base + user_vmar_info.len) as u64;
2867
2868        MemoryManager {
2869            root_vmar,
2870            base_addr: UserAddress::from_ptr(user_vmar_info.base),
2871            futex: Arc::<FutexTable<PrivateFutexKey>>::default(),
2872            state: RwLock::new(MemoryManagerState {
2873                user_vmar,
2874                user_vmar_info,
2875                mappings: Default::default(),
2876                private_anonymous: PrivateAnonymousMemoryManager::new(backing_size),
2877                userfaultfds: Default::default(),
2878                shadow_mappings_for_mlock: Default::default(),
2879                forkable_state: Default::default(),
2880            }),
2881            // TODO(security): Reset to DISABLE, or the value in the fs.suid_dumpable sysctl, under
2882            // certain conditions as specified in the prctl(2) man page.
2883            dumpable: OrderedMutex::new(DumpPolicy::User),
2884            maximum_valid_user_address: UserAddress::from_ptr(
2885                user_vmar_info.base + user_vmar_info.len,
2886            ),
2887            inflight_vmspliced_payloads: Default::default(),
2888            drop_notifier: DropNotifier::default(),
2889        }
2890    }
2891
2892    pub fn set_brk<L>(
2893        self: &Arc<Self>,
2894        locked: &mut Locked<L>,
2895        current_task: &CurrentTask,
2896        addr: UserAddress,
2897    ) -> Result<UserAddress, Errno>
2898    where
2899        L: LockBefore<ThreadGroupLimits>,
2900    {
2901        let mut state = self.state.write();
2902        let mut released_mappings = ReleasedMappings::default();
2903        let result = state.set_brk(locked, current_task, self, addr, &mut released_mappings);
2904        released_mappings.finalize(state);
2905        result
2906    }
2907
2908    pub fn register_uffd(&self, userfault: &Arc<UserFault>) {
2909        let mut state = self.state.write();
2910        state.userfaultfds.push(Arc::downgrade(userfault));
2911    }
2912
2913    /// Register a given memory range with a userfault object.
2914    pub fn register_with_uffd<L>(
2915        self: &Arc<Self>,
2916        locked: &mut Locked<L>,
2917        addr: UserAddress,
2918        length: usize,
2919        userfault: &Arc<UserFault>,
2920        mode: FaultRegisterMode,
2921    ) -> Result<(), Errno>
2922    where
2923        L: LockBefore<UserFaultInner>,
2924    {
2925        let mut state = self.state.write();
2926        let mut released_mappings = ReleasedMappings::default();
2927        let result =
2928            state.register_with_uffd(locked, addr, length, userfault, mode, &mut released_mappings);
2929        released_mappings.finalize(state);
2930        result
2931    }
2932
2933    /// Unregister a given range from any userfault objects associated with it.
2934    pub fn unregister_range_from_uffd<L>(
2935        &self,
2936        locked: &mut Locked<L>,
2937        userfault: &Arc<UserFault>,
2938        addr: UserAddress,
2939        length: usize,
2940    ) -> Result<(), Errno>
2941    where
2942        L: LockBefore<UserFaultInner>,
2943    {
2944        let mut state = self.state.write();
2945        let mut released_mappings = ReleasedMappings::default();
2946        let result = state.unregister_range_from_uffd(
2947            locked,
2948            userfault,
2949            addr,
2950            length,
2951            &mut released_mappings,
2952        );
2953        released_mappings.finalize(state);
2954        result
2955    }
2956
2957    /// Unregister any mappings registered with a given userfault object. Used when closing the last
2958    /// file descriptor associated with it.
2959    pub fn unregister_uffd<L>(&self, locked: &mut Locked<L>, userfault: &Arc<UserFault>)
2960    where
2961        L: LockBefore<UserFaultInner>,
2962    {
2963        let mut state = self.state.write();
2964        let mut released_mappings = ReleasedMappings::default();
2965        state.unregister_uffd(locked, userfault, &mut released_mappings);
2966        released_mappings.finalize(state);
2967    }
2968
2969    /// Populate a range of pages registered with a userfaultfd according to a `populate` function.
2970    /// This fails if the pages were not registered with a userfaultfd, or if the page at `addr`
2971    /// was already populated. If a page other than the first one was already populated, `length`
2972    /// is trimmed to cover only the leading unpopulated pages, and this adjusted length is then
2973    /// passed to `populate`. On success, returns the number of populated bytes.
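    ///
    /// A minimal sketch of a `populate` callback (this mirrors what `zero_from_uffd` below does):
    ///
    /// ```ignore
    /// mm.populate_from_uffd(locked, addr, length, &userfault, |state, effective_length| {
    ///     // Only the first `effective_length` bytes may be populated; a later page in the
    ///     // requested range was already resident.
    ///     state.zero(addr, effective_length)
    /// })?;
    /// ```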
2974    pub fn populate_from_uffd<F, L>(
2975        &self,
2976        locked: &mut Locked<L>,
2977        addr: UserAddress,
2978        length: usize,
2979        userfault: &Arc<UserFault>,
2980        populate: F,
2981    ) -> Result<usize, Errno>
2982    where
2983        F: FnOnce(&MemoryManagerState, usize) -> Result<usize, Errno>,
2984        L: LockBefore<UserFaultInner>,
2985    {
2986        let state = self.state.read();
2987
2988        // Check that the range addr..addr+length is covered by a contiguous run of mappings that
2989        // are all registered with a userfault object.
2990        let mut bytes_registered_with_uffd = 0;
2991        for (mapping, len) in state.get_contiguous_mappings_at(addr, length)? {
2992            if mapping.flags().contains(MappingFlags::UFFD) {
2993                // Check that the mapping is registered with the same uffd. This is not required,
2994                // but we don't support cross-uffd operations yet.
2995                if !userfault.contains_addr(locked, addr) {
2996                    track_stub!(
2997                        TODO("https://fxbug.dev/391599171"),
2998                        "operations across different uffds"
2999                    );
3000                    return error!(ENOTSUP);
3001                };
3002            } else {
3003                return error!(ENOENT);
3004            }
3005            bytes_registered_with_uffd += len;
3006        }
3007        if bytes_registered_with_uffd != length {
3008            return error!(ENOENT);
3009        }
3010
3011        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3012
3013        // Determine how many pages in the requested range are already populated
3014        let first_populated =
3015            userfault.get_first_populated_page_after(locked, addr).ok_or_else(|| errno!(ENOENT))?;
3016        // If the very first page is already populated, uffd operations should just return EEXIST
3017        if first_populated == addr {
3018            return error!(EEXIST);
3019        }
3020        // Otherwise it is possible to do an incomplete operation by only populating pages until
3021        // the first populated one.
3022        let trimmed_end = std::cmp::min(first_populated, end_addr);
3023        let effective_length = trimmed_end - addr;
3024
3025        populate(&state, effective_length)?;
3026        userfault.insert_pages(locked, addr..trimmed_end, true);
3027
3028        // Since we used protection bits to force pagefaults, we now need to reverse this change by
3029        // restoring the protections on the underlying Zircon mappings to the "real" protection bits
3030        // that were kept in the Starnix mappings. This will prevent new pagefaults from being
3031        // generated. Only do this on the pages that were populated by this operation.
3032        for (range, mapping) in state.mappings.range(addr..trimmed_end) {
3033            let range_to_protect = range.intersect(&(addr..trimmed_end));
3034            let restored_flags = mapping.flags().access_flags();
3035            let length = range_to_protect.end - range_to_protect.start;
3036            state
3037                .protect_vmar_range(range_to_protect.start, length, restored_flags)
3038                .expect("Failed to restore original protection bits on uffd-registered range");
3039        }
3040        // Return the number of effectively populated bytes, which might be smaller than the
3041        // requested number.
3042        Ok(effective_length)
3043    }
3044
3045    pub fn zero_from_uffd<L>(
3046        &self,
3047        locked: &mut Locked<L>,
3048        addr: UserAddress,
3049        length: usize,
3050        userfault: &Arc<UserFault>,
3051    ) -> Result<usize, Errno>
3052    where
3053        L: LockBefore<UserFaultInner>,
3054    {
3055        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3056            state.zero(addr, effective_length)
3057        })
3058    }
3059
3060    pub fn fill_from_uffd<L>(
3061        &self,
3062        locked: &mut Locked<L>,
3063        addr: UserAddress,
3064        buf: &[u8],
3065        length: usize,
3066        userfault: &Arc<UserFault>,
3067    ) -> Result<usize, Errno>
3068    where
3069        L: LockBefore<UserFaultInner>,
3070    {
3071        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3072            state.write_memory(addr, &buf[..effective_length])
3073        })
3074    }
3075
3076    pub fn copy_from_uffd<L>(
3077        &self,
3078        locked: &mut Locked<L>,
3079        source_addr: UserAddress,
3080        dst_addr: UserAddress,
3081        length: usize,
3082        userfault: &Arc<UserFault>,
3083    ) -> Result<usize, Errno>
3084    where
3085        L: LockBefore<UserFaultInner>,
3086    {
3087        self.populate_from_uffd(locked, dst_addr, length, userfault, |state, effective_length| {
3088            let mut buf = vec![std::mem::MaybeUninit::uninit(); effective_length];
3089            let buf = state.read_memory(source_addr, &mut buf)?;
3090            state.write_memory(dst_addr, &buf[..effective_length])
3091        })
3092    }
3093
3094    /// Create a snapshot of the memory mapping from `self` into `target`. All
3095    /// memory mappings are copied entry-for-entry, and the copies end up at
3096    /// exactly the same addresses.
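    ///
    /// A rough sketch of the fork path (names are illustrative, not the real call sites):
    ///
    /// ```ignore
    /// let child_mm = Arc::new(MemoryManager::new(child_root_vmar)?);
    /// parent_mm.snapshot_to(locked, &child_mm)?;
    /// // `child_mm` now holds copies of the parent's mappings at the same user addresses,
    /// // minus any DONTFORK ranges and with WIPEONFORK ranges zeroed.
    /// ```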
3097    pub fn snapshot_to<L>(
3098        &self,
3099        locked: &mut Locked<L>,
3100        target: &Arc<MemoryManager>,
3101    ) -> Result<(), Errno>
3102    where
3103        L: LockBefore<MmDumpable>,
3104    {
3105        // Hold the lock throughout the operation to uphold memory manager's invariants.
3106        // See mm/README.md.
3107        let state: &mut MemoryManagerState = &mut self.state.write();
3108        let mut target_state = target.state.write();
3109        let mut clone_cache = HashMap::<zx::Koid, Arc<MemoryObject>>::new();
3110
3111        let backing_size = (state.user_vmar_info.base + state.user_vmar_info.len) as u64;
3112        target_state.private_anonymous = state.private_anonymous.snapshot(backing_size)?;
3113
3114        for (range, mapping) in state.mappings.iter() {
3115            if mapping.flags().contains(MappingFlags::DONTFORK) {
3116                continue;
3117            }
3118            // Locking is not inherited when forking.
3119            let target_mapping_flags = mapping.flags().difference(MappingFlags::LOCKED);
3120            match state.get_mapping_backing(mapping) {
3121                MappingBacking::Memory(backing) => {
3122                    let memory_offset = backing.address_to_offset(range.start);
3123                    let length = range.end - range.start;
3124
3125                    let target_memory = if mapping.flags().contains(MappingFlags::SHARED)
3126                        || mapping.name() == MappingName::Vvar
3127                    {
3128                        // Note that the Vvar is a special mapping that behaves like a shared mapping but
3129                        // is private to each process.
3130                        backing.memory().clone()
3131                    } else if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3132                        create_anonymous_mapping_memory(length as u64)?
3133                    } else {
3134                        let basic_info = backing.memory().basic_info();
3135                        let memory =
3136                            clone_cache.entry(basic_info.koid).or_insert_with_fallible(|| {
3137                                backing.memory().clone_memory(basic_info.rights)
3138                            })?;
3139                        memory.clone()
3140                    };
3141
3142                    let mut released_mappings = ReleasedMappings::default();
3143                    target_state.map_memory(
3144                        target,
3145                        DesiredAddress::Fixed(range.start),
3146                        target_memory,
3147                        memory_offset,
3148                        length,
3149                        target_mapping_flags,
3150                        mapping.max_access(),
3151                        false,
3152                        mapping.name().clone(),
3153                        &mut released_mappings,
3154                    )?;
3155                    assert!(
3156                        released_mappings.is_empty(),
3157                        "target mm must be empty when cloning, got {released_mappings:#?}"
3158                    );
3159                }
3160                MappingBacking::PrivateAnonymous => {
3161                    let length = range.end - range.start;
3162                    if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3163                        target_state
3164                            .private_anonymous
3165                            .zero(range.start, length)
3166                            .map_err(|_| errno!(ENOMEM))?;
3167                    }
3168
3169                    let target_memory_offset = range.start.ptr() as u64;
3170                    target_state.map_in_user_vmar(
3171                        SelectedAddress::FixedOverwrite(range.start),
3172                        &target_state.private_anonymous.backing,
3173                        target_memory_offset,
3174                        length,
3175                        target_mapping_flags,
3176                        false,
3177                    )?;
3178                    let removed_mappings = target_state.mappings.insert(
3179                        range.clone(),
3180                        Mapping::new_private_anonymous(
3181                            target_mapping_flags,
3182                            mapping.name().clone(),
3183                        ),
3184                    );
3185                    assert!(
3186                        removed_mappings.is_empty(),
3187                        "target mm must be empty when cloning, got {removed_mappings:#?}"
3188                    );
3189                }
3190            };
3191        }
3192
3193        target_state.forkable_state = state.forkable_state.clone();
3194
3195        let self_dumpable = *self.dumpable.lock(locked);
3196        *target.dumpable.lock(locked) = self_dumpable;
3197
3198        Ok(())
3199    }
3200
3201    /// Returns the replacement `MemoryManager` to be used by the `exec()`ing task.
3202    ///
3203    /// POSIX requires that "a call to any exec function from a process with more than one thread
3204    /// shall result in all threads being terminated and the new executable being loaded and
3205    /// executed. No destructor functions or cleanup handlers shall be called".
3206    /// The caller is responsible for having ensured that this is the only `Task` in the
3207    /// `ThreadGroup`, and thereby the `zx::process`, such that it is safe to tear down the Zircon
3208    /// userspace VMAR for the current address-space.
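    ///
    /// A minimal sketch of the calling pattern (hypothetical names; the real call site lives in
    /// the exec() syscall path):
    ///
    /// ```ignore
    /// let new_mm: Arc<MemoryManager> = old_mm.exec(exe_node, arch_width)?;
    /// // The task switches over to `new_mm`; the old user mappings have already been destroyed.
    /// ```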
3209    pub fn exec(
3210        &self,
3211        exe_node: NamespaceNode,
3212        arch_width: ArchWidth,
3213    ) -> Result<Arc<Self>, zx::Status> {
3214        // To safeguard against concurrent accesses by other tasks through this `MemoryManager`, the
3215        // following steps are performed while holding the write lock on this instance:
3216        //
3217        // 1. All `mappings` are removed, so that remote `MemoryAccessor` calls will fail.
3218        // 2. The `user_vmar` is `destroy()`ed to free-up the user address-space.
3219        // 3. The new `user_vmar` is created, to re-reserve the user address-space.
3220        //
3221        // Once these steps are complete the lock must first be dropped, after which it is safe for
3222        // the old mappings to be dropped.
3223        let (_old_mappings, user_vmar) = {
3224            let mut state = self.state.write();
3225            let mut info = self.root_vmar.info()?;
3226
3227            // SAFETY: This operation is safe because this is the only `Task` active in the address-
3228            // space, and accesses by remote tasks will use syscalls on the `root_vmar`.
3229            unsafe { state.user_vmar.destroy()? }
3230            state.user_vmar = zx::NullableHandle::invalid().into();
3231
3232            if arch_width.is_arch32() {
3233                info.len = (LOWER_4GB_LIMIT.ptr() - info.base) as usize;
3234            } else {
3235                info.len = RESTRICTED_ASPACE_HIGHEST_ADDRESS - info.base;
3236            }
3237
3238            // Create the new userspace VMAR, to ensure that the address range is (re-)reserved.
3239            let user_vmar = create_user_vmar(&self.root_vmar, &info)?;
3240
3241            (std::mem::replace(&mut state.mappings, Default::default()), user_vmar)
3242        };
3243
3244        // Wrap the new user address-space VMAR into a new `MemoryManager`.
3245        let root_vmar = self.root_vmar.duplicate_handle(zx::Rights::SAME_RIGHTS)?;
3246        let user_vmar_info = user_vmar.info()?;
3247        let new_mm = Self::from_vmar(root_vmar, user_vmar, user_vmar_info);
3248
3249        // Initialize the new `MemoryManager` state.
3250        new_mm.state.write().executable_node = Some(exe_node);
3251
3252        // Initialize the appropriate address-space layout for the `arch_width`.
3253        new_mm.initialize_mmap_layout(arch_width)?;
3254
3255        Ok(Arc::new(new_mm))
3256    }
3257
3258    pub fn initialize_mmap_layout(&self, arch_width: ArchWidth) -> Result<(), Errno> {
3259        let mut state = self.state.write();
3260
3261        // Place the stack at the end of the address space, subject to ASLR adjustment.
3262        state.stack_origin = UserAddress::from_ptr(
3263            state.user_vmar_info.base + state.user_vmar_info.len
3264                - MAX_STACK_SIZE
3265                - generate_random_offset_for_aslr(arch_width),
3266        )
3267        .round_up(*PAGE_SIZE)?;
3268
3269        // Set the highest address that `mmap` will assign to the allocations that don't ask for a
3270        // specific address, subject to ASLR adjustment.
3271        state.mmap_top = state
3272            .stack_origin
3273            .checked_sub(generate_random_offset_for_aslr(arch_width))
3274            .ok_or_else(|| errno!(EINVAL))?;
3275        Ok(())
3276    }
3277
3278    // Test tasks are not initialized by exec; simulate its behavior by initializing memory layout
3279    // as if a zero-size executable was loaded.
3280    pub fn initialize_mmap_layout_for_test(self: &Arc<Self>, arch_width: ArchWidth) {
3281        self.initialize_mmap_layout(arch_width).unwrap();
3282        let fake_executable_addr = self.get_random_base_for_executable(arch_width, 0).unwrap();
3283        self.initialize_brk_origin(arch_width, fake_executable_addr).unwrap();
3284    }
3285
3286    pub fn initialize_brk_origin(
3287        self: &Arc<Self>,
3288        arch_width: ArchWidth,
3289        executable_end: UserAddress,
3290    ) -> Result<(), Errno> {
3291        self.state.write().brk_origin = executable_end
3292            .checked_add(generate_random_offset_for_aslr(arch_width))
3293            .ok_or_else(|| errno!(EINVAL))?;
3294        Ok(())
3295    }
3296
3297
3298    // Get a randomised address for loading a position-independent executable.
3299    pub fn get_random_base_for_executable(
3300        &self,
3301        arch_width: ArchWidth,
3302        length: usize,
3303    ) -> Result<UserAddress, Errno> {
3304        let state = self.state.read();
3305
3306        // Place it at approx. 2/3 of the available mmap space, subject to ASLR adjustment.
3307        let base = round_up_to_system_page_size(2 * state.mmap_top.ptr() / 3).unwrap()
3308            + generate_random_offset_for_aslr(arch_width);
3309        if base.checked_add(length).ok_or_else(|| errno!(EINVAL))? <= state.mmap_top.ptr() {
3310            Ok(UserAddress::from_ptr(base))
3311        } else {
3312            error!(EINVAL)
3313        }
3314    }
3315    pub fn executable_node(&self) -> Option<NamespaceNode> {
3316        self.state.read().executable_node.clone()
3317    }
3318
3319    #[track_caller]
3320    pub fn get_errno_for_map_err(status: zx::Status) -> Errno {
3321        match status {
3322            zx::Status::INVALID_ARGS => errno!(EINVAL),
3323            zx::Status::ACCESS_DENIED => errno!(EPERM),
3324            zx::Status::NOT_SUPPORTED => errno!(ENODEV),
3325            zx::Status::NO_MEMORY => errno!(ENOMEM),
3326            zx::Status::NO_RESOURCES => errno!(ENOMEM),
3327            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
3328            zx::Status::ALREADY_EXISTS => errno!(EEXIST),
3329            zx::Status::BAD_STATE => errno!(EINVAL),
3330            _ => impossible_error(status),
3331        }
3332    }
3333
3334    #[track_caller]
3335    pub fn get_errno_for_vmo_err(status: zx::Status) -> Errno {
3336        match status {
3337            zx::Status::NO_MEMORY => errno!(ENOMEM),
3338            zx::Status::ACCESS_DENIED => errno!(EPERM),
3339            zx::Status::NOT_SUPPORTED => errno!(EIO),
3340            zx::Status::BAD_STATE => errno!(EIO),
3341            _ => return impossible_error(status),
3342        }
3343    }
3344
3345    pub fn map_memory(
3346        self: &Arc<Self>,
3347        addr: DesiredAddress,
3348        memory: Arc<MemoryObject>,
3349        memory_offset: u64,
3350        length: usize,
3351        prot_flags: ProtectionFlags,
3352        max_access: Access,
3353        options: MappingOptions,
3354        name: MappingName,
3355    ) -> Result<UserAddress, Errno> {
3356        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
3357
3358        // Unmapped mappings must be released after the state is unlocked.
3359        let mut released_mappings = ReleasedMappings::default();
3360        // Hold the lock throughout the operation to uphold memory manager's invariants.
3361        // See mm/README.md.
3362        let mut state = self.state.write();
3363        let result = state.map_memory(
3364            self,
3365            addr,
3366            memory,
3367            memory_offset,
3368            length,
3369            flags,
3370            max_access,
3371            options.contains(MappingOptions::POPULATE),
3372            name,
3373            &mut released_mappings,
3374        );
3375
3376        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
3377        // in `DirEntry`'s `drop`.
3378        released_mappings.finalize(state);
3379
3380        result
3381    }
3382
3383    pub fn map_anonymous(
3384        self: &Arc<Self>,
3385        addr: DesiredAddress,
3386        length: usize,
3387        prot_flags: ProtectionFlags,
3388        options: MappingOptions,
3389        name: MappingName,
3390    ) -> Result<UserAddress, Errno> {
3391        let mut released_mappings = ReleasedMappings::default();
3392        // Hold the lock throughout the operation to uphold memory manager's invariants.
3393        // See mm/README.md.
3394        let mut state = self.state.write();
3395        let result = state.map_anonymous(
3396            self,
3397            addr,
3398            length,
3399            prot_flags,
3400            options,
3401            name,
3402            &mut released_mappings,
3403        );
3404
3405        released_mappings.finalize(state);
3406
3407        result
3408    }
3409
3410    /// Map the stack into a pre-selected address region
3411    pub fn map_stack(
3412        self: &Arc<Self>,
3413        length: usize,
3414        prot_flags: ProtectionFlags,
3415    ) -> Result<UserAddress, Errno> {
3416        assert!(length <= MAX_STACK_SIZE);
3417        let addr = self.state.read().stack_origin;
3418        // The address range containing stack_origin should normally be available: it's above the
3419        // mmap_top, and this method is called early enough in the process lifetime that only the
3420        // main ELF and the interpreter are already loaded. However, in the rare case that the
3421        // static position-independent executable is overlapping the chosen address, mapping as Hint
3422        // will make mmap choose a new place for it.
3423        // TODO(https://fxbug.dev/370027241): Consider a more robust approach
3424        let stack_addr = self.map_anonymous(
3425            DesiredAddress::Hint(addr),
3426            length,
3427            prot_flags,
3428            MappingOptions::ANONYMOUS | MappingOptions::GROWSDOWN,
3429            MappingName::Stack,
3430        )?;
3431        if stack_addr != addr {
3432            log_warn!(
3433                "An address designated for stack ({}) was unavailable, mapping at {} instead.",
3434                addr,
3435                stack_addr
3436            );
3437        }
3438        Ok(stack_addr)
3439    }
3440
3441    pub fn remap(
3442        self: &Arc<Self>,
3443        current_task: &CurrentTask,
3444        addr: UserAddress,
3445        old_length: usize,
3446        new_length: usize,
3447        flags: MremapFlags,
3448        new_addr: UserAddress,
3449    ) -> Result<UserAddress, Errno> {
3450        let mut released_mappings = ReleasedMappings::default();
3451        // Hold the lock throughout the operation to uphold memory manager's invariants.
3452        // See mm/README.md.
3453        let mut state = self.state.write();
3454        let result = state.remap(
3455            current_task,
3456            self,
3457            addr,
3458            old_length,
3459            new_length,
3460            flags,
3461            new_addr,
3462            &mut released_mappings,
3463        );
3464
3465        released_mappings.finalize(state);
3466
3467        result
3468    }
3469
3470    pub fn unmap(self: &Arc<Self>, addr: UserAddress, length: usize) -> Result<(), Errno> {
3471        let mut released_mappings = ReleasedMappings::default();
3472        // Hold the lock throughout the operation to uphold memory manager's invariants.
3473        // See mm/README.md.
3474        let mut state = self.state.write();
3475        let result = state.unmap(self, addr, length, &mut released_mappings);
3476
3477        released_mappings.finalize(state);
3478
3479        result
3480    }
3481
3482    pub fn protect(
3483        &self,
3484        current_task: &CurrentTask,
3485        addr: UserAddress,
3486        length: usize,
3487        prot_flags: ProtectionFlags,
3488    ) -> Result<(), Errno> {
3489        // Hold the lock throughout the operation to uphold memory manager's invariants.
3490        // See mm/README.md.
3491        let mut state = self.state.write();
3492        let mut released_mappings = ReleasedMappings::default();
3493        let result = state.protect(current_task, addr, length, prot_flags, &mut released_mappings);
3494        released_mappings.finalize(state);
3495        result
3496    }
3497
3498    pub fn madvise(
3499        &self,
3500        current_task: &CurrentTask,
3501        addr: UserAddress,
3502        length: usize,
3503        advice: u32,
3504    ) -> Result<(), Errno> {
3505        let mut state = self.state.write();
3506        let mut released_mappings = ReleasedMappings::default();
3507        let result = state.madvise(current_task, addr, length, advice, &mut released_mappings);
3508        released_mappings.finalize(state);
3509        result
3510    }
3511
3512    pub fn mlock<L>(
3513        &self,
3514        current_task: &CurrentTask,
3515        locked: &mut Locked<L>,
3516        desired_addr: UserAddress,
3517        desired_length: usize,
3518        on_fault: bool,
3519    ) -> Result<(), Errno>
3520    where
3521        L: LockBefore<ThreadGroupLimits>,
3522    {
3523        let mut state = self.state.write();
3524        let mut released_mappings = ReleasedMappings::default();
3525        let result = state.mlock(
3526            current_task,
3527            locked,
3528            desired_addr,
3529            desired_length,
3530            on_fault,
3531            &mut released_mappings,
3532        );
3533        released_mappings.finalize(state);
3534        result
3535    }
3536
3537    pub fn munlock(
3538        &self,
3539        current_task: &CurrentTask,
3540        desired_addr: UserAddress,
3541        desired_length: usize,
3542    ) -> Result<(), Errno> {
3543        let mut state = self.state.write();
3544        let mut released_mappings = ReleasedMappings::default();
3545        let result =
3546            state.munlock(current_task, desired_addr, desired_length, &mut released_mappings);
3547        released_mappings.finalize(state);
3548        result
3549    }
3550
3551    pub fn handle_page_fault(
3552        self: &Arc<Self>,
3553        locked: &mut Locked<Unlocked>,
3554        decoded: PageFaultExceptionReport,
3555        error_code: zx::Status,
3556    ) -> ExceptionResult {
3557        let addr = UserAddress::from(decoded.faulting_address);
3558        // On a uffd-registered range, handle the fault according to the uffd rules.
3559        if error_code == zx::Status::ACCESS_DENIED {
3560            let state = self.state.write();
3561            if let Some((_, mapping)) = state.mappings.get(addr) {
3562                if mapping.flags().contains(MappingFlags::UFFD) {
3563                    // TODO(https://fxbug.dev/391599171): Support other modes
3564                    assert!(mapping.flags().contains(MappingFlags::UFFD_MISSING));
3565
3566                    if let Some(_uffd) = state.find_uffd(locked, addr) {
3567                        // If the SIGBUS feature was set, no event will be sent to the file.
3568                        // Instead, SIGBUS is delivered to the process that triggered the fault.
3569                        // TODO(https://fxbug.dev/391599171): For now we only support this feature,
3570                        // so we assume it is set.
3571                        // Check for the SIGBUS feature when we start supporting running without it.
3572                        return ExceptionResult::Signal(SignalInfo::new(
3573                            SIGBUS,
3574                            BUS_ADRERR as i32,
3575                            SignalDetail::SigFault { addr: decoded.faulting_address },
3576                        ));
3577                    };
3578                }
3579                let exec_denied = decoded.is_execute && !mapping.can_exec();
3580                let write_denied = decoded.is_write && !mapping.can_write();
3581                let read_denied = (!decoded.is_execute && !decoded.is_write) && !mapping.can_read();
3582                // There is a data race when a uffd unregistration and a page fault happen at
3583                // the same time. To detect it, we check whether the access should have been
3584                // rejected according to Starnix's own information about the mapping.
3585                let false_reject = !exec_denied && !write_denied && !read_denied;
3586                if false_reject {
3587                    track_stub!(
3588                        TODO("https://fxbug.dev/435171399"),
3589                        "Inconsistent permission fault"
3590                    );
3591                    return ExceptionResult::Handled;
3592                }
3593            }
3594            std::mem::drop(state);
3595        }
3596
3597        if decoded.not_present {
3598            // A page fault may be resolved by extending a growsdown mapping to cover the faulting
3599            // address. Mark the exception handled if so. Otherwise let the regular handling proceed.
3600
3601            // We should only attempt growth on a not-present fault and we should only extend if the
3602            // access type matches the protection on the GROWSDOWN mapping.
3603            match self.extend_growsdown_mapping_to_address(
3604                UserAddress::from(decoded.faulting_address),
3605                decoded.is_write,
3606            ) {
3607                Ok(true) => {
3608                    return ExceptionResult::Handled;
3609                }
3610                Err(e) => {
3611                    log_warn!("Error handling page fault: {e}")
3612                }
3613                _ => {}
3614            }
3615        }
3616        // For this exception type, the synth_code field in the exception report's context is the
3617        // error generated by the page fault handler. For us this is used to distinguish between a
3618        // segmentation violation and a bus error. Unfortunately this detail is not documented in
3619        // Zircon's public documentation and is only described in the architecture-specific
3620        // exception definitions such as:
3621        // zircon/kernel/arch/x86/include/arch/x86.h
3622        // zircon/kernel/arch/arm64/include/arch/arm64.h
3623        let signo = match error_code {
3624            zx::Status::OUT_OF_RANGE => SIGBUS,
3625            _ => SIGSEGV,
3626        };
3627        ExceptionResult::Signal(SignalInfo::new(
3628            signo,
3629            SI_KERNEL as i32,
3630            SignalDetail::SigFault { addr: decoded.faulting_address },
3631        ))
3632    }
3633
3634    pub fn set_mapping_name(
3635        &self,
3636        addr: UserAddress,
3637        length: usize,
3638        name: Option<FsString>,
3639    ) -> Result<(), Errno> {
3640        let mut state = self.state.write();
3641        let mut released_mappings = ReleasedMappings::default();
3642        let result = state.set_mapping_name(addr, length, name, &mut released_mappings);
3643        released_mappings.finalize(state);
3644        result
3645    }
3646
3647    /// Returns [`Ok`] if the entire range specified by `addr..(addr+length)` contains valid
3648    /// mappings.
3649    ///
3650    /// # Errors
3651    ///
3652    /// Returns [`Err(errno)`] where `errno` is:
3653    ///
3654    ///   - `EINVAL`: `addr` is not page-aligned, or the end of the range overflows the address space,
3655    ///   - `ENOMEM`: one or more pages in the range are not mapped.
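    ///
    /// Illustrative use (hypothetical caller; `addr` and `length` come from userspace):
    ///
    /// ```ignore
    /// // Reject the request up front if any page in [addr, addr + length) is unmapped.
    /// mm.ensure_mapped(addr, length)?;
    /// ```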
3656    pub fn ensure_mapped(&self, addr: UserAddress, length: usize) -> Result<(), Errno> {
3657        if !addr.is_aligned(*PAGE_SIZE) {
3658            return error!(EINVAL);
3659        }
3660
3661        let length = round_up_to_system_page_size(length)?;
3662        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3663        let state = self.state.read();
3664        let mut last_end = addr;
3665        for (range, _) in state.mappings.range(addr..end_addr) {
3666            if range.start > last_end {
3667                // This mapping does not start immediately after the last.
3668                return error!(ENOMEM);
3669            }
3670            last_end = range.end;
3671        }
3672        if last_end < end_addr {
3673            // There is a gap of no mappings at the end of the range.
3674            error!(ENOMEM)
3675        } else {
3676            Ok(())
3677        }
3678    }
3679
3680    /// Returns the memory object mapped at the address and the offset into the memory object of
3681    /// the address. Intended for implementing futexes.
3682    pub fn get_mapping_memory(
3683        &self,
3684        addr: UserAddress,
3685        perms: ProtectionFlags,
3686    ) -> Result<(Arc<MemoryObject>, u64), Errno> {
3687        let state = self.state.read();
3688        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
3689        if !mapping.flags().access_flags().contains(perms) {
3690            return error!(EACCES);
3691        }
3692        match state.get_mapping_backing(mapping) {
3693            MappingBacking::Memory(backing) => {
3694                Ok((Arc::clone(backing.memory()), mapping.address_to_offset(addr)))
3695            }
3696            MappingBacking::PrivateAnonymous => {
3697                Ok((Arc::clone(&state.private_anonymous.backing), addr.ptr() as u64))
3698            }
3699        }
3700    }
3701
3702    /// Does a rough check that the given address is plausibly in the address space of the
3703    /// application. This does not mean the pointer is valid for any particular purpose or that
3704    /// it will remain so!
3705    ///
3706    /// In some syscalls, Linux seems to do some initial validation of the pointer up front to
3707    /// tell the caller early if it's invalid. For example, epoll_wait() returns a vector
3708    /// of events. If the caller passes an invalid pointer, it wants to fail without dropping any
3709    /// events. Failing later when actually copying the required events to userspace would mean
3710    /// those events will be lost. But holding a lock on the memory manager for an asynchronous
3711    /// wait is not desirable.
3712    ///
3713    /// Testing shows that Linux seems to do some initial plausibility checking of the pointer to
3714    /// be able to report common usage errors before doing any (possibly irreversible) work. This
3715    /// checking is easy to get around if you try, so this function is also not required to
3716    /// be particularly robust. Certainly the more advanced cases of races (the memory could be
3717    /// unmapped after this call but before it's used) are not handled.
3718    ///
3719    /// The `buffer_size` parameter is the size of the data structure that needs to fit
3720    /// in the given memory.
3721    ///
3722    /// Returns the error EFAULT if invalid.
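    ///
    /// Illustrative use (a hypothetical epoll_wait()-style caller; `out_addr` and `out_len` are
    /// placeholders):
    ///
    /// ```ignore
    /// // Fail fast with EFAULT before blocking if the output buffer cannot plausibly fit.
    /// mm.check_plausible(out_addr, out_len)?;
    /// ```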
3723    pub fn check_plausible(&self, addr: UserAddress, buffer_size: usize) -> Result<(), Errno> {
3724        let state = self.state.read();
3725
3726        if let Some(range) = state.mappings.last_range() {
3727            if (range.end - buffer_size)? >= addr {
3728                return Ok(());
3729            }
3730        }
3731        error!(EFAULT)
3732    }
3733
3734    pub fn get_aio_context(&self, addr: UserAddress) -> Option<Arc<AioContext>> {
3735        let state = self.state.read();
3736        state.get_aio_context(addr).map(|(_, aio_context)| aio_context)
3737    }
3738
3739    pub fn destroy_aio_context(
3740        self: &Arc<Self>,
3741        addr: UserAddress,
3742    ) -> Result<Arc<AioContext>, Errno> {
3743        let mut released_mappings = ReleasedMappings::default();
3744
3745        // Hold the lock throughout the operation to uphold memory manager's invariants.
3746        // See mm/README.md.
3747        let mut state = self.state.write();
3748
3749        // Validate that this address actually has an AioContext. We need to hold the state lock
3750        // until we actually remove the mappings to ensure that another thread does not manipulate
3751        // the mappings after we've validated that they contain an AioContext.
3752        let Some((range, aio_context)) = state.get_aio_context(addr) else {
3753            return error!(EINVAL);
3754        };
3755
3756        let length = range.end - range.start;
3757        let result = state.unmap(self, range.start, length, &mut released_mappings);
3758
3759        released_mappings.finalize(state);
3760
3761        result.map(|_| aio_context)
3762    }
3763
3764    #[cfg(test)]
3765    pub fn get_mapping_name(
3766        &self,
3767        addr: UserAddress,
3768    ) -> Result<Option<flyweights::FlyByteStr>, Errno> {
3769        let state = self.state.read();
3770        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
3771        if let MappingName::Vma(name) = mapping.name() { Ok(Some(name.clone())) } else { Ok(None) }
3772    }
3773
3774    #[cfg(test)]
3775    pub fn get_mapping_count(&self) -> usize {
3776        let state = self.state.read();
3777        state.mappings.iter().count()
3778    }
3779
3780    pub fn extend_growsdown_mapping_to_address(
3781        self: &Arc<Self>,
3782        addr: UserAddress,
3783        is_write: bool,
3784    ) -> Result<bool, Error> {
3785        self.state.write().extend_growsdown_mapping_to_address(self, addr, is_write)
3786    }
3787
3788    pub fn get_stats(&self, current_task: &CurrentTask) -> MemoryStats {
3789        // Grab our state lock before reading zircon mappings so that the two are consistent.
3790        // Other Starnix threads should not make any changes to the Zircon mappings while we hold
3791        // a read lock to the memory manager state.
3792        let state = self.state.read();
3793
3794        let mut stats = MemoryStats::default();
3795        stats.vm_stack = state.stack_size;
3796
3797        state.with_zx_mappings(current_task, |zx_mappings| {
3798            for zx_mapping in zx_mappings {
3799                // We only care about map info for actual mappings.
3800                let zx_details = zx_mapping.details();
3801                let Some(zx_details) = zx_details.as_mapping() else { continue };
3802                let user_address = UserAddress::from(zx_mapping.base as u64);
3803                let (_, mm_mapping) = state
3804                    .mappings
3805                    .get(user_address)
3806                    .unwrap_or_else(|| panic!("mapping bookkeeping must be consistent with zircon's: not found: {user_address:?}"));
3807                debug_assert_eq!(
3808                    match state.get_mapping_backing(mm_mapping) {
3809                        MappingBacking::Memory(m)=>m.memory().get_koid(),
3810                        MappingBacking::PrivateAnonymous=>state.private_anonymous.backing.get_koid(),
3811                    },
3812                    zx_details.vmo_koid,
3813                    "MemoryManager and Zircon must agree on which VMO is mapped in this range",
3814                );
3815
3816                stats.vm_size += zx_mapping.size;
3817
3818                stats.vm_rss += zx_details.committed_bytes;
3819                stats.vm_swap += zx_details.populated_bytes - zx_details.committed_bytes;
3820
3821                if mm_mapping.flags().contains(MappingFlags::SHARED) {
3822                    stats.rss_shared += zx_details.committed_bytes;
3823                } else if mm_mapping.flags().contains(MappingFlags::ANONYMOUS) {
3824                    stats.rss_anonymous += zx_details.committed_bytes;
3825                } else if let MappingName::File(_) = mm_mapping.name() {
3826                    stats.rss_file += zx_details.committed_bytes;
3827                }
3828
3829                if mm_mapping.flags().contains(MappingFlags::LOCKED) {
3830                    stats.vm_lck += zx_details.committed_bytes;
3831                }
3832
3833                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
3834                    && mm_mapping.flags().contains(MappingFlags::WRITE)
3835                {
3836                    stats.vm_data += zx_mapping.size;
3837                }
3838
3839                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
3840                    && mm_mapping.flags().contains(MappingFlags::EXEC)
3841                {
3842                    stats.vm_exe += zx_mapping.size;
3843                }
3844            }
3845        });
3846
3847        // TODO(https://fxbug.dev/396221597): Placeholder for now. We need kernel support to track
3848        // the committed bytes high water mark.
3849        stats.vm_rss_hwm = STUB_VM_RSS_HWM;
3850        stats
3851    }
3852
3853    pub fn atomic_load_u32_acquire(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
3854        if let Some(usercopy) = usercopy() {
3855            usercopy.atomic_load_u32_acquire(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
3856        } else {
3857            unreachable!("can only control memory ordering of atomics with usercopy");
3858        }
3859    }
3860
3861    pub fn atomic_load_u32_relaxed(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
3862        if let Some(usercopy) = usercopy() {
3863            usercopy.atomic_load_u32_relaxed(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
3864        } else {
3865            // SAFETY: `self.state.read().read_memory` only returns `Ok` if all
3866            // bytes in the buffer were initialized by the read.
3867            let buf = unsafe {
3868                read_to_array(|buf| {
3869                    self.state.read().read_memory(futex_addr.into(), buf).map(|bytes_read| {
3870                        debug_assert_eq!(bytes_read.len(), std::mem::size_of::<u32>())
3871                    })
3872                })
3873            }?;
3874            Ok(u32::from_ne_bytes(buf))
3875        }
3876    }
3877
3878    pub fn atomic_store_u32_relaxed(
3879        &self,
3880        futex_addr: FutexAddress,
3881        value: u32,
3882    ) -> Result<(), Errno> {
3883        if let Some(usercopy) = usercopy() {
3884            usercopy.atomic_store_u32_relaxed(futex_addr.ptr(), value).map_err(|_| errno!(EFAULT))
3885        } else {
3886            self.state.read().write_memory(futex_addr.into(), value.as_bytes())?;
3887            Ok(())
3888        }
3889    }
3890
3891    pub fn atomic_compare_exchange_u32_acq_rel(
3892        &self,
3893        futex_addr: FutexAddress,
3894        current: u32,
3895        new: u32,
3896    ) -> CompareExchangeResult<u32> {
3897        let Some(usercopy) = usercopy() else {
3898            unreachable!("Atomic compare/exchange requires usercopy.");
3899        };
3900        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_u32_acq_rel(
3901            futex_addr.ptr(),
3902            current,
3903            new,
3904        ))
3905    }
3906
3907    pub fn atomic_compare_exchange_weak_u32_acq_rel(
3908        &self,
3909        futex_addr: FutexAddress,
3910        current: u32,
3911        new: u32,
3912    ) -> CompareExchangeResult<u32> {
3913        let Some(usercopy) = usercopy() else {
3914            unreachable!("Atomic compare/exchange requires usercopy.");
3915        };
3916        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_weak_u32_acq_rel(
3917            futex_addr.ptr(),
3918            current,
3919            new,
3920        ))
3921    }
3922
3923    pub fn get_restricted_vmar_info(&self) -> Option<VmarInfo> {
3924        use zx::HandleBased;
3925        if self.root_vmar.is_invalid_handle() {
3926            return None;
3927        }
3928        Some(VmarInfo { base: RESTRICTED_ASPACE_BASE, len: RESTRICTED_ASPACE_SIZE })
3929    }
3930}
3931
3932/// The result of an atomic compare/exchange operation on user memory.
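///
/// Callers typically match on all three outcomes; an illustrative sketch:
///
/// ```ignore
/// match mm.atomic_compare_exchange_u32_acq_rel(futex_addr, expected, desired) {
///     CompareExchangeResult::Success => { /* `desired` was written */ }
///     CompareExchangeResult::Stale { observed } => { /* retry using `observed` */ }
///     CompareExchangeResult::Error(errno) => return Err(errno),
/// }
/// ```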
3933#[derive(Debug, Clone)]
3934pub enum CompareExchangeResult<T> {
3935    /// The current value provided matched the one observed in memory and the new value provided
3936    /// was written.
3937    Success,
3938    /// The provided current value did not match the current value in memory.
3939    Stale { observed: T },
3940    /// There was a general error while accessing the requested memory.
3941    Error(Errno),
3942}
3943
3944impl<T> CompareExchangeResult<T> {
3945    fn from_usercopy(usercopy_res: Result<Result<T, T>, ()>) -> Self {
3946        match usercopy_res {
3947            Ok(Ok(_)) => Self::Success,
3948            Ok(Err(observed)) => Self::Stale { observed },
3949            Err(()) => Self::Error(errno!(EFAULT)),
3950        }
3951    }
3952}
3953
3954impl<T> From<Errno> for CompareExchangeResult<T> {
3955    fn from(e: Errno) -> Self {
3956        Self::Error(e)
3957    }
3958}
3959
3960/// The user-space address at which a mapping should be placed. Used by [`MemoryManager::map`].
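///
/// A sketch of how a hypothetical mmap-style caller might pick a variant; the boolean flags are
/// placeholders, not the actual syscall implementation:
///
/// ```ignore
/// let desired = if no_addr_requested {
///     DesiredAddress::Any
/// } else if map_fixed {
///     // MAP_FIXED-like semantics: existing overlapping mappings get unmapped.
///     DesiredAddress::FixedOverwrite(addr)
/// } else if map_fixed_noreplace {
///     // MAP_FIXED_NOREPLACE-like semantics: fail instead of overwriting.
///     DesiredAddress::Fixed(addr)
/// } else {
///     DesiredAddress::Hint(addr)
/// };
/// ```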
3961#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3962pub enum DesiredAddress {
3963    /// Map at any address chosen by the kernel.
3964    Any,
3965    /// The address is a hint. If the address overlaps an existing mapping a different address may
3966    /// be chosen.
3967    Hint(UserAddress),
3968    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
3969    /// overwrite it), mapping fails.
3970    Fixed(UserAddress),
3971    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
3972    /// overwrite it), the existing mapping should be unmapped.
3973    FixedOverwrite(UserAddress),
3974}
3975
3976/// The user-space address at which a mapping should be placed. Used by [`map_in_vmar`].
3977#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3978enum SelectedAddress {
3979    /// See DesiredAddress::Fixed.
3980    Fixed(UserAddress),
3981    /// See DesiredAddress::FixedOverwrite.
3982    FixedOverwrite(UserAddress),
3983}
3984
3985impl SelectedAddress {
3986    fn addr(&self) -> UserAddress {
3987        match self {
3988            SelectedAddress::Fixed(addr) => *addr,
3989            SelectedAddress::FixedOverwrite(addr) => *addr,
3990        }
3991    }
3992}
3993
3994/// Write one line of the memory map intended for adding to `/proc/self/maps`.
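///
/// For reference, each emitted line has the shape (values illustrative):
///
/// ```text
/// 2f1a00000-2f1a01000 r-xp 00000000 00:00 42                               /data/example.so
/// ```
///
/// with the name column padded so that it starts no earlier than column 74.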
3995fn write_map(
3996    task: &Task,
3997    sink: &mut DynamicFileBuf,
3998    state: &MemoryManagerState,
3999    range: &Range<UserAddress>,
4000    map: &Mapping,
4001) -> Result<(), Errno> {
4002    let line_length = write!(
4003        sink,
4004        "{:08x}-{:08x} {}{}{}{} {:08x} 00:00 {} ",
4005        range.start.ptr(),
4006        range.end.ptr(),
4007        if map.can_read() { 'r' } else { '-' },
4008        if map.can_write() { 'w' } else { '-' },
4009        if map.can_exec() { 'x' } else { '-' },
4010        if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' },
4011        match state.get_mapping_backing(map) {
4012            MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
4013            MappingBacking::PrivateAnonymous => 0,
4014        },
4015        if let MappingName::File(file) = &map.name() { file.name.entry.node.ino } else { 0 }
4016    )?;
4017    let fill_to_name = |sink: &mut DynamicFileBuf| {
4018        // The filename starts at or after column 74 (index 73 when zero-indexed).
4019        for _ in line_length..73 {
4020            sink.write(b" ");
4021        }
4022    };
4023    match &map.name() {
4024        MappingName::None | MappingName::AioContext(_) => {
4025            if map.flags().contains(MappingFlags::SHARED)
4026                && map.flags().contains(MappingFlags::ANONYMOUS)
4027            {
4028                // See proc(5), "/proc/[pid]/map_files/"
4029                fill_to_name(sink);
4030                sink.write(b"/dev/zero (deleted)");
4031            }
4032        }
4033        MappingName::Stack => {
4034            fill_to_name(sink);
4035            sink.write(b"[stack]");
4036        }
4037        MappingName::Heap => {
4038            fill_to_name(sink);
4039            sink.write(b"[heap]");
4040        }
4041        MappingName::Vdso => {
4042            fill_to_name(sink);
4043            sink.write(b"[vdso]");
4044        }
4045        MappingName::Vvar => {
4046            fill_to_name(sink);
4047            sink.write(b"[vvar]");
4048        }
4049        MappingName::File(file) => {
4050            fill_to_name(sink);
4051            // File names can have newlines that need to be escaped before printing.
4052            // According to https://man7.org/linux/man-pages/man5/proc.5.html the only
4053            // escaping applied to paths is replacing newlines with an octal sequence.
4054            let path = file.name.path(task);
4055            sink.write_iter(
4056                path.iter()
4057                    .flat_map(|b| if *b == b'\n' { b"\\012" } else { std::slice::from_ref(b) })
4058                    .copied(),
4059            );
4060        }
4061        MappingName::Vma(name) => {
4062            fill_to_name(sink);
4063            sink.write(b"[anon:");
4064            sink.write(name.as_bytes());
4065            sink.write(b"]");
4066        }
4067        MappingName::Ashmem(name) => {
4068            fill_to_name(sink);
4069            sink.write(b"/dev/ashmem/");
4070            sink.write(name.as_bytes());
4071        }
4072    }
4073    sink.write(b"\n");
4074    Ok(())
4075}
4076
4077#[derive(Default)]
4078pub struct MemoryStats {
4079    pub vm_size: usize,
4080    pub vm_rss: usize,
4081    pub vm_rss_hwm: usize,
4082    pub rss_anonymous: usize,
4083    pub rss_file: usize,
4084    pub rss_shared: usize,
4085    pub vm_data: usize,
4086    pub vm_stack: usize,
4087    pub vm_exe: usize,
4088    pub vm_swap: usize,
4089    pub vm_lck: usize,
4090}
4091
4092/// Implements `/proc/self/maps`.
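///
/// One mapping is emitted per [`SequenceFileSource::next`] call: the cursor is the next
/// user address to look up, and each call writes the first mapping at or after it,
/// returning that mapping's end as the next cursor.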
4093#[derive(Clone)]
4094pub struct ProcMapsFile(WeakRef<Task>);
4095impl ProcMapsFile {
4096    pub fn new_node(task: WeakRef<Task>) -> impl FsNodeOps {
4097        DynamicFile::new_node(Self(task))
4098    }
4099}
4100
4101impl SequenceFileSource for ProcMapsFile {
4102    type Cursor = UserAddress;
4103
4104    fn next(
4105        &self,
4106        _current_task: &CurrentTask,
4107        cursor: UserAddress,
4108        sink: &mut DynamicFileBuf,
4109    ) -> Result<Option<UserAddress>, Errno> {
4110        let task = Task::from_weak(&self.0)?;
4111        let Ok(mm) = task.mm() else {
4112            return Ok(None);
4113        };
4114        let state = mm.state.read();
4115        if let Some((range, map)) = state.mappings.find_at_or_after(cursor) {
4116            write_map(&task, sink, &state, range, map)?;
4117            return Ok(Some(range.end));
4118        }
4119        Ok(None)
4120    }
4121}
4122
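/// Implements `/proc/self/smaps`.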
4123#[derive(Clone)]
4124pub struct ProcSmapsFile(WeakRef<Task>);
4125impl ProcSmapsFile {
4126    pub fn new_node(task: WeakRef<Task>) -> impl FsNodeOps {
4127        DynamicFile::new_node(Self(task))
4128    }
4129}
4130
4131impl DynamicFileSource for ProcSmapsFile {
4132    fn generate(&self, current_task: &CurrentTask, sink: &mut DynamicFileBuf) -> Result<(), Errno> {
4133        let page_size_kb = *PAGE_SIZE / 1024;
4134        let task = Task::from_weak(&self.0)?;
4135        // /proc/<pid>/smaps is empty for kthreads
4136        let Ok(mm) = task.mm() else {
4137            return Ok(());
4138        };
4139        let state = mm.state.read();
4140        state.with_zx_mappings(current_task, |zx_mappings| {
4141            let mut zx_memory_info = RangeMap::<UserAddress, usize>::default();
4142            for idx in 0..zx_mappings.len() {
4143                let zx_mapping = zx_mappings[idx];
4144            // RangeMap uses #[must_use] for its default use case, but this drop is trivial.
4145                let _ = zx_memory_info.insert(
4146                    UserAddress::from_ptr(zx_mapping.base)
4147                        ..UserAddress::from_ptr(zx_mapping.base + zx_mapping.size),
4148                    idx,
4149                );
4150            }
4151
4152            for (mm_range, mm_mapping) in state.mappings.iter() {
4153                let mut committed_bytes = 0;
4154
4155                for (zx_range, zx_mapping_idx) in zx_memory_info.range(mm_range.clone()) {
4156                    let intersect_range = zx_range.intersect(mm_range);
4157                    let zx_mapping = zx_mappings[*zx_mapping_idx];
4158                    let zx_details = zx_mapping.details();
4159                    let Some(zx_details) = zx_details.as_mapping() else { continue };
4160                    let zx_committed_bytes = zx_details.committed_bytes;
4161
4162                    // TODO(https://fxbug.dev/419882465): It can happen that the same Zircon mapping
4163                    // is covered by more than one Starnix mapping. In this case we don't have
4164                    // enough granularity to answer the question of how many committed bytes belong
4165                    // to one mapping or another. Make a best-effort approximation by dividing the
4166                    // committed bytes of a Zircon mapping proportionally.
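                    // Worked example (illustrative numbers): if a 16 KiB Zircon mapping
                    // reports 8 KiB committed and only 4 KiB of it intersects this Starnix
                    // mapping, we attribute 8 KiB * (4 / 16) = 2 KiB here.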
4167                    committed_bytes += if intersect_range != *zx_range {
4168                        let intersection_size =
4169                            intersect_range.end.ptr() - intersect_range.start.ptr();
4170                        let part = intersection_size as f32 / zx_mapping.size as f32;
4171                        let prorated_committed_bytes: f32 = part * zx_committed_bytes as f32;
4172                        prorated_committed_bytes as u64
4173                    } else {
4174                        zx_committed_bytes as u64
4175                    };
4176                    assert_eq!(
4177                        match state.get_mapping_backing(mm_mapping) {
4178                            MappingBacking::Memory(m) => m.memory().get_koid(),
4179                            MappingBacking::PrivateAnonymous =>
4180                                state.private_anonymous.backing.get_koid(),
4181                        },
4182                        zx_details.vmo_koid,
4183                        "MemoryManager and Zircon must agree on which VMO is mapped in this range",
4184                    );
4185                }
4186
4187                write_map(&task, sink, &state, mm_range, mm_mapping)?;
4188
4189                let size_kb = (mm_range.end.ptr() - mm_range.start.ptr()) / 1024;
4190                writeln!(sink, "Size:           {size_kb:>8} kB")?;
4191                let share_count = match state.get_mapping_backing(mm_mapping) {
4192                    MappingBacking::Memory(backing) => {
4193                        let memory = backing.memory();
4194                        if memory.is_clock() {
4195                            // Clock memory mappings are not shared in a meaningful way.
4196                            1
4197                        } else {
4198                            let memory_info = backing.memory().info()?;
4199                            memory_info.share_count as u64
4200                        }
4201                    }
4202                    MappingBacking::PrivateAnonymous => {
4203                        1 // Private mapping
4204                    }
4205                };
4206
4207                let rss_kb = committed_bytes / 1024;
4208                writeln!(sink, "Rss:            {rss_kb:>8} kB")?;
4209
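                // Pss prorates shared RSS by the number of sharers; e.g. (illustrative)
                // 400 kB of shared RSS with a share_count of 4 contributes 100 kB of Pss.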
4210                let pss_kb = if mm_mapping.flags().contains(MappingFlags::SHARED) {
4211                    rss_kb / share_count
4212                } else {
4213                    rss_kb
4214                };
4215                writeln!(sink, "Pss:            {pss_kb:>8} kB")?;
4216
4217                track_stub!(TODO("https://fxbug.dev/322874967"), "smaps dirty pages");
4218                let (shared_dirty_kb, private_dirty_kb) = (0, 0);
4219
4220                let is_shared = share_count > 1;
4221                let shared_clean_kb = if is_shared { rss_kb } else { 0 };
4222                writeln!(sink, "Shared_Clean:   {shared_clean_kb:>8} kB")?;
4223                writeln!(sink, "Shared_Dirty:   {shared_dirty_kb:>8} kB")?;
4224
4225                let private_clean_kb = if is_shared { 0 } else { rss_kb };
4226                writeln!(sink, "Private_Clean:  {private_clean_kb:>8} kB")?;
4227                writeln!(sink, "Private_Dirty:  {private_dirty_kb:>8} kB")?;
4228
4229                let anonymous_kb = if mm_mapping.private_anonymous() { rss_kb } else { 0 };
4230                writeln!(sink, "Anonymous:      {anonymous_kb:>8} kB")?;
4231                writeln!(sink, "KernelPageSize: {page_size_kb:>8} kB")?;
4232                writeln!(sink, "MMUPageSize:    {page_size_kb:>8} kB")?;
4233
4234                let locked_kb =
4235                    if mm_mapping.flags().contains(MappingFlags::LOCKED) { rss_kb } else { 0 };
4236                writeln!(sink, "Locked:         {locked_kb:>8} kB")?;
4237                writeln!(sink, "VmFlags: {}", mm_mapping.vm_flags())?;
4238
4239                track_stub!(TODO("https://fxbug.dev/297444691"), "optional smaps fields");
4240            }
4241            Ok(())
4242        })
4243    }
4244}
4245
4246/// Creates a memory object that can be used in an anonymous mapping for the `mmap` syscall.
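///
/// Illustrative usage (a sketch; real callers size this from the `mmap` request):
///
/// ```ignore
/// let memory = create_anonymous_mapping_memory(*PAGE_SIZE)?;
/// assert!(memory.get_size() >= *PAGE_SIZE);
/// ```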
4247pub fn create_anonymous_mapping_memory(size: u64) -> Result<Arc<MemoryObject>, Errno> {
4248    // mremap can grow memory regions, so make sure the memory object is resizable.
4249    let mut memory = MemoryObject::from(
4250        zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, size).map_err(|s| match s {
4251            zx::Status::NO_MEMORY => errno!(ENOMEM),
4252            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
4253            _ => impossible_error(s),
4254        })?,
4255    )
4256    .with_zx_name(b"starnix:memory_manager");
4257
4260    // TODO(https://fxbug.dev/42056890): Audit replace_as_executable usage
4261    memory = memory.replace_as_executable(&VMEX_RESOURCE).map_err(impossible_error)?;
4262    Ok(Arc::new(memory))
4263}
4264
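/// Returns a page-aligned random offset used to randomize mapping placement (ASLR).
///
/// Worked example with illustrative numbers (not the real constants): with 8 random bits
/// and a 4 KiB page, the offset is uniform over {0, 0x1000, ..., 0xff000}, i.e. 256
/// distinct page-aligned values.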
4265fn generate_random_offset_for_aslr(arch_width: ArchWidth) -> usize {
4266    // Generate a number with ASLR_RANDOM_BITS (or ASLR_32_RANDOM_BITS for 32-bit) bits of randomness.
4267    let randomness = {
4268        let random_bits =
4269            if arch_width.is_arch32() { ASLR_32_RANDOM_BITS } else { ASLR_RANDOM_BITS };
4270        let mask = (1 << random_bits) - 1;
4271        let mut bytes = [0; std::mem::size_of::<usize>()];
4272        zx::cprng_draw(&mut bytes);
4273        usize::from_le_bytes(bytes) & mask
4274    };
4275
4276    // Transform it into a page-aligned offset.
4277    randomness * (*PAGE_SIZE as usize)
4278}
4279
4280#[cfg(test)]
4281mod tests {
4282    use super::*;
4283    use crate::mm::memory_accessor::MemoryAccessorExt;
4284    use crate::mm::syscalls::do_mmap;
4285    use crate::task::syscalls::sys_prctl;
4286    use crate::testing::*;
4287    use crate::vfs::FdNumber;
4288    use assert_matches::assert_matches;
4289    use itertools::assert_equal;
4290    use starnix_sync::{FileOpsCore, LockEqualOrBefore};
4291    use starnix_uapi::user_address::{UserCString, UserRef};
4292    use starnix_uapi::{
4293        MAP_ANONYMOUS, MAP_FIXED, MAP_GROWSDOWN, MAP_PRIVATE, MAP_SHARED, PR_SET_VMA,
4294        PR_SET_VMA_ANON_NAME, PROT_NONE, PROT_READ,
4295    };
4296    use std::ffi::CString;
4297    use zerocopy::{FromBytes, Immutable, KnownLayout};
4298
4299    #[::fuchsia::test]
4300    fn test_mapping_flags() {
4301        let options = MappingOptions::ANONYMOUS;
4302        let access_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
4303        let mapping_flags = MappingFlags::from_access_flags_and_options(access_flags, options);
4304        assert_eq!(mapping_flags.access_flags(), access_flags);
4305        assert_eq!(mapping_flags.options(), options);
4306
4307        let new_access_flags = ProtectionFlags::READ | ProtectionFlags::EXEC;
4308        let adjusted_mapping_flags = mapping_flags.with_access_flags(new_access_flags);
4309        assert_eq!(adjusted_mapping_flags.access_flags(), new_access_flags);
4310        assert_eq!(adjusted_mapping_flags.options(), options);
4311    }
4312
4313    #[::fuchsia::test]
4314    async fn test_brk() {
4315        spawn_kernel_and_run(async |locked, current_task| {
4316            let mm = current_task.mm().unwrap();
4317
4318            // Look up the given addr in the mappings table.
4319            let get_range = |addr: UserAddress| {
4320                let state = mm.state.read();
4321                state.mappings.get(addr).map(|(range, mapping)| (range.clone(), mapping.clone()))
4322            };
4323
4324            // Initialize the program break.
4325            let base_addr = mm
4326                .set_brk(locked, &current_task, UserAddress::default())
4327                .expect("failed to set initial program break");
4328            assert!(base_addr > UserAddress::default());
4329
4330            // Page containing the program break address should not be mapped.
4331            assert_eq!(get_range(base_addr), None);
4332
4333            // Growing it by a single byte results in that page becoming mapped.
4334            let addr0 = mm
4335                .set_brk(locked, &current_task, (base_addr + 1u64).unwrap())
4336                .expect("failed to grow brk");
4337            assert!(addr0 > base_addr);
4338            let (range0, _) = get_range(base_addr).expect("base_addr should be mapped");
4339            assert_eq!(range0.start, base_addr);
4340            assert_eq!(range0.end, (base_addr + *PAGE_SIZE).unwrap());
4341
4342            // Grow the program break by another byte, which won't be enough to cause additional pages to be mapped.
4343            let addr1 = mm
4344                .set_brk(locked, &current_task, (base_addr + 2u64).unwrap())
4345                .expect("failed to grow brk");
4346            assert_eq!(addr1, (base_addr + 2u64).unwrap());
4347            let (range1, _) = get_range(base_addr).expect("base_addr should be mapped");
4348            assert_eq!(range1.start, range0.start);
4349            assert_eq!(range1.end, range0.end);
4350
4351            // Grow the program break by a non-trivial amount and observe the larger mapping.
4352            let addr2 = mm
4353                .set_brk(locked, &current_task, (base_addr + 24893u64).unwrap())
4354                .expect("failed to grow brk");
4355            assert_eq!(addr2, (base_addr + 24893u64).unwrap());
4356            let (range2, _) = get_range(base_addr).expect("base_addr should be mapped");
4357            assert_eq!(range2.start, base_addr);
4358            assert_eq!(range2.end, addr2.round_up(*PAGE_SIZE).unwrap());
4359
4360            // Shrink the program break and observe the smaller mapping.
4361            let addr3 = mm
4362                .set_brk(locked, &current_task, (base_addr + 14832u64).unwrap())
4363                .expect("failed to shrink brk");
4364            assert_eq!(addr3, (base_addr + 14832u64).unwrap());
4365            let (range3, _) = get_range(base_addr).expect("base_addr should be mapped");
4366            assert_eq!(range3.start, base_addr);
4367            assert_eq!(range3.end, addr3.round_up(*PAGE_SIZE).unwrap());
4368
4369            // Shrink the program break close to zero and observe the smaller mapping.
4370            let addr4 = mm
4371                .set_brk(locked, &current_task, (base_addr + 3u64).unwrap())
4372                .expect("failed to drastically shrink brk");
4373            assert_eq!(addr4, (base_addr + 3u64).unwrap());
4374            let (range4, _) = get_range(base_addr).expect("base_addr should be mapped");
4375            assert_eq!(range4.start, base_addr);
4376            assert_eq!(range4.end, addr4.round_up(*PAGE_SIZE).unwrap());
4377
4378            // Shrink the program break to zero and observe that the mapping is entirely gone.
4379            let addr5 = mm
4380                .set_brk(locked, &current_task, base_addr)
4381                .expect("failed to drastically shrink brk to zero");
4382            assert_eq!(addr5, base_addr);
4383            assert_eq!(get_range(base_addr), None);
4384        })
4385        .await;
4386    }
4387
4388    #[::fuchsia::test]
4389    async fn test_mm_exec() {
4390        spawn_kernel_and_run(async |locked, current_task| {
4391            let mm = current_task.mm().unwrap();
4392
4393            let has = |addr: UserAddress| -> bool {
4394                let state = mm.state.read();
4395                state.mappings.get(addr).is_some()
4396            };
4397
4398            let brk_addr = mm
4399                .set_brk(locked, &current_task, UserAddress::default())
4400                .expect("failed to set initial program break");
4401            assert!(brk_addr > UserAddress::default());
4402
4403            // Allocate a single page of BRK space, so that the break base address is mapped.
4404            let _ = mm
4405                .set_brk(locked, &current_task, (brk_addr + 1u64).unwrap())
4406                .expect("failed to grow program break");
4407            assert!(has(brk_addr));
4408
4409            let mapped_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
4410            assert!(mapped_addr > UserAddress::default());
4411            assert!(has(mapped_addr));
4412
4413            let node = current_task.lookup_path_from_root(locked, "/".into()).unwrap();
4414            let new_mm = mm.exec(node, ArchWidth::Arch64).expect("failed to exec memory manager");
4415            current_task.mm.update(Some(new_mm));
4416
4417            assert!(!has(brk_addr));
4418            assert!(!has(mapped_addr));
4419
4420            // Check that the old addresses are actually available for mapping.
4421            let brk_addr2 = map_memory(locked, &current_task, brk_addr, *PAGE_SIZE);
4422            assert_eq!(brk_addr, brk_addr2);
4423            let mapped_addr2 = map_memory(locked, &current_task, mapped_addr, *PAGE_SIZE);
4424            assert_eq!(mapped_addr, mapped_addr2);
4425        })
4426        .await;
4427    }
4428
4429    #[::fuchsia::test]
4430    async fn test_get_contiguous_mappings_at() {
4431        spawn_kernel_and_run(async |locked, current_task| {
4432            let mm = current_task.mm().unwrap();
4433
4434            // Create four one-page mappings with a hole between the third one and the fourth one.
4435            let page_size = *PAGE_SIZE as usize;
4436            let addr_a = (mm.base_addr + 10 * page_size).unwrap();
4437            let addr_b = (mm.base_addr + 11 * page_size).unwrap();
4438            let addr_c = (mm.base_addr + 12 * page_size).unwrap();
4439            let addr_d = (mm.base_addr + 14 * page_size).unwrap();
4440            assert_eq!(map_memory(locked, &current_task, addr_a, *PAGE_SIZE), addr_a);
4441            assert_eq!(map_memory(locked, &current_task, addr_b, *PAGE_SIZE), addr_b);
4442            assert_eq!(map_memory(locked, &current_task, addr_c, *PAGE_SIZE), addr_c);
4443            assert_eq!(map_memory(locked, &current_task, addr_d, *PAGE_SIZE), addr_d);
4444
4445            {
4446                let mm_state = mm.state.read();
4447                // Verify that requesting an unmapped address returns an empty iterator.
4448                assert_equal(
4449                    mm_state.get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 50).unwrap(),
4450                    vec![],
4451                );
4452                assert_equal(
4453                    mm_state.get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 200).unwrap(),
4454                    vec![],
4455                );
4456
4457                // Verify that requesting zero bytes returns an empty iterator.
4458                assert_equal(mm_state.get_contiguous_mappings_at(addr_a, 0).unwrap(), vec![]);
4459
4460                // Verify errors.
4461                assert_eq!(
4462                    mm_state
4463                        .get_contiguous_mappings_at(UserAddress::from(100), usize::MAX)
4464                        .err()
4465                        .unwrap(),
4466                    errno!(EFAULT)
4467                );
4468                assert_eq!(
4469                    mm_state
4470                        .get_contiguous_mappings_at((mm_state.max_address() + 1u64).unwrap(), 0)
4471                        .err()
4472                        .unwrap(),
4473                    errno!(EFAULT)
4474                );
4475            }
4476
4477            assert_eq!(mm.get_mapping_count(), 2);
4478            let mm_state = mm.state.read();
4479            let (map_a, map_b) = {
4480                let mut it = mm_state.mappings.iter();
4481                (it.next().unwrap().1, it.next().unwrap().1)
4482            };
4483
4484            assert_equal(
4485                mm_state.get_contiguous_mappings_at(addr_a, page_size).unwrap(),
4486                vec![(map_a, page_size)],
4487            );
4488
4489            assert_equal(
4490                mm_state.get_contiguous_mappings_at(addr_a, page_size / 2).unwrap(),
4491                vec![(map_a, page_size / 2)],
4492            );
4493
4494            assert_equal(
4495                mm_state.get_contiguous_mappings_at(addr_a, page_size * 3).unwrap(),
4496                vec![(map_a, page_size * 3)],
4497            );
4498
4499            assert_equal(
4500                mm_state.get_contiguous_mappings_at(addr_b, page_size).unwrap(),
4501                vec![(map_a, page_size)],
4502            );
4503
4504            assert_equal(
4505                mm_state.get_contiguous_mappings_at(addr_d, page_size).unwrap(),
4506                vec![(map_b, page_size)],
4507            );
4508
4509            // Verify that results stop if there is a hole.
4510            assert_equal(
4511                mm_state
4512                    .get_contiguous_mappings_at((addr_a + page_size / 2).unwrap(), page_size * 10)
4513                    .unwrap(),
4514                vec![(map_a, page_size * 2 + page_size / 2)],
4515            );
4516
4517            // Verify that results stop at the last mapped page.
4518            assert_equal(
4519                mm_state.get_contiguous_mappings_at(addr_d, page_size * 10).unwrap(),
4520                vec![(map_b, page_size)],
4521            );
4522        })
4523        .await;
4524    }
4525
4526    #[::fuchsia::test]
4527    async fn test_read_write_crossing_mappings() {
4528        spawn_kernel_and_run(async |locked, current_task| {
4529            let mm = current_task.mm().unwrap();
4530            let ma = current_task.deref();
4531
4532            // Map two contiguous pages at fixed addresses, but backed by distinct mappings.
4533            let page_size = *PAGE_SIZE;
4534            let addr = (mm.base_addr + 10 * page_size).unwrap();
4535            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4536            assert_eq!(
4537                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4538                (addr + page_size).unwrap()
4539            );
4540            // Mappings get merged since they are backed by the same memory object.
4541            assert_eq!(mm.get_mapping_count(), 1);
4542
4543            // Write a pattern crossing our two mappings.
4544            let test_addr = (addr + page_size / 2).unwrap();
4545            let data: Vec<u8> = (0..page_size).map(|i| (i % 256) as u8).collect();
4546            ma.write_memory(test_addr, &data).expect("failed to write test data");
4547
4548            // Read it back.
4549            let data_readback =
4550                ma.read_memory_to_vec(test_addr, data.len()).expect("failed to read test data");
4551            assert_eq!(&data, &data_readback);
4552        })
4553        .await;
4554    }
4555
4556    #[::fuchsia::test]
4557    async fn test_read_write_errors() {
4558        spawn_kernel_and_run(async |locked, current_task| {
4559            let ma = current_task.deref();
4560
4561            let page_size = *PAGE_SIZE;
4562            let addr = map_memory(locked, &current_task, UserAddress::default(), page_size);
4563            let buf = vec![0u8; page_size as usize];
4564
4565            // Verify that accessing data that is only partially mapped is an error.
4566            let partial_addr_before = (addr - page_size / 2).unwrap();
4567            assert_eq!(ma.write_memory(partial_addr_before, &buf), error!(EFAULT));
4568            assert_eq!(ma.read_memory_to_vec(partial_addr_before, buf.len()), error!(EFAULT));
4569            let partial_addr_after = (addr + page_size / 2).unwrap();
4570            assert_eq!(ma.write_memory(partial_addr_after, &buf), error!(EFAULT));
4571            assert_eq!(ma.read_memory_to_vec(partial_addr_after, buf.len()), error!(EFAULT));
4572
4573            // Verify that accessing unmapped memory is an error.
4574            let unmapped_addr = (addr - 10 * page_size).unwrap();
4575            assert_eq!(ma.write_memory(unmapped_addr, &buf), error!(EFAULT));
4576            assert_eq!(ma.read_memory_to_vec(unmapped_addr, buf.len()), error!(EFAULT));
4577
4578            // However, accessing zero bytes in unmapped memory is not an error.
4579            ma.write_memory(unmapped_addr, &[]).expect("failed to write no data");
4580            ma.read_memory_to_vec(unmapped_addr, 0).expect("failed to read no data");
4581        })
4582        .await;
4583    }
4584
4585    #[::fuchsia::test]
4586    async fn test_read_c_string_to_vec_large() {
4587        spawn_kernel_and_run(async |locked, current_task| {
4588            let mm = current_task.mm().unwrap();
4589            let ma = current_task.deref();
4590
4591            let page_size = *PAGE_SIZE;
4592            let max_size = 4 * page_size as usize;
4593            let addr = (mm.base_addr + 10 * page_size).unwrap();
4594
4595            assert_eq!(map_memory(locked, &current_task, addr, max_size as u64), addr);
4596
4597            let mut random_data = vec![0; max_size];
4598            zx::cprng_draw(&mut random_data);
4599            // Replace every NUL byte so the string is not terminated early.
4600            for byte in random_data.iter_mut() {
4601                if *byte == 0 {
4602                    *byte = 1;
4603                }
4604            }
4605            random_data[max_size - 1] = 0;
4606
4607            ma.write_memory(addr, &random_data).expect("failed to write test string");
4608            // We should read the same value minus the last byte (NUL char).
4609            assert_eq!(
4610                ma.read_c_string_to_vec(UserCString::new(current_task, addr), max_size).unwrap(),
4611                random_data[..max_size - 1]
4612            );
4613        })
4614        .await;
4615    }
4616
4617    #[::fuchsia::test]
4618    async fn test_read_c_string_to_vec() {
4619        spawn_kernel_and_run(async |locked, current_task| {
4620            let mm = current_task.mm().unwrap();
4621            let ma = current_task.deref();
4622
4623            let page_size = *PAGE_SIZE;
4624            let max_size = 2 * page_size as usize;
4625            let addr = (mm.base_addr + 10 * page_size).unwrap();
4626
4627            // Map a page at a fixed address and write an unterminated string at the end of it.
4628            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4629            let test_str = b"foo!";
4630            let test_addr =
4631                addr.checked_add(page_size as usize).unwrap().checked_sub(test_str.len()).unwrap();
4632            ma.write_memory(test_addr, test_str).expect("failed to write test string");
4633
4634            // Expect error if the string is not terminated.
4635            assert_eq!(
4636                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size),
4637                error!(ENAMETOOLONG)
4638            );
4639
4640            // Expect success if the string is terminated.
4641            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
4642            assert_eq!(
4643                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
4644                    .unwrap(),
4645                "foo"
4646            );
4647
4648            // Expect success if the string spans over two mappings.
4649            assert_eq!(
4650                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4651                (addr + page_size).unwrap()
4652            );
4653            // TODO: Adjacent private anonymous mappings are collapsed. To test this case, the test
4654            // needs to provide a backing for the second mapping.
4655            // assert_eq!(mm.get_mapping_count(), 2);
4656            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
4657                .expect("failed to write extra chars");
4658            assert_eq!(
4659                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
4660                    .unwrap(),
4661                "foobar",
4662            );
4663
4664            // Expect error if the string exceeds max limit
4665            assert_eq!(
4666                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), 2),
4667                error!(ENAMETOOLONG)
4668            );
4669
4670            // Expect error if the address is invalid.
4671            assert_eq!(
4672                ma.read_c_string_to_vec(UserCString::null(current_task), max_size),
4673                error!(EFAULT)
4674            );
4675        })
4676        .await;
4677    }
4678
4679    #[::fuchsia::test]
4680    async fn can_read_argv_like_regions() {
4681        spawn_kernel_and_run(async |locked, current_task| {
4682            let ma = current_task.deref();
4683
4684            // Map a page.
4685            let page_size = *PAGE_SIZE;
4686            let addr = map_memory_anywhere(locked, &current_task, page_size);
4687            assert!(!addr.is_null());
4688
4689            // Write an unterminated string.
4690            let mut payload = "first".as_bytes().to_vec();
4691            let mut expected_parses = vec![];
4692            ma.write_memory(addr, &payload).unwrap();
4693
4694            // Expect success if the string is terminated.
4695            expected_parses.push(payload.clone());
4696            payload.push(0);
4697            ma.write_memory(addr, &payload).unwrap();
4698            assert_eq!(
4699                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
4700                expected_parses,
4701            );
4702
4703            // Make sure we can parse multiple strings from the same region.
4704            let second = b"second";
4705            payload.extend(second);
4706            payload.push(0);
4707            expected_parses.push(second.to_vec());
4708
4709            let third = b"third";
4710            payload.extend(third);
4711            payload.push(0);
4712            expected_parses.push(third.to_vec());
4713
4714            ma.write_memory(addr, &payload).unwrap();
4715            assert_eq!(
4716                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
4717                expected_parses,
4718            );
4719        })
4720        .await;
4721    }
4722
4723    #[::fuchsia::test]
4724    async fn truncate_argv_like_regions() {
4725        spawn_kernel_and_run(async |locked, current_task| {
4726            let ma = current_task.deref();
4727
4728            // Map a page.
4729            let page_size = *PAGE_SIZE;
4730            let addr = map_memory_anywhere(locked, &current_task, page_size);
4731            assert!(!addr.is_null());
4732
4733            let payload = b"first\0second\0third\0";
4734            ma.write_memory(addr, payload).unwrap();
4735            assert_eq!(
4736                ma.read_nul_delimited_c_string_list(addr, payload.len() - 3).unwrap(),
4737                vec![b"first".to_vec(), b"second".to_vec(), b"thi".to_vec()],
4738                "Skipping last three bytes of payload should skip last two bytes of 3rd string"
4739            );
4740        })
4741        .await;
4742    }
4743
4744    #[::fuchsia::test]
4745    async fn test_read_c_string() {
4746        spawn_kernel_and_run(async |locked, current_task| {
4747            let mm = current_task.mm().unwrap();
4748            let ma = current_task.deref();
4749
4750            let page_size = *PAGE_SIZE;
4751            let buf_cap = 2 * page_size as usize;
4752            let mut buf = Vec::with_capacity(buf_cap);
4753            // We can't just use `spare_capacity_mut` because `Vec::with_capacity`
4754            // returns a `Vec` with _at least_ the requested capacity.
4755            let buf = &mut buf.spare_capacity_mut()[..buf_cap];
4756            let addr = (mm.base_addr + 10 * page_size).unwrap();
4757
4758            // Map a page at a fixed address and write an unterminated string at the end of it.
4759            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4760            let test_str = b"foo!";
4761            let test_addr = (addr + (page_size - test_str.len() as u64)).unwrap();
4762            ma.write_memory(test_addr, test_str).expect("failed to write test string");
4763
4764            // Expect error if the string is not terminated.
4765            assert_eq!(
4766                ma.read_c_string(UserCString::new(current_task, test_addr), buf),
4767                error!(ENAMETOOLONG)
4768            );
4769
4770            // Expect success if the string is terminated.
4771            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
4772            assert_eq!(
4773                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
4774                "foo"
4775            );
4776
4777            // Expect success if the string spans over two mappings.
4778            assert_eq!(
4779                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4780                (addr + page_size).unwrap()
4781            );
4782            // TODO: For these to remain separate mappings we need to provide a file backing for the
4783            // next page, or the mappings will be collapsed.
4784            //assert_eq!(mm.get_mapping_count(), 2);
4785            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
4786                .expect("failed to write extra chars");
4787            assert_eq!(
4788                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
4789                "foobar"
4790            );
4791
4792            // Expect error if the string does not fit in the provided buffer.
4793            assert_eq!(
4794                ma.read_c_string(
4795                    UserCString::new(current_task, test_addr),
4796                    &mut [MaybeUninit::uninit(); 2]
4797                ),
4798                error!(ENAMETOOLONG)
4799            );
4800
4801            // Expect error if the address is invalid.
4802            assert_eq!(ma.read_c_string(UserCString::null(current_task), buf), error!(EFAULT));
4803        })
4804        .await;
4805    }
4806
4807    #[::fuchsia::test]
4808    async fn test_find_next_unused_range() {
4809        spawn_kernel_and_run(async |locked, current_task| {
4810            let mm = current_task.mm().unwrap();
4811
4812            let mmap_top = mm.state.read().find_next_unused_range(0).unwrap().ptr();
4813            let page_size = *PAGE_SIZE as usize;
4814            assert!(mmap_top <= RESTRICTED_ASPACE_HIGHEST_ADDRESS);
4815
4816            // No mappings - top address minus requested size is available
4817            assert_eq!(
4818                mm.state.read().find_next_unused_range(page_size).unwrap(),
4819                UserAddress::from_ptr(mmap_top - page_size)
4820            );
4821
4822            // Fill it.
4823            let addr = UserAddress::from_ptr(mmap_top - page_size);
4824            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
4825
4826            // The next available range is right before the new mapping.
4827            assert_eq!(
4828                mm.state.read().find_next_unused_range(page_size).unwrap(),
4829                UserAddress::from_ptr(addr.ptr() - page_size)
4830            );
4831
4832            // Allocate an extra page before a one-page gap.
4833            let addr2 = UserAddress::from_ptr(addr.ptr() - 2 * page_size);
4834            assert_eq!(map_memory(locked, &current_task, addr2, *PAGE_SIZE), addr2);
4835
4836            // Searching for a one-page range still gives the same result.
4837            assert_eq!(
4838                mm.state.read().find_next_unused_range(page_size).unwrap(),
4839                UserAddress::from_ptr(addr.ptr() - page_size)
4840            );
4841
4842            // Searching for a bigger range results in the area before the second mapping
4843            assert_eq!(
4844                mm.state.read().find_next_unused_range(2 * page_size).unwrap(),
4845                UserAddress::from_ptr(addr2.ptr() - 2 * page_size)
4846            );
4847
4848            // Searching for more memory than available should fail.
4849            assert_eq!(mm.state.read().find_next_unused_range(mmap_top), None);
4850        })
4851        .await;
4852    }
4853
4854    #[::fuchsia::test]
4855    async fn test_count_placements() {
4856        spawn_kernel_and_run(async |locked, current_task| {
4857            let mm = current_task.mm().unwrap();
4858
4859            // ten-page range
4860            let page_size = *PAGE_SIZE as usize;
4861            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
4862                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
4863
4864            assert_eq!(
4865                mm.state.read().count_possible_placements(11 * page_size, &subrange_ten),
4866                Some(0)
4867            );
4868            assert_eq!(
4869                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
4870                Some(1)
4871            );
4872            assert_eq!(
4873                mm.state.read().count_possible_placements(9 * page_size, &subrange_ten),
4874                Some(2)
4875            );
4876            assert_eq!(
4877                mm.state.read().count_possible_placements(page_size, &subrange_ten),
4878                Some(10)
4879            );
4880
4881            // map 6th page
4882            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
4883            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
4884
4885            assert_eq!(
4886                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
4887                Some(0)
4888            );
4889            assert_eq!(
4890                mm.state.read().count_possible_placements(5 * page_size, &subrange_ten),
4891                Some(1)
4892            );
4893            assert_eq!(
4894                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
4895                Some(3)
4896            );
4897            assert_eq!(
4898                mm.state.read().count_possible_placements(page_size, &subrange_ten),
4899                Some(9)
4900            );
4901        })
4902        .await;
4903    }
4904
4905    #[::fuchsia::test]
4906    async fn test_pick_placement() {
4907        spawn_kernel_and_run(async |locked, current_task| {
4908            let mm = current_task.mm().unwrap();
4909
4910            let page_size = *PAGE_SIZE as usize;
4911            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
4912                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
4913
4914            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
4915            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
4916            assert_eq!(
4917                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
4918                Some(3)
4919            );
4920
4921            assert_eq!(
4922                mm.state.read().pick_placement(4 * page_size, 0, &subrange_ten),
4923                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE))
4924            );
4925            assert_eq!(
4926                mm.state.read().pick_placement(4 * page_size, 1, &subrange_ten),
4927                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + page_size))
4928            );
4929            assert_eq!(
4930                mm.state.read().pick_placement(4 * page_size, 2, &subrange_ten),
4931                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 6 * page_size))
4932            );
4933        })
4934        .await;
4935    }
4936
4937    #[::fuchsia::test]
4938    async fn test_find_random_unused_range() {
4939        spawn_kernel_and_run(async |locked, current_task| {
4940            let mm = current_task.mm().unwrap();
4941
4942            // ten-page range
4943            let page_size = *PAGE_SIZE as usize;
4944            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
4945                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
4946
4947            for _ in 0..10 {
4948                let addr = mm.state.read().find_random_unused_range(page_size, &subrange_ten);
4949                assert!(addr.is_some());
4950                assert_eq!(
4951                    map_memory(locked, &current_task, addr.unwrap(), *PAGE_SIZE),
4952                    addr.unwrap()
4953                );
4954            }
4955            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
4956        })
4957        .await;
4958    }
4959
4960    #[::fuchsia::test]
4961    async fn test_grows_down_near_aspace_base() {
4962        spawn_kernel_and_run(async |locked, current_task| {
4963            let mm = current_task.mm().unwrap();
4964
4965            let page_count = 10;
4966
4967            let page_size = *PAGE_SIZE as usize;
4968            let addr =
4969                (UserAddress::from_ptr(RESTRICTED_ASPACE_BASE) + page_count * page_size).unwrap();
4970            assert_eq!(
4971                map_memory_with_flags(
4972                    locked,
4973                    &current_task,
4974                    addr,
4975                    page_size as u64,
4976                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN
4977                ),
4978                addr
4979            );
4980
4981            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)..addr;
4982            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
4983        })
4984        .await;
4985    }
4986
4987    #[::fuchsia::test]
4988    async fn test_unmap_returned_mappings() {
4989        spawn_kernel_and_run(async |locked, current_task| {
4990            let mm = current_task.mm().unwrap();
4991
4992            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
4993
4994            let mut released_mappings = ReleasedMappings::default();
4995            let mut mm_state = mm.state.write();
4996            let unmap_result =
4997                mm_state.unmap(&mm, addr, *PAGE_SIZE as usize, &mut released_mappings);
4998            assert!(unmap_result.is_ok());
4999            assert_eq!(released_mappings.len(), 1);
5000            released_mappings.finalize(mm_state);
5001        })
5002        .await;
5003    }
5004
5005    #[::fuchsia::test]
5006    async fn test_unmap_returns_multiple_mappings() {
5007        spawn_kernel_and_run(async |locked, current_task| {
5008            let mm = current_task.mm().unwrap();
5009
5010            let addr = mm.state.read().find_next_unused_range(3 * *PAGE_SIZE as usize).unwrap();
5011            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5012            let _ = map_memory(locked, &current_task, (addr + 2 * *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5013
5014            let mut released_mappings = ReleasedMappings::default();
5015            let mut mm_state = mm.state.write();
5016            let unmap_result =
5017                mm_state.unmap(&mm, addr, (*PAGE_SIZE * 3) as usize, &mut released_mappings);
5018            assert!(unmap_result.is_ok());
5019            assert_eq!(released_mappings.len(), 2);
5020            released_mappings.finalize(mm_state);
5021        })
5022        .await;
5023    }
5024
5025    /// Maps two pages in separate mappings next to each other, then unmaps the first page.
5026    /// The second page should not be modified.
5027    #[::fuchsia::test]
5028    async fn test_map_two_unmap_one() {
5029        spawn_kernel_and_run(async |locked, current_task| {
5030            let mm = current_task.mm().unwrap();
5031
5032            // reserve memory for both pages
5033            let addr_reserve =
5034                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5035            let addr1 = do_mmap(
5036                locked,
5037                &current_task,
5038                addr_reserve,
5039                *PAGE_SIZE as usize,
5040                PROT_READ, // Map read-only to avoid merging of the two mappings
5041                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5042                FdNumber::from_raw(-1),
5043                0,
5044            )
5045            .expect("failed to mmap");
5046            let addr2 = map_memory_with_flags(
5047                locked,
5048                &current_task,
5049                (addr_reserve + *PAGE_SIZE).unwrap(),
5050                *PAGE_SIZE,
5051                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5052            );
5053            let state = mm.state.read();
5054            let (range1, _) = state.mappings.get(addr1).expect("mapping");
5055            assert_eq!(range1.start, addr1);
5056            assert_eq!(range1.end, (addr1 + *PAGE_SIZE).unwrap());
5057            let (range2, mapping2) = state.mappings.get(addr2).expect("mapping");
5058            assert_eq!(range2.start, addr2);
5059            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5060            let original_memory2 = {
5061                match state.get_mapping_backing(mapping2) {
5062                    MappingBacking::Memory(backing) => {
5063                        assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5064                        backing.memory().clone()
5065                    }
5066                    MappingBacking::PrivateAnonymous => {
5067                        panic!("Unexpected private anonymous mapping")
5068                    }
5069                }
5070            };
5071            std::mem::drop(state);
5072
5073            assert_eq!(mm.unmap(addr1, *PAGE_SIZE as usize), Ok(()));
5074
5075            let state = mm.state.read();
5076
5077            // The first page should be unmapped.
5078            assert!(state.mappings.get(addr1).is_none());
5079
5080            // The second page should remain unchanged.
5081            let (range2, mapping2) = state.mappings.get(addr2).expect("second page");
5082            assert_eq!(range2.start, addr2);
5083            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5084            match state.get_mapping_backing(mapping2) {
5085                MappingBacking::Memory(backing) => {
5086                    assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5087                    assert_eq!(original_memory2.get_koid(), backing.memory().get_koid());
5088                }
5089                MappingBacking::PrivateAnonymous => panic!("Unexpected private anonymous mapping"),
5090            }
5091        })
5092        .await;
5093    }
5094
5095    #[::fuchsia::test]
5096    async fn test_read_write_objects() {
5097        spawn_kernel_and_run(async |locked, current_task| {
5098            let ma = current_task.deref();
5099            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5100            let items_ref = UserRef::<i32>::new(addr);
5101
5102            let items_written = vec![0, 2, 3, 7, 1];
5103            ma.write_objects(items_ref, &items_written).expect("Failed to write object array.");
5104
5105            let items_read = ma
5106                .read_objects_to_vec(items_ref, items_written.len())
5107                .expect("Failed to read object array.");
5108
5109            assert_eq!(items_written, items_read);
5110        })
5111        .await;
5112    }
5113
5114    #[::fuchsia::test]
5115    async fn test_read_write_objects_null() {
5116        spawn_kernel_and_run(async |_, current_task| {
5117            let ma = current_task.deref();
5118            let items_ref = UserRef::<i32>::new(UserAddress::default());
5119
5120            let items_written = vec![];
5121            ma.write_objects(items_ref, &items_written)
5122                .expect("Failed to write empty object array.");
5123
5124            let items_read = ma
5125                .read_objects_to_vec(items_ref, items_written.len())
5126                .expect("Failed to read empty object array.");
5127
5128            assert_eq!(items_written, items_read);
5129        })
5130        .await;
5131    }
5132
5133    #[::fuchsia::test]
5134    async fn test_read_object_partial() {
5135        #[derive(Debug, Default, Copy, Clone, KnownLayout, FromBytes, Immutable, PartialEq)]
5136        struct Items {
5137            val: [i32; 4],
5138        }
5139
5140        spawn_kernel_and_run(async |locked, current_task| {
5141            let ma = current_task.deref();
5142            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5143            let items_array_ref = UserRef::<i32>::new(addr);
5144
5145            // Populate some values.
5146            let items_written = vec![75, 23, 51, 98];
5147            ma.write_objects(items_array_ref, &items_written)
5148                .expect("Failed to write object array.");
5149
5150            // Full read of all 4 values.
5151            let items_ref = UserRef::<Items>::new(addr);
5152            let items_read = ma
5153                .read_object_partial(items_ref, std::mem::size_of::<Items>())
5154                .expect("Failed to read object");
5155            assert_eq!(items_written, items_read.val);
5156
5157            // Partial read of the first two.
5158            let items_read = ma.read_object_partial(items_ref, 8).expect("Failed to read object");
5159            assert_eq!(vec![75, 23, 0, 0], items_read.val);
5160
5161            // The API currently allows reading 0 bytes (this could be re-evaluated), so test that it
5162            // does the right thing.
5163            let items_read = ma.read_object_partial(items_ref, 0).expect("Failed to read object");
5164            assert_eq!(vec![0, 0, 0, 0], items_read.val);
5165
5166            // Size bigger than the object.
5167            assert_eq!(
5168                ma.read_object_partial(items_ref, std::mem::size_of::<Items>() + 8),
5169                error!(EINVAL)
5170            );
5171
5172            // Bad pointer.
5173            assert_eq!(
5174                ma.read_object_partial(UserRef::<Items>::new(UserAddress::from(1)), 16),
5175                error!(EFAULT)
5176            );
5177        })
5178        .await;
5179    }
5180
5181    #[::fuchsia::test]
5182    async fn test_partial_read() {
5183        spawn_kernel_and_run(async |locked, current_task| {
5184            let mm = current_task.mm().unwrap();
5185            let ma = current_task.deref();
5186
5187            let addr = mm.state.read().find_next_unused_range(2 * *PAGE_SIZE as usize).unwrap();
5188            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5189            let second_map =
5190                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5191
5192            let bytes = vec![0xf; (*PAGE_SIZE * 2) as usize];
5193            assert!(ma.write_memory(addr, &bytes).is_ok());
5194            let mut state = mm.state.write();
5195            let mut released_mappings = ReleasedMappings::default();
5196            state
5197                .protect(
5198                    ma,
5199                    second_map,
5200                    *PAGE_SIZE as usize,
5201                    ProtectionFlags::empty(),
5202                    &mut released_mappings,
5203                )
5204                .unwrap();
5205            released_mappings.finalize(state);
5206            assert_eq!(
5207                ma.read_memory_partial_to_vec(addr, bytes.len()).unwrap().len(),
5208                *PAGE_SIZE as usize,
5209            );
5210        })
5211        .await;
5212    }
5213
5214    fn map_memory_growsdown<L>(
5215        locked: &mut Locked<L>,
5216        current_task: &CurrentTask,
5217        length: u64,
5218    ) -> UserAddress
5219    where
5220        L: LockEqualOrBefore<FileOpsCore>,
5221    {
5222        map_memory_with_flags(
5223            locked,
5224            current_task,
5225            UserAddress::default(),
5226            length,
5227            MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN,
5228        )
5229    }
5230
5231    #[::fuchsia::test]
5232    async fn test_grow_mapping_empty_mm() {
5233        spawn_kernel_and_run(async |_, current_task| {
5234            let mm = current_task.mm().unwrap();
5235
5236            let addr = UserAddress::from(0x100000);
5237
5238            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5239        })
5240        .await;
5241    }
5242
    #[::fuchsia::test]
    async fn test_grow_inside_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
        })
        .await;
    }

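    // Faults inside a read-only, non-growsdown mapping should not extend anything, whether the
    // fault is a read or a write.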
    #[::fuchsia::test]
    async fn test_grow_write_fault_inside_read_only_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = do_mmap(
                locked,
                &current_task,
                UserAddress::default(),
                *PAGE_SIZE as usize,
                PROT_READ,
                MAP_ANONYMOUS | MAP_PRIVATE,
                FdNumber::from_raw(-1),
                0,
            )
            .expect("Could not map memory");

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
        })
        .await;
    }

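    // The same holds for a PROT_NONE mapping: faulting inside it never grows the mapping.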
    #[::fuchsia::test]
    async fn test_grow_fault_inside_prot_none_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = do_mmap(
                locked,
                &current_task,
                UserAddress::default(),
                *PAGE_SIZE as usize,
                PROT_NONE,
                MAP_ANONYMOUS | MAP_PRIVATE,
                FdNumber::from_raw(-1),
                0,
            )
            .expect("Could not map memory");

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
        })
        .await;
    }

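    // Faulting one page below a MAP_GROWSDOWN mapping should extend the mapping downward.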
    #[::fuchsia::test]
    async fn test_grow_below_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) - *PAGE_SIZE;

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr.unwrap(), false), Ok(true));
        })
        .await;
    }

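    // Faulting above a MAP_GROWSDOWN mapping does not extend it; growth only happens downward.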
    #[::fuchsia::test]
    async fn test_grow_above_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) + *PAGE_SIZE;

            assert_matches!(
                mm.extend_growsdown_mapping_to_address(addr.unwrap(), false),
                Ok(false)
            );
        })
        .await;
    }

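    // A write fault below a read-only MAP_GROWSDOWN mapping must not extend it, and no new
    // mapping should be created.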
    #[::fuchsia::test]
    async fn test_grow_write_fault_below_read_only_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let mapped_addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE);

            mm.protect(&current_task, mapped_addr, *PAGE_SIZE as usize, ProtectionFlags::READ)
                .unwrap();

            assert_matches!(
                mm.extend_growsdown_mapping_to_address((mapped_addr - *PAGE_SIZE).unwrap(), true),
                Ok(false)
            );

            assert_eq!(mm.get_mapping_count(), 1);
        })
        .await;
    }

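    // Snapshot a mapping backed by a pager-backed VMO into another task and verify that
    // subsequent writes in the source and target address spaces remain independent.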
    #[::fuchsia::test]
    async fn test_snapshot_paged_memory() {
        use zx::sys::zx_page_request_command_t::ZX_PAGER_VMO_READ;

        spawn_kernel_and_run(async |locked, current_task| {
            let kernel = current_task.kernel();
            let mm = current_task.mm().unwrap();
            let ma = current_task.deref();

            let port = Arc::new(zx::Port::create());
            let port_clone = port.clone();
            let pager =
                Arc::new(zx::Pager::create(zx::PagerOptions::empty()).expect("create failed"));
            let pager_clone = pager.clone();

            const VMO_SIZE: u64 = 128 * 1024;
            let vmo = Arc::new(
                pager
                    .create_vmo(zx::VmoOptions::RESIZABLE, &port, 1, VMO_SIZE)
                    .expect("create_vmo failed"),
            );
            let vmo_clone = vmo.clone();

            // Create a thread to service the port where we will receive pager requests.
            let thread = std::thread::spawn(move || {
                loop {
                    let packet =
                        port_clone.wait(zx::MonotonicInstant::INFINITE).expect("wait failed");
                    match packet.contents() {
                        zx::PacketContents::Pager(contents) => {
                            if contents.command() == ZX_PAGER_VMO_READ {
                                let range = contents.range();
                                let source_vmo = zx::Vmo::create(range.end - range.start)
                                    .expect("create failed");
                                pager_clone
                                    .supply_pages(&vmo_clone, range, &source_vmo, 0)
                                    .expect("supply_pages failed");
                            }
                        }
                        zx::PacketContents::User(_) => break,
                        _ => {}
                    }
                }
            });

            let child_vmo = vmo
                .create_child(zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, VMO_SIZE)
                .unwrap();

            // Write something to the source VMO.
            vmo.write(b"foo", 0).expect("write failed");

            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
            let addr = mm
                .map_memory(
                    DesiredAddress::Any,
                    Arc::new(MemoryObject::from(child_vmo)),
                    0,
                    VMO_SIZE as usize,
                    prot_flags,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::None,
                )
                .expect("map failed");

            let target = create_task(locked, &kernel, "another-task");
            mm.snapshot_to(locked, &target.mm().unwrap()).expect("snapshot_to failed");

            // Make sure the target sees what we wrote to the source VMO.
            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"foo");

            // Write something to both source and target and make sure they are forked.
            ma.write_memory(addr, b"bar").expect("write_memory failed");

            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"foo");

            target.write_memory(addr, b"baz").expect("write_memory failed");
            let buf = ma.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"bar");

            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"baz");

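            // Queue a user packet to tell the pager thread to shut down, then wait for it to exit.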
            port.queue(&zx::Packet::from_user_packet(0, 0, zx::UserPacket::from_u8_array([0; 32])))
                .unwrap();
            thread.join().unwrap();
        })
        .await;
    }

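    // PR_SET_VMA_ANON_NAME should attach the given name to the mapping, visible via
    // get_mapping_name.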
    #[::fuchsia::test]
    async fn test_set_vma_name() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);

            let vma_name = "vma name";
            current_task.write_memory(name_addr, vma_name.as_bytes()).unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);

            sys_prctl(
                locked,
                &mut current_task,
                PR_SET_VMA,
                PR_SET_VMA_ANON_NAME as u64,
                mapping_addr.ptr() as u64,
                *PAGE_SIZE,
                name_addr.ptr() as u64,
            )
            .unwrap();

            assert_eq!(
                *current_task.mm().unwrap().get_mapping_name(mapping_addr).unwrap().unwrap(),
                vma_name
            );
        })
        .await;
    }

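    // Naming a range that spans two adjacent mappings should apply the name to both of them.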
    #[::fuchsia::test]
    async fn test_set_vma_name_adjacent_mappings() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let first_mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
            let second_mapping_addr = map_memory_with_flags(
                locked,
                &current_task,
                (first_mapping_addr + *PAGE_SIZE).unwrap(),
                *PAGE_SIZE,
                MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
            );

            assert_eq!((first_mapping_addr + *PAGE_SIZE).unwrap(), second_mapping_addr);

            sys_prctl(
                locked,
                &mut current_task,
                PR_SET_VMA,
                PR_SET_VMA_ANON_NAME as u64,
                first_mapping_addr.ptr() as u64,
                2 * *PAGE_SIZE,
                name_addr.ptr() as u64,
            )
            .unwrap();

            {
                let mm = current_task.mm().unwrap();
                let state = mm.state.read();

                // The name should apply to both mappings.
                let (_, mapping) = state.mappings.get(first_mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));

                let (_, mapping) = state.mappings.get(second_mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
            }
        })
        .await;
    }

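    // Naming a range that extends past the end of the last mapping fails with ENOMEM but still
    // names the mapped portion at the start of the range.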
    #[::fuchsia::test]
    async fn test_set_vma_name_beyond_end() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);

            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
            current_task.mm().unwrap().unmap(second_page, *PAGE_SIZE as usize).unwrap();

            // This should fail with ENOMEM since it extends past the end of the mapping into
            // unmapped memory.
            assert_eq!(
                sys_prctl(
                    locked,
                    &mut current_task,
                    PR_SET_VMA,
                    PR_SET_VMA_ANON_NAME as u64,
                    mapping_addr.ptr() as u64,
                    2 * *PAGE_SIZE,
                    name_addr.ptr() as u64,
                ),
                error!(ENOMEM)
            );

            // Despite returning an error, the prctl should still assign the name to the mapped
            // region at the start of the range.
            {
                let mm = current_task.mm().unwrap();
                let state = mm.state.read();

                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
            }
        })
        .await;
    }

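    // If the start of the range is unmapped, the prctl fails with ENOMEM and no mapping is
    // renamed.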
    #[::fuchsia::test]
    async fn test_set_vma_name_before_start() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);

            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
            current_task.mm().unwrap().unmap(mapping_addr, *PAGE_SIZE as usize).unwrap();

            // This should fail with ENOMEM since the start of the range is in unmapped memory.
            assert_eq!(
                sys_prctl(
                    locked,
                    &mut current_task,
                    PR_SET_VMA,
                    PR_SET_VMA_ANON_NAME as u64,
                    mapping_addr.ptr() as u64,
                    2 * *PAGE_SIZE,
                    name_addr.ptr() as u64,
                ),
                error!(ENOMEM)
            );

            // Unlike a range which starts within a mapping and extends past the end, this should
            // not assign a name to any mappings.
            {
                let mm = current_task.mm().unwrap();
                let state = mm.state.read();

                let (_, mapping) = state.mappings.get(second_page).unwrap();
                assert_eq!(mapping.name(), MappingName::None);
            }
        })
        .await;
    }

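    // Naming only the middle page of a three-page mapping should split it, with only the middle
    // piece carrying the name.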
    #[::fuchsia::test]
    async fn test_set_vma_name_partial() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), 3 * *PAGE_SIZE);

            assert_eq!(
                sys_prctl(
                    locked,
                    &mut current_task,
                    PR_SET_VMA,
                    PR_SET_VMA_ANON_NAME as u64,
                    (mapping_addr + *PAGE_SIZE).unwrap().ptr() as u64,
                    *PAGE_SIZE,
                    name_addr.ptr() as u64,
                ),
                Ok(starnix_syscalls::SUCCESS)
            );

            // This should split the mapping into 3 pieces with the second piece having the name "foo"
            {
                let mm = current_task.mm().unwrap();
                let state = mm.state.read();

                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::None);

                let (_, mapping) =
                    state.mappings.get((mapping_addr + *PAGE_SIZE).unwrap()).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));

                let (_, mapping) =
                    state.mappings.get((mapping_addr + (2 * *PAGE_SIZE)).unwrap()).unwrap();
                assert_eq!(mapping.name(), MappingName::None);
            }
        })
        .await;
    }

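    // VMA names set via PR_SET_VMA_ANON_NAME should be preserved when the address space is
    // snapshotted into another task.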
    #[::fuchsia::test]
    async fn test_preserve_name_snapshot() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let kernel = current_task.kernel().clone();
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);

            assert_eq!(
                sys_prctl(
                    locked,
                    &mut current_task,
                    PR_SET_VMA,
                    PR_SET_VMA_ANON_NAME as u64,
                    mapping_addr.ptr() as u64,
                    *PAGE_SIZE,
                    name_addr.ptr() as u64,
                ),
                Ok(starnix_syscalls::SUCCESS)
            );

            let target = create_task(locked, &kernel, "another-task");
            current_task
                .mm()
                .unwrap()
                .snapshot_to(locked, &target.mm().unwrap())
                .expect("snapshot_to failed");

            {
                let mm = target.mm().unwrap();
                let state = mm.state.read();

                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
            }
        })
        .await;
    }
}