starnix_core/mm/memory_manager.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::mapping::MappingBackingMemory;
7use crate::mm::memory::MemoryObject;
8use crate::mm::memory_accessor::{MemoryAccessor, TaskMemoryAccessor};
9use crate::mm::private_anonymous_memory_manager::PrivateAnonymousMemoryManager;
10use crate::mm::{
11    FaultRegisterMode, FutexTable, InflightVmsplicedPayloads, MapInfoCache, Mapping,
12    MappingBacking, MappingFlags, MappingName, MlockPinFlavor, PrivateFutexKey, ProtectionFlags,
13    UserFault, VMEX_RESOURCE, VmsplicePayload, VmsplicePayloadSegment, read_to_array,
14};
15use crate::security;
16use crate::signals::{SignalDetail, SignalInfo};
17use crate::task::{CurrentTask, ExceptionResult, PageFaultExceptionReport, Task};
18use crate::vfs::aio::AioContext;
19use crate::vfs::pseudo::dynamic_file::{
20    DynamicFile, DynamicFileBuf, DynamicFileSource, SequenceFileSource,
21};
22use crate::vfs::{FsString, NamespaceNode};
23use anyhow::{Error, anyhow};
24use bitflags::bitflags;
25use flyweights::FlyByteStr;
26use linux_uapi::BUS_ADRERR;
27use memory_pinning::PinnedMapping;
28use range_map::RangeMap;
29use starnix_ext::map_ext::EntryExt;
30use starnix_lifecycle::DropNotifier;
31use starnix_logging::{
32    CATEGORY_STARNIX_MM, impossible_error, log_warn, trace_duration, track_stub,
33};
34use starnix_sync::{
35    LockBefore, Locked, MmDumpable, OrderedMutex, RwLock, RwLockWriteGuard, ThreadGroupLimits,
36    Unlocked, UserFaultInner,
37};
38use starnix_types::arch::ArchWidth;
39use starnix_types::futex_address::FutexAddress;
40use starnix_types::math::{round_down_to_system_page_size, round_up_to_system_page_size};
41use starnix_types::ownership::{TempRef, WeakRef};
42use starnix_types::user_buffer::{UserBuffer, UserBuffers};
43use starnix_uapi::auth::CAP_IPC_LOCK;
44use starnix_uapi::errors::Errno;
45use starnix_uapi::file_mode::Access;
46use starnix_uapi::range_ext::RangeExt;
47use starnix_uapi::resource_limits::Resource;
48use starnix_uapi::restricted_aspace::{
49    RESTRICTED_ASPACE_BASE, RESTRICTED_ASPACE_HIGHEST_ADDRESS, RESTRICTED_ASPACE_RANGE,
50    RESTRICTED_ASPACE_SIZE,
51};
52use starnix_uapi::signals::{SIGBUS, SIGSEGV};
53use starnix_uapi::user_address::{ArchSpecific, UserAddress};
54use starnix_uapi::{
55    MADV_COLD, MADV_COLLAPSE, MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK,
56    MADV_DONTNEED, MADV_DONTNEED_LOCKED, MADV_FREE, MADV_HUGEPAGE, MADV_HWPOISON, MADV_KEEPONFORK,
57    MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_NORMAL, MADV_PAGEOUT, MADV_POPULATE_READ, MADV_RANDOM,
58    MADV_REMOVE, MADV_SEQUENTIAL, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, MADV_WILLNEED,
59    MADV_WIPEONFORK, MREMAP_DONTUNMAP, MREMAP_FIXED, MREMAP_MAYMOVE, SI_KERNEL, errno, error,
60};
61use std::collections::HashMap;
62use std::mem::MaybeUninit;
63use std::ops::{Deref, DerefMut, Range, RangeBounds};
64use std::sync::{Arc, LazyLock, Weak};
65use syncio::zxio::zxio_default_maybe_faultable_copy;
66use zerocopy::IntoBytes;
67use zx::{HandleBased, Rights, VmarInfo, VmoChildOptions};
68
69pub const ZX_VM_SPECIFIC_OVERWRITE: zx::VmarFlags =
70    zx::VmarFlags::from_bits_retain(zx::VmarFlagsExtended::SPECIFIC_OVERWRITE.bits());
71
72// We do not create shared processes in unit tests.
73pub(crate) const UNIFIED_ASPACES_ENABLED: bool = cfg!(not(test));
74
75/// Initializes the usercopy utilities.
76///
77/// It is useful to explicitly call this so that the usercopy is initialized
78/// at a known instant. For example, Starnix may want to make sure the usercopy
79/// thread created to support user copying is associated to the Starnix process
80/// and not a restricted-mode process.
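///
/// # Example (editorial sketch)
///
/// A hypothetical embedder would call this once early in startup, before any
/// Linux task is spawned, so the usercopy worker thread is attributed to the
/// Starnix process. The call site below is illustrative and not taken from the
/// original source:
///
/// ```ignore
/// fn start_kernel() {
///     // Force usercopy initialization from the main Starnix thread.
///     init_usercopy();
///     // ... create the Kernel and spawn Linux tasks afterwards ...
/// }
/// ```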
81pub fn init_usercopy() {
82    // This call lazily initializes the `Usercopy` instance.
83    let _ = usercopy();
84}
85
86pub const GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS: usize = 256;
87
88#[cfg(target_arch = "x86_64")]
89const ASLR_RANDOM_BITS: usize = 27;
90
91#[cfg(target_arch = "aarch64")]
92const ASLR_RANDOM_BITS: usize = 28;
93
94#[cfg(target_arch = "riscv64")]
95const ASLR_RANDOM_BITS: usize = 18;
96
97/// Number of bits of entropy for processes running in 32-bit mode.
98const ASLR_32_RANDOM_BITS: usize = 8;
99
100// The biggest we expect the stack to be; increase as needed.
101// TODO(https://fxbug.dev/322874791): Once setting RLIMIT_STACK is implemented, we should use it.
102const MAX_STACK_SIZE: usize = 512 * 1024 * 1024;
103
104// Value to report temporarily as the VM RSS HWM.
105// TODO(https://fxbug.dev/396221597): Need support from the kernel to track the committed bytes high
106// water mark.
107const STUB_VM_RSS_HWM: usize = 2 * 1024 * 1024;
108
109fn usercopy() -> Option<&'static usercopy::Usercopy> {
110    static USERCOPY: LazyLock<Option<usercopy::Usercopy>> = LazyLock::new(|| {
111        // We do not create shared processes in unit tests.
112        if UNIFIED_ASPACES_ENABLED {
113    // ASSUMPTION: All Starnix-managed Linux processes have the same
114            // restricted mode address range.
115            Some(usercopy::Usercopy::new(RESTRICTED_ASPACE_RANGE).unwrap())
116        } else {
117            None
118        }
119    });
120
121    LazyLock::force(&USERCOPY).as_ref()
122}
123
124/// Provides an implementation for zxio's `zxio_maybe_faultable_copy` that supports
125/// catching faults.
126///
127/// See zxio's `zxio_maybe_faultable_copy` documentation for more details.
128///
129/// # Safety
130///
131/// Only one of `src`/`dest` may be an address to a buffer owned by user/restricted-mode
132/// (`ret_dest` indicates whether the user-owned buffer is `dest` when `true`).
133/// The other must be a valid Starnix/normal-mode buffer that will never cause a fault
134/// when the first `count` bytes are read/written.
135#[unsafe(no_mangle)]
136pub unsafe fn zxio_maybe_faultable_copy_impl(
137    dest: *mut u8,
138    src: *const u8,
139    count: usize,
140    ret_dest: bool,
141) -> bool {
142    if let Some(usercopy) = usercopy() {
143        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
144        let ret = unsafe { usercopy.raw_hermetic_copy(dest, src, count, ret_dest) };
145        ret == count
146    } else {
147        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
148        unsafe {
149            zxio_default_maybe_faultable_copy(dest, src, count, ret_dest)
150        }
151    }
152}
153
154pub static PAGE_SIZE: LazyLock<u64> = LazyLock::new(|| zx::system_get_page_size() as u64);
155
156bitflags! {
157    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
158    pub struct MappingOptions: u16 {
159      const SHARED      = 1 << 0;
160      const ANONYMOUS   = 1 << 1;
161      const LOWER_32BIT = 1 << 2;
162      const GROWSDOWN   = 1 << 3;
163      const ELF_BINARY  = 1 << 4;
164      const DONTFORK    = 1 << 5;
165      const WIPEONFORK  = 1 << 6;
166      const DONT_SPLIT  = 1 << 7;
167      const DONT_EXPAND = 1 << 8;
168      const POPULATE    = 1 << 9;
169    }
170}
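// Editorial example (not part of the original source): the options carried by a
// typical `mmap(MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE)` request would be
// composed roughly like this, assuming a one-to-one translation of the Linux
// flags in the syscall layer:
//
//     let options = MappingOptions::ANONYMOUS | MappingOptions::POPULATE;
//     assert!(options.contains(MappingOptions::ANONYMOUS));
//     assert!(!options.contains(MappingOptions::SHARED)); // MAP_PRIVATE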
171
172bitflags! {
173    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
174    pub struct MremapFlags: u32 {
175        const MAYMOVE = MREMAP_MAYMOVE;
176        const FIXED = MREMAP_FIXED;
177        const DONTUNMAP = MREMAP_DONTUNMAP;
178    }
179}
180
181const PROGRAM_BREAK_LIMIT: u64 = 64 * 1024 * 1024;
182
183#[derive(Debug, Clone, Eq, PartialEq)]
184struct ProgramBreak {
185    // The base address at which the data segment is mapped.
186    base: UserAddress,
187
188    // The current program break.
189    //
190    // The addresses from [base, current.round_up(*PAGE_SIZE)) are mapped into the
191    // client address space from the underlying |memory|.
192    current: UserAddress,
193}
194
195/// The policy about whether the address space can be dumped.
196#[derive(Debug, Clone, Copy, Eq, PartialEq)]
197pub enum DumpPolicy {
198    /// The address space cannot be dumped.
199    ///
200    /// Corresponds to SUID_DUMP_DISABLE.
201    Disable,
202
203    /// The address space can be dumped.
204    ///
205    /// Corresponds to SUID_DUMP_USER.
206    User,
207}
208
209// Supported types of membarriers.
210pub enum MembarrierType {
211    Memory,   // MEMBARRIER_CMD_GLOBAL, etc
212    SyncCore, // MEMBARRIER_CMD_..._SYNC_CORE
213}
214
215// Tracks the types of membarriers this address space is registered to receive.
216#[derive(Default, Clone)]
217struct MembarrierRegistrations {
218    memory: bool,
219    sync_core: bool,
220}
221
222pub struct MemoryManagerState {
223    /// The VMAR in which userspace mappings occur.
224    ///
225    /// We map userspace memory in this child VMAR so that we can destroy the
226    /// entire VMAR during exec.
227    /// For 32-bit tasks, we limit the user_vmar to the address range addressable by 32-bit code.
228    ///
229    /// This field is set to `ZX_HANDLE_INVALID` when the address space has been destroyed (e.g. on
230    /// `exec()`), so call-sites can either check for that value proactively or handle the
231    /// `ZX_ERR_BAD_HANDLE` status returned by Zircon operations, whichever suits them.
232    user_vmar: zx::Vmar,
233
234    /// Cached VmarInfo for user_vmar.
235    user_vmar_info: zx::VmarInfo,
236
237    /// The memory mappings currently used by this address space.
238    ///
239    /// The mappings record which object backs each address.
240    mappings: RangeMap<UserAddress, Mapping>,
241
242    /// Memory object backing private, anonymous memory allocations in this address space.
243    private_anonymous: PrivateAnonymousMemoryManager,
244
245    /// UserFaults registered with this memory manager.
246    userfaultfds: Vec<Weak<UserFault>>,
247
248    /// Shadow mappings for mlock()'d pages.
249    ///
250    /// Used for MlockPinFlavor::ShadowProcess to keep track of when we need to unmap
251    /// memory from the shadow process.
252    shadow_mappings_for_mlock: RangeMap<UserAddress, Arc<PinnedMapping>>,
253
254    forkable_state: MemoryManagerForkableState,
255}
256
257// 64 KiB below the 4 GiB boundary.
258const LOWER_4GB_LIMIT: UserAddress = UserAddress::const_from(0xffff_0000);
259
260#[derive(Default, Clone)]
261pub struct MemoryManagerForkableState {
262    /// State for the brk and sbrk syscalls.
263    brk: Option<ProgramBreak>,
264
265    /// The namespace node that represents the executable associated with this task.
266    executable_node: Option<NamespaceNode>,
267
268    pub stack_size: usize,
269    pub stack_start: UserAddress,
270    pub auxv_start: UserAddress,
271    pub auxv_end: UserAddress,
272    pub argv_start: UserAddress,
273    pub argv_end: UserAddress,
274    pub environ_start: UserAddress,
275    pub environ_end: UserAddress,
276
277    /// vDSO location
278    pub vdso_base: UserAddress,
279
280    /// Randomized regions:
281    pub mmap_top: UserAddress,
282    pub stack_origin: UserAddress,
283    pub brk_origin: UserAddress,
284
285    // Membarrier registrations
286    membarrier_registrations: MembarrierRegistrations,
287}
288
289impl Deref for MemoryManagerState {
290    type Target = MemoryManagerForkableState;
291    fn deref(&self) -> &Self::Target {
292        &self.forkable_state
293    }
294}
295
296impl DerefMut for MemoryManagerState {
297    fn deref_mut(&mut self) -> &mut Self::Target {
298        &mut self.forkable_state
299    }
300}
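// Editorial note: these Deref/DerefMut impls let callers read and write the
// forkable fields directly through `MemoryManagerState`, e.g. (sketch):
//
//     let top = state.mmap_top;      // reads forkable_state.mmap_top
//     state.stack_size = new_size;   // writes forkable_state.stack_size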
301
302#[derive(Debug, Default)]
303struct ReleasedMappings {
304    doomed: Vec<Mapping>,
305    doomed_pins: Vec<Arc<PinnedMapping>>,
306}
307
308impl ReleasedMappings {
309    fn extend(&mut self, mappings: impl IntoIterator<Item = Mapping>) {
310        self.doomed.extend(mappings);
311    }
312
313    fn extend_pins(&mut self, mappings: impl IntoIterator<Item = Arc<PinnedMapping>>) {
314        self.doomed_pins.extend(mappings);
315    }
316
317    fn is_empty(&self) -> bool {
318        self.doomed.is_empty() && self.doomed_pins.is_empty()
319    }
320
321    #[cfg(test)]
322    fn len(&self) -> usize {
323        self.doomed.len() + self.doomed_pins.len()
324    }
325
326    fn finalize(&mut self, mm_state: RwLockWriteGuard<'_, MemoryManagerState>) {
327        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
328        // in `DirEntry`'s `drop`.
329        std::mem::drop(mm_state);
330        std::mem::take(&mut self.doomed);
331        std::mem::take(&mut self.doomed_pins);
332    }
333}
334
335impl Drop for ReleasedMappings {
336    fn drop(&mut self) {
337        assert!(self.is_empty(), "ReleasedMappings::finalize() must be called before drop");
338    }
339}
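// Editorial sketch of the intended usage pattern (the caller's field and method
// names below are assumptions, not taken from the original source): callers
// collect doomed mappings while holding the state lock, then call `finalize()`,
// which drops the lock before the mappings themselves are dropped.
//
//     let mut released = ReleasedMappings::default();
//     let mut state = mm.state.write();
//     state.unmap(&mm, addr, length, &mut released)?;
//     released.finalize(state);
//     // `released` is now empty, so its Drop assertion cannot fire.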
340
341fn map_in_vmar(
342    vmar: &zx::Vmar,
343    vmar_info: &zx::VmarInfo,
344    addr: SelectedAddress,
345    memory: &MemoryObject,
346    memory_offset: u64,
347    length: usize,
348    flags: MappingFlags,
349    populate: bool,
350) -> Result<UserAddress, Errno> {
351    let vmar_offset = addr.addr().checked_sub(vmar_info.base).ok_or_else(|| errno!(ENOMEM))?;
352    let vmar_extra_flags = match addr {
353        SelectedAddress::Fixed(_) => zx::VmarFlags::SPECIFIC,
354        SelectedAddress::FixedOverwrite(_) => ZX_VM_SPECIFIC_OVERWRITE,
355    };
356
357    if populate {
358        let op = if flags.contains(MappingFlags::WRITE) {
359            // Requires ZX_RIGHT_WRITE, which we should expect when the mapping is writable.
360            zx::VmoOp::COMMIT
361        } else {
362            // When we don't expect to have ZX_RIGHT_WRITE, fall back to a VMO op that doesn't
363            // need it.
364            zx::VmoOp::PREFETCH
365        };
366        trace_duration!(CATEGORY_STARNIX_MM, "MmapCommitPages");
367        let _ = memory.op_range(op, memory_offset, length as u64);
368        // "The mmap() call doesn't fail if the mapping cannot be populated."
369    }
370
371    let vmar_maybe_map_range = if populate && !vmar_extra_flags.contains(ZX_VM_SPECIFIC_OVERWRITE) {
372        zx::VmarFlags::MAP_RANGE
373    } else {
374        zx::VmarFlags::empty()
375    };
376    let vmar_flags = flags.access_flags().to_vmar_flags()
377        | zx::VmarFlags::ALLOW_FAULTS
378        | vmar_extra_flags
379        | vmar_maybe_map_range;
380
381    let map_result = memory.map_in_vmar(vmar, vmar_offset.ptr(), memory_offset, length, vmar_flags);
382    let mapped_addr = map_result.map_err(MemoryManager::get_errno_for_map_err)?;
383
384    Ok(UserAddress::from_ptr(mapped_addr))
385}
386
387impl MemoryManagerState {
388    /// Returns occupied address ranges that intersect with the given range.
389    ///
390    /// An address range is "occupied" if (a) there is already a mapping in that range or (b) there
391    /// is a GROWSDOWN mapping <= 256 pages above that range. The 256 pages below a GROWSDOWN
392    /// mapping form the "guard region." The memory manager avoids mapping memory in the guard region
393    /// in some circumstances to preserve space for the GROWSDOWN mapping to grow down.
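    ///
    /// For example, with a 4 KiB page size (illustrative; the actual value comes from
    /// `zx::system_get_page_size()`), the guard region spans
    /// 256 * 4 KiB = 1 MiB immediately below the start of a GROWSDOWN mapping.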
394    fn get_occupied_address_ranges<'a>(
395        &'a self,
396        subrange: &'a Range<UserAddress>,
397    ) -> impl Iterator<Item = Range<UserAddress>> + 'a {
398        let query_range = subrange.start
399            ..(subrange
400                .end
401                .saturating_add(*PAGE_SIZE as usize * GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS));
402        self.mappings.range(query_range).filter_map(|(range, mapping)| {
403            let occupied_range = mapping.inflate_to_include_guard_pages(range);
404            if occupied_range.start < subrange.end && subrange.start < occupied_range.end {
405                Some(occupied_range)
406            } else {
407                None
408            }
409        })
410    }
411
412    fn count_possible_placements(
413        &self,
414        length: usize,
415        subrange: &Range<UserAddress>,
416    ) -> Option<usize> {
417        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
418        let mut possible_placements = 0;
419        // If the allocation is placed at the first available address, every page left before the
420        // next mapping (or the end of the subrange) adds one potential placement.
421        let mut first_fill_end = subrange.start.checked_add(length)?;
422        while first_fill_end <= subrange.end {
423            let Some(mapping) = occupied_ranges.next() else {
424                possible_placements += (subrange.end - first_fill_end) / (*PAGE_SIZE as usize) + 1;
425                break;
426            };
427            if mapping.start >= first_fill_end {
428                possible_placements += (mapping.start - first_fill_end) / (*PAGE_SIZE as usize) + 1;
429            }
430            first_fill_end = mapping.end.checked_add(length)?;
431        }
432        Some(possible_placements)
433    }
434
435    fn pick_placement(
436        &self,
437        length: usize,
438        mut chosen_placement_idx: usize,
439        subrange: &Range<UserAddress>,
440    ) -> Option<UserAddress> {
441        let mut candidate =
442            Range { start: subrange.start, end: subrange.start.checked_add(length)? };
443        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
444        loop {
445            let Some(mapping) = occupied_ranges.next() else {
446                // No more mappings: treat the rest of the index as an offset.
447                let res =
448                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
449                debug_assert!(res.checked_add(length)? <= subrange.end);
450                return Some(res);
451            };
452            if mapping.start < candidate.end {
453                // doesn't fit, skip
454                candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
455                continue;
456            }
457            let unused_space =
458                (mapping.start.ptr() - candidate.end.ptr()) / (*PAGE_SIZE as usize) + 1;
459            if unused_space > chosen_placement_idx {
460                // Chosen placement is within the range; treat the rest of the index as an offset.
461                let res =
462                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
463                return Some(res);
464            }
465
466            // chosen address is further up, skip
467            chosen_placement_idx -= unused_space;
468            candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
469        }
470    }
471
472    fn find_random_unused_range(
473        &self,
474        length: usize,
475        subrange: &Range<UserAddress>,
476    ) -> Option<UserAddress> {
477        let possible_placements = self.count_possible_placements(length, subrange)?;
478        if possible_placements == 0 {
479            return None;
480        }
481        let chosen_placement_idx = rand::random_range(0..possible_placements);
482        self.pick_placement(length, chosen_placement_idx, subrange)
483    }
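    // Editorial worked example for the placement helpers above (page numbers are
    // illustrative): with a subrange covering pages [0, 8), a 2-page request and
    // pages [3, 4) occupied, the valid start pages are {0, 1, 4, 5, 6}, so
    // `count_possible_placements` returns 5. A chosen index of 3 is then resolved
    // by `pick_placement`: the first gap absorbs 2 placements, leaving index 1 in
    // the gap that starts at page 4, i.e. a start address at page 5.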
484
485    // Find the first unused range of addresses that fits a mapping of `length` bytes, searching
486    // from `mmap_top` downwards.
487    pub fn find_next_unused_range(&self, length: usize) -> Option<UserAddress> {
488        let gap_size = length as u64;
489        let mut upper_bound = self.mmap_top;
490
491        loop {
492            let gap_end = self.mappings.find_gap_end(gap_size, &upper_bound);
493            let candidate = gap_end.checked_sub(length)?;
494
495            // Is there a next mapping? If not, the candidate is already good.
496            let Some((occupied_range, mapping)) = self.mappings.get(gap_end) else {
497                return Some(candidate);
498            };
499            let occupied_range = mapping.inflate_to_include_guard_pages(occupied_range);
500            // If it doesn't overlap, the gap is big enough to fit.
501            if occupied_range.start >= gap_end {
502                return Some(candidate);
503            }
504            // If there was a mapping in the way, use the start of that range as the upper bound.
505            upper_bound = occupied_range.start;
506        }
507    }
508
509    // Accept the hint if the range is unused and within the range available for mapping.
510    fn is_hint_acceptable(&self, hint_addr: UserAddress, length: usize) -> bool {
511        let Some(hint_end) = hint_addr.checked_add(length) else {
512            return false;
513        };
514        if !RESTRICTED_ASPACE_RANGE.contains(&hint_addr.ptr())
515            || !RESTRICTED_ASPACE_RANGE.contains(&hint_end.ptr())
516        {
517            return false;
518        };
519        self.get_occupied_address_ranges(&(hint_addr..hint_end)).next().is_none()
520    }
521
522    fn select_address(
523        &self,
524        addr: DesiredAddress,
525        length: usize,
526        flags: MappingFlags,
527    ) -> Result<SelectedAddress, Errno> {
528        let adjusted_length = round_up_to_system_page_size(length).or_else(|_| error!(ENOMEM))?;
529
530        let find_address = || -> Result<SelectedAddress, Errno> {
531            let new_addr = if flags.contains(MappingFlags::LOWER_32BIT) {
532                // MAP_32BIT specifies that the memory allocated will
533                // be within the first 2 GB of the process address space.
534                self.find_random_unused_range(
535                    adjusted_length,
536                    &(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
537                        ..UserAddress::from_ptr(0x80000000)),
538                )
539                .ok_or_else(|| errno!(ENOMEM))?
540            } else {
541                self.find_next_unused_range(adjusted_length).ok_or_else(|| errno!(ENOMEM))?
542            };
543
544            Ok(SelectedAddress::Fixed(new_addr))
545        };
546
547        Ok(match addr {
548            DesiredAddress::Any => find_address()?,
549            DesiredAddress::Hint(hint_addr) => {
550                // Round down to page size
551                let hint_addr =
552                    UserAddress::from_ptr(hint_addr.ptr() - hint_addr.ptr() % *PAGE_SIZE as usize);
553                if self.is_hint_acceptable(hint_addr, adjusted_length) {
554                    SelectedAddress::Fixed(hint_addr)
555                } else {
556                    find_address()?
557                }
558            }
559            DesiredAddress::Fixed(addr) => SelectedAddress::Fixed(addr),
560            DesiredAddress::FixedOverwrite(addr) => SelectedAddress::FixedOverwrite(addr),
561        })
562    }
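    // Editorial example of the address-selection rules above (addresses are
    // illustrative): a MAP_32BIT request gets a random free slot in
    // [RESTRICTED_ASPACE_BASE, 0x8000_0000), while a hint such as 0x7f00_1234 is
    // first rounded down to 0x7f00_1000 (assuming 4 KiB pages) and honored only
    // if the rounded range is unoccupied; otherwise the normal search is used.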
563
564    // Map the memory without updating `self.mappings`.
565    fn map_in_user_vmar(
566        &self,
567        addr: SelectedAddress,
568        memory: &MemoryObject,
569        memory_offset: u64,
570        length: usize,
571        flags: MappingFlags,
572        populate: bool,
573    ) -> Result<UserAddress, Errno> {
574        map_in_vmar(
575            &self.user_vmar,
576            &self.user_vmar_info,
577            addr,
578            memory,
579            memory_offset,
580            length,
581            flags,
582            populate,
583        )
584    }
585
586    fn validate_addr(&self, addr: DesiredAddress, length: usize) -> Result<(), Errno> {
587        if let DesiredAddress::FixedOverwrite(addr) = addr {
588            if self.check_has_unauthorized_splits(addr, length) {
589                return error!(ENOMEM);
590            }
591        }
592        Ok(())
593    }
594
595    fn map_memory(
596        &mut self,
597        mm: &Arc<MemoryManager>,
598        addr: DesiredAddress,
599        memory: Arc<MemoryObject>,
600        memory_offset: u64,
601        length: usize,
602        flags: MappingFlags,
603        max_access: Access,
604        populate: bool,
605        name: MappingName,
606        released_mappings: &mut ReleasedMappings,
607    ) -> Result<UserAddress, Errno> {
608        self.validate_addr(addr, length)?;
609
610        let selected_address = self.select_address(addr, length, flags)?;
611        let mapped_addr = self.map_in_user_vmar(
612            selected_address,
613            &memory,
614            memory_offset,
615            length,
616            flags,
617            populate,
618        )?;
619
620        #[cfg(any(test, debug_assertions))]
621        {
622            // Take the lock on the directory entry while holding the one on the mm state so that
623            // any wrong ordering triggers the tracing-mutex at the right call site.
624            if let MappingName::File(file) = &name {
625                let _l1 = file.name.entry.read();
626            }
627        }
628
629        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
630
631        if let DesiredAddress::FixedOverwrite(addr) = addr {
632            assert_eq!(addr, mapped_addr);
633            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
634        }
635
636        let mapping = Mapping::with_name(
637            self.create_memory_backing(mapped_addr, memory, memory_offset),
638            flags,
639            max_access,
640            name,
641        );
642        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
643
644        Ok(mapped_addr)
645    }
646
647    fn map_private_anonymous(
648        &mut self,
649        mm: &Arc<MemoryManager>,
650        addr: DesiredAddress,
651        length: usize,
652        prot_flags: ProtectionFlags,
653        options: MappingOptions,
654        populate: bool,
655        name: MappingName,
656        released_mappings: &mut ReleasedMappings,
657    ) -> Result<UserAddress, Errno> {
658        self.validate_addr(addr, length)?;
659
660        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
661        let selected_addr = self.select_address(addr, length, flags)?;
662        let backing_memory_offset = selected_addr.addr().ptr();
663
664        let mapped_addr = self.map_in_user_vmar(
665            selected_addr,
666            &self.private_anonymous.backing,
667            backing_memory_offset as u64,
668            length,
669            flags,
670            populate,
671        )?;
672
673        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
674        if let DesiredAddress::FixedOverwrite(addr) = addr {
675            assert_eq!(addr, mapped_addr);
676            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
677        }
678
679        let mapping = Mapping::new_private_anonymous(flags, name);
680        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
681
682        Ok(mapped_addr)
683    }
684
685    fn map_anonymous(
686        &mut self,
687        mm: &Arc<MemoryManager>,
688        addr: DesiredAddress,
689        length: usize,
690        prot_flags: ProtectionFlags,
691        options: MappingOptions,
692        name: MappingName,
693        released_mappings: &mut ReleasedMappings,
694    ) -> Result<UserAddress, Errno> {
695        if !options.contains(MappingOptions::SHARED) {
696            return self.map_private_anonymous(
697                mm,
698                addr,
699                length,
700                prot_flags,
701                options,
702                options.contains(MappingOptions::POPULATE),
703                name,
704                released_mappings,
705            );
706        }
707        let memory = create_anonymous_mapping_memory(length as u64)?;
708        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
709        self.map_memory(
710            mm,
711            addr,
712            memory,
713            0,
714            length,
715            flags,
716            Access::rwx(),
717            options.contains(MappingOptions::POPULATE),
718            name,
719            released_mappings,
720        )
721    }
722
723    fn remap(
724        &mut self,
725        _current_task: &CurrentTask,
726        mm: &Arc<MemoryManager>,
727        old_addr: UserAddress,
728        old_length: usize,
729        new_length: usize,
730        flags: MremapFlags,
731        new_addr: UserAddress,
732        released_mappings: &mut ReleasedMappings,
733    ) -> Result<UserAddress, Errno> {
734        // MREMAP_FIXED moves a mapping, which requires MREMAP_MAYMOVE.
735        if flags.contains(MremapFlags::FIXED) && !flags.contains(MremapFlags::MAYMOVE) {
736            return error!(EINVAL);
737        }
738
739        // MREMAP_DONTUNMAP is always a move, so it requires MREMAP_MAYMOVE.
740        // There is no resizing allowed either.
741        if flags.contains(MremapFlags::DONTUNMAP)
742            && (!flags.contains(MremapFlags::MAYMOVE) || old_length != new_length)
743        {
744            return error!(EINVAL);
745        }
746
747        // In-place copies are invalid.
748        if !flags.contains(MremapFlags::MAYMOVE) && old_length == 0 {
749            return error!(ENOMEM);
750        }
751
752        if new_length == 0 {
753            return error!(EINVAL);
754        }
755
756        // Make sure old_addr is page-aligned.
757        if !old_addr.is_aligned(*PAGE_SIZE) {
758            return error!(EINVAL);
759        }
760
761        let old_length = round_up_to_system_page_size(old_length)?;
762        let new_length = round_up_to_system_page_size(new_length)?;
763
764        if self.check_has_unauthorized_splits(old_addr, old_length) {
765            return error!(EINVAL);
766        }
767
768        if self.check_has_unauthorized_splits(new_addr, new_length) {
769            return error!(EINVAL);
770        }
771
772        if !flags.contains(MremapFlags::DONTUNMAP)
773            && !flags.contains(MremapFlags::FIXED)
774            && old_length != 0
775        {
776            // We are not requested to remap to a specific address, so first we see if we can remap
777            // in-place. In-place copies (old_length == 0) are not allowed.
778            if let Some(new_addr) =
779                self.try_remap_in_place(mm, old_addr, old_length, new_length, released_mappings)?
780            {
781                return Ok(new_addr);
782            }
783        }
784
785        // There is no space to grow in place, or there is an explicit request to move.
786        if flags.contains(MremapFlags::MAYMOVE) {
787            let dst_address =
788                if flags.contains(MremapFlags::FIXED) { Some(new_addr) } else { None };
789            self.remap_move(
790                mm,
791                old_addr,
792                old_length,
793                dst_address,
794                new_length,
795                flags.contains(MremapFlags::DONTUNMAP),
796                released_mappings,
797            )
798        } else {
799            error!(ENOMEM)
800        }
801    }
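    // Editorial examples of the flag validation above (hypothetical inputs):
    //
    //     flags = MremapFlags::FIXED                              -> EINVAL (FIXED requires MAYMOVE)
    //     flags = MremapFlags::MAYMOVE | MremapFlags::DONTUNMAP,
    //         old_length != new_length                            -> EINVAL (DONTUNMAP forbids resizing)
    //     new_length == 0                                         -> EINVAL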
802
803    /// Attempts to grow or shrink the mapping in-place. Returns `Ok(Some(addr))` if the remap was
804    /// successful. Returns `Ok(None)` if there was no space to grow.
805    fn try_remap_in_place(
806        &mut self,
807        mm: &Arc<MemoryManager>,
808        old_addr: UserAddress,
809        old_length: usize,
810        new_length: usize,
811        released_mappings: &mut ReleasedMappings,
812    ) -> Result<Option<UserAddress>, Errno> {
813        let old_range = old_addr..old_addr.checked_add(old_length).ok_or_else(|| errno!(EINVAL))?;
814        let new_range_in_place =
815            old_addr..old_addr.checked_add(new_length).ok_or_else(|| errno!(EINVAL))?;
816
817        if new_length <= old_length {
818            // Shrink the mapping in-place, which should always succeed.
819            // This is done by unmapping the extraneous region.
820            if new_length != old_length {
821                self.unmap(mm, new_range_in_place.end, old_length - new_length, released_mappings)?;
822            }
823            return Ok(Some(old_addr));
824        }
825
826        if self.mappings.range(old_range.end..new_range_in_place.end).next().is_some() {
827            // There is a mapping in the growth range preventing in-place growth.
828            return Ok(None);
829        }
830
831        // There is space to grow in-place. The old range must be one contiguous mapping.
832        let (original_range, mapping) =
833            self.mappings.get(old_addr).ok_or_else(|| errno!(EINVAL))?;
834
835        if old_range.end > original_range.end {
836            return error!(EFAULT);
837        }
838        let original_range = original_range.clone();
839        let original_mapping = mapping.clone();
840
841        // Compute the new length of the entire mapping once it has grown.
842        let final_length = (original_range.end - original_range.start) + (new_length - old_length);
843
844        match self.get_mapping_backing(&original_mapping) {
845            MappingBacking::Memory(backing) => {
846                // Re-map the original range, which may include pages before the requested range.
847                Ok(Some(self.map_memory(
848                    mm,
849                    DesiredAddress::FixedOverwrite(original_range.start),
850                    backing.memory().clone(),
851                    backing.address_to_offset(original_range.start),
852                    final_length,
853                    original_mapping.flags(),
854                    original_mapping.max_access(),
855                    false,
856                    original_mapping.name(),
857                    released_mappings,
858                )?))
859            }
860            MappingBacking::PrivateAnonymous => {
861                let growth_start = original_range.end;
862                let growth_length = new_length - old_length;
863                let final_end = (original_range.start + final_length)?;
864                // Map new pages to back the growth.
865                self.map_in_user_vmar(
866                    SelectedAddress::FixedOverwrite(growth_start),
867                    &self.private_anonymous.backing,
868                    growth_start.ptr() as u64,
869                    growth_length,
870                    original_mapping.flags(),
871                    false,
872                )?;
873                // Overwrite the mapping entry with the new larger size.
874                released_mappings.extend(
875                    self.mappings.insert(original_range.start..final_end, original_mapping.clone()),
876                );
877                Ok(Some(original_range.start))
878            }
879        }
880    }
881
882    /// Grows or shrinks the mapping while moving it to a new destination.
883    fn remap_move(
884        &mut self,
885        mm: &Arc<MemoryManager>,
886        src_addr: UserAddress,
887        src_length: usize,
888        dst_addr: Option<UserAddress>,
889        dst_length: usize,
890        keep_source: bool,
891        released_mappings: &mut ReleasedMappings,
892    ) -> Result<UserAddress, Errno> {
893        let src_range = src_addr..src_addr.checked_add(src_length).ok_or_else(|| errno!(EINVAL))?;
894        let (original_range, src_mapping) =
895            self.mappings.get(src_addr).ok_or_else(|| errno!(EINVAL))?;
896        let original_range = original_range.clone();
897        let src_mapping = src_mapping.clone();
898
899        if src_length == 0 && !src_mapping.flags().contains(MappingFlags::SHARED) {
900            // src_length == 0 means that the mapping is to be copied. This behavior is only valid
901            // with MAP_SHARED mappings.
902            return error!(EINVAL);
903        }
904
905        // If the destination range is smaller than the source range, we must first shrink
906        // the source range in place. This must happen now and be visible to other processes,
907        // even if a later failure causes the remap operation to fail.
908        if src_length != 0 && src_length > dst_length {
909            self.unmap(mm, (src_addr + dst_length)?, src_length - dst_length, released_mappings)?;
910        }
911
912        let dst_addr_for_map = match dst_addr {
913            None => DesiredAddress::Any,
914            Some(dst_addr) => {
915                // The mapping is being moved to a specific address.
916                let dst_range =
917                    dst_addr..(dst_addr.checked_add(dst_length).ok_or_else(|| errno!(EINVAL))?);
918                if !src_range.intersect(&dst_range).is_empty() {
919                    return error!(EINVAL);
920                }
921
922                // The destination range must be unmapped. This must happen now and be visible to
923                // other processes, even if a later failure causes the remap operation to fail.
924                self.unmap(mm, dst_addr, dst_length, released_mappings)?;
925
926                DesiredAddress::Fixed(dst_addr)
927            }
928        };
929
930        // According to gVisor's aio_test, Linux checks for DONT_EXPAND after unmapping the dst
931        // range.
932        if dst_length > src_length && src_mapping.flags().contains(MappingFlags::DONT_EXPAND) {
933            return error!(EFAULT);
934        }
935
936        if src_range.end > original_range.end {
937            // The source range is not one contiguous mapping. This check must be done only after
938            // the source range is shrunk and the destination unmapped.
939            return error!(EFAULT);
940        }
941
942        match self.get_mapping_backing(&src_mapping) {
943            MappingBacking::PrivateAnonymous => {
944                let dst_addr =
945                    self.select_address(dst_addr_for_map, dst_length, src_mapping.flags())?.addr();
946                let dst_end = (dst_addr + dst_length)?;
947
948                let length_to_move = std::cmp::min(dst_length, src_length) as u64;
949                let growth_start_addr = (dst_addr + length_to_move)?;
950
951                if dst_addr != src_addr {
952                    let src_move_end = (src_range.start + length_to_move)?;
953                    let range_to_move = src_range.start..src_move_end;
954                    // Move the previously mapped pages into their new location.
955                    self.private_anonymous.move_pages(&range_to_move, dst_addr)?;
956                }
957
958                // Userfault registration is not preserved by remap
959                let new_flags =
960                    src_mapping.flags().difference(MappingFlags::UFFD | MappingFlags::UFFD_MISSING);
961                self.map_in_user_vmar(
962                    SelectedAddress::FixedOverwrite(dst_addr),
963                    &self.private_anonymous.backing,
964                    dst_addr.ptr() as u64,
965                    dst_length,
966                    new_flags,
967                    false,
968                )?;
969
970                if dst_length > src_length {
971                    // The mapping has grown, map new pages in to cover the growth.
972                    let growth_length = dst_length - src_length;
973
974                    self.map_private_anonymous(
975                        mm,
976                        DesiredAddress::FixedOverwrite(growth_start_addr),
977                        growth_length,
978                        new_flags.access_flags(),
979                        new_flags.options(),
980                        false,
981                        src_mapping.name(),
982                        released_mappings,
983                    )?;
984                }
985
986                released_mappings.extend(self.mappings.insert(
987                    dst_addr..dst_end,
988                    Mapping::new_private_anonymous(new_flags, src_mapping.name()),
989                ));
990
991                if dst_addr != src_addr && src_length != 0 && !keep_source {
992                    self.unmap(mm, src_addr, src_length, released_mappings)?;
993                }
994
995                return Ok(dst_addr);
996            }
997            MappingBacking::Memory(backing) => {
998                // This mapping is backed by an FD or is a shared anonymous mapping. Just map the
999                // range of the memory object covering the moved pages. If the memory object already
1000                // had COW semantics, this preserves them.
1001                let (dst_memory_offset, memory) =
1002                    (backing.address_to_offset(src_addr), backing.memory().clone());
1003
1004                let new_address = self.map_memory(
1005                    mm,
1006                    dst_addr_for_map,
1007                    memory,
1008                    dst_memory_offset,
1009                    dst_length,
1010                    src_mapping.flags(),
1011                    src_mapping.max_access(),
1012                    false,
1013                    src_mapping.name(),
1014                    released_mappings,
1015                )?;
1016
1017                if src_length != 0 && !keep_source {
1018                    // Only unmap the source range if this is not a copy and there was no explicit
1019                    // request to keep it mapped. We checked earlier that when src_length == 0 the
1020                    // mapping is MAP_SHARED.
1021                    self.unmap(mm, src_addr, src_length, released_mappings)?;
1022                }
1023
1024                return Ok(new_address);
1025            }
1026        };
1027    }
1028
1029    // Checks if an operation may be performed over the target mapping that may
1030    // result in a split mapping.
1031    //
1032    // An operation may be forbidden if the target mapping only partially covers
1033    // an existing mapping with the `MappingOptions::DONT_SPLIT` flag set.
1034    fn check_has_unauthorized_splits(&self, addr: UserAddress, length: usize) -> bool {
1035        let query_range = addr..addr.saturating_add(length);
1036        let mut intersection = self.mappings.range(query_range.clone());
1037
1038        // A mapping is not OK if it disallows splitting and the target range
1039        // does not fully cover the mapping range.
1040        let check_if_mapping_has_unauthorized_split =
1041            |mapping: Option<(&Range<UserAddress>, &Mapping)>| {
1042                mapping.is_some_and(|(mapping_range, mapping)| {
1043                    mapping.flags().contains(MappingFlags::DONT_SPLIT)
1044                        && (mapping_range.start < query_range.start
1045                            || query_range.end < mapping_range.end)
1046                })
1047            };
1048
1049        // We only check the first and last mappings in the range because naturally,
1050        // the mappings in the middle are fully covered by the target mapping and
1051        // won't be split.
1052        check_if_mapping_has_unauthorized_split(intersection.next())
1053            || check_if_mapping_has_unauthorized_split(intersection.next_back())
1054    }
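    // Editorial example (page numbers are illustrative): if a DONT_SPLIT mapping
    // covers pages [2, 6), a target range of [4, 8) only partially covers it
    // (2 < 4), so the operation is rejected; target ranges of [2, 6) or [0, 8)
    // cover it entirely and are allowed to proceed.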
1055
1056    /// Unmaps the specified range. Unmapped mappings are placed in `released_mappings`.
1057    fn unmap(
1058        &mut self,
1059        mm: &Arc<MemoryManager>,
1060        addr: UserAddress,
1061        length: usize,
1062        released_mappings: &mut ReleasedMappings,
1063    ) -> Result<(), Errno> {
1064        if !addr.is_aligned(*PAGE_SIZE) {
1065            return error!(EINVAL);
1066        }
1067        let length = round_up_to_system_page_size(length)?;
1068        if length == 0 {
1069            return error!(EINVAL);
1070        }
1071
1072        if self.check_has_unauthorized_splits(addr, length) {
1073            return error!(EINVAL);
1074        }
1075
1076        // Unmap the range, including the tail of any range that would have been split. This
1077        // operation is safe because we're operating on another process.
1078        #[allow(
1079            clippy::undocumented_unsafe_blocks,
1080            reason = "Force documented unsafe blocks in Starnix"
1081        )]
1082        match unsafe { self.user_vmar.unmap(addr.ptr(), length) } {
1083            Ok(_) => (),
1084            Err(zx::Status::NOT_FOUND) => (),
1085            Err(zx::Status::INVALID_ARGS) => return error!(EINVAL),
1086            Err(status) => {
1087                impossible_error(status);
1088            }
1089        };
1090
1091        self.update_after_unmap(mm, addr, length, released_mappings)?;
1092
1093        Ok(())
1094    }
1095
1096    // Updates `self.mappings` after the specified range was unmapped.
1097    //
1098    // The range to unmap can span multiple mappings, and can split mappings if
1099    // the range start or end falls in the middle of a mapping.
1100    //
1101    // Private anonymous memory is contained in the same memory object; the pages of that object
1102    // that are no longer reachable should be released.
1103    //
1104    // File-backed mappings don't need to have their memory object modified.
1105    //
1106    // Unmapped mappings are placed in `released_mappings`.
1107    fn update_after_unmap(
1108        &mut self,
1109        mm: &Arc<MemoryManager>,
1110        addr: UserAddress,
1111        length: usize,
1112        released_mappings: &mut ReleasedMappings,
1113    ) -> Result<(), Errno> {
1114        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
1115        let unmap_range = addr..end_addr;
1116
1117        // Remove any shadow mappings for mlock()'d pages that are now unmapped.
1118        released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(unmap_range.clone()));
1119
1120        for (range, mapping) in self.mappings.range(unmap_range.clone()) {
1121            // Deallocate any pages in the private, anonymous backing that are now unreachable.
1122            if let MappingBacking::PrivateAnonymous = self.get_mapping_backing(mapping) {
1123                let unmapped_range = &unmap_range.intersect(range);
1124
1125                mm.inflight_vmspliced_payloads
1126                    .handle_unmapping(&self.private_anonymous.backing, unmapped_range)?;
1127
1128                self.private_anonymous
1129                    .zero(unmapped_range.start, unmapped_range.end - unmapped_range.start)?;
1130            }
1131        }
1132        released_mappings.extend(self.mappings.remove(unmap_range));
1133        return Ok(());
1134    }
1135
1136    fn protect_vmar_range(
1137        &self,
1138        addr: UserAddress,
1139        length: usize,
1140        prot_flags: ProtectionFlags,
1141    ) -> Result<(), Errno> {
1142        let vmar_flags = prot_flags.to_vmar_flags();
1143        // SAFETY: Modifying the user vmar, which belongs to a different process.
1144        unsafe { self.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(|s| match s {
1145            zx::Status::INVALID_ARGS => errno!(EINVAL),
1146            zx::Status::NOT_FOUND => errno!(ENOMEM),
1147            zx::Status::ACCESS_DENIED => errno!(EACCES),
1148            _ => impossible_error(s),
1149        })
1150    }
1151
1152    fn protect(
1153        &mut self,
1154        current_task: &CurrentTask,
1155        addr: UserAddress,
1156        length: usize,
1157        prot_flags: ProtectionFlags,
1158        released_mappings: &mut ReleasedMappings,
1159    ) -> Result<(), Errno> {
1160        let vmar_flags = prot_flags.to_vmar_flags();
1161        let page_size = *PAGE_SIZE;
1162        let end = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(page_size)?;
1163
1164        if self.check_has_unauthorized_splits(addr, length) {
1165            return error!(EINVAL);
1166        }
1167
1168        let prot_range = if prot_flags.contains(ProtectionFlags::GROWSDOWN) {
1169            let mut start = addr;
1170            let Some((range, mapping)) = self.mappings.get(start) else {
1171                return error!(EINVAL);
1172            };
1173            // Ensure that the mapping has GROWSDOWN if PROT_GROWSDOWN was specified.
1174            if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1175                return error!(EINVAL);
1176            }
1177            let access_flags = mapping.flags().access_flags();
1178            // From <https://man7.org/linux/man-pages/man2/mprotect.2.html>:
1179            //
1180            //   PROT_GROWSDOWN
1181            //     Apply the protection mode down to the beginning of a
1182            //     mapping that grows downward (which should be a stack
1183            //     segment or a segment mapped with the MAP_GROWSDOWN flag
1184            //     set).
1185            start = range.start;
1186            while let Some((range, mapping)) =
1187                self.mappings.get(start.saturating_sub(page_size as usize))
1188            {
1189                if !mapping.flags().contains(MappingFlags::GROWSDOWN)
1190                    || mapping.flags().access_flags() != access_flags
1191                {
1192                    break;
1193                }
1194                start = range.start;
1195            }
1196            start..end
1197        } else {
1198            addr..end
1199        };
1200
1201        let addr = prot_range.start;
1202        let length = prot_range.end - prot_range.start;
1203
1204        // TODO: We should check the max_access flags on all the mappings in this range.
1205        //       There are cases where max_access is more restrictive than the Zircon rights
1206        //       we hold on the underlying VMOs.
1207
1208        // TODO(https://fxbug.dev/411617451): `mprotect` should apply the protection flags
1209        // until it encounters a mapping that doesn't allow it, rather than not apply the protection
1210        // flags at all if a single mapping doesn't allow it.
1211        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1212            security::file_mprotect(current_task, range, mapping, prot_flags)?;
1213        }
1214
1215        // Make one call to mprotect to update all the zircon protections.
1216        // SAFETY: This is safe because the vmar belongs to a different process.
1217        unsafe { self.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(|s| match s {
1218            zx::Status::INVALID_ARGS => errno!(EINVAL),
1219            zx::Status::NOT_FOUND => {
1220                track_stub!(
1221                    TODO("https://fxbug.dev/322875024"),
1222                    "mprotect: succeed and update prot after NOT_FOUND"
1223                );
1224                errno!(EINVAL)
1225            }
1226            zx::Status::ACCESS_DENIED => errno!(EACCES),
1227            _ => impossible_error(s),
1228        })?;
1229
1230        // Update the flags on each mapping in the range.
1231        let mut updates = vec![];
1232        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1233            if mapping.flags().contains(MappingFlags::UFFD) {
1234                track_stub!(
1235                    TODO("https://fxbug.dev/297375964"),
1236                    "mprotect on uffd-registered range should not alter protections"
1237                );
1238                return error!(EINVAL);
1239            }
1240            let range = range.intersect(&prot_range);
1241            let mut mapping = mapping.clone();
1242            mapping.set_flags(mapping.flags().with_access_flags(prot_flags));
1243            updates.push((range, mapping));
1244        }
1245        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1246        for (range, mapping) in updates {
1247            released_mappings.extend(self.mappings.insert(range, mapping));
1248        }
1249        Ok(())
1250    }
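    // Editorial example (addresses illustrative): if GROWSDOWN mappings cover
    // [0x1000, 0x2000) and [0x2000, 0x3000) with identical access flags, then
    // `mprotect(0x2800, 0x800, PROT_READ | PROT_GROWSDOWN)` extends the range
    // down to 0x1000 and applies the new protection to [0x1000, 0x3000),
    // assuming a 4 KiB page size.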
1251
1252    fn madvise(
1253        &mut self,
1254        _current_task: &CurrentTask,
1255        addr: UserAddress,
1256        length: usize,
1257        advice: u32,
1258        released_mappings: &mut ReleasedMappings,
1259    ) -> Result<(), Errno> {
1260        if !addr.is_aligned(*PAGE_SIZE) {
1261            return error!(EINVAL);
1262        }
1263
1264        let end_addr =
1265            addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(*PAGE_SIZE)?;
1266        if end_addr > self.max_address() {
1267            return error!(EFAULT);
1268        }
1269
1270        if advice == MADV_NORMAL {
1271            track_stub!(TODO("https://fxbug.dev/322874202"), "madvise undo hints for MADV_NORMAL");
1272            return Ok(());
1273        }
1274
1275        let mut updates = vec![];
1276        let range_for_op = addr..end_addr;
1277        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
1278            let range_to_zero = range.intersect(&range_for_op);
1279            if range_to_zero.is_empty() {
1280                continue;
1281            }
1282            let start_offset = mapping.address_to_offset(range_to_zero.start);
1283            let end_offset = mapping.address_to_offset(range_to_zero.end);
1284            if advice == MADV_DONTFORK
1285                || advice == MADV_DOFORK
1286                || advice == MADV_WIPEONFORK
1287                || advice == MADV_KEEPONFORK
1288                || advice == MADV_DONTDUMP
1289                || advice == MADV_DODUMP
1290                || advice == MADV_MERGEABLE
1291                || advice == MADV_UNMERGEABLE
1292            {
1293                // WIPEONFORK is only supported on private anonymous mappings per madvise(2).
1294                // KEEPONFORK can be specified on ranges that cover other sorts of mappings. It has
1295                // no effect on mappings that are not private and anonymous, since such mappings
1296                // cannot have the WIPEONFORK option set.
1297                if advice == MADV_WIPEONFORK && !mapping.private_anonymous() {
1298                    return error!(EINVAL);
1299                }
1300                let new_flags = match advice {
1301                    MADV_DONTFORK => mapping.flags() | MappingFlags::DONTFORK,
1302                    MADV_DOFORK => mapping.flags() & MappingFlags::DONTFORK.complement(),
1303                    MADV_WIPEONFORK => mapping.flags() | MappingFlags::WIPEONFORK,
1304                    MADV_KEEPONFORK => mapping.flags() & MappingFlags::WIPEONFORK.complement(),
1305                    MADV_DONTDUMP => {
1306                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTDUMP");
1307                        mapping.flags()
1308                    }
1309                    MADV_DODUMP => {
1310                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DODUMP");
1311                        mapping.flags()
1312                    }
1313                    MADV_MERGEABLE => {
1314                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_MERGEABLE");
1315                        mapping.flags()
1316                    }
1317                    MADV_UNMERGEABLE => {
1318                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_UNMERGEABLE");
1319                        mapping.flags()
1320                    }
1321                    // Only the variants in this match should be reachable given the condition for
1322                    // the containing branch.
1323                    unknown_advice => unreachable!("unknown advice {unknown_advice}"),
1324                };
1325                let mut new_mapping = mapping.clone();
1326                new_mapping.set_flags(new_flags);
1327                updates.push((range_to_zero, new_mapping));
1328            } else {
1329                if mapping.flags().contains(MappingFlags::SHARED) {
1330                    continue;
1331                }
1332                let op = match advice {
1333                    MADV_DONTNEED if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1334                        // Note: we cannot simply implement MADV_DONTNEED with
1335                        // zx::VmoOp::DONT_NEED because they have different
1336                        // semantics.
1337                        track_stub!(
1338                            TODO("https://fxbug.dev/322874496"),
1339                            "MADV_DONTNEED with file-backed mapping"
1340                        );
1341                        return error!(EINVAL);
1342                    }
1343                    MADV_DONTNEED if mapping.flags().contains(MappingFlags::LOCKED) => {
1344                        return error!(EINVAL);
1345                    }
1346                    MADV_DONTNEED => zx::VmoOp::ZERO,
1347                    MADV_DONTNEED_LOCKED => {
1348                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTNEED_LOCKED");
1349                        return error!(EINVAL);
1350                    }
1351                    MADV_WILLNEED => {
1352                        if mapping.flags().contains(MappingFlags::WRITE) {
1353                            zx::VmoOp::COMMIT
1354                        } else {
1355                            zx::VmoOp::PREFETCH
1356                        }
1357                    }
1358                    MADV_COLD => {
1359                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLD");
1360                        return error!(EINVAL);
1361                    }
1362                    MADV_PAGEOUT => {
1363                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_PAGEOUT");
1364                        return error!(EINVAL);
1365                    }
1366                    MADV_POPULATE_READ => {
1367                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_POPULATE_READ");
1368                        return error!(EINVAL);
1369                    }
1370                    MADV_RANDOM => {
1371                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_RANDOM");
1372                        return error!(EINVAL);
1373                    }
1374                    MADV_SEQUENTIAL => {
1375                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SEQUENTIAL");
1376                        return error!(EINVAL);
1377                    }
1378                    MADV_FREE if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1379                        track_stub!(
1380                            TODO("https://fxbug.dev/411748419"),
1381                            "MADV_FREE with file-backed mapping"
1382                        );
1383                        return error!(EINVAL);
1384                    }
1385                    MADV_FREE if mapping.flags().contains(MappingFlags::LOCKED) => {
1386                        return error!(EINVAL);
1387                    }
1388                    MADV_FREE => {
1389                        track_stub!(TODO("https://fxbug.dev/411748419"), "MADV_FREE");
1390                        // TODO(https://fxbug.dev/411748419) For now, treat MADV_FREE like
1391                        // MADV_DONTNEED as a stopgap until we have proper support.
1392                        zx::VmoOp::ZERO
1393                    }
1394                    MADV_REMOVE => {
1395                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_REMOVE");
1396                        return error!(EINVAL);
1397                    }
1398                    MADV_HWPOISON => {
1399                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HWPOISON");
1400                        return error!(EINVAL);
1401                    }
1402                    MADV_SOFT_OFFLINE => {
1403                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SOFT_OFFLINE");
1404                        return error!(EINVAL);
1405                    }
1406                    MADV_HUGEPAGE => {
1407                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HUGEPAGE");
1408                        return error!(EINVAL);
1409                    }
1410                    MADV_COLLAPSE => {
1411                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLLAPSE");
1412                        return error!(EINVAL);
1413                    }
1414                    MADV_NOHUGEPAGE => return Ok(()),
1415                    advice => {
1416                        track_stub!(TODO("https://fxbug.dev/322874202"), "madvise", advice);
1417                        return error!(EINVAL);
1418                    }
1419                };
1420
1421                let memory = match self.get_mapping_backing(mapping) {
1422                    MappingBacking::Memory(backing) => backing.memory(),
1423                    MappingBacking::PrivateAnonymous => &self.private_anonymous.backing,
1424                };
1425                memory.op_range(op, start_offset, end_offset - start_offset).map_err(
1426                    |s| match s {
1427                        zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1428                        zx::Status::NO_MEMORY => errno!(ENOMEM),
1429                        zx::Status::INVALID_ARGS => errno!(EINVAL),
1430                        zx::Status::ACCESS_DENIED => errno!(EACCES),
1431                        _ => impossible_error(s),
1432                    },
1433                )?;
1434            }
1435        }
1436        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1437        for (range, mapping) in updates {
1438            released_mappings.extend(self.mappings.insert(range, mapping));
1439        }
1440        Ok(())
1441    }
1442
1443    fn mlock<L>(
1444        &mut self,
1445        current_task: &CurrentTask,
1446        locked: &mut Locked<L>,
1447        desired_addr: UserAddress,
1448        desired_length: usize,
1449        on_fault: bool,
1450        released_mappings: &mut ReleasedMappings,
1451    ) -> Result<(), Errno>
1452    where
1453        L: LockBefore<ThreadGroupLimits>,
1454    {
1455        let desired_end_addr =
1456            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1457        let start_addr = round_down_to_system_page_size(desired_addr)?;
1458        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1459
1460        let mut updates = vec![];
1461        let mut bytes_mapped_in_range = 0;
1462        let mut num_new_locked_bytes = 0;
1463        let mut failed_to_lock = false;
1464        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1465            let mut range = range.clone();
1466            let mut mapping = mapping.clone();
1467
1468            // Handle mappings that start before the region to be locked.
1469            range.start = std::cmp::max(range.start, start_addr);
1470            // Handle mappings that extend past the region to be locked.
1471            range.end = std::cmp::min(range.end, end_addr);
1472
1473            bytes_mapped_in_range += (range.end - range.start) as u64;
1474
1475            // PROT_NONE mappings generate ENOMEM but are left locked.
1476            if !mapping
1477                .flags()
1478                .intersects(MappingFlags::READ | MappingFlags::WRITE | MappingFlags::EXEC)
1479            {
1480                failed_to_lock = true;
1481            }
1482
1483            if !mapping.flags().contains(MappingFlags::LOCKED) {
1484                num_new_locked_bytes += (range.end - range.start) as u64;
1485                let shadow_mapping = match current_task.kernel().features.mlock_pin_flavor {
1486                    // Pin the memory by mapping the backing memory into the high priority vmar.
1487                    MlockPinFlavor::ShadowProcess => {
1488                        // Keep different shadow processes distinct for accounting purposes.
1489                        struct MlockShadowProcess(memory_pinning::ShadowProcess);
1490                        let shadow_process =
1491                            current_task.kernel().expando.get_or_try_init(|| {
1492                                memory_pinning::ShadowProcess::new(zx::Name::new_lossy(
1493                                    "starnix_mlock_pins",
1494                                ))
1495                                .map(MlockShadowProcess)
1496                                .map_err(|_| errno!(EPERM))
1497                            })?;
1498
1499                        let (vmo, offset) = match self.get_mapping_backing(&mapping) {
1500                            MappingBacking::Memory(m) => (
1501                                m.memory().as_vmo().ok_or_else(|| errno!(ENOMEM))?,
1502                                m.address_to_offset(range.start),
1503                            ),
1504                            MappingBacking::PrivateAnonymous => (
1505                                self.private_anonymous
1506                                    .backing
1507                                    .as_vmo()
1508                                    .ok_or_else(|| errno!(ENOMEM))?,
1509                                range.start.ptr() as u64,
1510                            ),
1511                        };
1512                        Some(shadow_process.0.pin_pages(vmo, offset, range.end - range.start)?)
1513                    }
1514
1515                    // These flavors use VMAR-level operations, so only flags are set per-mapping.
1516                    MlockPinFlavor::Noop | MlockPinFlavor::VmarAlwaysNeed => None,
1517                };
1518                mapping.set_mlock();
1519                updates.push((range, mapping, shadow_mapping));
1520            }
1521        }
1522
1523        if bytes_mapped_in_range as usize != end_addr - start_addr {
1524            return error!(ENOMEM);
1525        }
1526
1527        let memlock_rlimit = current_task.thread_group().get_rlimit(locked, Resource::MEMLOCK);
1528        if self.total_locked_bytes() + num_new_locked_bytes > memlock_rlimit {
1529            if crate::security::check_task_capable(current_task, CAP_IPC_LOCK).is_err() {
1530                let code = if memlock_rlimit > 0 { errno!(ENOMEM) } else { errno!(EPERM) };
1531                return Err(code);
1532            }
1533        }
1534
1535        let op_range_status_to_errno = |e| match e {
1536            zx::Status::BAD_STATE | zx::Status::NOT_SUPPORTED => errno!(ENOMEM),
1537            zx::Status::INVALID_ARGS | zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1538            zx::Status::ACCESS_DENIED => {
1539                unreachable!("user vmar should always have needed rights")
1540            }
1541            zx::Status::BAD_HANDLE => {
1542                unreachable!("user vmar should always be a valid handle")
1543            }
1544            zx::Status::WRONG_TYPE => unreachable!("user vmar handle should be a vmar"),
1545            _ => unreachable!("unknown error from op_range on user vmar for mlock: {e}"),
1546        };
1547
1548        if !on_fault && !current_task.kernel().features.mlock_always_onfault {
1549            self.user_vmar
1550                .op_range(zx::VmarOp::PREFETCH, start_addr.ptr(), end_addr - start_addr)
1551                .map_err(op_range_status_to_errno)?;
1552        }
1553
1554        match current_task.kernel().features.mlock_pin_flavor {
1555            MlockPinFlavor::VmarAlwaysNeed => {
1556                self.user_vmar
1557                    .op_range(zx::VmarOp::ALWAYS_NEED, start_addr.ptr(), end_addr - start_addr)
1558                    .map_err(op_range_status_to_errno)?;
1559            }
1560            // The shadow process doesn't use any vmar-level operations to pin memory.
1561            MlockPinFlavor::Noop | MlockPinFlavor::ShadowProcess => (),
1562        }
1563
1564        for (range, mapping, shadow_mapping) in updates {
1565            if let Some(shadow_mapping) = shadow_mapping {
1566                released_mappings.extend_pins(
1567                    self.shadow_mappings_for_mlock.insert(range.clone(), shadow_mapping),
1568                );
1569            }
1570            released_mappings.extend(self.mappings.insert(range, mapping));
1571        }
1572
1573        if failed_to_lock { error!(ENOMEM) } else { Ok(()) }
1574    }
1575
1576    fn munlock(
1577        &mut self,
1578        _current_task: &CurrentTask,
1579        desired_addr: UserAddress,
1580        desired_length: usize,
1581        released_mappings: &mut ReleasedMappings,
1582    ) -> Result<(), Errno> {
1583        let desired_end_addr =
1584            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1585        let start_addr = round_down_to_system_page_size(desired_addr)?;
1586        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1587
1588        let mut updates = vec![];
1589        let mut bytes_mapped_in_range = 0;
1590        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1591            let mut range = range.clone();
1592            let mut mapping = mapping.clone();
1593
1594            // Handle mappings that start before the region to be unlocked.
1595            range.start = std::cmp::max(range.start, start_addr);
1596            // Handle mappings that extend past the region to be unlocked.
1597            range.end = std::cmp::min(range.end, end_addr);
1598
1599            bytes_mapped_in_range += (range.end - range.start) as u64;
1600
1601            if mapping.flags().contains(MappingFlags::LOCKED) {
1602                // This clears the locking for the shadow process pin flavor. It's not currently
1603                // possible to actually unlock pages that were locked with the
1604                // ZX_VMAR_OP_ALWAYS_NEED pin flavor.
1605                mapping.clear_mlock();
1606                updates.push((range, mapping));
1607            }
1608        }
1609
1610        if bytes_mapped_in_range as usize != end_addr - start_addr {
1611            return error!(ENOMEM);
1612        }
1613
1614        for (range, mapping) in updates {
1615            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
1616            released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(range));
1617        }
1618
1619        Ok(())
1620    }
1621
1622    pub fn total_locked_bytes(&self) -> u64 {
1623        self.num_locked_bytes(
1624            UserAddress::from(self.user_vmar_info.base as u64)
1625                ..UserAddress::from((self.user_vmar_info.base + self.user_vmar_info.len) as u64),
1626        )
1627    }
1628
1629    pub fn num_locked_bytes(&self, range: impl RangeBounds<UserAddress>) -> u64 {
1630        self.mappings
1631            .range(range)
1632            .filter(|(_, mapping)| mapping.flags().contains(MappingFlags::LOCKED))
1633            .map(|(range, _)| (range.end - range.start) as u64)
1634            .sum()
1635    }
1636
1637    fn max_address(&self) -> UserAddress {
1638        UserAddress::from_ptr(self.user_vmar_info.base + self.user_vmar_info.len)
1639    }
1640
1641    fn get_mappings_for_vmsplice(
1642        &self,
1643        mm: &Arc<MemoryManager>,
1644        buffers: &UserBuffers,
1645    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
1646        let mut vmsplice_mappings = Vec::new();
1647
1648        for UserBuffer { mut address, length } in buffers.iter().copied() {
1649            let mappings = self.get_contiguous_mappings_at(address, length)?;
1650            for (mapping, length) in mappings {
1651                let vmsplice_payload = match self.get_mapping_backing(mapping) {
1652                    MappingBacking::Memory(m) => VmsplicePayloadSegment {
1653                        addr_offset: address,
1654                        length,
1655                        memory: m.memory().clone(),
1656                        memory_offset: m.address_to_offset(address),
1657                    },
1658                    MappingBacking::PrivateAnonymous => VmsplicePayloadSegment {
1659                        addr_offset: address,
1660                        length,
1661                        memory: self.private_anonymous.backing.clone(),
1662                        memory_offset: address.ptr() as u64,
1663                    },
1664                };
1665                vmsplice_mappings.push(VmsplicePayload::new(Arc::downgrade(mm), vmsplice_payload));
1666
1667                address = (address + length)?;
1668            }
1669        }
1670
1671        Ok(vmsplice_mappings)
1672    }
1673
1674    /// Returns all the mappings starting at `addr`, and continuing until either `length` bytes have
1675    /// been covered or an unmapped page is reached.
1676    ///
1677    /// Mappings are returned in ascending order along with the number of bytes that intersect the
1678    /// requested range. The returned mappings are guaranteed to be contiguous and the total length
1679    /// corresponds to the number of contiguous mapped bytes starting from `addr`, i.e.:
1680    /// - 0 (empty iterator) if `addr` is not mapped.
1681    /// - exactly `length` if the requested range is fully mapped.
1682    /// - the offset of the first unmapped page (between 0 and `length`) if the requested range is
1683    ///   only partially mapped.
1684    ///
1685    /// Returns EFAULT if the requested range overflows or extends past the end of the vmar.
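    ///
    /// A minimal sketch of the intended calling pattern (illustrative only; `state`, `addr`, and
    /// `length` are assumed placeholders for a state reference and a user range):
    ///
    /// ```ignore
    /// let mut covered = 0;
    /// for (mapping, len) in state.get_contiguous_mappings_at(addr, length)? {
    ///     // Each `mapping` intersects the requested range for exactly `len` bytes.
    ///     covered += len;
    /// }
    /// assert!(covered <= length);
    /// ```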
1686    fn get_contiguous_mappings_at(
1687        &self,
1688        addr: UserAddress,
1689        length: usize,
1690    ) -> Result<impl Iterator<Item = (&Mapping, usize)>, Errno> {
1691        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EFAULT))?;
1692        if end_addr > self.max_address() {
1693            return error!(EFAULT);
1694        }
1695
1696        // Iterate over all contiguous mappings intersecting the requested range.
1697        let mut mappings = self.mappings.range(addr..end_addr);
1698        let mut prev_range_end = None;
1699        let mut offset = 0;
1700        let result = std::iter::from_fn(move || {
1701            if offset != length {
1702                if let Some((range, mapping)) = mappings.next() {
1703                    return match prev_range_end {
1704                        // If this is the first mapping that we are considering, it may not actually
1705                        // contain `addr` at all.
1706                        None if range.start > addr => None,
1707
1708                        // Subsequent mappings may not be contiguous.
1709                        Some(prev_range_end) if range.start != prev_range_end => None,
1710
1711                        // This mapping can be returned.
1712                        _ => {
1713                            let mapping_length = std::cmp::min(length, range.end - addr) - offset;
1714                            offset += mapping_length;
1715                            prev_range_end = Some(range.end);
1716                            Some((mapping, mapping_length))
1717                        }
1718                    };
1719                }
1720            }
1721
1722            None
1723        });
1724
1725        Ok(result)
1726    }
1727
1728    /// Determines whether a fault at the given address could be covered by extending a growsdown
1729    /// mapping.
1730    ///
1731    /// If the address already belongs to a mapping, this function returns `None`. If the next
1732    /// mapping above the given address has the `MappingFlags::GROWSDOWN` flag, this function
1733    /// returns the address at which that mapping starts and the mapping itself. Otherwise, this
1734    /// function returns `None`.
1735    fn find_growsdown_mapping(&self, addr: UserAddress) -> Option<(UserAddress, &Mapping)> {
1736        match self.mappings.range(addr..).next() {
1737            Some((range, mapping)) => {
1738                if range.contains(&addr) {
1739                    // |addr| is already contained within a mapping, nothing to grow.
1740                    return None;
1741                } else if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1742                    // The next mapping above the given address does not have the
1743                    // `MappingFlags::GROWSDOWN` flag.
1744                    None
1745                } else {
1746                    Some((range.start, mapping))
1747                }
1748            }
1749            None => None,
1750        }
1751    }
1752
1753    /// Determines if an access at a given address could be covered by extending a growsdown mapping
1754    /// and extends it if possible. Returns true if the given address is covered by a mapping.
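    ///
    /// A minimal sketch of how a fault handler might use this (illustrative; `state`, `mm`, and
    /// `fault_addr` are assumed placeholders):
    ///
    /// ```ignore
    /// if state.extend_growsdown_mapping_to_address(&mm, fault_addr, /* is_write */ true)? {
    ///     // A growsdown mapping now covers `fault_addr`; the faulting access can be retried.
    /// }
    /// ```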
1755    fn extend_growsdown_mapping_to_address(
1756        &mut self,
1757        mm: &Arc<MemoryManager>,
1758        addr: UserAddress,
1759        is_write: bool,
1760    ) -> Result<bool, Error> {
1761        let Some((mapping_low_addr, mapping_to_grow)) = self.find_growsdown_mapping(addr) else {
1762            return Ok(false);
1763        };
1764        if is_write && !mapping_to_grow.can_write() {
1765            // Don't grow a read-only GROWSDOWN mapping for a write fault; it won't work.
1766            return Ok(false);
1767        }
1768        if !mapping_to_grow.flags().contains(MappingFlags::ANONYMOUS) {
1769            // Currently, we only grow anonymous mappings.
1770            return Ok(false);
1771        }
1772        let low_addr = (addr - (addr.ptr() as u64 % *PAGE_SIZE))?;
1773        let high_addr = mapping_low_addr;
1774
1775        let length = high_addr
1776            .ptr()
1777            .checked_sub(low_addr.ptr())
1778            .ok_or_else(|| anyhow!("Invalid growth range"))?;
1779
1780        let mut released_mappings = ReleasedMappings::default();
1781        self.map_anonymous(
1782            mm,
1783            DesiredAddress::FixedOverwrite(low_addr),
1784            length,
1785            mapping_to_grow.flags().access_flags(),
1786            mapping_to_grow.flags().options(),
1787            mapping_to_grow.name(),
1788            &mut released_mappings,
1789        )?;
1790        // We can't have any released mappings because `find_growsdown_mapping` returns `None` if
1791        // a mapping already exists in this range.
1792        assert!(
1793            released_mappings.is_empty(),
1794            "expected to not remove mappings by inserting, got {released_mappings:#?}"
1795        );
1796        Ok(true)
1797    }
1798
1799    /// Reads exactly `bytes.len()` bytes of memory.
1800    ///
1801    /// # Parameters
1802    /// - `addr`: The address to read data from.
1803    /// - `bytes`: The byte array to read into.
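    ///
    /// A minimal usage sketch (illustrative; `state` and `addr` are assumed placeholders):
    ///
    /// ```ignore
    /// let mut buf = [std::mem::MaybeUninit::<u8>::uninit(); 8];
    /// // On success, the returned slice covers all 8 bytes, now initialized.
    /// let bytes: &mut [u8] = state.read_memory(addr, &mut buf)?;
    /// assert_eq!(bytes.len(), 8);
    /// ```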
1804    fn read_memory<'a>(
1805        &self,
1806        addr: UserAddress,
1807        bytes: &'a mut [MaybeUninit<u8>],
1808    ) -> Result<&'a mut [u8], Errno> {
1809        let mut bytes_read = 0;
1810        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1811            let next_offset = bytes_read + len;
1812            self.read_mapping_memory(
1813                (addr + bytes_read)?,
1814                mapping,
1815                &mut bytes[bytes_read..next_offset],
1816            )?;
1817            bytes_read = next_offset;
1818        }
1819
1820        if bytes_read != bytes.len() {
1821            error!(EFAULT)
1822        } else {
1823            // SAFETY: The created slice is properly aligned/sized since it
1824            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1825            // the same layout as `T`. Also note that `bytes_read` bytes have
1826            // been properly initialized.
1827            let bytes = unsafe {
1828                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1829            };
1830            Ok(bytes)
1831        }
1832    }
1833
1834    /// Reads exactly `bytes.len()` bytes of memory from `addr`.
1835    ///
1836    /// # Parameters
1837    /// - `addr`: The address to read data from.
1838    /// - `bytes`: The byte array to read into.
1839    fn read_mapping_memory<'a>(
1840        &self,
1841        addr: UserAddress,
1842        mapping: &Mapping,
1843        bytes: &'a mut [MaybeUninit<u8>],
1844    ) -> Result<&'a mut [u8], Errno> {
1845        if !mapping.can_read() {
1846            return error!(EFAULT, "read_mapping_memory called on unreadable mapping");
1847        }
1848        match self.get_mapping_backing(mapping) {
1849            MappingBacking::Memory(backing) => backing.read_memory(addr, bytes),
1850            MappingBacking::PrivateAnonymous => self.private_anonymous.read_memory(addr, bytes),
1851        }
1852    }
1853
1854    /// Reads bytes starting at `addr`, continuing until either `bytes.len()` bytes have been read
1855    /// or no more bytes can be read.
1856    ///
1857    /// This is used, for example, to read null-terminated strings where the exact length is not
1858    /// known, only the maximum length is.
1859    ///
1860    /// # Parameters
1861    /// - `addr`: The address to read data from.
1862    /// - `bytes`: The byte array to read into.
1863    fn read_memory_partial<'a>(
1864        &self,
1865        addr: UserAddress,
1866        bytes: &'a mut [MaybeUninit<u8>],
1867    ) -> Result<&'a mut [u8], Errno> {
1868        let mut bytes_read = 0;
1869        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1870            let next_offset = bytes_read + len;
1871            if self
1872                .read_mapping_memory(
1873                    (addr + bytes_read)?,
1874                    mapping,
1875                    &mut bytes[bytes_read..next_offset],
1876                )
1877                .is_err()
1878            {
1879                break;
1880            }
1881            bytes_read = next_offset;
1882        }
1883
1884        // If at least one byte was requested but we got none, it means that `addr` was invalid.
1885        if !bytes.is_empty() && bytes_read == 0 {
1886            error!(EFAULT)
1887        } else {
1888            // SAFETY: The created slice is properly aligned/sized since it
1889            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1890            // the same layout as `T`. Also note that `bytes_read` bytes have
1891            // been properly initialized.
1892            let bytes = unsafe {
1893                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1894            };
1895            Ok(bytes)
1896        }
1897    }
1898
1899    /// Like `read_memory_partial` but only returns the bytes up to and including
1900    /// a null (zero) byte.
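    ///
    /// Illustrative sketch (assumes `state` and an `addr` pointing at a NUL-terminated string):
    ///
    /// ```ignore
    /// let mut buf = [std::mem::MaybeUninit::<u8>::uninit(); 256];
    /// let read = state.read_memory_partial_until_null_byte(addr, &mut buf)?;
    /// // `read` ends with the first NUL byte (inclusive), or contains all readable bytes if
    /// // no NUL was found.
    /// ```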
1901    fn read_memory_partial_until_null_byte<'a>(
1902        &self,
1903        addr: UserAddress,
1904        bytes: &'a mut [MaybeUninit<u8>],
1905    ) -> Result<&'a mut [u8], Errno> {
1906        let read_bytes = self.read_memory_partial(addr, bytes)?;
1907        let max_len = memchr::memchr(b'\0', read_bytes)
1908            .map_or_else(|| read_bytes.len(), |null_index| null_index + 1);
1909        Ok(&mut read_bytes[..max_len])
1910    }
1911
1912    /// Writes the provided bytes.
1913    ///
1914    /// In case of success, the number of bytes written will always be `bytes.len()`.
1915    ///
1916    /// # Parameters
1917    /// - `addr`: The address to write to.
1918    /// - `bytes`: The bytes to write.
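    ///
    /// A minimal usage sketch (illustrative; `state` and `addr` are assumed placeholders):
    ///
    /// ```ignore
    /// // Either all of `b"hello"` is written or an error is returned.
    /// let written = state.write_memory(addr, b"hello")?;
    /// assert_eq!(written, 5);
    /// ```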
1919    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1920        let mut bytes_written = 0;
1921        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1922            let next_offset = bytes_written + len;
1923            self.write_mapping_memory(
1924                (addr + bytes_written)?,
1925                mapping,
1926                &bytes[bytes_written..next_offset],
1927            )?;
1928            bytes_written = next_offset;
1929        }
1930
1931        if bytes_written != bytes.len() { error!(EFAULT) } else { Ok(bytes.len()) }
1932    }
1933
1934    /// Writes the provided bytes to `addr`.
1935    ///
1936    /// # Parameters
1937    /// - `addr`: The address to write to.
1938    /// - `bytes`: The bytes to write to the memory object.
1939    fn write_mapping_memory(
1940        &self,
1941        addr: UserAddress,
1942        mapping: &Mapping,
1943        bytes: &[u8],
1944    ) -> Result<(), Errno> {
1945        if !mapping.can_write() {
1946            return error!(EFAULT, "write_mapping_memory called on unwritable memory");
1947        }
1948        match self.get_mapping_backing(mapping) {
1949            MappingBacking::Memory(backing) => backing.write_memory(addr, bytes),
1950            MappingBacking::PrivateAnonymous => self.private_anonymous.write_memory(addr, bytes),
1951        }
1952    }
1953
1954    /// Writes bytes starting at `addr`, continuing until either `bytes.len()` bytes have been
1955    /// written or no more bytes can be written.
1956    ///
1957    /// # Parameters
1958    /// - `addr`: The address to write to.
1959    /// - `bytes`: The byte array to write from.
1960    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1961        let mut bytes_written = 0;
1962        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1963            let next_offset = bytes_written + len;
1964            if self
1965                .write_mapping_memory(
1966                    (addr + bytes_written)?,
1967                    mapping,
1968                    &bytes[bytes_written..next_offset],
1969                )
1970                .is_err()
1971            {
1972                break;
1973            }
1974            bytes_written = next_offset;
1975        }
1976
1977        if !bytes.is_empty() && bytes_written == 0 { error!(EFAULT) } else { Ok(bytes.len()) }
1978    }
1979
1980    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
1981        let mut bytes_written = 0;
1982        for (mapping, len) in self.get_contiguous_mappings_at(addr, length)? {
1983            let next_offset = bytes_written + len;
1984            if self.zero_mapping((addr + bytes_written)?, mapping, len).is_err() {
1985                break;
1986            }
1987            bytes_written = next_offset;
1988        }
1989
1990        if length != bytes_written { error!(EFAULT) } else { Ok(length) }
1991    }
1992
1993    fn zero_mapping(
1994        &self,
1995        addr: UserAddress,
1996        mapping: &Mapping,
1997        length: usize,
1998    ) -> Result<usize, Errno> {
1999        if !mapping.can_write() {
2000            return error!(EFAULT);
2001        }
2002
2003        match self.get_mapping_backing(mapping) {
2004            MappingBacking::Memory(backing) => backing.zero(addr, length),
2005            MappingBacking::PrivateAnonymous => self.private_anonymous.zero(addr, length),
2006        }
2007    }
2008
2009    pub fn create_memory_backing(
2010        &self,
2011        base: UserAddress,
2012        memory: Arc<MemoryObject>,
2013        memory_offset: u64,
2014    ) -> MappingBacking {
2015        MappingBacking::Memory(Box::new(MappingBackingMemory::new(base, memory, memory_offset)))
2016    }
2017
2018    pub fn get_mapping_backing<'a>(&self, mapping: &'a Mapping) -> &'a MappingBacking {
2019        mapping.get_backing_internal()
2020    }
2021
2022    fn get_aio_context(&self, addr: UserAddress) -> Option<(Range<UserAddress>, Arc<AioContext>)> {
2023        let Some((range, mapping)) = self.mappings.get(addr) else {
2024            return None;
2025        };
2026        let MappingName::AioContext(ref aio_context) = mapping.name() else {
2027            return None;
2028        };
2029        if !mapping.can_read() {
2030            return None;
2031        }
2032        Some((range.clone(), aio_context.clone()))
2033    }
2034
2035    fn find_uffd<L>(&self, locked: &mut Locked<L>, addr: UserAddress) -> Option<Arc<UserFault>>
2036    where
2037        L: LockBefore<UserFaultInner>,
2038    {
2039        for userfault in self.userfaultfds.iter() {
2040            if let Some(userfault) = userfault.upgrade() {
2041                if userfault.contains_addr(locked, addr) {
2042                    return Some(userfault);
2043                }
2044            }
2045        }
2046        None
2047    }
2048
2049    pub fn mrelease(&self) -> Result<(), Errno> {
2050        self.private_anonymous
2051            .zero(UserAddress::from_ptr(self.user_vmar_info.base), self.user_vmar_info.len)?;
2052        Ok(())
2053    }
2054
2055    fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
2056        let mut addr = range.start;
2057        let size = range.end - range.start;
2058        for (mapping, len) in self.get_contiguous_mappings_at(addr, size)? {
2059            if !mapping.can_read() {
2060                return error!(EFAULT);
2061            }
2062            // SAFETY: This is operating on a readable restricted mode mapping and will not fault.
2063            zx::Status::ok(unsafe {
2064                zx::sys::zx_cache_flush(
2065                    addr.ptr() as *const u8,
2066                    len,
2067                    zx::sys::ZX_CACHE_FLUSH_DATA | zx::sys::ZX_CACHE_FLUSH_INSN,
2068                )
2069            })
2070            .map_err(impossible_error)?;
2071
2072            addr = (addr + len).unwrap(); // unwrap since we're iterating within the address space.
2073        }
2074        // Did we flush the entire range?
2075        if addr != range.end { error!(EFAULT) } else { Ok(()) }
2076    }
2077
2078    // Returns details of mappings in the `user_vmar`, or an empty vector if the `user_vmar` has
2079    // been destroyed.
2080    fn with_zx_mappings<R>(
2081        &self,
2082        current_task: &CurrentTask,
2083        op: impl FnOnce(&[zx::MapInfo]) -> R,
2084    ) -> R {
2085        if self.user_vmar.is_invalid_handle() {
2086            return op(&[]);
2087        };
2088
2089        MapInfoCache::get_or_init(current_task)
2090            .expect("must be able to retrieve map info cache")
2091            .with_map_infos(&self.user_vmar, |infos| {
2092                // No other https://fuchsia.dev/reference/syscalls/object_get_info?hl=en#errors
2093                // are possible, because we created the VMAR and the `zx` crate ensures that the
2094                // info query is well-formed.
2095                op(infos.expect("must be able to query mappings for private user VMAR"))
2096            })
2097    }
2098
2099    /// Register the address space managed by this memory manager for interest in
2100    /// receiving private expedited memory barriers of the given kind.
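    ///
    /// A minimal sketch of the register-then-query flow (illustrative; `state` is an assumed
    /// placeholder for a writable reference to this state):
    ///
    /// ```ignore
    /// state.register_membarrier_private_expedited(MembarrierType::Memory)?;
    /// assert!(state.membarrier_private_expedited_registered(MembarrierType::Memory));
    /// ```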
2101    pub fn register_membarrier_private_expedited(
2102        &mut self,
2103        mtype: MembarrierType,
2104    ) -> Result<(), Errno> {
2105        let registrations = &mut self.forkable_state.membarrier_registrations;
2106        match mtype {
2107            MembarrierType::Memory => {
2108                registrations.memory = true;
2109            }
2110            MembarrierType::SyncCore => {
2111                registrations.sync_core = true;
2112            }
2113        }
2114        Ok(())
2115    }
2116
2117    /// Checks if the address space managed by this memory manager is registered
2118    /// for interest in private expedited barriers of the given kind.
2119    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2120        let registrations = &self.forkable_state.membarrier_registrations;
2121        match mtype {
2122            MembarrierType::Memory => registrations.memory,
2123            MembarrierType::SyncCore => registrations.sync_core,
2124        }
2125    }
2126
2127    fn force_write_memory(
2128        &mut self,
2129        addr: UserAddress,
2130        bytes: &[u8],
2131        released_mappings: &mut ReleasedMappings,
2132    ) -> Result<(), Errno> {
2133        let (range, mapping) = self.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
2134        if range.end < addr.saturating_add(bytes.len()) {
2135            track_stub!(
2136                TODO("https://fxbug.dev/445790710"),
2137                "ptrace poke across multiple mappings"
2138            );
2139            return error!(EFAULT);
2140        }
2141
2142        // Don't create a CoW copy of shared memory; go through the regular mapping write path.
2143        if mapping.flags().contains(MappingFlags::SHARED) {
2144            if !mapping.can_write() {
2145                // Linux returns EIO here instead of EFAULT.
2146                return error!(EIO);
2147            }
2148            return self.write_mapping_memory(addr, mapping, bytes);
2149        }
2150
2151        let backing = match self.get_mapping_backing(mapping) {
2152            MappingBacking::PrivateAnonymous => {
2153                // Starnix has a writable handle to private anonymous memory.
2154                return self.private_anonymous.write_memory(addr, bytes);
2155            }
2156            MappingBacking::Memory(backing) => backing,
2157        };
2158
2159        let vmo = backing.memory().as_vmo().ok_or_else(|| errno!(EFAULT))?;
2160        let addr_offset = backing.address_to_offset(addr);
2161        let can_exec =
2162            vmo.basic_info().expect("get VMO handle info").rights.contains(Rights::EXECUTE);
2163
2164        // Attempt to write to existing VMO
2165        match vmo.write(bytes, addr_offset) {
2166            Ok(()) => {
2167                if can_exec {
2168                    // Issue a barrier to avoid executing stale instructions.
2169                    system_barrier(BarrierType::InstructionStream);
2170                }
2171                return Ok(());
2172            }
2173
2174            Err(zx::Status::ACCESS_DENIED) => { /* Fall through */ }
2175
2176            Err(status) => {
2177                return Err(MemoryManager::get_errno_for_vmo_err(status));
2178            }
2179        }
2180
2181        // Create a CoW child of the entire VMO and swap with the backing.
2182        let mapping_offset = backing.address_to_offset(range.start);
2183        let len = range.end - range.start;
2184
2185        // 1. Obtain a writable child of the VMO.
2186        let size = vmo.get_size().map_err(MemoryManager::get_errno_for_vmo_err)?;
2187        let child_vmo = vmo
2188            .create_child(VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, size)
2189            .map_err(MemoryManager::get_errno_for_vmo_err)?;
2190
2191        // 2. Modify the memory.
2192        child_vmo.write(bytes, addr_offset).map_err(MemoryManager::get_errno_for_vmo_err)?;
2193
2194        // 3. If needed, remint the VMO as executable. Zircon flushes instruction caches when
2195        // mapping executable memory below, so a barrier isn't necessary here.
2196        let child_vmo = if can_exec {
2197            child_vmo
2198                .replace_as_executable(&VMEX_RESOURCE)
2199                .map_err(MemoryManager::get_errno_for_vmo_err)?
2200        } else {
2201            child_vmo
2202        };
2203
2204        // 4. Map the new VMO into user VMAR
2205        let memory = Arc::new(MemoryObject::from(child_vmo));
2206        let mapped_addr = self.map_in_user_vmar(
2207            SelectedAddress::FixedOverwrite(range.start),
2208            &memory,
2209            mapping_offset,
2210            len,
2211            mapping.flags(),
2212            false,
2213        )?;
2214        assert_eq!(mapped_addr, range.start);
2215
2216        // 5. Update mappings
2217        let new_backing = MappingBackingMemory::new(range.start, memory, mapping_offset);
2218
2219        let mut new_mapping = mapping.clone();
2220        new_mapping.set_backing_internal(MappingBacking::Memory(Box::new(new_backing)));
2221
2222        let range = range.clone();
2223        released_mappings.extend(self.mappings.insert(range, new_mapping));
2224
2225        Ok(())
2226    }
2227
2228    fn set_brk<L>(
2229        &mut self,
2230        locked: &mut Locked<L>,
2231        current_task: &CurrentTask,
2232        mm: &Arc<MemoryManager>,
2233        addr: UserAddress,
2234        released_mappings: &mut ReleasedMappings,
2235    ) -> Result<UserAddress, Errno>
2236    where
2237        L: LockBefore<ThreadGroupLimits>,
2238    {
2239        let rlimit_data = std::cmp::min(
2240            PROGRAM_BREAK_LIMIT,
2241            current_task.thread_group().get_rlimit(locked, Resource::DATA),
2242        );
2243
2244        let brk = match self.brk.clone() {
2245            None => {
2246                let brk = ProgramBreak { base: self.brk_origin, current: self.brk_origin };
2247                self.brk = Some(brk.clone());
2248                brk
2249            }
2250            Some(brk) => brk,
2251        };
2252
2253        let Ok(last_address) = brk.base + rlimit_data else {
2254            // The requested program break is out-of-range. We're supposed to simply
2255            // return the current program break.
2256            return Ok(brk.current);
2257        };
2258
2259        if addr < brk.base || addr > last_address {
2260            // The requested program break is out-of-range. We're supposed to simply
2261            // return the current program break.
2262            return Ok(brk.current);
2263        }
2264
2265        let old_end = brk.current.round_up(*PAGE_SIZE).unwrap();
2266        let new_end = addr.round_up(*PAGE_SIZE).unwrap();
2267
2268        match new_end.cmp(&old_end) {
2269            std::cmp::Ordering::Less => {
2270                // Shrinking the program break removes any mapped pages in the
2271                // affected range, regardless of whether they were actually program
2272                // break pages, or other mappings.
2273                let delta = old_end - new_end;
2274
2275                if self.unmap(mm, new_end, delta, released_mappings).is_err() {
2276                    return Ok(brk.current);
2277                }
2278            }
2279            std::cmp::Ordering::Greater => {
2280                let range = old_end..new_end;
2281                let delta = new_end - old_end;
2282
2283                // Check for mappings over the program break region.
2284                if self.mappings.range(range).next().is_some() {
2285                    return Ok(brk.current);
2286                }
2287
2288                if self
2289                    .map_anonymous(
2290                        mm,
2291                        DesiredAddress::FixedOverwrite(old_end),
2292                        delta,
2293                        ProtectionFlags::READ | ProtectionFlags::WRITE,
2294                        MappingOptions::ANONYMOUS,
2295                        MappingName::Heap,
2296                        released_mappings,
2297                    )
2298                    .is_err()
2299                {
2300                    return Ok(brk.current);
2301                }
2302            }
2303            _ => {}
2304        };
2305
2306        // Any required updates to the program break succeeded, so update internal state.
2307        let mut new_brk = brk;
2308        new_brk.current = addr;
2309        self.brk = Some(new_brk);
2310
2311        Ok(addr)
2312    }
2313
2314    fn register_with_uffd<L>(
2315        &mut self,
2316        locked: &mut Locked<L>,
2317        addr: UserAddress,
2318        length: usize,
2319        userfault: &Arc<UserFault>,
2320        mode: FaultRegisterMode,
2321        released_mappings: &mut ReleasedMappings,
2322    ) -> Result<(), Errno>
2323    where
2324        L: LockBefore<UserFaultInner>,
2325    {
2326        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2327        let range_for_op = addr..end_addr;
2328        let mut updates = vec![];
2329
2330        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2331            if !mapping.private_anonymous() {
2332                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2333                return error!(EINVAL);
2334            }
2335            if mapping.flags().contains(MappingFlags::UFFD) {
2336                return error!(EBUSY);
2337            }
2338            let range = range.intersect(&range_for_op);
2339            let mut mapping = mapping.clone();
2340            mapping.set_uffd(mode);
2341            updates.push((range, mapping));
2342        }
2343        if updates.is_empty() {
2344            return error!(EINVAL);
2345        }
2346
2347        self.protect_vmar_range(addr, length, ProtectionFlags::empty())
2348            .expect("Failed to remove protections on uffd-registered range");
2349
2350        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2351        for (range, mapping) in updates {
2352            released_mappings.extend(self.mappings.insert(range, mapping));
2353        }
2354
2355        userfault.insert_pages(locked, range_for_op, false);
2356
2357        Ok(())
2358    }
2359
2360    fn unregister_range_from_uffd<L>(
2361        &mut self,
2362        locked: &mut Locked<L>,
2363        userfault: &Arc<UserFault>,
2364        addr: UserAddress,
2365        length: usize,
2366        released_mappings: &mut ReleasedMappings,
2367    ) -> Result<(), Errno>
2368    where
2369        L: LockBefore<UserFaultInner>,
2370    {
2371        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2372        let range_for_op = addr..end_addr;
2373        let mut updates = vec![];
2374
2375        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2376            if !mapping.private_anonymous() {
2377                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2378                return error!(EINVAL);
2379            }
2380            if mapping.flags().contains(MappingFlags::UFFD) {
2381                let range = range.intersect(&range_for_op);
2382                if userfault.remove_pages(locked, range.clone()) {
2383                    let mut mapping = mapping.clone();
2384                    mapping.clear_uffd();
2385                    updates.push((range, mapping));
2386                }
2387            }
2388        }
2389        for (range, mapping) in updates {
2390            let length = range.end - range.start;
2391            let restored_flags = mapping.flags().access_flags();
2392
2393            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2394
2395            self.protect_vmar_range(range.start, length, restored_flags)
2396                .expect("Failed to restore original protection bits on uffd-registered range");
2397        }
2398        Ok(())
2399    }
2400
2401    fn unregister_uffd<L>(
2402        &mut self,
2403        locked: &mut Locked<L>,
2404        userfault: &Arc<UserFault>,
2405        released_mappings: &mut ReleasedMappings,
2406    ) where
2407        L: LockBefore<UserFaultInner>,
2408    {
2409        let mut updates = vec![];
2410
2411        for (range, mapping) in self.mappings.iter() {
2412            if mapping.flags().contains(MappingFlags::UFFD) {
2413                for range in userfault.get_registered_pages_overlapping_range(locked, range.clone())
2414                {
2415                    let mut mapping = mapping.clone();
2416                    mapping.clear_uffd();
2417                    updates.push((range.clone(), mapping));
2418                }
2419            }
2420        }
2421        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2422        for (range, mapping) in updates {
2423            let length = range.end - range.start;
2424            let restored_flags = mapping.flags().access_flags();
2425            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2426            // We can't recover from an error here as this is run during the cleanup.
2427            self.protect_vmar_range(range.start, length, restored_flags)
2428                .expect("Failed to restore original protection bits on uffd-registered range");
2429        }
2430
2431        userfault.remove_pages(
2432            locked,
2433            UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
2434                ..UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS),
2435        );
2436
2437        let weak_userfault = Arc::downgrade(userfault);
2438        self.userfaultfds.retain(|uf| !Weak::ptr_eq(uf, &weak_userfault));
2439    }
2440
2441    fn set_mapping_name(
2442        &mut self,
2443        addr: UserAddress,
2444        length: usize,
2445        name: Option<FsString>,
2446        released_mappings: &mut ReleasedMappings,
2447    ) -> Result<(), Errno> {
2448        if addr.ptr() % *PAGE_SIZE as usize != 0 {
2449            return error!(EINVAL);
2450        }
2451        let end = match addr.checked_add(length) {
2452            Some(addr) => addr.round_up(*PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
2453            None => return error!(EINVAL),
2454        };
2455
2456        let mappings_in_range =
2457            self.mappings.range(addr..end).map(|(r, m)| (r.clone(), m.clone())).collect::<Vec<_>>();
2458
2459        if mappings_in_range.is_empty() {
2460            return error!(EINVAL);
2461        }
2462        if !mappings_in_range.first().unwrap().0.contains(&addr) {
2463            return error!(ENOMEM);
2464        }
2465
2466        let mut last_range_end = None;
2467        // There's no get_mut on RangeMap, because it would be hard to implement correctly in
2468        // combination with merging of adjacent mappings. Instead, make a copy, change the copy,
2469        // and insert the copy.
2470        for (mut range, mut mapping) in mappings_in_range {
2471            if let MappingName::File(_) = mapping.name() {
2472                // It's invalid to assign a name to a file-backed mapping.
2473                return error!(EBADF);
2474            }
2475            // Handle mappings that start before the region to be named.
2476            range.start = std::cmp::max(range.start, addr);
2477            // Handle mappings that extend past the region to be named.
2478            range.end = std::cmp::min(range.end, end);
2479
2480            if let Some(last_range_end) = last_range_end {
2481                if last_range_end != range.start {
2482                    // The name must apply to a contiguous range of mapped pages.
2483                    return error!(ENOMEM);
2484                }
2485            }
2486            last_range_end = Some(range.end.round_up(*PAGE_SIZE)?);
2487            // TODO(b/310255065): We have no place to store names in a way that is visible to
2488            // programs outside of Starnix, such as memory analysis tools.
2489            if let MappingBacking::Memory(backing) = self.get_mapping_backing(&mapping) {
2490                match &name {
2491                    Some(memory_name) => {
2492                        backing.memory().set_zx_name(memory_name);
2493                    }
2494                    None => {
2495                        backing.memory().set_zx_name(b"");
2496                    }
2497                }
2498            }
2499            mapping.set_name(match &name {
2500                Some(name) => MappingName::Vma(FlyByteStr::new(name.as_bytes())),
2501                None => MappingName::None,
2502            });
2503            released_mappings.extend(self.mappings.insert(range, mapping));
2504        }
2505        if let Some(last_range_end) = last_range_end {
2506            if last_range_end < end {
2507                // The name must apply to a contiguous range of mapped pages.
2508                return error!(ENOMEM);
2509            }
2510        }
2511        Ok(())
2512    }
2513}
2514
2515fn create_user_vmar(vmar: &zx::Vmar, vmar_info: &zx::VmarInfo) -> Result<zx::Vmar, zx::Status> {
2516    let (vmar, ptr) = vmar.allocate(
2517        0,
2518        vmar_info.len,
2519        zx::VmarFlags::SPECIFIC
2520            | zx::VmarFlags::CAN_MAP_SPECIFIC
2521            | zx::VmarFlags::CAN_MAP_READ
2522            | zx::VmarFlags::CAN_MAP_WRITE
2523            | zx::VmarFlags::CAN_MAP_EXECUTE,
2524    )?;
2525    assert_eq!(ptr, vmar_info.base);
2526    Ok(vmar)
2527}
2528
2529/// A memory manager for another thread.
2530///
2531/// When accessing memory through this object, we use less efficient codepaths that work across
2532/// address spaces.
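///
/// A minimal usage sketch (illustrative; `remote` and `addr` are assumed placeholders, and the
/// `MemoryAccessor` trait must be in scope):
///
/// ```ignore
/// let mut buf = [std::mem::MaybeUninit::<u8>::uninit(); 16];
/// // Goes through the slower cross-address-space ("syscall") read path.
/// let bytes = remote.read_memory(addr, &mut buf)?;
/// ```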
2533pub struct RemoteMemoryManager {
2534    mm: Arc<MemoryManager>,
2535}
2536
2537impl RemoteMemoryManager {
2538    fn new(mm: Arc<MemoryManager>) -> Self {
2539        Self { mm }
2540    }
2541}
2542
2543// If we just have a MemoryManager, we cannot assume that its address space is current, which means
2544// we need to use the slower "syscall" mechanism to access its memory.
2545impl MemoryAccessor for RemoteMemoryManager {
2546    fn read_memory<'a>(
2547        &self,
2548        addr: UserAddress,
2549        bytes: &'a mut [MaybeUninit<u8>],
2550    ) -> Result<&'a mut [u8], Errno> {
2551        self.mm.syscall_read_memory(addr, bytes)
2552    }
2553
2554    fn read_memory_partial_until_null_byte<'a>(
2555        &self,
2556        addr: UserAddress,
2557        bytes: &'a mut [MaybeUninit<u8>],
2558    ) -> Result<&'a mut [u8], Errno> {
2559        self.mm.syscall_read_memory_partial_until_null_byte(addr, bytes)
2560    }
2561
2562    fn read_memory_partial<'a>(
2563        &self,
2564        addr: UserAddress,
2565        bytes: &'a mut [MaybeUninit<u8>],
2566    ) -> Result<&'a mut [u8], Errno> {
2567        self.mm.syscall_read_memory_partial(addr, bytes)
2568    }
2569
2570    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2571        self.mm.syscall_write_memory(addr, bytes)
2572    }
2573
2574    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2575        self.mm.syscall_write_memory_partial(addr, bytes)
2576    }
2577
2578    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
2579        self.mm.syscall_zero(addr, length)
2580    }
2581}
2582
2583impl TaskMemoryAccessor for RemoteMemoryManager {
2584    fn maximum_valid_address(&self) -> Option<UserAddress> {
2585        Some(self.mm.maximum_valid_user_address)
2586    }
2587}
2588
2589impl MemoryManager {
2590    pub fn summarize(&self, summary: &mut crate::mm::MappingSummary) {
2591        let state = self.state.read();
2592        for (_, mapping) in state.mappings.iter() {
2593            summary.add(&state, mapping);
2594        }
2595    }
2596
2597    pub fn get_mappings_for_vmsplice(
2598        self: &Arc<MemoryManager>,
2599        buffers: &UserBuffers,
2600    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
2601        self.state.read().get_mappings_for_vmsplice(self, buffers)
2602    }
2603
2604    pub fn has_same_address_space(&self, other: &Self) -> bool {
2605        self.root_vmar == other.root_vmar
2606    }
2607
2608    pub fn unified_read_memory<'a>(
2609        &self,
2610        current_task: &CurrentTask,
2611        addr: UserAddress,
2612        bytes: &'a mut [MaybeUninit<u8>],
2613    ) -> Result<&'a mut [u8], Errno> {
2614        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2615
2616        if let Some(usercopy) = usercopy() {
2617            let (read_bytes, unread_bytes) = usercopy.copyin(addr.ptr(), bytes);
2618            if unread_bytes.is_empty() { Ok(read_bytes) } else { error!(EFAULT) }
2619        } else {
2620            self.syscall_read_memory(addr, bytes)
2621        }
2622    }
2623
2624    pub fn syscall_read_memory<'a>(
2625        &self,
2626        addr: UserAddress,
2627        bytes: &'a mut [MaybeUninit<u8>],
2628    ) -> Result<&'a mut [u8], Errno> {
2629        self.state.read().read_memory(addr, bytes)
2630    }
2631
2632    pub fn unified_read_memory_partial_until_null_byte<'a>(
2633        &self,
2634        current_task: &CurrentTask,
2635        addr: UserAddress,
2636        bytes: &'a mut [MaybeUninit<u8>],
2637    ) -> Result<&'a mut [u8], Errno> {
2638        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2639
2640        if let Some(usercopy) = usercopy() {
2641            let (read_bytes, unread_bytes) = usercopy.copyin_until_null_byte(addr.ptr(), bytes);
2642            if read_bytes.is_empty() && !unread_bytes.is_empty() {
2643                error!(EFAULT)
2644            } else {
2645                Ok(read_bytes)
2646            }
2647        } else {
2648            self.syscall_read_memory_partial_until_null_byte(addr, bytes)
2649        }
2650    }
2651
2652    pub fn syscall_read_memory_partial_until_null_byte<'a>(
2653        &self,
2654        addr: UserAddress,
2655        bytes: &'a mut [MaybeUninit<u8>],
2656    ) -> Result<&'a mut [u8], Errno> {
2657        self.state.read().read_memory_partial_until_null_byte(addr, bytes)
2658    }
2659
2660    pub fn unified_read_memory_partial<'a>(
2661        &self,
2662        current_task: &CurrentTask,
2663        addr: UserAddress,
2664        bytes: &'a mut [MaybeUninit<u8>],
2665    ) -> Result<&'a mut [u8], Errno> {
2666        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2667
2668        if let Some(usercopy) = usercopy() {
2669            let (read_bytes, unread_bytes) = usercopy.copyin(addr.ptr(), bytes);
2670            if read_bytes.is_empty() && !unread_bytes.is_empty() {
2671                error!(EFAULT)
2672            } else {
2673                Ok(read_bytes)
2674            }
2675        } else {
2676            self.syscall_read_memory_partial(addr, bytes)
2677        }
2678    }
2679
2680    pub fn syscall_read_memory_partial<'a>(
2681        &self,
2682        addr: UserAddress,
2683        bytes: &'a mut [MaybeUninit<u8>],
2684    ) -> Result<&'a mut [u8], Errno> {
2685        self.state.read().read_memory_partial(addr, bytes)
2686    }
2687
2688    pub fn unified_write_memory(
2689        &self,
2690        current_task: &CurrentTask,
2691        addr: UserAddress,
2692        bytes: &[u8],
2693    ) -> Result<usize, Errno> {
2694        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2695
2696        if let Some(usercopy) = usercopy() {
2697            let num_copied = usercopy.copyout(bytes, addr.ptr());
2698            if num_copied != bytes.len() {
2699                error!(
2700                    EFAULT,
2701                    format!("expected {:?} bytes, copied {:?} bytes", bytes.len(), num_copied)
2702                )
2703            } else {
2704                Ok(num_copied)
2705            }
2706        } else {
2707            self.syscall_write_memory(addr, bytes)
2708        }
2709    }
2710
2711    /// Write `bytes` to memory address `addr`, making a copy-on-write child of the backing VMO and
2712    /// replacing the mapping if necessary.
2713    ///
2714    /// NOTE: this bypasses userspace's memory protection configuration and should only be called
2715    /// from code paths, such as ptrace, that are expected to bypass memory protection.
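    ///
    /// A minimal, illustrative sketch of a caller (the word value and address are hypothetical):
    ///
    /// ```ignore
    /// // Poke a word into the tracee regardless of the page's protection bits.
    /// mm.force_write_memory(addr, &word.to_ne_bytes())?;
    /// ```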
2716    pub fn force_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<(), Errno> {
2717        let mut state = self.state.write();
2718        let mut released_mappings = ReleasedMappings::default();
2719        let result = state.force_write_memory(addr, bytes, &mut released_mappings);
2720        released_mappings.finalize(state);
2721        result
2722    }
2723
2724    pub fn syscall_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2725        self.state.read().write_memory(addr, bytes)
2726    }
2727
2728    pub fn unified_write_memory_partial(
2729        &self,
2730        current_task: &CurrentTask,
2731        addr: UserAddress,
2732        bytes: &[u8],
2733    ) -> Result<usize, Errno> {
2734        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2735
2736        if let Some(usercopy) = usercopy() {
2737            let num_copied = usercopy.copyout(bytes, addr.ptr());
2738            if num_copied == 0 && !bytes.is_empty() { error!(EFAULT) } else { Ok(num_copied) }
2739        } else {
2740            self.syscall_write_memory_partial(addr, bytes)
2741        }
2742    }
2743
2744    pub fn syscall_write_memory_partial(
2745        &self,
2746        addr: UserAddress,
2747        bytes: &[u8],
2748    ) -> Result<usize, Errno> {
2749        self.state.read().write_memory_partial(addr, bytes)
2750    }
2751
2752    pub fn unified_zero(
2753        &self,
2754        current_task: &CurrentTask,
2755        addr: UserAddress,
2756        length: usize,
2757    ) -> Result<usize, Errno> {
2758        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2759
2760        {
2761            let page_size = *PAGE_SIZE as usize;
2762            // Get the page boundary at or immediately above `addr` (this is `addr` itself
2763            // when `addr` is already page-aligned).
2764            let next_page_boundary = round_up_to_system_page_size(addr.ptr())?;
2765            // The number of bytes needed to zero at least a full page (not just
2766            // a page's worth of bytes) starting at `addr`.
2767            let length_with_atleast_one_full_page = page_size + (next_page_boundary - addr.ptr());
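            // Worked example with a 4 KiB page size: if `addr` is 0x1010, the next page
            // boundary is 0x2000, so `length` must be at least 0x1000 + 0xff0 = 0x1ff0 bytes
            // for the zeroed range to cover the whole page at 0x2000..0x3000.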
2768            // If at least one full page is being zeroed, go through the memory object since Zircon
2769            // can swap the mapped pages with the zero page, which should be cheaper than zeroing
2770            // out a page's worth of bytes manually.
2771            //
2772            // If we are not zeroing out a full page, then go through usercopy
2773            // if unified aspaces is enabled.
2774            if length >= length_with_atleast_one_full_page {
2775                return self.syscall_zero(addr, length);
2776            }
2777        }
2778
2779        if let Some(usercopy) = usercopy() {
2780            if usercopy.zero(addr.ptr(), length) == length { Ok(length) } else { error!(EFAULT) }
2781        } else {
2782            self.syscall_zero(addr, length)
2783        }
2784    }
2785
2786    pub fn syscall_zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
2787        self.state.read().zero(addr, length)
2788    }
2789
2790    /// Obtain a reference to this memory manager that can be used from another thread.
2791    pub fn as_remote(self: &Arc<Self>) -> RemoteMemoryManager {
2792        RemoteMemoryManager::new(self.clone())
2793    }
2794
2795    /// Performs a data and instruction cache flush over the given address range.
2796    pub fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
2797        self.state.read().cache_flush(range)
2798    }
2799
2800    /// Register the address space managed by this memory manager for interest in
2801    /// receiving private expedited memory barriers of the given type.
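    ///
    /// Hedged sketch of the expected call pattern (registration precedes the query; `mtype` is
    /// whatever `MembarrierType` the caller cares about):
    ///
    /// ```ignore
    /// mm.register_membarrier_private_expedited(mtype)?;
    /// assert!(mm.membarrier_private_expedited_registered(mtype));
    /// ```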
2802    pub fn register_membarrier_private_expedited(
2803        &self,
2804        mtype: MembarrierType,
2805    ) -> Result<(), Errno> {
2806        self.state.write().register_membarrier_private_expedited(mtype)
2807    }
2808
2809    /// Checks if the address space managed by this memory manager is registered
2810    /// for interest in private expedited barriers of the given kind.
2811    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2812        self.state.read().membarrier_private_expedited_registered(mtype)
2813    }
2814}
2815
2816pub struct MemoryManager {
2817    /// The root VMAR for the child process.
2818    ///
2819    /// Instead of mapping memory directly in this VMAR, we map the memory in
2820    /// `state.user_vmar`.
2821    root_vmar: zx::Vmar,
2822
2823    /// The base address of the root_vmar.
2824    pub base_addr: UserAddress,
2825
2826    /// The futexes in this address space.
2827    pub futex: Arc<FutexTable<PrivateFutexKey>>,
2828
2829    /// Mutable state for the memory manager.
2830    pub state: RwLock<MemoryManagerState>,
2831
2832    /// Whether this address space is dumpable.
2833    pub dumpable: OrderedMutex<DumpPolicy, MmDumpable>,
2834
2835    /// Maximum valid user address for this vmar.
2836    pub maximum_valid_user_address: UserAddress,
2837
2838    /// In-flight payloads enqueued to a pipe as a consequence of a `vmsplice(2)`
2839    /// operation.
2840    ///
2841    /// For details on why we need to keep track of in-flight vmspliced payloads,
2842    /// see [`VmsplicePayload`].
2843    ///
2844    /// For details on why this isn't under the `RwLock`-protected `MemoryManagerState`,
2845    /// see [`InflightVmsplicedPayloads::payloads`].
2846    pub inflight_vmspliced_payloads: InflightVmsplicedPayloads,
2847
2848    /// A mechanism to be notified when this `MemoryManager` is destroyed.
2849    pub drop_notifier: DropNotifier,
2850}
2851
2852impl MemoryManager {
2853    pub fn new(root_vmar: zx::Vmar) -> Result<Self, zx::Status> {
2854        let info = root_vmar.info()?;
2855        let user_vmar = create_user_vmar(&root_vmar, &info)?;
2856        let user_vmar_info = user_vmar.info()?;
2857
2858        debug_assert_eq!(RESTRICTED_ASPACE_BASE, user_vmar_info.base);
2859        debug_assert_eq!(RESTRICTED_ASPACE_SIZE, user_vmar_info.len);
2860
2861        Ok(Self::from_vmar(root_vmar, user_vmar, user_vmar_info))
2862    }
2863
2864    fn from_vmar(root_vmar: zx::Vmar, user_vmar: zx::Vmar, user_vmar_info: zx::VmarInfo) -> Self {
2865        // The private anonymous backing memory object extends from user address 0 up to the
2866        // highest mappable address. The pages below `user_vmar_info.base` are never mapped, but
2867        // including them in the memory object makes the math for mapping addresses to memory
2868        // object offsets simpler.
2869        let backing_size = (user_vmar_info.base + user_vmar_info.len) as u64;
2870
2871        MemoryManager {
2872            root_vmar,
2873            base_addr: UserAddress::from_ptr(user_vmar_info.base),
2874            futex: Arc::<FutexTable<PrivateFutexKey>>::default(),
2875            state: RwLock::new(MemoryManagerState {
2876                user_vmar: user_vmar,
2877                user_vmar_info,
2878                mappings: Default::default(),
2879                private_anonymous: PrivateAnonymousMemoryManager::new(backing_size),
2880                userfaultfds: Default::default(),
2881                shadow_mappings_for_mlock: Default::default(),
2882                forkable_state: Default::default(),
2883            }),
2884            // TODO(security): Reset to DISABLE, or the value in the fs.suid_dumpable sysctl, under
2885            // certain conditions as specified in the prctl(2) man page.
2886            dumpable: OrderedMutex::new(DumpPolicy::User),
2887            maximum_valid_user_address: UserAddress::from_ptr(
2888                user_vmar_info.base + user_vmar_info.len,
2889            ),
2890            inflight_vmspliced_payloads: Default::default(),
2891            drop_notifier: DropNotifier::default(),
2892        }
2893    }
2894
2895    pub fn set_brk<L>(
2896        self: &Arc<Self>,
2897        locked: &mut Locked<L>,
2898        current_task: &CurrentTask,
2899        addr: UserAddress,
2900    ) -> Result<UserAddress, Errno>
2901    where
2902        L: LockBefore<ThreadGroupLimits>,
2903    {
2904        let mut state = self.state.write();
2905        let mut released_mappings = ReleasedMappings::default();
2906        let result = state.set_brk(locked, current_task, self, addr, &mut released_mappings);
2907        released_mappings.finalize(state);
2908        result
2909    }
2910
2911    pub fn register_uffd(&self, userfault: &Arc<UserFault>) {
2912        let mut state = self.state.write();
2913        state.userfaultfds.push(Arc::downgrade(userfault));
2914    }
2915
2916    /// Register a given memory range with a userfault object.
2917    pub fn register_with_uffd<L>(
2918        self: &Arc<Self>,
2919        locked: &mut Locked<L>,
2920        addr: UserAddress,
2921        length: usize,
2922        userfault: &Arc<UserFault>,
2923        mode: FaultRegisterMode,
2924    ) -> Result<(), Errno>
2925    where
2926        L: LockBefore<UserFaultInner>,
2927    {
2928        let mut state = self.state.write();
2929        let mut released_mappings = ReleasedMappings::default();
2930        let result =
2931            state.register_with_uffd(locked, addr, length, userfault, mode, &mut released_mappings);
2932        released_mappings.finalize(state);
2933        result
2934    }
2935
2936    /// Unregister a given range from any userfault objects associated with it.
2937    pub fn unregister_range_from_uffd<L>(
2938        &self,
2939        locked: &mut Locked<L>,
2940        userfault: &Arc<UserFault>,
2941        addr: UserAddress,
2942        length: usize,
2943    ) -> Result<(), Errno>
2944    where
2945        L: LockBefore<UserFaultInner>,
2946    {
2947        let mut state = self.state.write();
2948        let mut released_mappings = ReleasedMappings::default();
2949        let result = state.unregister_range_from_uffd(
2950            locked,
2951            userfault,
2952            addr,
2953            length,
2954            &mut released_mappings,
2955        );
2956        released_mappings.finalize(state);
2957        result
2958    }
2959
2960    /// Unregister any mappings registered with a given userfault object. Used when closing the last
2961    /// file descriptor associated with it.
2962    pub fn unregister_uffd<L>(&self, locked: &mut Locked<L>, userfault: &Arc<UserFault>)
2963    where
2964        L: LockBefore<UserFaultInner>,
2965    {
2966        let mut state = self.state.write();
2967        let mut released_mappings = ReleasedMappings::default();
2968        state.unregister_uffd(locked, userfault, &mut released_mappings);
2969        released_mappings.finalize(state);
2970    }
2971
2972    /// Populate a range of pages registered with a userfaultfd according to a `populate` function.
2973    /// This will fail if the pages were not registered with a userfaultfd, or if the page at `addr`
2974    /// was already populated. If any page other than the first one is already populated, `length`
2975    /// is adjusted to cover only the leading run of unpopulated pages, and this adjusted length
2976    /// is then passed to `populate`. On success, returns the number of populated bytes.
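    ///
    /// A minimal usage sketch (this mirrors how `zero_from_uffd` below drives the helper; the
    /// closure only ever sees the possibly-trimmed length):
    ///
    /// ```ignore
    /// let populated = mm.populate_from_uffd(locked, addr, length, &userfault, |state, len| {
    ///     state.zero(addr, len)
    /// })?;
    /// ```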
2977    pub fn populate_from_uffd<F, L>(
2978        &self,
2979        locked: &mut Locked<L>,
2980        addr: UserAddress,
2981        length: usize,
2982        userfault: &Arc<UserFault>,
2983        populate: F,
2984    ) -> Result<usize, Errno>
2985    where
2986        F: FnOnce(&MemoryManagerState, usize) -> Result<usize, Errno>,
2987        L: LockBefore<UserFaultInner>,
2988    {
2989        let state = self.state.read();
2990
2991        // Check that the `addr..addr + length` range is a contiguous run of mappings that are
2992        // all registered with a userfault object.
2993        let mut bytes_registered_with_uffd = 0;
2994        for (mapping, len) in state.get_contiguous_mappings_at(addr, length)? {
2995            if mapping.flags().contains(MappingFlags::UFFD) {
2996                // Check that the mapping is registered with the same uffd. This is not required,
2997                // but we don't support cross-uffd operations yet.
2998                if !userfault.contains_addr(locked, addr) {
2999                    track_stub!(
3000                        TODO("https://fxbug.dev/391599171"),
3001                        "operations across different uffds"
3002                    );
3003                    return error!(ENOTSUP);
3004                };
3005            } else {
3006                return error!(ENOENT);
3007            }
3008            bytes_registered_with_uffd += len;
3009        }
3010        if bytes_registered_with_uffd != length {
3011            return error!(ENOENT);
3012        }
3013
3014        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3015
3016        // Determine how many pages in the requested range are already populated
3017        let first_populated =
3018            userfault.get_first_populated_page_after(locked, addr).ok_or_else(|| errno!(ENOENT))?;
3019        // If the very first page is already populated, uffd operations should just return EEXIST
3020        if first_populated == addr {
3021            return error!(EEXIST);
3022        }
3023        // Otherwise it is possible to do an incomplete operation by only populating pages until
3024        // the first populated one.
3025        let trimmed_end = std::cmp::min(first_populated, end_addr);
3026        let effective_length = trimmed_end - addr;
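        // For example, if `addr` is the start of page 0, `length` spans pages 0..4, and page 2
        // is the first already-populated page, then `trimmed_end` is the start of page 2 and
        // `effective_length` covers pages 0 and 1 only.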
3027
3028        populate(&state, effective_length)?;
3029        userfault.insert_pages(locked, addr..trimmed_end, true);
3030
3031        // Since we used protection bits to force page faults, we now need to reverse this change by
3032        // restoring the protections on the underlying Zircon mappings to the "real" protection bits
3033        // that were kept in the Starnix mappings. This will prevent new page faults from being
3034        // generated. Only do this for the pages that were populated by this operation.
3035        for (range, mapping) in state.mappings.range(addr..trimmed_end) {
3036            let range_to_protect = range.intersect(&(addr..trimmed_end));
3037            let restored_flags = mapping.flags().access_flags();
3038            let length = range_to_protect.end - range_to_protect.start;
3039            state
3040                .protect_vmar_range(range_to_protect.start, length, restored_flags)
3041                .expect("Failed to restore original protection bits on uffd-registered range");
3042        }
3043        // Return the number of effectively populated bytes, which might be smaller than the
3044        // requested number.
3045        Ok(effective_length)
3046    }
3047
3048    pub fn zero_from_uffd<L>(
3049        &self,
3050        locked: &mut Locked<L>,
3051        addr: UserAddress,
3052        length: usize,
3053        userfault: &Arc<UserFault>,
3054    ) -> Result<usize, Errno>
3055    where
3056        L: LockBefore<UserFaultInner>,
3057    {
3058        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3059            state.zero(addr, effective_length)
3060        })
3061    }
3062
3063    pub fn fill_from_uffd<L>(
3064        &self,
3065        locked: &mut Locked<L>,
3066        addr: UserAddress,
3067        buf: &[u8],
3068        length: usize,
3069        userfault: &Arc<UserFault>,
3070    ) -> Result<usize, Errno>
3071    where
3072        L: LockBefore<UserFaultInner>,
3073    {
3074        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3075            state.write_memory(addr, &buf[..effective_length])
3076        })
3077    }
3078
3079    pub fn copy_from_uffd<L>(
3080        &self,
3081        locked: &mut Locked<L>,
3082        source_addr: UserAddress,
3083        dst_addr: UserAddress,
3084        length: usize,
3085        userfault: &Arc<UserFault>,
3086    ) -> Result<usize, Errno>
3087    where
3088        L: LockBefore<UserFaultInner>,
3089    {
3090        self.populate_from_uffd(locked, dst_addr, length, userfault, |state, effective_length| {
3091            let mut buf = vec![std::mem::MaybeUninit::uninit(); effective_length];
3092            let buf = state.read_memory(source_addr, &mut buf)?;
3093            state.write_memory(dst_addr, &buf[..effective_length])
3094        })
3095    }
3096
3097    /// Create a snapshot of the memory mapping from `self` into `target`. All
3098    /// memory mappings are copied entry-for-entry, and the copies end up at
3099    /// exactly the same addresses.
3100    pub fn snapshot_to<L>(
3101        &self,
3102        locked: &mut Locked<L>,
3103        target: &Arc<MemoryManager>,
3104    ) -> Result<(), Errno>
3105    where
3106        L: LockBefore<MmDumpable>,
3107    {
3108        // Hold the lock throughout the operation to uphold memory manager's invariants.
3109        // See mm/README.md.
3110        let state: &mut MemoryManagerState = &mut self.state.write();
3111        let mut target_state = target.state.write();
3112        let mut clone_cache = HashMap::<zx::Koid, Arc<MemoryObject>>::new();
3113
3114        let backing_size = (state.user_vmar_info.base + state.user_vmar_info.len) as u64;
3115        target_state.private_anonymous = state.private_anonymous.snapshot(backing_size)?;
3116
3117        for (range, mapping) in state.mappings.iter() {
3118            if mapping.flags().contains(MappingFlags::DONTFORK) {
3119                continue;
3120            }
3121            // Locking is not inherited when forking.
3122            let target_mapping_flags = mapping.flags().difference(MappingFlags::LOCKED);
3123            match state.get_mapping_backing(mapping) {
3124                MappingBacking::Memory(backing) => {
3125                    let memory_offset = backing.address_to_offset(range.start);
3126                    let length = range.end - range.start;
3127
3128                    let target_memory = if mapping.flags().contains(MappingFlags::SHARED)
3129                        || mapping.name() == MappingName::Vvar
3130                    {
3131                        // Note that the Vvar is a special mapping that behaves like a shared mapping but
3132                        // is private to each process.
3133                        backing.memory().clone()
3134                    } else if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3135                        create_anonymous_mapping_memory(length as u64)?
3136                    } else {
3137                        let basic_info = backing.memory().basic_info();
3138                        let memory =
3139                            clone_cache.entry(basic_info.koid).or_insert_with_fallible(|| {
3140                                backing.memory().clone_memory(basic_info.rights)
3141                            })?;
3142                        memory.clone()
3143                    };
3144
3145                    let mut released_mappings = ReleasedMappings::default();
3146                    target_state.map_memory(
3147                        target,
3148                        DesiredAddress::Fixed(range.start),
3149                        target_memory,
3150                        memory_offset,
3151                        length,
3152                        target_mapping_flags,
3153                        mapping.max_access(),
3154                        false,
3155                        mapping.name().clone(),
3156                        &mut released_mappings,
3157                    )?;
3158                    assert!(
3159                        released_mappings.is_empty(),
3160                        "target mm must be empty when cloning, got {released_mappings:#?}"
3161                    );
3162                }
3163                MappingBacking::PrivateAnonymous => {
3164                    let length = range.end - range.start;
3165                    if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3166                        target_state
3167                            .private_anonymous
3168                            .zero(range.start, length)
3169                            .map_err(|_| errno!(ENOMEM))?;
3170                    }
3171
3172                    let target_memory_offset = range.start.ptr() as u64;
3173                    target_state.map_in_user_vmar(
3174                        SelectedAddress::FixedOverwrite(range.start),
3175                        &target_state.private_anonymous.backing,
3176                        target_memory_offset,
3177                        length,
3178                        target_mapping_flags,
3179                        false,
3180                    )?;
3181                    let removed_mappings = target_state.mappings.insert(
3182                        range.clone(),
3183                        Mapping::new_private_anonymous(
3184                            target_mapping_flags,
3185                            mapping.name().clone(),
3186                        ),
3187                    );
3188                    assert!(
3189                        removed_mappings.is_empty(),
3190                        "target mm must be empty when cloning, got {removed_mappings:#?}"
3191                    );
3192                }
3193            };
3194        }
3195
3196        target_state.forkable_state = state.forkable_state.clone();
3197
3198        let self_dumpable = *self.dumpable.lock(locked);
3199        *target.dumpable.lock(locked) = self_dumpable;
3200
3201        Ok(())
3202    }
3203
3204    /// Returns the replacement `MemoryManager` to be used by the `exec()`ing task.
3205    ///
3206    /// POSIX requires that "a call to any exec function from a process with more than one thread
3207    /// shall result in all threads being terminated and the new executable being loaded and
3208    /// executed. No destructor functions or cleanup handlers shall be called".
3209    /// The caller is responsible for having ensured that this is the only `Task` in the
3210    /// `ThreadGroup`, and thereby the `zx::process`, such that it is safe to tear-down the Zircon
3211    /// userspace VMAR for the current address-space.
3212    pub fn exec(
3213        &self,
3214        exe_node: NamespaceNode,
3215        arch_width: ArchWidth,
3216    ) -> Result<Arc<Self>, zx::Status> {
3217        // To safeguard against concurrent accesses by other tasks through this `MemoryManager`, the
3218        // following steps are performed while holding the write lock on this instance:
3219        //
3220        // 1. All `mappings` are removed, so that remote `MemoryAccessor` calls will fail.
3221        // 2. The `user_vmar` is `destroy()`ed to free-up the user address-space.
3222        // 3. The new `user_vmar` is created, to re-reserve the user address-space.
3223        //
3224        // Once these steps are complete, the lock must be dropped first; only then is it safe
3225        // for the old mappings to be dropped.
3226        let (_old_mappings, user_vmar) = {
3227            let mut state = self.state.write();
3228            let mut info = self.root_vmar.info()?;
3229
3230            // SAFETY: This operation is safe because this is the only `Task` active in the address-
3231            // space, and accesses by remote tasks will use syscalls on the `root_vmar`.
3232            unsafe { state.user_vmar.destroy()? }
3233            state.user_vmar = zx::NullableHandle::invalid().into();
3234
3235            if arch_width.is_arch32() {
3236                info.len = (LOWER_4GB_LIMIT.ptr() - info.base) as usize;
3237            } else {
3238                info.len = RESTRICTED_ASPACE_HIGHEST_ADDRESS - info.base;
3239            }
3240
3241            // Create the new userspace VMAR, to ensure that the address range is (re-)reserved.
3242            let user_vmar = create_user_vmar(&self.root_vmar, &info)?;
3243
3244            (std::mem::replace(&mut state.mappings, Default::default()), user_vmar)
3245        };
3246
3247        // Wrap the new user address-space VMAR into a new `MemoryManager`.
3248        let root_vmar = self.root_vmar.duplicate_handle(zx::Rights::SAME_RIGHTS)?;
3249        let user_vmar_info = user_vmar.info()?;
3250        let new_mm = Self::from_vmar(root_vmar, user_vmar, user_vmar_info);
3251
3252        // Initialize the new `MemoryManager` state.
3253        new_mm.state.write().executable_node = Some(exe_node);
3254
3255        // Initialize the appropriate address-space layout for the `arch_width`.
3256        new_mm.initialize_mmap_layout(arch_width)?;
3257
3258        Ok(Arc::new(new_mm))
3259    }
3260
3261    pub fn initialize_mmap_layout(&self, arch_width: ArchWidth) -> Result<(), Errno> {
3262        let mut state = self.state.write();
3263
3264        // Place the stack at the end of the address space, subject to ASLR adjustment.
3265        state.stack_origin = UserAddress::from_ptr(
3266            state.user_vmar_info.base + state.user_vmar_info.len
3267                - MAX_STACK_SIZE
3268                - generate_random_offset_for_aslr(arch_width),
3269        )
3270        .round_up(*PAGE_SIZE)?;
3271
3272        // Set the highest address that `mmap` will assign to the allocations that don't ask for a
3273        // specific address, subject to ASLR adjustment.
3274        state.mmap_top = state
3275            .stack_origin
3276            .checked_sub(generate_random_offset_for_aslr(arch_width))
3277            .ok_or_else(|| errno!(EINVAL))?;
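        // Resulting layout, from high to low addresses: up to MAX_STACK_SIZE bytes of stack
        // starting at `stack_origin`, then an ASLR-sized gap, then `mmap_top`, below which
        // `mmap` allocations that don't request a specific address are placed.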
3278        Ok(())
3279    }
3280
3281    // Test tasks are not initialized by exec; simulate its behavior by initializing memory layout
3282    // as if a zero-size executable was loaded.
3283    pub fn initialize_mmap_layout_for_test(self: &Arc<Self>, arch_width: ArchWidth) {
3284        self.initialize_mmap_layout(arch_width).unwrap();
3285        let fake_executable_addr = self.get_random_base_for_executable(arch_width, 0).unwrap();
3286        self.initialize_brk_origin(arch_width, fake_executable_addr).unwrap();
3287    }
3288
3289    pub fn initialize_brk_origin(
3290        self: &Arc<Self>,
3291        arch_width: ArchWidth,
3292        executable_end: UserAddress,
3293    ) -> Result<(), Errno> {
3294        self.state.write().brk_origin = executable_end
3295            .checked_add(generate_random_offset_for_aslr(arch_width))
3296            .ok_or_else(|| errno!(EINVAL))?;
3297        Ok(())
3298    }
3299
3300
3301    /// Get a randomized base address for loading a position-independent executable.
3302    pub fn get_random_base_for_executable(
3303        &self,
3304        arch_width: ArchWidth,
3305        length: usize,
3306    ) -> Result<UserAddress, Errno> {
3307        let state = self.state.read();
3308
3309        // Place it at approx. 2/3 of the available mmap space, subject to ASLR adjustment.
3310        let base = round_up_to_system_page_size(2 * state.mmap_top.ptr() / 3).unwrap()
3311            + generate_random_offset_for_aslr(arch_width);
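        // For illustration only: with a hypothetical `mmap_top` of 0x3000_0000, `base` lands
        // near 0x2000_0000 plus the randomized offset, leaving the top third of the mmap span
        // above the executable.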
3312        if base.checked_add(length).ok_or_else(|| errno!(EINVAL))? <= state.mmap_top.ptr() {
3313            Ok(UserAddress::from_ptr(base))
3314        } else {
3315            error!(EINVAL)
3316        }
3317    }

3318    pub fn executable_node(&self) -> Option<NamespaceNode> {
3319        self.state.read().executable_node.clone()
3320    }
3321
3322    #[track_caller]
3323    pub fn get_errno_for_map_err(status: zx::Status) -> Errno {
3324        match status {
3325            zx::Status::INVALID_ARGS => errno!(EINVAL),
3326            zx::Status::ACCESS_DENIED => errno!(EPERM),
3327            zx::Status::NOT_SUPPORTED => errno!(ENODEV),
3328            zx::Status::NO_MEMORY => errno!(ENOMEM),
3329            zx::Status::NO_RESOURCES => errno!(ENOMEM),
3330            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
3331            zx::Status::ALREADY_EXISTS => errno!(EEXIST),
3332            zx::Status::BAD_STATE => errno!(EINVAL),
3333            _ => impossible_error(status),
3334        }
3335    }
3336
3337    #[track_caller]
3338    pub fn get_errno_for_vmo_err(status: zx::Status) -> Errno {
3339        match status {
3340            zx::Status::NO_MEMORY => errno!(ENOMEM),
3341            zx::Status::ACCESS_DENIED => errno!(EPERM),
3342            zx::Status::NOT_SUPPORTED => errno!(EIO),
3343            zx::Status::BAD_STATE => errno!(EIO),
3344            _ => return impossible_error(status),
3345        }
3346    }
3347
3348    pub fn map_memory(
3349        self: &Arc<Self>,
3350        addr: DesiredAddress,
3351        memory: Arc<MemoryObject>,
3352        memory_offset: u64,
3353        length: usize,
3354        prot_flags: ProtectionFlags,
3355        max_access: Access,
3356        options: MappingOptions,
3357        name: MappingName,
3358    ) -> Result<UserAddress, Errno> {
3359        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
3360
3361        // Unmapped mappings must be released after the state is unlocked.
3362        let mut released_mappings = ReleasedMappings::default();
3363        // Hold the lock throughout the operation to uphold memory manager's invariants.
3364        // See mm/README.md.
3365        let mut state = self.state.write();
3366        let result = state.map_memory(
3367            self,
3368            addr,
3369            memory,
3370            memory_offset,
3371            length,
3372            flags,
3373            max_access,
3374            options.contains(MappingOptions::POPULATE),
3375            name,
3376            &mut released_mappings,
3377        );
3378
3379        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
3380        // in `DirEntry`'s `drop`.
3381        released_mappings.finalize(state);
3382
3383        result
3384    }
3385
3386    pub fn map_anonymous(
3387        self: &Arc<Self>,
3388        addr: DesiredAddress,
3389        length: usize,
3390        prot_flags: ProtectionFlags,
3391        options: MappingOptions,
3392        name: MappingName,
3393    ) -> Result<UserAddress, Errno> {
3394        let mut released_mappings = ReleasedMappings::default();
3395        // Hold the lock throughout the operation to uphold memory manager's invariants.
3396        // See mm/README.md.
3397        let mut state = self.state.write();
3398        let result = state.map_anonymous(
3399            self,
3400            addr,
3401            length,
3402            prot_flags,
3403            options,
3404            name,
3405            &mut released_mappings,
3406        );
3407
3408        released_mappings.finalize(state);
3409
3410        result
3411    }
3412
3413    /// Map the stack into a pre-selected address region
3414    pub fn map_stack(
3415        self: &Arc<Self>,
3416        length: usize,
3417        prot_flags: ProtectionFlags,
3418    ) -> Result<UserAddress, Errno> {
3419        assert!(length <= MAX_STACK_SIZE);
3420        let addr = self.state.read().stack_origin;
3421        // The address range containing stack_origin should normally be available: it's above the
3422        // mmap_top, and this method is called early enough in the process lifetime that only the
3423        // main ELF and the interpreter are already loaded. However, in the rare case that the
3424        // static position-independent executable overlaps the chosen address, mapping as Hint
3425        // will make mmap choose a new place for it.
3426        // TODO(https://fxbug.dev/370027241): Consider a more robust approach
3427        let stack_addr = self.map_anonymous(
3428            DesiredAddress::Hint(addr),
3429            length,
3430            prot_flags,
3431            MappingOptions::ANONYMOUS | MappingOptions::GROWSDOWN,
3432            MappingName::Stack,
3433        )?;
3434        if stack_addr != addr {
3435            log_warn!(
3436                "An address designated for stack ({}) was unavailable, mapping at {} instead.",
3437                addr,
3438                stack_addr
3439            );
3440        }
3441        Ok(stack_addr)
3442    }
3443
3444    pub fn remap(
3445        self: &Arc<Self>,
3446        current_task: &CurrentTask,
3447        addr: UserAddress,
3448        old_length: usize,
3449        new_length: usize,
3450        flags: MremapFlags,
3451        new_addr: UserAddress,
3452    ) -> Result<UserAddress, Errno> {
3453        let mut released_mappings = ReleasedMappings::default();
3454        // Hold the lock throughout the operation to uphold memory manager's invariants.
3455        // See mm/README.md.
3456        let mut state = self.state.write();
3457        let result = state.remap(
3458            current_task,
3459            self,
3460            addr,
3461            old_length,
3462            new_length,
3463            flags,
3464            new_addr,
3465            &mut released_mappings,
3466        );
3467
3468        released_mappings.finalize(state);
3469
3470        result
3471    }
3472
3473    pub fn unmap(self: &Arc<Self>, addr: UserAddress, length: usize) -> Result<(), Errno> {
3474        let mut released_mappings = ReleasedMappings::default();
3475        // Hold the lock throughout the operation to uphold memory manager's invariants.
3476        // See mm/README.md.
3477        let mut state = self.state.write();
3478        let result = state.unmap(self, addr, length, &mut released_mappings);
3479
3480        released_mappings.finalize(state);
3481
3482        result
3483    }
3484
3485    pub fn protect(
3486        &self,
3487        current_task: &CurrentTask,
3488        addr: UserAddress,
3489        length: usize,
3490        prot_flags: ProtectionFlags,
3491    ) -> Result<(), Errno> {
3492        // Hold the lock throughout the operation to uphold memory manager's invariants.
3493        // See mm/README.md.
3494        let mut state = self.state.write();
3495        let mut released_mappings = ReleasedMappings::default();
3496        let result = state.protect(current_task, addr, length, prot_flags, &mut released_mappings);
3497        released_mappings.finalize(state);
3498        result
3499    }
3500
3501    pub fn madvise(
3502        &self,
3503        current_task: &CurrentTask,
3504        addr: UserAddress,
3505        length: usize,
3506        advice: u32,
3507    ) -> Result<(), Errno> {
3508        let mut state = self.state.write();
3509        let mut released_mappings = ReleasedMappings::default();
3510        let result = state.madvise(current_task, addr, length, advice, &mut released_mappings);
3511        released_mappings.finalize(state);
3512        result
3513    }
3514
3515    pub fn mlock<L>(
3516        &self,
3517        current_task: &CurrentTask,
3518        locked: &mut Locked<L>,
3519        desired_addr: UserAddress,
3520        desired_length: usize,
3521        on_fault: bool,
3522    ) -> Result<(), Errno>
3523    where
3524        L: LockBefore<ThreadGroupLimits>,
3525    {
3526        let mut state = self.state.write();
3527        let mut released_mappings = ReleasedMappings::default();
3528        let result = state.mlock(
3529            current_task,
3530            locked,
3531            desired_addr,
3532            desired_length,
3533            on_fault,
3534            &mut released_mappings,
3535        );
3536        released_mappings.finalize(state);
3537        result
3538    }
3539
3540    pub fn munlock(
3541        &self,
3542        current_task: &CurrentTask,
3543        desired_addr: UserAddress,
3544        desired_length: usize,
3545    ) -> Result<(), Errno> {
3546        let mut state = self.state.write();
3547        let mut released_mappings = ReleasedMappings::default();
3548        let result =
3549            state.munlock(current_task, desired_addr, desired_length, &mut released_mappings);
3550        released_mappings.finalize(state);
3551        result
3552    }
3553
3554    pub fn handle_page_fault(
3555        self: &Arc<Self>,
3556        locked: &mut Locked<Unlocked>,
3557        decoded: PageFaultExceptionReport,
3558        error_code: zx::Status,
3559    ) -> ExceptionResult {
3560        let addr = UserAddress::from(decoded.faulting_address);
3561        // On a uffd-registered range, handle the fault according to the uffd rules.
3562        if error_code == zx::Status::ACCESS_DENIED {
3563            let state = self.state.write();
3564            if let Some((_, mapping)) = state.mappings.get(addr) {
3565                if mapping.flags().contains(MappingFlags::UFFD) {
3566                    // TODO(https://fxbug.dev/391599171): Support other modes
3567                    assert!(mapping.flags().contains(MappingFlags::UFFD_MISSING));
3568
3569                    if let Some(_uffd) = state.find_uffd(locked, addr) {
3570                        // If the SIGBUS feature was set, no event will be sent to the file.
3571                        // Instead, SIGBUS is delivered to the process that triggered the fault.
3572                        // TODO(https://fxbug.dev/391599171): For now we only support this feature,
3573                        // so we assume it is set.
3574                        // Check for the SIGBUS feature when we start supporting running without it.
3575                        return ExceptionResult::Signal(SignalInfo::new(
3576                            SIGBUS,
3577                            BUS_ADRERR as i32,
3578                            SignalDetail::SigFault { addr: decoded.faulting_address },
3579                        ));
3580                    };
3581                }
3582                let exec_denied = decoded.is_execute && !mapping.can_exec();
3583                let write_denied = decoded.is_write && !mapping.can_write();
3584                let read_denied = (!decoded.is_execute && !decoded.is_write) && !mapping.can_read();
3585                // There is a data race when uffd unregistration and a page fault happen at the
3586                // same time. To detect it, we check whether the access should have been rejected
3587                // according to Starnix's own information about the mapping.
3588                let false_reject = !exec_denied && !write_denied && !read_denied;
3589                if false_reject {
3590                    track_stub!(
3591                        TODO("https://fxbug.dev/435171399"),
3592                        "Inconsistent permission fault"
3593                    );
3594                    return ExceptionResult::Handled;
3595                }
3596            }
3597            std::mem::drop(state);
3598        }
3599
3600        if decoded.not_present {
3601            // A page fault may be resolved by extending a growsdown mapping to cover the faulting
3602            // address. Mark the exception handled if so. Otherwise let the regular handling proceed.
3603
3604            // We should only attempt growth on a not-present fault and we should only extend if the
3605            // access type matches the protection on the GROWSDOWN mapping.
3606            match self.extend_growsdown_mapping_to_address(
3607                UserAddress::from(decoded.faulting_address),
3608                decoded.is_write,
3609            ) {
3610                Ok(true) => {
3611                    return ExceptionResult::Handled;
3612                }
3613                Err(e) => {
3614                    log_warn!("Error handling page fault: {e}")
3615                }
3616                _ => {}
3617            }
3618        }
3619        // For this exception type, the synth_code field in the exception report's context is the
3620        // error generated by the page fault handler. For us this is used to distinguish between a
3621        // segmentation violation and a bus error. Unfortunately this detail is not documented in
3622        // Zircon's public documentation and is only described in the architecture-specific
3623        // exception definitions such as:
3624        // zircon/kernel/arch/x86/include/arch/x86.h
3625        // zircon/kernel/arch/arm64/include/arch/arm64.h
3626        let signo = match error_code {
3627            zx::Status::OUT_OF_RANGE => SIGBUS,
3628            _ => SIGSEGV,
3629        };
3630        ExceptionResult::Signal(SignalInfo::new(
3631            signo,
3632            SI_KERNEL as i32,
3633            SignalDetail::SigFault { addr: decoded.faulting_address },
3634        ))
3635    }
3636
3637    pub fn set_mapping_name(
3638        &self,
3639        addr: UserAddress,
3640        length: usize,
3641        name: Option<FsString>,
3642    ) -> Result<(), Errno> {
3643        let mut state = self.state.write();
3644        let mut released_mappings = ReleasedMappings::default();
3645        let result = state.set_mapping_name(addr, length, name, &mut released_mappings);
3646        released_mappings.finalize(state);
3647        result
3648    }
3649
3650    /// Returns [`Ok`] if the entire range specified by `addr..(addr+length)` contains valid
3651    /// mappings.
3652    ///
3653    /// # Errors
3654    ///
3655    /// Returns [`Err(errno)`] where `errno` is:
3656    ///
3657    ///   - `EINVAL`: `addr` is not page-aligned, or the range is too large,
3658    ///   - `ENOMEM`: one or more pages in the range are not mapped.
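    ///
    /// Illustrative sketch (the buffer address and size are hypothetical):
    ///
    /// ```ignore
    /// // Reject the request early if any page of the user buffer is unmapped.
    /// mm.ensure_mapped(buffer_addr, buffer_len)?;
    /// ```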
3659    pub fn ensure_mapped(&self, addr: UserAddress, length: usize) -> Result<(), Errno> {
3660        if !addr.is_aligned(*PAGE_SIZE) {
3661            return error!(EINVAL);
3662        }
3663
3664        let length = round_up_to_system_page_size(length)?;
3665        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3666        let state = self.state.read();
3667        let mut last_end = addr;
3668        for (range, _) in state.mappings.range(addr..end_addr) {
3669            if range.start > last_end {
3670                // This mapping does not start immediately after the last.
3671                return error!(ENOMEM);
3672            }
3673            last_end = range.end;
3674        }
3675        if last_end < end_addr {
3676            // There is a gap of no mappings at the end of the range.
3677            error!(ENOMEM)
3678        } else {
3679            Ok(())
3680        }
3681    }
3682
3683    /// Returns the memory object mapped at the given address, along with the offset of that
3684    /// address within the memory object. Intended for implementing futexes.
3685    pub fn get_mapping_memory(
3686        &self,
3687        addr: UserAddress,
3688        perms: ProtectionFlags,
3689    ) -> Result<(Arc<MemoryObject>, u64), Errno> {
3690        let state = self.state.read();
3691        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
3692        if !mapping.flags().access_flags().contains(perms) {
3693            return error!(EACCES);
3694        }
3695        match state.get_mapping_backing(mapping) {
3696            MappingBacking::Memory(backing) => {
3697                Ok((Arc::clone(backing.memory()), mapping.address_to_offset(addr)))
3698            }
3699            MappingBacking::PrivateAnonymous => {
3700                Ok((Arc::clone(&state.private_anonymous.backing), addr.ptr() as u64))
3701            }
3702        }
3703    }
3704
3705    /// Does a rough check that the given address is plausibly in the address space of the
3706    /// application. This does not mean the pointer is valid for any particular purpose or that
3707    /// it will remain so!
3708    ///
3709    /// In some syscalls, Linux seems to do some initial validation of the pointer up front to
3710    /// tell the caller early if it's invalid. For example, epoll_wait() returns a vector
3711    /// of events. If the caller passes an invalid pointer, it wants to fail without dropping any
3712    /// events. Failing later when actually copying the required events to userspace would mean
3713    /// those events will be lost. But holding a lock on the memory manager for an asynchronous
3714    /// wait is not desirable.
3715    ///
3716    /// Testing shows that Linux seems to do some initial plausibility checking of the pointer to
3717    /// be able to report common usage errors before doing any (possibly irreversible) work. This
3718    /// checking is easy to get around if you try, so this function is also not required to
3719    /// be particularly robust. Certainly the more advanced cases of races (the memory could be
3720    /// unmapped after this call but before it's used) are not handled.
3721    ///
3722    /// The `buffer_size` parameter is the size of the data structure that needs to fit
3723    /// in the given memory.
3724    ///
3725    /// Returns the error EFAULT if invalid.
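    ///
    /// Hedged sketch of the intended up-front check (names and types are illustrative):
    ///
    /// ```ignore
    /// // Fail fast before starting an asynchronous wait that cannot be rolled back.
    /// mm.check_plausible(user_events_addr, max_events * std::mem::size_of::<EpollEvent>())?;
    /// ```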
3726    pub fn check_plausible(&self, addr: UserAddress, buffer_size: usize) -> Result<(), Errno> {
3727        let state = self.state.read();
3728
3729        if let Some(range) = state.mappings.last_range() {
3730            if (range.end - buffer_size)? >= addr {
3731                return Ok(());
3732            }
3733        }
3734        error!(EFAULT)
3735    }
3736
3737    pub fn get_aio_context(&self, addr: UserAddress) -> Option<Arc<AioContext>> {
3738        let state = self.state.read();
3739        state.get_aio_context(addr).map(|(_, aio_context)| aio_context)
3740    }
3741
3742    pub fn destroy_aio_context(
3743        self: &Arc<Self>,
3744        addr: UserAddress,
3745    ) -> Result<Arc<AioContext>, Errno> {
3746        let mut released_mappings = ReleasedMappings::default();
3747
3748        // Hold the lock throughout the operation to uphold memory manager's invariants.
3749        // See mm/README.md.
3750        let mut state = self.state.write();
3751
3752        // Validate that this address actually has an AioContext. We need to hold the state lock
3753        // until we actually remove the mappings to ensure that another thread does not manipulate
3754        // the mappings after we've validated that they contain an AioContext.
3755        let Some((range, aio_context)) = state.get_aio_context(addr) else {
3756            return error!(EINVAL);
3757        };
3758
3759        let length = range.end - range.start;
3760        let result = state.unmap(self, range.start, length, &mut released_mappings);
3761
3762        released_mappings.finalize(state);
3763
3764        result.map(|_| aio_context)
3765    }
3766
3767    #[cfg(test)]
3768    pub fn get_mapping_name(
3769        &self,
3770        addr: UserAddress,
3771    ) -> Result<Option<flyweights::FlyByteStr>, Errno> {
3772        let state = self.state.read();
3773        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
3774        if let MappingName::Vma(name) = mapping.name() { Ok(Some(name.clone())) } else { Ok(None) }
3775    }
3776
3777    #[cfg(test)]
3778    pub fn get_mapping_count(&self) -> usize {
3779        let state = self.state.read();
3780        state.mappings.iter().count()
3781    }
3782
3783    pub fn extend_growsdown_mapping_to_address(
3784        self: &Arc<Self>,
3785        addr: UserAddress,
3786        is_write: bool,
3787    ) -> Result<bool, Error> {
3788        self.state.write().extend_growsdown_mapping_to_address(self, addr, is_write)
3789    }
3790
3791    pub fn get_stats(&self, current_task: &CurrentTask) -> MemoryStats {
3792        // Grab our state lock before reading zircon mappings so that the two are consistent.
3793        // Other Starnix threads should not make any changes to the Zircon mappings while we hold
3794        // a read lock to the memory manager state.
3795        let state = self.state.read();
3796
3797        let mut stats = MemoryStats::default();
3798        stats.vm_stack = state.stack_size;
3799
3800        state.with_zx_mappings(current_task, |zx_mappings| {
3801            for zx_mapping in zx_mappings {
3802                // We only care about map info for actual mappings.
3803                let zx_details = zx_mapping.details();
3804                let Some(zx_details) = zx_details.as_mapping() else { continue };
3805                let user_address = UserAddress::from(zx_mapping.base as u64);
3806                let (_, mm_mapping) = state
3807                    .mappings
3808                    .get(user_address)
3809                    .unwrap_or_else(|| panic!("mapping bookkeeping must be consistent with zircon's: not found: {user_address:?}"));
3810                debug_assert_eq!(
3811                    match state.get_mapping_backing(mm_mapping) {
3812                        MappingBacking::Memory(m)=>m.memory().get_koid(),
3813                        MappingBacking::PrivateAnonymous=>state.private_anonymous.backing.get_koid(),
3814                    },
3815                    zx_details.vmo_koid,
3816                    "MemoryManager and Zircon must agree on which VMO is mapped in this range",
3817                );
3818
3819                stats.vm_size += zx_mapping.size;
3820
3821                stats.vm_rss += zx_details.committed_bytes;
3822                stats.vm_swap += zx_details.populated_bytes - zx_details.committed_bytes;
3823
3824                if mm_mapping.flags().contains(MappingFlags::SHARED) {
3825                    stats.rss_shared += zx_details.committed_bytes;
3826                } else if mm_mapping.flags().contains(MappingFlags::ANONYMOUS) {
3827                    stats.rss_anonymous += zx_details.committed_bytes;
3828                } else if let MappingName::File(_) = mm_mapping.name() {
3829                    stats.rss_file += zx_details.committed_bytes;
3830                }
3831
3832                if mm_mapping.flags().contains(MappingFlags::LOCKED) {
3833                    stats.vm_lck += zx_details.committed_bytes;
3834                }
3835
3836                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
3837                    && mm_mapping.flags().contains(MappingFlags::WRITE)
3838                {
3839                    stats.vm_data += zx_mapping.size;
3840                }
3841
3842                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
3843                    && mm_mapping.flags().contains(MappingFlags::EXEC)
3844                {
3845                    stats.vm_exe += zx_mapping.size;
3846                }
3847            }
3848        });
3849
3850        // TODO(https://fxbug.dev/396221597): Placeholder for now. We need kernel support to track
3851        // the committed bytes high water mark.
3852        stats.vm_rss_hwm = STUB_VM_RSS_HWM;
3853        stats
3854    }
3855
3856    pub fn atomic_load_u32_acquire(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
3857        if let Some(usercopy) = usercopy() {
3858            usercopy.atomic_load_u32_acquire(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
3859        } else {
3860            unreachable!("can only control memory ordering of atomics with usercopy");
3861        }
3862    }
3863
3864    pub fn atomic_load_u32_relaxed(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
3865        if let Some(usercopy) = usercopy() {
3866            usercopy.atomic_load_u32_relaxed(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
3867        } else {
3868            // SAFETY: `self.state.read().read_memory` only returns `Ok` if every byte
3869            // in the buffer was initialized.
3870            let buf = unsafe {
3871                read_to_array(|buf| {
3872                    self.state.read().read_memory(futex_addr.into(), buf).map(|bytes_read| {
3873                        debug_assert_eq!(bytes_read.len(), std::mem::size_of::<u32>())
3874                    })
3875                })
3876            }?;
3877            Ok(u32::from_ne_bytes(buf))
3878        }
3879    }
3880
3881    pub fn atomic_store_u32_relaxed(
3882        &self,
3883        futex_addr: FutexAddress,
3884        value: u32,
3885    ) -> Result<(), Errno> {
3886        if let Some(usercopy) = usercopy() {
3887            usercopy.atomic_store_u32_relaxed(futex_addr.ptr(), value).map_err(|_| errno!(EFAULT))
3888        } else {
3889            self.state.read().write_memory(futex_addr.into(), value.as_bytes())?;
3890            Ok(())
3891        }
3892    }
3893
3894    pub fn atomic_compare_exchange_u32_acq_rel(
3895        &self,
3896        futex_addr: FutexAddress,
3897        current: u32,
3898        new: u32,
3899    ) -> CompareExchangeResult<u32> {
3900        let Some(usercopy) = usercopy() else {
3901            unreachable!("Atomic compare/exchange requires usercopy.");
3902        };
3903        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_u32_acq_rel(
3904            futex_addr.ptr(),
3905            current,
3906            new,
3907        ))
3908    }
3909
3910    pub fn atomic_compare_exchange_weak_u32_acq_rel(
3911        &self,
3912        futex_addr: FutexAddress,
3913        current: u32,
3914        new: u32,
3915    ) -> CompareExchangeResult<u32> {
3916        let Some(usercopy) = usercopy() else {
3917            unreachable!("Atomic compare/exchange requires usercopy.");
3918        };
3919        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_weak_u32_acq_rel(
3920            futex_addr.ptr(),
3921            current,
3922            new,
3923        ))
3924    }
3925
3926    pub fn get_restricted_vmar_info(&self) -> Option<VmarInfo> {
3927        use zx::HandleBased;
3928        if self.root_vmar.is_invalid_handle() {
3929            return None;
3930        }
3931        Some(VmarInfo { base: RESTRICTED_ASPACE_BASE, len: RESTRICTED_ASPACE_SIZE })
3932    }
3933}
3934
3935/// The result of an atomic compare/exchange operation on user memory.
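///
/// Hedged sketch of how a futex-style caller might consume this result (the surrounding code
/// is illustrative, not taken from this file):
///
/// ```ignore
/// match mm.atomic_compare_exchange_u32_acq_rel(addr, expected, new) {
///     CompareExchangeResult::Success => { /* the new value was written */ }
///     CompareExchangeResult::Stale { observed } => { /* retry with `observed` */ }
///     CompareExchangeResult::Error(errno) => return Err(errno),
/// }
/// ```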
3936#[derive(Debug, Clone)]
3937pub enum CompareExchangeResult<T> {
3938    /// The current value provided matched the one observed in memory and the new value provided
3939    /// was written.
3940    Success,
3941    /// The provided current value did not match the current value in memory.
3942    Stale { observed: T },
3943    /// There was a general error while accessing the requested memory.
3944    Error(Errno),
3945}
3946
3947impl<T> CompareExchangeResult<T> {
3948    fn from_usercopy(usercopy_res: Result<Result<T, T>, ()>) -> Self {
3949        match usercopy_res {
3950            Ok(Ok(_)) => Self::Success,
3951            Ok(Err(observed)) => Self::Stale { observed },
3952            Err(()) => Self::Error(errno!(EFAULT)),
3953        }
3954    }
3955}
3956
3957impl<T> From<Errno> for CompareExchangeResult<T> {
3958    fn from(e: Errno) -> Self {
3959        Self::Error(e)
3960    }
3961}
3962
3963/// The user-space address at which a mapping should be placed. Used by [`MemoryManager::map`].
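///
/// Hedged sketch of how an `mmap`-style caller might pick a variant (the flag names are shown
/// for illustration; this is not the actual syscall dispatch code):
///
/// ```ignore
/// let desired = if addr.is_null() {
///     DesiredAddress::Any
/// } else if flags.contains(MAP_FIXED) {
///     DesiredAddress::FixedOverwrite(addr) // may replace existing mappings
/// } else if flags.contains(MAP_FIXED_NOREPLACE) {
///     DesiredAddress::Fixed(addr) // fails if the range is occupied
/// } else {
///     DesiredAddress::Hint(addr)
/// };
/// ```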
3964#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3965pub enum DesiredAddress {
3966    /// Map at any address chosen by the kernel.
3967    Any,
3968    /// The address is a hint. If the address overlaps an existing mapping, a different address may
3969    /// be chosen.
3970    Hint(UserAddress),
3971    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
3972    /// overwrite it), mapping fails.
3973    Fixed(UserAddress),
3974    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
3975    /// overwrite it), the existing mapping should be unmapped.
3976    FixedOverwrite(UserAddress),
3977}
3978
3979/// The user-space address at which a mapping should be placed. Used by [`map_in_vmar`].
3980#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3981enum SelectedAddress {
3982    /// See DesiredAddress::Fixed.
3983    Fixed(UserAddress),
3984    /// See DesiredAddress::FixedOverwrite.
3985    FixedOverwrite(UserAddress),
3986}
3987
3988impl SelectedAddress {
3989    fn addr(&self) -> UserAddress {
3990        match self {
3991            SelectedAddress::Fixed(addr) => *addr,
3992            SelectedAddress::FixedOverwrite(addr) => *addr,
3993        }
3994    }
3995}
3996
3997/// Writes one line of the memory map in the format used by `/proc/self/maps`.
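///
/// A produced line looks roughly like the following (illustrative values; the name field is
/// padded so that it starts at column 74):
///
/// ```text
/// 2aaaa000-2aaab000 rw-p 00000000 00:00 0                          [heap]
/// ```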
3998fn write_map(
3999    task: &Task,
4000    sink: &mut DynamicFileBuf,
4001    state: &MemoryManagerState,
4002    range: &Range<UserAddress>,
4003    map: &Mapping,
4004) -> Result<(), Errno> {
4005    let line_length = write!(
4006        sink,
4007        "{:08x}-{:08x} {}{}{}{} {:08x} 00:00 {} ",
4008        range.start.ptr(),
4009        range.end.ptr(),
4010        if map.can_read() { 'r' } else { '-' },
4011        if map.can_write() { 'w' } else { '-' },
4012        if map.can_exec() { 'x' } else { '-' },
4013        if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' },
4014        match state.get_mapping_backing(map) {
4015            MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
4016            MappingBacking::PrivateAnonymous => 0,
4017        },
4018        if let MappingName::File(file) = &map.name() { file.name.entry.node.ino } else { 0 }
4019    )?;
4020    let fill_to_name = |sink: &mut DynamicFileBuf| {
4021        // The filename goes at >= the 74th column (73rd when zero indexed)
4022        for _ in line_length..73 {
4023            sink.write(b" ");
4024        }
4025    };
4026    match &map.name() {
4027        MappingName::None | MappingName::AioContext(_) => {
4028            if map.flags().contains(MappingFlags::SHARED)
4029                && map.flags().contains(MappingFlags::ANONYMOUS)
4030            {
4031                // See proc(5), "/proc/[pid]/map_files/"
4032                fill_to_name(sink);
4033                sink.write(b"/dev/zero (deleted)");
4034            }
4035        }
4036        MappingName::Stack => {
4037            fill_to_name(sink);
4038            sink.write(b"[stack]");
4039        }
4040        MappingName::Heap => {
4041            fill_to_name(sink);
4042            sink.write(b"[heap]");
4043        }
4044        MappingName::Vdso => {
4045            fill_to_name(sink);
4046            sink.write(b"[vdso]");
4047        }
4048        MappingName::Vvar => {
4049            fill_to_name(sink);
4050            sink.write(b"[vvar]");
4051        }
4052        MappingName::File(file) => {
4053            fill_to_name(sink);
4054            // File names can have newlines that need to be escaped before printing.
4055            // According to https://man7.org/linux/man-pages/man5/proc.5.html the only
4056            // escaping applied to paths is replacing newlines with an octal sequence.
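            // For example, a path containing the bytes "a\nb" is emitted as "a\012b".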
4057            let path = file.name.path(task);
4058            sink.write_iter(
4059                path.iter()
4060                    .flat_map(|b| if *b == b'\n' { b"\\012" } else { std::slice::from_ref(b) })
4061                    .copied(),
4062            );
4063        }
4064        MappingName::Vma(name) => {
4065            fill_to_name(sink);
4066            sink.write(b"[anon:");
4067            sink.write(name.as_bytes());
4068            sink.write(b"]");
4069        }
4070        MappingName::Ashmem(name) => {
4071            fill_to_name(sink);
4072            sink.write(b"/dev/ashmem/");
4073            sink.write(name.as_bytes());
4074        }
4075    }
4076    sink.write(b"\n");
4077    Ok(())
4078}
4079
4080#[derive(Default)]
4081pub struct MemoryStats {
4082    pub vm_size: usize,
4083    pub vm_rss: usize,
4084    pub vm_rss_hwm: usize,
4085    pub rss_anonymous: usize,
4086    pub rss_file: usize,
4087    pub rss_shared: usize,
4088    pub vm_data: usize,
4089    pub vm_stack: usize,
4090    pub vm_exe: usize,
4091    pub vm_swap: usize,
4092    pub vm_lck: usize,
4093}
4094
4095/// Implements `/proc/self/maps`.
4096#[derive(Clone)]
4097pub struct ProcMapsFile {
4098    mm: Weak<MemoryManager>,
4099    task: WeakRef<Task>,
4100}
4101impl ProcMapsFile {
4102    pub fn new(task: TempRef<'_, Task>) -> DynamicFile<Self> {
4103        // "maps" is empty for kthreads, rather than inaccessible.
4104        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4105        let task = task.into();
4106        DynamicFile::new(Self { mm, task })
4107    }
4108}
4109
4110impl SequenceFileSource for ProcMapsFile {
4111    type Cursor = UserAddress;
4112
4113    fn next(
4114        &self,
4115        _current_task: &CurrentTask,
4116        cursor: UserAddress,
4117        sink: &mut DynamicFileBuf,
4118    ) -> Result<Option<UserAddress>, Errno> {
4119        let task = Task::from_weak(&self.task)?;
4120        // /proc/<pid>/maps is empty for kthreads and tasks whose memory manager has changed.
4121        let Some(mm) = self.mm.upgrade() else {
4122            return Ok(None);
4123        };
4124        let state = mm.state.read();
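        // Emit the first mapping at or after the cursor and return the end of its range as
        // the next cursor; returning Ok(None) ends the sequence.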
4125        if let Some((range, map)) = state.mappings.find_at_or_after(cursor) {
4126            write_map(&task, sink, &state, range, map)?;
4127            return Ok(Some(range.end));
4128        }
4129        Ok(None)
4130    }
4131}
4132
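/// Implements `/proc/self/smaps`.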
4133#[derive(Clone)]
4134pub struct ProcSmapsFile {
4135    mm: Weak<MemoryManager>,
4136    task: WeakRef<Task>,
4137}
4138impl ProcSmapsFile {
4139    pub fn new(task: TempRef<'_, Task>) -> DynamicFile<Self> {
4140        // "smaps" is empty for kthreads, rather than inaccessible.
4141        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4142        DynamicFile::new(Self { mm, task: task.into() })
4143    }
4144}
4145
4146impl DynamicFileSource for ProcSmapsFile {
4147    fn generate(&self, current_task: &CurrentTask, sink: &mut DynamicFileBuf) -> Result<(), Errno> {
4148        let page_size_kb = *PAGE_SIZE / 1024;
4149        let task = Task::from_weak(&self.task)?;
4150        // /proc/<pid>/smaps is empty for kthreads and tasks whose memory manager has changed.
4151        let Some(mm) = self.mm.upgrade() else {
4152            return Ok(());
4153        };
4154        let state = mm.state.read();
4155        state.with_zx_mappings(current_task, |zx_mappings| {
4156            let mut zx_memory_info = RangeMap::<UserAddress, usize>::default();
4157            for idx in 0..zx_mappings.len() {
4158                let zx_mapping = zx_mappings[idx];
4159                // RangeMap uses #[must_use] for its default use case, but this drop is trivial.
4160                let _ = zx_memory_info.insert(
4161                    UserAddress::from_ptr(zx_mapping.base)
4162                        ..UserAddress::from_ptr(zx_mapping.base + zx_mapping.size),
4163                    idx,
4164                );
4165            }
4166
4167            for (mm_range, mm_mapping) in state.mappings.iter() {
4168                let mut committed_bytes = 0;
4169
4170                for (zx_range, zx_mapping_idx) in zx_memory_info.range(mm_range.clone()) {
4171                    let intersect_range = zx_range.intersect(mm_range);
4172                    let zx_mapping = zx_mappings[*zx_mapping_idx];
4173                    let zx_details = zx_mapping.details();
4174                    let Some(zx_details) = zx_details.as_mapping() else { continue };
4175                    let zx_committed_bytes = zx_details.committed_bytes;
4176
4177                    // TODO(https://fxbug.dev/419882465): It can happen that the same Zircon mapping
4178                    // is covered by more than one Starnix mapping. In this case we don't have
4179                    // enough granularity to answer the question of how many committed bytes belong
4180                    // to one mapping or another. Make a best-effort approximation by dividing the
4181                    // committed bytes of a Zircon mapping proportionally.
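                    // Illustrative example: if this Starnix mapping covers 4 KiB of a 16 KiB
                    // Zircon mapping that has 8 KiB committed, it is credited
                    // 8 KiB * (4 KiB / 16 KiB) = 2 KiB here.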
4182                    committed_bytes += if intersect_range != *zx_range {
4183                        let intersection_size =
4184                            intersect_range.end.ptr() - intersect_range.start.ptr();
4185                        let part = intersection_size as f32 / zx_mapping.size as f32;
4186                        let prorated_committed_bytes: f32 = part * zx_committed_bytes as f32;
4187                        prorated_committed_bytes as u64
4188                    } else {
4189                        zx_committed_bytes as u64
4190                    };
4191                    assert_eq!(
4192                        match state.get_mapping_backing(mm_mapping) {
4193                            MappingBacking::Memory(m) => m.memory().get_koid(),
4194                            MappingBacking::PrivateAnonymous =>
4195                                state.private_anonymous.backing.get_koid(),
4196                        },
4197                        zx_details.vmo_koid,
4198                        "MemoryManager and Zircon must agree on which VMO is mapped in this range",
4199                    );
4200                }
4201
4202                write_map(&task, sink, &state, mm_range, mm_mapping)?;
4203
4204                let size_kb = (mm_range.end.ptr() - mm_range.start.ptr()) / 1024;
4205                writeln!(sink, "Size:           {size_kb:>8} kB")?;
4206                let share_count = match state.get_mapping_backing(mm_mapping) {
4207                    MappingBacking::Memory(backing) => {
4208                        let memory = backing.memory();
4209                        if memory.is_clock() {
4210                            // Clock memory mappings are not shared in a meaningful way.
4211                            1
4212                        } else {
4213                            let memory_info = backing.memory().info()?;
4214                            memory_info.share_count as u64
4215                        }
4216                    }
4217                    MappingBacking::PrivateAnonymous => {
4218                        1 // Private mapping
4219                    }
4220                };
4221
4222                let rss_kb = committed_bytes / 1024;
4223                writeln!(sink, "Rss:            {rss_kb:>8} kB")?;
4224
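                // For a shared mapping, Pss charges this address space an equal share of the
                // committed pages; for example (illustrative), 100 kB of Rss shared by 4
                // address spaces contributes 25 kB.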
4225                let pss_kb = if mm_mapping.flags().contains(MappingFlags::SHARED) {
4226                    rss_kb / share_count
4227                } else {
4228                    rss_kb
4229                };
4230                writeln!(sink, "Pss:            {pss_kb:>8} kB")?;
4231
4232                track_stub!(TODO("https://fxbug.dev/322874967"), "smaps dirty pages");
4233                let (shared_dirty_kb, private_dirty_kb) = (0, 0);
4234
4235                let is_shared = share_count > 1;
4236                let shared_clean_kb = if is_shared { rss_kb } else { 0 };
4237                writeln!(sink, "Shared_Clean:   {shared_clean_kb:>8} kB")?;
4238                writeln!(sink, "Shared_Dirty:   {shared_dirty_kb:>8} kB")?;
4239
4240                let private_clean_kb = if is_shared { 0 } else { rss_kb };
4241                writeln!(sink, "Private_Clean:  {private_clean_kb:>8} kB")?;
4242                writeln!(sink, "Private_Dirty:  {private_dirty_kb:>8} kB")?;
4243
4244                let anonymous_kb = if mm_mapping.private_anonymous() { rss_kb } else { 0 };
4245                writeln!(sink, "Anonymous:      {anonymous_kb:>8} kB")?;
4246                writeln!(sink, "KernelPageSize: {page_size_kb:>8} kB")?;
4247                writeln!(sink, "MMUPageSize:    {page_size_kb:>8} kB")?;
4248
4249                let locked_kb =
4250                    if mm_mapping.flags().contains(MappingFlags::LOCKED) { rss_kb } else { 0 };
4251                writeln!(sink, "Locked:         {locked_kb:>8} kB")?;
4252                writeln!(sink, "VmFlags: {}", mm_mapping.vm_flags())?;
4253
4254                track_stub!(TODO("https://fxbug.dev/297444691"), "optional smaps fields");
4255            }
4256            Ok(())
4257        })
4258    }
4259}
4260
4261/// Creates a memory object that can be used in an anonymous mapping for the `mmap` syscall.
4262pub fn create_anonymous_mapping_memory(size: u64) -> Result<Arc<MemoryObject>, Errno> {
4263    // mremap can grow memory regions, so make sure the memory object is resizable.
4264    let mut memory = MemoryObject::from(
4265        zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, size).map_err(|s| match s {
4266            zx::Status::NO_MEMORY => errno!(ENOMEM),
4267            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
4268            _ => impossible_error(s),
4269        })?,
4270    );
4272
4273    memory.set_zx_name(b"starnix-anon");
4274
4275    // TODO(https://fxbug.dev/42056890): Audit replace_as_executable usage
4276    memory = memory.replace_as_executable(&VMEX_RESOURCE).map_err(impossible_error)?;
4277    Ok(Arc::new(memory))
4278}
4279
4280fn generate_random_offset_for_aslr(arch_width: ArchWidth) -> usize {
4281    // Generate a number with ASLR_RANDOM_BITS.
4282    let randomness = {
4283        let random_bits =
4284            if arch_width.is_arch32() { ASLR_32_RANDOM_BITS } else { ASLR_RANDOM_BITS };
4285        let mask = (1 << random_bits) - 1;
4286        let mut bytes = [0; std::mem::size_of::<usize>()];
4287        zx::cprng_draw(&mut bytes);
4288        usize::from_le_bytes(bytes) & mask
4289    };
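    // Illustrative example: with 8 random bits and 4 KiB pages, `randomness` falls in
    // [0, 256), so the returned offset is a page-aligned value in [0, 1 MiB).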
4290
4291    // Transform it into a page-aligned offset.
4292    randomness * (*PAGE_SIZE as usize)
4293}
4294
4295#[cfg(test)]
4296mod tests {
4297    use super::*;
4298    use crate::mm::memory_accessor::MemoryAccessorExt;
4299    use crate::mm::syscalls::do_mmap;
4300    use crate::task::syscalls::sys_prctl;
4301    use crate::testing::*;
4302    use crate::vfs::FdNumber;
4303    use assert_matches::assert_matches;
4304    use itertools::assert_equal;
4305    use starnix_sync::{FileOpsCore, LockEqualOrBefore};
4306    use starnix_uapi::user_address::{UserCString, UserRef};
4307    use starnix_uapi::{
4308        MAP_ANONYMOUS, MAP_FIXED, MAP_GROWSDOWN, MAP_PRIVATE, MAP_SHARED, PR_SET_VMA,
4309        PR_SET_VMA_ANON_NAME, PROT_NONE, PROT_READ,
4310    };
4311    use std::ffi::CString;
4312    use zerocopy::{FromBytes, Immutable, KnownLayout};
4313
4314    #[::fuchsia::test]
4315    fn test_mapping_flags() {
4316        let options = MappingOptions::ANONYMOUS;
4317        let access_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
4318        let mapping_flags = MappingFlags::from_access_flags_and_options(access_flags, options);
4319        assert_eq!(mapping_flags.access_flags(), access_flags);
4320        assert_eq!(mapping_flags.options(), options);
4321
4322        let new_access_flags = ProtectionFlags::READ | ProtectionFlags::EXEC;
4323        let adjusted_mapping_flags = mapping_flags.with_access_flags(new_access_flags);
4324        assert_eq!(adjusted_mapping_flags.access_flags(), new_access_flags);
4325        assert_eq!(adjusted_mapping_flags.options(), options);
4326    }
4327
4328    #[::fuchsia::test]
4329    async fn test_brk() {
4330        spawn_kernel_and_run(async |locked, current_task| {
4331            let mm = current_task.mm().unwrap();
4332
4333            // Look up the given addr in the mappings table.
4334            let get_range = |addr: UserAddress| {
4335                let state = mm.state.read();
4336                state.mappings.get(addr).map(|(range, mapping)| (range.clone(), mapping.clone()))
4337            };
4338
4339            // Initialize the program break.
4340            let base_addr = mm
4341                .set_brk(locked, &current_task, UserAddress::default())
4342                .expect("failed to set initial program break");
4343            assert!(base_addr > UserAddress::default());
4344
4345            // Page containing the program break address should not be mapped.
4346            assert_eq!(get_range(base_addr), None);
4347
4348            // Growing it by a single byte results in that page becoming mapped.
4349            let addr0 = mm
4350                .set_brk(locked, &current_task, (base_addr + 1u64).unwrap())
4351                .expect("failed to grow brk");
4352            assert!(addr0 > base_addr);
4353            let (range0, _) = get_range(base_addr).expect("base_addr should be mapped");
4354            assert_eq!(range0.start, base_addr);
4355            assert_eq!(range0.end, (base_addr + *PAGE_SIZE).unwrap());
4356
4357            // Grow the program break by another byte, which won't be enough to cause additional pages to be mapped.
4358            let addr1 = mm
4359                .set_brk(locked, &current_task, (base_addr + 2u64).unwrap())
4360                .expect("failed to grow brk");
4361            assert_eq!(addr1, (base_addr + 2u64).unwrap());
4362            let (range1, _) = get_range(base_addr).expect("base_addr should be mapped");
4363            assert_eq!(range1.start, range0.start);
4364            assert_eq!(range1.end, range0.end);
4365
4366            // Grow the program break by a non-trivial amount and observe the larger mapping.
4367            let addr2 = mm
4368                .set_brk(locked, &current_task, (base_addr + 24893u64).unwrap())
4369                .expect("failed to grow brk");
4370            assert_eq!(addr2, (base_addr + 24893u64).unwrap());
4371            let (range2, _) = get_range(base_addr).expect("base_addr should be mapped");
4372            assert_eq!(range2.start, base_addr);
4373            assert_eq!(range2.end, addr2.round_up(*PAGE_SIZE).unwrap());
4374
4375            // Shrink the program break and observe the smaller mapping.
4376            let addr3 = mm
4377                .set_brk(locked, &current_task, (base_addr + 14832u64).unwrap())
4378                .expect("failed to shrink brk");
4379            assert_eq!(addr3, (base_addr + 14832u64).unwrap());
4380            let (range3, _) = get_range(base_addr).expect("base_addr should be mapped");
4381            assert_eq!(range3.start, base_addr);
4382            assert_eq!(range3.end, addr3.round_up(*PAGE_SIZE).unwrap());
4383
4384            // Shrink the program break close to zero and observe the smaller mapping.
4385            let addr4 = mm
4386                .set_brk(locked, &current_task, (base_addr + 3u64).unwrap())
4387                .expect("failed to drastically shrink brk");
4388            assert_eq!(addr4, (base_addr + 3u64).unwrap());
4389            let (range4, _) = get_range(base_addr).expect("base_addr should be mapped");
4390            assert_eq!(range4.start, base_addr);
4391            assert_eq!(range4.end, addr4.round_up(*PAGE_SIZE).unwrap());
4392
4393            // Shrink the program break to zero and observe that the mapping is entirely gone.
4394            let addr5 = mm
4395                .set_brk(locked, &current_task, base_addr)
4396                .expect("failed to drastically shrink brk to zero");
4397            assert_eq!(addr5, base_addr);
4398            assert_eq!(get_range(base_addr), None);
4399        })
4400        .await;
4401    }
4402
4403    #[::fuchsia::test]
4404    async fn test_mm_exec() {
4405        spawn_kernel_and_run(async |locked, current_task| {
4406            let mm = current_task.mm().unwrap();
4407
4408            let has = |addr: UserAddress| -> bool {
4409                let state = mm.state.read();
4410                state.mappings.get(addr).is_some()
4411            };
4412
4413            let brk_addr = mm
4414                .set_brk(locked, &current_task, UserAddress::default())
4415                .expect("failed to set initial program break");
4416            assert!(brk_addr > UserAddress::default());
4417
4418            // Allocate a single page of BRK space, so that the break base address is mapped.
4419            let _ = mm
4420                .set_brk(locked, &current_task, (brk_addr + 1u64).unwrap())
4421                .expect("failed to grow program break");
4422            assert!(has(brk_addr));
4423
4424            let mapped_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
4425            assert!(mapped_addr > UserAddress::default());
4426            assert!(has(mapped_addr));
4427
4428            let node = current_task.lookup_path_from_root(locked, "/".into()).unwrap();
4429            let new_mm = mm.exec(node, ArchWidth::Arch64).expect("failed to exec memory manager");
4430            current_task.mm.update(Some(new_mm));
4431
4432            assert!(!has(brk_addr));
4433            assert!(!has(mapped_addr));
4434
4435            // Check that the old addresses are actually available for mapping.
4436            let brk_addr2 = map_memory(locked, &current_task, brk_addr, *PAGE_SIZE);
4437            assert_eq!(brk_addr, brk_addr2);
4438            let mapped_addr2 = map_memory(locked, &current_task, mapped_addr, *PAGE_SIZE);
4439            assert_eq!(mapped_addr, mapped_addr2);
4440        })
4441        .await;
4442    }
4443
4444    #[::fuchsia::test]
4445    async fn test_get_contiguous_mappings_at() {
4446        spawn_kernel_and_run(async |locked, current_task| {
4447            let mm = current_task.mm().unwrap();
4448
4449            // Create four one-page mappings with a hole between the third one and the fourth one.
4450            let page_size = *PAGE_SIZE as usize;
4451            let addr_a = (mm.base_addr + 10 * page_size).unwrap();
4452            let addr_b = (mm.base_addr + 11 * page_size).unwrap();
4453            let addr_c = (mm.base_addr + 12 * page_size).unwrap();
4454            let addr_d = (mm.base_addr + 14 * page_size).unwrap();
4455            assert_eq!(map_memory(locked, &current_task, addr_a, *PAGE_SIZE), addr_a);
4456            assert_eq!(map_memory(locked, &current_task, addr_b, *PAGE_SIZE), addr_b);
4457            assert_eq!(map_memory(locked, &current_task, addr_c, *PAGE_SIZE), addr_c);
4458            assert_eq!(map_memory(locked, &current_task, addr_d, *PAGE_SIZE), addr_d);
4459
4460            {
4461                let mm_state = mm.state.read();
4462                // Verify that requesting an unmapped address returns an empty iterator.
4463                assert_equal(
4464                    mm_state.get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 50).unwrap(),
4465                    vec![],
4466                );
4467                assert_equal(
4468                    mm_state.get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 200).unwrap(),
4469                    vec![],
4470                );
4471
4472                // Verify that requesting zero bytes returns an empty iterator.
4473                assert_equal(mm_state.get_contiguous_mappings_at(addr_a, 0).unwrap(), vec![]);
4474
4475                // Verify errors.
4476                assert_eq!(
4477                    mm_state
4478                        .get_contiguous_mappings_at(UserAddress::from(100), usize::MAX)
4479                        .err()
4480                        .unwrap(),
4481                    errno!(EFAULT)
4482                );
4483                assert_eq!(
4484                    mm_state
4485                        .get_contiguous_mappings_at((mm_state.max_address() + 1u64).unwrap(), 0)
4486                        .err()
4487                        .unwrap(),
4488                    errno!(EFAULT)
4489                );
4490            }
4491
4492            assert_eq!(mm.get_mapping_count(), 2);
4493            let mm_state = mm.state.read();
4494            let (map_a, map_b) = {
4495                let mut it = mm_state.mappings.iter();
4496                (it.next().unwrap().1, it.next().unwrap().1)
4497            };
4498
4499            assert_equal(
4500                mm_state.get_contiguous_mappings_at(addr_a, page_size).unwrap(),
4501                vec![(map_a, page_size)],
4502            );
4503
4504            assert_equal(
4505                mm_state.get_contiguous_mappings_at(addr_a, page_size / 2).unwrap(),
4506                vec![(map_a, page_size / 2)],
4507            );
4508
4509            assert_equal(
4510                mm_state.get_contiguous_mappings_at(addr_a, page_size * 3).unwrap(),
4511                vec![(map_a, page_size * 3)],
4512            );
4513
4514            assert_equal(
4515                mm_state.get_contiguous_mappings_at(addr_b, page_size).unwrap(),
4516                vec![(map_a, page_size)],
4517            );
4518
4519            assert_equal(
4520                mm_state.get_contiguous_mappings_at(addr_d, page_size).unwrap(),
4521                vec![(map_b, page_size)],
4522            );
4523
4524            // Verify that results stop if there is a hole.
4525            assert_equal(
4526                mm_state
4527                    .get_contiguous_mappings_at((addr_a + page_size / 2).unwrap(), page_size * 10)
4528                    .unwrap(),
4529                vec![(map_a, page_size * 2 + page_size / 2)],
4530            );
4531
4532            // Verify that results stop at the last mapped page.
4533            assert_equal(
4534                mm_state.get_contiguous_mappings_at(addr_d, page_size * 10).unwrap(),
4535                vec![(map_b, page_size)],
4536            );
4537        })
4538        .await;
4539    }
4540
4541    #[::fuchsia::test]
4542    async fn test_read_write_crossing_mappings() {
4543        spawn_kernel_and_run(async |locked, current_task| {
4544            let mm = current_task.mm().unwrap();
4545            let ma = current_task.deref();
4546
4547            // Map two contiguous pages at fixed addresses, but backed by distinct mappings.
4548            let page_size = *PAGE_SIZE;
4549            let addr = (mm.base_addr + 10 * page_size).unwrap();
4550            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4551            assert_eq!(
4552                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4553                (addr + page_size).unwrap()
4554            );
4555            // Mappings get merged since they are backed by the same memory object.
4556            assert_eq!(mm.get_mapping_count(), 1);
4557
4558            // Write a pattern crossing our two mappings.
4559            let test_addr = (addr + page_size / 2).unwrap();
4560            let data: Vec<u8> = (0..page_size).map(|i| (i % 256) as u8).collect();
4561            ma.write_memory(test_addr, &data).expect("failed to write test data");
4562
4563            // Read it back.
4564            let data_readback =
4565                ma.read_memory_to_vec(test_addr, data.len()).expect("failed to read test data");
4566            assert_eq!(&data, &data_readback);
4567        })
4568        .await;
4569    }
4570
4571    #[::fuchsia::test]
4572    async fn test_read_write_errors() {
4573        spawn_kernel_and_run(async |locked, current_task| {
4574            let ma = current_task.deref();
4575
4576            let page_size = *PAGE_SIZE;
4577            let addr = map_memory(locked, &current_task, UserAddress::default(), page_size);
4578            let buf = vec![0u8; page_size as usize];
4579
4580            // Verify that accessing data that is only partially mapped is an error.
4581            let partial_addr_before = (addr - page_size / 2).unwrap();
4582            assert_eq!(ma.write_memory(partial_addr_before, &buf), error!(EFAULT));
4583            assert_eq!(ma.read_memory_to_vec(partial_addr_before, buf.len()), error!(EFAULT));
4584            let partial_addr_after = (addr + page_size / 2).unwrap();
4585            assert_eq!(ma.write_memory(partial_addr_after, &buf), error!(EFAULT));
4586            assert_eq!(ma.read_memory_to_vec(partial_addr_after, buf.len()), error!(EFAULT));
4587
4588            // Verify that accessing unmapped memory is an error.
4589            let unmapped_addr = (addr - 10 * page_size).unwrap();
4590            assert_eq!(ma.write_memory(unmapped_addr, &buf), error!(EFAULT));
4591            assert_eq!(ma.read_memory_to_vec(unmapped_addr, buf.len()), error!(EFAULT));
4592
4593            // However, accessing zero bytes in unmapped memory is not an error.
4594            ma.write_memory(unmapped_addr, &[]).expect("failed to write no data");
4595            ma.read_memory_to_vec(unmapped_addr, 0).expect("failed to read no data");
4596        })
4597        .await;
4598    }
4599
4600    #[::fuchsia::test]
4601    async fn test_read_c_string_to_vec_large() {
4602        spawn_kernel_and_run(async |locked, current_task| {
4603            let mm = current_task.mm().unwrap();
4604            let ma = current_task.deref();
4605
4606            let page_size = *PAGE_SIZE;
4607            let max_size = 4 * page_size as usize;
4608            let addr = (mm.base_addr + 10 * page_size).unwrap();
4609
4610            assert_eq!(map_memory(locked, &current_task, addr, max_size as u64), addr);
4611
4612            let mut random_data = vec![0; max_size];
4613            zx::cprng_draw(&mut random_data);
4614            // Replace all NUL bytes with a non-zero value.
4615            for i in 0..random_data.len() {
4616                if random_data[i] == 0 {
4617                    random_data[i] = 1;
4618                }
4619            }
4620            random_data[max_size - 1] = 0;
4621
4622            ma.write_memory(addr, &random_data).expect("failed to write test string");
4623            // We should read the same value minus the last byte (NUL char).
4624            assert_eq!(
4625                ma.read_c_string_to_vec(UserCString::new(current_task, addr), max_size).unwrap(),
4626                random_data[..max_size - 1]
4627            );
4628        })
4629        .await;
4630    }
4631
4632    #[::fuchsia::test]
4633    async fn test_read_c_string_to_vec() {
4634        spawn_kernel_and_run(async |locked, current_task| {
4635            let mm = current_task.mm().unwrap();
4636            let ma = current_task.deref();
4637
4638            let page_size = *PAGE_SIZE;
4639            let max_size = 2 * page_size as usize;
4640            let addr = (mm.base_addr + 10 * page_size).unwrap();
4641
4642            // Map a page at a fixed address and write an unterminated string at the end of it.
4643            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4644            let test_str = b"foo!";
4645            let test_addr =
4646                addr.checked_add(page_size as usize).unwrap().checked_sub(test_str.len()).unwrap();
4647            ma.write_memory(test_addr, test_str).expect("failed to write test string");
4648
4649            // Expect error if the string is not terminated.
4650            assert_eq!(
4651                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size),
4652                error!(ENAMETOOLONG)
4653            );
4654
4655            // Expect success if the string is terminated.
4656            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
4657            assert_eq!(
4658                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
4659                    .unwrap(),
4660                "foo"
4661            );
4662
4663            // Expect success if the string spans over two mappings.
4664            assert_eq!(
4665                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4666                (addr + page_size).unwrap()
4667            );
4668            // TODO: Adjacent private anonymous mappings are collapsed. To test this case, this
4669            // test needs to provide a backing for the second mapping.
4670            // assert_eq!(mm.get_mapping_count(), 2);
4671            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
4672                .expect("failed to write extra chars");
4673            assert_eq!(
4674                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
4675                    .unwrap(),
4676                "foobar",
4677            );
4678
4679            // Expect error if the string exceeds max limit
4680            assert_eq!(
4681                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), 2),
4682                error!(ENAMETOOLONG)
4683            );
4684
4685            // Expect error if the address is invalid.
4686            assert_eq!(
4687                ma.read_c_string_to_vec(UserCString::null(current_task), max_size),
4688                error!(EFAULT)
4689            );
4690        })
4691        .await;
4692    }
4693
4694    #[::fuchsia::test]
4695    async fn can_read_argv_like_regions() {
4696        spawn_kernel_and_run(async |locked, current_task| {
4697            let ma = current_task.deref();
4698
4699            // Map a page.
4700            let page_size = *PAGE_SIZE;
4701            let addr = map_memory_anywhere(locked, &current_task, page_size);
4702            assert!(!addr.is_null());
4703
4704            // Write an unterminated string.
4705            let mut payload = "first".as_bytes().to_vec();
4706            let mut expected_parses = vec![];
4707            ma.write_memory(addr, &payload).unwrap();
4708
4709            // Expect success if the string is terminated.
4710            expected_parses.push(payload.clone());
4711            payload.push(0);
4712            ma.write_memory(addr, &payload).unwrap();
4713            assert_eq!(
4714                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
4715                expected_parses,
4716            );
4717
4718            // Make sure we can parse multiple strings from the same region.
4719            let second = b"second";
4720            payload.extend(second);
4721            payload.push(0);
4722            expected_parses.push(second.to_vec());
4723
4724            let third = b"third";
4725            payload.extend(third);
4726            payload.push(0);
4727            expected_parses.push(third.to_vec());
4728
4729            ma.write_memory(addr, &payload).unwrap();
4730            assert_eq!(
4731                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
4732                expected_parses,
4733            );
4734        })
4735        .await;
4736    }
4737
4738    #[::fuchsia::test]
4739    async fn truncate_argv_like_regions() {
4740        spawn_kernel_and_run(async |locked, current_task| {
4741            let ma = current_task.deref();
4742
4743            // Map a page.
4744            let page_size = *PAGE_SIZE;
4745            let addr = map_memory_anywhere(locked, &current_task, page_size);
4746            assert!(!addr.is_null());
4747
4748            let payload = b"first\0second\0third\0";
4749            ma.write_memory(addr, payload).unwrap();
4750            assert_eq!(
4751                ma.read_nul_delimited_c_string_list(addr, payload.len() - 3).unwrap(),
4752                vec![b"first".to_vec(), b"second".to_vec(), b"thi".to_vec()],
4753                "Skipping last three bytes of payload should skip last two bytes of 3rd string"
4754            );
4755        })
4756        .await;
4757    }
4758
4759    #[::fuchsia::test]
4760    async fn test_read_c_string() {
4761        spawn_kernel_and_run(async |locked, current_task| {
4762            let mm = current_task.mm().unwrap();
4763            let ma = current_task.deref();
4764
4765            let page_size = *PAGE_SIZE;
4766            let buf_cap = 2 * page_size as usize;
4767            let mut buf = Vec::with_capacity(buf_cap);
4768            // We can't just use `spare_capacity_mut` because `Vec::with_capacity`
4769            // returns a `Vec` with _at least_ the requested capacity.
4770            let buf = &mut buf.spare_capacity_mut()[..buf_cap];
4771            let addr = (mm.base_addr + 10 * page_size).unwrap();
4772
4773            // Map a page at a fixed address and write an unterminated string at the end of it.
4774            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4775            let test_str = b"foo!";
4776            let test_addr = (addr + (page_size - test_str.len() as u64)).unwrap();
4777            ma.write_memory(test_addr, test_str).expect("failed to write test string");
4778
4779            // Expect error if the string is not terminated.
4780            assert_eq!(
4781                ma.read_c_string(UserCString::new(current_task, test_addr), buf),
4782                error!(ENAMETOOLONG)
4783            );
4784
4785            // Expect success if the string is terminated.
4786            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
4787            assert_eq!(
4788                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
4789                "foo"
4790            );
4791
4792            // Expect success if the string spans over two mappings.
4793            assert_eq!(
4794                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4795                (addr + page_size).unwrap()
4796            );
4797            // TODO: For these to be multiple mappings, we need to provide a file backing for the
4798            // next page, or the mappings will be collapsed.
4799            // assert_eq!(mm.get_mapping_count(), 2);
4800            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
4801                .expect("failed to write extra chars");
4802            assert_eq!(
4803                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
4804                "foobar"
4805            );
4806
4807            // Expect error if the string does not fit in the provided buffer.
4808            assert_eq!(
4809                ma.read_c_string(
4810                    UserCString::new(current_task, test_addr),
4811                    &mut [MaybeUninit::uninit(); 2]
4812                ),
4813                error!(ENAMETOOLONG)
4814            );
4815
4816            // Expect error if the address is invalid.
4817            assert_eq!(ma.read_c_string(UserCString::null(current_task), buf), error!(EFAULT));
4818        })
4819        .await;
4820    }
4821
4822    #[::fuchsia::test]
4823    async fn test_find_next_unused_range() {
4824        spawn_kernel_and_run(async |locked, current_task| {
4825            let mm = current_task.mm().unwrap();
4826
4827            let mmap_top = mm.state.read().find_next_unused_range(0).unwrap().ptr();
4828            let page_size = *PAGE_SIZE as usize;
4829            assert!(mmap_top <= RESTRICTED_ASPACE_HIGHEST_ADDRESS);
4830
4831            // No mappings - top address minus requested size is available
4832            assert_eq!(
4833                mm.state.read().find_next_unused_range(page_size).unwrap(),
4834                UserAddress::from_ptr(mmap_top - page_size)
4835            );
4836
4837            // Fill it.
4838            let addr = UserAddress::from_ptr(mmap_top - page_size);
4839            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
4840
4841            // The next available range is right before the new mapping.
4842            assert_eq!(
4843                mm.state.read().find_next_unused_range(page_size).unwrap(),
4844                UserAddress::from_ptr(addr.ptr() - page_size)
4845            );
4846
4847            // Allocate an extra page before a one-page gap.
4848            let addr2 = UserAddress::from_ptr(addr.ptr() - 2 * page_size);
4849            assert_eq!(map_memory(locked, &current_task, addr2, *PAGE_SIZE), addr2);
4850
4851            // Searching for a one-page range still gives the same result
4852            assert_eq!(
4853                mm.state.read().find_next_unused_range(page_size).unwrap(),
4854                UserAddress::from_ptr(addr.ptr() - page_size)
4855            );
4856
4857            // Searching for a bigger range results in the area before the second mapping
4858            assert_eq!(
4859                mm.state.read().find_next_unused_range(2 * page_size).unwrap(),
4860                UserAddress::from_ptr(addr2.ptr() - 2 * page_size)
4861            );
4862
4863            // Searching for more memory than available should fail.
4864            assert_eq!(mm.state.read().find_next_unused_range(mmap_top), None);
4865        })
4866        .await;
4867    }
4868
4869    #[::fuchsia::test]
4870    async fn test_count_placements() {
4871        spawn_kernel_and_run(async |locked, current_task| {
4872            let mm = current_task.mm().unwrap();
4873
4874            // ten-page range
4875            let page_size = *PAGE_SIZE as usize;
4876            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
4877                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
4878
4879            assert_eq!(
4880                mm.state.read().count_possible_placements(11 * page_size, &subrange_ten),
4881                Some(0)
4882            );
4883            assert_eq!(
4884                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
4885                Some(1)
4886            );
4887            assert_eq!(
4888                mm.state.read().count_possible_placements(9 * page_size, &subrange_ten),
4889                Some(2)
4890            );
4891            assert_eq!(
4892                mm.state.read().count_possible_placements(page_size, &subrange_ten),
4893                Some(10)
4894            );
4895
4896            // map 6th page
4897            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
4898            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
4899
4900            assert_eq!(
4901                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
4902                Some(0)
4903            );
4904            assert_eq!(
4905                mm.state.read().count_possible_placements(5 * page_size, &subrange_ten),
4906                Some(1)
4907            );
4908            assert_eq!(
4909                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
4910                Some(3)
4911            );
4912            assert_eq!(
4913                mm.state.read().count_possible_placements(page_size, &subrange_ten),
4914                Some(9)
4915            );
4916        })
4917        .await;
4918    }
4919
4920    #[::fuchsia::test]
4921    async fn test_pick_placement() {
4922        spawn_kernel_and_run(async |locked, current_task| {
4923            let mm = current_task.mm().unwrap();
4924
4925            let page_size = *PAGE_SIZE as usize;
4926            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
4927                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
4928
4929            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
4930            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
4931            assert_eq!(
4932                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
4933                Some(3)
4934            );
4935
4936            assert_eq!(
4937                mm.state.read().pick_placement(4 * page_size, 0, &subrange_ten),
4938                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE))
4939            );
4940            assert_eq!(
4941                mm.state.read().pick_placement(4 * page_size, 1, &subrange_ten),
4942                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + page_size))
4943            );
4944            assert_eq!(
4945                mm.state.read().pick_placement(4 * page_size, 2, &subrange_ten),
4946                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 6 * page_size))
4947            );
4948        })
4949        .await;
4950    }
4951
4952    #[::fuchsia::test]
4953    async fn test_find_random_unused_range() {
4954        spawn_kernel_and_run(async |locked, current_task| {
4955            let mm = current_task.mm().unwrap();
4956
4957            // ten-page range
4958            let page_size = *PAGE_SIZE as usize;
4959            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
4960                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
4961
4962            for _ in 0..10 {
4963                let addr = mm.state.read().find_random_unused_range(page_size, &subrange_ten);
4964                assert!(addr.is_some());
4965                assert_eq!(
4966                    map_memory(locked, &current_task, addr.unwrap(), *PAGE_SIZE),
4967                    addr.unwrap()
4968                );
4969            }
4970            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
4971        })
4972        .await;
4973    }
4974
4975    #[::fuchsia::test]
4976    async fn test_grows_down_near_aspace_base() {
4977        spawn_kernel_and_run(async |locked, current_task| {
4978            let mm = current_task.mm().unwrap();
4979
4980            let page_count = 10;
4981
4982            let page_size = *PAGE_SIZE as usize;
4983            let addr =
4984                (UserAddress::from_ptr(RESTRICTED_ASPACE_BASE) + page_count * page_size).unwrap();
4985            assert_eq!(
4986                map_memory_with_flags(
4987                    locked,
4988                    &current_task,
4989                    addr,
4990                    page_size as u64,
4991                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN
4992                ),
4993                addr
4994            );
4995
4996            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)..addr;
4997            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
4998        })
4999        .await;
5000    }
5001
5002    #[::fuchsia::test]
5003    async fn test_unmap_returned_mappings() {
5004        spawn_kernel_and_run(async |locked, current_task| {
5005            let mm = current_task.mm().unwrap();
5006
5007            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5008
5009            let mut released_mappings = ReleasedMappings::default();
5010            let mut mm_state = mm.state.write();
5011            let unmap_result =
5012                mm_state.unmap(&mm, addr, *PAGE_SIZE as usize, &mut released_mappings);
5013            assert!(unmap_result.is_ok());
5014            assert_eq!(released_mappings.len(), 1);
5015            released_mappings.finalize(mm_state);
5016        })
5017        .await;
5018    }
5019
5020    #[::fuchsia::test]
5021    async fn test_unmap_returns_multiple_mappings() {
5022        spawn_kernel_and_run(async |locked, current_task| {
5023            let mm = current_task.mm().unwrap();
5024
5025            let addr = mm.state.read().find_next_unused_range(3 * *PAGE_SIZE as usize).unwrap();
5026            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5027            let _ = map_memory(locked, &current_task, (addr + 2 * *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5028
5029            let mut released_mappings = ReleasedMappings::default();
5030            let mut mm_state = mm.state.write();
5031            let unmap_result =
5032                mm_state.unmap(&mm, addr, (*PAGE_SIZE * 3) as usize, &mut released_mappings);
5033            assert!(unmap_result.is_ok());
5034            assert_eq!(released_mappings.len(), 2);
5035            released_mappings.finalize(mm_state);
5036        })
5037        .await;
5038    }
5039
5040    /// Maps two pages in separate mappings next to each other, then unmaps the first page.
5041    /// The second page should not be modified.
5042    #[::fuchsia::test]
5043    async fn test_map_two_unmap_one() {
5044        spawn_kernel_and_run(async |locked, current_task| {
5045            let mm = current_task.mm().unwrap();
5046
5047            // reserve memory for both pages
5048            let addr_reserve =
5049                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5050            let addr1 = do_mmap(
5051                locked,
5052                &current_task,
5053                addr_reserve,
5054                *PAGE_SIZE as usize,
5055                PROT_READ, // Map read-only to avoid merging of the two mappings
5056                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5057                FdNumber::from_raw(-1),
5058                0,
5059            )
5060            .expect("failed to mmap");
5061            let addr2 = map_memory_with_flags(
5062                locked,
5063                &current_task,
5064                (addr_reserve + *PAGE_SIZE).unwrap(),
5065                *PAGE_SIZE,
5066                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5067            );
5068            let state = mm.state.read();
5069            let (range1, _) = state.mappings.get(addr1).expect("mapping");
5070            assert_eq!(range1.start, addr1);
5071            assert_eq!(range1.end, (addr1 + *PAGE_SIZE).unwrap());
5072            let (range2, mapping2) = state.mappings.get(addr2).expect("mapping");
5073            assert_eq!(range2.start, addr2);
5074            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5075            let original_memory2 = {
5076                match state.get_mapping_backing(mapping2) {
5077                    MappingBacking::Memory(backing) => {
5078                        assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5079                        backing.memory().clone()
5080                    }
5081                    MappingBacking::PrivateAnonymous => {
5082                        panic!("Unexpected private anonymous mapping")
5083                    }
5084                }
5085            };
5086            std::mem::drop(state);
5087
5088            assert_eq!(mm.unmap(addr1, *PAGE_SIZE as usize), Ok(()));
5089
5090            let state = mm.state.read();
5091
5092            // The first page should be unmapped.
5093            assert!(state.mappings.get(addr1).is_none());
5094
5095            // The second page should remain unchanged.
5096            let (range2, mapping2) = state.mappings.get(addr2).expect("second page");
5097            assert_eq!(range2.start, addr2);
5098            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5099            match state.get_mapping_backing(mapping2) {
5100                MappingBacking::Memory(backing) => {
5101                    assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5102                    assert_eq!(original_memory2.get_koid(), backing.memory().get_koid());
5103                }
5104                MappingBacking::PrivateAnonymous => panic!("Unexpected private anonymous mapping"),
5105            }
5106        })
5107        .await;
5108    }
5109
5110    #[::fuchsia::test]
5111    async fn test_read_write_objects() {
5112        spawn_kernel_and_run(async |locked, current_task| {
5113            let ma = current_task.deref();
5114            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5115            let items_ref = UserRef::<i32>::new(addr);
5116
5117            let items_written = vec![0, 2, 3, 7, 1];
5118            ma.write_objects(items_ref, &items_written).expect("Failed to write object array.");
5119
5120            let items_read = ma
5121                .read_objects_to_vec(items_ref, items_written.len())
5122                .expect("Failed to read object array.");
5123
5124            assert_eq!(items_written, items_read);
5125        })
5126        .await;
5127    }
5128
5129    #[::fuchsia::test]
5130    async fn test_read_write_objects_null() {
5131        spawn_kernel_and_run(async |_, current_task| {
5132            let ma = current_task.deref();
5133            let items_ref = UserRef::<i32>::new(UserAddress::default());
5134
5135            let items_written = vec![];
5136            ma.write_objects(items_ref, &items_written)
5137                .expect("Failed to write empty object array.");
5138
5139            let items_read = ma
5140                .read_objects_to_vec(items_ref, items_written.len())
5141                .expect("Failed to read empty object array.");
5142
5143            assert_eq!(items_written, items_read);
5144        })
5145        .await;
5146    }
5147
5148    #[::fuchsia::test]
5149    async fn test_read_object_partial() {
5150        #[derive(Debug, Default, Copy, Clone, KnownLayout, FromBytes, Immutable, PartialEq)]
5151        struct Items {
5152            val: [i32; 4],
5153        }
5154
5155        spawn_kernel_and_run(async |locked, current_task| {
5156            let ma = current_task.deref();
5157            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5158            let items_array_ref = UserRef::<i32>::new(addr);
5159
5160            // Populate some values.
5161            let items_written = vec![75, 23, 51, 98];
5162            ma.write_objects(items_array_ref, &items_written)
5163                .expect("Failed to write object array.");
5164
5165            // Full read of all 4 values.
5166            let items_ref = UserRef::<Items>::new(addr);
5167            let items_read = ma
5168                .read_object_partial(items_ref, std::mem::size_of::<Items>())
5169                .expect("Failed to read object");
5170            assert_eq!(items_written, items_read.val);
5171
5172            // Partial read of the first two.
5173            let items_read = ma.read_object_partial(items_ref, 8).expect("Failed to read object");
5174            assert_eq!(vec![75, 23, 0, 0], items_read.val);
5175
5176            // The API currently allows reading 0 bytes (this could be re-evaluated), so test that
5177            // it does the right thing.
5178            let items_read = ma.read_object_partial(items_ref, 0).expect("Failed to read object");
5179            assert_eq!(vec![0, 0, 0, 0], items_read.val);
5180
5181            // Size bigger than the object.
5182            assert_eq!(
5183                ma.read_object_partial(items_ref, std::mem::size_of::<Items>() + 8),
5184                error!(EINVAL)
5185            );
5186
5187            // Bad pointer.
5188            assert_eq!(
5189                ma.read_object_partial(UserRef::<Items>::new(UserAddress::from(1)), 16),
5190                error!(EFAULT)
5191            );
5192        })
5193        .await;
5194    }
5195
5196    #[::fuchsia::test]
5197    async fn test_partial_read() {
5198        spawn_kernel_and_run(async |locked, current_task| {
5199            let mm = current_task.mm().unwrap();
5200            let ma = current_task.deref();
5201
5202            let addr = mm.state.read().find_next_unused_range(2 * *PAGE_SIZE as usize).unwrap();
5203            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5204            let second_map =
5205                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5206
5207            let bytes = vec![0xf; (*PAGE_SIZE * 2) as usize];
5208            assert!(ma.write_memory(addr, &bytes).is_ok());
5209            let mut state = mm.state.write();
5210            let mut released_mappings = ReleasedMappings::default();
5211            state
5212                .protect(
5213                    ma,
5214                    second_map,
5215                    *PAGE_SIZE as usize,
5216                    ProtectionFlags::empty(),
5217                    &mut released_mappings,
5218                )
5219                .unwrap();
5220            released_mappings.finalize(state);
5221            assert_eq!(
5222                ma.read_memory_partial_to_vec(addr, bytes.len()).unwrap().len(),
5223                *PAGE_SIZE as usize,
5224            );
5225        })
5226        .await;
5227    }
5228
5229    fn map_memory_growsdown<L>(
5230        locked: &mut Locked<L>,
5231        current_task: &CurrentTask,
5232        length: u64,
5233    ) -> UserAddress
5234    where
5235        L: LockEqualOrBefore<FileOpsCore>,
5236    {
5237        map_memory_with_flags(
5238            locked,
5239            current_task,
5240            UserAddress::default(),
5241            length,
5242            MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN,
5243        )
5244    }
5245
5246    #[::fuchsia::test]
5247    async fn test_grow_mapping_empty_mm() {
5248        spawn_kernel_and_run(async |_, current_task| {
5249            let mm = current_task.mm().unwrap();
5250
5251            let addr = UserAddress::from(0x100000);
5252
5253            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5254        })
5255        .await;
5256    }
5257
5258    #[::fuchsia::test]
5259    async fn test_grow_inside_mapping() {
5260        spawn_kernel_and_run(async |locked, current_task| {
5261            let mm = current_task.mm().unwrap();
5262
5263            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5264
5265            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5266        })
5267        .await;
5268    }
5269
5270    #[::fuchsia::test]
5271    async fn test_grow_write_fault_inside_read_only_mapping() {
5272        spawn_kernel_and_run(async |locked, current_task| {
5273            let mm = current_task.mm().unwrap();
5274
5275            let addr = do_mmap(
5276                locked,
5277                &current_task,
5278                UserAddress::default(),
5279                *PAGE_SIZE as usize,
5280                PROT_READ,
5281                MAP_ANONYMOUS | MAP_PRIVATE,
5282                FdNumber::from_raw(-1),
5283                0,
5284            )
5285            .expect("Could not map memory");
5286
5287            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5288            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5289        })
5290        .await;
5291    }
5292
5293    #[::fuchsia::test]
5294    async fn test_grow_fault_inside_prot_none_mapping() {
5295        spawn_kernel_and_run(async |locked, current_task| {
5296            let mm = current_task.mm().unwrap();
5297
5298            let addr = do_mmap(
5299                locked,
5300                &current_task,
5301                UserAddress::default(),
5302                *PAGE_SIZE as usize,
5303                PROT_NONE,
5304                MAP_ANONYMOUS | MAP_PRIVATE,
5305                FdNumber::from_raw(-1),
5306                0,
5307            )
5308            .expect("Could not map memory");
5309
5310            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5311            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5312        })
5313        .await;
5314    }
5315
5316    #[::fuchsia::test]
5317    async fn test_grow_below_mapping() {
5318        spawn_kernel_and_run(async |locked, current_task| {
5319            let mm = current_task.mm().unwrap();
5320
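            // Fault one page below the growsdown mapping; the mapping should be extended
            // downwards to cover the faulting address.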
5321            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) - *PAGE_SIZE;
5322
5323            assert_matches!(mm.extend_growsdown_mapping_to_address(addr.unwrap(), false), Ok(true));
5324        })
5325        .await;
5326    }
5327
5328    #[::fuchsia::test]
5329    async fn test_grow_above_mapping() {
5330        spawn_kernel_and_run(async |locked, current_task| {
5331            let mm = current_task.mm().unwrap();
5332
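            // An address above a growsdown mapping cannot be reached by growing downwards, so no
            // extension should happen.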
5333            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) + *PAGE_SIZE;
5334
5335            assert_matches!(
5336                mm.extend_growsdown_mapping_to_address(addr.unwrap(), false),
5337                Ok(false)
5338            );
5339        })
5340        .await;
5341    }
5342
5343    #[::fuchsia::test]
5344    async fn test_grow_write_fault_below_read_only_mapping() {
5345        spawn_kernel_and_run(async |locked, current_task| {
5346            let mm = current_task.mm().unwrap();
5347
5348            let mapped_addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE);
5349
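            // Make the growsdown mapping read-only; a write fault below it must not extend it.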
5350            mm.protect(&current_task, mapped_addr, *PAGE_SIZE as usize, ProtectionFlags::READ)
5351                .unwrap();
5352
5353            assert_matches!(
5354                mm.extend_growsdown_mapping_to_address((mapped_addr - *PAGE_SIZE).unwrap(), true),
5355                Ok(false)
5356            );
5357
5358            assert_eq!(mm.get_mapping_count(), 1);
5359        })
5360        .await;
5361    }
5362
5363    #[::fuchsia::test]
5364    async fn test_snapshot_paged_memory() {
5365        use zx::sys::zx_page_request_command_t::ZX_PAGER_VMO_READ;
5366
5367        spawn_kernel_and_run(async |locked, current_task| {
5368            let kernel = current_task.kernel();
5369            let mm = current_task.mm().unwrap();
5370            let ma = current_task.deref();
5371
5372            let port = Arc::new(zx::Port::create());
5373            let port_clone = port.clone();
5374            let pager =
5375                Arc::new(zx::Pager::create(zx::PagerOptions::empty()).expect("create failed"));
5376            let pager_clone = pager.clone();
5377
5378            const VMO_SIZE: u64 = 128 * 1024;
5379            let vmo = Arc::new(
5380                pager
5381                    .create_vmo(zx::VmoOptions::RESIZABLE, &port, 1, VMO_SIZE)
5382                    .expect("create_vmo failed"),
5383            );
5384            let vmo_clone = vmo.clone();
5385
5386            // Create a thread to service the port where we will receive pager requests.
5387            let thread = std::thread::spawn(move || {
5388                loop {
5389                    let packet =
5390                        port_clone.wait(zx::MonotonicInstant::INFINITE).expect("wait failed");
5391                    match packet.contents() {
5392                        zx::PacketContents::Pager(contents) => {
5393                            if contents.command() == ZX_PAGER_VMO_READ {
5394                                let range = contents.range();
5395                                let source_vmo = zx::Vmo::create(range.end - range.start)
5396                                    .expect("create failed");
5397                                pager_clone
5398                                    .supply_pages(&vmo_clone, range, &source_vmo, 0)
5399                                    .expect("supply_pages failed");
5400                            }
5401                        }
5402                        zx::PacketContents::User(_) => break,
5403                        _ => {}
5404                    }
5405                }
5406            });
5407
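            // Create a copy-on-write child of the pager-backed VMO; this child is what gets
            // mapped and later snapshotted.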
5408            let child_vmo = vmo
5409                .create_child(zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, VMO_SIZE)
5410                .unwrap();
5411
5412            // Write something to the source VMO.
5413            vmo.write(b"foo", 0).expect("write failed");
5414
5415            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
5416            let addr = mm
5417                .map_memory(
5418                    DesiredAddress::Any,
5419                    Arc::new(MemoryObject::from(child_vmo)),
5420                    0,
5421                    VMO_SIZE as usize,
5422                    prot_flags,
5423                    Access::rwx(),
5424                    MappingOptions::empty(),
5425                    MappingName::None,
5426                )
5427                .expect("map failed");
5428
5429            let target = create_task(locked, &kernel, "another-task");
5430            mm.snapshot_to(locked, &target.mm().unwrap()).expect("snapshot_to failed");
5431
5432            // Make sure the target's address space has what we wrote.
5433            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
5434            assert_eq!(buf, b"foo");
5435
5436            // Write something to both source and target and make sure they are forked.
5437            ma.write_memory(addr, b"bar").expect("write_memory failed");
5438
5439            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
5440            assert_eq!(buf, b"foo");
5441
5442            target.write_memory(addr, b"baz").expect("write_memory failed");
5443            let buf = ma.read_memory_to_vec(addr, 3).expect("read_memory failed");
5444            assert_eq!(buf, b"bar");
5445
5446            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
5447            assert_eq!(buf, b"baz");
5448
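            // Queue a user packet to tell the pager thread to exit its loop.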
5449            port.queue(&zx::Packet::from_user_packet(0, 0, zx::UserPacket::from_u8_array([0; 32])))
5450                .unwrap();
5451            thread.join().unwrap();
5452        })
5453        .await;
5454    }
5455
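    // The tests below exercise prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, addr, length, name_ptr),
    // which labels anonymous mappings in [addr, addr + length) with a user-supplied,
    // NUL-terminated name. A rough userspace equivalent (sketch, assuming a Linux libc
    // environment) would be:
    //   prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, addr, length, "vma name");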
5456    #[::fuchsia::test]
5457    async fn test_set_vma_name() {
5458        spawn_kernel_and_run(async |locked, mut current_task| {
5459            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5460
5461            let vma_name = "vma name";
5462            current_task.write_memory(name_addr, vma_name.as_bytes()).unwrap();
5463
5464            let mapping_addr =
5465                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5466
5467            sys_prctl(
5468                locked,
5469                &mut current_task,
5470                PR_SET_VMA,
5471                PR_SET_VMA_ANON_NAME as u64,
5472                mapping_addr.ptr() as u64,
5473                *PAGE_SIZE,
5474                name_addr.ptr() as u64,
5475            )
5476            .unwrap();
5477
5478            assert_eq!(
5479                *current_task.mm().unwrap().get_mapping_name(mapping_addr).unwrap().unwrap(),
5480                vma_name
5481            );
5482        })
5483        .await;
5484    }
5485
5486    #[::fuchsia::test]
5487    async fn test_set_vma_name_adjacent_mappings() {
5488        spawn_kernel_and_run(async |locked, mut current_task| {
5489            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5490            current_task
5491                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5492                .unwrap();
5493
5494            let first_mapping_addr =
5495                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
5496            let second_mapping_addr = map_memory_with_flags(
5497                locked,
5498                &current_task,
5499                (first_mapping_addr + *PAGE_SIZE).unwrap(),
5500                *PAGE_SIZE,
5501                MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
5502            );
5503
5504            assert_eq!((first_mapping_addr + *PAGE_SIZE).unwrap(), second_mapping_addr);
5505
5506            sys_prctl(
5507                locked,
5508                &mut current_task,
5509                PR_SET_VMA,
5510                PR_SET_VMA_ANON_NAME as u64,
5511                first_mapping_addr.ptr() as u64,
5512                2 * *PAGE_SIZE,
5513                name_addr.ptr() as u64,
5514            )
5515            .unwrap();
5516
5517            {
5518                let mm = current_task.mm().unwrap();
5519                let state = mm.state.read();
5520
5521                // The name should apply to both mappings.
5522                let (_, mapping) = state.mappings.get(first_mapping_addr).unwrap();
5523                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5524
5525                let (_, mapping) = state.mappings.get(second_mapping_addr).unwrap();
5526                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5527            }
5528        })
5529        .await;
5530    }
5531
5532    #[::fuchsia::test]
5533    async fn test_set_vma_name_beyond_end() {
5534        spawn_kernel_and_run(async |locked, mut current_task| {
5535            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5536            current_task
5537                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5538                .unwrap();
5539
5540            let mapping_addr =
5541                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
5542
5543            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
5544            current_task.mm().unwrap().unmap(second_page, *PAGE_SIZE as usize).unwrap();
5545
5546            // This should fail with ENOMEM since it extends past the end of the mapping into unmapped memory.
5547            assert_eq!(
5548                sys_prctl(
5549                    locked,
5550                    &mut current_task,
5551                    PR_SET_VMA,
5552                    PR_SET_VMA_ANON_NAME as u64,
5553                    mapping_addr.ptr() as u64,
5554                    2 * *PAGE_SIZE,
5555                    name_addr.ptr() as u64,
5556                ),
5557                error!(ENOMEM)
5558            );
5559
5560            // Despite returning an error, the prctl should still assign the name to the mapping at the start of the range.
5561            {
5562                let mm = current_task.mm().unwrap();
5563                let state = mm.state.read();
5564
5565                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
5566                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5567            }
5568        })
5569        .await;
5570    }
5571
5572    #[::fuchsia::test]
5573    async fn test_set_vma_name_before_start() {
5574        spawn_kernel_and_run(async |locked, mut current_task| {
5575            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5576            current_task
5577                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5578                .unwrap();
5579
5580            let mapping_addr =
5581                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
5582
5583            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
5584            current_task.mm().unwrap().unmap(mapping_addr, *PAGE_SIZE as usize).unwrap();
5585
5586            // This should fail with ENOMEM since the start of the range is in unmapped memory.
5587            assert_eq!(
5588                sys_prctl(
5589                    locked,
5590                    &mut current_task,
5591                    PR_SET_VMA,
5592                    PR_SET_VMA_ANON_NAME as u64,
5593                    mapping_addr.ptr() as u64,
5594                    2 * *PAGE_SIZE,
5595                    name_addr.ptr() as u64,
5596                ),
5597                error!(ENOMEM)
5598            );
5599
5600            // Unlike a range that starts within a mapping and extends past the end, this should not
5601            // assign a name to any mapping.
5602            {
5603                let mm = current_task.mm().unwrap();
5604                let state = mm.state.read();
5605
5606                let (_, mapping) = state.mappings.get(second_page).unwrap();
5607                assert_eq!(mapping.name(), MappingName::None);
5608            }
5609        })
5610        .await;
5611    }
5612
5613    #[::fuchsia::test]
5614    async fn test_set_vma_name_partial() {
5615        spawn_kernel_and_run(async |locked, mut current_task| {
5616            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5617            current_task
5618                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5619                .unwrap();
5620
5621            let mapping_addr =
5622                map_memory(locked, &current_task, UserAddress::default(), 3 * *PAGE_SIZE);
5623
5624            assert_eq!(
5625                sys_prctl(
5626                    locked,
5627                    &mut current_task,
5628                    PR_SET_VMA,
5629                    PR_SET_VMA_ANON_NAME as u64,
5630                    (mapping_addr + *PAGE_SIZE).unwrap().ptr() as u64,
5631                    *PAGE_SIZE,
5632                    name_addr.ptr() as u64,
5633                ),
5634                Ok(starnix_syscalls::SUCCESS)
5635            );
5636
5637            // This should split the mapping into 3 pieces, with the second piece having the name "foo".
5638            {
5639                let mm = current_task.mm().unwrap();
5640                let state = mm.state.read();
5641
5642                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
5643                assert_eq!(mapping.name(), MappingName::None);
5644
5645                let (_, mapping) =
5646                    state.mappings.get((mapping_addr + *PAGE_SIZE).unwrap()).unwrap();
5647                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5648
5649                let (_, mapping) =
5650                    state.mappings.get((mapping_addr + (2 * *PAGE_SIZE)).unwrap()).unwrap();
5651                assert_eq!(mapping.name(), MappingName::None);
5652            }
5653        })
5654        .await;
5655    }
5656
5657    #[::fuchsia::test]
5658    async fn test_preserve_name_snapshot() {
5659        spawn_kernel_and_run(async |locked, mut current_task| {
5660            let kernel = current_task.kernel().clone();
5661            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5662            current_task
5663                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5664                .unwrap();
5665
5666            let mapping_addr =
5667                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5668
5669            assert_eq!(
5670                sys_prctl(
5671                    locked,
5672                    &mut current_task,
5673                    PR_SET_VMA,
5674                    PR_SET_VMA_ANON_NAME as u64,
5675                    mapping_addr.ptr() as u64,
5676                    *PAGE_SIZE,
5677                    name_addr.ptr() as u64,
5678                ),
5679                Ok(starnix_syscalls::SUCCESS)
5680            );
5681
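            // Snapshot the address space into another task and check that the VMA name is
            // preserved.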
5682            let target = create_task(locked, &kernel, "another-task");
5683            current_task
5684                .mm()
5685                .unwrap()
5686                .snapshot_to(locked, &target.mm().unwrap())
5687                .expect("snapshot_to failed");
5688
5689            {
5690                let mm = target.mm().unwrap();
5691                let state = mm.state.read();
5692
5693                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
5694                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5695            }
5696        })
5697        .await;
5698    }
5699}