starnix_core/mm/memory_manager.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::mapping::MappingBackingMemory;
7use crate::mm::memory::MemoryObject;
8use crate::mm::memory_accessor::{MemoryAccessor, TaskMemoryAccessor};
9use crate::mm::private_anonymous_memory_manager::PrivateAnonymousMemoryManager;
10use crate::mm::{
11    FaultRegisterMode, FutexTable, InflightVmsplicedPayloads, MapInfoCache, Mapping,
12    MappingBacking, MappingFlags, MappingName, MlockPinFlavor, PrivateFutexKey, ProtectionFlags,
13    UserFault, VMEX_RESOURCE, VmsplicePayload, VmsplicePayloadSegment, read_to_array,
14};
15use crate::security;
16use crate::signals::{SignalDetail, SignalInfo};
17use crate::task::{CurrentTask, ExceptionResult, PageFaultExceptionReport, Task};
18use crate::vfs::aio::AioContext;
19use crate::vfs::pseudo::dynamic_file::{
20    DynamicFile, DynamicFileBuf, DynamicFileSource, SequenceFileSource,
21};
22use crate::vfs::{FsString, NamespaceNode};
23use anyhow::{Error, anyhow};
24use bitflags::bitflags;
25use flyweights::FlyByteStr;
26use linux_uapi::BUS_ADRERR;
27use memory_pinning::PinnedMapping;
28use range_map::RangeMap;
29use starnix_ext::map_ext::EntryExt;
30use starnix_lifecycle::DropNotifier;
31use starnix_logging::{
32    CATEGORY_STARNIX_MM, impossible_error, log_warn, trace_duration, track_stub,
33};
34use starnix_sync::{
35    LockBefore, Locked, MmDumpable, OrderedMutex, RwLock, RwLockWriteGuard, ThreadGroupLimits,
36    Unlocked, UserFaultInner,
37};
38use starnix_types::arch::ArchWidth;
39use starnix_types::futex_address::FutexAddress;
40use starnix_types::math::{round_down_to_system_page_size, round_up_to_system_page_size};
41use starnix_types::ownership::{TempRef, WeakRef};
42use starnix_types::user_buffer::{UserBuffer, UserBuffers};
43use starnix_uapi::auth::CAP_IPC_LOCK;
44use starnix_uapi::errors::Errno;
45use starnix_uapi::file_mode::Access;
46use starnix_uapi::range_ext::RangeExt;
47use starnix_uapi::resource_limits::Resource;
48use starnix_uapi::restricted_aspace::{
49    RESTRICTED_ASPACE_BASE, RESTRICTED_ASPACE_HIGHEST_ADDRESS, RESTRICTED_ASPACE_RANGE,
50    RESTRICTED_ASPACE_SIZE,
51};
52use starnix_uapi::signals::{SIGBUS, SIGSEGV};
53use starnix_uapi::user_address::{ArchSpecific, UserAddress};
54use starnix_uapi::{
55    MADV_COLD, MADV_COLLAPSE, MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK,
56    MADV_DONTNEED, MADV_DONTNEED_LOCKED, MADV_FREE, MADV_HUGEPAGE, MADV_HWPOISON, MADV_KEEPONFORK,
57    MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_NORMAL, MADV_PAGEOUT, MADV_POPULATE_READ, MADV_RANDOM,
58    MADV_REMOVE, MADV_SEQUENTIAL, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, MADV_WILLNEED,
59    MADV_WIPEONFORK, MREMAP_DONTUNMAP, MREMAP_FIXED, MREMAP_MAYMOVE, SI_KERNEL, errno, error,
60};
61use std::collections::HashMap;
62use std::mem::MaybeUninit;
63use std::ops::{Deref, DerefMut, Range, RangeBounds};
64use std::sync::{Arc, LazyLock, Weak};
65use syncio::zxio::zxio_default_maybe_faultable_copy;
66use zerocopy::IntoBytes;
67use zx::{HandleBased, Rights, VmarInfo, VmoChildOptions};
68
69pub const ZX_VM_SPECIFIC_OVERWRITE: zx::VmarFlags =
70    zx::VmarFlags::from_bits_retain(zx::VmarFlagsExtended::SPECIFIC_OVERWRITE.bits());
71
72// We do not create shared processes in unit tests.
73pub(crate) const UNIFIED_ASPACES_ENABLED: bool = cfg!(not(test));
74
75/// Initializes the usercopy utilities.
76///
77/// It is useful to explicitly call this so that the usercopy is initialized
78/// at a known instant. For example, Starnix may want to make sure the usercopy
79/// thread created to support user copying is associated to the Starnix process
80/// and not a restricted-mode process.
81pub fn init_usercopy() {
82    // This call lazily initializes the `Usercopy` instance.
83    let _ = usercopy();
84}
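// Usage note for `init_usercopy` above (illustrative sketch, not from the
// original source): a runner would typically call it once during startup,
// before any restricted-mode processes are spawned, so that the usercopy
// worker thread is attributed to the Starnix process itself, e.g.:
//
//     init_usercopy();
//     // ... continue with the rest of kernel startup ...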
85
86pub const GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS: usize = 256;
87
88#[cfg(target_arch = "x86_64")]
89const ASLR_RANDOM_BITS: usize = 27;
90
91#[cfg(target_arch = "aarch64")]
92const ASLR_RANDOM_BITS: usize = 28;
93
94#[cfg(target_arch = "riscv64")]
95const ASLR_RANDOM_BITS: usize = 18;
96
97/// Number of bits of entropy for processes running in 32 bits mode.
98const ASLR_32_RANDOM_BITS: usize = 8;
99
100// The biggest we expect the stack to be; increase as needed.
101// TODO(https://fxbug.dev/322874791): Once setting RLIMIT_STACK is implemented, we should use it.
102const MAX_STACK_SIZE: usize = 512 * 1024 * 1024;
103
104// Value to report temporarily as the VM RSS HWM.
105// TODO(https://fxbug.dev/396221597): Need support from the kernel to track the committed bytes high
106// water mark.
107const STUB_VM_RSS_HWM: usize = 2 * 1024 * 1024;
108
109fn usercopy() -> Option<&'static usercopy::Usercopy> {
110    static USERCOPY: LazyLock<Option<usercopy::Usercopy>> = LazyLock::new(|| {
111        // We do not create shared processes in unit tests.
112        if UNIFIED_ASPACES_ENABLED {
113            // ASSUMPTION: All Starnix-managed Linux processes have the same
114            // restricted mode address range.
115            Some(usercopy::Usercopy::new(RESTRICTED_ASPACE_RANGE).unwrap())
116        } else {
117            None
118        }
119    });
120
121    LazyLock::force(&USERCOPY).as_ref()
122}
123
124/// Provides an implementation for zxio's `zxio_maybe_faultable_copy` that supports
125/// catching faults.
126///
127/// See zxio's `zxio_maybe_faultable_copy` documentation for more details.
128///
129/// # Safety
130///
131/// Only one of `src`/`dest` may be an address to a buffer owned by user/restricted-mode
132/// (`ret_dest` indicates whether the user-owned buffer is `dest` when `true`).
133/// The other must be a valid Starnix/normal-mode buffer that will never cause a fault
134/// when the first `count` bytes are read/written.
135#[unsafe(no_mangle)]
136pub unsafe fn zxio_maybe_faultable_copy_impl(
137    dest: *mut u8,
138    src: *const u8,
139    count: usize,
140    ret_dest: bool,
141) -> bool {
142    if let Some(usercopy) = usercopy() {
143        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
144        let ret = unsafe { usercopy.raw_hermetic_copy(dest, src, count, ret_dest) };
145        ret == count
146    } else {
147        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
148        unsafe {
149            zxio_default_maybe_faultable_copy(dest, src, count, ret_dest)
150        }
151    }
152}
153
154pub static PAGE_SIZE: LazyLock<u64> = LazyLock::new(|| zx::system_get_page_size() as u64);
155
156bitflags! {
157    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
158    pub struct MappingOptions: u16 {
159      const SHARED      = 1 << 0;
160      const ANONYMOUS   = 1 << 1;
161      const LOWER_32BIT = 1 << 2;
162      const GROWSDOWN   = 1 << 3;
163      const ELF_BINARY  = 1 << 4;
164      const DONTFORK    = 1 << 5;
165      const WIPEONFORK  = 1 << 6;
166      const DONT_SPLIT  = 1 << 7;
167      const DONT_EXPAND = 1 << 8;
168      const POPULATE    = 1 << 9;
169    }
170}
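// Usage note for `MappingOptions` above (illustrative sketch): options compose
// as ordinary bitflags, so a private anonymous, stack-style mapping might carry
//
//     let opts = MappingOptions::ANONYMOUS | MappingOptions::GROWSDOWN;
//     assert!(opts.contains(MappingOptions::ANONYMOUS));
//     assert!(!opts.contains(MappingOptions::SHARED));
//
// The hypothetical `opts` value here is only for illustration.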
171
172bitflags! {
173    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
174    pub struct MremapFlags: u32 {
175        const MAYMOVE = MREMAP_MAYMOVE;
176        const FIXED = MREMAP_FIXED;
177        const DONTUNMAP = MREMAP_DONTUNMAP;
178    }
179}
180
181const PROGRAM_BREAK_LIMIT: u64 = 64 * 1024 * 1024;
182
183#[derive(Debug, Clone, Eq, PartialEq)]
184struct ProgramBreak {
185    // The base address at which the data segment is mapped.
186    base: UserAddress,
187
188    // The current program break.
189    //
190    // The addresses from [base, current.round_up(*PAGE_SIZE)) are mapped into the
191    // client address space from the underlying |memory|.
192    current: UserAddress,
193}
194
195/// The policy about whether the address space can be dumped.
196#[derive(Debug, Clone, Copy, Eq, PartialEq)]
197pub enum DumpPolicy {
198    /// The address space cannot be dumped.
199    ///
200    /// Corresponds to SUID_DUMP_DISABLE.
201    Disable,
202
203    /// The address space can be dumped.
204    ///
205    /// Corresponds to SUID_DUMP_USER.
206    User,
207}
208
209// Supported types of membarriers.
210pub enum MembarrierType {
211    Memory,   // MEMBARRIER_CMD_GLOBAL, etc
212    SyncCore, // MEMBARRIER_CMD_..._SYNC_CORE
213}
214
215// Tracks the types of membarriers this address space is registered to receive.
216#[derive(Default, Clone)]
217struct MembarrierRegistrations {
218    memory: bool,
219    sync_core: bool,
220}
221
222pub struct MemoryManagerState {
223    /// The VMAR in which userspace mappings occur.
224    ///
225    /// We map userspace memory in this child VMAR so that we can destroy the
226    /// entire VMAR during exec.
227    /// For 32-bit tasks, we limit the user_vmar to correspond to the available memory.
228    ///
229    /// This field is set to `ZX_HANDLE_INVALID` when the address space has been destroyed (e.g. on
230    /// `exec()`), so callers can either check for that value proactively or handle the
231    /// `ZX_ERR_BAD_HANDLE` status returned by Zircon operations, whichever suits the call site.
232    user_vmar: zx::Vmar,
233
234    /// Cached VmarInfo for user_vmar.
235    user_vmar_info: zx::VmarInfo,
236
237    /// The memory mappings currently used by this address space.
238    ///
239    /// The mappings record which object backs each address.
240    mappings: RangeMap<UserAddress, Mapping>,
241
242    /// Memory object backing private, anonymous memory allocations in this address space.
243    private_anonymous: PrivateAnonymousMemoryManager,
244
245    /// UserFaults registered with this memory manager.
246    userfaultfds: Vec<Weak<UserFault>>,
247
248    /// Shadow mappings for mlock()'d pages.
249    ///
250    /// Used for MlockPinFlavor::ShadowProcess to keep track of when we need to unmap
251    /// memory from the shadow process.
252    shadow_mappings_for_mlock: RangeMap<UserAddress, Arc<PinnedMapping>>,
253
254    forkable_state: MemoryManagerForkableState,
255}
256
257// 64 KiB below the 4 GiB boundary.
258const LOWER_4GB_LIMIT: UserAddress = UserAddress::const_from(0xffff_0000);
259
260#[derive(Default, Clone)]
261pub struct MemoryManagerForkableState {
262    /// State for the brk and sbrk syscalls.
263    brk: Option<ProgramBreak>,
264
265    /// The namespace node that represents the executable associated with this task.
266    executable_node: Option<NamespaceNode>,
267
268    pub stack_size: usize,
269    pub stack_start: UserAddress,
270    pub auxv_start: UserAddress,
271    pub auxv_end: UserAddress,
272    pub argv_start: UserAddress,
273    pub argv_end: UserAddress,
274    pub environ_start: UserAddress,
275    pub environ_end: UserAddress,
276
277    /// vDSO location
278    pub vdso_base: UserAddress,
279
280    /// Randomized regions:
281    pub mmap_top: UserAddress,
282    pub stack_origin: UserAddress,
283    pub brk_origin: UserAddress,
284
285    // Membarrier registrations
286    membarrier_registrations: MembarrierRegistrations,
287}
288
289impl Deref for MemoryManagerState {
290    type Target = MemoryManagerForkableState;
291    fn deref(&self) -> &Self::Target {
292        &self.forkable_state
293    }
294}
295
296impl DerefMut for MemoryManagerState {
297    fn deref_mut(&mut self) -> &mut Self::Target {
298        &mut self.forkable_state
299    }
300}
301
302#[derive(Debug, Default)]
303struct ReleasedMappings {
304    doomed: Vec<Mapping>,
305    doomed_pins: Vec<Arc<PinnedMapping>>,
306}
307
308impl ReleasedMappings {
309    fn extend(&mut self, mappings: impl IntoIterator<Item = Mapping>) {
310        self.doomed.extend(mappings);
311    }
312
313    fn extend_pins(&mut self, mappings: impl IntoIterator<Item = Arc<PinnedMapping>>) {
314        self.doomed_pins.extend(mappings);
315    }
316
317    fn is_empty(&self) -> bool {
318        self.doomed.is_empty() && self.doomed_pins.is_empty()
319    }
320
321    #[cfg(test)]
322    fn len(&self) -> usize {
323        self.doomed.len() + self.doomed_pins.len()
324    }
325
326    fn finalize(&mut self, mm_state: RwLockWriteGuard<'_, MemoryManagerState>) {
327        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
328        // in `DirEntry`'s `drop`.
329        std::mem::drop(mm_state);
330        std::mem::take(&mut self.doomed);
331        std::mem::take(&mut self.doomed_pins);
332    }
333}
334
335impl Drop for ReleasedMappings {
336    fn drop(&mut self) {
337        assert!(self.is_empty(), "ReleasedMappings::finalize() must be called before drop");
338    }
339}
340
341fn map_in_vmar(
342    vmar: &zx::Vmar,
343    vmar_info: &zx::VmarInfo,
344    addr: SelectedAddress,
345    memory: &MemoryObject,
346    memory_offset: u64,
347    length: usize,
348    flags: MappingFlags,
349    populate: bool,
350) -> Result<UserAddress, Errno> {
351    let vmar_offset = addr.addr().checked_sub(vmar_info.base).ok_or_else(|| errno!(ENOMEM))?;
352    let vmar_extra_flags = match addr {
353        SelectedAddress::Fixed(_) => zx::VmarFlags::SPECIFIC,
354        SelectedAddress::FixedOverwrite(_) => ZX_VM_SPECIFIC_OVERWRITE,
355    };
356
357    if populate {
358        let op = if flags.contains(MappingFlags::WRITE) {
359            // Requires ZX_RIGHT_WRITE, which we should expect to hold when the mapping is writable.
360            zx::VmoOp::COMMIT
361        } else {
362            // When we don't expect to have ZX_RIGHT_WRITE, fall back to a VMO op that doesn't
363            // need it.
364            zx::VmoOp::PREFETCH
365        };
366        trace_duration!(CATEGORY_STARNIX_MM, "MmapCommitPages");
367        let _ = memory.op_range(op, memory_offset, length as u64);
368        // "The mmap() call doesn't fail if the mapping cannot be populated."
369    }
370
371    let vmar_maybe_map_range = if populate && !vmar_extra_flags.contains(ZX_VM_SPECIFIC_OVERWRITE) {
372        zx::VmarFlags::MAP_RANGE
373    } else {
374        zx::VmarFlags::empty()
375    };
376    let vmar_flags = flags.access_flags().to_vmar_flags()
377        | zx::VmarFlags::ALLOW_FAULTS
378        | vmar_extra_flags
379        | vmar_maybe_map_range;
380
381    let map_result = memory.map_in_vmar(vmar, vmar_offset.ptr(), memory_offset, length, vmar_flags);
382    let mapped_addr = map_result.map_err(MemoryManager::get_errno_for_map_err)?;
383
384    Ok(UserAddress::from_ptr(mapped_addr))
385}
386
387impl MemoryManagerState {
388    /// Returns occupied address ranges that intersect with the given range.
389    ///
390    /// An address range is "occupied" if (a) there is already a mapping in that range or (b) there
391    /// is a GROWSDOWN mapping <= 256 pages above that range. The 256 pages below a GROWSDOWN
392    /// mapping is the "guard region." The memory manager avoids mapping memory in the guard region
393    /// in some circumstances to preserve space for the GROWSDOWN mapping to grow down.
394    fn get_occupied_address_ranges<'a>(
395        &'a self,
396        subrange: &'a Range<UserAddress>,
397    ) -> impl Iterator<Item = Range<UserAddress>> + 'a {
398        let query_range = subrange.start
399            ..(subrange
400                .end
401                .saturating_add(*PAGE_SIZE as usize * GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS));
402        self.mappings.range(query_range).filter_map(|(range, mapping)| {
403            let occupied_range = mapping.inflate_to_include_guard_pages(range);
404            if occupied_range.start < subrange.end && subrange.start < occupied_range.end {
405                Some(occupied_range)
406            } else {
407                None
408            }
409        })
410    }
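    // Worked example for `get_occupied_address_ranges` above (illustrative,
    // assuming 4 KiB pages): a GROWSDOWN mapping at [0x4010_0000, 0x4020_0000)
    // is treated as occupying the inflated range [0x4000_0000, 0x4020_0000),
    // because 256 guard pages * 4 KiB = 1 MiB is reserved below it for the
    // mapping to grow into.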
411
412    fn count_possible_placements(
413        &self,
414        length: usize,
415        subrange: &Range<UserAddress>,
416    ) -> Option<usize> {
417        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
418        let mut possible_placements = 0;
419        // If the allocation is placed at the first available address, every remaining free page
420        // before the next mapping (or the end of the subrange) adds one more potential placement.
421        let mut first_fill_end = subrange.start.checked_add(length)?;
422        while first_fill_end <= subrange.end {
423            let Some(mapping) = occupied_ranges.next() else {
424                possible_placements += (subrange.end - first_fill_end) / (*PAGE_SIZE as usize) + 1;
425                break;
426            };
427            if mapping.start >= first_fill_end {
428                possible_placements += (mapping.start - first_fill_end) / (*PAGE_SIZE as usize) + 1;
429            }
430            first_fill_end = mapping.end.checked_add(length)?;
431        }
432        Some(possible_placements)
433    }
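    // Worked example for `count_possible_placements` above (illustrative,
    // assuming 4 KiB pages): with an empty subrange [0x1000, 0xB000) (10 pages)
    // and length = 3 pages, the first candidate ends at 0x4000 and every later
    // page-aligned start up to 0x8000 also fits, so the count is
    // (0xB000 - 0x4000) / 0x1000 + 1 = 8 possible placements.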
434
435    fn pick_placement(
436        &self,
437        length: usize,
438        mut chosen_placement_idx: usize,
439        subrange: &Range<UserAddress>,
440    ) -> Option<UserAddress> {
441        let mut candidate =
442            Range { start: subrange.start, end: subrange.start.checked_add(length)? };
443        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
444        loop {
445            let Some(mapping) = occupied_ranges.next() else {
446                // No more mappings: treat the rest of the index as an offset.
447                let res =
448                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
449                debug_assert!(res.checked_add(length)? <= subrange.end);
450                return Some(res);
451            };
452            if mapping.start < candidate.end {
453                // doesn't fit, skip
454                candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
455                continue;
456            }
457            let unused_space =
458                (mapping.start.ptr() - candidate.end.ptr()) / (*PAGE_SIZE as usize) + 1;
459            if unused_space > chosen_placement_idx {
460                // Chosen placement is within the range; treat the rest of the index as an offset.
461                let res =
462                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
463                return Some(res);
464            }
465
466            // chosen address is further up, skip
467            chosen_placement_idx -= unused_space;
468            candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
469        }
470    }
471
472    fn find_random_unused_range(
473        &self,
474        length: usize,
475        subrange: &Range<UserAddress>,
476    ) -> Option<UserAddress> {
477        let possible_placements = self.count_possible_placements(length, subrange)?;
478        if possible_placements == 0 {
479            return None;
480        }
481        let chosen_placement_idx = rand::random_range(0..possible_placements);
482        self.pick_placement(length, chosen_placement_idx, subrange)
483    }
484
485    // Find the first unused range of addresses that fits a mapping of `length` bytes, searching
486    // from `mmap_top` downwards.
487    pub fn find_next_unused_range(&self, length: usize) -> Option<UserAddress> {
488        let gap_size = length as u64;
489        let mut upper_bound = self.mmap_top;
490
491        loop {
492            let gap_end = self.mappings.find_gap_end(gap_size, &upper_bound);
493            let candidate = gap_end.checked_sub(length)?;
494
495            // Is there a next mapping? If not, the candidate is already good.
496            let Some((occupied_range, mapping)) = self.mappings.get(gap_end) else {
497                return Some(candidate);
498            };
499            let occupied_range = mapping.inflate_to_include_guard_pages(occupied_range);
500            // If it doesn't overlap, the gap is big enough to fit.
501            if occupied_range.start >= gap_end {
502                return Some(candidate);
503            }
504            // If there was a mapping in the way, use the start of that range as the upper bound.
505            upper_bound = occupied_range.start;
506        }
507    }
508
509    // Accept the hint if the range is unused and within the range available for mapping.
510    fn is_hint_acceptable(&self, hint_addr: UserAddress, length: usize) -> bool {
511        let Some(hint_end) = hint_addr.checked_add(length) else {
512            return false;
513        };
514        if !RESTRICTED_ASPACE_RANGE.contains(&hint_addr.ptr())
515            || !RESTRICTED_ASPACE_RANGE.contains(&hint_end.ptr())
516        {
517            return false;
518        };
519        self.get_occupied_address_ranges(&(hint_addr..hint_end)).next().is_none()
520    }
521
522    fn select_address(
523        &self,
524        addr: DesiredAddress,
525        length: usize,
526        flags: MappingFlags,
527    ) -> Result<SelectedAddress, Errno> {
528        let adjusted_length = round_up_to_system_page_size(length).or_else(|_| error!(ENOMEM))?;
529
530        let find_address = || -> Result<SelectedAddress, Errno> {
531            let new_addr = if flags.contains(MappingFlags::LOWER_32BIT) {
532                // MAP_32BIT specifies that the memory allocated will
533                // be within the first 2 GB of the process address space.
534                self.find_random_unused_range(
535                    adjusted_length,
536                    &(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
537                        ..UserAddress::from_ptr(0x80000000)),
538                )
539                .ok_or_else(|| errno!(ENOMEM))?
540            } else {
541                self.find_next_unused_range(adjusted_length).ok_or_else(|| errno!(ENOMEM))?
542            };
543
544            Ok(SelectedAddress::Fixed(new_addr))
545        };
546
547        Ok(match addr {
548            DesiredAddress::Any => find_address()?,
549            DesiredAddress::Hint(hint_addr) => {
550                // Round down to page size
551                let hint_addr =
552                    UserAddress::from_ptr(hint_addr.ptr() - hint_addr.ptr() % *PAGE_SIZE as usize);
553                if self.is_hint_acceptable(hint_addr, adjusted_length) {
554                    SelectedAddress::Fixed(hint_addr)
555                } else {
556                    find_address()?
557                }
558            }
559            DesiredAddress::Fixed(addr) => SelectedAddress::Fixed(addr),
560            DesiredAddress::FixedOverwrite(addr) => SelectedAddress::FixedOverwrite(addr),
561        })
562    }
563
564    // Map the memory without updating `self.mappings`.
565    fn map_in_user_vmar(
566        &self,
567        addr: SelectedAddress,
568        memory: &MemoryObject,
569        memory_offset: u64,
570        length: usize,
571        flags: MappingFlags,
572        populate: bool,
573    ) -> Result<UserAddress, Errno> {
574        map_in_vmar(
575            &self.user_vmar,
576            &self.user_vmar_info,
577            addr,
578            memory,
579            memory_offset,
580            length,
581            flags,
582            populate,
583        )
584    }
585
586    fn validate_addr(&self, addr: DesiredAddress, length: usize) -> Result<(), Errno> {
587        if let DesiredAddress::FixedOverwrite(addr) = addr {
588            if self.check_has_unauthorized_splits(addr, length) {
589                return error!(ENOMEM);
590            }
591        }
592        Ok(())
593    }
594
595    fn map_memory(
596        &mut self,
597        mm: &Arc<MemoryManager>,
598        addr: DesiredAddress,
599        memory: Arc<MemoryObject>,
600        memory_offset: u64,
601        length: usize,
602        flags: MappingFlags,
603        max_access: Access,
604        populate: bool,
605        name: MappingName,
606        released_mappings: &mut ReleasedMappings,
607    ) -> Result<UserAddress, Errno> {
608        self.validate_addr(addr, length)?;
609
610        let selected_address = self.select_address(addr, length, flags)?;
611        let mapped_addr = self.map_in_user_vmar(
612            selected_address,
613            &memory,
614            memory_offset,
615            length,
616            flags,
617            populate,
618        )?;
619
620        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
621
622        if let DesiredAddress::FixedOverwrite(addr) = addr {
623            assert_eq!(addr, mapped_addr);
624            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
625        }
626
627        let mapping = Mapping::with_name(
628            self.create_memory_backing(mapped_addr, memory, memory_offset),
629            flags,
630            max_access,
631            name,
632        );
633        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
634
635        Ok(mapped_addr)
636    }
637
638    fn map_private_anonymous(
639        &mut self,
640        mm: &Arc<MemoryManager>,
641        addr: DesiredAddress,
642        length: usize,
643        prot_flags: ProtectionFlags,
644        options: MappingOptions,
645        populate: bool,
646        name: MappingName,
647        released_mappings: &mut ReleasedMappings,
648    ) -> Result<UserAddress, Errno> {
649        self.validate_addr(addr, length)?;
650
651        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
652        let selected_addr = self.select_address(addr, length, flags)?;
653        let backing_memory_offset = selected_addr.addr().ptr();
654
655        let mapped_addr = self.map_in_user_vmar(
656            selected_addr,
657            &self.private_anonymous.backing,
658            backing_memory_offset as u64,
659            length,
660            flags,
661            populate,
662        )?;
663
664        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
665        if let DesiredAddress::FixedOverwrite(addr) = addr {
666            assert_eq!(addr, mapped_addr);
667            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
668        }
669
670        let mapping = Mapping::new_private_anonymous(flags, name);
671        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
672
673        Ok(mapped_addr)
674    }
675
676    fn map_anonymous(
677        &mut self,
678        mm: &Arc<MemoryManager>,
679        addr: DesiredAddress,
680        length: usize,
681        prot_flags: ProtectionFlags,
682        options: MappingOptions,
683        name: MappingName,
684        released_mappings: &mut ReleasedMappings,
685    ) -> Result<UserAddress, Errno> {
686        if !options.contains(MappingOptions::SHARED) {
687            return self.map_private_anonymous(
688                mm,
689                addr,
690                length,
691                prot_flags,
692                options,
693                options.contains(MappingOptions::POPULATE),
694                name,
695                released_mappings,
696            );
697        }
698        let memory = create_anonymous_mapping_memory(length as u64)?;
699        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
700        self.map_memory(
701            mm,
702            addr,
703            memory,
704            0,
705            length,
706            flags,
707            Access::rwx(),
708            options.contains(MappingOptions::POPULATE),
709            name,
710            released_mappings,
711        )
712    }
713
714    fn remap(
715        &mut self,
716        _current_task: &CurrentTask,
717        mm: &Arc<MemoryManager>,
718        old_addr: UserAddress,
719        old_length: usize,
720        new_length: usize,
721        flags: MremapFlags,
722        new_addr: UserAddress,
723        released_mappings: &mut ReleasedMappings,
724    ) -> Result<UserAddress, Errno> {
725        // MREMAP_FIXED moves a mapping, which requires MREMAP_MAYMOVE.
726        if flags.contains(MremapFlags::FIXED) && !flags.contains(MremapFlags::MAYMOVE) {
727            return error!(EINVAL);
728        }
729
730        // MREMAP_DONTUNMAP is always a move, so it requires MREMAP_MAYMOVE.
731        // There is no resizing allowed either.
732        if flags.contains(MremapFlags::DONTUNMAP)
733            && (!flags.contains(MremapFlags::MAYMOVE) || old_length != new_length)
734        {
735            return error!(EINVAL);
736        }
737
738        // In-place copies are invalid.
739        if !flags.contains(MremapFlags::MAYMOVE) && old_length == 0 {
740            return error!(ENOMEM);
741        }
742
743        if new_length == 0 {
744            return error!(EINVAL);
745        }
746
747        // Make sure old_addr is page-aligned.
748        if !old_addr.is_aligned(*PAGE_SIZE) {
749            return error!(EINVAL);
750        }
751
752        let old_length = round_up_to_system_page_size(old_length)?;
753        let new_length = round_up_to_system_page_size(new_length)?;
754
755        if self.check_has_unauthorized_splits(old_addr, old_length) {
756            return error!(EINVAL);
757        }
758
759        if self.check_has_unauthorized_splits(new_addr, new_length) {
760            return error!(EINVAL);
761        }
762
763        if !flags.contains(MremapFlags::DONTUNMAP)
764            && !flags.contains(MremapFlags::FIXED)
765            && old_length != 0
766        {
767            // We are not requested to remap to a specific address, so first we see if we can remap
768            // in-place. In-place copies (old_length == 0) are not allowed.
769            if let Some(new_addr) =
770                self.try_remap_in_place(mm, old_addr, old_length, new_length, released_mappings)?
771            {
772                return Ok(new_addr);
773            }
774        }
775
776        // There is no space to grow in place, or there is an explicit request to move.
777        if flags.contains(MremapFlags::MAYMOVE) {
778            let dst_address =
779                if flags.contains(MremapFlags::FIXED) { Some(new_addr) } else { None };
780            self.remap_move(
781                mm,
782                old_addr,
783                old_length,
784                dst_address,
785                new_length,
786                flags.contains(MremapFlags::DONTUNMAP),
787                released_mappings,
788            )
789        } else {
790            error!(ENOMEM)
791        }
792    }
793
794    /// Attempts to grow or shrink the mapping in-place. Returns `Ok(Some(addr))` if the remap was
795    /// successful. Returns `Ok(None)` if there was no space to grow.
796    fn try_remap_in_place(
797        &mut self,
798        mm: &Arc<MemoryManager>,
799        old_addr: UserAddress,
800        old_length: usize,
801        new_length: usize,
802        released_mappings: &mut ReleasedMappings,
803    ) -> Result<Option<UserAddress>, Errno> {
804        let old_range = old_addr..old_addr.checked_add(old_length).ok_or_else(|| errno!(EINVAL))?;
805        let new_range_in_place =
806            old_addr..old_addr.checked_add(new_length).ok_or_else(|| errno!(EINVAL))?;
807
808        if new_length <= old_length {
809            // Shrink the mapping in-place, which should always succeed.
810            // This is done by unmapping the extraneous region.
811            if new_length != old_length {
812                self.unmap(mm, new_range_in_place.end, old_length - new_length, released_mappings)?;
813            }
814            return Ok(Some(old_addr));
815        }
816
817        if self.mappings.range(old_range.end..new_range_in_place.end).next().is_some() {
818            // There is some mapping in the growth range preventing an in-place growth.
819            return Ok(None);
820        }
821
822        // There is space to grow in-place. The old range must be one contiguous mapping.
823        let (original_range, mapping) =
824            self.mappings.get(old_addr).ok_or_else(|| errno!(EINVAL))?;
825
826        if old_range.end > original_range.end {
827            return error!(EFAULT);
828        }
829        let original_range = original_range.clone();
830        let original_mapping = mapping.clone();
831
832        // Compute the new length of the entire mapping once it has grown.
833        let final_length = (original_range.end - original_range.start) + (new_length - old_length);
834
835        match self.get_mapping_backing(&original_mapping) {
836            MappingBacking::Memory(backing) => {
837                // Re-map the original range, which may include pages before the requested range.
838                Ok(Some(self.map_memory(
839                    mm,
840                    DesiredAddress::FixedOverwrite(original_range.start),
841                    backing.memory().clone(),
842                    backing.address_to_offset(original_range.start),
843                    final_length,
844                    original_mapping.flags(),
845                    original_mapping.max_access(),
846                    false,
847                    original_mapping.name(),
848                    released_mappings,
849                )?))
850            }
851            MappingBacking::PrivateAnonymous => {
852                let growth_start = original_range.end;
853                let growth_length = new_length - old_length;
854                let final_end = (original_range.start + final_length)?;
855                // Map new pages to back the growth.
856                self.map_in_user_vmar(
857                    SelectedAddress::FixedOverwrite(growth_start),
858                    &self.private_anonymous.backing,
859                    growth_start.ptr() as u64,
860                    growth_length,
861                    original_mapping.flags(),
862                    false,
863                )?;
864                // Overwrite the mapping entry with the new larger size.
865                released_mappings.extend(
866                    self.mappings.insert(original_range.start..final_end, original_mapping.clone()),
867                );
868                Ok(Some(original_range.start))
869            }
870        }
871    }
872
873    /// Grows or shrinks the mapping while moving it to a new destination.
874    fn remap_move(
875        &mut self,
876        mm: &Arc<MemoryManager>,
877        src_addr: UserAddress,
878        src_length: usize,
879        dst_addr: Option<UserAddress>,
880        dst_length: usize,
881        keep_source: bool,
882        released_mappings: &mut ReleasedMappings,
883    ) -> Result<UserAddress, Errno> {
884        let src_range = src_addr..src_addr.checked_add(src_length).ok_or_else(|| errno!(EINVAL))?;
885        let (original_range, src_mapping) =
886            self.mappings.get(src_addr).ok_or_else(|| errno!(EINVAL))?;
887        let original_range = original_range.clone();
888        let src_mapping = src_mapping.clone();
889
890        if src_length == 0 && !src_mapping.flags().contains(MappingFlags::SHARED) {
891            // src_length == 0 means that the mapping is to be copied. This behavior is only valid
892            // with MAP_SHARED mappings.
893            return error!(EINVAL);
894        }
895
896        // If the destination range is smaller than the source range, we must first shrink
897        // the source range in place. This must be done now and visible to processes, even if
898        // a later failure causes the remap operation to fail.
899        if src_length != 0 && src_length > dst_length {
900            self.unmap(mm, (src_addr + dst_length)?, src_length - dst_length, released_mappings)?;
901        }
902
903        let dst_addr_for_map = match dst_addr {
904            None => DesiredAddress::Any,
905            Some(dst_addr) => {
906                // The mapping is being moved to a specific address.
907                let dst_range =
908                    dst_addr..(dst_addr.checked_add(dst_length).ok_or_else(|| errno!(EINVAL))?);
909                if !src_range.intersect(&dst_range).is_empty() {
910                    return error!(EINVAL);
911                }
912
913                // The destination range must be unmapped. This must be done now and visible to
914                // processes, even if a later failure causes the remap operation to fail.
915                self.unmap(mm, dst_addr, dst_length, released_mappings)?;
916
917                DesiredAddress::Fixed(dst_addr)
918            }
919        };
920
921        // According to gVisor's aio_test, Linux checks for DONT_EXPAND after unmapping the dst
922        // range.
923        if dst_length > src_length && src_mapping.flags().contains(MappingFlags::DONT_EXPAND) {
924            return error!(EFAULT);
925        }
926
927        if src_range.end > original_range.end {
928            // The source range is not one contiguous mapping. This check must be done only after
929            // the source range is shrunk and the destination unmapped.
930            return error!(EFAULT);
931        }
932
933        match self.get_mapping_backing(&src_mapping) {
934            MappingBacking::PrivateAnonymous => {
935                let dst_addr =
936                    self.select_address(dst_addr_for_map, dst_length, src_mapping.flags())?.addr();
937                let dst_end = (dst_addr + dst_length)?;
938
939                let length_to_move = std::cmp::min(dst_length, src_length) as u64;
940                let growth_start_addr = (dst_addr + length_to_move)?;
941
942                if dst_addr != src_addr {
943                    let src_move_end = (src_range.start + length_to_move)?;
944                    let range_to_move = src_range.start..src_move_end;
945                    // Move the previously mapped pages into their new location.
946                    self.private_anonymous.move_pages(&range_to_move, dst_addr)?;
947                }
948
949                // Userfault registration is not preserved by remap
950                let new_flags =
951                    src_mapping.flags().difference(MappingFlags::UFFD | MappingFlags::UFFD_MISSING);
952                self.map_in_user_vmar(
953                    SelectedAddress::FixedOverwrite(dst_addr),
954                    &self.private_anonymous.backing,
955                    dst_addr.ptr() as u64,
956                    dst_length,
957                    new_flags,
958                    false,
959                )?;
960
961                if dst_length > src_length {
962                    // The mapping has grown, map new pages in to cover the growth.
963                    let growth_length = dst_length - src_length;
964
965                    self.map_private_anonymous(
966                        mm,
967                        DesiredAddress::FixedOverwrite(growth_start_addr),
968                        growth_length,
969                        new_flags.access_flags(),
970                        new_flags.options(),
971                        false,
972                        src_mapping.name(),
973                        released_mappings,
974                    )?;
975                }
976
977                released_mappings.extend(self.mappings.insert(
978                    dst_addr..dst_end,
979                    Mapping::new_private_anonymous(new_flags, src_mapping.name()),
980                ));
981
982                if dst_addr != src_addr && src_length != 0 && !keep_source {
983                    self.unmap(mm, src_addr, src_length, released_mappings)?;
984                }
985
986                return Ok(dst_addr);
987            }
988            MappingBacking::Memory(backing) => {
989                // This mapping is backed by an FD or is a shared anonymous mapping. Just map the
990                // range of the memory object covering the moved pages. If the memory object already
991                // had COW semantics, this preserves them.
992                let (dst_memory_offset, memory) =
993                    (backing.address_to_offset(src_addr), backing.memory().clone());
994
995                let new_address = self.map_memory(
996                    mm,
997                    dst_addr_for_map,
998                    memory,
999                    dst_memory_offset,
1000                    dst_length,
1001                    src_mapping.flags(),
1002                    src_mapping.max_access(),
1003                    false,
1004                    src_mapping.name(),
1005                    released_mappings,
1006                )?;
1007
1008                if src_length != 0 && !keep_source {
1009                    // Only unmap the source range if this is not a copy and there was no explicit
1010                    // request to keep it mapped. It was checked earlier that, when src_length == 0,
1011                    // this mapping is MAP_SHARED.
1012                    self.unmap(mm, src_addr, src_length, released_mappings)?;
1013                }
1014
1015                return Ok(new_address);
1016            }
1017        };
1018    }
1019
1020    // Checks if an operation may be performed over the target mapping that may
1021    // result in a split mapping.
1022    //
1023    // An operation may be forbidden if the target mapping only partially covers
1024    // an existing mapping with the `MappingOptions::DONT_SPLIT` flag set.
1025    fn check_has_unauthorized_splits(&self, addr: UserAddress, length: usize) -> bool {
1026        let query_range = addr..addr.saturating_add(length);
1027        let mut intersection = self.mappings.range(query_range.clone());
1028
1029        // A mapping is not OK if it disallows splitting and the target range
1030        // does not fully cover the mapping range.
1031        let check_if_mapping_has_unauthorized_split =
1032            |mapping: Option<(&Range<UserAddress>, &Mapping)>| {
1033                mapping.is_some_and(|(mapping_range, mapping)| {
1034                    mapping.flags().contains(MappingFlags::DONT_SPLIT)
1035                        && (mapping_range.start < query_range.start
1036                            || query_range.end < mapping_range.end)
1037                })
1038            };
1039
1040        // We only check the first and last mappings in the range because naturally,
1041        // the mappings in the middle are fully covered by the target mapping and
1042        // won't be split.
1043        check_if_mapping_has_unauthorized_split(intersection.next())
1044            || check_if_mapping_has_unauthorized_split(intersection.next_back())
1045    }
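    // Worked example for `check_has_unauthorized_splits` above (illustrative):
    // if [0x1000, 0x5000) is mapped with MappingOptions::DONT_SPLIT, an
    // operation over [0x2000, 0x4000) would split it and is reported as
    // unauthorized, while an operation covering all of [0x1000, 0x5000) (or
    // more) is allowed.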
1046
1047    /// Unmaps the specified range. Unmapped mappings are placed in `released_mappings`.
1048    fn unmap(
1049        &mut self,
1050        mm: &Arc<MemoryManager>,
1051        addr: UserAddress,
1052        length: usize,
1053        released_mappings: &mut ReleasedMappings,
1054    ) -> Result<(), Errno> {
1055        if !addr.is_aligned(*PAGE_SIZE) {
1056            return error!(EINVAL);
1057        }
1058        let length = round_up_to_system_page_size(length)?;
1059        if length == 0 {
1060            return error!(EINVAL);
1061        }
1062
1063        if self.check_has_unauthorized_splits(addr, length) {
1064            return error!(EINVAL);
1065        }
1066
1067        // Unmap the range, including the tail of any range that would have been split. This
1068        // operation is safe because we're operating on another process.
1069        #[allow(
1070            clippy::undocumented_unsafe_blocks,
1071            reason = "Force documented unsafe blocks in Starnix"
1072        )]
1073        match unsafe { self.user_vmar.unmap(addr.ptr(), length) } {
1074            Ok(_) => (),
1075            Err(zx::Status::NOT_FOUND) => (),
1076            Err(zx::Status::INVALID_ARGS) => return error!(EINVAL),
1077            Err(status) => {
1078                impossible_error(status);
1079            }
1080        };
1081
1082        self.update_after_unmap(mm, addr, length, released_mappings)?;
1083
1084        Ok(())
1085    }
1086
1087    // Updates `self.mappings` after the specified range was unmapped.
1088    //
1089    // The range to unmap can span multiple mappings, and can split mappings if
1090    // the range start or end falls in the middle of a mapping.
1091    //
1092    // Private anonymous memory is contained in the same memory object; the pages of that object
1093    // that are no longer reachable should be released.
1094    //
1095    // File-backed mappings don't need to have their memory object modified.
1096    //
1097    // Unmapped mappings are placed in `released_mappings`.
1098    fn update_after_unmap(
1099        &mut self,
1100        mm: &Arc<MemoryManager>,
1101        addr: UserAddress,
1102        length: usize,
1103        released_mappings: &mut ReleasedMappings,
1104    ) -> Result<(), Errno> {
1105        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
1106        let unmap_range = addr..end_addr;
1107
1108        // Remove any shadow mappings for mlock()'d pages that are now unmapped.
1109        released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(unmap_range.clone()));
1110
1111        for (range, mapping) in self.mappings.range(unmap_range.clone()) {
1112            // Deallocate any pages in the private, anonymous backing that are now unreachable.
1113            if let MappingBacking::PrivateAnonymous = self.get_mapping_backing(mapping) {
1114                let unmapped_range = &unmap_range.intersect(range);
1115
1116                mm.inflight_vmspliced_payloads
1117                    .handle_unmapping(&self.private_anonymous.backing, unmapped_range)?;
1118
1119                self.private_anonymous
1120                    .zero(unmapped_range.start, unmapped_range.end - unmapped_range.start)?;
1121            }
1122        }
1123        released_mappings.extend(self.mappings.remove(unmap_range));
1124        Ok(())
1125    }
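    // Worked example for `update_after_unmap` above (illustrative): unmapping
    // [0x3000, 0x5000) from a single mapping covering [0x1000, 0x8000) leaves
    // two mappings, [0x1000, 0x3000) and [0x5000, 0x8000); if the mapping is
    // private anonymous, the pages backing the unmapped middle are zeroed so
    // they can be reclaimed.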
1126
1127    fn protect_vmar_range(
1128        &self,
1129        addr: UserAddress,
1130        length: usize,
1131        prot_flags: ProtectionFlags,
1132    ) -> Result<(), Errno> {
1133        let vmar_flags = prot_flags.to_vmar_flags();
1134        // SAFETY: Modifying user vmar
1135        unsafe { self.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(|s| match s {
1136            zx::Status::INVALID_ARGS => errno!(EINVAL),
1137            zx::Status::NOT_FOUND => errno!(ENOMEM),
1138            zx::Status::ACCESS_DENIED => errno!(EACCES),
1139            _ => impossible_error(s),
1140        })
1141    }
1142
1143    fn protect(
1144        &mut self,
1145        current_task: &CurrentTask,
1146        addr: UserAddress,
1147        length: usize,
1148        prot_flags: ProtectionFlags,
1149        released_mappings: &mut ReleasedMappings,
1150    ) -> Result<(), Errno> {
1151        let vmar_flags = prot_flags.to_vmar_flags();
1152        let page_size = *PAGE_SIZE;
1153        let end = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(page_size)?;
1154
1155        if self.check_has_unauthorized_splits(addr, length) {
1156            return error!(EINVAL);
1157        }
1158
1159        let prot_range = if prot_flags.contains(ProtectionFlags::GROWSDOWN) {
1160            let mut start = addr;
1161            let Some((range, mapping)) = self.mappings.get(start) else {
1162                return error!(EINVAL);
1163            };
1164            // Ensure that the mapping has GROWSDOWN if PROT_GROWSDOWN was specified.
1165            if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1166                return error!(EINVAL);
1167            }
1168            let access_flags = mapping.flags().access_flags();
1169            // From <https://man7.org/linux/man-pages/man2/mprotect.2.html>:
1170            //
1171            //   PROT_GROWSDOWN
1172            //     Apply the protection mode down to the beginning of a
1173            //     mapping that grows downward (which should be a stack
1174            //     segment or a segment mapped with the MAP_GROWSDOWN flag
1175            //     set).
1176            start = range.start;
1177            while let Some((range, mapping)) =
1178                self.mappings.get(start.saturating_sub(page_size as usize))
1179            {
1180                if !mapping.flags().contains(MappingFlags::GROWSDOWN)
1181                    || mapping.flags().access_flags() != access_flags
1182                {
1183                    break;
1184                }
1185                start = range.start;
1186            }
1187            start..end
1188        } else {
1189            addr..end
1190        };
1191
1192        let addr = prot_range.start;
1193        let length = prot_range.end - prot_range.start;
1194
1195        // TODO: We should check the max_access flags on all the mappings in this range.
1196        //       There are cases where max_access is more restrictive than the Zircon rights
1197        //       we hold on the underlying VMOs.
1198
1199        // TODO(https://fxbug.dev/411617451): `mprotect` should apply the protection flags
1200        // until it encounters a mapping that doesn't allow it, rather than not apply the protection
1201        // flags at all if a single mapping doesn't allow it.
1202        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1203            security::file_mprotect(current_task, range, mapping, prot_flags)?;
1204        }
1205
1206        // Make one call to mprotect to update all the zircon protections.
1207        // SAFETY: This is safe because the vmar belongs to a different process.
1208        unsafe { self.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(|s| match s {
1209            zx::Status::INVALID_ARGS => errno!(EINVAL),
1210            zx::Status::NOT_FOUND => {
1211                track_stub!(
1212                    TODO("https://fxbug.dev/322875024"),
1213                    "mprotect: succeed and update prot after NOT_FOUND"
1214                );
1215                errno!(EINVAL)
1216            }
1217            zx::Status::ACCESS_DENIED => errno!(EACCES),
1218            _ => impossible_error(s),
1219        })?;
1220
1221        // Update the flags on each mapping in the range.
1222        let mut updates = vec![];
1223        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1224            if mapping.flags().contains(MappingFlags::UFFD) {
1225                track_stub!(
1226                    TODO("https://fxbug.dev/297375964"),
1227                    "mprotect on uffd-registered range should not alter protections"
1228                );
1229                return error!(EINVAL);
1230            }
1231            let range = range.intersect(&prot_range);
1232            let mut mapping = mapping.clone();
1233            mapping.set_flags(mapping.flags().with_access_flags(prot_flags));
1234            updates.push((range, mapping));
1235        }
1236        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1237        for (range, mapping) in updates {
1238            released_mappings.extend(self.mappings.insert(range, mapping));
1239        }
1240        Ok(())
1241    }
1242
1243    fn madvise(
1244        &mut self,
1245        _current_task: &CurrentTask,
1246        addr: UserAddress,
1247        length: usize,
1248        advice: u32,
1249        released_mappings: &mut ReleasedMappings,
1250    ) -> Result<(), Errno> {
1251        if !addr.is_aligned(*PAGE_SIZE) {
1252            return error!(EINVAL);
1253        }
1254
1255        let end_addr =
1256            addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(*PAGE_SIZE)?;
1257        if end_addr > self.max_address() {
1258            return error!(EFAULT);
1259        }
1260
1261        if advice == MADV_NORMAL {
1262            track_stub!(TODO("https://fxbug.dev/322874202"), "madvise undo hints for MADV_NORMAL");
1263            return Ok(());
1264        }
1265
1266        let mut updates = vec![];
1267        let range_for_op = addr..end_addr;
1268        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
1269            let range_to_zero = range.intersect(&range_for_op);
1270            if range_to_zero.is_empty() {
1271                continue;
1272            }
1273            let start_offset = mapping.address_to_offset(range_to_zero.start);
1274            let end_offset = mapping.address_to_offset(range_to_zero.end);
1275            if advice == MADV_DONTFORK
1276                || advice == MADV_DOFORK
1277                || advice == MADV_WIPEONFORK
1278                || advice == MADV_KEEPONFORK
1279                || advice == MADV_DONTDUMP
1280                || advice == MADV_DODUMP
1281                || advice == MADV_MERGEABLE
1282                || advice == MADV_UNMERGEABLE
1283            {
1284                // WIPEONFORK is only supported on private anonymous mappings per madvise(2).
1285                // KEEPONFORK can be specified on ranges that cover other sorts of mappings. It should
1286                // have no effect on mappings that are not private and anonymous as such mappings cannot
1287                // have the WIPEONFORK option set.
1288                if advice == MADV_WIPEONFORK && !mapping.private_anonymous() {
1289                    return error!(EINVAL);
1290                }
1291                let new_flags = match advice {
1292                    MADV_DONTFORK => mapping.flags() | MappingFlags::DONTFORK,
1293                    MADV_DOFORK => mapping.flags() & MappingFlags::DONTFORK.complement(),
1294                    MADV_WIPEONFORK => mapping.flags() | MappingFlags::WIPEONFORK,
1295                    MADV_KEEPONFORK => mapping.flags() & MappingFlags::WIPEONFORK.complement(),
1296                    MADV_DONTDUMP => {
1297                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTDUMP");
1298                        mapping.flags()
1299                    }
1300                    MADV_DODUMP => {
1301                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DODUMP");
1302                        mapping.flags()
1303                    }
1304                    MADV_MERGEABLE => {
1305                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_MERGEABLE");
1306                        mapping.flags()
1307                    }
1308                    MADV_UNMERGEABLE => {
1309                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_UNMERGEABLE");
1310                        mapping.flags()
1311                    }
1312                    // Only the variants in this match should be reachable given the condition for
1313                    // the containing branch.
1314                    unknown_advice => unreachable!("unknown advice {unknown_advice}"),
1315                };
1316                let mut new_mapping = mapping.clone();
1317                new_mapping.set_flags(new_flags);
1318                updates.push((range_to_zero, new_mapping));
1319            } else {
1320                if mapping.flags().contains(MappingFlags::SHARED) {
1321                    continue;
1322                }
1323                let op = match advice {
1324                    MADV_DONTNEED if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1325                        // Note, we cannot simply implement MADV_DONTNEED with
1326                        // zx::VmoOp::DONT_NEED because they have different
1327                        // semantics.
1328                        track_stub!(
1329                            TODO("https://fxbug.dev/322874496"),
1330                            "MADV_DONTNEED with file-backed mapping"
1331                        );
1332                        return error!(EINVAL);
1333                    }
1334                    MADV_DONTNEED if mapping.flags().contains(MappingFlags::LOCKED) => {
1335                        return error!(EINVAL);
1336                    }
1337                    MADV_DONTNEED => zx::VmoOp::ZERO,
1338                    MADV_DONTNEED_LOCKED => {
1339                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTNEED_LOCKED");
1340                        return error!(EINVAL);
1341                    }
1342                    MADV_WILLNEED => {
1343                        if mapping.flags().contains(MappingFlags::WRITE) {
1344                            zx::VmoOp::COMMIT
1345                        } else {
1346                            zx::VmoOp::PREFETCH
1347                        }
1348                    }
1349                    MADV_COLD => {
1350                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLD");
1351                        return error!(EINVAL);
1352                    }
1353                    MADV_PAGEOUT => {
1354                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_PAGEOUT");
1355                        return error!(EINVAL);
1356                    }
1357                    MADV_POPULATE_READ => {
1358                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_POPULATE_READ");
1359                        return error!(EINVAL);
1360                    }
1361                    MADV_RANDOM => {
1362                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_RANDOM");
1363                        return error!(EINVAL);
1364                    }
1365                    MADV_SEQUENTIAL => {
1366                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SEQUENTIAL");
1367                        return error!(EINVAL);
1368                    }
1369                    MADV_FREE if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1370                        track_stub!(
1371                            TODO("https://fxbug.dev/411748419"),
1372                            "MADV_FREE with file-backed mapping"
1373                        );
1374                        return error!(EINVAL);
1375                    }
1376                    MADV_FREE if mapping.flags().contains(MappingFlags::LOCKED) => {
1377                        return error!(EINVAL);
1378                    }
1379                    MADV_FREE => {
1380                        track_stub!(TODO("https://fxbug.dev/411748419"), "MADV_FREE");
1381                        // TODO(https://fxbug.dev/411748419) For now, treat MADV_FREE like
1382                        // MADV_DONTNEED as a stopgap until we have proper support.
1383                        zx::VmoOp::ZERO
1384                    }
1385                    MADV_REMOVE => {
1386                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_REMOVE");
1387                        return error!(EINVAL);
1388                    }
1389                    MADV_HWPOISON => {
1390                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HWPOISON");
1391                        return error!(EINVAL);
1392                    }
1393                    MADV_SOFT_OFFLINE => {
1394                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SOFT_OFFLINE");
1395                        return error!(EINVAL);
1396                    }
1397                    MADV_HUGEPAGE => {
1398                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HUGEPAGE");
1399                        return error!(EINVAL);
1400                    }
1401                    MADV_COLLAPSE => {
1402                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLLAPSE");
1403                        return error!(EINVAL);
1404                    }
1405                    MADV_NOHUGEPAGE => return Ok(()),
1406                    advice => {
1407                        track_stub!(TODO("https://fxbug.dev/322874202"), "madvise", advice);
1408                        return error!(EINVAL);
1409                    }
1410                };
1411
1412                let memory = match self.get_mapping_backing(mapping) {
1413                    MappingBacking::Memory(backing) => backing.memory(),
1414                    MappingBacking::PrivateAnonymous => &self.private_anonymous.backing,
1415                };
1416                memory.op_range(op, start_offset, end_offset - start_offset).map_err(
1417                    |s| match s {
1418                        zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1419                        zx::Status::NO_MEMORY => errno!(ENOMEM),
1420                        zx::Status::INVALID_ARGS => errno!(EINVAL),
1421                        zx::Status::ACCESS_DENIED => errno!(EACCES),
1422                        _ => impossible_error(s),
1423                    },
1424                )?;
1425            }
1426        }
1427        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1428        for (range, mapping) in updates {
1429            released_mappings.extend(self.mappings.insert(range, mapping));
1430        }
1431        Ok(())
1432    }
1433
1434    fn mlock<L>(
1435        &mut self,
1436        current_task: &CurrentTask,
1437        locked: &mut Locked<L>,
1438        desired_addr: UserAddress,
1439        desired_length: usize,
1440        on_fault: bool,
1441        released_mappings: &mut ReleasedMappings,
1442    ) -> Result<(), Errno>
1443    where
1444        L: LockBefore<ThreadGroupLimits>,
1445    {
1446        let desired_end_addr =
1447            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1448        let start_addr = round_down_to_system_page_size(desired_addr)?;
1449        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1450
1451        let mut updates = vec![];
1452        let mut bytes_mapped_in_range = 0;
1453        let mut num_new_locked_bytes = 0;
1454        let mut failed_to_lock = false;
1455        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1456            let mut range = range.clone();
1457            let mut mapping = mapping.clone();
1458
1459            // Handle mappings that start before the region to be locked.
1460            range.start = std::cmp::max(range.start, start_addr);
1461            // Handle mappings that extend past the region to be locked.
1462            range.end = std::cmp::min(range.end, end_addr);
1463
1464            bytes_mapped_in_range += (range.end - range.start) as u64;
1465
1466            // PROT_NONE mappings generate ENOMEM but are left locked.
1467            if !mapping
1468                .flags()
1469                .intersects(MappingFlags::READ | MappingFlags::WRITE | MappingFlags::EXEC)
1470            {
1471                failed_to_lock = true;
1472            }
1473
1474            if !mapping.flags().contains(MappingFlags::LOCKED) {
1475                num_new_locked_bytes += (range.end - range.start) as u64;
1476                let shadow_mapping = match current_task.kernel().features.mlock_pin_flavor {
1477                    // Pin the memory by mapping the backing memory into the high priority vmar.
1478                    MlockPinFlavor::ShadowProcess => {
1479                        // Use a newtype so mlock pins get their own expando entry, keeping this
1480                        // shadow process distinct from other users for accounting purposes.
1480                        struct MlockShadowProcess(memory_pinning::ShadowProcess);
1481                        let shadow_process =
1482                            current_task.kernel().expando.get_or_try_init(|| {
1483                                memory_pinning::ShadowProcess::new(zx::Name::new_lossy(
1484                                    "starnix_mlock_pins",
1485                                ))
1486                                .map(MlockShadowProcess)
1487                                .map_err(|_| errno!(EPERM))
1488                            })?;
1489
1490                        let (vmo, offset) = match self.get_mapping_backing(&mapping) {
1491                            MappingBacking::Memory(m) => (
1492                                m.memory().as_vmo().ok_or_else(|| errno!(ENOMEM))?,
1493                                m.address_to_offset(range.start),
1494                            ),
1495                            MappingBacking::PrivateAnonymous => (
1496                                self.private_anonymous
1497                                    .backing
1498                                    .as_vmo()
1499                                    .ok_or_else(|| errno!(ENOMEM))?,
1500                                range.start.ptr() as u64,
1501                            ),
1502                        };
1503                        Some(shadow_process.0.pin_pages(vmo, offset, range.end - range.start)?)
1504                    }
1505
1506                    // These flavors rely on VMAR-level operations (or do nothing), so only the
1507                    // per-mapping flag is set and no shadow mapping is needed.
1507                    MlockPinFlavor::Noop | MlockPinFlavor::VmarAlwaysNeed => None,
1508                };
1509                mapping.set_mlock();
1510                updates.push((range, mapping, shadow_mapping));
1511            }
1512        }
1513
1514        if bytes_mapped_in_range as usize != end_addr - start_addr {
1515            return error!(ENOMEM);
1516        }
1517
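            // Per mlock(2), exceeding RLIMIT_MEMLOCK requires CAP_IPC_LOCK; unprivileged callers get
            // ENOMEM, or EPERM when the limit is zero.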
1518        let memlock_rlimit = current_task.thread_group().get_rlimit(locked, Resource::MEMLOCK);
1519        if self.total_locked_bytes() + num_new_locked_bytes > memlock_rlimit {
1520            if crate::security::check_task_capable(current_task, CAP_IPC_LOCK).is_err() {
1521                let code = if memlock_rlimit > 0 { errno!(ENOMEM) } else { errno!(EPERM) };
1522                return Err(code);
1523            }
1524        }
1525
1526        let op_range_status_to_errno = |e| match e {
1527            zx::Status::BAD_STATE | zx::Status::NOT_SUPPORTED => errno!(ENOMEM),
1528            zx::Status::INVALID_ARGS | zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1529            zx::Status::ACCESS_DENIED => {
1530                unreachable!("user vmar should always have needed rights")
1531            }
1532            zx::Status::BAD_HANDLE => {
1533                unreachable!("user vmar should always be a valid handle")
1534            }
1535            zx::Status::WRONG_TYPE => unreachable!("user vmar handle should be a vmar"),
1536            _ => unreachable!("unknown error from op_range on user vmar for mlock: {e}"),
1537        };
1538
1539        if !on_fault && !current_task.kernel().features.mlock_always_onfault {
1540            self.user_vmar
1541                .op_range(zx::VmarOp::PREFETCH, start_addr.ptr(), end_addr - start_addr)
1542                .map_err(op_range_status_to_errno)?;
1543        }
1544
1545        match current_task.kernel().features.mlock_pin_flavor {
1546            MlockPinFlavor::VmarAlwaysNeed => {
1547                self.user_vmar
1548                    .op_range(zx::VmarOp::ALWAYS_NEED, start_addr.ptr(), end_addr - start_addr)
1549                    .map_err(op_range_status_to_errno)?;
1550            }
1551            // The shadow process doesn't use any vmar-level operations to pin memory.
1552            MlockPinFlavor::Noop | MlockPinFlavor::ShadowProcess => (),
1553        }
1554
1555        for (range, mapping, shadow_mapping) in updates {
1556            if let Some(shadow_mapping) = shadow_mapping {
1557                released_mappings.extend_pins(
1558                    self.shadow_mappings_for_mlock.insert(range.clone(), shadow_mapping),
1559                );
1560            }
1561            released_mappings.extend(self.mappings.insert(range, mapping));
1562        }
1563
1564        if failed_to_lock { error!(ENOMEM) } else { Ok(()) }
1565    }
1566
1567    fn munlock(
1568        &mut self,
1569        _current_task: &CurrentTask,
1570        desired_addr: UserAddress,
1571        desired_length: usize,
1572        released_mappings: &mut ReleasedMappings,
1573    ) -> Result<(), Errno> {
1574        let desired_end_addr =
1575            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1576        let start_addr = round_down_to_system_page_size(desired_addr)?;
1577        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1578
1579        let mut updates = vec![];
1580        let mut bytes_mapped_in_range = 0;
1581        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1582            let mut range = range.clone();
1583            let mut mapping = mapping.clone();
1584
1585            // Handle mappings that start before the region to be unlocked.
1586            range.start = std::cmp::max(range.start, start_addr);
1587            // Handle mappings that extend past the region to be unlocked.
1588            range.end = std::cmp::min(range.end, end_addr);
1589
1590            bytes_mapped_in_range += (range.end - range.start) as u64;
1591
1592            if mapping.flags().contains(MappingFlags::LOCKED) {
1593                // Clearing the flag (and dropping the shadow mapping below) fully unlocks pages
1594                // pinned with the shadow-process flavor. It's not currently possible to actually
1595                // unlock pages that were pinned with the ZX_VMAR_OP_ALWAYS_NEED flavor.
1596                mapping.clear_mlock();
1597                updates.push((range, mapping));
1598            }
1599        }
1600
1601        if bytes_mapped_in_range as usize != end_addr - start_addr {
1602            return error!(ENOMEM);
1603        }
1604
1605        for (range, mapping) in updates {
1606            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
1607            released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(range));
1608        }
1609
1610        Ok(())
1611    }
1612
1613    pub fn total_locked_bytes(&self) -> u64 {
1614        self.num_locked_bytes(
1615            UserAddress::from(self.user_vmar_info.base as u64)
1616                ..UserAddress::from((self.user_vmar_info.base + self.user_vmar_info.len) as u64),
1617        )
1618    }
1619
1620    pub fn num_locked_bytes(&self, range: impl RangeBounds<UserAddress>) -> u64 {
1621        self.mappings
1622            .range(range)
1623            .filter(|(_, mapping)| mapping.flags().contains(MappingFlags::LOCKED))
1624            .map(|(range, _)| (range.end - range.start) as u64)
1625            .sum()
1626    }
1627
1628    fn max_address(&self) -> UserAddress {
1629        UserAddress::from_ptr(self.user_vmar_info.base + self.user_vmar_info.len)
1630    }
1631
1632    fn get_mappings_for_vmsplice(
1633        &self,
1634        mm: &Arc<MemoryManager>,
1635        buffers: &UserBuffers,
1636    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
1637        let mut vmsplice_mappings = Vec::new();
1638
1639        for UserBuffer { mut address, length } in buffers.iter().copied() {
1640            let mappings = self.get_contiguous_mappings_at(address, length)?;
1641            for (mapping, length) in mappings {
1642                let vmsplice_payload = match self.get_mapping_backing(mapping) {
1643                    MappingBacking::Memory(m) => VmsplicePayloadSegment {
1644                        addr_offset: address,
1645                        length,
1646                        memory: m.memory().clone(),
1647                        memory_offset: m.address_to_offset(address),
1648                    },
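                        // The private anonymous backing is indexed by raw user address, so the
                        // offset into it is simply the address itself.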
1649                    MappingBacking::PrivateAnonymous => VmsplicePayloadSegment {
1650                        addr_offset: address,
1651                        length,
1652                        memory: self.private_anonymous.backing.clone(),
1653                        memory_offset: address.ptr() as u64,
1654                    },
1655                };
1656                vmsplice_mappings.push(VmsplicePayload::new(Arc::downgrade(mm), vmsplice_payload));
1657
1658                address = (address + length)?;
1659            }
1660        }
1661
1662        Ok(vmsplice_mappings)
1663    }
1664
1665    /// Returns all the mappings starting at `addr`, continuing until either `length` bytes have
1666    /// been covered or an unmapped page is reached.
1667    ///
1668    /// Mappings are returned in ascending order along with the number of bytes that intersect the
1669    /// requested range. The returned mappings are guaranteed to be contiguous and the total length
1670    /// corresponds to the number of contiguous mapped bytes starting from `addr`, i.e.:
1671    /// - 0 (empty iterator) if `addr` is not mapped.
1672    /// - exactly `length` if the requested range is fully mapped.
1673    /// - the offset of the first unmapped page (between 0 and `length`) if the requested range is
1674    ///   only partially mapped.
1675    ///
1676    /// Returns EFAULT if the requested range overflows or extends past the end of the vmar.
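        ///
        /// For example (hypothetical addresses): with [0x1000, 0x3000) and [0x3000, 0x4000) mapped and
        /// 0x4000 unmapped, requesting 0x3000 bytes starting at 0x2000 yields the two mappings with
        /// lengths 0x1000 and 0x1000, i.e. the 0x2000 contiguous mapped bytes starting at 0x2000.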
1677    fn get_contiguous_mappings_at(
1678        &self,
1679        addr: UserAddress,
1680        length: usize,
1681    ) -> Result<impl Iterator<Item = (&Mapping, usize)>, Errno> {
1682        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EFAULT))?;
1683        if end_addr > self.max_address() {
1684            return error!(EFAULT);
1685        }
1686
1687        // Iterate over all contiguous mappings intersecting the requested range.
1688        let mut mappings = self.mappings.range(addr..end_addr);
1689        let mut prev_range_end = None;
1690        let mut offset = 0;
1691        let result = std::iter::from_fn(move || {
1692            if offset != length {
1693                if let Some((range, mapping)) = mappings.next() {
1694                    return match prev_range_end {
1695                        // If this is the first mapping that we are considering, it may not actually
1696                        // contain `addr` at all.
1697                        None if range.start > addr => None,
1698
1699                        // Subsequent mappings may not be contiguous.
1700                        Some(prev_range_end) if range.start != prev_range_end => None,
1701
1702                        // This mapping can be returned.
1703                        _ => {
1704                            let mapping_length = std::cmp::min(length, range.end - addr) - offset;
1705                            offset += mapping_length;
1706                            prev_range_end = Some(range.end);
1707                            Some((mapping, mapping_length))
1708                        }
1709                    };
1710                }
1711            }
1712
1713            None
1714        });
1715
1716        Ok(result)
1717    }
1718
1719    /// Determines whether a fault at the given address could be covered by extending a growsdown
1720    /// mapping.
1721    ///
1722    /// If the address already belongs to a mapping, this function returns `None`. If the next
1723    /// mapping above the given address has the `MappingFlags::GROWSDOWN` flag, this function
1724    /// returns the address at which that mapping starts and the mapping itself. Otherwise, this
1725    /// function returns `None`.
1726    fn find_growsdown_mapping(&self, addr: UserAddress) -> Option<(UserAddress, &Mapping)> {
1727        match self.mappings.range(addr..).next() {
1728            Some((range, mapping)) => {
1729                if range.contains(&addr) {
1730                    // |addr| is already contained within a mapping, nothing to grow.
1731                    return None;
1732                } else if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1733                    // The next mapping above the given address does not have the
1734                    // `MappingFlags::GROWSDOWN` flag.
1735                    None
1736                } else {
1737                    Some((range.start, mapping))
1738                }
1739            }
1740            None => None,
1741        }
1742    }
1743
1744    /// Determines if an access at a given address could be covered by extending a growsdown mapping
1745    /// and extends it if possible. Returns true if the given address is covered by a mapping.
1746    fn extend_growsdown_mapping_to_address(
1747        &mut self,
1748        mm: &Arc<MemoryManager>,
1749        addr: UserAddress,
1750        is_write: bool,
1751    ) -> Result<bool, Error> {
1752        let Some((mapping_low_addr, mapping_to_grow)) = self.find_growsdown_mapping(addr) else {
1753            return Ok(false);
1754        };
1755        if is_write && !mapping_to_grow.can_write() {
1756            // Don't grow a read-only GROWSDOWN mapping for a write fault; the write would still fault.
1757            return Ok(false);
1758        }
1759        if !mapping_to_grow.flags().contains(MappingFlags::ANONYMOUS) {
1760            // Currently, we only grow anonymous mappings.
1761            return Ok(false);
1762        }
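            // Grow down to the page containing `addr`: map new anonymous pages from the page-aligned
            // address at or below `addr` up to the bottom of the existing GROWSDOWN mapping.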
1763        let low_addr = (addr - (addr.ptr() as u64 % *PAGE_SIZE))?;
1764        let high_addr = mapping_low_addr;
1765
1766        let length = high_addr
1767            .ptr()
1768            .checked_sub(low_addr.ptr())
1769            .ok_or_else(|| anyhow!("Invalid growth range"))?;
1770
1771        let mut released_mappings = ReleasedMappings::default();
1772        self.map_anonymous(
1773            mm,
1774            DesiredAddress::FixedOverwrite(low_addr),
1775            length,
1776            mapping_to_grow.flags().access_flags(),
1777            mapping_to_grow.flags().options(),
1778            mapping_to_grow.name(),
1779            &mut released_mappings,
1780        )?;
1781        // We can't have any released mappings because `find_growsdown_mapping` will return None if
1782        // the mapping already exists in this range.
1783        assert!(
1784            released_mappings.is_empty(),
1785            "expected to not remove mappings by inserting, got {released_mappings:#?}"
1786        );
1787        Ok(true)
1788    }
1789
1790    /// Reads exactly `bytes.len()` bytes of memory.
1791    ///
1792    /// # Parameters
1793    /// - `addr`: The address to read data from.
1794    /// - `bytes`: The byte array to read into.
1795    fn read_memory<'a>(
1796        &self,
1797        addr: UserAddress,
1798        bytes: &'a mut [MaybeUninit<u8>],
1799    ) -> Result<&'a mut [u8], Errno> {
1800        let mut bytes_read = 0;
1801        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1802            let next_offset = bytes_read + len;
1803            self.read_mapping_memory(
1804                (addr + bytes_read)?,
1805                mapping,
1806                &mut bytes[bytes_read..next_offset],
1807            )?;
1808            bytes_read = next_offset;
1809        }
1810
1811        if bytes_read != bytes.len() {
1812            error!(EFAULT)
1813        } else {
1814            // SAFETY: The created slice is properly aligned/sized since it
1815            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1816            // the same layout as `T`. Also note that `bytes_read` bytes have
1817            // been properly initialized.
1818            let bytes = unsafe {
1819                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1820            };
1821            Ok(bytes)
1822        }
1823    }
1824
1825    /// Reads exactly `bytes.len()` bytes of memory from `addr`.
1826    ///
1827    /// # Parameters
1828    /// - `addr`: The address to read data from.
1829    /// - `bytes`: The byte array to read into.
1830    fn read_mapping_memory<'a>(
1831        &self,
1832        addr: UserAddress,
1833        mapping: &Mapping,
1834        bytes: &'a mut [MaybeUninit<u8>],
1835    ) -> Result<&'a mut [u8], Errno> {
1836        if !mapping.can_read() {
1837            return error!(EFAULT, "read_mapping_memory called on unreadable mapping");
1838        }
1839        match self.get_mapping_backing(mapping) {
1840            MappingBacking::Memory(backing) => backing.read_memory(addr, bytes),
1841            MappingBacking::PrivateAnonymous => self.private_anonymous.read_memory(addr, bytes),
1842        }
1843    }
1844
1845    /// Reads bytes starting at `addr`, continuing until either `bytes.len()` bytes have been read
1846    /// or no more bytes can be read.
1847    ///
1848    /// This is used, for example, to read null-terminated strings where the exact length is not
1849    /// known, only the maximum length is.
1850    ///
1851    /// # Parameters
1852    /// - `addr`: The address to read data from.
1853    /// - `bytes`: The byte array to read into.
1854    fn read_memory_partial<'a>(
1855        &self,
1856        addr: UserAddress,
1857        bytes: &'a mut [MaybeUninit<u8>],
1858    ) -> Result<&'a mut [u8], Errno> {
1859        let mut bytes_read = 0;
1860        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1861            let next_offset = bytes_read + len;
1862            if self
1863                .read_mapping_memory(
1864                    (addr + bytes_read)?,
1865                    mapping,
1866                    &mut bytes[bytes_read..next_offset],
1867                )
1868                .is_err()
1869            {
1870                break;
1871            }
1872            bytes_read = next_offset;
1873        }
1874
1875        // If at least one byte was requested but we got none, it means that `addr` was invalid.
1876        if !bytes.is_empty() && bytes_read == 0 {
1877            error!(EFAULT)
1878        } else {
1879            // SAFETY: The created slice is properly aligned/sized since it
1880            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1881            // the same layout as `T`. Also note that `bytes_read` bytes have
1882            // been properly initialized.
1883            let bytes = unsafe {
1884                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1885            };
1886            Ok(bytes)
1887        }
1888    }
1889
1890    /// Like `read_memory_partial` but only returns the bytes up to and including
1891    /// a null (zero) byte.
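        ///
        /// For example, if user memory at `addr` begins with `b"hi\0junk"`, reading into a larger
        /// buffer returns `b"hi\0"`; if no null byte is found in the readable prefix, the entire
        /// prefix is returned.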
1892    fn read_memory_partial_until_null_byte<'a>(
1893        &self,
1894        addr: UserAddress,
1895        bytes: &'a mut [MaybeUninit<u8>],
1896    ) -> Result<&'a mut [u8], Errno> {
1897        let read_bytes = self.read_memory_partial(addr, bytes)?;
1898        let max_len = memchr::memchr(b'\0', read_bytes)
1899            .map_or_else(|| read_bytes.len(), |null_index| null_index + 1);
1900        Ok(&mut read_bytes[..max_len])
1901    }
1902
1903    /// Writes the provided bytes.
1904    ///
1905    /// In case of success, the number of bytes written will always be `bytes.len()`.
1906    ///
1907    /// # Parameters
1908    /// - `addr`: The address to write to.
1909    /// - `bytes`: The bytes to write.
1910    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1911        let mut bytes_written = 0;
1912        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1913            let next_offset = bytes_written + len;
1914            self.write_mapping_memory(
1915                (addr + bytes_written)?,
1916                mapping,
1917                &bytes[bytes_written..next_offset],
1918            )?;
1919            bytes_written = next_offset;
1920        }
1921
1922        if bytes_written != bytes.len() { error!(EFAULT) } else { Ok(bytes.len()) }
1923    }
1924
1925    /// Writes the provided bytes to `addr`.
1926    ///
1927    /// # Parameters
1928    /// - `addr`: The address to write to.
1929    /// - `bytes`: The bytes to write to the memory object.
1930    fn write_mapping_memory(
1931        &self,
1932        addr: UserAddress,
1933        mapping: &Mapping,
1934        bytes: &[u8],
1935    ) -> Result<(), Errno> {
1936        if !mapping.can_write() {
1937            return error!(EFAULT, "write_mapping_memory called on unwritable memory");
1938        }
1939        match self.get_mapping_backing(mapping) {
1940            MappingBacking::Memory(backing) => backing.write_memory(addr, bytes),
1941            MappingBacking::PrivateAnonymous => self.private_anonymous.write_memory(addr, bytes),
1942        }
1943    }
1944
1945    /// Writes bytes starting at `addr`, continuing until either `bytes.len()` bytes have been
1946    /// written or no more bytes can be written.
1947    ///
1948    /// # Parameters
1949    /// - `addr`: The address to write to.
1950    /// - `bytes`: The byte array to write from.
1951    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1952        let mut bytes_written = 0;
1953        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1954            let next_offset = bytes_written + len;
1955            if self
1956                .write_mapping_memory(
1957                    (addr + bytes_written)?,
1958                    mapping,
1959                    &bytes[bytes_written..next_offset],
1960                )
1961                .is_err()
1962            {
1963                break;
1964            }
1965            bytes_written = next_offset;
1966        }
1967
1968        if !bytes.is_empty() && bytes_written == 0 { error!(EFAULT) } else { Ok(bytes_written) }
1969    }
1970
1971    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
1972        let mut bytes_written = 0;
1973        for (mapping, len) in self.get_contiguous_mappings_at(addr, length)? {
1974            let next_offset = bytes_written + len;
1975            if self.zero_mapping((addr + bytes_written)?, mapping, len).is_err() {
1976                break;
1977            }
1978            bytes_written = next_offset;
1979        }
1980
1981        if length != bytes_written { error!(EFAULT) } else { Ok(length) }
1982    }
1983
1984    fn zero_mapping(
1985        &self,
1986        addr: UserAddress,
1987        mapping: &Mapping,
1988        length: usize,
1989    ) -> Result<usize, Errno> {
1990        if !mapping.can_write() {
1991            return error!(EFAULT);
1992        }
1993
1994        match self.get_mapping_backing(mapping) {
1995            MappingBacking::Memory(backing) => backing.zero(addr, length),
1996            MappingBacking::PrivateAnonymous => self.private_anonymous.zero(addr, length),
1997        }
1998    }
1999
2000    pub fn create_memory_backing(
2001        &self,
2002        base: UserAddress,
2003        memory: Arc<MemoryObject>,
2004        memory_offset: u64,
2005    ) -> MappingBacking {
2006        MappingBacking::Memory(Box::new(MappingBackingMemory::new(base, memory, memory_offset)))
2007    }
2008
2009    pub fn get_mapping_backing<'a>(&self, mapping: &'a Mapping) -> &'a MappingBacking {
2010        mapping.get_backing_internal()
2011    }
2012
2013    fn get_aio_context(&self, addr: UserAddress) -> Option<(Range<UserAddress>, Arc<AioContext>)> {
2014        let Some((range, mapping)) = self.mappings.get(addr) else {
2015            return None;
2016        };
2017        let MappingName::AioContext(ref aio_context) = mapping.name() else {
2018            return None;
2019        };
2020        if !mapping.can_read() {
2021            return None;
2022        }
2023        Some((range.clone(), aio_context.clone()))
2024    }
2025
2026    fn find_uffd<L>(&self, locked: &mut Locked<L>, addr: UserAddress) -> Option<Arc<UserFault>>
2027    where
2028        L: LockBefore<UserFaultInner>,
2029    {
2030        for userfault in self.userfaultfds.iter() {
2031            if let Some(userfault) = userfault.upgrade() {
2032                if userfault.contains_addr(locked, addr) {
2033                    return Some(userfault);
2034                }
2035            }
2036        }
2037        None
2038    }
2039
2040    pub fn mrelease(&self) -> Result<(), Errno> {
2041        self.private_anonymous
2042            .zero(UserAddress::from_ptr(self.user_vmar_info.base), self.user_vmar_info.len)?;
2043        Ok(())
2044    }
2045
2046    fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
2047        let mut addr = range.start;
2048        let size = range.end - range.start;
2049        for (mapping, len) in self.get_contiguous_mappings_at(addr, size)? {
2050            if !mapping.can_read() {
2051                return error!(EFAULT);
2052            }
2053            // SAFETY: This is operating on a readable restricted mode mapping and will not fault.
2054            zx::Status::ok(unsafe {
2055                zx::sys::zx_cache_flush(
2056                    addr.ptr() as *const u8,
2057                    len,
2058                    zx::sys::ZX_CACHE_FLUSH_DATA | zx::sys::ZX_CACHE_FLUSH_INSN,
2059                )
2060            })
2061            .map_err(impossible_error)?;
2062
2063            addr = (addr + len).unwrap(); // unwrap since we're iterating within the address space.
2064        }
2065        // Did we flush the entire range?
2066        if addr != range.end { error!(EFAULT) } else { Ok(()) }
2067    }
2068
2069    // Calls `op` with details of the mappings in the `user_vmar`, or with an empty slice if the
2070    // `user_vmar` has been destroyed.
2071    fn with_zx_mappings<R>(
2072        &self,
2073        current_task: &CurrentTask,
2074        op: impl FnOnce(&[zx::MapInfo]) -> R,
2075    ) -> R {
2076        if self.user_vmar.is_invalid_handle() {
2077            return op(&[]);
2078        };
2079
2080        MapInfoCache::get_or_init(current_task)
2081            .expect("must be able to retrieve map info cache")
2082            .with_map_infos(&self.user_vmar, |infos| {
2083                // No other https://fuchsia.dev/reference/syscalls/object_get_info?hl=en#errors
2084                // are possible, because we created the VMAR and the `zx` crate ensures that the
2085                // info query is well-formed.
2086                op(infos.expect("must be able to query mappings for private user VMAR"))
2087            })
2088    }
2089
2090    /// Register the address space managed by this memory manager for interest in
2091    /// receiving private expedited memory barriers of the given kind.
2092    pub fn register_membarrier_private_expedited(
2093        &mut self,
2094        mtype: MembarrierType,
2095    ) -> Result<(), Errno> {
2096        let registrations = &mut self.forkable_state.membarrier_registrations;
2097        match mtype {
2098            MembarrierType::Memory => {
2099                registrations.memory = true;
2100            }
2101            MembarrierType::SyncCore => {
2102                registrations.sync_core = true;
2103            }
2104        }
2105        Ok(())
2106    }
2107
2108    /// Checks if the address space managed by this memory manager is registered
2109    /// for interest in private expedited barriers of the given kind.
2110    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2111        let registrations = &self.forkable_state.membarrier_registrations;
2112        match mtype {
2113            MembarrierType::Memory => registrations.memory,
2114            MembarrierType::SyncCore => registrations.sync_core,
2115        }
2116    }
2117
2118    fn force_write_memory(
2119        &mut self,
2120        addr: UserAddress,
2121        bytes: &[u8],
2122        released_mappings: &mut ReleasedMappings,
2123    ) -> Result<(), Errno> {
2124        let (range, mapping) = self.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
2125        if range.end < addr.saturating_add(bytes.len()) {
2126            track_stub!(
2127                TODO("https://fxbug.dev/445790710"),
2128                "ptrace poke across multiple mappings"
2129            );
2130            return error!(EFAULT);
2131        }
2132
2133        // Don't create a CoW copy of shared memory; go through the regular write path instead.
2134        if mapping.flags().contains(MappingFlags::SHARED) {
2135            if !mapping.can_write() {
2136                // Linux returns EIO here instead of EFAULT.
2137                return error!(EIO);
2138            }
2139            return self.write_mapping_memory(addr, mapping, &bytes);
2140        }
2141
2142        let backing = match self.get_mapping_backing(mapping) {
2143            MappingBacking::PrivateAnonymous => {
2144                // Starnix has a writable handle to private anonymous memory.
2145                return self.private_anonymous.write_memory(addr, &bytes);
2146            }
2147            MappingBacking::Memory(backing) => backing,
2148        };
2149
2150        let vmo = backing.memory().as_vmo().ok_or_else(|| errno!(EFAULT))?;
2151        let addr_offset = backing.address_to_offset(addr);
2152        let can_exec =
2153            vmo.basic_info().expect("get VMO handle info").rights.contains(Rights::EXECUTE);
2154
2155        // Attempt to write to existing VMO
2156        match vmo.write(&bytes, addr_offset) {
2157            Ok(()) => {
2158                if can_exec {
2159                    // Issue a barrier to avoid executing stale instructions.
2160                    system_barrier(BarrierType::InstructionStream);
2161                }
2162                return Ok(());
2163            }
2164
2165            Err(zx::Status::ACCESS_DENIED) => { /* Fall through */ }
2166
2167            Err(status) => {
2168                return Err(MemoryManager::get_errno_for_vmo_err(status));
2169            }
2170        }
2171
2172        // Create a CoW child of the entire VMO and swap with the backing.
2173        let mapping_offset = backing.address_to_offset(range.start);
2174        let len = range.end - range.start;
2175
2176        // 1. Obtain a writable child of the VMO.
2177        let size = vmo.get_size().map_err(MemoryManager::get_errno_for_vmo_err)?;
2178        let child_vmo = vmo
2179            .create_child(VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, size)
2180            .map_err(MemoryManager::get_errno_for_vmo_err)?;
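            // SNAPSHOT_AT_LEAST_ON_WRITE gives the child copy-on-write (or eagerly copied) pages, so
            // the write below lands in the child without modifying the original VMO.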
2181
2182        // 2. Modify the memory.
2183        child_vmo.write(&bytes, addr_offset).map_err(MemoryManager::get_errno_for_vmo_err)?;
2184
2185        // 3. If needed, remint the VMO as executable. Zircon flushes instruction caches when
2186        // mapping executable memory below, so a barrier isn't necessary here.
2187        let child_vmo = if can_exec {
2188            child_vmo
2189                .replace_as_executable(&VMEX_RESOURCE)
2190                .map_err(MemoryManager::get_errno_for_vmo_err)?
2191        } else {
2192            child_vmo
2193        };
2194
2195        // 4. Map the new VMO into user VMAR
2196        let memory = Arc::new(MemoryObject::from(child_vmo));
2197        let mapped_addr = self.map_in_user_vmar(
2198            SelectedAddress::FixedOverwrite(range.start),
2199            &memory,
2200            mapping_offset,
2201            len,
2202            mapping.flags(),
2203            false,
2204        )?;
2205        assert_eq!(mapped_addr, range.start);
2206
2207        // 5. Update mappings
2208        let new_backing = MappingBackingMemory::new(range.start, memory, mapping_offset);
2209
2210        let mut new_mapping = mapping.clone();
2211        new_mapping.set_backing_internal(MappingBacking::Memory(Box::new(new_backing)));
2212
2213        let range = range.clone();
2214        released_mappings.extend(self.mappings.insert(range, new_mapping));
2215
2216        Ok(())
2217    }
2218
2219    fn set_brk<L>(
2220        &mut self,
2221        locked: &mut Locked<L>,
2222        current_task: &CurrentTask,
2223        mm: &Arc<MemoryManager>,
2224        addr: UserAddress,
2225        released_mappings: &mut ReleasedMappings,
2226    ) -> Result<UserAddress, Errno>
2227    where
2228        L: LockBefore<ThreadGroupLimits>,
2229    {
2230        let rlimit_data = std::cmp::min(
2231            PROGRAM_BREAK_LIMIT,
2232            current_task.thread_group().get_rlimit(locked, Resource::DATA),
2233        );
2234
2235        let brk = match self.brk.clone() {
2236            None => {
2237                let brk = ProgramBreak { base: self.brk_origin, current: self.brk_origin };
2238                self.brk = Some(brk.clone());
2239                brk
2240            }
2241            Some(brk) => brk,
2242        };
2243
2244        let Ok(last_address) = brk.base + rlimit_data else {
2245            // The limit address cannot be computed (it overflows the address space), so treat
2246            // the request as out-of-range and simply return the current program break.
2247            return Ok(brk.current);
2248        };
2249
2250        if addr < brk.base || addr > last_address {
2251            // The requested program break is out-of-range. We're supposed to simply
2252            // return the current program break.
2253            return Ok(brk.current);
2254        }
2255
2256        let old_end = brk.current.round_up(*PAGE_SIZE).unwrap();
2257        let new_end = addr.round_up(*PAGE_SIZE).unwrap();
2258
2259        match new_end.cmp(&old_end) {
2260            std::cmp::Ordering::Less => {
2261                // Shrinking the program break removes any mapped pages in the
2262                // affected range, regardless of whether they were actually program
2263                // break pages, or other mappings.
2264                let delta = old_end - new_end;
2265
2266                if self.unmap(mm, new_end, delta, released_mappings).is_err() {
2267                    return Ok(brk.current);
2268                }
2269            }
2270            std::cmp::Ordering::Greater => {
2271                let range = old_end..new_end;
2272                let delta = new_end - old_end;
2273
2274                // Check for mappings over the program break region.
2275                if self.mappings.range(range).next().is_some() {
2276                    return Ok(brk.current);
2277                }
2278
2279                if self
2280                    .map_anonymous(
2281                        mm,
2282                        DesiredAddress::FixedOverwrite(old_end),
2283                        delta,
2284                        ProtectionFlags::READ | ProtectionFlags::WRITE,
2285                        MappingOptions::ANONYMOUS,
2286                        MappingName::Heap,
2287                        released_mappings,
2288                    )
2289                    .is_err()
2290                {
2291                    return Ok(brk.current);
2292                }
2293            }
2294            _ => {}
2295        };
2296
2297        // Any required updates to the program break succeeded, so update internal state.
2298        let mut new_brk = brk;
2299        new_brk.current = addr;
2300        self.brk = Some(new_brk);
2301
2302        Ok(addr)
2303    }
2304
2305    fn register_with_uffd<L>(
2306        &mut self,
2307        locked: &mut Locked<L>,
2308        addr: UserAddress,
2309        length: usize,
2310        userfault: &Arc<UserFault>,
2311        mode: FaultRegisterMode,
2312        released_mappings: &mut ReleasedMappings,
2313    ) -> Result<(), Errno>
2314    where
2315        L: LockBefore<UserFaultInner>,
2316    {
2317        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2318        let range_for_op = addr..end_addr;
2319        let mut updates = vec![];
2320
2321        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2322            if !mapping.private_anonymous() {
2323                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2324                return error!(EINVAL);
2325            }
2326            if mapping.flags().contains(MappingFlags::UFFD) {
2327                return error!(EBUSY);
2328            }
2329            let range = range.intersect(&range_for_op);
2330            let mut mapping = mapping.clone();
2331            mapping.set_uffd(mode);
2332            updates.push((range, mapping));
2333        }
2334        if updates.is_empty() {
2335            return error!(EINVAL);
2336        }
2337
2338        self.protect_vmar_range(addr, length, ProtectionFlags::empty())
2339            .expect("Failed to remove protections on uffd-registered range");
2340
2341        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2342        for (range, mapping) in updates {
2343            released_mappings.extend(self.mappings.insert(range, mapping));
2344        }
2345
2346        userfault.insert_pages(locked, range_for_op, false);
2347
2348        Ok(())
2349    }
2350
2351    fn unregister_range_from_uffd<L>(
2352        &mut self,
2353        locked: &mut Locked<L>,
2354        userfault: &Arc<UserFault>,
2355        addr: UserAddress,
2356        length: usize,
2357        released_mappings: &mut ReleasedMappings,
2358    ) -> Result<(), Errno>
2359    where
2360        L: LockBefore<UserFaultInner>,
2361    {
2362        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2363        let range_for_op = addr..end_addr;
2364        let mut updates = vec![];
2365
2366        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2367            if !mapping.private_anonymous() {
2368                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2369                return error!(EINVAL);
2370            }
2371            if mapping.flags().contains(MappingFlags::UFFD) {
2372                let range = range.intersect(&range_for_op);
2373                if userfault.remove_pages(locked, range.clone()) {
2374                    let mut mapping = mapping.clone();
2375                    mapping.clear_uffd();
2376                    updates.push((range, mapping));
2377                }
2378            }
2379        }
2380        for (range, mapping) in updates {
2381            let length = range.end - range.start;
2382            let restored_flags = mapping.flags().access_flags();
2383
2384            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2385
2386            self.protect_vmar_range(range.start, length, restored_flags)
2387                .expect("Failed to restore original protection bits on uffd-registered range");
2388        }
2389        Ok(())
2390    }
2391
2392    fn unregister_uffd<L>(
2393        &mut self,
2394        locked: &mut Locked<L>,
2395        userfault: &Arc<UserFault>,
2396        released_mappings: &mut ReleasedMappings,
2397    ) where
2398        L: LockBefore<UserFaultInner>,
2399    {
2400        let mut updates = vec![];
2401
2402        for (range, mapping) in self.mappings.iter() {
2403            if mapping.flags().contains(MappingFlags::UFFD) {
2404                for range in userfault.get_registered_pages_overlapping_range(locked, range.clone())
2405                {
2406                    let mut mapping = mapping.clone();
2407                    mapping.clear_uffd();
2408                    updates.push((range.clone(), mapping));
2409                }
2410            }
2411        }
2412        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2413        for (range, mapping) in updates {
2414            let length = range.end - range.start;
2415            let restored_flags = mapping.flags().access_flags();
2416            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2417            // We can't recover from an error here as this is run during the cleanup.
2418            self.protect_vmar_range(range.start, length, restored_flags)
2419                .expect("Failed to restore original protection bits on uffd-registered range");
2420        }
2421
2422        userfault.remove_pages(
2423            locked,
2424            UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
2425                ..UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS),
2426        );
2427
2428        let weak_userfault = Arc::downgrade(userfault);
2429        self.userfaultfds.retain(|uf| !Weak::ptr_eq(uf, &weak_userfault));
2430    }
2431
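        /// Applies (or clears) a name on the anonymous mappings covering `[addr, addr + length)`, as
        /// used by `prctl(PR_SET_VMA_ANON_NAME)`. File-backed mappings cannot be renamed (EBADF) and
        /// the range must be contiguously mapped (ENOMEM).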
2432    fn set_mapping_name(
2433        &mut self,
2434        addr: UserAddress,
2435        length: usize,
2436        name: Option<FsString>,
2437        released_mappings: &mut ReleasedMappings,
2438    ) -> Result<(), Errno> {
2439        if addr.ptr() % *PAGE_SIZE as usize != 0 {
2440            return error!(EINVAL);
2441        }
2442        let end = match addr.checked_add(length) {
2443            Some(addr) => addr.round_up(*PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
2444            None => return error!(EINVAL),
2445        };
2446
2447        let mappings_in_range =
2448            self.mappings.range(addr..end).map(|(r, m)| (r.clone(), m.clone())).collect::<Vec<_>>();
2449
2450        if mappings_in_range.is_empty() {
2451            return error!(EINVAL);
2452        }
2453        if !mappings_in_range.first().unwrap().0.contains(&addr) {
2454            return error!(ENOMEM);
2455        }
2456
2457        let mut last_range_end = None;
2458        // There's no get_mut on RangeMap, because it would be hard to implement correctly in
2459        // combination with merging of adjacent mappings. Instead, make a copy, change the copy,
2460        // and insert the copy.
2461        for (mut range, mut mapping) in mappings_in_range {
2462            if let MappingName::File(_) = mapping.name() {
2463                // It's invalid to assign a name to a file-backed mapping.
2464                return error!(EBADF);
2465            }
2466            // Handle mappings that start before the region to be named.
2467            range.start = std::cmp::max(range.start, addr);
2468            // Handle mappings that extend past the region to be named.
2469            range.end = std::cmp::min(range.end, end);
2470
2471            if let Some(last_range_end) = last_range_end {
2472                if last_range_end != range.start {
2473                    // The name must apply to a contiguous range of mapped pages.
2474                    return error!(ENOMEM);
2475                }
2476            }
2477            last_range_end = Some(range.end.round_up(*PAGE_SIZE)?);
2478            // TODO(b/310255065): We have no place to store names in a way visible to programs outside of Starnix
2479            // such as memory analysis tools.
2480            if let MappingBacking::Memory(backing) = self.get_mapping_backing(&mapping) {
2481                match &name {
2482                    Some(memory_name) => {
2483                        backing.memory().set_zx_name(memory_name);
2484                    }
2485                    None => {
2486                        backing.memory().set_zx_name(b"");
2487                    }
2488                }
2489            }
2490            mapping.set_name(match &name {
2491                Some(name) => MappingName::Vma(FlyByteStr::new(name.as_bytes())),
2492                None => MappingName::None,
2493            });
2494            released_mappings.extend(self.mappings.insert(range, mapping));
2495        }
2496        if let Some(last_range_end) = last_range_end {
2497            if last_range_end < end {
2498                // The name must apply to a contiguous range of mapped pages.
2499                return error!(ENOMEM);
2500            }
2501        }
2502        Ok(())
2503    }
2504}
2505
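/// Allocates a child VMAR at offset 0 within `vmar` spanning `vmar_info.len` bytes, i.e. covering
/// exactly the parent's `[base, base + len)` range (verified by the assert), with permission to
/// create specific, readable, writable, and executable mappings.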
2506fn create_user_vmar(vmar: &zx::Vmar, vmar_info: &zx::VmarInfo) -> Result<zx::Vmar, zx::Status> {
2507    let (vmar, ptr) = vmar.allocate(
2508        0,
2509        vmar_info.len,
2510        zx::VmarFlags::SPECIFIC
2511            | zx::VmarFlags::CAN_MAP_SPECIFIC
2512            | zx::VmarFlags::CAN_MAP_READ
2513            | zx::VmarFlags::CAN_MAP_WRITE
2514            | zx::VmarFlags::CAN_MAP_EXECUTE,
2515    )?;
2516    assert_eq!(ptr, vmar_info.base);
2517    Ok(vmar)
2518}
2519
2520/// A memory manager for another thread.
2521///
2522/// When accessing memory through this object, we use less efficient codepaths that work across
2523/// address spaces.
2524pub struct RemoteMemoryManager {
2525    mm: Arc<MemoryManager>,
2526}
2527
2528impl RemoteMemoryManager {
2529    fn new(mm: Arc<MemoryManager>) -> Self {
2530        Self { mm }
2531    }
2532}
2533
2534// If we just have a MemoryManager, we cannot assume that its address space is current, which means
2535// we need to use the slower "syscall" mechanism to access its memory.
2536impl MemoryAccessor for RemoteMemoryManager {
2537    fn read_memory<'a>(
2538        &self,
2539        addr: UserAddress,
2540        bytes: &'a mut [MaybeUninit<u8>],
2541    ) -> Result<&'a mut [u8], Errno> {
2542        self.mm.syscall_read_memory(addr, bytes)
2543    }
2544
2545    fn read_memory_partial_until_null_byte<'a>(
2546        &self,
2547        addr: UserAddress,
2548        bytes: &'a mut [MaybeUninit<u8>],
2549    ) -> Result<&'a mut [u8], Errno> {
2550        self.mm.syscall_read_memory_partial_until_null_byte(addr, bytes)
2551    }
2552
2553    fn read_memory_partial<'a>(
2554        &self,
2555        addr: UserAddress,
2556        bytes: &'a mut [MaybeUninit<u8>],
2557    ) -> Result<&'a mut [u8], Errno> {
2558        self.mm.syscall_read_memory_partial(addr, bytes)
2559    }
2560
2561    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2562        self.mm.syscall_write_memory(addr, bytes)
2563    }
2564
2565    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2566        self.mm.syscall_write_memory_partial(addr, bytes)
2567    }
2568
2569    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
2570        self.mm.syscall_zero(addr, length)
2571    }
2572}
2573
2574impl TaskMemoryAccessor for RemoteMemoryManager {
2575    fn maximum_valid_address(&self) -> Option<UserAddress> {
2576        Some(self.mm.maximum_valid_user_address)
2577    }
2578}
2579
2580impl MemoryManager {
2581    pub fn summarize(&self, summary: &mut crate::mm::MappingSummary) {
2582        let state = self.state.read();
2583        for (_, mapping) in state.mappings.iter() {
2584            summary.add(&state, mapping);
2585        }
2586    }
2587
2588    pub fn get_mappings_for_vmsplice(
2589        self: &Arc<MemoryManager>,
2590        buffers: &UserBuffers,
2591    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
2592        self.state.read().get_mappings_for_vmsplice(self, buffers)
2593    }
2594
2595    pub fn has_same_address_space(&self, other: &Self) -> bool {
2596        self.root_vmar == other.root_vmar
2597    }
2598
2599    pub fn unified_read_memory<'a>(
2600        &self,
2601        current_task: &CurrentTask,
2602        addr: UserAddress,
2603        bytes: &'a mut [MaybeUninit<u8>],
2604    ) -> Result<&'a mut [u8], Errno> {
2605        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2606
2607        if let Some(usercopy) = usercopy() {
2608            let (read_bytes, unread_bytes) = usercopy.copyin(addr.ptr(), bytes);
2609            if unread_bytes.is_empty() { Ok(read_bytes) } else { error!(EFAULT) }
2610        } else {
2611            self.syscall_read_memory(addr, bytes)
2612        }
2613    }
2614
2615    pub fn syscall_read_memory<'a>(
2616        &self,
2617        addr: UserAddress,
2618        bytes: &'a mut [MaybeUninit<u8>],
2619    ) -> Result<&'a mut [u8], Errno> {
2620        self.state.read().read_memory(addr, bytes)
2621    }
2622
2623    pub fn unified_read_memory_partial_until_null_byte<'a>(
2624        &self,
2625        current_task: &CurrentTask,
2626        addr: UserAddress,
2627        bytes: &'a mut [MaybeUninit<u8>],
2628    ) -> Result<&'a mut [u8], Errno> {
2629        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2630
2631        if let Some(usercopy) = usercopy() {
2632            let (read_bytes, unread_bytes) = usercopy.copyin_until_null_byte(addr.ptr(), bytes);
2633            if read_bytes.is_empty() && !unread_bytes.is_empty() {
2634                error!(EFAULT)
2635            } else {
2636                Ok(read_bytes)
2637            }
2638        } else {
2639            self.syscall_read_memory_partial_until_null_byte(addr, bytes)
2640        }
2641    }
2642
2643    pub fn syscall_read_memory_partial_until_null_byte<'a>(
2644        &self,
2645        addr: UserAddress,
2646        bytes: &'a mut [MaybeUninit<u8>],
2647    ) -> Result<&'a mut [u8], Errno> {
2648        self.state.read().read_memory_partial_until_null_byte(addr, bytes)
2649    }
2650
2651    pub fn unified_read_memory_partial<'a>(
2652        &self,
2653        current_task: &CurrentTask,
2654        addr: UserAddress,
2655        bytes: &'a mut [MaybeUninit<u8>],
2656    ) -> Result<&'a mut [u8], Errno> {
2657        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2658
2659        if let Some(usercopy) = usercopy() {
2660            let (read_bytes, unread_bytes) = usercopy.copyin(addr.ptr(), bytes);
2661            if read_bytes.is_empty() && !unread_bytes.is_empty() {
2662                error!(EFAULT)
2663            } else {
2664                Ok(read_bytes)
2665            }
2666        } else {
2667            self.syscall_read_memory_partial(addr, bytes)
2668        }
2669    }
2670
2671    pub fn syscall_read_memory_partial<'a>(
2672        &self,
2673        addr: UserAddress,
2674        bytes: &'a mut [MaybeUninit<u8>],
2675    ) -> Result<&'a mut [u8], Errno> {
2676        self.state.read().read_memory_partial(addr, bytes)
2677    }
2678
2679    pub fn unified_write_memory(
2680        &self,
2681        current_task: &CurrentTask,
2682        addr: UserAddress,
2683        bytes: &[u8],
2684    ) -> Result<usize, Errno> {
2685        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2686
2687        if let Some(usercopy) = usercopy() {
2688            let num_copied = usercopy.copyout(bytes, addr.ptr());
2689            if num_copied != bytes.len() {
2690                error!(
2691                    EFAULT,
2692                    format!("expected {:?} bytes, copied {:?} bytes", bytes.len(), num_copied)
2693                )
2694            } else {
2695                Ok(num_copied)
2696            }
2697        } else {
2698            self.syscall_write_memory(addr, bytes)
2699        }
2700    }
2701
2702    /// Write `bytes` to memory address `addr`, making a copy-on-write child of the VMO backing and
2703    /// replacing the mapping if necessary.
2704    ///
2705    /// NOTE: this bypasses userspace's memory protection configuration and should only be called
2706    /// from codepaths, such as ptrace, that are expected to bypass memory protection.
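    ///
    /// For example (illustrative), a `PTRACE_POKETEXT`-style write into a read-only code page is
    /// expected to succeed here: rather than loosening the page's protections, the backing memory
    /// object gets a copy-on-write child that is mapped in its place before the bytes are written.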
2707    pub fn force_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<(), Errno> {
2708        let mut state = self.state.write();
2709        let mut released_mappings = ReleasedMappings::default();
2710        let result = state.force_write_memory(addr, bytes, &mut released_mappings);
2711        released_mappings.finalize(state);
2712        result
2713    }
2714
2715    pub fn syscall_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2716        self.state.read().write_memory(addr, bytes)
2717    }
2718
2719    pub fn unified_write_memory_partial(
2720        &self,
2721        current_task: &CurrentTask,
2722        addr: UserAddress,
2723        bytes: &[u8],
2724    ) -> Result<usize, Errno> {
2725        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2726
2727        if let Some(usercopy) = usercopy() {
2728            let num_copied = usercopy.copyout(bytes, addr.ptr());
2729            if num_copied == 0 && !bytes.is_empty() { error!(EFAULT) } else { Ok(num_copied) }
2730        } else {
2731            self.syscall_write_memory_partial(addr, bytes)
2732        }
2733    }
2734
2735    pub fn syscall_write_memory_partial(
2736        &self,
2737        addr: UserAddress,
2738        bytes: &[u8],
2739    ) -> Result<usize, Errno> {
2740        self.state.read().write_memory_partial(addr, bytes)
2741    }
2742
2743    pub fn unified_zero(
2744        &self,
2745        current_task: &CurrentTask,
2746        addr: UserAddress,
2747        length: usize,
2748    ) -> Result<usize, Errno> {
2749        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2750
2751        {
2752            let page_size = *PAGE_SIZE as usize;
2753            // Get the page boundary immediately following `addr` if `addr` is
2754            // not page aligned.
2755            let next_page_boundary = round_up_to_system_page_size(addr.ptr())?;
2756            // The number of bytes needed to zero at least a full page (not just
2757            // a page's worth of bytes) starting at `addr`.
2758            let length_with_atleast_one_full_page = page_size + (next_page_boundary - addr.ptr());
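            // For example (illustrative): with 4096-byte pages and `addr` 0x10 bytes past a page
            // boundary, the next boundary is 0xff0 bytes away, so `length` must be at least
            // 4096 + 0xff0 bytes before a whole page falls inside the zeroed range.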
2759            // If at least one full page is being zeroed, go through the memory object, since Zircon
2760            // can swap the mapped pages with the zero page, which should be cheaper than zeroing
2761            // out a page's worth of bytes manually.
2762            //
2763            // If we are not zeroing out a full page, then go through usercopy
2764            // if unified aspaces is enabled.
2765            if length >= length_with_atleast_one_full_page {
2766                return self.syscall_zero(addr, length);
2767            }
2768        }
2769
2770        if let Some(usercopy) = usercopy() {
2771            if usercopy.zero(addr.ptr(), length) == length { Ok(length) } else { error!(EFAULT) }
2772        } else {
2773            self.syscall_zero(addr, length)
2774        }
2775    }
2776
2777    pub fn syscall_zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
2778        self.state.read().zero(addr, length)
2779    }
2780
2781    /// Obtain a reference to this memory manager that can be used from another thread.
2782    pub fn as_remote(self: &Arc<Self>) -> RemoteMemoryManager {
2783        RemoteMemoryManager::new(self.clone())
2784    }
2785
2786    /// Performs a data and instruction cache flush over the given address range.
2787    pub fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
2788        self.state.read().cache_flush(range)
2789    }
2790
2791    /// Register the address space managed by this memory manager for interest in
2792    /// receiving private expedited memory barriers of the given type.
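    ///
    /// This roughly corresponds to the `MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED*` family of
    /// `membarrier(2)` registration commands (an assumption based on the naming used here).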
2793    pub fn register_membarrier_private_expedited(
2794        &self,
2795        mtype: MembarrierType,
2796    ) -> Result<(), Errno> {
2797        self.state.write().register_membarrier_private_expedited(mtype)
2798    }
2799
2800    /// Checks if the address space managed by this memory manager is registered
2801    /// for interest in private expedited barriers of the given kind.
2802    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2803        self.state.read().membarrier_private_expedited_registered(mtype)
2804    }
2805}
2806
2807pub struct MemoryManager {
2808    /// The root VMAR for the child process.
2809    ///
2810    /// Instead of mapping memory directly in this VMAR, we map the memory in
2811    /// `state.user_vmar`.
2812    root_vmar: zx::Vmar,
2813
2814    /// The base address of the root_vmar.
2815    pub base_addr: UserAddress,
2816
2817    /// The futexes in this address space.
2818    pub futex: Arc<FutexTable<PrivateFutexKey>>,
2819
2820    /// Mutable state for the memory manager.
2821    pub state: RwLock<MemoryManagerState>,
2822
2823    /// Whether this address space is dumpable.
2824    pub dumpable: OrderedMutex<DumpPolicy, MmDumpable>,
2825
2826    /// Maximum valid user address for this vmar.
2827    pub maximum_valid_user_address: UserAddress,
2828
2829    /// In-flight payloads enqueued to a pipe as a consequence of a `vmsplice(2)`
2830    /// operation.
2831    ///
2832    /// For details on why we need to keep track of in-flight vmspliced payloads,
2833    /// see [`VmsplicePayload`].
2834    ///
2835    /// For details on why this isn't under the `RwLock` protected `MemoryManagerState`,
2836    /// See [`InflightVmsplicedPayloads::payloads`].
2837    pub inflight_vmspliced_payloads: InflightVmsplicedPayloads,
2838
2839    /// A mechanism to be notified when this `MemoryManager` is destroyed.
2840    pub drop_notifier: DropNotifier,
2841}
2842
2843impl MemoryManager {
2844    pub fn new(root_vmar: zx::Vmar) -> Result<Self, zx::Status> {
2845        let info = root_vmar.info()?;
2846        let user_vmar = create_user_vmar(&root_vmar, &info)?;
2847        let user_vmar_info = user_vmar.info()?;
2848
2849        debug_assert_eq!(RESTRICTED_ASPACE_BASE, user_vmar_info.base);
2850        debug_assert_eq!(RESTRICTED_ASPACE_SIZE, user_vmar_info.len);
2851
2852        Ok(Self::from_vmar(root_vmar, user_vmar, user_vmar_info))
2853    }
2854
2855    fn from_vmar(root_vmar: zx::Vmar, user_vmar: zx::Vmar, user_vmar_info: zx::VmarInfo) -> Self {
2856        // The private anonymous backing memory object extends from user address 0 up to the
2857        // highest mappable address. The pages below `user_vmar_info.base` are never mapped, but
2858        // including them in the memory object makes the math for mapping address to memory object
2859        // offsets simpler.
2860        let backing_size = (user_vmar_info.base + user_vmar_info.len) as u64;
2861
2862        MemoryManager {
2863            root_vmar,
2864            base_addr: UserAddress::from_ptr(user_vmar_info.base),
2865            futex: Arc::<FutexTable<PrivateFutexKey>>::default(),
2866            state: RwLock::new(MemoryManagerState {
2867                user_vmar: user_vmar,
2868                user_vmar_info,
2869                mappings: Default::default(),
2870                private_anonymous: PrivateAnonymousMemoryManager::new(backing_size),
2871                userfaultfds: Default::default(),
2872                shadow_mappings_for_mlock: Default::default(),
2873                forkable_state: Default::default(),
2874            }),
2875            // TODO(security): Reset to DISABLE, or the value in the fs.suid_dumpable sysctl, under
2876            // certain conditions as specified in the prctl(2) man page.
2877            dumpable: OrderedMutex::new(DumpPolicy::User),
2878            maximum_valid_user_address: UserAddress::from_ptr(
2879                user_vmar_info.base + user_vmar_info.len,
2880            ),
2881            inflight_vmspliced_payloads: Default::default(),
2882            drop_notifier: DropNotifier::default(),
2883        }
2884    }
2885
2886    pub fn set_brk<L>(
2887        self: &Arc<Self>,
2888        locked: &mut Locked<L>,
2889        current_task: &CurrentTask,
2890        addr: UserAddress,
2891    ) -> Result<UserAddress, Errno>
2892    where
2893        L: LockBefore<ThreadGroupLimits>,
2894    {
2895        let mut state = self.state.write();
2896        let mut released_mappings = ReleasedMappings::default();
2897        let result = state.set_brk(locked, current_task, self, addr, &mut released_mappings);
2898        released_mappings.finalize(state);
2899        result
2900    }
2901
2902    pub fn register_uffd(&self, userfault: &Arc<UserFault>) {
2903        let mut state = self.state.write();
2904        state.userfaultfds.push(Arc::downgrade(userfault));
2905    }
2906
2907    /// Register a given memory range with a userfault object.
2908    pub fn register_with_uffd<L>(
2909        self: &Arc<Self>,
2910        locked: &mut Locked<L>,
2911        addr: UserAddress,
2912        length: usize,
2913        userfault: &Arc<UserFault>,
2914        mode: FaultRegisterMode,
2915    ) -> Result<(), Errno>
2916    where
2917        L: LockBefore<UserFaultInner>,
2918    {
2919        let mut state = self.state.write();
2920        let mut released_mappings = ReleasedMappings::default();
2921        let result =
2922            state.register_with_uffd(locked, addr, length, userfault, mode, &mut released_mappings);
2923        released_mappings.finalize(state);
2924        result
2925    }
2926
2927    /// Unregister a given range from any userfault objects associated with it.
2928    pub fn unregister_range_from_uffd<L>(
2929        &self,
2930        locked: &mut Locked<L>,
2931        userfault: &Arc<UserFault>,
2932        addr: UserAddress,
2933        length: usize,
2934    ) -> Result<(), Errno>
2935    where
2936        L: LockBefore<UserFaultInner>,
2937    {
2938        let mut state = self.state.write();
2939        let mut released_mappings = ReleasedMappings::default();
2940        let result = state.unregister_range_from_uffd(
2941            locked,
2942            userfault,
2943            addr,
2944            length,
2945            &mut released_mappings,
2946        );
2947        released_mappings.finalize(state);
2948        result
2949    }
2950
2951    /// Unregister any mappings registered with a given userfault object. Used when closing the last
2952    /// file descriptor associated with it.
2953    pub fn unregister_uffd<L>(&self, locked: &mut Locked<L>, userfault: &Arc<UserFault>)
2954    where
2955        L: LockBefore<UserFaultInner>,
2956    {
2957        let mut state = self.state.write();
2958        let mut released_mappings = ReleasedMappings::default();
2959        state.unregister_uffd(locked, userfault, &mut released_mappings);
2960        released_mappings.finalize(state);
2961    }
2962
2963    /// Populate a range of pages registered with a userfaultfd according to a `populate` function.
2964    /// This will fail if the pages were not registered with userfaultfd, or if the page at `addr`
2965    /// was already populated. If any page other than the first one was populated, the `length`
2966    /// is adjusted to only include the first N unpopulated pages, and this adjusted length
2967    /// is then passed to `populate`. On success, returns the number of populated bytes.
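    ///
    /// For example (illustrative): for a four-page request in which only the third page is already
    /// populated, `populate` is invoked with a length of two pages and `2 * PAGE_SIZE` is returned;
    /// if the first page is already populated, the operation fails with `EEXIST`.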
2968    pub fn populate_from_uffd<F, L>(
2969        &self,
2970        locked: &mut Locked<L>,
2971        addr: UserAddress,
2972        length: usize,
2973        userfault: &Arc<UserFault>,
2974        populate: F,
2975    ) -> Result<usize, Errno>
2976    where
2977        F: FnOnce(&MemoryManagerState, usize) -> Result<usize, Errno>,
2978        L: LockBefore<UserFaultInner>,
2979    {
2980        let state = self.state.read();
2981
2982        // Check that the `addr..addr + length` range is a contiguous range of mappings which
2983        // are all registered with a userfault object.
2984        let mut bytes_registered_with_uffd = 0;
2985        for (mapping, len) in state.get_contiguous_mappings_at(addr, length)? {
2986            if mapping.flags().contains(MappingFlags::UFFD) {
2987                // Check that the mapping is registered with the same uffd. This is not required,
2988                // but we don't support cross-uffd operations yet.
2989                if !userfault.contains_addr(locked, addr) {
2990                    track_stub!(
2991                        TODO("https://fxbug.dev/391599171"),
2992                        "operations across different uffds"
2993                    );
2994                    return error!(ENOTSUP);
2995                };
2996            } else {
2997                return error!(ENOENT);
2998            }
2999            bytes_registered_with_uffd += len;
3000        }
3001        if bytes_registered_with_uffd != length {
3002            return error!(ENOENT);
3003        }
3004
3005        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3006
3007        // Determine how many pages in the requested range are already populated
3008        let first_populated =
3009            userfault.get_first_populated_page_after(locked, addr).ok_or_else(|| errno!(ENOENT))?;
3010        // If the very first page is already populated, uffd operations should just return EEXIST
3011        if first_populated == addr {
3012            return error!(EEXIST);
3013        }
3014        // Otherwise it is possible to do an incomplete operation by only populating pages until
3015        // the first populated one.
3016        let trimmed_end = std::cmp::min(first_populated, end_addr);
3017        let effective_length = trimmed_end - addr;
3018
3019        populate(&state, effective_length)?;
3020        userfault.insert_pages(locked, addr..trimmed_end, true);
3021
3022        // Since we used protection bits to force pagefaults, we now need to reverse this change by
3023        // restoring the protections on the underlying Zircon mappings to the "real" protection bits
3024        // that were kept in the Starnix mappings. This will prevent new pagefaults from being
3025        // generated. Only do this on the pages that were populated by this operation.
3026        for (range, mapping) in state.mappings.range(addr..trimmed_end) {
3027            let range_to_protect = range.intersect(&(addr..trimmed_end));
3028            let restored_flags = mapping.flags().access_flags();
3029            let length = range_to_protect.end - range_to_protect.start;
3030            state
3031                .protect_vmar_range(range_to_protect.start, length, restored_flags)
3032                .expect("Failed to restore original protection bits on uffd-registered range");
3033        }
3034        // Return the number of effectively populated bytes, which might be smaller than the
3035        // requested number.
3036        Ok(effective_length)
3037    }
3038
3039    pub fn zero_from_uffd<L>(
3040        &self,
3041        locked: &mut Locked<L>,
3042        addr: UserAddress,
3043        length: usize,
3044        userfault: &Arc<UserFault>,
3045    ) -> Result<usize, Errno>
3046    where
3047        L: LockBefore<UserFaultInner>,
3048    {
3049        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3050            state.zero(addr, effective_length)
3051        })
3052    }
3053
3054    pub fn fill_from_uffd<L>(
3055        &self,
3056        locked: &mut Locked<L>,
3057        addr: UserAddress,
3058        buf: &[u8],
3059        length: usize,
3060        userfault: &Arc<UserFault>,
3061    ) -> Result<usize, Errno>
3062    where
3063        L: LockBefore<UserFaultInner>,
3064    {
3065        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3066            state.write_memory(addr, &buf[..effective_length])
3067        })
3068    }
3069
3070    pub fn copy_from_uffd<L>(
3071        &self,
3072        locked: &mut Locked<L>,
3073        source_addr: UserAddress,
3074        dst_addr: UserAddress,
3075        length: usize,
3076        userfault: &Arc<UserFault>,
3077    ) -> Result<usize, Errno>
3078    where
3079        L: LockBefore<UserFaultInner>,
3080    {
3081        self.populate_from_uffd(locked, dst_addr, length, userfault, |state, effective_length| {
3082            let mut buf = vec![std::mem::MaybeUninit::uninit(); effective_length];
3083            let buf = state.read_memory(source_addr, &mut buf)?;
3084            state.write_memory(dst_addr, &buf[..effective_length])
3085        })
3086    }
3087
3088    /// Create a snapshot of the memory mapping from `self` into `target`. All
3089    /// memory mappings are copied entry-for-entry, and the copies end up at
3090    /// exactly the same addresses.
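    ///
    /// This is the `fork(2)`-style duplication path (descriptive note): mappings marked `DONTFORK`
    /// are skipped and `WIPEONFORK` mappings are re-created empty, mirroring the corresponding
    /// `madvise(2)` semantics.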
3091    pub fn snapshot_to<L>(
3092        &self,
3093        locked: &mut Locked<L>,
3094        target: &Arc<MemoryManager>,
3095    ) -> Result<(), Errno>
3096    where
3097        L: LockBefore<MmDumpable>,
3098    {
3099        // Hold the lock throughout the operation to uphold memory manager's invariants.
3100        // See mm/README.md.
3101        let state: &mut MemoryManagerState = &mut self.state.write();
3102        let mut target_state = target.state.write();
3103        let mut clone_cache = HashMap::<zx::Koid, Arc<MemoryObject>>::new();
3104
3105        let backing_size = (state.user_vmar_info.base + state.user_vmar_info.len) as u64;
3106        target_state.private_anonymous = state.private_anonymous.snapshot(backing_size)?;
3107
3108        for (range, mapping) in state.mappings.iter() {
3109            if mapping.flags().contains(MappingFlags::DONTFORK) {
3110                continue;
3111            }
3112            // Locking is not inherited when forking.
3113            let target_mapping_flags = mapping.flags().difference(MappingFlags::LOCKED);
3114            match state.get_mapping_backing(mapping) {
3115                MappingBacking::Memory(backing) => {
3116                    let memory_offset = backing.address_to_offset(range.start);
3117                    let length = range.end - range.start;
3118
3119                    let target_memory = if mapping.flags().contains(MappingFlags::SHARED)
3120                        || mapping.name() == MappingName::Vvar
3121                    {
3122                        // Note that the Vvar is a special mapping that behaves like a shared mapping but
3123                        // is private to each process.
3124                        backing.memory().clone()
3125                    } else if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3126                        create_anonymous_mapping_memory(length as u64)?
3127                    } else {
3128                        let basic_info = backing.memory().basic_info();
3129                        let memory =
3130                            clone_cache.entry(basic_info.koid).or_insert_with_fallible(|| {
3131                                backing.memory().clone_memory(basic_info.rights)
3132                            })?;
3133                        memory.clone()
3134                    };
3135
3136                    let mut released_mappings = ReleasedMappings::default();
3137                    target_state.map_memory(
3138                        target,
3139                        DesiredAddress::Fixed(range.start),
3140                        target_memory,
3141                        memory_offset,
3142                        length,
3143                        target_mapping_flags,
3144                        mapping.max_access(),
3145                        false,
3146                        mapping.name().clone(),
3147                        &mut released_mappings,
3148                    )?;
3149                    assert!(
3150                        released_mappings.is_empty(),
3151                        "target mm must be empty when cloning, got {released_mappings:#?}"
3152                    );
3153                }
3154                MappingBacking::PrivateAnonymous => {
3155                    let length = range.end - range.start;
3156                    if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3157                        target_state
3158                            .private_anonymous
3159                            .zero(range.start, length)
3160                            .map_err(|_| errno!(ENOMEM))?;
3161                    }
3162
3163                    let target_memory_offset = range.start.ptr() as u64;
3164                    target_state.map_in_user_vmar(
3165                        SelectedAddress::FixedOverwrite(range.start),
3166                        &target_state.private_anonymous.backing,
3167                        target_memory_offset,
3168                        length,
3169                        target_mapping_flags,
3170                        false,
3171                    )?;
3172                    let removed_mappings = target_state.mappings.insert(
3173                        range.clone(),
3174                        Mapping::new_private_anonymous(
3175                            target_mapping_flags,
3176                            mapping.name().clone(),
3177                        ),
3178                    );
3179                    assert!(
3180                        removed_mappings.is_empty(),
3181                        "target mm must be empty when cloning, got {removed_mappings:#?}"
3182                    );
3183                }
3184            };
3185        }
3186
3187        target_state.forkable_state = state.forkable_state.clone();
3188
3189        let self_dumpable = *self.dumpable.lock(locked);
3190        *target.dumpable.lock(locked) = self_dumpable;
3191
3192        Ok(())
3193    }
3194
3195    /// Returns the replacement `MemoryManager` to be used by the `exec()`ing task.
3196    ///
3197    /// POSIX requires that "a call to any exec function from a process with more than one thread
3198    /// shall result in all threads being terminated and the new executable being loaded and
3199    /// executed. No destructor functions or cleanup handlers shall be called".
3200    /// The caller is responsible for having ensured that this is the only `Task` in the
3201    /// `ThreadGroup`, and thereby the `zx::process`, such that it is safe to tear down the Zircon
3202    /// userspace VMAR for the current address-space.
3203    pub fn exec(
3204        &self,
3205        exe_node: NamespaceNode,
3206        arch_width: ArchWidth,
3207    ) -> Result<Arc<Self>, zx::Status> {
3208        // To safeguard against concurrent accesses by other tasks through this `MemoryManager`, the
3209        // following steps are performed while holding the write lock on this instance:
3210        //
3211        // 1. All `mappings` are removed, so that remote `MemoryAccessor` calls will fail.
3212        // 2. The `user_vmar` is `destroy()`ed to free-up the user address-space.
3213        // 3. The new `user_vmar` is created, to re-reserve the user address-space.
3214        //
3215        // Once these steps are complete the lock must first be dropped, after which it is safe for
3216        // the old mappings to be dropped.
3217        let (_old_mappings, user_vmar) = {
3218            let mut state = self.state.write();
3219            let mut info = self.root_vmar.info()?;
3220
3221            // SAFETY: This operation is safe because this is the only `Task` active in the address-
3222            // space, and accesses by remote tasks will use syscalls on the `root_vmar`.
3223            unsafe { state.user_vmar.destroy()? }
3224            state.user_vmar = zx::NullableHandle::invalid().into();
3225
3226            if arch_width.is_arch32() {
3227                info.len = (LOWER_4GB_LIMIT.ptr() - info.base) as usize;
3228            } else {
3229                info.len = RESTRICTED_ASPACE_HIGHEST_ADDRESS - info.base;
3230            }
3231
3232            // Create the new userspace VMAR, to ensure that the address range is (re-)reserved.
3233            let user_vmar = create_user_vmar(&self.root_vmar, &info)?;
3234
3235            (std::mem::replace(&mut state.mappings, Default::default()), user_vmar)
3236        };
3237
3238        // Wrap the new user address-space VMAR into a new `MemoryManager`.
3239        let root_vmar = self.root_vmar.duplicate_handle(zx::Rights::SAME_RIGHTS)?;
3240        let user_vmar_info = user_vmar.info()?;
3241        let new_mm = Self::from_vmar(root_vmar, user_vmar, user_vmar_info);
3242
3243        // Initialize the new `MemoryManager` state.
3244        new_mm.state.write().executable_node = Some(exe_node);
3245
3246        // Initialize the appropriate address-space layout for the `arch_width`.
3247        new_mm.initialize_mmap_layout(arch_width)?;
3248
3249        Ok(Arc::new(new_mm))
3250    }
3251
3252    pub fn initialize_mmap_layout(&self, arch_width: ArchWidth) -> Result<(), Errno> {
3253        let mut state = self.state.write();
3254
3255        // Place the stack at the end of the address space, subject to ASLR adjustment.
3256        state.stack_origin = UserAddress::from_ptr(
3257            state.user_vmar_info.base + state.user_vmar_info.len
3258                - MAX_STACK_SIZE
3259                - generate_random_offset_for_aslr(arch_width),
3260        )
3261        .round_up(*PAGE_SIZE)?;
3262
3263        // Set the highest address that `mmap` will assign to the allocations that don't ask for a
3264        // specific address, subject to ASLR adjustment.
3265        state.mmap_top = state
3266            .stack_origin
3267            .checked_sub(generate_random_offset_for_aslr(arch_width))
3268            .ok_or_else(|| errno!(EINVAL))?;
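        // Resulting layout (illustrative sketch), from low to high addresses:
        //   <mmap allocations> | mmap_top | ASLR gap | stack_origin .. stack_origin + MAX_STACK_SIZE
        //   | ASLR gap | top of the user vmar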
3269        Ok(())
3270    }
3271
3272    // Test tasks are not initialized by exec; simulate its behavior by initializing memory layout
3273    // as if a zero-size executable had been loaded.
3274    pub fn initialize_mmap_layout_for_test(self: &Arc<Self>, arch_width: ArchWidth) {
3275        self.initialize_mmap_layout(arch_width).unwrap();
3276        let fake_executable_addr = self.get_random_base_for_executable(arch_width, 0).unwrap();
3277        self.initialize_brk_origin(arch_width, fake_executable_addr).unwrap();
3278    }
3279
3280    pub fn initialize_brk_origin(
3281        self: &Arc<Self>,
3282        arch_width: ArchWidth,
3283        executable_end: UserAddress,
3284    ) -> Result<(), Errno> {
3285        self.state.write().brk_origin = executable_end
3286            .checked_add(generate_random_offset_for_aslr(arch_width))
3287            .ok_or_else(|| errno!(EINVAL))?;
3288        Ok(())
3289    }
3290
3291    /// Get a randomised address for loading a position-independent executable.
3293    pub fn get_random_base_for_executable(
3294        &self,
3295        arch_width: ArchWidth,
3296        length: usize,
3297    ) -> Result<UserAddress, Errno> {
3298        let state = self.state.read();
3299
3300        // Place it at approx. 2/3 of the available mmap space, subject to ASLR adjustment.
3301        let base = round_up_to_system_page_size(2 * state.mmap_top.ptr() / 3).unwrap()
3302            + generate_random_offset_for_aslr(arch_width);
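        // Worked example (illustrative only): if `mmap_top` were 0x7fff_0000_0000, the unrounded
        // two-thirds point would be 0x5554_aaaa_aaaa, which rounds up to 0x5554_aaaa_b000 before
        // the random ASLR offset is added.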
3303        if base.checked_add(length).ok_or_else(|| errno!(EINVAL))? <= state.mmap_top.ptr() {
3304            Ok(UserAddress::from_ptr(base))
3305        } else {
3306            error!(EINVAL)
3307        }
3308    }

3309    pub fn executable_node(&self) -> Option<NamespaceNode> {
3310        self.state.read().executable_node.clone()
3311    }
3312
3313    #[track_caller]
3314    pub fn get_errno_for_map_err(status: zx::Status) -> Errno {
3315        match status {
3316            zx::Status::INVALID_ARGS => errno!(EINVAL),
3317            zx::Status::ACCESS_DENIED => errno!(EPERM),
3318            zx::Status::NOT_SUPPORTED => errno!(ENODEV),
3319            zx::Status::NO_MEMORY => errno!(ENOMEM),
3320            zx::Status::NO_RESOURCES => errno!(ENOMEM),
3321            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
3322            zx::Status::ALREADY_EXISTS => errno!(EEXIST),
3323            zx::Status::BAD_STATE => errno!(EINVAL),
3324            _ => impossible_error(status),
3325        }
3326    }
3327
3328    #[track_caller]
3329    pub fn get_errno_for_vmo_err(status: zx::Status) -> Errno {
3330        match status {
3331            zx::Status::NO_MEMORY => errno!(ENOMEM),
3332            zx::Status::ACCESS_DENIED => errno!(EPERM),
3333            zx::Status::NOT_SUPPORTED => errno!(EIO),
3334            zx::Status::BAD_STATE => errno!(EIO),
3335            _ => return impossible_error(status),
3336        }
3337    }
3338
3339    pub fn map_memory(
3340        self: &Arc<Self>,
3341        addr: DesiredAddress,
3342        memory: Arc<MemoryObject>,
3343        memory_offset: u64,
3344        length: usize,
3345        prot_flags: ProtectionFlags,
3346        max_access: Access,
3347        options: MappingOptions,
3348        name: MappingName,
3349    ) -> Result<UserAddress, Errno> {
3350        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
3351
3352        // Unmapped mappings must be released after the state is unlocked.
3353        let mut released_mappings = ReleasedMappings::default();
3354        // Hold the lock throughout the operation to uphold memory manager's invariants.
3355        // See mm/README.md.
3356        let mut state = self.state.write();
3357        let result = state.map_memory(
3358            self,
3359            addr,
3360            memory,
3361            memory_offset,
3362            length,
3363            flags,
3364            max_access,
3365            options.contains(MappingOptions::POPULATE),
3366            name,
3367            &mut released_mappings,
3368        );
3369
3370        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
3371        // in `DirEntry`'s `drop`.
3372        released_mappings.finalize(state);
3373
3374        result
3375    }
3376
3377    pub fn map_anonymous(
3378        self: &Arc<Self>,
3379        addr: DesiredAddress,
3380        length: usize,
3381        prot_flags: ProtectionFlags,
3382        options: MappingOptions,
3383        name: MappingName,
3384    ) -> Result<UserAddress, Errno> {
3385        let mut released_mappings = ReleasedMappings::default();
3386        // Hold the lock throughout the operation to uphold memory manager's invariants.
3387        // See mm/README.md.
3388        let mut state = self.state.write();
3389        let result = state.map_anonymous(
3390            self,
3391            addr,
3392            length,
3393            prot_flags,
3394            options,
3395            name,
3396            &mut released_mappings,
3397        );
3398
3399        released_mappings.finalize(state);
3400
3401        result
3402    }
3403
3404    /// Map the stack into a pre-selected address region
3405    pub fn map_stack(
3406        self: &Arc<Self>,
3407        length: usize,
3408        prot_flags: ProtectionFlags,
3409    ) -> Result<UserAddress, Errno> {
3410        assert!(length <= MAX_STACK_SIZE);
3411        let addr = self.state.read().stack_origin;
3412        // The address range containing stack_origin should normally be available: it's above the
3413        // mmap_top, and this method is called early enough in the process lifetime that only the
3414        // main ELF and the interpreter are already loaded. However, in the rare case that a
3415        // static position-independent executable overlaps the chosen address, mapping it as a
3416        // Hint lets mmap choose a new place for it.
3417        // TODO(https://fxbug.dev/370027241): Consider a more robust approach
3418        let stack_addr = self.map_anonymous(
3419            DesiredAddress::Hint(addr),
3420            length,
3421            prot_flags,
3422            MappingOptions::ANONYMOUS | MappingOptions::GROWSDOWN,
3423            MappingName::Stack,
3424        )?;
3425        if stack_addr != addr {
3426            log_warn!(
3427                "An address designated for stack ({}) was unavailable, mapping at {} instead.",
3428                addr,
3429                stack_addr
3430            );
3431        }
3432        Ok(stack_addr)
3433    }
3434
3435    pub fn remap(
3436        self: &Arc<Self>,
3437        current_task: &CurrentTask,
3438        addr: UserAddress,
3439        old_length: usize,
3440        new_length: usize,
3441        flags: MremapFlags,
3442        new_addr: UserAddress,
3443    ) -> Result<UserAddress, Errno> {
3444        let mut released_mappings = ReleasedMappings::default();
3445        // Hold the lock throughout the operation to uphold memory manager's invariants.
3446        // See mm/README.md.
3447        let mut state = self.state.write();
3448        let result = state.remap(
3449            current_task,
3450            self,
3451            addr,
3452            old_length,
3453            new_length,
3454            flags,
3455            new_addr,
3456            &mut released_mappings,
3457        );
3458
3459        released_mappings.finalize(state);
3460
3461        result
3462    }
3463
3464    pub fn unmap(self: &Arc<Self>, addr: UserAddress, length: usize) -> Result<(), Errno> {
3465        let mut released_mappings = ReleasedMappings::default();
3466        // Hold the lock throughout the operation to uphold memory manager's invariants.
3467        // See mm/README.md.
3468        let mut state = self.state.write();
3469        let result = state.unmap(self, addr, length, &mut released_mappings);
3470
3471        released_mappings.finalize(state);
3472
3473        result
3474    }
3475
3476    pub fn protect(
3477        &self,
3478        current_task: &CurrentTask,
3479        addr: UserAddress,
3480        length: usize,
3481        prot_flags: ProtectionFlags,
3482    ) -> Result<(), Errno> {
3483        // Hold the lock throughout the operation to uphold memory manager's invariants.
3484        // See mm/README.md.
3485        let mut state = self.state.write();
3486        let mut released_mappings = ReleasedMappings::default();
3487        let result = state.protect(current_task, addr, length, prot_flags, &mut released_mappings);
3488        released_mappings.finalize(state);
3489        result
3490    }
3491
3492    pub fn madvise(
3493        &self,
3494        current_task: &CurrentTask,
3495        addr: UserAddress,
3496        length: usize,
3497        advice: u32,
3498    ) -> Result<(), Errno> {
3499        let mut state = self.state.write();
3500        let mut released_mappings = ReleasedMappings::default();
3501        let result = state.madvise(current_task, addr, length, advice, &mut released_mappings);
3502        released_mappings.finalize(state);
3503        result
3504    }
3505
3506    pub fn mlock<L>(
3507        &self,
3508        current_task: &CurrentTask,
3509        locked: &mut Locked<L>,
3510        desired_addr: UserAddress,
3511        desired_length: usize,
3512        on_fault: bool,
3513    ) -> Result<(), Errno>
3514    where
3515        L: LockBefore<ThreadGroupLimits>,
3516    {
3517        let mut state = self.state.write();
3518        let mut released_mappings = ReleasedMappings::default();
3519        let result = state.mlock(
3520            current_task,
3521            locked,
3522            desired_addr,
3523            desired_length,
3524            on_fault,
3525            &mut released_mappings,
3526        );
3527        released_mappings.finalize(state);
3528        result
3529    }
3530
3531    pub fn munlock(
3532        &self,
3533        current_task: &CurrentTask,
3534        desired_addr: UserAddress,
3535        desired_length: usize,
3536    ) -> Result<(), Errno> {
3537        let mut state = self.state.write();
3538        let mut released_mappings = ReleasedMappings::default();
3539        let result =
3540            state.munlock(current_task, desired_addr, desired_length, &mut released_mappings);
3541        released_mappings.finalize(state);
3542        result
3543    }
3544
3545    pub fn log_memory_map(&self, task: &Task, fault_address: UserAddress) {
3546        let state = self.state.read();
3547        log_warn!("Memory map for pid={}:", task.thread_group.leader);
3548        let mut last_end = UserAddress::from_ptr(0);
3549        for (range, map) in state.mappings.iter() {
3550            if fault_address >= last_end && fault_address < range.start {
3551                log_warn!("{:08x} <= FAULT", fault_address.ptr());
3552            }
3553
3554            let perms = format!(
3555                "{}{}{}{}",
3556                if map.can_read() { 'r' } else { '-' },
3557                if map.can_write() { 'w' } else { '-' },
3558                if map.can_exec() { 'x' } else { '-' },
3559                if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' }
3560            );
3561
3562            let backing = match state.get_mapping_backing(map) {
3563                MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
3564                MappingBacking::PrivateAnonymous => 0,
3565            };
3566
3567            let name_str = match &map.name() {
3568                MappingName::File(file) => {
3569                    String::from_utf8_lossy(&file.name.path(task)).into_owned()
3570                }
3571                MappingName::None | MappingName::AioContext(_) => {
3572                    if map.flags().contains(MappingFlags::SHARED)
3573                        && map.flags().contains(MappingFlags::ANONYMOUS)
3574                    {
3575                        "/dev/zero (deleted)".to_string()
3576                    } else {
3577                        "".to_string()
3578                    }
3579                }
3580                MappingName::Stack => "[stack]".to_string(),
3581                MappingName::Heap => "[heap]".to_string(),
3582                MappingName::Vdso => "[vdso]".to_string(),
3583                MappingName::Vvar => "[vvar]".to_string(),
3584                _ => format!("{:?}", map.name()),
3585            };
3586
3587            let fault_marker = if range.contains(&fault_address) { " <= FAULT" } else { "" };
3588
3589            log_warn!(
3590                "{:08x}-{:08x} {} {:08x} {}{}",
3591                range.start.ptr(),
3592                range.end.ptr(),
3593                perms,
3594                backing,
3595                name_str,
3596                fault_marker
3597            );
3598            last_end = range.end;
3599        }
3600
3601        if fault_address >= last_end {
3602            log_warn!("{:08x} <= FAULT", fault_address.ptr());
3603        }
3604    }
3605
3606    pub fn handle_page_fault(
3607        self: &Arc<Self>,
3608        locked: &mut Locked<Unlocked>,
3609        decoded: PageFaultExceptionReport,
3610        error_code: zx::Status,
3611    ) -> ExceptionResult {
3612        let addr = UserAddress::from(decoded.faulting_address);
3613        // On a uffd-registered range, handle the fault according to the uffd rules.
3614        if error_code == zx::Status::ACCESS_DENIED {
3615            let state = self.state.write();
3616            if let Some((_, mapping)) = state.mappings.get(addr) {
3617                if mapping.flags().contains(MappingFlags::UFFD) {
3618                    // TODO(https://fxbug.dev/391599171): Support other modes
3619                    assert!(mapping.flags().contains(MappingFlags::UFFD_MISSING));
3620
3621                    if let Some(_uffd) = state.find_uffd(locked, addr) {
3622                        // If the SIGBUS feature was set, no event will be sent to the file.
3623                        // Instead, SIGBUS is delivered to the process that triggered the fault.
3624                        // TODO(https://fxbug.dev/391599171): For now we only support this feature,
3625                        // so we assume it is set.
3626                        // Check for the SIGBUS feature when we start supporting running without it.
3627                        return ExceptionResult::Signal(SignalInfo::with_detail(
3628                            SIGBUS,
3629                            BUS_ADRERR as i32,
3630                            SignalDetail::SigFault { addr: decoded.faulting_address },
3631                        ));
3632                    };
3633                }
3634                let exec_denied = decoded.is_execute && !mapping.can_exec();
3635                let write_denied = decoded.is_write && !mapping.can_write();
3636                let read_denied = (!decoded.is_execute && !decoded.is_write) && !mapping.can_read();
3637                // There is a data race when uffd unregistration and a page fault happen at the
3638                // same time. To detect it, we check whether the access was meant to be rejected
3639                // according to Starnix's own information about the mapping.
3640                let false_reject = !exec_denied && !write_denied && !read_denied;
3641                if false_reject {
3642                    track_stub!(
3643                        TODO("https://fxbug.dev/435171399"),
3644                        "Inconsistent permission fault"
3645                    );
3646                    return ExceptionResult::Handled;
3647                }
3648            }
3649            std::mem::drop(state);
3650        }
3651
3652        if decoded.not_present {
3653            // A page fault may be resolved by extending a growsdown mapping to cover the faulting
3654            // address. Mark the exception handled if so. Otherwise let the regular handling proceed.
3655
3656            // We should only attempt growth on a not-present fault and we should only extend if the
3657            // access type matches the protection on the GROWSDOWN mapping.
3658            match self.extend_growsdown_mapping_to_address(
3659                UserAddress::from(decoded.faulting_address),
3660                decoded.is_write,
3661            ) {
3662                Ok(true) => {
3663                    return ExceptionResult::Handled;
3664                }
3665                Err(e) => {
3666                    log_warn!("Error handling page fault: {e}")
3667                }
3668                _ => {}
3669            }
3670        }
3671        // For this exception type, the synth_code field in the exception report's context is the
3672        // error generated by the page fault handler. For us this is used to distinguish between a
3673        // segmentation violation and a bus error. Unfortunately this detail is not documented in
3674        // Zircon's public documentation and is only described in the architecture-specific
3675        // exception definitions such as:
3676        // zircon/kernel/arch/x86/include/arch/x86.h
3677        // zircon/kernel/arch/arm64/include/arch/arm64.h
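        // For example (illustrative), a fault past the end of a file-backed mapping is expected to
        // surface as `OUT_OF_RANGE` and therefore be reported as SIGBUS, matching Linux behavior
        // for accesses beyond the end of the mapped object.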
3678        let signo = match error_code {
3679            zx::Status::OUT_OF_RANGE => SIGBUS,
3680            _ => SIGSEGV,
3681        };
3682        ExceptionResult::Signal(SignalInfo::with_detail(
3683            signo,
3684            SI_KERNEL as i32,
3685            SignalDetail::SigFault { addr: decoded.faulting_address },
3686        ))
3687    }
3688
3689    pub fn set_mapping_name(
3690        &self,
3691        addr: UserAddress,
3692        length: usize,
3693        name: Option<FsString>,
3694    ) -> Result<(), Errno> {
3695        let mut state = self.state.write();
3696        let mut released_mappings = ReleasedMappings::default();
3697        let result = state.set_mapping_name(addr, length, name, &mut released_mappings);
3698        released_mappings.finalize(state);
3699        result
3700    }
3701
3702    /// Returns [`Ok`] if the entire range specified by `addr..(addr+length)` contains valid
3703    /// mappings.
3704    ///
3705    /// # Errors
3706    ///
3707    /// Returns [`Err(errno)`] where `errno` is:
3708    ///
3709    ///   - `EINVAL`: `addr` is not page-aligned, or the range is too large,
3710    ///   - `ENOMEM`: one or more pages in the range are not mapped.
3711    pub fn ensure_mapped(&self, addr: UserAddress, length: usize) -> Result<(), Errno> {
3712        if !addr.is_aligned(*PAGE_SIZE) {
3713            return error!(EINVAL);
3714        }
3715
3716        let length = round_up_to_system_page_size(length)?;
3717        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3718        let state = self.state.read();
3719        let mut last_end = addr;
3720        for (range, _) in state.mappings.range(addr..end_addr) {
3721            if range.start > last_end {
3722                // This mapping does not start immediately after the last.
3723                return error!(ENOMEM);
3724            }
3725            last_end = range.end;
3726        }
3727        if last_end < end_addr {
3728            // There is a gap of no mappings at the end of the range.
3729            error!(ENOMEM)
3730        } else {
3731            Ok(())
3732        }
3733    }
3734
3735    /// Returns the memory object mapped at the address and the offset into the memory object of
3736    /// the address. Intended for implementing futexes.
3737    pub fn get_mapping_memory(
3738        &self,
3739        addr: UserAddress,
3740        perms: ProtectionFlags,
3741    ) -> Result<(Arc<MemoryObject>, u64), Errno> {
3742        let state = self.state.read();
3743        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
3744        if !mapping.flags().access_flags().contains(perms) {
3745            return error!(EACCES);
3746        }
3747        match state.get_mapping_backing(mapping) {
3748            MappingBacking::Memory(backing) => {
3749                Ok((Arc::clone(backing.memory()), mapping.address_to_offset(addr)))
3750            }
3751            MappingBacking::PrivateAnonymous => {
3752                Ok((Arc::clone(&state.private_anonymous.backing), addr.ptr() as u64))
3753            }
3754        }
3755    }
3756
3757    /// Does a rough check that the given address is plausibly in the address space of the
3758    /// application. This does not mean the pointer is valid for any particular purpose or that
3759    /// it will remain so!
3760    ///
3761    /// In some syscalls, Linux seems to do some initial validation of the pointer up front to
3762    /// tell the caller early if it's invalid. For example, in epoll_wait() it's returning a vector
3763    /// of events. If the caller passes an invalid pointer, it wants to fail without dropping any
3764    /// events. Failing later when actually copying the required events to userspace would mean
3765    /// those events will be lost. But holding a lock on the memory manager for an asynchronous
3766    /// wait is not desirable.
3767    ///
3768    /// Testing shows that Linux seems to do some initial plausibility checking of the pointer to
3769    /// be able to report common usage errors before doing any (possibly irreversible) work. This
3770    /// checking is easy to get around if you try, so this function is also not required to
3771    /// be particularly robust. Certainly the more advanced cases of races (the memory could be
3772    /// unmapped after this call but before it's used) are not handled.
3773    ///
3774    /// The buffer_size variable is the size of the data structure that needs to fit
3775    /// in the given memory.
3776    ///
3777    /// Returns the error EFAULT if invalid.
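    ///
    /// For example (illustrative, hypothetical caller): an `epoll_wait(2)` implementation might
    /// call `check_plausible(events_addr, max_events * size_of::<epoll_event>())` before blocking,
    /// so that an obviously bad pointer fails with `EFAULT` without consuming any events.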
3778    pub fn check_plausible(&self, addr: UserAddress, buffer_size: usize) -> Result<(), Errno> {
3779        let state = self.state.read();
3780
3781        if let Some(range) = state.mappings.last_range() {
3782            if (range.end - buffer_size)? >= addr {
3783                return Ok(());
3784            }
3785        }
3786        error!(EFAULT)
3787    }
3788
3789    pub fn get_aio_context(&self, addr: UserAddress) -> Option<Arc<AioContext>> {
3790        let state = self.state.read();
3791        state.get_aio_context(addr).map(|(_, aio_context)| aio_context)
3792    }
3793
3794    pub fn destroy_aio_context(
3795        self: &Arc<Self>,
3796        addr: UserAddress,
3797    ) -> Result<Arc<AioContext>, Errno> {
3798        let mut released_mappings = ReleasedMappings::default();
3799
3800        // Hold the lock throughout the operation to uphold memory manager's invariants.
3801        // See mm/README.md.
3802        let mut state = self.state.write();
3803
3804        // Validate that this address actually has an AioContext. We need to hold the state lock
3805        // until we actually remove the mappings to ensure that another thread does not manipulate
3806        // the mappings after we've validated that they contain an AioContext.
3807        let Some((range, aio_context)) = state.get_aio_context(addr) else {
3808            return error!(EINVAL);
3809        };
3810
3811        let length = range.end - range.start;
3812        let result = state.unmap(self, range.start, length, &mut released_mappings);
3813
3814        released_mappings.finalize(state);
3815
3816        result.map(|_| aio_context)
3817    }
3818
3819    #[cfg(test)]
3820    pub fn get_mapping_name(
3821        &self,
3822        addr: UserAddress,
3823    ) -> Result<Option<flyweights::FlyByteStr>, Errno> {
3824        let state = self.state.read();
3825        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
3826        if let MappingName::Vma(name) = mapping.name() { Ok(Some(name.clone())) } else { Ok(None) }
3827    }
3828
3829    #[cfg(test)]
3830    pub fn get_mapping_count(&self) -> usize {
3831        let state = self.state.read();
3832        state.mappings.iter().count()
3833    }
3834
3835    pub fn extend_growsdown_mapping_to_address(
3836        self: &Arc<Self>,
3837        addr: UserAddress,
3838        is_write: bool,
3839    ) -> Result<bool, Error> {
3840        self.state.write().extend_growsdown_mapping_to_address(self, addr, is_write)
3841    }
3842
3843    pub fn get_stats(&self, current_task: &CurrentTask) -> MemoryStats {
3844        // Grab our state lock before reading zircon mappings so that the two are consistent.
3845        // Other Starnix threads should not make any changes to the Zircon mappings while we hold
3846        // a read lock on the memory manager state.
3847        let state = self.state.read();
3848
3849        let mut stats = MemoryStats::default();
3850        stats.vm_stack = state.stack_size;
3851
3852        state.with_zx_mappings(current_task, |zx_mappings| {
3853            for zx_mapping in zx_mappings {
3854                // We only care about map info for actual mappings.
3855                let zx_details = zx_mapping.details();
3856                let Some(zx_details) = zx_details.as_mapping() else { continue };
3857                let user_address = UserAddress::from(zx_mapping.base as u64);
3858                let (_, mm_mapping) = state
3859                    .mappings
3860                    .get(user_address)
3861                    .unwrap_or_else(|| panic!("mapping bookkeeping must be consistent with zircon's: not found: {user_address:?}"));
3862                debug_assert_eq!(
3863                    match state.get_mapping_backing(mm_mapping) {
3864                        MappingBacking::Memory(m) => m.memory().get_koid(),
3865                        MappingBacking::PrivateAnonymous => state.private_anonymous.backing.get_koid(),
3866                    },
3867                    zx_details.vmo_koid,
3868                    "MemoryManager and Zircon must agree on which VMO is mapped in this range",
3869                );
3870
3871                stats.vm_size += zx_mapping.size;
3872
3873                stats.vm_rss += zx_details.committed_bytes;
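                // Descriptive note (based on how these counters are combined here):
                // `populated_bytes` also counts pages that have been swapped out or compressed, so
                // subtracting `committed_bytes` approximates the swapped portion.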
3874                stats.vm_swap += zx_details.populated_bytes - zx_details.committed_bytes;
3875
3876                if mm_mapping.flags().contains(MappingFlags::SHARED) {
3877                    stats.rss_shared += zx_details.committed_bytes;
3878                } else if mm_mapping.flags().contains(MappingFlags::ANONYMOUS) {
3879                    stats.rss_anonymous += zx_details.committed_bytes;
3880                } else if let MappingName::File(_) = mm_mapping.name() {
3881                    stats.rss_file += zx_details.committed_bytes;
3882                }
3883
3884                if mm_mapping.flags().contains(MappingFlags::LOCKED) {
3885                    stats.vm_lck += zx_details.committed_bytes;
3886                }
3887
3888                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
3889                    && mm_mapping.flags().contains(MappingFlags::WRITE)
3890                {
3891                    stats.vm_data += zx_mapping.size;
3892                }
3893
3894                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
3895                    && mm_mapping.flags().contains(MappingFlags::EXEC)
3896                {
3897                    stats.vm_exe += zx_mapping.size;
3898                }
3899            }
3900        });
3901
3902        // TODO(https://fxbug.dev/396221597): Placeholder for now. We need kernel support to track
3903        // the committed bytes high water mark.
3904        stats.vm_rss_hwm = STUB_VM_RSS_HWM;
3905        stats
3906    }
3907
3908    pub fn atomic_load_u32_acquire(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
3909        if let Some(usercopy) = usercopy() {
3910            usercopy.atomic_load_u32_acquire(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
3911        } else {
3912            unreachable!("can only control memory ordering of atomics with usercopy");
3913        }
3914    }
3915
3916    pub fn atomic_load_u32_relaxed(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
3917        if let Some(usercopy) = usercopy() {
3918            usercopy.atomic_load_u32_relaxed(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
3919        } else {
3920            // SAFETY: `self.state.read().read_memory` only returns `Ok` if the
3921            // entire buffer was initialized.
3922            let buf = unsafe {
3923                read_to_array(|buf| {
3924                    self.state.read().read_memory(futex_addr.into(), buf).map(|bytes_read| {
3925                        debug_assert_eq!(bytes_read.len(), std::mem::size_of::<u32>())
3926                    })
3927                })
3928            }?;
3929            Ok(u32::from_ne_bytes(buf))
3930        }
3931    }
3932
3933    pub fn atomic_store_u32_relaxed(
3934        &self,
3935        futex_addr: FutexAddress,
3936        value: u32,
3937    ) -> Result<(), Errno> {
3938        if let Some(usercopy) = usercopy() {
3939            usercopy.atomic_store_u32_relaxed(futex_addr.ptr(), value).map_err(|_| errno!(EFAULT))
3940        } else {
3941            self.state.read().write_memory(futex_addr.into(), value.as_bytes())?;
3942            Ok(())
3943        }
3944    }
3945
3946    pub fn atomic_compare_exchange_u32_acq_rel(
3947        &self,
3948        futex_addr: FutexAddress,
3949        current: u32,
3950        new: u32,
3951    ) -> CompareExchangeResult<u32> {
3952        let Some(usercopy) = usercopy() else {
3953            unreachable!("Atomic compare/exchange requires usercopy.");
3954        };
3955        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_u32_acq_rel(
3956            futex_addr.ptr(),
3957            current,
3958            new,
3959        ))
3960    }
3961
3962    pub fn atomic_compare_exchange_weak_u32_acq_rel(
3963        &self,
3964        futex_addr: FutexAddress,
3965        current: u32,
3966        new: u32,
3967    ) -> CompareExchangeResult<u32> {
3968        let Some(usercopy) = usercopy() else {
3969            unreachable!("Atomic compare/exchange requires usercopy.");
3970        };
3971        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_weak_u32_acq_rel(
3972            futex_addr.ptr(),
3973            current,
3974            new,
3975        ))
3976    }
3977
3978    pub fn get_restricted_vmar_info(&self) -> Option<VmarInfo> {
3979        use zx::HandleBased;
3980        if self.root_vmar.is_invalid_handle() {
3981            return None;
3982        }
3983        Some(VmarInfo { base: RESTRICTED_ASPACE_BASE, len: RESTRICTED_ASPACE_SIZE })
3984    }
3985}
3986
3987/// The result of an atomic compare/exchange operation on user memory.
3988#[derive(Debug, Clone)]
3989pub enum CompareExchangeResult<T> {
3990    /// The current value provided matched the one observed in memory and the new value provided
3991    /// was written.
3992    Success,
3993    /// The provided current value did not match the current value in memory.
3994    Stale { observed: T },
3995    /// There was a general error while accessing the requested memory.
3996    Error(Errno),
3997}
3998
3999impl<T> CompareExchangeResult<T> {
4000    fn from_usercopy(usercopy_res: Result<Result<T, T>, ()>) -> Self {
4001        match usercopy_res {
4002            Ok(Ok(_)) => Self::Success,
4003            Ok(Err(observed)) => Self::Stale { observed },
4004            Err(()) => Self::Error(errno!(EFAULT)),
4005        }
4006    }
4007}
4008
4009impl<T> From<Errno> for CompareExchangeResult<T> {
4010    fn from(e: Errno) -> Self {
4011        Self::Error(e)
4012    }
4013}
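
// Illustrative sketch (not a real call site): callers of the compare/exchange helpers
// above are expected to handle all three arms, e.g.
//
//     match mm.atomic_compare_exchange_u32_acq_rel(futex_addr, expected, new) {
//         CompareExchangeResult::Success => { /* `new` was written */ }
//         CompareExchangeResult::Stale { observed } => { /* retry using `observed` */ }
//         CompareExchangeResult::Error(errno) => return Err(errno),
//     }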
4014
4015/// The user-space address at which a mapping should be placed. Used by [`MemoryManager::map`].
4016#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4017pub enum DesiredAddress {
4018    /// Map at any address chosen by the kernel.
4019    Any,
4020    /// The address is a hint. If the address overlaps an existing mapping a different address may
4021    /// be chosen.
4022    Hint(UserAddress),
4023    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
4024    /// overwrite it), mapping fails.
4025    Fixed(UserAddress),
4026    /// The address is a requirement. If the address overlaps existing mappings (and cannot
4027    /// overwrite them), the overlapping mappings should be unmapped.
4028    FixedOverwrite(UserAddress),
4029}
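
// Roughly speaking (illustrative, not a normative mapping): `mmap(NULL, ...)` corresponds
// to `DesiredAddress::Any`, a non-NULL hint without `MAP_FIXED` to `Hint`,
// `MAP_FIXED_NOREPLACE` to `Fixed`, and `MAP_FIXED` (which on Linux silently replaces
// overlapping mappings) to `FixedOverwrite`.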
4030
4031/// The user-space address at which a mapping should be placed. Used by [`map_in_vmar`].
4032#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4033enum SelectedAddress {
4034    /// See DesiredAddress::Fixed.
4035    Fixed(UserAddress),
4036    /// See DesiredAddress::FixedOverwrite.
4037    FixedOverwrite(UserAddress),
4038}
4039
4040impl SelectedAddress {
4041    fn addr(&self) -> UserAddress {
4042        match self {
4043            SelectedAddress::Fixed(addr) => *addr,
4044            SelectedAddress::FixedOverwrite(addr) => *addr,
4045        }
4046    }
4047}
4048
4049/// Writes one line of the memory map, in the format used by `/proc/self/maps`.
4050fn write_map(
4051    task: &Task,
4052    sink: &mut DynamicFileBuf,
4053    state: &MemoryManagerState,
4054    range: &Range<UserAddress>,
4055    map: &Mapping,
4056) -> Result<(), Errno> {
4057    let line_length = write!(
4058        sink,
4059        "{:08x}-{:08x} {}{}{}{} {:08x} 00:00 {} ",
4060        range.start.ptr(),
4061        range.end.ptr(),
4062        if map.can_read() { 'r' } else { '-' },
4063        if map.can_write() { 'w' } else { '-' },
4064        if map.can_exec() { 'x' } else { '-' },
4065        if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' },
4066        match state.get_mapping_backing(map) {
4067            MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
4068            MappingBacking::PrivateAnonymous => 0,
4069        },
4070        if let MappingName::File(file) = &map.name() { file.name.entry.node.ino } else { 0 }
4071    )?;
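
    // Once a name is appended below, the line written above looks roughly like this
    // (illustrative values):
    //
    //     00400000-00452000 r-xp 00000000 00:00 42                        /system/bin/init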
4072    let fill_to_name = |sink: &mut DynamicFileBuf| {
4073        // The filename goes at >= the 74th column (73rd when zero indexed)
4074        for _ in line_length..73 {
4075            sink.write(b" ");
4076        }
4077    };
4078    match &map.name() {
4079        MappingName::None | MappingName::AioContext(_) => {
4080            if map.flags().contains(MappingFlags::SHARED)
4081                && map.flags().contains(MappingFlags::ANONYMOUS)
4082            {
4083                // See proc(5), "/proc/[pid]/map_files/"
4084                fill_to_name(sink);
4085                sink.write(b"/dev/zero (deleted)");
4086            }
4087        }
4088        MappingName::Stack => {
4089            fill_to_name(sink);
4090            sink.write(b"[stack]");
4091        }
4092        MappingName::Heap => {
4093            fill_to_name(sink);
4094            sink.write(b"[heap]");
4095        }
4096        MappingName::Vdso => {
4097            fill_to_name(sink);
4098            sink.write(b"[vdso]");
4099        }
4100        MappingName::Vvar => {
4101            fill_to_name(sink);
4102            sink.write(b"[vvar]");
4103        }
4104        MappingName::File(file) => {
4105            fill_to_name(sink);
4106            // File names can have newlines that need to be escaped before printing.
4107            // According to https://man7.org/linux/man-pages/man5/proc.5.html the only
4108            // escaping applied to paths is replacing newlines with an octal sequence.
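            // For example, a mapped file whose path contains a literal newline,
            // "foo\nbar", is written here as "foo\012bar".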
4109            let path = file.name.path(task);
4110            sink.write_iter(
4111                path.iter()
4112                    .flat_map(|b| if *b == b'\n' { b"\\012" } else { std::slice::from_ref(b) })
4113                    .copied(),
4114            );
4115        }
4116        MappingName::Vma(name) => {
4117            fill_to_name(sink);
4118            sink.write(b"[anon:");
4119            sink.write(name.as_bytes());
4120            sink.write(b"]");
4121        }
4122        MappingName::Ashmem(name) => {
4123            fill_to_name(sink);
4124            sink.write(b"/dev/ashmem/");
4125            sink.write(name.as_bytes());
4126        }
4127    }
4128    sink.write(b"\n");
4129    Ok(())
4130}
4131
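/// Per-process memory statistics. These roughly correspond to the `VmSize`, `VmRSS`,
/// `VmHWM`, `RssAnon`, `RssFile`, `RssShmem`, `VmData`, `VmStk`, `VmExe`, `VmSwap` and
/// `VmLck` fields that Linux reports in `/proc/<pid>/status`.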
4132#[derive(Default)]
4133pub struct MemoryStats {
4134    pub vm_size: usize,
4135    pub vm_rss: usize,
4136    pub vm_rss_hwm: usize,
4137    pub rss_anonymous: usize,
4138    pub rss_file: usize,
4139    pub rss_shared: usize,
4140    pub vm_data: usize,
4141    pub vm_stack: usize,
4142    pub vm_exe: usize,
4143    pub vm_swap: usize,
4144    pub vm_lck: usize,
4145}
4146
4147/// Implements `/proc/self/maps`.
4148#[derive(Clone)]
4149pub struct ProcMapsFile {
4150    mm: Weak<MemoryManager>,
4151    task: WeakRef<Task>,
4152}
4153impl ProcMapsFile {
4154    pub fn new(task: TempRef<'_, Task>) -> DynamicFile<Self> {
4155        // "maps" is empty for kthreads, rather than inaccessible.
4156        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4157        let task = task.into();
4158        DynamicFile::new(Self { mm, task })
4159    }
4160}
4161
4162impl SequenceFileSource for ProcMapsFile {
4163    type Cursor = UserAddress;
4164
4165    fn next(
4166        &self,
4167        _current_task: &CurrentTask,
4168        cursor: UserAddress,
4169        sink: &mut DynamicFileBuf,
4170    ) -> Result<Option<UserAddress>, Errno> {
4171        let task = Task::from_weak(&self.task)?;
4172        // /proc/<pid>/maps is empty for kthreads and tasks whose memory manager has changed.
4173        let Some(mm) = self.mm.upgrade() else {
4174            return Ok(None);
4175        };
4176        let state = mm.state.read();
4177        if let Some((range, map)) = state.mappings.find_at_or_after(cursor) {
4178            write_map(&task, sink, &state, range, map)?;
4179            return Ok(Some(range.end));
4180        }
4181        Ok(None)
4182    }
4183}
4184
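/// Implements `/proc/self/smaps`.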
4185#[derive(Clone)]
4186pub struct ProcSmapsFile {
4187    mm: Weak<MemoryManager>,
4188    task: WeakRef<Task>,
4189}
4190impl ProcSmapsFile {
4191    pub fn new(task: TempRef<'_, Task>) -> DynamicFile<Self> {
4192        // "smaps" is empty for kthreads, rather than inaccessible.
4193        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4194        DynamicFile::new(Self { mm, task: task.into() })
4195    }
4196}
4197
4198impl DynamicFileSource for ProcSmapsFile {
4199    fn generate(&self, current_task: &CurrentTask, sink: &mut DynamicFileBuf) -> Result<(), Errno> {
4200        let page_size_kb = *PAGE_SIZE / 1024;
4201        let task = Task::from_weak(&self.task)?;
4202        // /proc/<pid>/smaps is empty for kthreads and tasks whose memory manager has changed.
4203        let Some(mm) = self.mm.upgrade() else {
4204            return Ok(());
4205        };
4206        let state = mm.state.read();
4207        state.with_zx_mappings(current_task, |zx_mappings| {
4208            let mut zx_memory_info = RangeMap::<UserAddress, usize>::default();
4209            for idx in 0..zx_mappings.len() {
4210                let zx_mapping = zx_mappings[idx];
4211                // RangeMap uses #[must_use] for its default use case, but this drop is trivial.
4212                let _ = zx_memory_info.insert(
4213                    UserAddress::from_ptr(zx_mapping.base)
4214                        ..UserAddress::from_ptr(zx_mapping.base + zx_mapping.size),
4215                    idx,
4216                );
4217            }
4218
4219            for (mm_range, mm_mapping) in state.mappings.iter() {
4220                let mut committed_bytes = 0;
4221
4222                for (zx_range, zx_mapping_idx) in zx_memory_info.range(mm_range.clone()) {
4223                    let intersect_range = zx_range.intersect(mm_range);
4224                    let zx_mapping = zx_mappings[*zx_mapping_idx];
4225                    let zx_details = zx_mapping.details();
4226                    let Some(zx_details) = zx_details.as_mapping() else { continue };
4227                    let zx_committed_bytes = zx_details.committed_bytes;
4228
4229                    // TODO(https://fxbug.dev/419882465): It can happen that the same Zircon mapping
4230                    // is covered by more than one Starnix mapping. In this case we don't have
4231                    // enough granularity to answer the question of how many committed bytes belong
4232                    // to one mapping or another. Make a best-effort approximation by dividing the
4233                    // committed bytes of a Zircon mapping proportionally.
4234                    committed_bytes += if intersect_range != *zx_range {
4235                        let intersection_size =
4236                            intersect_range.end.ptr() - intersect_range.start.ptr();
4237                        let part = intersection_size as f32 / zx_mapping.size as f32;
4238                        let prorated_committed_bytes: f32 = part * zx_committed_bytes as f32;
4239                        prorated_committed_bytes as u64
4240                    } else {
4241                        zx_committed_bytes as u64
4242                    };
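
                    // Worked example for the proration above (illustrative numbers): if a
                    // 16 KiB Zircon mapping has 8 KiB committed and only 4 KiB of it
                    // intersects this Starnix mapping, the prorated share is
                    // (4096 / 16384) * 8192 = 2048 bytes.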
4243                    assert_eq!(
4244                        match state.get_mapping_backing(mm_mapping) {
4245                            MappingBacking::Memory(m) => m.memory().get_koid(),
4246                            MappingBacking::PrivateAnonymous =>
4247                                state.private_anonymous.backing.get_koid(),
4248                        },
4249                        zx_details.vmo_koid,
4250                        "MemoryManager and Zircon must agree on which VMO is mapped in this range",
4251                    );
4252                }
4253
4254                write_map(&task, sink, &state, mm_range, mm_mapping)?;
4255
4256                let size_kb = (mm_range.end.ptr() - mm_range.start.ptr()) / 1024;
4257                writeln!(sink, "Size:           {size_kb:>8} kB",)?;
4258                let share_count = match state.get_mapping_backing(mm_mapping) {
4259                    MappingBacking::Memory(backing) => {
4260                        let memory = backing.memory();
4261                        if memory.is_clock() {
4262                            // Clock memory mappings are not shared in a meaningful way.
4263                            1
4264                        } else {
4265                            let memory_info = backing.memory().info()?;
4266                            memory_info.share_count as u64
4267                        }
4268                    }
4269                    MappingBacking::PrivateAnonymous => {
4270                        1 // Private mapping
4271                    }
4272                };
4273
4274                let rss_kb = committed_bytes / 1024;
4275                writeln!(sink, "Rss:            {rss_kb:>8} kB")?;
4276
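                // For example (illustrative numbers), a shared mapping with 400 kB resident
                // and a share count of 4 reports a Pss of 100 kB, while a private mapping
                // reports a Pss equal to its Rss.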
4277                let pss_kb = if mm_mapping.flags().contains(MappingFlags::SHARED) {
4278                    rss_kb / share_count
4279                } else {
4280                    rss_kb
4281                };
4282                writeln!(sink, "Pss:            {pss_kb:>8} kB")?;
4283
4284                track_stub!(TODO("https://fxbug.dev/322874967"), "smaps dirty pages");
4285                let (shared_dirty_kb, private_dirty_kb) = (0, 0);
4286
4287                let is_shared = share_count > 1;
4288                let shared_clean_kb = if is_shared { rss_kb } else { 0 };
4289                writeln!(sink, "Shared_Clean:   {shared_clean_kb:>8} kB")?;
4290                writeln!(sink, "Shared_Dirty:   {shared_dirty_kb:>8} kB")?;
4291
4292                let private_clean_kb = if is_shared { 0 } else { rss_kb };
4293                writeln!(sink, "Private_Clean:  {private_clean_kb:>8} kB")?;
4294                writeln!(sink, "Private_Dirty:  {private_dirty_kb:>8} kB")?;
4295
4296                let anonymous_kb = if mm_mapping.private_anonymous() { rss_kb } else { 0 };
4297                writeln!(sink, "Anonymous:      {anonymous_kb:>8} kB")?;
4298                writeln!(sink, "KernelPageSize: {page_size_kb:>8} kB")?;
4299                writeln!(sink, "MMUPageSize:    {page_size_kb:>8} kB")?;
4300
4301                let locked_kb =
4302                    if mm_mapping.flags().contains(MappingFlags::LOCKED) { rss_kb } else { 0 };
4303                writeln!(sink, "Locked:         {locked_kb:>8} kB")?;
4304                writeln!(sink, "VmFlags: {}", mm_mapping.vm_flags())?;
4305
4306                track_stub!(TODO("https://fxbug.dev/297444691"), "optional smaps fields");
4307            }
4308            Ok(())
4309        })
4310    }
4311}
4312
4313/// Creates a memory object that can be used in an anonymous mapping for the `mmap` syscall.
4314pub fn create_anonymous_mapping_memory(size: u64) -> Result<Arc<MemoryObject>, Errno> {
4315    // mremap can grow memory regions, so make sure the memory object is resizable.
4316    let mut memory = MemoryObject::from(
4317        zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, size).map_err(|s| match s {
4318            zx::Status::NO_MEMORY => errno!(ENOMEM),
4319            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
4320            _ => impossible_error(s),
4321        })?,
4322    )
4323    .with_zx_name(b"starnix:memory_manager");
4324
4327    // TODO(https://fxbug.dev/42056890): Audit replace_as_executable usage
4328    memory = memory.replace_as_executable(&VMEX_RESOURCE).map_err(impossible_error)?;
4329    Ok(Arc::new(memory))
4330}
4331
4332fn generate_random_offset_for_aslr(arch_width: ArchWidth) -> usize {
4333    // Generate a number with ASLR_RANDOM_BITS.
4334    let randomness = {
4335        let random_bits =
4336            if arch_width.is_arch32() { ASLR_32_RANDOM_BITS } else { ASLR_RANDOM_BITS };
4337        let mask = (1 << random_bits) - 1;
4338        let mut bytes = [0; std::mem::size_of::<usize>()];
4339        zx::cprng_draw(&mut bytes);
4340        usize::from_le_bytes(bytes) & mask
4341    };
4342
4343    // Transform it into a page-aligned offset.
4344    randomness * (*PAGE_SIZE as usize)
4345}
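
// For example (hypothetical value for ASLR_RANDOM_BITS): with 8 random bits the mask is
// 0xff, so the returned offset is one of 256 page-aligned values in [0, 255 * PAGE_SIZE].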
4346
4347#[cfg(test)]
4348mod tests {
4349    use super::*;
4350    use crate::mm::memory_accessor::MemoryAccessorExt;
4351    use crate::mm::syscalls::do_mmap;
4352    use crate::task::syscalls::sys_prctl;
4353    use crate::testing::*;
4354    use crate::vfs::FdNumber;
4355    use assert_matches::assert_matches;
4356    use itertools::assert_equal;
4357    use starnix_sync::{FileOpsCore, LockEqualOrBefore};
4358    use starnix_uapi::user_address::{UserCString, UserRef};
4359    use starnix_uapi::{
4360        MAP_ANONYMOUS, MAP_FIXED, MAP_GROWSDOWN, MAP_PRIVATE, MAP_SHARED, PR_SET_VMA,
4361        PR_SET_VMA_ANON_NAME, PROT_NONE, PROT_READ,
4362    };
4363    use std::ffi::CString;
4364    use zerocopy::{FromBytes, Immutable, KnownLayout};
4365
4366    #[::fuchsia::test]
4367    fn test_mapping_flags() {
4368        let options = MappingOptions::ANONYMOUS;
4369        let access_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
4370        let mapping_flags = MappingFlags::from_access_flags_and_options(access_flags, options);
4371        assert_eq!(mapping_flags.access_flags(), access_flags);
4372        assert_eq!(mapping_flags.options(), options);
4373
4374        let new_access_flags = ProtectionFlags::READ | ProtectionFlags::EXEC;
4375        let adjusted_mapping_flags = mapping_flags.with_access_flags(new_access_flags);
4376        assert_eq!(adjusted_mapping_flags.access_flags(), new_access_flags);
4377        assert_eq!(adjusted_mapping_flags.options(), options);
4378    }
4379
4380    #[::fuchsia::test]
4381    async fn test_brk() {
4382        spawn_kernel_and_run(async |locked, current_task| {
4383            let mm = current_task.mm().unwrap();
4384
4385            // Look up the given addr in the mappings table.
4386            let get_range = |addr: UserAddress| {
4387                let state = mm.state.read();
4388                state.mappings.get(addr).map(|(range, mapping)| (range.clone(), mapping.clone()))
4389            };
4390
4391            // Initialize the program break.
4392            let base_addr = mm
4393                .set_brk(locked, &current_task, UserAddress::default())
4394                .expect("failed to set initial program break");
4395            assert!(base_addr > UserAddress::default());
4396
4397            // Page containing the program break address should not be mapped.
4398            assert_eq!(get_range(base_addr), None);
4399
4400            // Growing it by a single byte results in that page becoming mapped.
4401            let addr0 = mm
4402                .set_brk(locked, &current_task, (base_addr + 1u64).unwrap())
4403                .expect("failed to grow brk");
4404            assert!(addr0 > base_addr);
4405            let (range0, _) = get_range(base_addr).expect("base_addr should be mapped");
4406            assert_eq!(range0.start, base_addr);
4407            assert_eq!(range0.end, (base_addr + *PAGE_SIZE).unwrap());
4408
4409            // Grow the program break by another byte, which won't be enough to cause additional pages to be mapped.
4410            let addr1 = mm
4411                .set_brk(locked, &current_task, (base_addr + 2u64).unwrap())
4412                .expect("failed to grow brk");
4413            assert_eq!(addr1, (base_addr + 2u64).unwrap());
4414            let (range1, _) = get_range(base_addr).expect("base_addr should be mapped");
4415            assert_eq!(range1.start, range0.start);
4416            assert_eq!(range1.end, range0.end);
4417
4418            // Grow the program break by a non-trivial amount and observe the larger mapping.
4419            let addr2 = mm
4420                .set_brk(locked, &current_task, (base_addr + 24893u64).unwrap())
4421                .expect("failed to grow brk");
4422            assert_eq!(addr2, (base_addr + 24893u64).unwrap());
4423            let (range2, _) = get_range(base_addr).expect("base_addr should be mapped");
4424            assert_eq!(range2.start, base_addr);
4425            assert_eq!(range2.end, addr2.round_up(*PAGE_SIZE).unwrap());
4426
4427            // Shrink the program break and observe the smaller mapping.
4428            let addr3 = mm
4429                .set_brk(locked, &current_task, (base_addr + 14832u64).unwrap())
4430                .expect("failed to shrink brk");
4431            assert_eq!(addr3, (base_addr + 14832u64).unwrap());
4432            let (range3, _) = get_range(base_addr).expect("base_addr should be mapped");
4433            assert_eq!(range3.start, base_addr);
4434            assert_eq!(range3.end, addr3.round_up(*PAGE_SIZE).unwrap());
4435
4436            // Shrink the program break close to zero and observe the smaller mapping.
4437            let addr4 = mm
4438                .set_brk(locked, &current_task, (base_addr + 3u64).unwrap())
4439                .expect("failed to drastically shrink brk");
4440            assert_eq!(addr4, (base_addr + 3u64).unwrap());
4441            let (range4, _) = get_range(base_addr).expect("base_addr should be mapped");
4442            assert_eq!(range4.start, base_addr);
4443            assert_eq!(range4.end, addr4.round_up(*PAGE_SIZE).unwrap());
4444
4445            // Shrink the program break to zero and observe that the mapping is entirely gone.
4446            let addr5 = mm
4447                .set_brk(locked, &current_task, base_addr)
4448                .expect("failed to drastically shrink brk to zero");
4449            assert_eq!(addr5, base_addr);
4450            assert_eq!(get_range(base_addr), None);
4451        })
4452        .await;
4453    }
4454
4455    #[::fuchsia::test]
4456    async fn test_mm_exec() {
4457        spawn_kernel_and_run(async |locked, current_task| {
4458            let mm = current_task.mm().unwrap();
4459
4460            let has = |addr: UserAddress| -> bool {
4461                let state = mm.state.read();
4462                state.mappings.get(addr).is_some()
4463            };
4464
4465            let brk_addr = mm
4466                .set_brk(locked, &current_task, UserAddress::default())
4467                .expect("failed to set initial program break");
4468            assert!(brk_addr > UserAddress::default());
4469
4470            // Allocate a single page of BRK space, so that the break base address is mapped.
4471            let _ = mm
4472                .set_brk(locked, &current_task, (brk_addr + 1u64).unwrap())
4473                .expect("failed to grow program break");
4474            assert!(has(brk_addr));
4475
4476            let mapped_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
4477            assert!(mapped_addr > UserAddress::default());
4478            assert!(has(mapped_addr));
4479
4480            let node = current_task.lookup_path_from_root(locked, "/".into()).unwrap();
4481            let new_mm = mm.exec(node, ArchWidth::Arch64).expect("failed to exec memory manager");
4482            current_task.mm.update(Some(new_mm));
4483
4484            assert!(!has(brk_addr));
4485            assert!(!has(mapped_addr));
4486
4487            // Check that the old addresses are actually available for mapping.
4488            let brk_addr2 = map_memory(locked, &current_task, brk_addr, *PAGE_SIZE);
4489            assert_eq!(brk_addr, brk_addr2);
4490            let mapped_addr2 = map_memory(locked, &current_task, mapped_addr, *PAGE_SIZE);
4491            assert_eq!(mapped_addr, mapped_addr2);
4492        })
4493        .await;
4494    }
4495
4496    #[::fuchsia::test]
4497    async fn test_get_contiguous_mappings_at() {
4498        spawn_kernel_and_run(async |locked, current_task| {
4499            let mm = current_task.mm().unwrap();
4500
4501            // Create four one-page mappings with a hole between the third one and the fourth one.
4502            let page_size = *PAGE_SIZE as usize;
4503            let addr_a = (mm.base_addr + 10 * page_size).unwrap();
4504            let addr_b = (mm.base_addr + 11 * page_size).unwrap();
4505            let addr_c = (mm.base_addr + 12 * page_size).unwrap();
4506            let addr_d = (mm.base_addr + 14 * page_size).unwrap();
4507            assert_eq!(map_memory(locked, &current_task, addr_a, *PAGE_SIZE), addr_a);
4508            assert_eq!(map_memory(locked, &current_task, addr_b, *PAGE_SIZE), addr_b);
4509            assert_eq!(map_memory(locked, &current_task, addr_c, *PAGE_SIZE), addr_c);
4510            assert_eq!(map_memory(locked, &current_task, addr_d, *PAGE_SIZE), addr_d);
4511
4512            {
4513                let mm_state = mm.state.read();
4514                // Verify that requesting an unmapped address returns an empty iterator.
4515                assert_equal(
4516                    mm_state.get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 50).unwrap(),
4517                    vec![],
4518                );
4519                assert_equal(
4520                    mm_state.get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 200).unwrap(),
4521                    vec![],
4522                );
4523
4524                // Verify that requesting zero bytes returns an empty iterator.
4525                assert_equal(mm_state.get_contiguous_mappings_at(addr_a, 0).unwrap(), vec![]);
4526
4527                // Verify errors.
4528                assert_eq!(
4529                    mm_state
4530                        .get_contiguous_mappings_at(UserAddress::from(100), usize::MAX)
4531                        .err()
4532                        .unwrap(),
4533                    errno!(EFAULT)
4534                );
4535                assert_eq!(
4536                    mm_state
4537                        .get_contiguous_mappings_at((mm_state.max_address() + 1u64).unwrap(), 0)
4538                        .err()
4539                        .unwrap(),
4540                    errno!(EFAULT)
4541                );
4542            }
4543
4544            assert_eq!(mm.get_mapping_count(), 2);
4545            let mm_state = mm.state.read();
4546            let (map_a, map_b) = {
4547                let mut it = mm_state.mappings.iter();
4548                (it.next().unwrap().1, it.next().unwrap().1)
4549            };
4550
4551            assert_equal(
4552                mm_state.get_contiguous_mappings_at(addr_a, page_size).unwrap(),
4553                vec![(map_a, page_size)],
4554            );
4555
4556            assert_equal(
4557                mm_state.get_contiguous_mappings_at(addr_a, page_size / 2).unwrap(),
4558                vec![(map_a, page_size / 2)],
4559            );
4560
4561            assert_equal(
4562                mm_state.get_contiguous_mappings_at(addr_a, page_size * 3).unwrap(),
4563                vec![(map_a, page_size * 3)],
4564            );
4565
4566            assert_equal(
4567                mm_state.get_contiguous_mappings_at(addr_b, page_size).unwrap(),
4568                vec![(map_a, page_size)],
4569            );
4570
4571            assert_equal(
4572                mm_state.get_contiguous_mappings_at(addr_d, page_size).unwrap(),
4573                vec![(map_b, page_size)],
4574            );
4575
4576            // Verify that results stop if there is a hole.
4577            assert_equal(
4578                mm_state
4579                    .get_contiguous_mappings_at((addr_a + page_size / 2).unwrap(), page_size * 10)
4580                    .unwrap(),
4581                vec![(map_a, page_size * 2 + page_size / 2)],
4582            );
4583
4584            // Verify that results stop at the last mapped page.
4585            assert_equal(
4586                mm_state.get_contiguous_mappings_at(addr_d, page_size * 10).unwrap(),
4587                vec![(map_b, page_size)],
4588            );
4589        })
4590        .await;
4591    }
4592
4593    #[::fuchsia::test]
4594    async fn test_read_write_crossing_mappings() {
4595        spawn_kernel_and_run(async |locked, current_task| {
4596            let mm = current_task.mm().unwrap();
4597            let ma = current_task.deref();
4598
4599            // Map two contiguous pages at fixed addresses, but backed by distinct mappings.
4600            let page_size = *PAGE_SIZE;
4601            let addr = (mm.base_addr + 10 * page_size).unwrap();
4602            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4603            assert_eq!(
4604                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4605                (addr + page_size).unwrap()
4606            );
4607            // Mappings get merged since they are backed by the same memory object.
4608            assert_eq!(mm.get_mapping_count(), 1);
4609
4610            // Write a pattern crossing our two mappings.
4611            let test_addr = (addr + page_size / 2).unwrap();
4612            let data: Vec<u8> = (0..page_size).map(|i| (i % 256) as u8).collect();
4613            ma.write_memory(test_addr, &data).expect("failed to write test data");
4614
4615            // Read it back.
4616            let data_readback =
4617                ma.read_memory_to_vec(test_addr, data.len()).expect("failed to read test data");
4618            assert_eq!(&data, &data_readback);
4619        })
4620        .await;
4621    }
4622
4623    #[::fuchsia::test]
4624    async fn test_read_write_errors() {
4625        spawn_kernel_and_run(async |locked, current_task| {
4626            let ma = current_task.deref();
4627
4628            let page_size = *PAGE_SIZE;
4629            let addr = map_memory(locked, &current_task, UserAddress::default(), page_size);
4630            let buf = vec![0u8; page_size as usize];
4631
4632            // Verify that accessing data that is only partially mapped is an error.
4633            let partial_addr_before = (addr - page_size / 2).unwrap();
4634            assert_eq!(ma.write_memory(partial_addr_before, &buf), error!(EFAULT));
4635            assert_eq!(ma.read_memory_to_vec(partial_addr_before, buf.len()), error!(EFAULT));
4636            let partial_addr_after = (addr + page_size / 2).unwrap();
4637            assert_eq!(ma.write_memory(partial_addr_after, &buf), error!(EFAULT));
4638            assert_eq!(ma.read_memory_to_vec(partial_addr_after, buf.len()), error!(EFAULT));
4639
4640            // Verify that accessing unmapped memory is an error.
4641            let unmapped_addr = (addr - 10 * page_size).unwrap();
4642            assert_eq!(ma.write_memory(unmapped_addr, &buf), error!(EFAULT));
4643            assert_eq!(ma.read_memory_to_vec(unmapped_addr, buf.len()), error!(EFAULT));
4644
4645            // However, accessing zero bytes in unmapped memory is not an error.
4646            ma.write_memory(unmapped_addr, &[]).expect("failed to write no data");
4647            ma.read_memory_to_vec(unmapped_addr, 0).expect("failed to read no data");
4648        })
4649        .await;
4650    }
4651
4652    #[::fuchsia::test]
4653    async fn test_read_c_string_to_vec_large() {
4654        spawn_kernel_and_run(async |locked, current_task| {
4655            let mm = current_task.mm().unwrap();
4656            let ma = current_task.deref();
4657
4658            let page_size = *PAGE_SIZE;
4659            let max_size = 4 * page_size as usize;
4660            let addr = (mm.base_addr + 10 * page_size).unwrap();
4661
4662            assert_eq!(map_memory(locked, &current_task, addr, max_size as u64), addr);
4663
4664            let mut random_data = vec![0; max_size];
4665            zx::cprng_draw(&mut random_data);
4666            // Replace any NUL bytes so the string does not terminate early.
4667            for i in 0..random_data.len() {
4668                if random_data[i] == 0 {
4669                    random_data[i] = 1;
4670                }
4671            }
4672            random_data[max_size - 1] = 0;
4673
4674            ma.write_memory(addr, &random_data).expect("failed to write test string");
4675            // We should read the same value minus the last byte (NUL char).
4676            assert_eq!(
4677                ma.read_c_string_to_vec(UserCString::new(current_task, addr), max_size).unwrap(),
4678                random_data[..max_size - 1]
4679            );
4680        })
4681        .await;
4682    }
4683
4684    #[::fuchsia::test]
4685    async fn test_read_c_string_to_vec() {
4686        spawn_kernel_and_run(async |locked, current_task| {
4687            let mm = current_task.mm().unwrap();
4688            let ma = current_task.deref();
4689
4690            let page_size = *PAGE_SIZE;
4691            let max_size = 2 * page_size as usize;
4692            let addr = (mm.base_addr + 10 * page_size).unwrap();
4693
4694            // Map a page at a fixed address and write an unterminated string at the end of it.
4695            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4696            let test_str = b"foo!";
4697            let test_addr =
4698                addr.checked_add(page_size as usize).unwrap().checked_sub(test_str.len()).unwrap();
4699            ma.write_memory(test_addr, test_str).expect("failed to write test string");
4700
4701            // Expect error if the string is not terminated.
4702            assert_eq!(
4703                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size),
4704                error!(ENAMETOOLONG)
4705            );
4706
4707            // Expect success if the string is terminated.
4708            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
4709            assert_eq!(
4710                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
4711                    .unwrap(),
4712                "foo"
4713            );
4714
4715            // Expect success if the string spans over two mappings.
4716            assert_eq!(
4717                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4718                (addr + page_size).unwrap()
4719            );
4720            // TODO: Adjacent private anonymous mappings are collapsed. To test this case, the test
4721            // needs to provide a backing for the second mapping.
4722            // assert_eq!(mm.get_mapping_count(), 2);
4723            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
4724                .expect("failed to write extra chars");
4725            assert_eq!(
4726                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
4727                    .unwrap(),
4728                "foobar",
4729            );
4730
4731            // Expect error if the string exceeds the max limit.
4732            assert_eq!(
4733                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), 2),
4734                error!(ENAMETOOLONG)
4735            );
4736
4737            // Expect error if the address is invalid.
4738            assert_eq!(
4739                ma.read_c_string_to_vec(UserCString::null(current_task), max_size),
4740                error!(EFAULT)
4741            );
4742        })
4743        .await;
4744    }
4745
4746    #[::fuchsia::test]
4747    async fn can_read_argv_like_regions() {
4748        spawn_kernel_and_run(async |locked, current_task| {
4749            let ma = current_task.deref();
4750
4751            // Map a page.
4752            let page_size = *PAGE_SIZE;
4753            let addr = map_memory_anywhere(locked, &current_task, page_size);
4754            assert!(!addr.is_null());
4755
4756            // Write an unterminated string.
4757            let mut payload = "first".as_bytes().to_vec();
4758            let mut expected_parses = vec![];
4759            ma.write_memory(addr, &payload).unwrap();
4760
4761            // Expect success if the string is terminated.
4762            expected_parses.push(payload.clone());
4763            payload.push(0);
4764            ma.write_memory(addr, &payload).unwrap();
4765            assert_eq!(
4766                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
4767                expected_parses,
4768            );
4769
4770            // Make sure we can parse multiple strings from the same region.
4771            let second = b"second";
4772            payload.extend(second);
4773            payload.push(0);
4774            expected_parses.push(second.to_vec());
4775
4776            let third = b"third";
4777            payload.extend(third);
4778            payload.push(0);
4779            expected_parses.push(third.to_vec());
4780
4781            ma.write_memory(addr, &payload).unwrap();
4782            assert_eq!(
4783                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
4784                expected_parses,
4785            );
4786        })
4787        .await;
4788    }
4789
4790    #[::fuchsia::test]
4791    async fn truncate_argv_like_regions() {
4792        spawn_kernel_and_run(async |locked, current_task| {
4793            let ma = current_task.deref();
4794
4795            // Map a page.
4796            let page_size = *PAGE_SIZE;
4797            let addr = map_memory_anywhere(locked, &current_task, page_size);
4798            assert!(!addr.is_null());
4799
4800            let payload = b"first\0second\0third\0";
4801            ma.write_memory(addr, payload).unwrap();
4802            assert_eq!(
4803                ma.read_nul_delimited_c_string_list(addr, payload.len() - 3).unwrap(),
4804                vec![b"first".to_vec(), b"second".to_vec(), b"thi".to_vec()],
4805                "Skipping last three bytes of payload should skip last two bytes of 3rd string"
4806            );
4807        })
4808        .await;
4809    }
4810
4811    #[::fuchsia::test]
4812    async fn test_read_c_string() {
4813        spawn_kernel_and_run(async |locked, current_task| {
4814            let mm = current_task.mm().unwrap();
4815            let ma = current_task.deref();
4816
4817            let page_size = *PAGE_SIZE;
4818            let buf_cap = 2 * page_size as usize;
4819            let mut buf = Vec::with_capacity(buf_cap);
4820            // We can't just use `spare_capacity_mut` because `Vec::with_capacity`
4821            // returns a `Vec` with _at least_ the requested capacity.
4822            let buf = &mut buf.spare_capacity_mut()[..buf_cap];
4823            let addr = (mm.base_addr + 10 * page_size).unwrap();
4824
4825            // Map a page at a fixed address and write an unterminated string at the end of it.
4826            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4827            let test_str = b"foo!";
4828            let test_addr = (addr + (page_size - test_str.len() as u64)).unwrap();
4829            ma.write_memory(test_addr, test_str).expect("failed to write test string");
4830
4831            // Expect error if the string is not terminated.
4832            assert_eq!(
4833                ma.read_c_string(UserCString::new(current_task, test_addr), buf),
4834                error!(ENAMETOOLONG)
4835            );
4836
4837            // Expect success if the string is terminated.
4838            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
4839            assert_eq!(
4840                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
4841                "foo"
4842            );
4843
4844            // Expect success if the string spans over two mappings.
4845            assert_eq!(
4846                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4847                (addr + page_size).unwrap()
4848            );
4849            // TODO: For these to be multiple mappings we need to provide a file backing for the next
4850            // page, or the mappings will be collapsed.
4851            //assert_eq!(mm.get_mapping_count(), 2);
4852            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
4853                .expect("failed to write extra chars");
4854            assert_eq!(
4855                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
4856                "foobar"
4857            );
4858
4859            // Expect error if the string does not fit in the provided buffer.
4860            assert_eq!(
4861                ma.read_c_string(
4862                    UserCString::new(current_task, test_addr),
4863                    &mut [MaybeUninit::uninit(); 2]
4864                ),
4865                error!(ENAMETOOLONG)
4866            );
4867
4868            // Expect error if the address is invalid.
4869            assert_eq!(ma.read_c_string(UserCString::null(current_task), buf), error!(EFAULT));
4870        })
4871        .await;
4872    }
4873
4874    #[::fuchsia::test]
4875    async fn test_find_next_unused_range() {
4876        spawn_kernel_and_run(async |locked, current_task| {
4877            let mm = current_task.mm().unwrap();
4878
4879            let mmap_top = mm.state.read().find_next_unused_range(0).unwrap().ptr();
4880            let page_size = *PAGE_SIZE as usize;
4881            assert!(mmap_top <= RESTRICTED_ASPACE_HIGHEST_ADDRESS);
4882
4883            // No mappings - top address minus requested size is available
4884            assert_eq!(
4885                mm.state.read().find_next_unused_range(page_size).unwrap(),
4886                UserAddress::from_ptr(mmap_top - page_size)
4887            );
4888
4889            // Fill it.
4890            let addr = UserAddress::from_ptr(mmap_top - page_size);
4891            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
4892
4893            // The next available range is right before the new mapping.
4894            assert_eq!(
4895                mm.state.read().find_next_unused_range(page_size).unwrap(),
4896                UserAddress::from_ptr(addr.ptr() - page_size)
4897            );
4898
4899            // Allocate an extra page before a one-page gap.
4900            let addr2 = UserAddress::from_ptr(addr.ptr() - 2 * page_size);
4901            assert_eq!(map_memory(locked, &current_task, addr2, *PAGE_SIZE), addr2);
4902
4903            // Searching for a one-page range still gives the same result.
4904            assert_eq!(
4905                mm.state.read().find_next_unused_range(page_size).unwrap(),
4906                UserAddress::from_ptr(addr.ptr() - page_size)
4907            );
4908
4909            // Searching for a bigger range results in the area before the second mapping
4910            assert_eq!(
4911                mm.state.read().find_next_unused_range(2 * page_size).unwrap(),
4912                UserAddress::from_ptr(addr2.ptr() - 2 * page_size)
4913            );
4914
4915            // Searching for more memory than available should fail.
4916            assert_eq!(mm.state.read().find_next_unused_range(mmap_top), None);
4917        })
4918        .await;
4919    }
4920
4921    #[::fuchsia::test]
4922    async fn test_count_placements() {
4923        spawn_kernel_and_run(async |locked, current_task| {
4924            let mm = current_task.mm().unwrap();
4925
4926            // ten-page range
4927            let page_size = *PAGE_SIZE as usize;
4928            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
4929                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
4930
4931            assert_eq!(
4932                mm.state.read().count_possible_placements(11 * page_size, &subrange_ten),
4933                Some(0)
4934            );
4935            assert_eq!(
4936                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
4937                Some(1)
4938            );
4939            assert_eq!(
4940                mm.state.read().count_possible_placements(9 * page_size, &subrange_ten),
4941                Some(2)
4942            );
4943            assert_eq!(
4944                mm.state.read().count_possible_placements(page_size, &subrange_ten),
4945                Some(10)
4946            );
4947
4948            // map 6th page
4949            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
4950            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
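
            // Mapping the 6th page splits the subrange into a 5-page gap (pages 0-4) and a
            // 4-page gap (pages 6-9), so a 4-page region fits in 2 + 1 = 3 ways and a
            // single page in 5 + 4 = 9 ways.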
4951
4952            assert_eq!(
4953                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
4954                Some(0)
4955            );
4956            assert_eq!(
4957                mm.state.read().count_possible_placements(5 * page_size, &subrange_ten),
4958                Some(1)
4959            );
4960            assert_eq!(
4961                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
4962                Some(3)
4963            );
4964            assert_eq!(
4965                mm.state.read().count_possible_placements(page_size, &subrange_ten),
4966                Some(9)
4967            );
4968        })
4969        .await;
4970    }
4971
4972    #[::fuchsia::test]
4973    async fn test_pick_placement() {
4974        spawn_kernel_and_run(async |locked, current_task| {
4975            let mm = current_task.mm().unwrap();
4976
4977            let page_size = *PAGE_SIZE as usize;
4978            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
4979                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
4980
4981            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
4982            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
4983            assert_eq!(
4984                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
4985                Some(3)
4986            );
4987
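            // The three possible 4-page placements, in address order, start at page 0,
            // page 1, and page 6 of the ten-page subrange.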
4988            assert_eq!(
4989                mm.state.read().pick_placement(4 * page_size, 0, &subrange_ten),
4990                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE))
4991            );
4992            assert_eq!(
4993                mm.state.read().pick_placement(4 * page_size, 1, &subrange_ten),
4994                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + page_size))
4995            );
4996            assert_eq!(
4997                mm.state.read().pick_placement(4 * page_size, 2, &subrange_ten),
4998                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 6 * page_size))
4999            );
5000        })
5001        .await;
5002    }
5003
5004    #[::fuchsia::test]
5005    async fn test_find_random_unused_range() {
5006        spawn_kernel_and_run(async |locked, current_task| {
5007            let mm = current_task.mm().unwrap();
5008
5009            // ten-page range
5010            let page_size = *PAGE_SIZE as usize;
5011            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5012                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5013
5014            for _ in 0..10 {
5015                let addr = mm.state.read().find_random_unused_range(page_size, &subrange_ten);
5016                assert!(addr.is_some());
5017                assert_eq!(
5018                    map_memory(locked, &current_task, addr.unwrap(), *PAGE_SIZE),
5019                    addr.unwrap()
5020                );
5021            }
5022            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5023        })
5024        .await;
5025    }
5026
5027    #[::fuchsia::test]
5028    async fn test_grows_down_near_aspace_base() {
5029        spawn_kernel_and_run(async |locked, current_task| {
5030            let mm = current_task.mm().unwrap();
5031
5032            let page_count = 10;
5033
5034            let page_size = *PAGE_SIZE as usize;
5035            let addr =
5036                (UserAddress::from_ptr(RESTRICTED_ASPACE_BASE) + page_count * page_size).unwrap();
5037            assert_eq!(
5038                map_memory_with_flags(
5039                    locked,
5040                    &current_task,
5041                    addr,
5042                    page_size as u64,
5043                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN
5044                ),
5045                addr
5046            );
5047
5048            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)..addr;
5049            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5050        })
5051        .await;
5052    }
5053
5054    #[::fuchsia::test]
5055    async fn test_unmap_returned_mappings() {
5056        spawn_kernel_and_run(async |locked, current_task| {
5057            let mm = current_task.mm().unwrap();
5058
5059            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5060
5061            let mut released_mappings = ReleasedMappings::default();
5062            let mut mm_state = mm.state.write();
5063            let unmap_result =
5064                mm_state.unmap(&mm, addr, *PAGE_SIZE as usize, &mut released_mappings);
5065            assert!(unmap_result.is_ok());
5066            assert_eq!(released_mappings.len(), 1);
5067            released_mappings.finalize(mm_state);
5068        })
5069        .await;
5070    }
5071
5072    #[::fuchsia::test]
5073    async fn test_unmap_returns_multiple_mappings() {
5074        spawn_kernel_and_run(async |locked, current_task| {
5075            let mm = current_task.mm().unwrap();
5076
5077            let addr = mm.state.read().find_next_unused_range(3 * *PAGE_SIZE as usize).unwrap();
5078            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5079            let _ = map_memory(locked, &current_task, (addr + 2 * *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5080
5081            let mut released_mappings = ReleasedMappings::default();
5082            let mut mm_state = mm.state.write();
5083            let unmap_result =
5084                mm_state.unmap(&mm, addr, (*PAGE_SIZE * 3) as usize, &mut released_mappings);
5085            assert!(unmap_result.is_ok());
5086            assert_eq!(released_mappings.len(), 2);
5087            released_mappings.finalize(mm_state);
5088        })
5089        .await;
5090    }
5091
5092    /// Maps two pages in separate mappings next to each other, then unmaps the first page.
5093    /// The second page should not be modified.
5094    #[::fuchsia::test]
5095    async fn test_map_two_unmap_one() {
5096        spawn_kernel_and_run(async |locked, current_task| {
5097            let mm = current_task.mm().unwrap();
5098
5099            // reserve memory for both pages
5100            let addr_reserve =
5101                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5102            let addr1 = do_mmap(
5103                locked,
5104                &current_task,
5105                addr_reserve,
5106                *PAGE_SIZE as usize,
5107                PROT_READ, // Map read-only to avoid merging of the two mappings
5108                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5109                FdNumber::from_raw(-1),
5110                0,
5111            )
5112            .expect("failed to mmap");
5113            let addr2 = map_memory_with_flags(
5114                locked,
5115                &current_task,
5116                (addr_reserve + *PAGE_SIZE).unwrap(),
5117                *PAGE_SIZE,
5118                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5119            );
5120            let state = mm.state.read();
5121            let (range1, _) = state.mappings.get(addr1).expect("mapping");
5122            assert_eq!(range1.start, addr1);
5123            assert_eq!(range1.end, (addr1 + *PAGE_SIZE).unwrap());
5124            let (range2, mapping2) = state.mappings.get(addr2).expect("mapping");
5125            assert_eq!(range2.start, addr2);
5126            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5127            let original_memory2 = {
5128                match state.get_mapping_backing(mapping2) {
5129                    MappingBacking::Memory(backing) => {
5130                        assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5131                        backing.memory().clone()
5132                    }
5133                    MappingBacking::PrivateAnonymous => {
5134                        panic!("Unexpected private anonymous mapping")
5135                    }
5136                }
5137            };
5138            std::mem::drop(state);
5139
5140            assert_eq!(mm.unmap(addr1, *PAGE_SIZE as usize), Ok(()));
5141
5142            let state = mm.state.read();
5143
5144            // The first page should be unmapped.
5145            assert!(state.mappings.get(addr1).is_none());
5146
5147            // The second page should remain unchanged.
5148            let (range2, mapping2) = state.mappings.get(addr2).expect("second page");
5149            assert_eq!(range2.start, addr2);
5150            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5151            match state.get_mapping_backing(mapping2) {
5152                MappingBacking::Memory(backing) => {
5153                    assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5154                    assert_eq!(original_memory2.get_koid(), backing.memory().get_koid());
5155                }
5156                MappingBacking::PrivateAnonymous => panic!("Unexpected private anonymous mapping"),
5157            }
5158        })
5159        .await;
5160    }
5161
5162    #[::fuchsia::test]
5163    async fn test_read_write_objects() {
5164        spawn_kernel_and_run(async |locked, current_task| {
5165            let ma = current_task.deref();
5166            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5167            let items_ref = UserRef::<i32>::new(addr);
5168
5169            let items_written = vec![0, 2, 3, 7, 1];
5170            ma.write_objects(items_ref, &items_written).expect("Failed to write object array.");
5171
5172            let items_read = ma
5173                .read_objects_to_vec(items_ref, items_written.len())
5174                .expect("Failed to read object array.");
5175
5176            assert_eq!(items_written, items_read);
5177        })
5178        .await;
5179    }
5180
5181    #[::fuchsia::test]
5182    async fn test_read_write_objects_null() {
5183        spawn_kernel_and_run(async |_, current_task| {
5184            let ma = current_task.deref();
5185            let items_ref = UserRef::<i32>::new(UserAddress::default());
5186
5187            let items_written = vec![];
5188            ma.write_objects(items_ref, &items_written)
5189                .expect("Failed to write empty object array.");
5190
5191            let items_read = ma
5192                .read_objects_to_vec(items_ref, items_written.len())
5193                .expect("Failed to read empty object array.");
5194
5195            assert_eq!(items_written, items_read);
5196        })
5197        .await;
5198    }
5199
5200    #[::fuchsia::test]
5201    async fn test_read_object_partial() {
5202        #[derive(Debug, Default, Copy, Clone, KnownLayout, FromBytes, Immutable, PartialEq)]
5203        struct Items {
5204            val: [i32; 4],
5205        }
5206
5207        spawn_kernel_and_run(async |locked, current_task| {
5208            let ma = current_task.deref();
5209            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5210            let items_array_ref = UserRef::<i32>::new(addr);
5211
5212            // Populate some values.
5213            let items_written = vec![75, 23, 51, 98];
5214            ma.write_objects(items_array_ref, &items_written)
5215                .expect("Failed to write object array.");
5216
5217            // Full read of all 4 values.
5218            let items_ref = UserRef::<Items>::new(addr);
5219            let items_read = ma
5220                .read_object_partial(items_ref, std::mem::size_of::<Items>())
5221                .expect("Failed to read object");
5222            assert_eq!(items_written, items_read.val);
5223
5224            // Partial read of the first two.
5225            let items_read = ma.read_object_partial(items_ref, 8).expect("Failed to read object");
5226            assert_eq!(vec![75, 23, 0, 0], items_read.val);
5227
5228            // The API currently allows reading 0 bytes (this could be re-evaluated), so test that it
5229            // does the right thing.
5230            let items_read = ma.read_object_partial(items_ref, 0).expect("Failed to read object");
5231            assert_eq!(vec![0, 0, 0, 0], items_read.val);
5232
5233            // Size bigger than the object.
5234            assert_eq!(
5235                ma.read_object_partial(items_ref, std::mem::size_of::<Items>() + 8),
5236                error!(EINVAL)
5237            );
5238
5239            // Bad pointer.
5240            assert_eq!(
5241                ma.read_object_partial(UserRef::<Items>::new(UserAddress::from(1)), 16),
5242                error!(EFAULT)
5243            );
5244        })
5245        .await;
5246    }
5247
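    // Maps two adjacent pages, removes all access from the second, and checks that a partial
    // read spanning both stops at the protection boundary (exactly one page of data).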
    #[::fuchsia::test]
    async fn test_partial_read() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();
            let ma = current_task.deref();

            let addr = mm.state.read().find_next_unused_range(2 * *PAGE_SIZE as usize).unwrap();
            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
            let second_map =
                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE);

            let bytes = vec![0xf; (*PAGE_SIZE * 2) as usize];
            assert!(ma.write_memory(addr, &bytes).is_ok());
            let mut state = mm.state.write();
            let mut released_mappings = ReleasedMappings::default();
            state
                .protect(
                    ma,
                    second_map,
                    *PAGE_SIZE as usize,
                    ProtectionFlags::empty(),
                    &mut released_mappings,
                )
                .unwrap();
            released_mappings.finalize(state);
            assert_eq!(
                ma.read_memory_partial_to_vec(addr, bytes.len()).unwrap().len(),
                *PAGE_SIZE as usize,
            );
        })
        .await;
    }

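    // Test helper: maps an anonymous MAP_GROWSDOWN region of `length` bytes at any free address.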
    fn map_memory_growsdown<L>(
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        length: u64,
    ) -> UserAddress
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        map_memory_with_flags(
            locked,
            current_task,
            UserAddress::default(),
            length,
            MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN,
        )
    }

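    // With no mappings in the address space, extend_growsdown_mapping_to_address reports that
    // nothing was grown.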
    #[::fuchsia::test]
    async fn test_grow_mapping_empty_mm() {
        spawn_kernel_and_run(async |_, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = UserAddress::from(0x100000);

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
        })
        .await;
    }

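    // An address that already falls inside an existing mapping does not trigger growth.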
    #[::fuchsia::test]
    async fn test_grow_inside_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
        })
        .await;
    }

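    // Neither read nor write faults inside a read-only, non-GROWSDOWN mapping cause growth.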
    #[::fuchsia::test]
    async fn test_grow_write_fault_inside_read_only_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = do_mmap(
                locked,
                &current_task,
                UserAddress::default(),
                *PAGE_SIZE as usize,
                PROT_READ,
                MAP_ANONYMOUS | MAP_PRIVATE,
                FdNumber::from_raw(-1),
                0,
            )
            .expect("Could not map memory");

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
        })
        .await;
    }

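    // Same as above, but for a PROT_NONE mapping: faults inside it do not cause growth.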
    #[::fuchsia::test]
    async fn test_grow_fault_inside_prot_none_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = do_mmap(
                locked,
                &current_task,
                UserAddress::default(),
                *PAGE_SIZE as usize,
                PROT_NONE,
                MAP_ANONYMOUS | MAP_PRIVATE,
                FdNumber::from_raw(-1),
                0,
            )
            .expect("Could not map memory");

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
        })
        .await;
    }

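    // A fault one page below a GROWSDOWN mapping extends the mapping downward.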
    #[::fuchsia::test]
    async fn test_grow_below_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) - *PAGE_SIZE;

            assert_matches!(mm.extend_growsdown_mapping_to_address(addr.unwrap(), false), Ok(true));
        })
        .await;
    }

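    // A fault above a GROWSDOWN mapping is not covered by downward growth.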
    #[::fuchsia::test]
    async fn test_grow_above_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) + *PAGE_SIZE;

            assert_matches!(
                mm.extend_growsdown_mapping_to_address(addr.unwrap(), false),
                Ok(false)
            );
        })
        .await;
    }

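    // A write fault below a read-only GROWSDOWN mapping must not extend it, and the mapping
    // count should remain unchanged.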
    #[::fuchsia::test]
    async fn test_grow_write_fault_below_read_only_mapping() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            let mapped_addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE);

            mm.protect(&current_task, mapped_addr, *PAGE_SIZE as usize, ProtectionFlags::READ)
                .unwrap();

            assert_matches!(
                mm.extend_growsdown_mapping_to_address((mapped_addr - *PAGE_SIZE).unwrap(), true),
                Ok(false)
            );

            assert_eq!(mm.get_mapping_count(), 1);
        })
        .await;
    }

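    // Snapshots a mapping backed by a pager-served VMO into another task and verifies that
    // subsequent writes in either task are copy-on-write isolated from the other.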
    #[::fuchsia::test]
    async fn test_snapshot_paged_memory() {
        use zx::sys::zx_page_request_command_t::ZX_PAGER_VMO_READ;

        spawn_kernel_and_run(async |locked, current_task| {
            let kernel = current_task.kernel();
            let mm = current_task.mm().unwrap();
            let ma = current_task.deref();

            let port = Arc::new(zx::Port::create());
            let port_clone = port.clone();
            let pager =
                Arc::new(zx::Pager::create(zx::PagerOptions::empty()).expect("create failed"));
            let pager_clone = pager.clone();

            const VMO_SIZE: u64 = 128 * 1024;
            let vmo = Arc::new(
                pager
                    .create_vmo(zx::VmoOptions::RESIZABLE, &port, 1, VMO_SIZE)
                    .expect("create_vmo failed"),
            );
            let vmo_clone = vmo.clone();

            // Create a thread to service the port where we will receive pager requests.
            let thread = std::thread::spawn(move || {
                loop {
                    let packet =
                        port_clone.wait(zx::MonotonicInstant::INFINITE).expect("wait failed");
                    match packet.contents() {
                        zx::PacketContents::Pager(contents) => {
                            if contents.command() == ZX_PAGER_VMO_READ {
                                let range = contents.range();
                                let source_vmo = zx::Vmo::create(range.end - range.start)
                                    .expect("create failed");
                                pager_clone
                                    .supply_pages(&vmo_clone, range, &source_vmo, 0)
                                    .expect("supply_pages failed");
                            }
                        }
                        zx::PacketContents::User(_) => break,
                        _ => {}
                    }
                }
            });

            let child_vmo = vmo
                .create_child(zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, VMO_SIZE)
                .unwrap();

            // Write something to the source VMO.
            vmo.write(b"foo", 0).expect("write failed");

            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
            let addr = mm
                .map_memory(
                    DesiredAddress::Any,
                    Arc::new(MemoryObject::from(child_vmo)),
                    0,
                    VMO_SIZE as usize,
                    prot_flags,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::None,
                )
                .expect("map failed");

            let target = create_task(locked, &kernel, "another-task");
            mm.snapshot_to(locked, &target.mm().unwrap()).expect("snapshot_to failed");

            // Make sure it has what we wrote.
            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"foo");

            // Write something to both source and target and make sure they are forked.
            ma.write_memory(addr, b"bar").expect("write_memory failed");

            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"foo");

            target.write_memory(addr, b"baz").expect("write_memory failed");
            let buf = ma.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"bar");

            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"baz");

            port.queue(&zx::Packet::from_user_packet(0, 0, zx::UserPacket::from_u8_array([0; 32])))
                .unwrap();
            thread.join().unwrap();
        })
        .await;
    }

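    // prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME) should attach the provided name to the mapping.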
    #[::fuchsia::test]
    async fn test_set_vma_name() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);

            let vma_name = "vma name";
            current_task.write_memory(name_addr, vma_name.as_bytes()).unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);

            sys_prctl(
                locked,
                &mut current_task,
                PR_SET_VMA,
                PR_SET_VMA_ANON_NAME as u64,
                mapping_addr.ptr() as u64,
                *PAGE_SIZE,
                name_addr.ptr() as u64,
            )
            .unwrap();

            assert_eq!(
                *current_task.mm().unwrap().get_mapping_name(mapping_addr).unwrap().unwrap(),
                vma_name
            );
        })
        .await;
    }

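    // Naming a range that covers two adjacent mappings should apply the name to both.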
    #[::fuchsia::test]
    async fn test_set_vma_name_adjacent_mappings() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let first_mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
            let second_mapping_addr = map_memory_with_flags(
                locked,
                &current_task,
                (first_mapping_addr + *PAGE_SIZE).unwrap(),
                *PAGE_SIZE,
                MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
            );

            assert_eq!((first_mapping_addr + *PAGE_SIZE).unwrap(), second_mapping_addr);

            sys_prctl(
                locked,
                &mut current_task,
                PR_SET_VMA,
                PR_SET_VMA_ANON_NAME as u64,
                first_mapping_addr.ptr() as u64,
                2 * *PAGE_SIZE,
                name_addr.ptr() as u64,
            )
            .unwrap();

            {
                let mm = current_task.mm().unwrap();
                let state = mm.state.read();

                // The name should apply to both mappings.
                let (_, mapping) = state.mappings.get(first_mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));

                let (_, mapping) = state.mappings.get(second_mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
            }
        })
        .await;
    }

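    // Naming a range that runs past the end of a mapping fails with ENOMEM but still names the
    // mapped prefix.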
    #[::fuchsia::test]
    async fn test_set_vma_name_beyond_end() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);

            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
            current_task.mm().unwrap().unmap(second_page, *PAGE_SIZE as usize).unwrap();

            // This should fail with ENOMEM since the range extends past the end of the mapping
            // into unmapped memory.
            assert_eq!(
                sys_prctl(
                    locked,
                    &mut current_task,
                    PR_SET_VMA,
                    PR_SET_VMA_ANON_NAME as u64,
                    mapping_addr.ptr() as u64,
                    2 * *PAGE_SIZE,
                    name_addr.ptr() as u64,
                ),
                error!(ENOMEM)
            );

            // Despite returning an error, the prctl should still assign a name to the mapped
            // region at the start of the range.
            {
                let mm = current_task.mm().unwrap();
                let state = mm.state.read();

                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
            }
        })
        .await;
    }

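    // Naming a range that begins in unmapped memory fails with ENOMEM and names nothing.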
    #[::fuchsia::test]
    async fn test_set_vma_name_before_start() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);

            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
            current_task.mm().unwrap().unmap(mapping_addr, *PAGE_SIZE as usize).unwrap();

            // This should fail with ENOMEM since the start of the range is in unmapped memory.
            assert_eq!(
                sys_prctl(
                    locked,
                    &mut current_task,
                    PR_SET_VMA,
                    PR_SET_VMA_ANON_NAME as u64,
                    mapping_addr.ptr() as u64,
                    2 * *PAGE_SIZE,
                    name_addr.ptr() as u64,
                ),
                error!(ENOMEM)
            );

            // Unlike a range that starts within a mapping and extends past the end, this should
            // not assign a name to any mappings.
            {
                let mm = current_task.mm().unwrap();
                let state = mm.state.read();

                let (_, mapping) = state.mappings.get(second_page).unwrap();
                assert_eq!(mapping.name(), MappingName::None);
            }
        })
        .await;
    }

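    // Naming the middle page of a three-page mapping splits it, and only the middle piece
    // carries the name.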
    #[::fuchsia::test]
    async fn test_set_vma_name_partial() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), 3 * *PAGE_SIZE);

            assert_eq!(
                sys_prctl(
                    locked,
                    &mut current_task,
                    PR_SET_VMA,
                    PR_SET_VMA_ANON_NAME as u64,
                    (mapping_addr + *PAGE_SIZE).unwrap().ptr() as u64,
                    *PAGE_SIZE,
                    name_addr.ptr() as u64,
                ),
                Ok(starnix_syscalls::SUCCESS)
            );

            // This should split the mapping into 3 pieces, with the second piece having the
            // name "foo".
            {
                let mm = current_task.mm().unwrap();
                let state = mm.state.read();

                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::None);

                let (_, mapping) =
                    state.mappings.get((mapping_addr + *PAGE_SIZE).unwrap()).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));

                let (_, mapping) =
                    state.mappings.get((mapping_addr + (2 * *PAGE_SIZE)).unwrap()).unwrap();
                assert_eq!(mapping.name(), MappingName::None);
            }
        })
        .await;
    }

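    // VMA names assigned via prctl should be preserved when the address space is snapshotted
    // into another task.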
    #[::fuchsia::test]
    async fn test_preserve_name_snapshot() {
        spawn_kernel_and_run(async |locked, mut current_task| {
            let kernel = current_task.kernel().clone();
            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            current_task
                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
                .unwrap();

            let mapping_addr =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);

            assert_eq!(
                sys_prctl(
                    locked,
                    &mut current_task,
                    PR_SET_VMA,
                    PR_SET_VMA_ANON_NAME as u64,
                    mapping_addr.ptr() as u64,
                    *PAGE_SIZE,
                    name_addr.ptr() as u64,
                ),
                Ok(starnix_syscalls::SUCCESS)
            );

            let target = create_task(locked, &kernel, "another-task");
            current_task
                .mm()
                .unwrap()
                .snapshot_to(locked, &target.mm().unwrap())
                .expect("snapshot_to failed");

            {
                let mm = target.mm().unwrap();
                let state = mm.state.read();

                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
            }
        })
        .await;
    }
}