
starnix_core/mm/memory_manager.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::mapping::MappingBackingMemory;
7use crate::mm::memory::MemoryObject;
8use crate::mm::memory_accessor::{MemoryAccessor, TaskMemoryAccessor};
9use crate::mm::private_anonymous_memory_manager::PrivateAnonymousMemoryManager;
10use crate::mm::{
11    FaultRegisterMode, FutexTable, InflightVmsplicedPayloads, MapInfoCache, Mapping,
12    MappingBacking, MappingFlags, MappingMode, MappingName, MappingNameRef, MlockPinFlavor,
13    PrivateFutexKey, ProtectionFlags, UserFault, VMEX_RESOURCE, VmsplicePayload,
14    VmsplicePayloadSegment, read_to_array,
15};
16use crate::security;
17use crate::signals::{SignalDetail, SignalInfo};
18use crate::task::{CurrentTask, ExceptionResult, PageFaultExceptionReport, Task};
19use crate::vfs::aio::AioContext;
20use crate::vfs::pseudo::dynamic_file::{
21    DynamicFile, DynamicFileBuf, DynamicFileSource, SequenceFileSource,
22};
23use crate::vfs::{FsString, NamespaceNode};
24use anyhow::{Error, anyhow};
25use bitflags::bitflags;
26use flyweights::FlyByteStr;
27use linux_uapi::BUS_ADRERR;
28use memory_pinning::PinnedMapping;
29use range_map::RangeMap;
30use smallvec::SmallVec;
31use starnix_ext::map_ext::EntryExt;
32use starnix_lifecycle::DropNotifier;
33use starnix_logging::{
34    CATEGORY_STARNIX_MM, impossible_error, log_error, log_warn, trace_duration, track_stub,
35};
36use starnix_sync::{
37    LockBefore, Locked, MmDumpable, OrderedMutex, RwLock, RwLockWriteGuard, ThreadGroupLimits,
38    Unlocked, UserFaultInner,
39};
40use starnix_types::arch::ArchWidth;
41use starnix_types::futex_address::FutexAddress;
42use starnix_types::math::{round_down_to_system_page_size, round_up_to_system_page_size};
43use starnix_types::ownership::{TempRef, WeakRef};
44use starnix_types::user_buffer::{UserBuffer, UserBuffers};
45use starnix_uapi::auth::CAP_IPC_LOCK;
46use starnix_uapi::errors::Errno;
47use starnix_uapi::file_mode::Access;
48use starnix_uapi::range_ext::RangeExt;
49use starnix_uapi::resource_limits::Resource;
50use starnix_uapi::restricted_aspace::{
51    RESTRICTED_ASPACE_BASE, RESTRICTED_ASPACE_HIGHEST_ADDRESS, RESTRICTED_ASPACE_RANGE,
52    RESTRICTED_ASPACE_SIZE,
53};
54use starnix_uapi::signals::{SIGBUS, SIGSEGV};
55use starnix_uapi::user_address::{ArchSpecific, UserAddress};
56use starnix_uapi::{
57    MADV_COLD, MADV_COLLAPSE, MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK,
58    MADV_DONTNEED, MADV_DONTNEED_LOCKED, MADV_FREE, MADV_HUGEPAGE, MADV_HWPOISON, MADV_KEEPONFORK,
59    MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_NORMAL, MADV_PAGEOUT, MADV_POPULATE_READ, MADV_RANDOM,
60    MADV_REMOVE, MADV_SEQUENTIAL, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, MADV_WILLNEED,
61    MADV_WIPEONFORK, MREMAP_DONTUNMAP, MREMAP_FIXED, MREMAP_MAYMOVE, SI_KERNEL, errno, error,
62    from_status_like_fdio,
63};
64use std::collections::HashMap;
65use std::mem::MaybeUninit;
66use std::ops::{ControlFlow, Deref, DerefMut, Range, RangeBounds};
67use std::sync::{Arc, LazyLock, Weak};
68use syncio::zxio::zxio_default_maybe_faultable_copy;
69use zerocopy::IntoBytes;
70use zx::{Rights, VmoChildOptions};
71
72pub const ZX_VM_SPECIFIC_OVERWRITE: zx::VmarFlags =
73    zx::VmarFlags::from_bits_retain(zx::VmarFlagsExtended::SPECIFIC_OVERWRITE.bits());
74
75// We do not create shared processes in unit tests.
76pub(crate) const UNIFIED_ASPACES_ENABLED: bool = cfg!(not(test));
77
78/// Initializes the usercopy utilities.
79///
80/// It is useful to explicitly call this so that the usercopy is initialized
81/// at a known instant. For example, Starnix may want to make sure the usercopy
82/// thread created to support user copying is associated with the Starnix process
83/// and not a restricted-mode process.
84pub fn init_usercopy() {
85    // This call lazily initializes the `Usercopy` instance.
86    let _ = usercopy();
87}
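// Illustrative usage (hedged; the real initialization order lives in Starnix startup code):
// calling `init_usercopy()` once, before any restricted-mode threads are spawned, ensures the
// usercopy helper thread is created while only the Starnix process exists:
//
//     init_usercopy();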
88
89thread_local! {
90    /// The last mapping generation seen by this thread.
91    /// Used to prevent infinite loops in page fault handling.
92    static LAST_SEEN_MAPPING_GENERATION: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };
93}
94
95pub const GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS: usize = 256;
96
97#[cfg(target_arch = "x86_64")]
98const ASLR_RANDOM_BITS: usize = 27;
99
100#[cfg(target_arch = "aarch64")]
101const ASLR_RANDOM_BITS: usize = 28;
102
103#[cfg(target_arch = "riscv64")]
104const ASLR_RANDOM_BITS: usize = 18;
105
106/// Number of bits of entropy for processes running in 32-bit mode.
107const ASLR_32_RANDOM_BITS: usize = 8;
108
109// The biggest we expect the stack to be; increase as needed
110// TODO(https://fxbug.dev/322874791): Once setting RLIMIT_STACK is implemented, we should use it.
111const MAX_STACK_SIZE: usize = 512 * 1024 * 1024;
112
113// Value to report temporarily as the VM RSS HWM.
114// TODO(https://fxbug.dev/396221597): Need support from the kernel to track the committed bytes high
115// water mark.
116const STUB_VM_RSS_HWM: usize = 2 * 1024 * 1024;
117
118fn usercopy() -> Option<&'static usercopy::Usercopy> {
119    static USERCOPY: LazyLock<Option<usercopy::Usercopy>> = LazyLock::new(|| {
120        // We do not create shared processes in unit tests.
121        if UNIFIED_ASPACES_ENABLED {
122        // ASSUMPTION: All Starnix-managed Linux processes have the same
123            // restricted mode address range.
124            Some(usercopy::Usercopy::new(RESTRICTED_ASPACE_RANGE).unwrap())
125        } else {
126            None
127        }
128    });
129
130    LazyLock::force(&USERCOPY).as_ref()
131}
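// A minimal sketch of the `LazyLock<Option<T>>` pattern used above (hypothetical names,
// std-only), showing why `LazyLock::force(&CELL).as_ref()` yields an `Option<&'static T>`
// whose value is computed exactly once:
//
//     use std::sync::LazyLock;
//
//     static CELL: LazyLock<Option<String>> = LazyLock::new(|| {
//         // Fallible, expensive initialization runs on the first access only.
//         Some("initialized".to_string())
//     });
//
//     fn get() -> Option<&'static String> {
//         LazyLock::force(&CELL).as_ref()
//     }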
132
133/// Provides an implementation for zxio's `zxio_maybe_faultable_copy` that supports
134/// catching faults.
135///
136/// See zxio's `zxio_maybe_faultable_copy` documentation for more details.
137///
138/// # Safety
139///
140/// Only one of `src`/`dest` may be an address to a buffer owned by user/restricted-mode
141/// (`ret_dest` indicates whether the user-owned buffer is `dest` when `true`).
142/// The other must be a valid Starnix/normal-mode buffer that will never cause a fault
143/// when the first `count` bytes are read/written.
144#[unsafe(no_mangle)]
145pub unsafe fn zxio_maybe_faultable_copy_impl(
146    dest: *mut u8,
147    src: *const u8,
148    count: usize,
149    ret_dest: bool,
150) -> bool {
151    if let Some(usercopy) = usercopy() {
152        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
153        let ret = unsafe { usercopy.raw_hermetic_copy(dest, src, count, ret_dest) };
154        ret == count
155    } else {
156        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
157        unsafe {
158            zxio_default_maybe_faultable_copy(dest, src, count, ret_dest)
159        }
160    }
161}
162
163pub static PAGE_SIZE: LazyLock<u64> = LazyLock::new(|| zx::system_get_page_size() as u64);
164
165bitflags! {
166    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
167    pub struct MappingOptions: u16 {
168      const SHARED      = 1 << 0;
169      const ANONYMOUS   = 1 << 1;
170      const LOWER_32BIT = 1 << 2;
171      const GROWSDOWN   = 1 << 3;
172      const ELF_BINARY  = 1 << 4;
173      const DONTFORK    = 1 << 5;
174      const WIPEONFORK  = 1 << 6;
175      const DONT_SPLIT  = 1 << 7;
176      const DONT_EXPAND = 1 << 8;
177      const POPULATE    = 1 << 9;
178    }
179}
180
181bitflags! {
182    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
183    pub struct MremapFlags: u32 {
184        const MAYMOVE = MREMAP_MAYMOVE;
185        const FIXED = MREMAP_FIXED;
186        const DONTUNMAP = MREMAP_DONTUNMAP;
187    }
188}
189
190bitflags! {
191    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
192    pub struct MsyncFlags: u32 {
193        const ASYNC = starnix_uapi::MS_ASYNC;
194        const INVALIDATE = starnix_uapi::MS_INVALIDATE;
195        const SYNC = starnix_uapi::MS_SYNC;
196    }
197}
198
199const PROGRAM_BREAK_LIMIT: u64 = 64 * 1024 * 1024;
200
201#[derive(Debug, Clone, Eq, PartialEq)]
202struct ProgramBreak {
203    // The base address at which the data segment is mapped.
204    base: UserAddress,
205
206    // The current program break.
207    //
208    // The addresses from [base, current.round_up(*PAGE_SIZE)) are mapped into the
209    // client address space from the underlying |memory|.
210    current: UserAddress,
211}
212
213/// The policy about whether the address space can be dumped.
214#[derive(Debug, Clone, Copy, Eq, PartialEq)]
215pub enum DumpPolicy {
216    /// The address space cannot be dumped.
217    ///
218    /// Corresponds to SUID_DUMP_DISABLE.
219    Disable,
220
221    /// The address space can be dumped.
222    ///
223    /// Corresponds to SUID_DUMP_USER.
224    User,
225}
226
227// Supported types of membarriers.
228pub enum MembarrierType {
229    Memory,   // MEMBARRIER_CMD_GLOBAL, etc
230    SyncCore, // MEMBARRIER_CMD_..._SYNC_CORE
231}
232
233// Tracks the types of membarriers this address space is registered to receive.
234#[derive(Default, Clone)]
235struct MembarrierRegistrations {
236    memory: bool,
237    sync_core: bool,
238}
239
240#[derive(Default)]
241struct Mappings {
242    /// The mappings record which object backs each address.
243    map: RangeMap<UserAddress, Mapping>,
244
245    /// Generation counter for mappings. Incremented on any modification to `mappings`.
246    ///
247    /// This is used to detect stale mappings in `handle_page_fault`.
248    generation: u64,
249}
250
251impl Deref for Mappings {
252    type Target = RangeMap<UserAddress, Mapping>;
253
254    fn deref(&self) -> &Self::Target {
255        &self.map
256    }
257}
258
259impl DerefMut for Mappings {
260    fn deref_mut(&mut self) -> &mut Self::Target {
261        self.generation = self.generation.wrapping_add(1);
262        &mut self.map
263    }
264}
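// The `DerefMut` impl above bumps `generation` on every mutable access, so any code path
// that can modify `map` implicitly invalidates readers holding a stale generation (e.g. the
// page-fault handler via `LAST_SEEN_MAPPING_GENERATION`). A minimal, self-contained sketch
// of the same pattern, with hypothetical names and not part of this module:
//
//     struct Versioned<T> {
//         value: T,
//         generation: u64,
//     }
//
//     impl<T> std::ops::Deref for Versioned<T> {
//         type Target = T;
//         fn deref(&self) -> &T {
//             &self.value
//         }
//     }
//
//     impl<T> std::ops::DerefMut for Versioned<T> {
//         fn deref_mut(&mut self) -> &mut T {
//             // Every mutable borrow is counted as a potential modification.
//             self.generation = self.generation.wrapping_add(1);
//             &mut self.value
//         }
//     }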
265
266pub struct MemoryManagerState {
267    /// The memory mappings currently used by this address space.
268    mappings: Mappings,
269
270    /// UserFaults registered with this memory manager.
271    userfaultfds: Vec<Weak<UserFault>>,
272
273    /// Shadow mappings for mlock()'d pages.
274    ///
275    /// Used for MlockPinFlavor::ShadowProcess to keep track of when we need to unmap
276    /// memory from the shadow process.
277    shadow_mappings_for_mlock: RangeMap<UserAddress, Arc<PinnedMapping>>,
278
279    forkable_state: MemoryManagerForkableState,
280}
281
282// 64 KiB below the 4 GB boundary.
283const LOWER_4GB_LIMIT: UserAddress = UserAddress::const_from(0xffff_0000);
284
285#[derive(Default, Clone)]
286pub struct MemoryManagerForkableState {
287    /// State for the brk and sbrk syscalls.
288    brk: Option<ProgramBreak>,
289
290    /// The namespace node that represents the executable associated with this task.
291    executable_node: Option<NamespaceNode>,
292
293    pub stack_size: usize,
294    pub stack_start: UserAddress,
295    pub auxv_start: UserAddress,
296    pub auxv_end: UserAddress,
297    pub argv_start: UserAddress,
298    pub argv_end: UserAddress,
299    pub environ_start: UserAddress,
300    pub environ_end: UserAddress,
301
302    /// vDSO location
303    pub vdso_base: UserAddress,
304
305    /// Randomized regions:
306    pub mmap_top: UserAddress,
307    pub stack_origin: UserAddress,
308    pub brk_origin: UserAddress,
309
310    // Membarrier registrations
311    membarrier_registrations: MembarrierRegistrations,
312}
313
314impl Deref for MemoryManagerState {
315    type Target = MemoryManagerForkableState;
316    fn deref(&self) -> &Self::Target {
317        &self.forkable_state
318    }
319}
320
321impl DerefMut for MemoryManagerState {
322    fn deref_mut(&mut self) -> &mut Self::Target {
323        &mut self.forkable_state
324    }
325}
326
327#[derive(Debug, Default)]
328struct ReleasedMappings {
329    doomed: Vec<Mapping>,
330    doomed_pins: Vec<Arc<PinnedMapping>>,
331}
332
333impl ReleasedMappings {
334    fn extend(&mut self, mappings: impl IntoIterator<Item = Mapping>) {
335        self.doomed.extend(mappings);
336    }
337
338    fn extend_pins(&mut self, mappings: impl IntoIterator<Item = Arc<PinnedMapping>>) {
339        self.doomed_pins.extend(mappings);
340    }
341
342    fn is_empty(&self) -> bool {
343        self.doomed.is_empty() && self.doomed_pins.is_empty()
344    }
345
346    #[cfg(test)]
347    fn len(&self) -> usize {
348        self.doomed.len() + self.doomed_pins.len()
349    }
350
351    fn finalize(&mut self, mm_state: RwLockWriteGuard<'_, MemoryManagerState>) {
352        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
353        // in `DirEntry`'s `drop`.
354        std::mem::drop(mm_state);
355        std::mem::take(&mut self.doomed);
356        std::mem::take(&mut self.doomed_pins);
357    }
358}
359
360impl Drop for ReleasedMappings {
361    fn drop(&mut self) {
362        assert!(self.is_empty(), "ReleasedMappings::finalize() must be called before drop");
363    }
364}
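// Expected usage pattern (a hedged sketch; exact call sites vary): collect doomed mappings
// while holding the `MemoryManagerState` write lock, then call `finalize()`, which drops the
// lock guard *before* the mappings themselves, because dropping a `Mapping` may acquire other
// locks (e.g. in `DirEntry`'s `drop`):
//
//     let mut released_mappings = ReleasedMappings::default();
//     let mut state = mm.state.write(); // hypothetical accessor for the state RwLock
//     state.unmap(&mm, addr, length, &mut released_mappings)?;
//     released_mappings.finalize(state); // drops the guard first, then the mappings
//
// Dropping a non-empty `ReleasedMappings` without calling `finalize()` trips the assert above.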
365
366fn map_in_vmar(
367    vmar: &zx::Vmar,
368    vmar_info: &zx::VmarInfo,
369    addr: SelectedAddress,
370    memory: &MemoryObject,
371    memory_offset: u64,
372    length: usize,
373    flags: MappingFlags,
374    populate: bool,
375) -> Result<(), Errno> {
376    let vmar_offset = addr.addr().checked_sub(vmar_info.base).ok_or_else(|| errno!(ENOMEM))?;
377    let vmar_extra_flags = match addr {
378        SelectedAddress::Fixed(_) => zx::VmarFlags::SPECIFIC,
379        SelectedAddress::FixedOverwrite(_) => ZX_VM_SPECIFIC_OVERWRITE,
380    };
381
382    if populate {
383        let op = if flags.contains(MappingFlags::WRITE) {
384            // Requires ZX_RIGHT_WRITEABLE which we should expect when the mapping is writeable.
385            zx::VmoOp::COMMIT
386        } else {
387            // When we don't expect to have ZX_RIGHT_WRITEABLE, fall back to a VMO op that doesn't
388            // need it.
389            zx::VmoOp::PREFETCH
390        };
391        trace_duration!(CATEGORY_STARNIX_MM, "MmapCommitPages");
392        let _ = memory.op_range(op, memory_offset, length as u64);
393        // "The mmap() call doesn't fail if the mapping cannot be populated."
394    }
395
396    let vmar_maybe_map_range = if populate && !vmar_extra_flags.contains(ZX_VM_SPECIFIC_OVERWRITE) {
397        zx::VmarFlags::MAP_RANGE
398    } else {
399        zx::VmarFlags::empty()
400    };
401    let vmar_flags = flags.access_flags().to_vmar_flags()
402        | zx::VmarFlags::ALLOW_FAULTS
403        | vmar_extra_flags
404        | vmar_maybe_map_range;
405
406    let map_result = memory.map_in_vmar(vmar, vmar_offset.ptr(), memory_offset, length, vmar_flags);
407    let mapped_addr = map_result.map_err(MemoryManager::get_errno_for_map_err)?;
408
409    let expected_addr = addr.addr().ptr();
410    debug_assert_eq!(
411        mapped_addr, expected_addr,
412        "Zircon mapped to a different address than requested!"
413    );
414
415    Ok(())
416}
417
418impl MemoryManagerState {
419    /// Returns occupied address ranges that intersect with the given range.
420    ///
421    /// An address range is "occupied" if (a) there is already a mapping in that range or (b) there
422    /// is a GROWSDOWN mapping <= 256 pages above that range. The 256 pages below a GROWSDOWN
423    /// mapping is the "guard region." The memory manager avoids mapping memory in the guard region
424    /// in some circumstances to preserve space for the GROWSDOWN mapping to grow down.
425    fn get_occupied_address_ranges<'a>(
426        &'a self,
427        subrange: &'a Range<UserAddress>,
428    ) -> impl Iterator<Item = Range<UserAddress>> + 'a {
429        let query_range = subrange.start
430            ..(subrange
431                .end
432                .saturating_add(*PAGE_SIZE as usize * GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS));
433        self.mappings.range(query_range).filter_map(|(range, mapping)| {
434            let occupied_range = mapping.inflate_to_include_guard_pages(range);
435            if occupied_range.start < subrange.end && subrange.start < occupied_range.end {
436                Some(occupied_range)
437            } else {
438                None
439            }
440        })
441    }
442
443    fn count_possible_placements(
444        &self,
445        length: usize,
446        subrange: &Range<UserAddress>,
447    ) -> Option<usize> {
448        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
449        let mut possible_placements = 0;
450        // If the allocation is placed at the first available address, each page left before the
451        // next mapping (or the end of `subrange`) adds one more potential placement.
452        let mut first_fill_end = subrange.start.checked_add(length)?;
453        while first_fill_end <= subrange.end {
454            let Some(mapping) = occupied_ranges.next() else {
455                possible_placements += (subrange.end - first_fill_end) / (*PAGE_SIZE as usize) + 1;
456                break;
457            };
458            if mapping.start >= first_fill_end {
459                possible_placements += (mapping.start - first_fill_end) / (*PAGE_SIZE as usize) + 1;
460            }
461            first_fill_end = mapping.end.checked_add(length)?;
462        }
463        Some(possible_placements)
464    }
465
466    fn pick_placement(
467        &self,
468        length: usize,
469        mut chosen_placement_idx: usize,
470        subrange: &Range<UserAddress>,
471    ) -> Option<UserAddress> {
472        let mut candidate =
473            Range { start: subrange.start, end: subrange.start.checked_add(length)? };
474        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
475        loop {
476            let Some(mapping) = occupied_ranges.next() else {
477                // No more mappings: treat the rest of the index as an offset.
478                let res =
479                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
480                debug_assert!(res.checked_add(length)? <= subrange.end);
481                return Some(res);
482            };
483            if mapping.start < candidate.end {
484                // doesn't fit, skip
485                candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
486                continue;
487            }
488            let unused_space =
489                (mapping.start.ptr() - candidate.end.ptr()) / (*PAGE_SIZE as usize) + 1;
490            if unused_space > chosen_placement_idx {
491                // Chosen placement is within the range; treat the rest of the index as an offset.
492                let res =
493                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
494                return Some(res);
495            }
496
497            // chosen address is further up, skip
498            chosen_placement_idx -= unused_space;
499            candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
500        }
501    }
502
503    fn find_random_unused_range(
504        &self,
505        length: usize,
506        subrange: &Range<UserAddress>,
507    ) -> Option<UserAddress> {
508        let possible_placements = self.count_possible_placements(length, subrange)?;
509        if possible_placements == 0 {
510            return None;
511        }
512        let chosen_placement_idx = rand::random_range(0..possible_placements);
513        self.pick_placement(length, chosen_placement_idx, subrange)
514    }
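    // Worked example for the two-step randomized placement above (illustrative numbers,
    // 4 KiB pages, no GROWSDOWN guard regions): with subrange [0x0, 0x10000), one occupied
    // range [0x4000, 0x6000), and length = 0x2000, `count_possible_placements` returns
    // 3 placements below the mapping (starts 0x0, 0x1000, 0x2000) plus 9 above it
    // (starts 0x6000 through 0xE000), i.e. 12 in total. `pick_placement` then maps a
    // uniformly random index in 0..12 back to the corresponding page-aligned start address.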
515
516    // Find the first unused range of addresses that fits a mapping of `length` bytes, searching
517    // from `mmap_top` downwards.
518    pub fn find_next_unused_range(&self, length: usize) -> Option<UserAddress> {
519        let gap_size = length as u64;
520        let mut upper_bound = self.mmap_top;
521
522        loop {
523            let gap_end = self.mappings.find_gap_end(gap_size, &upper_bound);
524            let candidate = gap_end.checked_sub(length)?;
525
526            // Is there a next mapping? If not, the candidate is already good.
527            let Some((occupied_range, mapping)) = self.mappings.get(gap_end) else {
528                return Some(candidate);
529            };
530            let occupied_range = mapping.inflate_to_include_guard_pages(occupied_range);
531            // If it doesn't overlap, the gap is big enough to fit.
532            if occupied_range.start >= gap_end {
533                return Some(candidate);
534            }
535            // If there was a mapping in the way, use the start of that range as the upper bound.
536            upper_bound = occupied_range.start;
537        }
538    }
539
540    // Accept the hint if the range is unused and within the range available for mapping.
541    fn is_hint_acceptable(&self, hint_addr: UserAddress, length: usize) -> bool {
542        let Some(hint_end) = hint_addr.checked_add(length) else {
543            return false;
544        };
545        if !RESTRICTED_ASPACE_RANGE.contains(&hint_addr.ptr())
546            || !RESTRICTED_ASPACE_RANGE.contains(&hint_end.ptr())
547        {
548            return false;
549        };
550        self.get_occupied_address_ranges(&(hint_addr..hint_end)).next().is_none()
551    }
552
553    fn select_address(
554        &self,
555        addr: DesiredAddress,
556        length: usize,
557        flags: MappingFlags,
558    ) -> Result<SelectedAddress, Errno> {
559        let adjusted_length = round_up_to_system_page_size(length).or_else(|_| error!(ENOMEM))?;
560
561        let find_address = || -> Result<SelectedAddress, Errno> {
562            let new_addr = if flags.contains(MappingFlags::LOWER_32BIT) {
563                // MAP_32BIT specifies that the memory allocated will
564                // be within the first 2 GB of the process address space.
565                self.find_random_unused_range(
566                    adjusted_length,
567                    &(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
568                        ..UserAddress::from_ptr(0x80000000)),
569                )
570                .ok_or_else(|| errno!(ENOMEM))?
571            } else {
572                self.find_next_unused_range(adjusted_length).ok_or_else(|| errno!(ENOMEM))?
573            };
574
575            Ok(SelectedAddress::Fixed(new_addr))
576        };
577
578        Ok(match addr {
579            DesiredAddress::Any => find_address()?,
580            DesiredAddress::Hint(hint_addr) => {
581                // Round down to page size
582                let hint_addr =
583                    UserAddress::from_ptr(hint_addr.ptr() - hint_addr.ptr() % *PAGE_SIZE as usize);
584                if self.is_hint_acceptable(hint_addr, adjusted_length) {
585                    SelectedAddress::Fixed(hint_addr)
586                } else {
587                    find_address()?
588                }
589            }
590            DesiredAddress::Fixed(addr) => SelectedAddress::Fixed(addr),
591            DesiredAddress::FixedOverwrite(addr) => SelectedAddress::FixedOverwrite(addr),
592        })
593    }
594
595    fn validate_addr(&self, addr: DesiredAddress, length: usize) -> Result<(), Errno> {
596        if let DesiredAddress::FixedOverwrite(addr) = addr {
597            if self.check_has_unauthorized_splits(addr, length) {
598                return error!(ENOMEM);
599            }
600        }
601        Ok(())
602    }
603
604    fn add_memory_mapping(
605        &mut self,
606        mm: &Arc<MemoryManager>,
607        addr: DesiredAddress,
608        memory: Arc<MemoryObject>,
609        memory_offset: u64,
610        length: usize,
611        flags: MappingFlags,
612        max_access: Access,
613        populate: bool,
614        name: MappingName,
615        mapping_mode: MappingMode,
616        released_mappings: &mut ReleasedMappings,
617    ) -> Result<UserAddress, Errno> {
618        self.validate_addr(addr, length)?;
619
620        let selected_address = self.select_address(addr, length, flags)?;
621        let mapped_addr = selected_address.addr();
622        if mapping_mode == MappingMode::Eager {
623            mm.mapping_context.map_in_user_vmar(
624                selected_address,
625                &memory,
626                memory_offset,
627                length,
628                flags,
629                populate,
630            )?;
631        }
632
633        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
634
635        if let DesiredAddress::FixedOverwrite(addr) = addr {
636            assert_eq!(addr, mapped_addr);
637            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
638        }
639
640        let mapping = Mapping::with_name(
641            self.create_memory_backing(mapped_addr, memory, memory_offset),
642            flags,
643            max_access,
644            name,
645            mapping_mode,
646        );
647        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
648
649        Ok(mapped_addr)
650    }
651
652    fn map_private_anonymous(
653        &mut self,
654        mm: &Arc<MemoryManager>,
655        addr: DesiredAddress,
656        length: usize,
657        prot_flags: ProtectionFlags,
658        options: MappingOptions,
659        populate: bool,
660        name: MappingName,
661        released_mappings: &mut ReleasedMappings,
662    ) -> Result<UserAddress, Errno> {
663        self.validate_addr(addr, length)?;
664
665        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
666        let selected_addr = self.select_address(addr, length, flags)?;
667        let mapped_addr = selected_addr.addr();
668        let backing_memory_offset = selected_addr.addr().ptr();
669
670        mm.mapping_context.map_in_user_vmar(
671            selected_addr,
672            &mm.mapping_context.private_anonymous.backing,
673            backing_memory_offset as u64,
674            length,
675            flags,
676            populate,
677        )?;
678
679        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
680        if let DesiredAddress::FixedOverwrite(addr) = addr {
681            assert_eq!(addr, mapped_addr);
682            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
683        }
684
685        let mapping = Mapping::new_private_anonymous(flags, name, MappingMode::Eager);
686        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
687
688        Ok(mapped_addr)
689    }
690
691    fn map_anonymous(
692        &mut self,
693        mm: &Arc<MemoryManager>,
694        addr: DesiredAddress,
695        length: usize,
696        prot_flags: ProtectionFlags,
697        options: MappingOptions,
698        name: MappingName,
699        released_mappings: &mut ReleasedMappings,
700    ) -> Result<UserAddress, Errno> {
701        if !options.contains(MappingOptions::SHARED) {
702            return self.map_private_anonymous(
703                mm,
704                addr,
705                length,
706                prot_flags,
707                options,
708                options.contains(MappingOptions::POPULATE),
709                name,
710                released_mappings,
711            );
712        }
713        let memory = create_anonymous_mapping_memory(length as u64)?;
714        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
715        self.add_memory_mapping(
716            mm,
717            addr,
718            memory,
719            0,
720            length,
721            flags,
722            Access::rwx(),
723            options.contains(MappingOptions::POPULATE),
724            name,
725            MappingMode::Eager,
726            released_mappings,
727        )
728    }
729
730    fn ensure_range_mapped_in_user_vmar(
731        &mut self,
732        addr: UserAddress,
733        length: Option<usize>,
734        context: &MappingContext,
735    ) -> Result<bool, Errno> {
736        self.ensure_ranges_mapped_in_user_vmar(std::iter::once((addr, length)), context)
737    }
738
739    fn ensure_ranges_mapped_in_user_vmar<I>(
740        &mut self,
741        ranges: I,
742        context: &MappingContext,
743    ) -> Result<bool, Errno>
744    where
745        I: IntoIterator<Item = (UserAddress, Option<usize>)>,
746    {
747        // This is most likely to contain one range, so use `SmallVec` to avoid a
748        // heap allocation and get better performance in the common case.
749        let mut ranges_to_update = SmallVec::<[std::ops::Range<UserAddress>; 1]>::new();
750        for (addr, length) in ranges {
751            match length {
752                None => {
753                    if let Some((range, mapping)) = self.mappings.get(addr) {
754                        if mapping.mapping_mode() == MappingMode::Lazy {
755                            ranges_to_update.push(range.clone());
756                        }
757                    }
758                }
759                Some(len) => {
760                    assert!(len > 0);
761                    let end = addr.checked_add(len).expect("address overflowed after validation");
762                    for (range, mapping) in self.mappings.range(addr..end) {
763                        if mapping.mapping_mode() == MappingMode::Lazy {
764                            ranges_to_update.push(range.clone());
765                        }
766                    }
767                }
768            }
769        }
770
771        if ranges_to_update.is_empty() {
772            return Ok(false);
773        }
774
775        for range in ranges_to_update {
776            let mut mappings = self.mappings.remove(range.clone());
777            assert_eq!(mappings.len(), 1, "Expected to remove exactly one mapping");
778            let mut mapping = mappings.pop().unwrap();
779
780            let addr = SelectedAddress::FixedOverwrite(range.start);
781            let flags = mapping.flags();
782            let (backing, backing_memory_offset) = match self.get_mapping_backing(&mapping) {
783                MappingBacking::Memory(backing) => {
784                    (backing.memory(), backing.address_to_offset(addr.addr()))
785                }
786                MappingBacking::PrivateAnonymous => {
787                    (&context.private_anonymous.backing, addr.addr().ptr() as u64)
788                }
789            };
790
791            let mapping_length = range.end - range.start;
792            context.map_in_user_vmar(
793                addr,
794                backing,
795                backing_memory_offset,
796                mapping_length,
797                flags,
798                false,
799            )?;
800
801            mapping.set_mapping_mode(MappingMode::Eager);
802            let _ = self.mappings.insert(range.start..range.end, mapping);
803        }
804
805        Ok(true)
806    }
807
808    fn remap(
809        &mut self,
810        _current_task: &CurrentTask,
811        mm: &Arc<MemoryManager>,
812        old_addr: UserAddress,
813        old_length: usize,
814        new_length: usize,
815        flags: MremapFlags,
816        new_addr: UserAddress,
817        released_mappings: &mut ReleasedMappings,
818    ) -> Result<UserAddress, Errno> {
819        // MREMAP_FIXED moves a mapping, which requires MREMAP_MAYMOVE.
820        if flags.contains(MremapFlags::FIXED) && !flags.contains(MremapFlags::MAYMOVE) {
821            return error!(EINVAL);
822        }
823
824        // MREMAP_DONTUNMAP is always a move, so it requires MREMAP_MAYMOVE.
825        // There is no resizing allowed either.
826        if flags.contains(MremapFlags::DONTUNMAP)
827            && (!flags.contains(MremapFlags::MAYMOVE) || old_length != new_length)
828        {
829            return error!(EINVAL);
830        }
831
832        // In-place copies are invalid.
833        if !flags.contains(MremapFlags::MAYMOVE) && old_length == 0 {
834            return error!(ENOMEM);
835        }
836
837        if new_length == 0 {
838            return error!(EINVAL);
839        }
840
841        // Make sure old_addr is page-aligned.
842        if !old_addr.is_aligned(*PAGE_SIZE) {
843            return error!(EINVAL);
844        }
845
846        let old_length = round_up_to_system_page_size(old_length)?;
847        let new_length = round_up_to_system_page_size(new_length)?;
848
849        if self.check_has_unauthorized_splits(old_addr, old_length) {
850            return error!(EINVAL);
851        }
852
853        if self.check_has_unauthorized_splits(new_addr, new_length) {
854            return error!(EINVAL);
855        }
856
857        if !flags.contains(MremapFlags::DONTUNMAP)
858            && !flags.contains(MremapFlags::FIXED)
859            && old_length != 0
860        {
861            // We are not requested to remap to a specific address, so first we see if we can remap
862            // in-place. In-place copies (old_length == 0) are not allowed.
863            if let Some(new_addr) =
864                self.try_remap_in_place(mm, old_addr, old_length, new_length, released_mappings)?
865            {
866                return Ok(new_addr);
867            }
868        }
869
870        // There is no space to grow in place, or there is an explicit request to move.
871        if flags.contains(MremapFlags::MAYMOVE) {
872            let dst_address =
873                if flags.contains(MremapFlags::FIXED) { Some(new_addr) } else { None };
874            self.remap_move(
875                mm,
876                old_addr,
877                old_length,
878                dst_address,
879                new_length,
880                flags.contains(MremapFlags::DONTUNMAP),
881                released_mappings,
882            )
883        } else {
884            error!(ENOMEM)
885        }
886    }
887
888    /// Attempts to grow or shrink the mapping in-place. Returns `Ok(Some(addr))` if the remap was
889    /// successful. Returns `Ok(None)` if there was no space to grow.
890    fn try_remap_in_place(
891        &mut self,
892        mm: &Arc<MemoryManager>,
893        old_addr: UserAddress,
894        old_length: usize,
895        new_length: usize,
896        released_mappings: &mut ReleasedMappings,
897    ) -> Result<Option<UserAddress>, Errno> {
898        let old_range = old_addr..old_addr.checked_add(old_length).ok_or_else(|| errno!(EINVAL))?;
899        let new_range_in_place =
900            old_addr..old_addr.checked_add(new_length).ok_or_else(|| errno!(EINVAL))?;
901
902        if new_length <= old_length {
903            // Shrink the mapping in-place, which should always succeed.
904            // This is done by unmapping the extraneous region.
905            if new_length != old_length {
906                self.unmap(mm, new_range_in_place.end, old_length - new_length, released_mappings)?;
907            }
908            return Ok(Some(old_addr));
909        }
910
911        if self.mappings.range(old_range.end..new_range_in_place.end).next().is_some() {
912            // There is some mapping in the growth range preventing an in-place growth.
913            return Ok(None);
914        }
915
916        // There is space to grow in-place. The old range must be one contiguous mapping.
917        let (original_range, mapping) =
918            self.mappings.get(old_addr).ok_or_else(|| errno!(EINVAL))?;
919
920        if old_range.end > original_range.end {
921            return error!(EFAULT);
922        }
923        let original_range = original_range.clone();
924        let original_mapping = mapping.clone();
925
926        // Compute the new length of the entire mapping once it has grown.
927        let final_length = (original_range.end - original_range.start) + (new_length - old_length);
928
929        match self.get_mapping_backing(&original_mapping) {
930            MappingBacking::Memory(backing) => {
931                // Re-map the original range, which may include pages before the requested range.
932                Ok(Some(self.add_memory_mapping(
933                    mm,
934                    DesiredAddress::FixedOverwrite(original_range.start),
935                    backing.memory().clone(),
936                    backing.address_to_offset(original_range.start),
937                    final_length,
938                    original_mapping.flags(),
939                    original_mapping.max_access(),
940                    false,
941                    original_mapping.name().to_owned(),
942                    original_mapping.mapping_mode(),
943                    released_mappings,
944                )?))
945            }
946            MappingBacking::PrivateAnonymous => {
947                let growth_start = original_range.end;
948                let growth_length = new_length - old_length;
949                let final_end = (original_range.start + final_length)?;
950                // Map new pages to back the growth.
951                mm.mapping_context.map_in_user_vmar(
952                    SelectedAddress::FixedOverwrite(growth_start),
953                    &mm.mapping_context.private_anonymous.backing,
954                    growth_start.ptr() as u64,
955                    growth_length,
956                    original_mapping.flags(),
957                    false,
958                )?;
959                // Overwrite the mapping entry with the new larger size.
960                released_mappings.extend(
961                    self.mappings.insert(original_range.start..final_end, original_mapping.clone()),
962                );
963                Ok(Some(original_range.start))
964            }
965        }
966    }
967
968    /// Grows or shrinks the mapping while moving it to a new destination.
969    fn remap_move(
970        &mut self,
971        mm: &Arc<MemoryManager>,
972        src_addr: UserAddress,
973        src_length: usize,
974        dst_addr: Option<UserAddress>,
975        dst_length: usize,
976        keep_source: bool,
977        released_mappings: &mut ReleasedMappings,
978    ) -> Result<UserAddress, Errno> {
979        let src_range = src_addr..src_addr.checked_add(src_length).ok_or_else(|| errno!(EINVAL))?;
980        let (original_range, src_mapping) =
981            self.mappings.get(src_addr).ok_or_else(|| errno!(EINVAL))?;
982        let original_range = original_range.clone();
983        let src_mapping = src_mapping.clone();
984
985        if src_length == 0 && !src_mapping.flags().contains(MappingFlags::SHARED) {
986            // src_length == 0 means that the mapping is to be copied. This behavior is only valid
987            // with MAP_SHARED mappings.
988            return error!(EINVAL);
989        }
990
991        // If the destination range is smaller than the source range, we must first shrink
992        // the source range in place. This must be done now and visible to processes, even if
993        // a later failure causes the remap operation to fail.
994        if src_length != 0 && src_length > dst_length {
995            self.unmap(mm, (src_addr + dst_length)?, src_length - dst_length, released_mappings)?;
996        }
997
998        let dst_addr_for_map = match dst_addr {
999            None => DesiredAddress::Any,
1000            Some(dst_addr) => {
1001                // The mapping is being moved to a specific address.
1002                let dst_range =
1003                    dst_addr..(dst_addr.checked_add(dst_length).ok_or_else(|| errno!(EINVAL))?);
1004                if !src_range.intersect(&dst_range).is_empty() {
1005                    return error!(EINVAL);
1006                }
1007
1008                // The destination range must be unmapped. This must be done now and visible to
1009                // processes, even if a later failure causes the remap operation to fail.
1010                self.unmap(mm, dst_addr, dst_length, released_mappings)?;
1011
1012                DesiredAddress::Fixed(dst_addr)
1013            }
1014        };
1015
1016        // According to gVisor's aio_test, Linux checks for DONT_EXPAND after unmapping the dst
1017        // range.
1018        if dst_length > src_length && src_mapping.flags().contains(MappingFlags::DONT_EXPAND) {
1019            return error!(EFAULT);
1020        }
1021
1022        if src_range.end > original_range.end {
1023            // The source range is not one contiguous mapping. This check must be done only after
1024            // the source range is shrunk and the destination unmapped.
1025            return error!(EFAULT);
1026        }
1027
1028        match self.get_mapping_backing(&src_mapping) {
1029            MappingBacking::PrivateAnonymous => {
1030                let dst_addr =
1031                    self.select_address(dst_addr_for_map, dst_length, src_mapping.flags())?.addr();
1032                let dst_end = (dst_addr + dst_length)?;
1033
1034                let length_to_move = std::cmp::min(dst_length, src_length) as u64;
1035                let growth_start_addr = (dst_addr + length_to_move)?;
1036
1037                if dst_addr != src_addr {
1038                    let src_move_end = (src_range.start + length_to_move)?;
1039                    let range_to_move = src_range.start..src_move_end;
1040                    // Move the previously mapped pages into their new location.
1041                    mm.mapping_context.private_anonymous.move_pages(&range_to_move, dst_addr)?;
1042                }
1043
1044                // Userfault registration is not preserved by remap
1045                let new_flags =
1046                    src_mapping.flags().difference(MappingFlags::UFFD | MappingFlags::UFFD_MISSING);
1047                if src_mapping.mapping_mode() == MappingMode::Eager {
1048                    mm.mapping_context.map_in_user_vmar(
1049                        SelectedAddress::FixedOverwrite(dst_addr),
1050                        &mm.mapping_context.private_anonymous.backing,
1051                        dst_addr.ptr() as u64,
1052                        dst_length,
1053                        new_flags,
1054                        false,
1055                    )?;
1056
1057                    if dst_length > src_length {
1058                        // The mapping has grown, map new pages in to cover the growth.
1059                        let growth_length = dst_length - src_length;
1060
1061                        self.map_private_anonymous(
1062                            mm,
1063                            DesiredAddress::FixedOverwrite(growth_start_addr),
1064                            growth_length,
1065                            new_flags.access_flags(),
1066                            new_flags.options(),
1067                            false,
1068                            src_mapping.name().to_owned(),
1069                            released_mappings,
1070                        )?;
1071                    }
1072                }
1073
1074                released_mappings.extend(self.mappings.insert(
1075                    dst_addr..dst_end,
1076                    Mapping::new_private_anonymous(
1077                        new_flags,
1078                        src_mapping.name().to_owned(),
1079                        src_mapping.mapping_mode(),
1080                    ),
1081                ));
1082
1083                if dst_addr != src_addr && src_length != 0 && !keep_source {
1084                    self.unmap(mm, src_addr, src_length, released_mappings)?;
1085                }
1086
1087                return Ok(dst_addr);
1088            }
1089            MappingBacking::Memory(backing) => {
1090                // This mapping is backed by an FD or is a shared anonymous mapping. Just map the
1091                // range of the memory object covering the moved pages. If the memory object already
1092                // had COW semantics, this preserves them.
1093                let (dst_memory_offset, memory) =
1094                    (backing.address_to_offset(src_addr), backing.memory().clone());
1095
1096                let new_address = self.add_memory_mapping(
1097                    mm,
1098                    dst_addr_for_map,
1099                    memory,
1100                    dst_memory_offset,
1101                    dst_length,
1102                    src_mapping.flags(),
1103                    src_mapping.max_access(),
1104                    false,
1105                    src_mapping.name().to_owned(),
1106                    src_mapping.mapping_mode(),
1107                    released_mappings,
1108                )?;
1109
1110                if src_length != 0 && !keep_source {
1111                    // Only unmap the source range if this is not a copy and if there was not a specific
1112                    // request to not unmap. It was checked earlier that in case of src_length == 0
1113                    // this mapping is MAP_SHARED.
1114                    self.unmap(mm, src_addr, src_length, released_mappings)?;
1115                }
1116
1117                return Ok(new_address);
1118            }
1119        };
1120    }
1121
1122    // Checks if an operation may be performed over the target mapping that may
1123    // result in a split mapping.
1124    //
1125    // An operation may be forbidden if the target mapping only partially covers
1126    // an existing mapping with the `MappingOptions::DONT_SPLIT` flag set.
1127    fn check_has_unauthorized_splits(&self, addr: UserAddress, length: usize) -> bool {
1128        let query_range = addr..addr.saturating_add(length);
1129        let mut intersection = self.mappings.range(query_range.clone());
1130
1131        // A mapping is not OK if it disallows splitting and the target range
1132        // does not fully cover the mapping range.
1133        let check_if_mapping_has_unauthorized_split =
1134            |mapping: Option<(&Range<UserAddress>, &Mapping)>| {
1135                mapping.is_some_and(|(mapping_range, mapping)| {
1136                    mapping.flags().contains(MappingFlags::DONT_SPLIT)
1137                        && (mapping_range.start < query_range.start
1138                            || query_range.end < mapping_range.end)
1139                })
1140            };
1141
1142        // We only check the first and last mappings in the range because naturally,
1143        // the mappings in the middle are fully covered by the target mapping and
1144        // won't be split.
1145        check_if_mapping_has_unauthorized_split(intersection.next())
1146            || check_if_mapping_has_unauthorized_split(intersection.next_back())
1147    }
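    // Example (illustrative addresses): if [0x10000, 0x20000) was mapped with
    // MappingOptions::DONT_SPLIT, an operation over [0x14000, 0x18000) would split that
    // mapping and is therefore rejected, while an operation covering the whole
    // [0x10000, 0x20000) range (or more) is allowed. Only the first and last mappings
    // intersecting the target range can end up partially covered, hence only `next()`
    // and `next_back()` are inspected above.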
1148
1149    /// Unmaps the specified range. Unmapped mappings are placed in `released_mappings`.
1150    fn unmap(
1151        &mut self,
1152        mm: &Arc<MemoryManager>,
1153        addr: UserAddress,
1154        length: usize,
1155        released_mappings: &mut ReleasedMappings,
1156    ) -> Result<(), Errno> {
1157        if !addr.is_aligned(*PAGE_SIZE) {
1158            return error!(EINVAL);
1159        }
1160        let length = round_up_to_system_page_size(length)?;
1161        if length == 0 {
1162            return error!(EINVAL);
1163        }
1164
1165        if self.check_has_unauthorized_splits(addr, length) {
1166            return error!(EINVAL);
1167        }
1168
1169        // Unmap the range, including the tail of any range that would have been split. This
1170        // operation is safe because we're operating on another process.
1171        #[allow(
1172            clippy::undocumented_unsafe_blocks,
1173            reason = "Force documented unsafe blocks in Starnix"
1174        )]
1175        match unsafe { mm.mapping_context.user_vmar.unmap(addr.ptr(), length) } {
1176            Ok(_) => (),
1177            Err(zx::Status::NOT_FOUND) => (),
1178            Err(zx::Status::INVALID_ARGS) => return error!(EINVAL),
1179            Err(status) => {
1180                impossible_error(status);
1181            }
1182        };
1183
1184        self.update_after_unmap(mm, addr, length, released_mappings)?;
1185
1186        Ok(())
1187    }
1188
1189    // Updates `self.mappings` after the specified range was unmapped.
1190    //
1191    // The range to unmap can span multiple mappings, and can split mappings if
1192    // the range start or end falls in the middle of a mapping.
1193    //
1194    // Private anonymous memory is contained in the same memory object; the pages of that object
1195    // that are no longer reachable should be released.
1196    //
1197    // File-backed mappings don't need to have their memory object modified.
1198    //
1199    // Unmapped mappings are placed in `released_mappings`.
1200    fn update_after_unmap(
1201        &mut self,
1202        mm: &Arc<MemoryManager>,
1203        addr: UserAddress,
1204        length: usize,
1205        released_mappings: &mut ReleasedMappings,
1206    ) -> Result<(), Errno> {
1207        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
1208        let unmap_range = addr..end_addr;
1209
1210        // Remove any shadow mappings for mlock()'d pages that are now unmapped.
1211        released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(unmap_range.clone()));
1212
1213        for (range, mapping) in self.mappings.range(unmap_range.clone()) {
1214            // Deallocate any pages in the private, anonymous backing that are now unreachable.
1215            if let MappingBacking::PrivateAnonymous = self.get_mapping_backing(mapping) {
1216                let unmapped_range = &unmap_range.intersect(range);
1217
1218                mm.inflight_vmspliced_payloads.handle_unmapping(
1219                    &mm.mapping_context.private_anonymous.backing,
1220                    unmapped_range,
1221                )?;
1222
1223                mm.mapping_context
1224                    .private_anonymous
1225                    .zero(unmapped_range.start, unmapped_range.end - unmapped_range.start)?;
1226            }
1227        }
1228        released_mappings.extend(self.mappings.remove(unmap_range));
1229        return Ok(());
1230    }
1231
1232    fn protect(
1233        &mut self,
1234        current_task: &CurrentTask,
1235        addr: UserAddress,
1236        length: usize,
1237        prot_flags: ProtectionFlags,
1238        released_mappings: &mut ReleasedMappings,
1239    ) -> Result<(), Errno> {
1240        let vmar_flags = prot_flags.to_vmar_flags();
1241        let page_size = *PAGE_SIZE;
1242        let end = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(page_size)?;
1243
1244        if self.check_has_unauthorized_splits(addr, length) {
1245            return error!(EINVAL);
1246        }
1247
1248        let prot_range = if prot_flags.contains(ProtectionFlags::GROWSDOWN) {
1249            let mut start = addr;
1250            let Some((range, mapping)) = self.mappings.get(start) else {
1251                return error!(EINVAL);
1252            };
1253            // Ensure that the mapping has GROWSDOWN if PROT_GROWSDOWN was specified.
1254            if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1255                return error!(EINVAL);
1256            }
1257            let access_flags = mapping.flags().access_flags();
1258            // From <https://man7.org/linux/man-pages/man2/mprotect.2.html>:
1259            //
1260            //   PROT_GROWSDOWN
1261            //     Apply the protection mode down to the beginning of a
1262            //     mapping that grows downward (which should be a stack
1263            //     segment or a segment mapped with the MAP_GROWSDOWN flag
1264            //     set).
1265            start = range.start;
1266            while let Some((range, mapping)) =
1267                self.mappings.get(start.saturating_sub(page_size as usize))
1268            {
1269                if !mapping.flags().contains(MappingFlags::GROWSDOWN)
1270                    || mapping.flags().access_flags() != access_flags
1271                {
1272                    break;
1273                }
1274                start = range.start;
1275            }
1276            start..end
1277        } else {
1278            addr..end
1279        };
1280
1281        let addr = prot_range.start;
1282        let length = prot_range.end - prot_range.start;
1283
1284        // TODO: We should check the max_access flags on all the mappings in this range.
1285        //       There are cases where max_access is more restrictive than the Zircon rights
1286        //       we hold on the underlying VMOs.
1287
1288        // TODO(https://fxbug.dev/411617451): `mprotect` should apply the protection flags
1289        // until it encounters a mapping that doesn't allow it, rather than not apply the protection
1290        // flags at all if a single mapping doesn't allow it.
1291        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1292            security::file_mprotect(current_task, range, mapping, prot_flags)?;
1293        }
1294
1295        // We need to map any lazy mappings before we can protect them.
1296        let mapping_context = &current_task.mm()?.mapping_context;
1297        self.ensure_range_mapped_in_user_vmar(addr, Some(length), mapping_context)?;
1298
1299        // Make one call to mprotect to update all the zircon protections.
1300        // SAFETY: This is safe because the vmar belongs to a different process.
1301        unsafe { mapping_context.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(
1302            |s| match s {
1303                zx::Status::INVALID_ARGS => errno!(EINVAL),
1304                zx::Status::NOT_FOUND => {
1305                    track_stub!(
1306                        TODO("https://fxbug.dev/322875024"),
1307                        "mprotect: succeed and update prot after NOT_FOUND"
1308                    );
1309                    errno!(EINVAL)
1310                }
1311                zx::Status::ACCESS_DENIED => errno!(EACCES),
1312                _ => impossible_error(s),
1313            },
1314        )?;
1315
1316        // Update the flags on each mapping in the range.
1317        let mut updates = vec![];
1318        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1319            if mapping.flags().contains(MappingFlags::UFFD) {
1320                track_stub!(
1321                    TODO("https://fxbug.dev/297375964"),
1322                    "mprotect on uffd-registered range should not alter protections"
1323                );
1324                return error!(EINVAL);
1325            }
1326            let range = range.intersect(&prot_range);
1327            let mut mapping = mapping.clone();
1328            mapping.set_flags(mapping.flags().with_access_flags(prot_flags));
1329            updates.push((range, mapping));
1330        }
1331        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1332        for (range, mapping) in updates {
1333            released_mappings.extend(self.mappings.insert(range, mapping));
1334        }
1335        Ok(())
1336    }
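    // Illustrative example of the PROT_GROWSDOWN handling above (hypothetical addresses):
    // given three adjacent GROWSDOWN mappings [0x1000, 0x2000), [0x2000, 0x3000), and
    // [0x3000, 0x4000), all currently read-only, an mprotect(PROT_READ | PROT_WRITE |
    // PROT_GROWSDOWN) call on [0x3000, 0x4000) walks downward through the contiguous
    // GROWSDOWN mappings with matching access flags and applies the new protection to
    // the whole [0x1000, 0x4000) range.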
1337
1338    fn madvise(
1339        &mut self,
1340        context: &MappingContext,
1341        addr: UserAddress,
1342        length: usize,
1343        advice: u32,
1344        released_mappings: &mut ReleasedMappings,
1345    ) -> Result<(), Errno> {
1346        if !addr.is_aligned(*PAGE_SIZE) {
1347            return error!(EINVAL);
1348        }
1349
1350        let end_addr =
1351            addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(*PAGE_SIZE)?;
1352        if end_addr > context.max_address() {
1353            return error!(EFAULT);
1354        }
1355
1356        if advice == MADV_NORMAL {
1357            track_stub!(TODO("https://fxbug.dev/322874202"), "madvise undo hints for MADV_NORMAL");
1358            return Ok(());
1359        }
1360
1361        let mut updates = vec![];
1362        let range_for_op = addr..end_addr;
1363        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
1364            let range_to_zero = range.intersect(&range_for_op);
1365            if range_to_zero.is_empty() {
1366                continue;
1367            }
1368            let start_offset = mapping.address_to_offset(range_to_zero.start);
1369            let end_offset = mapping.address_to_offset(range_to_zero.end);
1370            if advice == MADV_DONTFORK
1371                || advice == MADV_DOFORK
1372                || advice == MADV_WIPEONFORK
1373                || advice == MADV_KEEPONFORK
1374                || advice == MADV_DONTDUMP
1375                || advice == MADV_DODUMP
1376                || advice == MADV_MERGEABLE
1377                || advice == MADV_UNMERGEABLE
1378            {
1379                // WIPEONFORK is only supported on private anonymous mappings per madvise(2).
1380                // KEEPONFORK may be specified on ranges that cover other kinds of mappings; it has
1381                // no effect on mappings that are not private and anonymous, since such mappings can
1382                // never have the WIPEONFORK option set.
1383                if advice == MADV_WIPEONFORK && !mapping.private_anonymous() {
1384                    return error!(EINVAL);
1385                }
1386                let new_flags = match advice {
1387                    MADV_DONTFORK => mapping.flags() | MappingFlags::DONTFORK,
1388                    MADV_DOFORK => mapping.flags() & MappingFlags::DONTFORK.complement(),
1389                    MADV_WIPEONFORK => mapping.flags() | MappingFlags::WIPEONFORK,
1390                    MADV_KEEPONFORK => mapping.flags() & MappingFlags::WIPEONFORK.complement(),
1391                    MADV_DONTDUMP => {
1392                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTDUMP");
1393                        mapping.flags()
1394                    }
1395                    MADV_DODUMP => {
1396                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DODUMP");
1397                        mapping.flags()
1398                    }
1399                    MADV_MERGEABLE => {
1400                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_MERGEABLE");
1401                        mapping.flags()
1402                    }
1403                    MADV_UNMERGEABLE => {
1404                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_UNMERGEABLE");
1405                        mapping.flags()
1406                    }
1407                    // Only the variants in this match should be reachable given the condition for
1408                    // the containing branch.
1409                    unknown_advice => unreachable!("unknown advice {unknown_advice}"),
1410                };
1411                let mut new_mapping = mapping.clone();
1412                new_mapping.set_flags(new_flags);
1413                updates.push((range_to_zero, new_mapping));
1414            } else {
1415                if mapping.flags().contains(MappingFlags::SHARED) {
1416                    continue;
1417                }
1418                let op = match advice {
1419                    MADV_DONTNEED if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1420                        // Note: we cannot simply implement MADV_DONTNEED with
1421                        // zx::VmoOp::DONT_NEED because the two have different
1422                        // semantics.
1423                        track_stub!(
1424                            TODO("https://fxbug.dev/322874496"),
1425                            "MADV_DONTNEED with file-backed mapping"
1426                        );
1427                        return error!(EINVAL);
1428                    }
1429                    MADV_DONTNEED if mapping.flags().contains(MappingFlags::LOCKED) => {
1430                        return error!(EINVAL);
1431                    }
1432                    MADV_DONTNEED => zx::VmoOp::ZERO,
1433                    MADV_DONTNEED_LOCKED => {
1434                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTNEED_LOCKED");
1435                        return error!(EINVAL);
1436                    }
1437                    MADV_WILLNEED => {
1438                        if mapping.flags().contains(MappingFlags::WRITE) {
1439                            zx::VmoOp::COMMIT
1440                        } else {
1441                            zx::VmoOp::PREFETCH
1442                        }
1443                    }
1444                    MADV_COLD => {
1445                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLD");
1446                        return error!(EINVAL);
1447                    }
1448                    MADV_PAGEOUT => {
1449                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_PAGEOUT");
1450                        return error!(EINVAL);
1451                    }
1452                    MADV_POPULATE_READ => {
1453                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_POPULATE_READ");
1454                        return error!(EINVAL);
1455                    }
1456                    MADV_RANDOM => {
1457                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_RANDOM");
1458                        return error!(EINVAL);
1459                    }
1460                    MADV_SEQUENTIAL => {
1461                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SEQUENTIAL");
1462                        return error!(EINVAL);
1463                    }
1464                    MADV_FREE if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1465                        track_stub!(
1466                            TODO("https://fxbug.dev/411748419"),
1467                            "MADV_FREE with file-backed mapping"
1468                        );
1469                        return error!(EINVAL);
1470                    }
1471                    MADV_FREE if mapping.flags().contains(MappingFlags::LOCKED) => {
1472                        return error!(EINVAL);
1473                    }
1474                    MADV_FREE => {
1475                        track_stub!(TODO("https://fxbug.dev/411748419"), "MADV_FREE");
1476                        // TODO(https://fxbug.dev/411748419) For now, treat MADV_FREE like
1477                        // MADV_DONTNEED as a stopgap until we have proper support.
1478                        zx::VmoOp::ZERO
1479                    }
1480                    MADV_REMOVE => {
1481                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_REMOVE");
1482                        return error!(EINVAL);
1483                    }
1484                    MADV_HWPOISON => {
1485                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HWPOISON");
1486                        return error!(EINVAL);
1487                    }
1488                    MADV_SOFT_OFFLINE => {
1489                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SOFT_OFFLINE");
1490                        return error!(EINVAL);
1491                    }
1492                    MADV_HUGEPAGE => {
1493                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HUGEPAGE");
1494                        return error!(EINVAL);
1495                    }
1496                    MADV_COLLAPSE => {
1497                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLLAPSE");
1498                        return error!(EINVAL);
1499                    }
1500                    MADV_NOHUGEPAGE => return Ok(()),
1501                    advice => {
1502                        track_stub!(TODO("https://fxbug.dev/322874202"), "madvise", advice);
1503                        return error!(EINVAL);
1504                    }
1505                };
1506
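                // Apply the chosen operation to the backing memory object over the
                // intersected range.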
1507                let memory = match self.get_mapping_backing(mapping) {
1508                    MappingBacking::Memory(backing) => backing.memory(),
1509                    MappingBacking::PrivateAnonymous => &context.private_anonymous.backing,
1510                };
1511                memory.op_range(op, start_offset, end_offset - start_offset).map_err(
1512                    |s| match s {
1513                        zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1514                        zx::Status::NO_MEMORY => errno!(ENOMEM),
1515                        zx::Status::INVALID_ARGS => errno!(EINVAL),
1516                        zx::Status::ACCESS_DENIED => errno!(EACCES),
1517                        _ => impossible_error(s),
1518                    },
1519                )?;
1520            }
1521        }
1522        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1523        for (range, mapping) in updates {
1524            released_mappings.extend(self.mappings.insert(range, mapping));
1525        }
1526        Ok(())
1527    }
1528
1529    fn mlock<L>(
1530        &mut self,
1531        context: &MappingContext,
1532        current_task: &CurrentTask,
1533        locked: &mut Locked<L>,
1534        desired_addr: UserAddress,
1535        desired_length: usize,
1536        on_fault: bool,
1537        released_mappings: &mut ReleasedMappings,
1538    ) -> Result<(), Errno>
1539    where
1540        L: LockBefore<ThreadGroupLimits>,
1541    {
1542        let desired_end_addr =
1543            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1544        let start_addr = round_down_to_system_page_size(desired_addr)?;
1545        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1546
1547        let mut updates = vec![];
1548        let mut bytes_mapped_in_range = 0;
1549        let mut num_new_locked_bytes = 0;
1550        let mut failed_to_lock = false;
1551        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1552            let mut range = range.clone();
1553            let mut mapping = mapping.clone();
1554
1555            // Handle mappings that start before the region to be locked.
1556            range.start = std::cmp::max(range.start, start_addr);
1557            // Handle mappings that extend past the region to be locked.
1558            range.end = std::cmp::min(range.end, end_addr);
1559
1560            bytes_mapped_in_range += (range.end - range.start) as u64;
1561
1562            // PROT_NONE mappings generate ENOMEM but are left locked.
1563            if !mapping
1564                .flags()
1565                .intersects(MappingFlags::READ | MappingFlags::WRITE | MappingFlags::EXEC)
1566            {
1567                failed_to_lock = true;
1568            }
1569
1570            if !mapping.flags().contains(MappingFlags::LOCKED) {
1571                num_new_locked_bytes += (range.end - range.start) as u64;
1572                let shadow_mapping = match current_task.kernel().features.mlock_pin_flavor {
1573                    // Pin the memory by mapping the backing memory into the high priority vmar.
1574                    MlockPinFlavor::ShadowProcess => {
1575                        let shadow_process =
1576                            current_task.kernel().expando.get_or_try_init(|| {
1577                                memory_pinning::ShadowProcess::new(zx::Name::new_lossy(
1578                                    "starnix_mlock_pins",
1579                                ))
1580                                .map(MlockShadowProcess)
1581                                .map_err(|_| errno!(EPERM))
1582                            })?;
1583
1584                        let (vmo, offset) = match self.get_mapping_backing(&mapping) {
1585                            MappingBacking::Memory(m) => (
1586                                m.memory().as_vmo().ok_or_else(|| errno!(ENOMEM))?,
1587                                m.address_to_offset(range.start),
1588                            ),
1589                            MappingBacking::PrivateAnonymous => (
1590                                context
1591                                    .private_anonymous
1592                                    .backing
1593                                    .as_vmo()
1594                                    .ok_or_else(|| errno!(ENOMEM))?,
1595                                range.start.ptr() as u64,
1596                            ),
1597                        };
1598                        Some(shadow_process.0.pin_pages(vmo, offset, range.end - range.start)?)
1599                    }
1600
1601                    // These flavors rely on VMAR-level operations, so only per-mapping flags need to be set.
1602                    MlockPinFlavor::Noop | MlockPinFlavor::VmarAlwaysNeed => None,
1603                };
1604                mapping.set_mlock();
1605                updates.push((range, mapping, shadow_mapping));
1606            }
1607        }
1608
1609        if bytes_mapped_in_range as usize != end_addr - start_addr {
1610            return error!(ENOMEM);
1611        }
1612
1613        let memlock_rlimit = current_task.thread_group().get_rlimit(locked, Resource::MEMLOCK);
1614        let total_locked = self.num_locked_bytes(
1615            UserAddress::from(context.user_vmar_info.base as u64)
1616                ..UserAddress::from(
1617                    (context.user_vmar_info.base + context.user_vmar_info.len) as u64,
1618                ),
1619        );
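        // Exceeding RLIMIT_MEMLOCK is only permitted for tasks holding CAP_IPC_LOCK. Per
        // mlock(2), exceeding a nonzero limit yields ENOMEM, while a limit of zero yields
        // EPERM.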
1620        if total_locked + num_new_locked_bytes > memlock_rlimit {
1621            if crate::security::check_task_capable(current_task, CAP_IPC_LOCK).is_err() {
1622                let code = if memlock_rlimit > 0 { errno!(ENOMEM) } else { errno!(EPERM) };
1623                return Err(code);
1624            }
1625        }
1626
1627        let op_range_status_to_errno = |e| match e {
1628            zx::Status::BAD_STATE | zx::Status::NOT_SUPPORTED => errno!(ENOMEM),
1629            zx::Status::INVALID_ARGS | zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1630            zx::Status::ACCESS_DENIED => {
1631                unreachable!("user vmar should always have needed rights")
1632            }
1633            zx::Status::BAD_HANDLE => {
1634                unreachable!("user vmar should always be a valid handle")
1635            }
1636            zx::Status::WRONG_TYPE => unreachable!("user vmar handle should be a vmar"),
1637            _ => unreachable!("unknown error from op_range on user vmar for mlock: {e}"),
1638        };
1639
1640        self.ensure_range_mapped_in_user_vmar(start_addr, Some(end_addr - start_addr), context)?;
1641
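        // Unless on-fault locking was requested (or forced by the mlock_always_onfault
        // feature), eagerly populate the locked range so its pages are resident up front.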
1642        if !on_fault && !current_task.kernel().features.mlock_always_onfault {
1643            context
1644                .user_vmar
1645                .op_range(zx::VmarOp::PREFETCH, start_addr.ptr(), end_addr - start_addr)
1646                .map_err(op_range_status_to_errno)?;
1647        }
1648
1649        match current_task.kernel().features.mlock_pin_flavor {
1650            MlockPinFlavor::VmarAlwaysNeed => {
1651                context
1652                    .user_vmar
1653                    .op_range(zx::VmarOp::ALWAYS_NEED, start_addr.ptr(), end_addr - start_addr)
1654                    .map_err(op_range_status_to_errno)?;
1655            }
1656            // The shadow process doesn't use any vmar-level operations to pin memory.
1657            MlockPinFlavor::Noop | MlockPinFlavor::ShadowProcess => (),
1658        }
1659
1660        for (range, mapping, shadow_mapping) in updates {
1661            if let Some(shadow_mapping) = shadow_mapping {
1662                released_mappings.extend_pins(
1663                    self.shadow_mappings_for_mlock.insert(range.clone(), shadow_mapping),
1664                );
1665            }
1666            released_mappings.extend(self.mappings.insert(range, mapping));
1667        }
1668
1669        if failed_to_lock { error!(ENOMEM) } else { Ok(()) }
1670    }
1671
1672    fn munlock(
1673        &mut self,
1674        _current_task: &CurrentTask,
1675        desired_addr: UserAddress,
1676        desired_length: usize,
1677        released_mappings: &mut ReleasedMappings,
1678    ) -> Result<(), Errno> {
1679        let desired_end_addr =
1680            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1681        let start_addr = round_down_to_system_page_size(desired_addr)?;
1682        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1683
1684        let mut updates = vec![];
1685        let mut bytes_mapped_in_range = 0;
1686        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1687            let mut range = range.clone();
1688            let mut mapping = mapping.clone();
1689
1690            // Handle mappings that start before the region to be unlocked.
1691            range.start = std::cmp::max(range.start, start_addr);
1692            // Handle mappings that extend past the region to be unlocked.
1693            range.end = std::cmp::min(range.end, end_addr);
1694
1695            bytes_mapped_in_range += (range.end - range.start) as u64;
1696
1697            if mapping.flags().contains(MappingFlags::LOCKED) {
1698                // This clears the locking for the shadow process pin flavor. It's not currently
1699                // possible to actually unlock pages that were locked with the
1700                // ZX_VMAR_OP_ALWAYS_NEED pin flavor.
1701                mapping.clear_mlock();
1702                updates.push((range, mapping));
1703            }
1704        }
1705
1706        if bytes_mapped_in_range as usize != end_addr - start_addr {
1707            return error!(ENOMEM);
1708        }
1709
1710        for (range, mapping) in updates {
1711            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
1712            released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(range));
1713        }
1714
1715        Ok(())
1716    }
1717
1718    pub fn num_locked_bytes(&self, range: impl RangeBounds<UserAddress>) -> u64 {
1719        self.mappings
1720            .map
1721            .range(range)
1722            .filter(|(_, mapping)| mapping.flags().contains(MappingFlags::LOCKED))
1723            .map(|(range, _)| (range.end - range.start) as u64)
1724            .sum()
1725    }
1726
1727    fn get_mappings_for_vmsplice(
1728        &self,
1729        mm: &Arc<MemoryManager>,
1730        buffers: &UserBuffers,
1731    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
1732        let mut vmsplice_mappings = Vec::new();
1733
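        // Split each user buffer into per-mapping payload segments that reference the
        // mapping's backing memory object and offset directly.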
1734        for UserBuffer { mut address, length } in buffers.iter().copied() {
1735            let mappings = self.get_contiguous_mappings_at(address, length, &mm.mapping_context)?;
1736            for (mapping, length) in mappings {
1737                let vmsplice_payload = match self.get_mapping_backing(mapping) {
1738                    MappingBacking::Memory(m) => VmsplicePayloadSegment {
1739                        addr_offset: address,
1740                        length,
1741                        memory: m.memory().clone(),
1742                        memory_offset: m.address_to_offset(address),
1743                    },
1744                    MappingBacking::PrivateAnonymous => VmsplicePayloadSegment {
1745                        addr_offset: address,
1746                        length,
1747                        memory: mm.mapping_context.private_anonymous.backing.clone(),
1748                        memory_offset: address.ptr() as u64,
1749                    },
1750                };
1751                vmsplice_mappings.push(VmsplicePayload::new(Arc::downgrade(mm), vmsplice_payload));
1752
1753                address = (address + length)?;
1754            }
1755        }
1756
1757        Ok(vmsplice_mappings)
1758    }
1759
1760    /// Returns all the mappings starting at `addr`, and continuing until either `length` bytes have
1761    /// been covered or an unmapped page is reached.
1762    ///
1763    /// Mappings are returned in ascending order along with the number of bytes that intersect the
1764    /// requested range. The returned mappings are guaranteed to be contiguous and the total length
1765    /// corresponds to the number of contiguous mapped bytes starting from `addr`, i.e.:
1766    /// - 0 (empty iterator) if `addr` is not mapped.
1767    /// - exactly `length` if the requested range is fully mapped.
1768    /// - the offset of the first unmapped page (between 0 and `length`) if the requested range is
1769    ///   only partially mapped.
1770    ///
1771    /// Returns EFAULT if the requested range overflows or extends past the end of the vmar.
1772    fn get_contiguous_mappings_at(
1773        &self,
1774        addr: UserAddress,
1775        length: usize,
1776        context: &MappingContext,
1777    ) -> Result<impl Iterator<Item = (&Mapping, usize)>, Errno> {
1778        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EFAULT))?;
1779        if end_addr > context.max_address() {
1780            return error!(EFAULT);
1781        }
1782
1783        // Iterate over all contiguous mappings intersecting the requested range.
1784        let mut mappings = self.mappings.range(addr..end_addr);
1785        let mut prev_range_end = None;
1786        let mut offset = 0;
1787        let result = std::iter::from_fn(move || {
1788            if offset != length {
1789                if let Some((range, mapping)) = mappings.next() {
1790                    return match prev_range_end {
1791                        // If this is the first mapping that we are considering, it may not actually
1792                        // contain `addr` at all.
1793                        None if range.start > addr => None,
1794
1795                        // Subsequent mappings may not be contiguous.
1796                        Some(prev_range_end) if range.start != prev_range_end => None,
1797
1798                        // This mapping can be returned.
1799                        _ => {
1800                            let mapping_length = std::cmp::min(length, range.end - addr) - offset;
1801                            offset += mapping_length;
1802                            prev_range_end = Some(range.end);
1803                            Some((mapping, mapping_length))
1804                        }
1805                    };
1806                }
1807            }
1808
1809            None
1810        });
1811
1812        Ok(result)
1813    }
1814
1815    /// Determines whether a fault at the given address could be covered by extending a growsdown
1816    /// mapping.
1817    ///
1818    /// If the address already belongs to a mapping, this function returns `None`. If the next
1819    /// mapping above the given address has the `MappingFlags::GROWSDOWN` flag, this function
1820    /// returns the address at which that mapping starts and the mapping itself. Otherwise, this
1821    /// function returns `None`.
1822    fn find_growsdown_mapping(&self, addr: UserAddress) -> Option<(UserAddress, &Mapping)> {
1823        match self.mappings.range(addr..).next() {
1824            Some((range, mapping)) => {
1825                if range.contains(&addr) {
1826                    // |addr| is already contained within a mapping, nothing to grow.
1827                    return None;
1828                } else if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1829                    // The next mapping above the given address does not have the
1830                    // `MappingFlags::GROWSDOWN` flag.
1831                    None
1832                } else {
1833                    Some((range.start, mapping))
1834                }
1835            }
1836            None => None,
1837        }
1838    }
1839
1840    /// Determines if an access at a given address could be covered by extending a growsdown mapping
1841    /// and extends it if possible. Returns true if the given address is covered by a mapping.
1842    fn extend_growsdown_mapping_to_address(
1843        &mut self,
1844        mm: &Arc<MemoryManager>,
1845        addr: UserAddress,
1846        is_write: bool,
1847    ) -> Result<bool, Error> {
1848        let Some((mapping_low_addr, mapping_to_grow)) = self.find_growsdown_mapping(addr) else {
1849            return Ok(false);
1850        };
1851        if is_write && !mapping_to_grow.can_write() {
1852            // Don't grow a read-only GROWSDOWN mapping for a write fault, it won't work.
1853            return Ok(false);
1854        }
1855        if !mapping_to_grow.flags().contains(MappingFlags::ANONYMOUS) {
1856            // Currently, we only grow anonymous mappings.
1857            return Ok(false);
1858        }
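        // Grow the mapping down to the start of the page containing the faulting address.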
1859        let low_addr = (addr - (addr.ptr() as u64 % *PAGE_SIZE))?;
1860        let high_addr = mapping_low_addr;
1861
1862        let length = high_addr
1863            .ptr()
1864            .checked_sub(low_addr.ptr())
1865            .ok_or_else(|| anyhow!("Invalid growth range"))?;
1866
1867        let mut released_mappings = ReleasedMappings::default();
1868        self.map_anonymous(
1869            mm,
1870            DesiredAddress::FixedOverwrite(low_addr),
1871            length,
1872            mapping_to_grow.flags().access_flags(),
1873            mapping_to_grow.flags().options(),
1874            mapping_to_grow.name().to_owned(),
1875            &mut released_mappings,
1876        )?;
1877        // There can't be any released mappings because `find_growsdown_mapping` returns None if
1878        // a mapping already exists in this range.
1879        assert!(
1880            released_mappings.is_empty(),
1881            "expected to not remove mappings by inserting, got {released_mappings:#?}"
1882        );
1883        Ok(true)
1884    }
1885
1886    /// Reads exactly `bytes.len()` bytes of memory.
1887    ///
1888    /// # Parameters
1889    /// - `addr`: The address to read data from.
1890    /// - `bytes`: The byte array to read into.
1891    fn read_memory<'a>(
1892        &self,
1893        addr: UserAddress,
1894        bytes: &'a mut [MaybeUninit<u8>],
1895        context: &MappingContext,
1896    ) -> Result<&'a mut [u8], Errno> {
1897        let mut bytes_read = 0;
1898        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
1899            let next_offset = bytes_read + len;
1900            self.read_mapping_memory(
1901                (addr + bytes_read)?,
1902                mapping,
1903                &mut bytes[bytes_read..next_offset],
1904                context,
1905            )?;
1906            bytes_read = next_offset;
1907        }
1908
1909        if bytes_read != bytes.len() {
1910            error!(EFAULT)
1911        } else {
1912            // SAFETY: The created slice is properly aligned/sized since it
1913            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1914            // the same layout as `T`. Also note that `bytes_read` bytes have
1915            // been properly initialized.
1916            let bytes = unsafe {
1917                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1918            };
1919            Ok(bytes)
1920        }
1921    }
1922
1923    /// Reads exactly `bytes.len()` bytes of memory from `addr`.
1924    ///
1925    /// # Parameters
1926    /// - `addr`: The address to read data from.
1927    /// - `bytes`: The byte array to read into.
1928    fn read_mapping_memory<'a>(
1929        &self,
1930        addr: UserAddress,
1931        mapping: &Mapping,
1932        bytes: &'a mut [MaybeUninit<u8>],
1933        context: &MappingContext,
1934    ) -> Result<&'a mut [u8], Errno> {
1935        if !mapping.can_read() {
1936            return error!(EFAULT, "read_mapping_memory called on unreadable mapping");
1937        }
1938        match self.get_mapping_backing(mapping) {
1939            MappingBacking::Memory(backing) => backing.read_memory(addr, bytes),
1940            MappingBacking::PrivateAnonymous => context.private_anonymous.read_memory(addr, bytes),
1941        }
1942    }
1943
1944    /// Reads bytes starting at `addr`, continuing until either `bytes.len()` bytes have been read
1945    /// or no more bytes can be read.
1946    ///
1947    /// This is used, for example, to read null-terminated strings where the exact length is not
1948    /// known, only the maximum length is.
1949    ///
1950    /// # Parameters
1951    /// - `addr`: The address to read data from.
1952    /// - `bytes`: The byte array to read into.
1953    fn read_memory_partial<'a>(
1954        &self,
1955        addr: UserAddress,
1956        bytes: &'a mut [MaybeUninit<u8>],
1957        context: &MappingContext,
1958    ) -> Result<&'a mut [u8], Errno> {
1959        let mut bytes_read = 0;
1960        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
1961            let next_offset = bytes_read + len;
1962            if self
1963                .read_mapping_memory(
1964                    (addr + bytes_read)?,
1965                    mapping,
1966                    &mut bytes[bytes_read..next_offset],
1967                    context,
1968                )
1969                .is_err()
1970            {
1971                break;
1972            }
1973            bytes_read = next_offset;
1974        }
1975
1976        // If at least one byte was requested but we got none, it means that `addr` was invalid.
1977        if !bytes.is_empty() && bytes_read == 0 {
1978            error!(EFAULT)
1979        } else {
1980            // SAFETY: The created slice is properly aligned/sized since it
1981            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1982            // the same layout as `T`. Also note that `bytes_read` bytes have
1983            // been properly initialized.
1984            let bytes = unsafe {
1985                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1986            };
1987            Ok(bytes)
1988        }
1989    }
1990
1991    /// Like `read_memory_partial` but only returns the bytes up to and including
1992    /// a null (zero) byte.
1993    fn read_memory_partial_until_null_byte<'a>(
1994        &self,
1995        addr: UserAddress,
1996        bytes: &'a mut [MaybeUninit<u8>],
1997        context: &MappingContext,
1998    ) -> Result<&'a mut [u8], Errno> {
1999        let read_bytes = self.read_memory_partial(addr, bytes, context)?;
2000        let max_len = memchr::memchr(b'\0', read_bytes)
2001            .map_or_else(|| read_bytes.len(), |null_index| null_index + 1);
2002        Ok(&mut read_bytes[..max_len])
2003    }
2004
2005    /// Writes the provided bytes.
2006    ///
2007    /// In case of success, the number of bytes written will always be `bytes.len()`.
2008    ///
2009    /// # Parameters
2010    /// - `addr`: The address to write to.
2011    /// - `bytes`: The bytes to write.
2012    fn write_memory(
2013        &self,
2014        addr: UserAddress,
2015        bytes: &[u8],
2016        context: &MappingContext,
2017    ) -> Result<usize, Errno> {
2018        let mut bytes_written = 0;
2019        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2020            let next_offset = bytes_written + len;
2021            self.write_mapping_memory(
2022                (addr + bytes_written)?,
2023                mapping,
2024                &bytes[bytes_written..next_offset],
2025                context,
2026            )?;
2027            bytes_written = next_offset;
2028        }
2029
2030        if bytes_written != bytes.len() { error!(EFAULT) } else { Ok(bytes.len()) }
2031    }
2032
2033    /// Writes the provided bytes to `addr`.
2034    ///
2035    /// # Parameters
2036    /// - `addr`: The address to write to.
2037    /// - `bytes`: The bytes to write to the memory object.
2038    fn write_mapping_memory(
2039        &self,
2040        addr: UserAddress,
2041        mapping: &Mapping,
2042        bytes: &[u8],
2043        context: &MappingContext,
2044    ) -> Result<(), Errno> {
2045        if !mapping.can_write() {
2046            return error!(EFAULT, "write_mapping_memory called on unwritable memory");
2047        }
2048        match self.get_mapping_backing(mapping) {
2049            MappingBacking::Memory(backing) => backing.write_memory(addr, bytes),
2050            MappingBacking::PrivateAnonymous => context.private_anonymous.write_memory(addr, bytes),
2051        }
2052    }
2053
2054    /// Writes bytes starting at `addr`, continuing until either `bytes.len()` bytes have been
2055    /// written or no more bytes can be written.
2056    ///
2057    /// # Parameters
2058    /// - `addr`: The address to write to.
2059    /// - `bytes`: The byte array to write from.
2060    fn write_memory_partial(
2061        &self,
2062        addr: UserAddress,
2063        bytes: &[u8],
2064        context: &MappingContext,
2065    ) -> Result<usize, Errno> {
2066        let mut bytes_written = 0;
2067        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2068            let next_offset = bytes_written + len;
2069            if self
2070                .write_mapping_memory(
2071                    (addr + bytes_written)?,
2072                    mapping,
2073                    &bytes[bytes_written..next_offset],
2074                    context,
2075                )
2076                .is_err()
2077            {
2078                break;
2079            }
2080            bytes_written = next_offset;
2081        }
2082
2083        if !bytes.is_empty() && bytes_written == 0 { error!(EFAULT) } else { Ok(bytes_written) }
2084    }
2085
2086    fn zero(
2087        &self,
2088        addr: UserAddress,
2089        length: usize,
2090        context: &MappingContext,
2091    ) -> Result<usize, Errno> {
2092        let mut bytes_written = 0;
2093        for (mapping, len) in self.get_contiguous_mappings_at(addr, length, context)? {
2094            let next_offset = bytes_written + len;
2095            if self.zero_mapping((addr + bytes_written)?, mapping, len, context).is_err() {
2096                break;
2097            }
2098            bytes_written = next_offset;
2099        }
2100
2101        if length != bytes_written { error!(EFAULT) } else { Ok(length) }
2102    }
2103
2104    fn zero_mapping(
2105        &self,
2106        addr: UserAddress,
2107        mapping: &Mapping,
2108        length: usize,
2109        context: &MappingContext,
2110    ) -> Result<usize, Errno> {
2111        if !mapping.can_write() {
2112            return error!(EFAULT);
2113        }
2114
2115        match self.get_mapping_backing(mapping) {
2116            MappingBacking::Memory(backing) => backing.zero(addr, length),
2117            MappingBacking::PrivateAnonymous => context.private_anonymous.zero(addr, length),
2118        }
2119    }
2120
2121    pub fn create_memory_backing(
2122        &self,
2123        base: UserAddress,
2124        memory: Arc<MemoryObject>,
2125        memory_offset: u64,
2126    ) -> MappingBacking {
2127        MappingBacking::Memory(Box::new(MappingBackingMemory::new(base, memory, memory_offset)))
2128    }
2129
2130    pub fn get_mapping_backing<'a>(&self, mapping: &'a Mapping) -> &'a MappingBacking {
2131        mapping.get_backing_internal()
2132    }
2133
2134    fn get_aio_context(&self, addr: UserAddress) -> Option<(Range<UserAddress>, Arc<AioContext>)> {
2135        let Some((range, mapping)) = self.mappings.get(addr) else {
2136            return None;
2137        };
2138        let MappingNameRef::AioContext(ref aio_context) = mapping.name() else {
2139            return None;
2140        };
2141        if !mapping.can_read() {
2142            return None;
2143        }
2144        Some((range.clone(), Arc::clone(aio_context)))
2145    }
2146
2147    fn find_uffd<L>(&self, locked: &mut Locked<L>, addr: UserAddress) -> Option<Arc<UserFault>>
2148    where
2149        L: LockBefore<UserFaultInner>,
2150    {
2151        for userfault in self.userfaultfds.iter() {
2152            if let Some(userfault) = userfault.upgrade() {
2153                if userfault.contains_addr(locked, addr) {
2154                    return Some(userfault);
2155                }
2156            }
2157        }
2158        None
2159    }
2160
2161    fn cache_flush(
2162        &self,
2163        range: Range<UserAddress>,
2164        context: &MappingContext,
2165    ) -> Result<(), Errno> {
2166        let mut addr = range.start;
2167        let size = range.end - range.start;
2168        for (mapping, len) in self.get_contiguous_mappings_at(addr, size, context)? {
2169            if !mapping.can_read() {
2170                return error!(EFAULT);
2171            }
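            // Lazy mappings have not been materialized in the user vmar yet, so there is
            // nothing mapped at these addresses to flush.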
2172            if mapping.mapping_mode() == MappingMode::Lazy {
2173                addr = (addr + len)?;
2174                continue;
2175            }
2176            // SAFETY: This is operating on a readable restricted mode mapping and will not fault.
2177            zx::Status::ok(unsafe {
2178                zx::sys::zx_cache_flush(
2179                    addr.ptr() as *const u8,
2180                    len,
2181                    zx::sys::ZX_CACHE_FLUSH_DATA | zx::sys::ZX_CACHE_FLUSH_INSN,
2182                )
2183            })
2184            .map_err(impossible_error)?;
2185
2186            addr = (addr + len).unwrap(); // unwrap since we're iterating within the address space.
2187        }
2188        // Did we flush the entire range?
2189        if addr != range.end { error!(EFAULT) } else { Ok(()) }
2190    }
2191
2192    /// Register the address space managed by this memory manager for interest in
2193    /// receiving private expedited memory barriers of the given kind.
2194    pub fn register_membarrier_private_expedited(
2195        &mut self,
2196        mtype: MembarrierType,
2197    ) -> Result<(), Errno> {
2198        let registrations = &mut self.forkable_state.membarrier_registrations;
2199        match mtype {
2200            MembarrierType::Memory => {
2201                registrations.memory = true;
2202            }
2203            MembarrierType::SyncCore => {
2204                registrations.sync_core = true;
2205            }
2206        }
2207        Ok(())
2208    }
2209
2210    /// Checks if the address space managed by this memory manager is registered
2211    /// for interest in private expedited barriers of the given kind.
2212    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2213        let registrations = &self.forkable_state.membarrier_registrations;
2214        match mtype {
2215            MembarrierType::Memory => registrations.memory,
2216            MembarrierType::SyncCore => registrations.sync_core,
2217        }
2218    }
2219
2220    fn force_write_memory(
2221        &mut self,
2222        context: &MappingContext,
2223        addr: UserAddress,
2224        bytes: &[u8],
2225        released_mappings: &mut ReleasedMappings,
2226    ) -> Result<(), Errno> {
2227        let (range, mapping) = {
2228            let (r, m) = self.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
2229            (r.clone(), m.clone())
2230        };
2231        if range.end < addr.saturating_add(bytes.len()) {
2232            track_stub!(
2233                TODO("https://fxbug.dev/445790710"),
2234                "ptrace poke across multiple mappings"
2235            );
2236            return error!(EFAULT);
2237        }
2238
2239        // Don't create a CoW copy of shared memory; go through the regular syscall write path.
2240        if mapping.flags().contains(MappingFlags::SHARED) {
2241            if !mapping.can_write() {
2242                // Linux returns EIO here instead of EFAULT.
2243                return error!(EIO);
2244            }
2245            return self.write_mapping_memory(addr, &mapping, &bytes, context);
2246        }
2247
2248        let backing = match self.get_mapping_backing(&mapping) {
2249            MappingBacking::PrivateAnonymous => {
2250                // Starnix has a writable handle to private anonymous memory.
2251                return context.private_anonymous.write_memory(addr, &bytes);
2252            }
2253            MappingBacking::Memory(backing) => backing,
2254        };
2255
2256        let vmo = backing.memory().as_vmo().ok_or_else(|| errno!(EFAULT))?;
2257        let addr_offset = backing.address_to_offset(addr);
2258        let can_exec =
2259            vmo.basic_info().expect("get VMO handle info").rights.contains(Rights::EXECUTE);
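        // If the backing VMO is executable, a successful write must be followed by an
        // instruction barrier, and any replacement child VMO must be re-minted as executable.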
2260
2261        // Attempt to write to the existing VMO in place.
2262        match vmo.write(&bytes, addr_offset) {
2263            Ok(()) => {
2264                if can_exec {
2265                    // Issue a barrier to avoid executing stale instructions.
2266                    system_barrier(BarrierType::InstructionStream);
2267                }
2268                return Ok(());
2269            }
2270
2271            Err(zx::Status::ACCESS_DENIED) => { /* Fall through */ }
2272
2273            Err(status) => {
2274                return Err(MemoryManager::get_errno_for_vmo_err(status));
2275            }
2276        }
2277
2278        // Create a CoW child of the entire VMO and swap with the backing.
2279        let mapping_offset = backing.address_to_offset(range.start);
2280        let len = range.end - range.start;
2281
2282        // 1. Obtain a writable child of the VMO.
2283        let size = vmo.get_size().map_err(MemoryManager::get_errno_for_vmo_err)?;
2284        let child_vmo = vmo
2285            .create_child(VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, size)
2286            .map_err(MemoryManager::get_errno_for_vmo_err)?;
2287
2288        // 2. Modify the memory.
2289        child_vmo.write(&bytes, addr_offset).map_err(MemoryManager::get_errno_for_vmo_err)?;
2290
2291        // 3. If needed, remint the VMO as executable. Zircon flushes instruction caches when
2292        // mapping executable memory below, so a barrier isn't necessary here.
2293        let child_vmo = if can_exec {
2294            child_vmo
2295                .replace_as_executable(&VMEX_RESOURCE)
2296                .map_err(MemoryManager::get_errno_for_vmo_err)?
2297        } else {
2298            child_vmo
2299        };
2300
2301        // Ensure that the mapping that `addr` falls into is mapped in the user VMAR.
2302        // This ensures that the mapping's mode becomes `Eager` (if it was `Lazy`),
2303        // otherwise, we might clone a `Lazy` mapping but map it unconditionally below,
2304        // leading to state drift where a mapping is mapped in Zircon but marked as lazy in Starnix.
2305        self.ensure_range_mapped_in_user_vmar(addr, None, context)?;
2306
2307        // 4. Map the new VMO into the user VMAR.
2308        let memory = Arc::new(MemoryObject::from(child_vmo));
2309        context.map_in_user_vmar(
2310            SelectedAddress::FixedOverwrite(range.start),
2311            &memory,
2312            mapping_offset,
2313            len,
2314            mapping.flags(),
2315            false,
2316        )?;
2317
2318        // 5. Update mappings
2319        let new_backing = MappingBackingMemory::new(range.start, memory, mapping_offset);
2320
2321        let mut new_mapping = mapping.clone();
2322        new_mapping.set_backing_internal(MappingBacking::Memory(Box::new(new_backing)));
2323
2324        released_mappings.extend(self.mappings.insert(range, new_mapping));
2325
2326        Ok(())
2327    }
2328
2329    fn set_brk<L>(
2330        &mut self,
2331        locked: &mut Locked<L>,
2332        current_task: &CurrentTask,
2333        mm: &Arc<MemoryManager>,
2334        addr: UserAddress,
2335        released_mappings: &mut ReleasedMappings,
2336    ) -> Result<UserAddress, Errno>
2337    where
2338        L: LockBefore<ThreadGroupLimits>,
2339    {
2340        let rlimit_data = std::cmp::min(
2341            PROGRAM_BREAK_LIMIT,
2342            current_task.thread_group().get_rlimit(locked, Resource::DATA),
2343        );
2344
2345        let brk = match self.brk.clone() {
2346            None => {
2347                let brk = ProgramBreak { base: self.brk_origin, current: self.brk_origin };
2348                self.brk = Some(brk.clone());
2349                brk
2350            }
2351            Some(brk) => brk,
2352        };
2353
2354        let Ok(last_address) = brk.base + rlimit_data else {
2355            // The maximum program break allowed by the rlimit overflows the address
2356            // space. We're supposed to simply return the current program break.
2357            return Ok(brk.current);
2358        };
2359
2360        if addr < brk.base || addr > last_address {
2361            // The requested program break is out-of-range. We're supposed to simply
2362            // return the current program break.
2363            return Ok(brk.current);
2364        }
2365
2366        let old_end = brk.current.round_up(*PAGE_SIZE).unwrap();
2367        let new_end = addr.round_up(*PAGE_SIZE).unwrap();
2368
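        // Grow or shrink the mapped program-break region as needed. Any failure leaves the
        // break unchanged and the current value is returned to the caller.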
2369        match new_end.cmp(&old_end) {
2370            std::cmp::Ordering::Less => {
2371                // Shrinking the program break removes any mapped pages in the
2372                // affected range, regardless of whether they were actually program
2373                // break pages, or other mappings.
2374                let delta = old_end - new_end;
2375
2376                if self.unmap(mm, new_end, delta, released_mappings).is_err() {
2377                    return Ok(brk.current);
2378                }
2379            }
2380            std::cmp::Ordering::Greater => {
2381                let range = old_end..new_end;
2382                let delta = new_end - old_end;
2383
2384                // Check for mappings over the program break region.
2385                if self.mappings.range(range).next().is_some() {
2386                    return Ok(brk.current);
2387                }
2388
2389                if self
2390                    .map_anonymous(
2391                        mm,
2392                        DesiredAddress::FixedOverwrite(old_end),
2393                        delta,
2394                        ProtectionFlags::READ | ProtectionFlags::WRITE,
2395                        MappingOptions::ANONYMOUS,
2396                        MappingName::Heap,
2397                        released_mappings,
2398                    )
2399                    .is_err()
2400                {
2401                    return Ok(brk.current);
2402                }
2403            }
2404            _ => {}
2405        };
2406
2407        // Any required updates to the program break succeeded, so update internal state.
2408        let mut new_brk = brk;
2409        new_brk.current = addr;
2410        self.brk = Some(new_brk);
2411
2412        Ok(addr)
2413    }
2414
2415    fn register_with_uffd<L>(
2416        &mut self,
2417        mm: &MemoryManager,
2418        locked: &mut Locked<L>,
2419        addr: UserAddress,
2420        length: usize,
2421        userfault: &Arc<UserFault>,
2422        mode: FaultRegisterMode,
2423        released_mappings: &mut ReleasedMappings,
2424    ) -> Result<(), Errno>
2425    where
2426        L: LockBefore<UserFaultInner>,
2427    {
2428        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2429        let range_for_op = addr..end_addr;
2430        let mut updates = vec![];
2431
2432        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2433            if !mapping.private_anonymous() {
2434                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2435                return error!(EINVAL);
2436            }
2437            if mapping.flags().contains(MappingFlags::UFFD) {
2438                return error!(EBUSY);
2439            }
2440            let range = range.intersect(&range_for_op);
2441            let mut mapping = mapping.clone();
2442            mapping.set_uffd(mode);
2443            updates.push((range, mapping));
2444        }
2445        if updates.is_empty() {
2446            return error!(EINVAL);
2447        }
2448
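        // Remove all access protections from the registered range so that user accesses
        // fault and can be handled through the userfaultfd.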
2449        mm.protect_vmar_range(addr, length, ProtectionFlags::empty())
2450            .expect("Failed to remove protections on uffd-registered range");
2451
2452        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2453        for (range, mapping) in updates {
2454            released_mappings.extend(self.mappings.insert(range, mapping));
2455        }
2456
2457        userfault.insert_pages(locked, range_for_op, false);
2458
2459        Ok(())
2460    }
2461
2462    fn unregister_range_from_uffd<L>(
2463        &mut self,
2464        mm: &MemoryManager,
2465        locked: &mut Locked<L>,
2466        userfault: &Arc<UserFault>,
2467        addr: UserAddress,
2468        length: usize,
2469        released_mappings: &mut ReleasedMappings,
2470    ) -> Result<(), Errno>
2471    where
2472        L: LockBefore<UserFaultInner>,
2473    {
2474        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2475        let range_for_op = addr..end_addr;
2476        let mut updates = vec![];
2477
2478        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2479            if !mapping.private_anonymous() {
2480                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2481                return error!(EINVAL);
2482            }
2483            if mapping.flags().contains(MappingFlags::UFFD) {
2484                let range = range.intersect(&range_for_op);
2485                if userfault.remove_pages(locked, range.clone()) {
2486                    let mut mapping = mapping.clone();
2487                    mapping.clear_uffd();
2488                    updates.push((range, mapping));
2489                }
2490            }
2491        }
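        // Use a separate loop to avoid mutating the mappings structure while iterating over it.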
2492        for (range, mapping) in updates {
2493            let length = range.end - range.start;
2494            let restored_flags = mapping.flags().access_flags();
2495
2496            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2497
2498            mm.protect_vmar_range(range.start, length, restored_flags)
2499                .expect("Failed to restore original protection bits on uffd-registered range");
2500        }
2501        Ok(())
2502    }
2503
2504    fn unregister_uffd<L>(
2505        &mut self,
2506        mm: &MemoryManager,
2507        locked: &mut Locked<L>,
2508        userfault: &Arc<UserFault>,
2509        released_mappings: &mut ReleasedMappings,
2510    ) where
2511        L: LockBefore<UserFaultInner>,
2512    {
2513        let mut updates = vec![];
2514
2515        for (range, mapping) in self.mappings.iter() {
2516            if mapping.flags().contains(MappingFlags::UFFD) {
2517                for range in userfault.get_registered_pages_overlapping_range(locked, range.clone())
2518                {
2519                    let mut mapping = mapping.clone();
2520                    mapping.clear_uffd();
2521                    updates.push((range.clone(), mapping));
2522                }
2523            }
2524        }
2525        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2526        for (range, mapping) in updates {
2527            let length = range.end - range.start;
2528            let restored_flags = mapping.flags().access_flags();
2529            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2530            // We can't recover from an error here as this is run during the cleanup.
2531            mm.protect_vmar_range(range.start, length, restored_flags)
2532                .expect("Failed to restore original protection bits on uffd-registered range");
2533        }
2534
2535        userfault.remove_pages(
2536            locked,
2537            UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
2538                ..UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS),
2539        );
2540
2541        let weak_userfault = Arc::downgrade(userfault);
2542        self.userfaultfds.retain(|uf| !Weak::ptr_eq(uf, &weak_userfault));
2543    }
2544
2545    fn set_mapping_name(
2546        &mut self,
2547        addr: UserAddress,
2548        length: usize,
2549        name: Option<FsString>,
2550        released_mappings: &mut ReleasedMappings,
2551    ) -> Result<(), Errno> {
2552        if addr.ptr() % *PAGE_SIZE as usize != 0 {
2553            return error!(EINVAL);
2554        }
2555        let end = match addr.checked_add(length) {
2556            Some(addr) => addr.round_up(*PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
2557            None => return error!(EINVAL),
2558        };
2559
2560        let mappings_in_range = self
2561            .mappings
2562            .map
2563            .range(addr..end)
2564            .map(|(r, m)| (r.clone(), m.clone()))
2565            .collect::<Vec<_>>();
2566
2567        if mappings_in_range.is_empty() {
2568            return error!(EINVAL);
2569        }
2570        if !mappings_in_range.first().unwrap().0.contains(&addr) {
2571            return error!(ENOMEM);
2572        }
2573
2574        let mut last_range_end = None;
2575        // There's no get_mut on RangeMap, because it would be hard to implement correctly in
2576        // combination with merging of adjacent mappings. Instead, make a copy, change the copy,
2577        // and insert the copy.
2578        for (mut range, mut mapping) in mappings_in_range {
2579            if mapping.name().is_file() {
2580                // It's invalid to assign a name to a file-backed mapping.
2581                return error!(EBADF);
2582            }
2583            // Handle mappings that start before the region to be named.
2584            range.start = std::cmp::max(range.start, addr);
2585            // Handle mappings that extend past the region to be named.
2586            range.end = std::cmp::min(range.end, end);
2587
2588            if let Some(last_range_end) = last_range_end {
2589                if last_range_end != range.start {
2590                    // The name must apply to a contiguous range of mapped pages.
2591                    return error!(ENOMEM);
2592                }
2593            }
2594            last_range_end = Some(range.end.round_up(*PAGE_SIZE)?);
2595            // TODO(b/310255065): We have no place to store names in a way visible to programs outside of Starnix
2596            // such as memory analysis tools.
2597            if let MappingBacking::Memory(backing) = self.get_mapping_backing(&mapping) {
2598                match &name {
2599                    Some(memory_name) => {
2600                        backing.memory().set_zx_name(memory_name);
2601                    }
2602                    None => {
2603                        backing.memory().set_zx_name(b"");
2604                    }
2605                }
2606            }
2607            mapping.set_name(match &name {
2608                Some(name) => MappingName::Vma(FlyByteStr::new(name.as_bytes())),
2609                None => MappingName::None,
2610            });
2611            released_mappings.extend(self.mappings.insert(range, mapping));
2612        }
2613        if let Some(last_range_end) = last_range_end {
2614            if last_range_end < end {
2615                // The name must apply to a contiguous range of mapped pages.
2616                return error!(ENOMEM);
2617            }
2618        }
2619        Ok(())
2620    }
2621}
2622
2623/// The memory pinning shadow process used for mlock().
2624///
2625/// Uses its own distinct shadow process so that it doesn't interfere with other uses of memory
2626/// pinning.
2627pub struct MlockShadowProcess(memory_pinning::ShadowProcess);
2628
2629/// A memory manager for another thread.
2630///
2631/// When accessing memory through this object, we use less efficient codepaths that work across
2632/// address spaces.
2633pub struct RemoteMemoryManager {
2634    mm: Arc<MemoryManager>,
2635}
2636
2637impl RemoteMemoryManager {
2638    fn new(mm: Arc<MemoryManager>) -> Self {
2639        Self { mm }
2640    }
2641}
2642
2643// If we just have a MemoryManager, we cannot assume that its address space is current, which means
2644// we need to use the slower "syscall" mechanism to access its memory.
2645impl MemoryAccessor for RemoteMemoryManager {
2646    fn read_memory<'a>(
2647        &self,
2648        addr: UserAddress,
2649        bytes: &'a mut [MaybeUninit<u8>],
2650    ) -> Result<&'a mut [u8], Errno> {
2651        self.mm.syscall_read_memory(addr, bytes)
2652    }
2653
2654    fn read_memory_partial_until_null_byte<'a>(
2655        &self,
2656        addr: UserAddress,
2657        bytes: &'a mut [MaybeUninit<u8>],
2658    ) -> Result<&'a mut [u8], Errno> {
2659        self.mm.syscall_read_memory_partial_until_null_byte(addr, bytes)
2660    }
2661
2662    fn read_memory_partial<'a>(
2663        &self,
2664        addr: UserAddress,
2665        bytes: &'a mut [MaybeUninit<u8>],
2666    ) -> Result<&'a mut [u8], Errno> {
2667        self.mm.syscall_read_memory_partial(addr, bytes)
2668    }
2669
2670    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2671        self.mm.syscall_write_memory(addr, bytes)
2672    }
2673
2674    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2675        self.mm.syscall_write_memory_partial(addr, bytes)
2676    }
2677
2678    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
2679        self.mm.syscall_zero(addr, length)
2680    }
2681}
2682
2683impl TaskMemoryAccessor for RemoteMemoryManager {
2684    fn maximum_valid_address(&self) -> Option<UserAddress> {
2685        Some(self.mm.maximum_valid_user_address)
2686    }
2687}
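// Illustrative usage (a sketch, not taken from call-sites in this file): a task that holds an
// `Arc<MemoryManager>` for a *different* address space can obtain a `RemoteMemoryManager` and go
// through the syscall-based accessors above, e.g.:
//
//     let remote = other_mm.as_remote();
//     let mut buf = vec![std::mem::MaybeUninit::uninit(); 16];
//     let bytes = remote.read_memory(addr, &mut buf)?;
//
// `other_mm`, `addr`, and the buffer size here are hypothetical; the point is that every access
// takes the slower cross-address-space path rather than direct usercopy.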
2688
2689impl MemoryManager {
2690    /// Ensures that any mapping at `addr` is actually mapped in the user vmar.
2691    ///
2692    /// If `length` is `None`, it will ensure the mapping only for the page that `addr` falls into.
2693    /// Returns `true` if any lazy mappings are mapped.
2694    pub fn ensure_range_mapped_in_user_vmar(
2695        &self,
2696        addr: UserAddress,
2697        length: Option<usize>,
2698    ) -> Result<bool, Errno> {
2699        self.state.write().ensure_ranges_mapped_in_user_vmar(
2700            std::iter::once((addr, length)),
2701            &self.mapping_context,
2702        )
2703    }
2704
2705    /// Ensures that any mappings in the specified ranges are actually mapped in the user vmar.
2706    ///
2707    /// For each range, if `length` is `None`, it will ensure the mapping only for the page that `addr` falls into.
2708    /// Returns `true` if any lazy mappings are mapped.
2709    pub fn ensure_ranges_mapped_in_user_vmar<I>(&self, ranges: I) -> Result<bool, Errno>
2710    where
2711        I: IntoIterator<Item = (UserAddress, Option<usize>)>,
2712    {
2713        self.state.write().ensure_ranges_mapped_in_user_vmar(ranges, &self.mapping_context)
2714    }
2715
2716    pub fn mrelease(&self) -> Result<(), Errno> {
2717        self.mapping_context.private_anonymous.zero(
2718            UserAddress::from_ptr(self.mapping_context.user_vmar_info.base),
2719            self.mapping_context.user_vmar_info.len,
2720        )?;
2721        Ok(())
2722    }
2723
2724    pub fn summarize(&self, summary: &mut crate::mm::MappingSummary) {
2725        let state = self.state.read();
2726        for (_, mapping) in state.mappings.iter() {
2727            summary.add(&state, mapping);
2728        }
2729    }
2730
2731    pub fn get_mappings_for_vmsplice(
2732        self: &Arc<MemoryManager>,
2733        buffers: &UserBuffers,
2734    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
2735        self.state.read().get_mappings_for_vmsplice(self, buffers)
2736    }
2737
2738    pub fn has_same_address_space(&self, other: &Self) -> bool {
2739        std::ptr::eq(self, other)
2740    }
2741
2742    fn unified_transfer_loop<F>(
2743        &self,
2744        addr: UserAddress,
2745        len: usize,
2746        mut transfer_fn: F,
2747    ) -> Result<usize, Errno>
2748    where
2749        F: FnMut(UserAddress, usize) -> Result<ControlFlow<usize, usize>, Errno>,
2750    {
2751        let mut copied = 0;
2752        while copied < len {
2753            match transfer_fn((addr + copied)?, copied)? {
2754                ControlFlow::Continue(num_copied) => {
2755                    if num_copied == 0 {
2756                        let fault_addr = (addr + copied)?;
2757                        // If we successfully mapped a lazy mapping, retry the copy.
2758                        // Otherwise, this might be a permission fault or invalid address, so we
2759                        // stop and return the partial result.
2760                        //
2761                        // NOTE: We lazily materialize mappings one page at a time here.
2762                        // An alternative approach would be to materialize the entire range
2763                        // or the first mapping up front. That might avoid bouncing between
2764                        // threads on faults, but adds overhead (locks and range lookups)
2765                        // if the memory is already mapped. We use the reactive approach
2766                        // for now, but this could be tuned in the future.
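                        //
                        // Illustrative trace (hypothetical sizes): for an 8192-byte copy that
                        // faults on its second page, the first call to `transfer_fn` returns
                        // `Continue(4096)`, the next returns `Continue(0)`, the faulting page is
                        // materialized here, and the retried call copies the remaining 4096
                        // bytes, so the loop returns `Ok(8192)`.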
2767                        if self.ensure_range_mapped_in_user_vmar(fault_addr, None)? {
2768                            continue;
2769                        } else {
2770                            break;
2771                        }
2772                    }
2773                    copied += num_copied;
2774                }
2775                ControlFlow::Break(num_copied) => {
2776                    copied += num_copied;
2777                    break;
2778                }
2779            }
2780        }
2781        Ok(copied)
2782    }
2783
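    /// Reads exactly `bytes.len()` bytes from `addr` in this address space.
    ///
    /// Unlike the `*_partial` variants below, this fails with `EFAULT` unless every requested
    /// byte could be copied; the partial variants only fail if nothing at all could be read.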
2784    pub fn unified_read_memory<'a>(
2785        &self,
2786        current_task: &CurrentTask,
2787        addr: UserAddress,
2788        bytes: &'a mut [MaybeUninit<u8>],
2789    ) -> Result<&'a mut [u8], Errno> {
2790        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2791
2792        if let Some(usercopy) = usercopy() {
2793            let buf_ptr = bytes.as_mut_ptr();
2794            let buf_len = bytes.len();
2795
2796            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
2797                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
2798                let current_bytes = unsafe {
2799                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
2800                };
2801                let (read_bytes, _unread_bytes) = usercopy.copyin(cur_addr.ptr(), current_bytes);
2802                Ok(ControlFlow::Continue(read_bytes.len()))
2803            })?;
2804            if copied < bytes.len() {
2805                error!(EFAULT)
2806            } else {
2807                // SAFETY: All bytes up to `buf_len` have been initialized.
2808                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, buf_len) })
2809            }
2810        } else {
2811            self.syscall_read_memory(addr, bytes)
2812        }
2813    }
2814
2815    pub fn syscall_read_memory<'a>(
2816        &self,
2817        addr: UserAddress,
2818        bytes: &'a mut [MaybeUninit<u8>],
2819    ) -> Result<&'a mut [u8], Errno> {
2820        self.state.read().read_memory(addr, bytes, &self.mapping_context)
2821    }
2822
2823    pub fn unified_read_memory_partial_until_null_byte<'a>(
2824        &self,
2825        current_task: &CurrentTask,
2826        addr: UserAddress,
2827        bytes: &'a mut [MaybeUninit<u8>],
2828    ) -> Result<&'a mut [u8], Errno> {
2829        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2830
2831        if let Some(usercopy) = usercopy() {
2832            let buf_ptr = bytes.as_mut_ptr();
2833            let buf_len = bytes.len();
2834
2835            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
2836                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
2837                let current_bytes = unsafe {
2838                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
2839                };
2840                let (read_bytes, _unread_bytes) =
2841                    usercopy.copyin_until_null_byte(cur_addr.ptr(), current_bytes);
2842
2843                let num_copied = read_bytes.len();
2844                if read_bytes.last().map(|b| *b == 0).unwrap_or(false) {
2845                    Ok(ControlFlow::Break(num_copied))
2846                } else {
2847                    Ok(ControlFlow::Continue(num_copied))
2848                }
2849            })?;
2850            if copied == 0 && !bytes.is_empty() {
2851                error!(EFAULT)
2852            } else {
2853                // SAFETY: Bytes up to `copied` have been initialized.
2854                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, copied) })
2855            }
2856        } else {
2857            self.syscall_read_memory_partial_until_null_byte(addr, bytes)
2858        }
2859    }
2860
2861    pub fn syscall_read_memory_partial_until_null_byte<'a>(
2862        &self,
2863        addr: UserAddress,
2864        bytes: &'a mut [MaybeUninit<u8>],
2865    ) -> Result<&'a mut [u8], Errno> {
2866        self.state.read().read_memory_partial_until_null_byte(addr, bytes, &self.mapping_context)
2867    }
2868
2869    pub fn unified_read_memory_partial<'a>(
2870        &self,
2871        current_task: &CurrentTask,
2872        addr: UserAddress,
2873        bytes: &'a mut [MaybeUninit<u8>],
2874    ) -> Result<&'a mut [u8], Errno> {
2875        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2876
2877        if let Some(usercopy) = usercopy() {
2878            let buf_ptr = bytes.as_mut_ptr();
2879            let buf_len = bytes.len();
2880
2881            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
2882                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
2883                let current_bytes = unsafe {
2884                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
2885                };
2886                let (read_bytes, _unread_bytes) = usercopy.copyin(cur_addr.ptr(), current_bytes);
2887                Ok(ControlFlow::Continue(read_bytes.len()))
2888            })?;
2889            if copied == 0 && !bytes.is_empty() {
2890                error!(EFAULT)
2891            } else {
2892                // SAFETY: Bytes up to `copied` have been initialized.
2893                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, copied) })
2894            }
2895        } else {
2896            self.syscall_read_memory_partial(addr, bytes)
2897        }
2898    }
2899
2900    pub fn syscall_read_memory_partial<'a>(
2901        &self,
2902        addr: UserAddress,
2903        bytes: &'a mut [MaybeUninit<u8>],
2904    ) -> Result<&'a mut [u8], Errno> {
2905        self.state.read().read_memory_partial(addr, bytes, &self.mapping_context)
2906    }
2907
2908    pub fn unified_write_memory(
2909        &self,
2910        current_task: &CurrentTask,
2911        addr: UserAddress,
2912        bytes: &[u8],
2913    ) -> Result<usize, Errno> {
2914        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2915
2916        if let Some(usercopy) = usercopy() {
2917            let len = bytes.len();
2918            let copied = self.unified_transfer_loop(addr, len, |cur_addr, offset| {
2919                Ok(ControlFlow::Continue(usercopy.copyout(&bytes[offset..], cur_addr.ptr())))
2920            })?;
2921            if copied < bytes.len() { error!(EFAULT) } else { Ok(copied) }
2922        } else {
2923            self.syscall_write_memory(addr, bytes)
2924        }
2925    }
2926
2927    /// Write `bytes` to memory address `addr`, making a copy-on-write child of the backing VMO and
2928    /// replacing the mapping if necessary.
2929    ///
2930    /// NOTE: this bypasses userspace's memory protection configuration and should only be called
2931    /// by codepaths like ptrace which bypass memory protection.
2932    pub fn force_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<(), Errno> {
2933        let mut state = self.state.write();
2934        let mut released_mappings = ReleasedMappings::default();
2935        let result =
2936            state.force_write_memory(&self.mapping_context, addr, bytes, &mut released_mappings);
2937        released_mappings.finalize(state);
2938        result
2939    }
2940
2941    pub fn syscall_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2942        self.state.read().write_memory(addr, bytes, &self.mapping_context)
2943    }
2944
2945    pub fn unified_write_memory_partial(
2946        &self,
2947        current_task: &CurrentTask,
2948        addr: UserAddress,
2949        bytes: &[u8],
2950    ) -> Result<usize, Errno> {
2951        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2952
2953        if let Some(usercopy) = usercopy() {
2954            let len = bytes.len();
2955            let copied = self.unified_transfer_loop(addr, len, |cur_addr, offset| {
2956                Ok(ControlFlow::Continue(usercopy.copyout(&bytes[offset..], cur_addr.ptr())))
2957            })?;
2958            if copied == 0 && !bytes.is_empty() { error!(EFAULT) } else { Ok(copied) }
2959        } else {
2960            self.syscall_write_memory_partial(addr, bytes)
2961        }
2962    }
2963
2964    pub fn syscall_write_memory_partial(
2965        &self,
2966        addr: UserAddress,
2967        bytes: &[u8],
2968    ) -> Result<usize, Errno> {
2969        self.state.read().write_memory_partial(addr, bytes, &self.mapping_context)
2970    }
2971
2972    pub fn unified_zero(
2973        &self,
2974        current_task: &CurrentTask,
2975        addr: UserAddress,
2976        length: usize,
2977    ) -> Result<usize, Errno> {
2978        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2979
2980        {
2981            let page_size = *PAGE_SIZE as usize;
2982            // Get the page boundary immediately following `addr` if `addr` is
2983            // not page aligned.
2984            let next_page_boundary = round_up_to_system_page_size(addr.ptr())?;
2985            // The number of bytes needed to zero at least a full page (not just
2986            // a page's worth of bytes) starting at `addr`.
2987            let length_with_atleast_one_full_page = page_size + (next_page_boundary - addr.ptr());
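            // Worked example (hypothetical values): with a 0x1000-byte page and
            // `addr == 0x1200`, `next_page_boundary == 0x2000`, so
            // `length_with_atleast_one_full_page == 0x1000 + 0xe00 == 0x1e00`; any `length` of
            // at least 0x1e00 fully covers the page at 0x2000..0x3000.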
2988            // If at least one full page is being zeroed, go through the memory object since Zircon
2989            // can swap the mapped pages with the zero page, which should be cheaper than zeroing
2990            // out a page's worth of bytes manually.
2991            //
2992            // If we are not zeroing out a full page, then go through usercopy
2993            // if unified aspaces is enabled.
2994            if length >= length_with_atleast_one_full_page {
2995                return self.syscall_zero(addr, length);
2996            }
2997        }
2998
2999        if let Some(usercopy) = usercopy() {
3000            let copied = self.unified_transfer_loop(addr, length, |cur_addr, offset| {
3001                Ok(ControlFlow::Continue(usercopy.zero(cur_addr.ptr(), length - offset)))
3002            })?;
3003            if copied == 0 && length > 0 { error!(EFAULT) } else { Ok(copied) }
3004        } else {
3005            self.syscall_zero(addr, length)
3006        }
3007    }
3008
3009    pub fn syscall_zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
3010        self.state.read().zero(addr, length, &self.mapping_context)
3011    }
3012
3013    /// Obtain a reference to this memory manager that can be used from another thread.
3014    pub fn as_remote(self: &Arc<Self>) -> RemoteMemoryManager {
3015        RemoteMemoryManager::new(self.clone())
3016    }
3017
3018    /// Performs a data and instruction cache flush over the given address range.
3019    pub fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
3020        self.state.read().cache_flush(range, &self.mapping_context)
3021    }
3022
3023    /// Register the address space managed by this memory manager for interest in
3024    /// receiving private expedited memory barriers of the given type.
3025    pub fn register_membarrier_private_expedited(
3026        &self,
3027        mtype: MembarrierType,
3028    ) -> Result<(), Errno> {
3029        self.state.write().register_membarrier_private_expedited(mtype)
3030    }
3031
3032    /// Checks if the address space managed by this memory manager is registered
3033    /// for interest in private expedited barriers of the given kind.
3034    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
3035        self.state.read().membarrier_private_expedited_registered(mtype)
3036    }
3037}
3038
3039/// State and resources of the `MemoryManager` that are either immutable after creation
3040/// or handle their own interior mutability (e.g., `private_anonymous`).
3041///
3042/// This is distinct from `MemoryManagerState` in that the fields here do not require
3043/// acquisition of the `MemoryManager`'s main lock for access. This allows concurrent
3044/// access to these resources without lock contention.
3045///
3046/// This structure primarily holds the Zircon VMAR handle and the manager for private
3047/// anonymous memory, which are the core primitives used to manipulate the address space.
3048pub struct MappingContext {
3049    /// The VMAR in which userspace mappings occur.
3050    ///
3051    /// We map userspace memory in this child VMAR so that we can destroy the
3052    /// entire VMAR during exec.
3053    /// For 32-bit tasks, we limit the user_vmar to addresses below `LOWER_4GB_LIMIT`.
3054    ///
3055    /// This field is set to `ZX_HANDLE_INVALID` when the address-space has been destroyed (e.g. on
3056    /// `exec()`), so call-sites can either check the handle proactively or handle the
3057    /// `ZX_ERR_BAD_HANDLE` status returned by Zircon operations, whichever suits them.
3058    pub user_vmar: zx::Vmar,
3059
3060    /// Cached VmarInfo for user_vmar.
3061    pub user_vmar_info: zx::VmarInfo,
3062
3063    /// Memory object backing private, anonymous memory allocations in this address space.
3064    pub private_anonymous: PrivateAnonymousMemoryManager,
3065}
3066
3067impl MappingContext {
3068    fn map_in_user_vmar(
3069        &self,
3070        addr: SelectedAddress,
3071        memory: &MemoryObject,
3072        memory_offset: u64,
3073        length: usize,
3074        flags: MappingFlags,
3075        populate: bool,
3076    ) -> Result<(), Errno> {
3077        map_in_vmar(
3078            &self.user_vmar,
3079            &self.user_vmar_info,
3080            addr,
3081            memory,
3082            memory_offset,
3083            length,
3084            flags,
3085            populate,
3086        )
3087    }
3088
3089    pub fn max_address(&self) -> UserAddress {
3090        UserAddress::from_ptr(self.user_vmar_info.base + self.user_vmar_info.len)
3091    }
3092}
3093
3094pub struct MemoryManager {
3095    /// The base address of the root_vmar.
3096    pub base_addr: UserAddress,
3097
3098    /// The futexes in this address space.
3099    pub futex: Arc<FutexTable<PrivateFutexKey>>,
3100
3101    /// The mapping context for this address space.
3102    pub mapping_context: MappingContext,
3103
3104    /// Mutable state for the memory manager.
3105    pub state: RwLock<MemoryManagerState>,
3106
3107    /// Whether this address space is dumpable.
3108    pub dumpable: OrderedMutex<DumpPolicy, MmDumpable>,
3109
3110    /// Maximum valid user address for this vmar.
3111    pub maximum_valid_user_address: UserAddress,
3112
3113    /// In-flight payloads enqueued to a pipe as a consequence of a `vmsplice(2)`
3114    /// operation.
3115    ///
3116    /// For details on why we need to keep track of in-flight vmspliced payloads,
3117    /// see [`VmsplicePayload`].
3118    ///
3119    /// For details on why this isn't under the `RwLock` protected `MemoryManagerState`,
3120    /// see [`InflightVmsplicedPayloads::payloads`].
3121    pub inflight_vmspliced_payloads: InflightVmsplicedPayloads,
3122
3123    /// A mechanism to be notified when this `MemoryManager` is destroyed.
3124    pub drop_notifier: DropNotifier,
3125}
3126
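/// Returns `true` if the faulting access described by `decoded` is permitted by `mapping`'s
/// protection flags.
///
/// For example, a write fault on a read-only mapping yields `false`, while a plain read fault
/// (neither execute nor write) on a readable mapping yields `true`.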
3127fn check_access_permissions_in_page_fault(
3128    decoded: &PageFaultExceptionReport,
3129    mapping: &Mapping,
3130) -> bool {
3131    let exec_denied = decoded.is_execute && !mapping.can_exec();
3132    let write_denied = decoded.is_write && !mapping.can_write();
3133    let read_denied = (!decoded.is_execute && !decoded.is_write) && !mapping.can_read();
3134    !exec_denied && !write_denied && !read_denied
3135}
3136
3137impl MemoryManager {
3138    /// Returns a new `MemoryManager` suitable for use in tests.
3139    pub fn new_for_test(root_vmar: zx::Unowned<'_, zx::Vmar>, arch_width: ArchWidth) -> Arc<Self> {
3140        Self::new(root_vmar, arch_width, None, None).expect("can create MemoryManager")
3141    }
3142
3143    // Calls `op` with details of the mappings in the `user_vmar`, or with an empty slice if the
3144    // `user_vmar` has been destroyed.
3145    fn with_zx_mappings<R>(
3146        &self,
3147        current_task: &CurrentTask,
3148        op: impl FnOnce(&[zx::MapInfo]) -> R,
3149    ) -> R {
3150        MapInfoCache::get_or_init(current_task)
3151            .expect("must be able to retrieve map info cache")
3152            .with_map_infos(&self.mapping_context.user_vmar, |infos| match infos {
3153                Ok(infos) => op(infos),
3154                Err(_) => op(&[]),
3155            })
3156    }
3157
3158    fn protect_vmar_range(
3159        &self,
3160        addr: UserAddress,
3161        length: usize,
3162        prot_flags: ProtectionFlags,
3163    ) -> Result<(), Errno> {
3164        let vmar_flags = prot_flags.to_vmar_flags();
3165        // SAFETY: Modifying user vmar
3166        unsafe { self.mapping_context.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(
3167            |s| match s {
3168                zx::Status::INVALID_ARGS => errno!(EINVAL),
3169                zx::Status::NOT_FOUND => errno!(ENOMEM),
3170                zx::Status::ACCESS_DENIED => errno!(EACCES),
3171                _ => impossible_error(s),
3172            },
3173        )
3174    }
3175
3176    pub fn total_locked_bytes(&self) -> u64 {
3177        self.state.read().num_locked_bytes(
3178            UserAddress::from(self.mapping_context.user_vmar_info.base as u64)
3179                ..UserAddress::from(
3180                    (self.mapping_context.user_vmar_info.base
3181                        + self.mapping_context.user_vmar_info.len) as u64,
3182                ),
3183        )
3184    }
3185
3186    /// Returns a new `MemoryManager` initialized with a new userspace VMAR matching the specified
3187    /// `arch_width`, under the specified restricted-mode `root_vmar`. The `executable_node` for the
3188    /// binary that the new address-space will execute may optionally be supplied.
3189    fn new(
3190        root_vmar: zx::Unowned<'_, zx::Vmar>,
3191        arch_width: ArchWidth,
3192        executable_node: Option<NamespaceNode>,
3193        private_anonymous: Option<PrivateAnonymousMemoryManager>,
3194    ) -> Result<Arc<Self>, Errno> {
3195        debug_assert!(!root_vmar.is_invalid());
3196
3197        let mut vmar_info = root_vmar.info().map_err(|status| from_status_like_fdio!(status))?;
3198        if arch_width.is_arch32() {
3199            vmar_info.len = (LOWER_4GB_LIMIT.ptr() - vmar_info.base) as usize;
3200        }
3201
3202        let (user_vmar, ptr) = root_vmar
3203            .allocate(
3204                0,
3205                vmar_info.len,
3206                zx::VmarFlags::SPECIFIC
3207                    | zx::VmarFlags::CAN_MAP_SPECIFIC
3208                    | zx::VmarFlags::CAN_MAP_READ
3209                    | zx::VmarFlags::CAN_MAP_WRITE
3210                    | zx::VmarFlags::CAN_MAP_EXECUTE,
3211            )
3212            .map_err(|status| from_status_like_fdio!(status))?;
3213        assert_eq!(ptr, vmar_info.base);
3214
3215        let user_vmar_info = user_vmar.info().map_err(|status| from_status_like_fdio!(status))?;
3216
3217        // Ensure that the `user_vmar_info` matches assumptions for the requested layout.
3218        debug_assert_eq!(RESTRICTED_ASPACE_BASE, user_vmar_info.base);
3219        if arch_width.is_arch32() {
3220            debug_assert_eq!(LOWER_4GB_LIMIT.ptr() - user_vmar_info.base, user_vmar_info.len);
3221        } else {
3222            debug_assert_eq!(RESTRICTED_ASPACE_SIZE, user_vmar_info.len);
3223        }
3224
3225        // The private anonymous backing memory object extends from user address 0 up to the
3226        // highest mappable address. The pages below `user_vmar_info.base` are never mapped, but
3227        // including them in the memory object makes the math for mapping addresses to memory-object
3228        // offsets simpler.
3229        let backing_size = (user_vmar_info.base + user_vmar_info.len) as u64;
3230
3231        // Place the stack at the end of the address space, subject to ASLR adjustment.
3232        let stack_origin = UserAddress::from_ptr(
3233            user_vmar_info.base + user_vmar_info.len
3234                - MAX_STACK_SIZE
3235                - generate_random_offset_for_aslr(arch_width),
3236        )
3237        .round_up(*PAGE_SIZE)?;
3238
3239        // Set the highest address that `mmap` will assign to the allocations that don't ask for a
3240        // specific address, subject to ASLR adjustment.
3241        let mmap_top = stack_origin
3242            .checked_sub(MAX_STACK_SIZE + generate_random_offset_for_aslr(arch_width))
3243            .ok_or_else(|| errno!(EINVAL))?;
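        // Resulting layout (illustrative): the stack is mapped starting at `stack_origin`, which
        // sits `MAX_STACK_SIZE` plus a random offset below the top of the user VMAR, while
        // `mmap_top` is a further `MAX_STACK_SIZE` plus another random offset below
        // `stack_origin`, which appears intended to leave room for the `GROWSDOWN` stack to
        // expand without colliding with address-less `mmap` allocations.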
3244
3245        Ok(Arc::new(MemoryManager {
3246            base_addr: UserAddress::from_ptr(user_vmar_info.base),
3247            futex: Arc::<FutexTable<PrivateFutexKey>>::default(),
3248            mapping_context: MappingContext {
3249                user_vmar,
3250                user_vmar_info,
3251                private_anonymous: private_anonymous
3252                    .unwrap_or_else(|| PrivateAnonymousMemoryManager::new(backing_size)),
3253            },
3254            state: RwLock::new(MemoryManagerState {
3255                mappings: Default::default(),
3256                userfaultfds: Default::default(),
3257                shadow_mappings_for_mlock: Default::default(),
3258                forkable_state: MemoryManagerForkableState {
3259                    executable_node,
3260                    stack_origin,
3261                    mmap_top,
3262                    ..Default::default()
3263                },
3264            }),
3265            // TODO(security): Reset to DISABLE, or the value in the fs.suid_dumpable sysctl, under
3266            // certain conditions as specified in the prctl(2) man page.
3267            dumpable: OrderedMutex::new(DumpPolicy::User),
3268            maximum_valid_user_address: UserAddress::from_ptr(
3269                user_vmar_info.base + user_vmar_info.len,
3270            ),
3271            inflight_vmspliced_payloads: Default::default(),
3272            drop_notifier: DropNotifier::default(),
3273        }))
3274    }
3275
3276    pub fn set_brk<L>(
3277        self: &Arc<Self>,
3278        locked: &mut Locked<L>,
3279        current_task: &CurrentTask,
3280        addr: UserAddress,
3281    ) -> Result<UserAddress, Errno>
3282    where
3283        L: LockBefore<ThreadGroupLimits>,
3284    {
3285        let mut state = self.state.write();
3286        let mut released_mappings = ReleasedMappings::default();
3287        let result = state.set_brk(locked, current_task, self, addr, &mut released_mappings);
3288        released_mappings.finalize(state);
3289        result
3290    }
3291
3292    pub fn register_uffd(&self, userfault: &Arc<UserFault>) {
3293        let mut state = self.state.write();
3294        state.userfaultfds.push(Arc::downgrade(userfault));
3295    }
3296
3297    /// Register a given memory range with a userfault object.
3298    pub fn register_with_uffd<L>(
3299        self: &Arc<Self>,
3300        locked: &mut Locked<L>,
3301        addr: UserAddress,
3302        length: usize,
3303        userfault: &Arc<UserFault>,
3304        mode: FaultRegisterMode,
3305    ) -> Result<(), Errno>
3306    where
3307        L: LockBefore<UserFaultInner>,
3308    {
3309        let mut state = self.state.write();
3310        let mut released_mappings = ReleasedMappings::default();
3311        let result = state.register_with_uffd(
3312            self,
3313            locked,
3314            addr,
3315            length,
3316            userfault,
3317            mode,
3318            &mut released_mappings,
3319        );
3320        released_mappings.finalize(state);
3321        result
3322    }
3323
3324    /// Unregister a given range from any userfault objects associated with it.
3325    pub fn unregister_range_from_uffd<L>(
3326        &self,
3327        locked: &mut Locked<L>,
3328        userfault: &Arc<UserFault>,
3329        addr: UserAddress,
3330        length: usize,
3331    ) -> Result<(), Errno>
3332    where
3333        L: LockBefore<UserFaultInner>,
3334    {
3335        let mut state = self.state.write();
3336        let mut released_mappings = ReleasedMappings::default();
3337        let result = state.unregister_range_from_uffd(
3338            self,
3339            locked,
3340            userfault,
3341            addr,
3342            length,
3343            &mut released_mappings,
3344        );
3345        released_mappings.finalize(state);
3346        result
3347    }
3348
3349    /// Unregister any mappings registered with a given userfault object. Used when closing the last
3350    /// file descriptor associated with it.
3351    pub fn unregister_uffd<L>(&self, locked: &mut Locked<L>, userfault: &Arc<UserFault>)
3352    where
3353        L: LockBefore<UserFaultInner>,
3354    {
3355        let mut state = self.state.write();
3356        let mut released_mappings = ReleasedMappings::default();
3357        state.unregister_uffd(self, locked, userfault, &mut released_mappings);
3358        released_mappings.finalize(state);
3359    }
3360
3361    /// Populate a range of pages registered with a userfaultfd according to a `populate` function.
3362    /// This will fail if the pages were not registered with userfaultfd, or if the page at `addr`
3363    /// was already populated. If any page other than the first one was populated, the `length`
3364    /// is adjusted to only include the first N unpopulated pages, and this adjusted length
3365    /// is then passed to `populate`. On success, returns the number of populated bytes.
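    ///
    /// For example (hypothetical layout): with 4 KiB pages, a request covering four pages
    /// starting at P0, where P2 is already populated, calls `populate` with an effective length
    /// of two pages (P0 and P1) and returns `Ok(8192)`; if P0 itself were already populated, the
    /// call would fail with `EEXIST`.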
3366    pub fn populate_from_uffd<F, L>(
3367        &self,
3368        locked: &mut Locked<L>,
3369        addr: UserAddress,
3370        length: usize,
3371        userfault: &Arc<UserFault>,
3372        populate: F,
3373    ) -> Result<usize, Errno>
3374    where
3375        F: FnOnce(&MemoryManagerState, usize) -> Result<usize, Errno>,
3376        L: LockBefore<UserFaultInner>,
3377    {
3378        let state = self.state.read();
3379        // Check that the `addr..addr + length` range is a contiguous range of mappings that are all
3380        // registered with a userfault object.
3381        let mut bytes_registered_with_uffd = 0;
3382        for (mapping, len) in
3383            state.get_contiguous_mappings_at(addr, length, &self.mapping_context)?
3384        {
3385            if mapping.flags().contains(MappingFlags::UFFD) {
3386                // Check that the mapping is registered with the same uffd. This is not required,
3387                // but we don't support cross-uffd operations yet.
3388                if !userfault.contains_addr(locked, addr) {
3389                    track_stub!(
3390                        TODO("https://fxbug.dev/391599171"),
3391                        "operations across different uffds"
3392                    );
3393                    return error!(ENOTSUP);
3394                };
3395            } else {
3396                return error!(ENOENT);
3397            }
3398            bytes_registered_with_uffd += len;
3399        }
3400        if bytes_registered_with_uffd != length {
3401            return error!(ENOENT);
3402        }
3403
3404        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3405
3406        // Determine how many pages in the requested range are already populated
3407        let first_populated =
3408            userfault.get_first_populated_page_after(locked, addr).ok_or_else(|| errno!(ENOENT))?;
3409        // If the very first page is already populated, uffd operations should just return EEXIST
3410        if first_populated == addr {
3411            return error!(EEXIST);
3412        }
3413        // Otherwise it is possible to do an incomplete operation by only populating pages until
3414        // the first populated one.
3415        let trimmed_end = std::cmp::min(first_populated, end_addr);
3416        let effective_length = trimmed_end - addr;
3417
3418        populate(&state, effective_length)?;
3419        userfault.insert_pages(locked, addr..trimmed_end, true);
3420
3421        // Since we used protection bits to force pagefaults, we now need to reverse this change by
3422        // restoring the protections on the underlying Zircon mappings to the "real" protection bits
3423        // that were kept in the Starnix mappings. This will prevent new pagefaults from being
3424        // generated. Only do this on the pages that were populated by this operation.
3425        for (range, mapping) in state.mappings.range(addr..trimmed_end) {
3426            let range_to_protect = range.intersect(&(addr..trimmed_end));
3427            let restored_flags = mapping.flags().access_flags();
3428            let length = range_to_protect.end - range_to_protect.start;
3429            self.protect_vmar_range(range_to_protect.start, length, restored_flags)
3430                .expect("Failed to restore original protection bits on uffd-registered range");
3431        }
3432        // Return the number of effectively populated bytes, which might be smaller than the
3433        // requested number.
3434        Ok(effective_length)
3435    }
3436
3437    pub fn zero_from_uffd<L>(
3438        &self,
3439        locked: &mut Locked<L>,
3440        addr: UserAddress,
3441        length: usize,
3442        userfault: &Arc<UserFault>,
3443    ) -> Result<usize, Errno>
3444    where
3445        L: LockBefore<UserFaultInner>,
3446    {
3447        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3448            state.zero(addr, effective_length, &self.mapping_context)
3449        })
3450    }
3451
3452    pub fn fill_from_uffd<L>(
3453        &self,
3454        locked: &mut Locked<L>,
3455        addr: UserAddress,
3456        buf: &[u8],
3457        length: usize,
3458        userfault: &Arc<UserFault>,
3459    ) -> Result<usize, Errno>
3460    where
3461        L: LockBefore<UserFaultInner>,
3462    {
3463        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3464            state.write_memory(addr, &buf[..effective_length], &self.mapping_context)
3465        })
3466    }
3467
3468    pub fn copy_from_uffd<L>(
3469        &self,
3470        locked: &mut Locked<L>,
3471        source_addr: UserAddress,
3472        dst_addr: UserAddress,
3473        length: usize,
3474        userfault: &Arc<UserFault>,
3475    ) -> Result<usize, Errno>
3476    where
3477        L: LockBefore<UserFaultInner>,
3478    {
3479        self.populate_from_uffd(locked, dst_addr, length, userfault, |state, effective_length| {
3480            let mut buf = vec![std::mem::MaybeUninit::uninit(); effective_length];
3481            let buf = state.read_memory(source_addr, &mut buf, &self.mapping_context)?;
3482            state.write_memory(dst_addr, &buf[..effective_length], &self.mapping_context)
3483        })
3484    }
3485
3486    /// Returns the new `MemoryManager` for a process, pre-populated with a snapshot of the layout
3487    /// and mappings of `source_mm`.  This is used during `CurrentTask::clone()` operations to
3488    /// create the initial address-space for the cloned child process.
3489    pub fn snapshot_of<L>(
3490        locked: &mut Locked<L>,
3491        source_mm: &Arc<MemoryManager>,
3492        root_vmar: zx::Unowned<'_, zx::Vmar>,
3493        arch_width: ArchWidth,
3494    ) -> Result<Arc<Self>, Errno>
3495    where
3496        L: LockBefore<MmDumpable>,
3497    {
3498        trace_duration!(CATEGORY_STARNIX_MM, "snapshot_of");
3499        let backing_size = (source_mm.mapping_context.user_vmar_info.base
3500            + source_mm.mapping_context.user_vmar_info.len) as u64;
3501        let private_anonymous =
3502            source_mm.mapping_context.private_anonymous.snapshot(backing_size)?;
3503        let target = MemoryManager::new(
3504            root_vmar,
3505            arch_width,
3506            source_mm.executable_node(),
3507            Some(private_anonymous),
3508        )?;
3509
3510        // Hold the lock throughout the operation to uphold memory manager's invariants.
3511        // See mm/README.md.
3512        {
3513            let state: &mut MemoryManagerState = &mut source_mm.state.write();
3514            let mut target_state = target.state.write();
3515            debug_assert_eq!(
3516                source_mm.mapping_context.user_vmar_info,
3517                target.mapping_context.user_vmar_info
3518            );
3519
3520            let mut clone_cache = HashMap::<zx::Koid, Arc<MemoryObject>>::new();
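            // The cache is keyed by koid so that multiple private mappings backed by the same
            // memory object share a single clone of that object in the child, preserving the
            // aliasing that the mappings had in the parent.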
3521
3522            for (range, mapping) in state.mappings.iter() {
3523                if mapping.flags().contains(MappingFlags::DONTFORK) {
3524                    continue;
3525                }
3526                // Locking is not inherited when forking.
3527                let target_mapping_flags = mapping.flags().difference(MappingFlags::LOCKED);
3528                match state.get_mapping_backing(mapping) {
3529                    MappingBacking::Memory(backing) => {
3530                        trace_duration!(CATEGORY_STARNIX_MM, "memory_backing_clone");
3531                        let memory_offset = backing.address_to_offset(range.start);
3532
3533                        let target_memory = if mapping.flags().contains(MappingFlags::SHARED)
3534                            || mapping.name().is_vvar()
3535                        {
3536                            // Note that the Vvar is a special mapping that behaves like a shared mapping but
3537                            // is private to each process.
3538                            backing.memory().clone()
3539                        } else {
3540                            let memory_obj = backing.memory();
3541                            let options = mapping.flags().options();
3542                            let memory =
3543                                clone_cache.entry(memory_obj.get_koid()).or_insert_with_fallible(
3544                                    || memory_obj.clone_memory(memory_obj.get_rights(), options),
3545                                )?;
3546                            memory.clone()
3547                        };
3548
3549                        let mapping = Mapping::with_name(
3550                            MappingBacking::Memory(Box::new(MappingBackingMemory::new(
3551                                range.start,
3552                                target_memory,
3553                                memory_offset,
3554                            ))),
3555                            target_mapping_flags,
3556                            mapping.max_access(),
3557                            mapping.name().to_owned(),
3558                            MappingMode::Lazy,
3559                        );
3560                        assert!(
3561                            target_state.mappings.append_non_overlapping(range.clone(), mapping)
3562                        );
3563                    }
3564                    MappingBacking::PrivateAnonymous => {
3565                        trace_duration!(CATEGORY_STARNIX_MM, "private_anonymous_backing_clone");
3566                        let length = range.end - range.start;
3567                        if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3568                            target
3569                                .mapping_context
3570                                .private_anonymous
3571                                .zero(range.start, length)
3572                                .map_err(|_| errno!(ENOMEM))?;
3573                        }
3574
3575                        let mapping = Mapping::new_private_anonymous(
3576                            target_mapping_flags,
3577                            mapping.name().to_owned(),
3578                            MappingMode::Lazy,
3579                        );
3580                        assert!(
3581                            target_state.mappings.append_non_overlapping(range.clone(), mapping)
3582                        );
3583                    }
3584                };
3585            }
3586
3587            target_state.forkable_state = state.forkable_state.clone();
3588        }
3589
3590        let self_dumpable = *source_mm.dumpable.lock(locked);
3591        *target.dumpable.lock(locked) = self_dumpable;
3592
3593        Ok(target)
3594    }
3595
3596    /// Returns the replacement `MemoryManager` to be used by the `exec()`ing task.
3597    ///
3598    /// POSIX requires that "a call to any exec function from a process with more than one thread
3599    /// shall result in all threads being terminated and the new executable being loaded and
3600    /// executed. No destructor functions or cleanup handlers shall be called".
3601    /// The caller is responsible for having ensured that this is the only `Task` in the
3602    /// `ThreadGroup`, and thereby the `zx::process`, such that it is safe to tear-down the Zircon
3603    /// userspace VMAR for the current address-space.
3604    pub fn exec(
3605        root_vmar: zx::Unowned<'_, zx::Vmar>,
3606        old_mm: Option<Arc<Self>>,
3607        exe_node: NamespaceNode,
3608        arch_width: ArchWidth,
3609    ) -> Result<Arc<Self>, Errno> {
3610        // To safeguard against concurrent accesses by other tasks through this `MemoryManager`, the
3611        // following steps are performed while holding the write lock on the old MM, if any:
3612        //
3613        // 1. All `mappings` are removed, so that remote `MemoryAccessor` calls will fail.
3614        // 2. The `user_vmar` is `destroy()`ed to free-up the user address-space.
3615        //
3616        // Once these steps are complete it is safe for the old mappings to be dropped.
3617        if let Some(old_mm) = old_mm {
3618            let _old_mappings = {
3619                let mut state = old_mm.state.write();
3620
3621                // SAFETY: This operation is safe because this is the only `Task` active in the address-
3622                // space, and accesses by remote tasks will use syscalls on the `root_vmar`.
3623                unsafe {
3624                    old_mm
3625                        .mapping_context
3626                        .user_vmar
3627                        .destroy()
3628                        .map_err(|status| from_status_like_fdio!(status))?
3629                }
3630
3631                std::mem::replace(&mut state.mappings, Default::default())
3632            };
3633        }
3634
3635        Self::new(root_vmar, arch_width, Some(exe_node), None)
3636    }
3637
3638    pub fn initialize_brk_origin(
3639        &self,
3640        arch_width: ArchWidth,
3641        executable_end: UserAddress,
3642    ) -> Result<(), Errno> {
3643        self.state.write().brk_origin = executable_end
3644            .checked_add(generate_random_offset_for_aslr(arch_width))
3645            .ok_or_else(|| errno!(EINVAL))?;
3646        Ok(())
3647    }
3648
3649    // Get a randomised address for loading a position-independent executable.
3650    pub fn get_random_base_for_executable(
3651        &self,
3652        arch_width: ArchWidth,
3653        length: usize,
3654    ) -> Result<UserAddress, Errno> {
3655        let state = self.state.read();
3656
3657        // Place it at approx. 2/3 of the available mmap space, subject to ASLR adjustment.
3658        let base = round_up_to_system_page_size(2 * state.mmap_top.ptr() / 3).unwrap()
3659            + generate_random_offset_for_aslr(arch_width);
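        // For example (hypothetical values): with `mmap_top == 0x3000_0000_0000`, `base` lands
        // near 0x2000_0000_0000, rounded up to a page boundary and shifted by a random ASLR
        // offset; the address is only returned if `base + length` still fits below `mmap_top`.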
3660        if base.checked_add(length).ok_or_else(|| errno!(EINVAL))? <= state.mmap_top.ptr() {
3661            Ok(UserAddress::from_ptr(base))
3662        } else {
3663            error!(EINVAL)
3664        }
3665    }
3666    pub fn executable_node(&self) -> Option<NamespaceNode> {
3667        self.state.read().executable_node.clone()
3668    }
3669
3670    #[track_caller]
3671    pub fn get_errno_for_map_err(status: zx::Status) -> Errno {
3672        match status {
3673            zx::Status::INVALID_ARGS => errno!(EINVAL),
3674            zx::Status::ACCESS_DENIED => errno!(EPERM),
3675            zx::Status::NOT_SUPPORTED => errno!(ENODEV),
3676            zx::Status::NO_MEMORY => errno!(ENOMEM),
3677            zx::Status::NO_RESOURCES => errno!(ENOMEM),
3678            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
3679            zx::Status::ALREADY_EXISTS => errno!(EEXIST),
3680            zx::Status::BAD_STATE => errno!(EINVAL),
3681            _ => impossible_error(status),
3682        }
3683    }
3684
3685    #[track_caller]
3686    pub fn get_errno_for_vmo_err(status: zx::Status) -> Errno {
3687        match status {
3688            zx::Status::NO_MEMORY => errno!(ENOMEM),
3689            zx::Status::ACCESS_DENIED => errno!(EPERM),
3690            zx::Status::NOT_SUPPORTED => errno!(EIO),
3691            zx::Status::BAD_STATE => errno!(EIO),
3692            _ => return impossible_error(status),
3693        }
3694    }
3695
3696    pub fn map_memory(
3697        self: &Arc<Self>,
3698        addr: DesiredAddress,
3699        memory: Arc<MemoryObject>,
3700        memory_offset: u64,
3701        length: usize,
3702        prot_flags: ProtectionFlags,
3703        max_access: Access,
3704        options: MappingOptions,
3705        name: MappingName,
3706    ) -> Result<UserAddress, Errno> {
3707        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
3708
3709        // Unmapped mappings must be released after the state is unlocked.
3710        let mut released_mappings = ReleasedMappings::default();
3711        // Hold the lock throughout the operation to uphold memory manager's invariants.
3712        // See mm/README.md.
3713        let mut state = self.state.write();
3714        let result = state.add_memory_mapping(
3715            self,
3716            addr,
3717            memory,
3718            memory_offset,
3719            length,
3720            flags,
3721            max_access,
3722            options.contains(MappingOptions::POPULATE),
3723            name,
3724            MappingMode::Eager,
3725            &mut released_mappings,
3726        );
3727
3728        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
3729        // in `DirEntry`'s `drop`.
3730        released_mappings.finalize(state);
3731
3732        result
3733    }
3734
3735    pub fn map_anonymous(
3736        self: &Arc<Self>,
3737        addr: DesiredAddress,
3738        length: usize,
3739        prot_flags: ProtectionFlags,
3740        options: MappingOptions,
3741        name: MappingName,
3742    ) -> Result<UserAddress, Errno> {
3743        let mut released_mappings = ReleasedMappings::default();
3744        // Hold the lock throughout the operation to uphold memory manager's invariants.
3745        // See mm/README.md.
3746        let mut state = self.state.write();
3747        let result = state.map_anonymous(
3748            self,
3749            addr,
3750            length,
3751            prot_flags,
3752            options,
3753            name,
3754            &mut released_mappings,
3755        );
3756
3757        released_mappings.finalize(state);
3758
3759        result
3760    }
3761
3762    /// Map the stack into a pre-selected address region.
3763    pub fn map_stack(
3764        self: &Arc<Self>,
3765        length: usize,
3766        prot_flags: ProtectionFlags,
3767    ) -> Result<UserAddress, Errno> {
3768        assert!(length <= MAX_STACK_SIZE);
3769        let addr = self.state.read().stack_origin;
3770        // The address range containing stack_origin should normally be available: it's above the
3771        // mmap_top, and this method is called early enough in the process lifetime that only the
3772        // main ELF and the interpreter are already loaded. However, in the rare case that the
3773        // static position-independent executable overlaps the chosen address, mapping as Hint
3774        // will make mmap choose a new place for it.
3775        // TODO(https://fxbug.dev/370027241): Consider a more robust approach
3776        let stack_addr = self.map_anonymous(
3777            DesiredAddress::Hint(addr),
3778            length,
3779            prot_flags,
3780            MappingOptions::ANONYMOUS | MappingOptions::GROWSDOWN,
3781            MappingName::Stack,
3782        )?;
3783        if stack_addr != addr {
3784            log_warn!(
3785                "An address designated for stack ({}) was unavailable, mapping at {} instead.",
3786                addr,
3787                stack_addr
3788            );
3789        }
3790        Ok(stack_addr)
3791    }
3792
3793    pub fn remap(
3794        self: &Arc<Self>,
3795        current_task: &CurrentTask,
3796        addr: UserAddress,
3797        old_length: usize,
3798        new_length: usize,
3799        flags: MremapFlags,
3800        new_addr: UserAddress,
3801    ) -> Result<UserAddress, Errno> {
3802        let mut released_mappings = ReleasedMappings::default();
3803        // Hold the lock throughout the operation to uphold memory manager's invariants.
3804        // See mm/README.md.
3805        let mut state = self.state.write();
3806        let result = state.remap(
3807            current_task,
3808            self,
3809            addr,
3810            old_length,
3811            new_length,
3812            flags,
3813            new_addr,
3814            &mut released_mappings,
3815        );
3816
3817        released_mappings.finalize(state);
3818
3819        result
3820    }
3821
3822    pub fn unmap(self: &Arc<Self>, addr: UserAddress, length: usize) -> Result<(), Errno> {
3823        let mut released_mappings = ReleasedMappings::default();
3824        // Hold the lock throughout the operation to uphold memory manager's invariants.
3825        // See mm/README.md.
3826        let mut state = self.state.write();
3827        let result = state.unmap(self, addr, length, &mut released_mappings);
3828
3829        released_mappings.finalize(state);
3830
3831        result
3832    }
3833
3834    pub fn protect(
3835        &self,
3836        current_task: &CurrentTask,
3837        addr: UserAddress,
3838        length: usize,
3839        prot_flags: ProtectionFlags,
3840    ) -> Result<(), Errno> {
3841        // Hold the lock throughout the operation to uphold memory manager's invariants.
3842        // See mm/README.md.
3843        let mut state = self.state.write();
3844        let mut released_mappings = ReleasedMappings::default();
3845        let result = state.protect(current_task, addr, length, prot_flags, &mut released_mappings);
3846        released_mappings.finalize(state);
3847        result
3848    }
3849
3850    pub fn msync(
3851        &self,
3852        _locked: &mut Locked<Unlocked>,
3853        current_task: &CurrentTask,
3854        addr: UserAddress,
3855        length: usize,
3856        flags: MsyncFlags,
3857    ) -> Result<(), Errno> {
3858        // According to POSIX, either MS_SYNC or MS_ASYNC must be specified in flags,
3859        // and indeed failure to include one of these flags will cause msync() to fail
3860        // on some systems.  However, Linux permits a call to msync() that specifies
3861        // neither of these flags, with semantics that are (currently) equivalent to
3862        // specifying MS_ASYNC.
3863
3864        // Both MS_SYNC and MS_ASYNC are set in flags
3865        if flags.contains(MsyncFlags::ASYNC) && flags.contains(MsyncFlags::SYNC) {
3866            return error!(EINVAL);
3867        }
3868
3869        if !addr.is_aligned(*PAGE_SIZE) {
3870            return error!(EINVAL);
3871        }
3872
3873        // We collect the nodes to sync first, release the memory manager lock, and then sync them.
3874        // This avoids holding the lock during blocking I/O operations (sync), which prevents
3875        // stalling other memory operations and avoids potential deadlocks.
3876        // It also allows us to deduplicate nodes, avoiding redundant sync calls for the same file.
3877        let mut nodes_to_sync = {
3878            let mm_state = self.state.read();
3879
3880            let length_rounded = round_up_to_system_page_size(length)?;
3881            let end_addr = addr.checked_add(length_rounded).ok_or_else(|| errno!(EINVAL))?;
3882
3883            let mut last_end = addr;
3884            let mut nodes = vec![];
3885            for (range, mapping) in mm_state.mappings.range(addr..end_addr) {
3886                // Check if there is a gap between the last mapped address and the current mapping.
3887                // msync requires the entire range to be mapped, so any gap results in ENOMEM.
3888                if range.start > last_end {
3889                    return error!(ENOMEM);
3890                }
3891                last_end = range.end;
3892
3893                if flags.contains(MsyncFlags::INVALIDATE)
3894                    && mapping.flags().contains(MappingFlags::LOCKED)
3895                {
3896                    return error!(EBUSY);
3897                }
3898
3899                if flags.contains(MsyncFlags::SYNC) {
3900                    if let MappingNameRef::File(file_mapping) = mapping.name() {
3901                        nodes.push(file_mapping.name.entry.node.clone());
3902                    }
3903                }
3904            }
3905            if last_end < end_addr {
3906                return error!(ENOMEM);
3907            }
3908            nodes
3909        };
3910
3911        // Deduplicate nodes to avoid redundant sync calls.
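        // Sorting by pointer address first makes identical nodes adjacent, so `dedup_by` removes
        // every duplicate rather than only consecutive ones.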
3912        nodes_to_sync.sort_by_key(|n| Arc::as_ptr(n) as usize);
3913        nodes_to_sync.dedup_by(|a, b| Arc::ptr_eq(a, b));
3914
3915        for node in nodes_to_sync {
3916            // Range-based sync is non-trivial for Fxfs to support due to its complicated
3917            // reservation system (b/322874588#comment5). Naive range-based sync could exhaust
3918            // space reservations if called page-by-page, as transaction costs are based on the
3919            // number of dirty pages rather than file ranges. We use whole-file sync for now
3920            // to ensure data durability without adding excessive complexity.
3921            node.ops().sync(&node, current_task)?;
3922        }
3923        Ok(())
3924    }
3925
3926    pub fn madvise(&self, addr: UserAddress, length: usize, advice: u32) -> Result<(), Errno> {
3927        let mut state = self.state.write();
3928        let mut released_mappings = ReleasedMappings::default();
3929        let result =
3930            state.madvise(&self.mapping_context, addr, length, advice, &mut released_mappings);
3931        released_mappings.finalize(state);
3932        result
3933    }
3934
3935    pub fn mlock<L>(
3936        &self,
3937        current_task: &CurrentTask,
3938        locked: &mut Locked<L>,
3939        desired_addr: UserAddress,
3940        desired_length: usize,
3941        on_fault: bool,
3942    ) -> Result<(), Errno>
3943    where
3944        L: LockBefore<ThreadGroupLimits>,
3945    {
3946        let mut state = self.state.write();
3947        let mut released_mappings = ReleasedMappings::default();
3948        let result = state.mlock(
3949            &self.mapping_context,
3950            current_task,
3951            locked,
3952            desired_addr,
3953            desired_length,
3954            on_fault,
3955            &mut released_mappings,
3956        );
3957        released_mappings.finalize(state);
3958        result
3959    }
3960
3961    pub fn munlock(
3962        &self,
3963        current_task: &CurrentTask,
3964        desired_addr: UserAddress,
3965        desired_length: usize,
3966    ) -> Result<(), Errno> {
3967        let mut state = self.state.write();
3968        let mut released_mappings = ReleasedMappings::default();
3969        let result =
3970            state.munlock(current_task, desired_addr, desired_length, &mut released_mappings);
3971        released_mappings.finalize(state);
3972        result
3973    }
3974
3975    pub fn log_memory_map(&self, task: &Task, fault_address: UserAddress) {
3976        let state = self.state.read();
3977        log_warn!("Memory map for pid={}:", task.thread_group.leader);
3978        let mut last_end = UserAddress::from_ptr(0);
3979        for (range, map) in state.mappings.iter() {
3980            if fault_address >= last_end && fault_address < range.start {
3981                log_warn!("{:08x} <= FAULT", fault_address.ptr());
3982            }
3983
3984            let perms = format!(
3985                "{}{}{}{}",
3986                if map.can_read() { 'r' } else { '-' },
3987                if map.can_write() { 'w' } else { '-' },
3988                if map.can_exec() { 'x' } else { '-' },
3989                if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' }
3990            );
3991
3992            let backing = match state.get_mapping_backing(map) {
3993                MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
3994                MappingBacking::PrivateAnonymous => 0,
3995            };
3996
3997            let name_str = match &map.name() {
3998                MappingNameRef::File(file) => {
3999                    let Ok(live) = task.live() else {
4000                        log_warn!("Task {} is not live", task.get_tid());
4001                        continue;
4002                    };
4003                    String::from_utf8_lossy(&file.name.path(&live.fs())).into_owned()
4004                }
4005                MappingNameRef::None | MappingNameRef::AioContext(_) => {
4006                    if map.flags().contains(MappingFlags::SHARED)
4007                        && map.flags().contains(MappingFlags::ANONYMOUS)
4008                    {
4009                        "/dev/zero (deleted)".to_string()
4010                    } else {
4011                        "".to_string()
4012                    }
4013                }
4014                MappingNameRef::Stack => "[stack]".to_string(),
4015                MappingNameRef::Heap => "[heap]".to_string(),
4016                MappingNameRef::Vdso => "[vdso]".to_string(),
4017                MappingNameRef::Vvar => "[vvar]".to_string(),
4018                _ => format!("{:?}", map.name()),
4019            };
4020
4021            let fault_marker = if range.contains(&fault_address) { " <= FAULT" } else { "" };
4022
4023            log_warn!(
4024                "{:08x}-{:08x} {} {:08x} {}{}",
4025                range.start.ptr(),
4026                range.end.ptr(),
4027                perms,
4028                backing,
4029                name_str,
4030                fault_marker
4031            );
4032            last_end = range.end;
4033        }
4034
4035        if fault_address >= last_end {
4036            log_warn!("{:08x} <= FAULT", fault_address.ptr());
4037        }
4038    }
4039
4040    pub fn handle_page_fault(
4041        self: &Arc<Self>,
4042        locked: &mut Locked<Unlocked>,
4043        decoded: PageFaultExceptionReport,
4044        error_code: zx::Status,
4045    ) -> ExceptionResult {
4046        let addr = UserAddress::from(decoded.faulting_address);
4047
4048        // For a fault on a uffd-registered range, handle it according to the uffd rules.
4049        if error_code == zx::Status::ACCESS_DENIED {
4050            let state = self.state.write();
4051            if let Some((_, mapping)) = state.mappings.get(addr) {
4052                if mapping.flags().contains(MappingFlags::UFFD) {
4053                    // TODO(https://fxbug.dev/391599171): Support other modes
4054                    assert!(mapping.flags().contains(MappingFlags::UFFD_MISSING));
4055
4056                    if let Some(_uffd) = state.find_uffd(locked, addr) {
4057                        // If the SIGBUS feature was set, no event is sent on the userfault file
4058                        // descriptor; instead, SIGBUS is delivered to the process that triggered
4059                        // the fault.
4060                        // TODO(https://fxbug.dev/391599171): For now we only support this feature,
4061                        // so we assume it is set. Check for it once we support running without it.
4062                        return ExceptionResult::Signal(SignalInfo::with_detail(
4063                            SIGBUS,
4064                            BUS_ADRERR as i32,
4065                            SignalDetail::SigFault { addr: decoded.faulting_address },
4066                        ));
4067                    };
4068                }
4069                // There is a data race when uffd unregistration and a page fault happen at the
4070                // same time. To detect it, we check whether the access should have been rejected
4071                // according to Starnix's own information about the mapping.
4072                if check_access_permissions_in_page_fault(&decoded, mapping) {
4073                    track_stub!(
4074                        TODO("https://fxbug.dev/435171399"),
4075                        "Inconsistent permission fault"
4076                    );
4077                    return ExceptionResult::Handled;
4078                }
4079            }
4080            std::mem::drop(state);
4081        }
4082
4083        if decoded.not_present {
4084            {
4085                let mut state = self.state.write();
4086                match state.ensure_range_mapped_in_user_vmar(addr, None, &self.mapping_context) {
4087                    Ok(true) => return ExceptionResult::Handled,
4088                    Ok(false) => {
4089                        // If the mapping generation has changed since the last time this thread
4090                        // saw it, we return `Handled` to retry the faulting instruction.
4091                        // This handles cases where the fault was spurious due to a concurrent
4092                        // mapping operation. We update the counter here to ensure we converge and
4093                        // don't loop infinitely.
4094                        let current_gen = state.mappings.generation;
4095                        let old_gen = LAST_SEEN_MAPPING_GENERATION.with(|c| c.replace(current_gen));
4096                        if current_gen != old_gen {
4097                            return ExceptionResult::Handled;
4098                        }
4099                    }
4100                    Err(e) => {
4101                        log_error!("Failed to map lazy memory: {e}")
4102                    }
4103                }
4104            }
4105
4106            // A page fault may be resolved by extending a growsdown mapping to cover the faulting
4107            // address. Mark the exception handled if so. Otherwise let the regular handling proceed.
4108
4109            // We should only attempt growth on a not-present fault and we should only extend if the
4110            // access type matches the protection on the GROWSDOWN mapping.
4111            match self.extend_growsdown_mapping_to_address(
4112                UserAddress::from(decoded.faulting_address),
4113                decoded.is_write,
4114            ) {
4115                Ok(true) => {
4116                    return ExceptionResult::Handled;
4117                }
4118                Err(e) => {
4119                    log_warn!("Error handling page fault: {e}")
4120                }
4121                _ => {}
4122            }
4123        }
4124
4125        // For this exception type, the synth_code field in the exception report's context is the
4126        // error generated by the page fault handler. For us this is used to distinguish between a
4127        // segmentation violation and a bus error. Unfortunately this detail is not documented in
4128        // Zircon's public documentation and is only described in the architecture-specific
4129        // exception definitions such as:
4130        // zircon/kernel/arch/x86/include/arch/x86.h
4131        // zircon/kernel/arch/arm64/include/arch/arm64.h
4132        let signo = match error_code {
4133            zx::Status::OUT_OF_RANGE => SIGBUS,
4134            _ => SIGSEGV,
4135        };
4136        ExceptionResult::Signal(SignalInfo::with_detail(
4137            signo,
4138            SI_KERNEL as i32,
4139            SignalDetail::SigFault { addr: decoded.faulting_address },
4140        ))
4141    }
4142
4143    pub fn set_mapping_name(
4144        &self,
4145        addr: UserAddress,
4146        length: usize,
4147        name: Option<FsString>,
4148    ) -> Result<(), Errno> {
4149        let mut state = self.state.write();
4150        let mut released_mappings = ReleasedMappings::default();
4151        let result = state.set_mapping_name(addr, length, name, &mut released_mappings);
4152        released_mappings.finalize(state);
4153        result
4154    }
4155
4156    /// Returns [`Ok`] if the entire range specified by `addr..(addr+length)` contains valid
4157    /// mappings.
4158    ///
4159    /// # Errors
4160    ///
4161    /// Returns [`Err(errno)`] where `errno` is:
4162    ///
4163    ///   - `EINVAL`: `addr` is not page-aligned, or the range is too large,
4164    ///   - `ENOMEM`: one or more pages in the range are not mapped.
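    ///
    /// # Example
    ///
    /// An illustrative sketch; the address and length are hypothetical:
    ///
    /// ```ignore
    /// // Succeeds only if every page in `addr..addr + len` is covered by some mapping.
    /// mm.ensure_mapped(addr, len)?;
    /// ```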
4165    pub fn ensure_mapped(&self, addr: UserAddress, length: usize) -> Result<(), Errno> {
4166        if !addr.is_aligned(*PAGE_SIZE) {
4167            return error!(EINVAL);
4168        }
4169
4170        let length = round_up_to_system_page_size(length)?;
4171        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
4172        let state = self.state.read();
4173        let mut last_end = addr;
4174        for (range, _) in state.mappings.range(addr..end_addr) {
4175            if range.start > last_end {
4176                // This mapping does not start immediately after the last.
4177                return error!(ENOMEM);
4178            }
4179            last_end = range.end;
4180        }
4181        if last_end < end_addr {
4182            // There is a gap of no mappings at the end of the range.
4183            error!(ENOMEM)
4184        } else {
4185            Ok(())
4186        }
4187    }
4188
4189    /// Returns the memory object mapped at the address and the offset into the memory object of
4190    /// the address. Intended for implementing futexes.
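    ///
    /// An illustrative use; the futex address is hypothetical:
    ///
    /// ```ignore
    /// let (memory, offset) = mm.get_mapping_memory(futex_addr, ProtectionFlags::READ)?;
    /// ```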
4191    pub fn get_mapping_memory(
4192        &self,
4193        addr: UserAddress,
4194        perms: ProtectionFlags,
4195    ) -> Result<(Arc<MemoryObject>, u64), Errno> {
4196        let state = self.state.read();
4197        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
4198        if !mapping.flags().access_flags().contains(perms) {
4199            return error!(EACCES);
4200        }
4201        match state.get_mapping_backing(mapping) {
4202            MappingBacking::Memory(backing) => {
4203                Ok((Arc::clone(backing.memory()), mapping.address_to_offset(addr)))
4204            }
4205            MappingBacking::PrivateAnonymous => {
4206                Ok((Arc::clone(&self.mapping_context.private_anonymous.backing), addr.ptr() as u64))
4207            }
4208        }
4209    }
4210
4211    /// Does a rough check that the given address is plausibly in the address space of the
4212    /// application. This does not mean the pointer is valid for any particular purpose or that
4213    /// it will remain so!
4214    ///
4215    /// In some syscalls, Linux appears to validate the pointer up front so it can tell the
4216    /// caller early that it's invalid. For example, epoll_wait() returns a vector of events: if
4217    /// the caller passes an invalid pointer, it should fail without dropping any events, since
4218    /// failing later, while actually copying the events to userspace, would mean those events
4219    /// are lost. But holding a lock on the memory manager for the duration of an asynchronous
4220    /// wait is not desirable.
4221    ///
4222    /// Testing shows that Linux seems to do some initial plausibility checking of the pointer to
4223    /// be able to report common usage errors before doing any (possibly irreversible) work. This
4224    /// checking is easy to get around if you try, so this function is also not required to
4225    /// be particularly robust. Certainly the more advanced cases of races (the memory could be
4226    /// unmapped after this call but before it's used) are not handled.
4227    ///
4228    /// The `buffer_size` parameter is the size of the data structure that needs to fit
4229    /// at the given address.
4230    ///
4231    /// Returns the error EFAULT if invalid.
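    ///
    /// An illustrative sketch; the pointer and buffer size are hypothetical:
    ///
    /// ```ignore
    /// // Reject an obviously bad output pointer before starting an asynchronous wait.
    /// mm.check_plausible(user_events_addr, events_buffer_size)?;
    /// ```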
4232    pub fn check_plausible(&self, addr: UserAddress, buffer_size: usize) -> Result<(), Errno> {
4233        let state = self.state.read();
4234
4235        if let Some(range) = state.mappings.last_range() {
4236            if (range.end - buffer_size)? >= addr {
4237                return Ok(());
4238            }
4239        }
4240        error!(EFAULT)
4241    }
4242
4243    pub fn get_aio_context(&self, addr: UserAddress) -> Option<Arc<AioContext>> {
4244        let state = self.state.read();
4245        state.get_aio_context(addr).map(|(_, aio_context)| aio_context)
4246    }
4247
4248    pub fn destroy_aio_context(
4249        self: &Arc<Self>,
4250        addr: UserAddress,
4251    ) -> Result<Arc<AioContext>, Errno> {
4252        let mut released_mappings = ReleasedMappings::default();
4253
4254        // Hold the lock throughout the operation to uphold memory manager's invariants.
4255        // See mm/README.md.
4256        let mut state = self.state.write();
4257
4258        // Validate that this address actually has an AioContext. We need to hold the state lock
4259        // until we actually remove the mappings to ensure that another thread does not manipulate
4260        // the mappings after we've validated that they contain an AioContext.
4261        let Some((range, aio_context)) = state.get_aio_context(addr) else {
4262            return error!(EINVAL);
4263        };
4264
4265        let length = range.end - range.start;
4266        let result = state.unmap(self, range.start, length, &mut released_mappings);
4267
4268        released_mappings.finalize(state);
4269
4270        result.map(|_| aio_context)
4271    }
4272
4273    #[cfg(test)]
4274    pub fn get_mapping_name(
4275        &self,
4276        addr: UserAddress,
4277    ) -> Result<Option<flyweights::FlyByteStr>, Errno> {
4278        let state = self.state.read();
4279        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
4280        if let MappingNameRef::Vma(name) = mapping.name() {
4281            Ok(Some(name.clone()))
4282        } else {
4283            Ok(None)
4284        }
4285    }
4286
4287    #[cfg(test)]
4288    pub fn get_mapping_count(&self) -> usize {
4289        let state = self.state.read();
4290        state.mappings.iter().count()
4291    }
4292
4293    pub fn extend_growsdown_mapping_to_address(
4294        self: &Arc<Self>,
4295        addr: UserAddress,
4296        is_write: bool,
4297    ) -> Result<bool, Error> {
4298        self.state.write().extend_growsdown_mapping_to_address(self, addr, is_write)
4299    }
4300
4301    pub fn get_stats(&self, current_task: &CurrentTask) -> MemoryStats {
4302        // Grab our state lock before reading zircon mappings so that the two are consistent.
4303        // Other Starnix threads should not make any changes to the Zircon mappings while we hold
4304        // a read lock to the memory manager state.
4305        let state = self.state.read();
4306
4307        let mut stats = MemoryStats::default();
4308        stats.vm_stack = state.stack_size;
4309
4310        self.with_zx_mappings(current_task, |zx_mappings| {
4311            for zx_mapping in zx_mappings {
4312                // We only care about map info for actual mappings.
4313                let zx_details = zx_mapping.details();
4314                let Some(zx_details) = zx_details.as_mapping() else { continue };
4315                let user_address = UserAddress::from(zx_mapping.base as u64);
4316                let (_, mm_mapping) = state
4317                    .mappings
4318                    .get(user_address)
4319                    .unwrap_or_else(|| panic!("mapping bookkeeping must be consistent with zircon's: not found: {user_address:?}"));
4320                debug_assert_eq!(
4321                    match state.get_mapping_backing(mm_mapping) {
4322                        MappingBacking::Memory(m)=>m.memory().get_koid(),
4323                        MappingBacking::PrivateAnonymous=>self.mapping_context.private_anonymous.backing.get_koid(),
4324                    },
4325                    zx_details.vmo_koid,
4326                    "MemoryManager and Zircon must agree on which VMO is mapped in this range",
4327                );
4328
4329                stats.vm_size += zx_mapping.size;
4330
4331                stats.vm_rss += zx_details.committed_bytes;
4332                stats.vm_swap += zx_details.populated_bytes - zx_details.committed_bytes;
4333
4334                if mm_mapping.flags().contains(MappingFlags::SHARED) {
4335                    stats.rss_shared += zx_details.committed_bytes;
4336                } else if mm_mapping.flags().contains(MappingFlags::ANONYMOUS) {
4337                    stats.rss_anonymous += zx_details.committed_bytes;
4338                } else if mm_mapping.name().is_file() {
4339                    stats.rss_file += zx_details.committed_bytes;
4340                }
4341
4342                if mm_mapping.flags().contains(MappingFlags::LOCKED) {
4343                    stats.vm_lck += zx_details.committed_bytes;
4344                }
4345
4346                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
4347                    && mm_mapping.flags().contains(MappingFlags::WRITE)
4348                {
4349                    stats.vm_data += zx_mapping.size;
4350                }
4351
4352                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
4353                    && mm_mapping.flags().contains(MappingFlags::EXEC)
4354                {
4355                    stats.vm_exe += zx_mapping.size;
4356                }
4357            }
4358        });
4359
4360        // TODO(https://fxbug.dev/396221597): Placeholder for now. We need kernel support to track
4361        // the committed bytes high water mark.
4362        stats.vm_rss_hwm = STUB_VM_RSS_HWM;
4363        stats
4364    }
4365
4366    pub fn atomic_load_u32_acquire(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
4367        if let Some(usercopy) = usercopy() {
4368            self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None)?;
4369            usercopy.atomic_load_u32_acquire(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
4370        } else {
4371            unreachable!("can only control memory ordering of atomics with usercopy");
4372        }
4373    }
4374
4375    pub fn atomic_load_u32_relaxed(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
4376        if let Some(usercopy) = usercopy() {
4377            self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None)?;
4378            usercopy.atomic_load_u32_relaxed(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
4379        } else {
4380            // SAFETY: `self.state.read().read_memory` only returns `Ok` if all
4381            // bytes were read to.
4382            let buf = unsafe {
4383                read_to_array(|buf| {
4384                    self.state
4385                        .read()
4386                        .read_memory(futex_addr.into(), buf, &self.mapping_context)
4387                        .map(|bytes_read| {
4388                            debug_assert_eq!(bytes_read.len(), std::mem::size_of::<u32>())
4389                        })
4390                })
4391            }?;
4392            Ok(u32::from_ne_bytes(buf))
4393        }
4394    }
4395
4396    pub fn atomic_store_u32_relaxed(
4397        &self,
4398        futex_addr: FutexAddress,
4399        value: u32,
4400    ) -> Result<(), Errno> {
4401        if let Some(usercopy) = usercopy() {
4402            self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None)?;
4403            usercopy.atomic_store_u32_relaxed(futex_addr.ptr(), value).map_err(|_| errno!(EFAULT))
4404        } else {
4405            self.state.read().write_memory(
4406                futex_addr.into(),
4407                value.as_bytes(),
4408                &self.mapping_context,
4409            )?;
4410            Ok(())
4411        }
4412    }
4413
4414    pub fn atomic_compare_exchange_u32_acq_rel(
4415        &self,
4416        futex_addr: FutexAddress,
4417        current: u32,
4418        new: u32,
4419    ) -> CompareExchangeResult<u32> {
4420        if let Err(e) = self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None) {
4421            return CompareExchangeResult::Error(e);
4422        }
4423        let Some(usercopy) = usercopy() else {
4424            unreachable!("Atomic compare/exchange requires usercopy.");
4425        };
4426        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_u32_acq_rel(
4427            futex_addr.ptr(),
4428            current,
4429            new,
4430        ))
4431    }
4432
4433    pub fn atomic_compare_exchange_weak_u32_acq_rel(
4434        &self,
4435        futex_addr: FutexAddress,
4436        current: u32,
4437        new: u32,
4438    ) -> CompareExchangeResult<u32> {
4439        if let Err(e) = self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None) {
4440            return CompareExchangeResult::Error(e);
4441        }
4442        let Some(usercopy) = usercopy() else {
4443            unreachable!("Atomic compare/exchange requires usercopy.");
4444        };
4445        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_weak_u32_acq_rel(
4446            futex_addr.ptr(),
4447            current,
4448            new,
4449        ))
4450    }
4451}
4452
4453/// The result of an atomic compare/exchange operation on user memory.
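///
/// A typical caller matches on the result; an illustrative sketch (the futex address and the
/// `expected`/`desired` values are hypothetical):
///
/// ```ignore
/// match mm.atomic_compare_exchange_u32_acq_rel(futex_addr, expected, desired) {
///     CompareExchangeResult::Success => { /* `desired` was written */ }
///     CompareExchangeResult::Stale { observed } => { /* retry with `observed` */ }
///     CompareExchangeResult::Error(errno) => return Err(errno),
/// }
/// ```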
4454#[derive(Debug, Clone)]
4455pub enum CompareExchangeResult<T> {
4456    /// The current value provided matched the one observed in memory and the new value provided
4457    /// was written.
4458    Success,
4459    /// The provided current value did not match the current value in memory.
4460    Stale { observed: T },
4461    /// There was a general error while accessing the requested memory.
4462    Error(Errno),
4463}
4464
4465impl<T> CompareExchangeResult<T> {
4466    fn from_usercopy(usercopy_res: Result<Result<T, T>, ()>) -> Self {
4467        match usercopy_res {
4468            Ok(Ok(_)) => Self::Success,
4469            Ok(Err(observed)) => Self::Stale { observed },
4470            Err(()) => Self::Error(errno!(EFAULT)),
4471        }
4472    }
4473}
4474
4475impl<T> From<Errno> for CompareExchangeResult<T> {
4476    fn from(e: Errno) -> Self {
4477        Self::Error(e)
4478    }
4479}
4480
4481/// The user-space address at which a mapping should be placed. Used by [`MemoryManager::map`].
4482#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4483pub enum DesiredAddress {
4484    /// Map at any address chosen by the kernel.
4485    Any,
4486    /// The address is a hint. If the address overlaps an existing mapping a different address may
4487    /// be chosen.
4488    Hint(UserAddress),
4489    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
4490    /// overwrite it), mapping fails.
4491    Fixed(UserAddress),
4492    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
4493    /// overwrite it), they should be unmapped.
4494    FixedOverwrite(UserAddress),
4495}
4496
4497/// The user-space address at which a mapping should be placed. Used by [`map_in_vmar`].
4498#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4499enum SelectedAddress {
4500    /// See DesiredAddress::Fixed.
4501    Fixed(UserAddress),
4502    /// See DesiredAddress::FixedOverwrite.
4503    FixedOverwrite(UserAddress),
4504}
4505
4506impl SelectedAddress {
4507    fn addr(&self) -> UserAddress {
4508        match self {
4509            SelectedAddress::Fixed(addr) => *addr,
4510            SelectedAddress::FixedOverwrite(addr) => *addr,
4511        }
4512    }
4513}
4514
4515/// Write one line of the memory map intended for adding to `/proc/self/maps`.
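///
/// An illustrative output line (addresses, inode, and path are hypothetical; the name column is
/// padded out to at least column 74):
///
/// ```text
/// 00400000-00452000 r-xp 00000000 00:00 12345                              /system/bin/app
/// ```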
4516fn write_map(
4517    task: &Task,
4518    sink: &mut DynamicFileBuf,
4519    state: &MemoryManagerState,
4520    range: &Range<UserAddress>,
4521    map: &Mapping,
4522) -> Result<(), Errno> {
4523    let line_length = write!(
4524        sink,
4525        "{:08x}-{:08x} {}{}{}{} {:08x} 00:00 {} ",
4526        range.start.ptr(),
4527        range.end.ptr(),
4528        if map.can_read() { 'r' } else { '-' },
4529        if map.can_write() { 'w' } else { '-' },
4530        if map.can_exec() { 'x' } else { '-' },
4531        if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' },
4532        match state.get_mapping_backing(map) {
4533            MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
4534            MappingBacking::PrivateAnonymous => 0,
4535        },
4536        if let MappingNameRef::File(file) = &map.name() { file.name.entry.node.ino } else { 0 }
4537    )?;
4538    let fill_to_name = |sink: &mut DynamicFileBuf| {
4539        // The filename goes at >= the 74th column (73rd when zero indexed)
4540        for _ in line_length..73 {
4541            sink.write(b" ");
4542        }
4543    };
4544    match &map.name() {
4545        MappingNameRef::None | MappingNameRef::AioContext(_) => {
4546            if map.flags().contains(MappingFlags::SHARED)
4547                && map.flags().contains(MappingFlags::ANONYMOUS)
4548            {
4549                // See proc(5), "/proc/[pid]/map_files/"
4550                fill_to_name(sink);
4551                sink.write(b"/dev/zero (deleted)");
4552            }
4553        }
4554        MappingNameRef::Stack => {
4555            fill_to_name(sink);
4556            sink.write(b"[stack]");
4557        }
4558        MappingNameRef::Heap => {
4559            fill_to_name(sink);
4560            sink.write(b"[heap]");
4561        }
4562        MappingNameRef::Vdso => {
4563            fill_to_name(sink);
4564            sink.write(b"[vdso]");
4565        }
4566        MappingNameRef::Vvar => {
4567            fill_to_name(sink);
4568            sink.write(b"[vvar]");
4569        }
4570        MappingNameRef::File(file) => {
4571            fill_to_name(sink);
4572            // File names can have newlines that need to be escaped before printing.
4573            // According to https://man7.org/linux/man-pages/man5/proc.5.html the only
4574            // escaping applied to paths is replacing newlines with an octal sequence.
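            // For example, a (hypothetical) path containing a newline, "/tmp/a\nb", is written
            // out as "/tmp/a\012b".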
4575            let path = file.name.path(&task.live()?.fs());
4576            sink.write_iter(
4577                path.iter()
4578                    .flat_map(|b| if *b == b'\n' { b"\\012" } else { std::slice::from_ref(b) })
4579                    .copied(),
4580            );
4581        }
4582        MappingNameRef::Vma(name) => {
4583            fill_to_name(sink);
4584            sink.write(b"[anon:");
4585            sink.write(name.as_bytes());
4586            sink.write(b"]");
4587        }
4588        MappingNameRef::Ashmem(name) => {
4589            fill_to_name(sink);
4590            sink.write(b"/dev/ashmem/");
4591            sink.write(name.as_bytes());
4592        }
4593    }
4594    sink.write(b"\n");
4595    Ok(())
4596}
4597
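/// Aggregate memory statistics for a memory manager, in bytes. The field names loosely mirror
/// the `Vm*`/`Rss*` counters that Linux reports in `/proc/<pid>/status`.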
4598#[derive(Default)]
4599pub struct MemoryStats {
4600    pub vm_size: usize,
4601    pub vm_rss: usize,
4602    pub vm_rss_hwm: usize,
4603    pub rss_anonymous: usize,
4604    pub rss_file: usize,
4605    pub rss_shared: usize,
4606    pub vm_data: usize,
4607    pub vm_stack: usize,
4608    pub vm_exe: usize,
4609    pub vm_swap: usize,
4610    pub vm_lck: usize,
4611}
4612
4613/// Implements `/proc/self/maps`.
4614#[derive(Clone)]
4615pub struct ProcMapsFile {
4616    mm: Weak<MemoryManager>,
4617    task: WeakRef<Task>,
4618}
4619impl ProcMapsFile {
4620    pub fn new(task: TempRef<'_, Task>) -> DynamicFile<Self> {
4621        // "maps" is empty for kthreads, rather than inaccessible.
4622        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4623        let task = task.into();
4624        DynamicFile::new(Self { mm, task })
4625    }
4626}
4627
4628impl SequenceFileSource for ProcMapsFile {
4629    type Cursor = UserAddress;
4630
4631    fn next(
4632        &self,
4633        _current_task: &CurrentTask,
4634        cursor: UserAddress,
4635        sink: &mut DynamicFileBuf,
4636    ) -> Result<Option<UserAddress>, Errno> {
4637        let task = Task::from_weak(&self.task)?;
4638        // /proc/<pid>/maps is empty for kthreads and tasks whose memory manager has changed.
4639        let Some(mm) = self.mm.upgrade() else {
4640            return Ok(None);
4641        };
4642        let state = mm.state.read();
4643        if let Some((range, map)) = state.mappings.find_at_or_after(cursor) {
4644            write_map(&task, sink, &state, range, map)?;
4645            return Ok(Some(range.end));
4646        }
4647        Ok(None)
4648    }
4649}
4650
4651#[derive(Clone)]
4652pub struct ProcSmapsFile {
4653    mm: Weak<MemoryManager>,
4654    task: WeakRef<Task>,
4655}
4656impl ProcSmapsFile {
4657    pub fn new(task: TempRef<'_, Task>) -> DynamicFile<Self> {
4658        // "smaps" is empty for kthreads, rather than inaccessible.
4659        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4660        DynamicFile::new(Self { mm, task: task.into() })
4661    }
4662}
4663
4664impl DynamicFileSource for ProcSmapsFile {
4665    fn generate(&self, current_task: &CurrentTask, sink: &mut DynamicFileBuf) -> Result<(), Errno> {
4666        let page_size_kb = *PAGE_SIZE / 1024;
4667        let task = Task::from_weak(&self.task)?;
4668        // /proc/<pid>/smaps is empty for kthreads and tasks whose memory manager has changed.
4669        let Some(mm) = self.mm.upgrade() else {
4670            return Ok(());
4671        };
4672
4673        // Ensure all mappings are mapped into the user vmar.
4674        let max_addr = mm.maximum_valid_user_address;
4675        mm.ensure_range_mapped_in_user_vmar(UserAddress::from(0), Some(max_addr.ptr()))?;
4676
4677        let state = mm.state.read();
4678        let committed_bytes_vec = mm.with_zx_mappings(current_task, |zx_mappings| {
4679            let mut zx_memory_info = RangeMap::<UserAddress, usize>::default();
4680            for idx in 0..zx_mappings.len() {
4681                let zx_mapping = zx_mappings[idx];
4682                // RangeMap uses #[must_use] for its default use case, but this drop is trivial.
4683                let _ = zx_memory_info.insert(
4684                    UserAddress::from_ptr(zx_mapping.base)
4685                        ..UserAddress::from_ptr(zx_mapping.base + zx_mapping.size),
4686                    idx,
4687                );
4688            }
4689
4690            let mut committed_bytes_vec = Vec::new();
4691            for (mm_range, mm_mapping) in state.mappings.iter() {
4692                let mut committed_bytes = 0;
4693
4694                for (zx_range, zx_mapping_idx) in zx_memory_info.range(mm_range.clone()) {
4695                    let intersect_range = zx_range.intersect(mm_range);
4696                    let zx_mapping = zx_mappings[*zx_mapping_idx];
4697                    let zx_details = zx_mapping.details();
4698                    let Some(zx_details) = zx_details.as_mapping() else { continue };
4699                    let zx_committed_bytes = zx_details.committed_bytes;
4700
4701                    // TODO(https://fxbug.dev/419882465): It can happen that the same Zircon mapping
4702                    // is covered by more than one Starnix mapping. In this case we don't have
4703                    // enough granularity to answer the question of how many committed bytes belong
4704                    // to one mapping or another. Make a best-effort approximation by dividing the
4705                    // committed bytes of a Zircon mapping proportionally.
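                    // For example (hypothetical sizes): a 16 KiB Zircon mapping with 8 KiB
                    // committed that overlaps this Starnix mapping over 4 KiB contributes
                    // 4/16 * 8 KiB = 2 KiB here.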
4706                    committed_bytes += if intersect_range != *zx_range {
4707                        let intersection_size =
4708                            intersect_range.end.ptr() - intersect_range.start.ptr();
4709                        let part = intersection_size as f32 / zx_mapping.size as f32;
4710                        let prorated_committed_bytes: f32 = part * zx_committed_bytes as f32;
4711                        prorated_committed_bytes as u64
4712                    } else {
4713                        zx_committed_bytes as u64
4714                    };
4715                    assert_eq!(
4716                        match state.get_mapping_backing(mm_mapping) {
4717                            MappingBacking::Memory(m) => m.memory().get_koid(),
4718                            MappingBacking::PrivateAnonymous =>
4719                                mm.mapping_context.private_anonymous.backing.get_koid(),
4720                        },
4721                        zx_details.vmo_koid,
4722                        "MemoryManager and Zircon must agree on which VMO is mapped in this range",
4723                    );
4724                }
4725                committed_bytes_vec.push(committed_bytes);
4726            }
4727            Ok(committed_bytes_vec)
4728        })?;
4729
4730        for ((mm_range, mm_mapping), committed_bytes) in
4731            state.mappings.iter().zip(committed_bytes_vec.into_iter())
4732        {
4733            write_map(&task, sink, &state, mm_range, mm_mapping)?;
4734
4735            let size_kb = (mm_range.end.ptr() - mm_range.start.ptr()) / 1024;
4736            writeln!(sink, "Size:           {size_kb:>8} kB",)?;
4737            let share_count = match state.get_mapping_backing(mm_mapping) {
4738                MappingBacking::Memory(backing) => {
4739                    let memory = backing.memory();
4740                    if memory.is_clock() {
4741                        // Clock memory mappings are not shared in a meaningful way.
4742                        1
4743                    } else {
4744                        let memory_info = backing.memory().info()?;
4745                        memory_info.share_count as u64
4746                    }
4747                }
4748                MappingBacking::PrivateAnonymous => {
4749                    1 // Private mapping
4750                }
4751            };
4752
4753            let rss_kb = committed_bytes / 1024;
4754            writeln!(sink, "Rss:            {rss_kb:>8} kB")?;
4755
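            // Pss is the proportional share of resident memory: e.g. (hypothetical numbers) a
            // shared mapping with 400 kB resident and a share count of 4 reports a Pss of 100 kB.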
4756            let pss_kb = if mm_mapping.flags().contains(MappingFlags::SHARED) {
4757                rss_kb / share_count
4758            } else {
4759                rss_kb
4760            };
4761            writeln!(sink, "Pss:            {pss_kb:>8} kB")?;
4762
4763            track_stub!(TODO("https://fxbug.dev/322874967"), "smaps dirty pages");
4764            let (shared_dirty_kb, private_dirty_kb) = (0, 0);
4765
4766            let is_shared = share_count > 1;
4767            let shared_clean_kb = if is_shared { rss_kb } else { 0 };
4768            writeln!(sink, "Shared_Clean:   {shared_clean_kb:>8} kB")?;
4769            writeln!(sink, "Shared_Dirty:   {shared_dirty_kb:>8} kB")?;
4770
4771            let private_clean_kb = if is_shared { 0 } else { rss_kb };
4772            writeln!(sink, "Private_Clean:  {private_clean_kb:>8} kB")?;
4773            writeln!(sink, "Private_Dirty:  {private_dirty_kb:>8} kB")?;
4774
4775            let anonymous_kb = if mm_mapping.private_anonymous() { rss_kb } else { 0 };
4776            writeln!(sink, "Anonymous:      {anonymous_kb:>8} kB")?;
4777            writeln!(sink, "KernelPageSize: {page_size_kb:>8} kB")?;
4778            writeln!(sink, "MMUPageSize:    {page_size_kb:>8} kB")?;
4779
4780            let locked_kb =
4781                if mm_mapping.flags().contains(MappingFlags::LOCKED) { rss_kb } else { 0 };
4782            writeln!(sink, "Locked:         {locked_kb:>8} kB")?;
4783            writeln!(sink, "VmFlags: {}", mm_mapping.vm_flags())?;
4784
4785            track_stub!(TODO("https://fxbug.dev/297444691"), "optional smaps fields");
4786        }
4787
4788        Ok(())
4789    }
4790}
4791
4792/// Creates a memory object that can be used in an anonymous mapping for the `mmap` syscall.
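///
/// An illustrative call; the size is arbitrary:
///
/// ```ignore
/// let memory = create_anonymous_mapping_memory(*PAGE_SIZE)?;
/// ```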
4793pub fn create_anonymous_mapping_memory(size: u64) -> Result<Arc<MemoryObject>, Errno> {
4794    // mremap can grow memory regions, so make sure the memory object is resizable.
4795    let mut memory = MemoryObject::from(
4796        zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, size).map_err(|s| match s {
4797            zx::Status::NO_MEMORY => errno!(ENOMEM),
4798            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
4799            _ => impossible_error(s),
4800        })?,
4801    )
4802    .with_zx_name(b"starnix:memory_manager");
4805
4806    // TODO(https://fxbug.dev/42056890): Audit replace_as_executable usage
4807    memory = memory.replace_as_executable(&VMEX_RESOURCE).map_err(impossible_error)?;
4808    Ok(Arc::new(memory))
4809}
4810
4811fn generate_random_offset_for_aslr(arch_width: ArchWidth) -> usize {
4812    // Generate a number with ASLR_RANDOM_BITS.
4813    let randomness = {
4814        let random_bits =
4815            if arch_width.is_arch32() { ASLR_32_RANDOM_BITS } else { ASLR_RANDOM_BITS };
4816        let mask = (1 << random_bits) - 1;
4817        let mut bytes = [0; std::mem::size_of::<usize>()];
4818        starnix_crypto::cprng_draw(&mut bytes);
4819        usize::from_le_bytes(bytes) & mask
4820    };
4821
4822    // Transform it into a page-aligned offset.
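    // For example (hypothetical values): with 8 random bits and a 4 KiB page size, the offset is
    // one of 256 page-aligned values in [0, 255 * 4096].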
4823    randomness * (*PAGE_SIZE as usize)
4824}
4825
4826#[cfg(test)]
4827mod tests {
4828    use super::*;
4829    use crate::mm::memory_accessor::MemoryAccessorExt;
4830    use crate::mm::syscalls::do_mmap;
4831    use crate::task::syscalls::sys_prctl;
4832    use crate::testing::*;
4833    use crate::vfs::FdNumber;
4834    use assert_matches::assert_matches;
4835    use itertools::assert_equal;
4836    use starnix_sync::{FileOpsCore, LockEqualOrBefore};
4837    use starnix_uapi::user_address::{UserCString, UserRef};
4838    use starnix_uapi::{
4839        MAP_ANONYMOUS, MAP_FIXED, MAP_GROWSDOWN, MAP_PRIVATE, MAP_SHARED, PR_SET_VMA,
4840        PR_SET_VMA_ANON_NAME, PROT_NONE, PROT_READ,
4841    };
4842    use std::ffi::CString;
4843    use zerocopy::{FromBytes, Immutable, KnownLayout};
4844
4845    #[::fuchsia::test]
4846    fn test_mapping_flags() {
4847        let options = MappingOptions::ANONYMOUS;
4848        let access_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
4849        let mapping_flags = MappingFlags::from_access_flags_and_options(access_flags, options);
4850        assert_eq!(mapping_flags.access_flags(), access_flags);
4851        assert_eq!(mapping_flags.options(), options);
4852
4853        let new_access_flags = ProtectionFlags::READ | ProtectionFlags::EXEC;
4854        let adjusted_mapping_flags = mapping_flags.with_access_flags(new_access_flags);
4855        assert_eq!(adjusted_mapping_flags.access_flags(), new_access_flags);
4856        assert_eq!(adjusted_mapping_flags.options(), options);
4857    }
4858
4859    #[::fuchsia::test]
4860    async fn test_brk() {
4861        spawn_kernel_and_run(async |locked, current_task| {
4862            let mm = current_task.mm().unwrap();
4863
4864            // Look up the given addr in the mappings table.
4865            let get_range = |addr: UserAddress| {
4866                let state = mm.state.read();
4867                state
4868                    .mappings
4869                    .get(addr)
4870                    .map(|(range, mapping)| (range.clone(), mapping.clone()))
4872            };
4873
4874            // Initialize the program break.
4875            let base_addr = mm
4876                .set_brk(locked, &current_task, UserAddress::default())
4877                .expect("failed to set initial program break");
4878            assert!(base_addr > UserAddress::default());
4879
4880            // Page containing the program break address should not be mapped.
4881            assert_eq!(get_range(base_addr), None);
4882
4883            // Growing it by a single byte results in that page becoming mapped.
4884            let addr0 = mm
4885                .set_brk(locked, &current_task, (base_addr + 1u64).unwrap())
4886                .expect("failed to grow brk");
4887            assert!(addr0 > base_addr);
4888            let (range0, _) = get_range(base_addr).expect("base_addr should be mapped");
4889            assert_eq!(range0.start, base_addr);
4890            assert_eq!(range0.end, (base_addr + *PAGE_SIZE).unwrap());
4891
4892            // Grow the program break by another byte, which won't be enough to cause additional pages to be mapped.
4893            let addr1 = mm
4894                .set_brk(locked, &current_task, (base_addr + 2u64).unwrap())
4895                .expect("failed to grow brk");
4896            assert_eq!(addr1, (base_addr + 2u64).unwrap());
4897            let (range1, _) = get_range(base_addr).expect("base_addr should be mapped");
4898            assert_eq!(range1.start, range0.start);
4899            assert_eq!(range1.end, range0.end);
4900
4901            // Grow the program break by a non-trivial amount and observe the larger mapping.
4902            let addr2 = mm
4903                .set_brk(locked, &current_task, (base_addr + 24893u64).unwrap())
4904                .expect("failed to grow brk");
4905            assert_eq!(addr2, (base_addr + 24893u64).unwrap());
4906            let (range2, _) = get_range(base_addr).expect("base_addr should be mapped");
4907            assert_eq!(range2.start, base_addr);
4908            assert_eq!(range2.end, addr2.round_up(*PAGE_SIZE).unwrap());
4909
4910            // Shrink the program break and observe the smaller mapping.
4911            let addr3 = mm
4912                .set_brk(locked, &current_task, (base_addr + 14832u64).unwrap())
4913                .expect("failed to shrink brk");
4914            assert_eq!(addr3, (base_addr + 14832u64).unwrap());
4915            let (range3, _) = get_range(base_addr).expect("base_addr should be mapped");
4916            assert_eq!(range3.start, base_addr);
4917            assert_eq!(range3.end, addr3.round_up(*PAGE_SIZE).unwrap());
4918
4919            // Shrink the program break close to zero and observe the smaller mapping.
4920            let addr4 = mm
4921                .set_brk(locked, &current_task, (base_addr + 3u64).unwrap())
4922                .expect("failed to drastically shrink brk");
4923            assert_eq!(addr4, (base_addr + 3u64).unwrap());
4924            let (range4, _) = get_range(base_addr).expect("base_addr should be mapped");
4925            assert_eq!(range4.start, base_addr);
4926            assert_eq!(range4.end, addr4.round_up(*PAGE_SIZE).unwrap());
4927
4928            // Shrink the program break to zero and observe that the mapping is entirely gone.
4929            let addr5 = mm
4930                .set_brk(locked, &current_task, base_addr)
4931                .expect("failed to drastically shrink brk to zero");
4932            assert_eq!(addr5, base_addr);
4933            assert_eq!(get_range(base_addr), None);
4934        })
4935        .await;
4936    }
4937
4938    #[::fuchsia::test]
4939    async fn test_mm_exec() {
4940        spawn_kernel_and_run(async |locked, current_task| {
4941            let mm = current_task.mm().unwrap();
4942
4943            let has = |addr: UserAddress| -> bool {
4944                let state = mm.state.read();
4945                state.mappings.get(addr).is_some()
4946            };
4947
4948            let brk_addr = mm
4949                .set_brk(locked, &current_task, UserAddress::default())
4950                .expect("failed to set initial program break");
4951            assert!(brk_addr > UserAddress::default());
4952
4953            // Allocate a single page of BRK space, so that the break base address is mapped.
4954            let _ = mm
4955                .set_brk(locked, &current_task, (brk_addr + 1u64).unwrap())
4956                .expect("failed to grow program break");
4957            assert!(has(brk_addr));
4958
4959            let mapped_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
4960            assert!(mapped_addr > UserAddress::default());
4961            assert!(has(mapped_addr));
4962
4963            let node = current_task.lookup_path_from_root(locked, "/".into()).unwrap();
4964            let new_mm = MemoryManager::exec(
4965                current_task.thread_group().root_vmar.unowned(),
4966                current_task.live().mm.to_option_arc(),
4967                node,
4968                ArchWidth::Arch64,
4969            )
4970            .expect("failed to exec memory manager");
4971            current_task.live().mm.update(Some(new_mm));
4972
4973            assert!(!has(brk_addr));
4974            assert!(!has(mapped_addr));
4975
4976            // Check that the old addresses are actually available for mapping.
4977            let brk_addr2 = map_memory(locked, &current_task, brk_addr, *PAGE_SIZE);
4978            assert_eq!(brk_addr, brk_addr2);
4979            let mapped_addr2 = map_memory(locked, &current_task, mapped_addr, *PAGE_SIZE);
4980            assert_eq!(mapped_addr, mapped_addr2);
4981        })
4982        .await;
4983    }
4984
4985    #[::fuchsia::test]
4986    async fn test_get_contiguous_mappings_at() {
4987        spawn_kernel_and_run(async |locked, current_task| {
4988            let mm = current_task.mm().unwrap();
4989            let context = &mm.mapping_context;
4990
4991            // Create four one-page mappings with a hole between the third one and the fourth one.
4992            let page_size = *PAGE_SIZE as usize;
4993            let addr_a = (mm.base_addr + 10 * page_size).unwrap();
4994            let addr_b = (mm.base_addr + 11 * page_size).unwrap();
4995            let addr_c = (mm.base_addr + 12 * page_size).unwrap();
4996            let addr_d = (mm.base_addr + 14 * page_size).unwrap();
4997            assert_eq!(map_memory(locked, &current_task, addr_a, *PAGE_SIZE), addr_a);
4998            assert_eq!(map_memory(locked, &current_task, addr_b, *PAGE_SIZE), addr_b);
4999            assert_eq!(map_memory(locked, &current_task, addr_c, *PAGE_SIZE), addr_c);
5000            assert_eq!(map_memory(locked, &current_task, addr_d, *PAGE_SIZE), addr_d);
5001
5002            {
5003                let mm_state = mm.state.read();
5004                // Verify that requesting an unmapped address returns an empty iterator.
5005                assert_equal(
5006                    mm_state
5007                        .get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 50, &context)
5008                        .unwrap(),
5009                    vec![],
5010                );
5011                assert_equal(
5012                    mm_state
5013                        .get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 200, &context)
5014                        .unwrap(),
5015                    vec![],
5016                );
5017
5018                // Verify that requesting zero bytes returns an empty iterator.
5019                assert_equal(
5020                    mm_state.get_contiguous_mappings_at(addr_a, 0, &context).unwrap(),
5021                    vec![],
5022                );
5023
5024                // Verify errors.
5025                assert_eq!(
5026                    mm_state
5027                        .get_contiguous_mappings_at(UserAddress::from(100), usize::MAX, &context)
5028                        .err()
5029                        .unwrap(),
5030                    errno!(EFAULT)
5031                );
5032                assert_eq!(
5033                    mm_state
5034                        .get_contiguous_mappings_at(
5035                            (context.max_address() + 1u64).unwrap(),
5036                            0,
5037                            &context
5038                        )
5039                        .err()
5040                        .unwrap(),
5041                    errno!(EFAULT)
5042                );
5043            }
5044
5045            assert_eq!(mm.get_mapping_count(), 2);
5046            let mm_state = mm.state.read();
5047            let (map_a, map_b) = {
5048                let mut it = mm_state.mappings.iter();
5049                (it.next().unwrap().1, it.next().unwrap().1)
5050            };
5051
5052            assert_equal(
5053                mm_state.get_contiguous_mappings_at(addr_a, page_size, &context).unwrap(),
5054                vec![(map_a, page_size)],
5055            );
5056
5057            assert_equal(
5058                mm_state.get_contiguous_mappings_at(addr_a, page_size / 2, &context).unwrap(),
5059                vec![(map_a, page_size / 2)],
5060            );
5061
5062            assert_equal(
5063                mm_state.get_contiguous_mappings_at(addr_a, page_size * 3, &context).unwrap(),
5064                vec![(map_a, page_size * 3)],
5065            );
5066
5067            assert_equal(
5068                mm_state.get_contiguous_mappings_at(addr_b, page_size, &context).unwrap(),
5069                vec![(map_a, page_size)],
5070            );
5071
5072            assert_equal(
5073                mm_state.get_contiguous_mappings_at(addr_d, page_size, &context).unwrap(),
5074                vec![(map_b, page_size)],
5075            );
5076
5077            // Verify that results stop if there is a hole.
5078            assert_equal(
5079                mm_state
5080                    .get_contiguous_mappings_at(
5081                        (addr_a + page_size / 2).unwrap(),
5082                        page_size * 10,
5083                        &context,
5084                    )
5085                    .unwrap(),
5086                vec![(map_a, page_size * 2 + page_size / 2)],
5087            );
5088
5089            // Verify that results stop at the last mapped page.
5090            assert_equal(
5091                mm_state.get_contiguous_mappings_at(addr_d, page_size * 10, &context).unwrap(),
5092                vec![(map_b, page_size)],
5093            );
5094        })
5095        .await;
5096    }
5097
5098    #[::fuchsia::test]
5099    async fn test_read_write_crossing_mappings() {
5100        spawn_kernel_and_run(async |locked, current_task| {
5101            let mm = current_task.mm().unwrap();
5102            let ma = current_task.deref();
5103
5104            // Map two contiguous pages at fixed addresses, but backed by distinct mappings.
5105            let page_size = *PAGE_SIZE;
5106            let addr = (mm.base_addr + 10 * page_size).unwrap();
5107            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5108            assert_eq!(
5109                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5110                (addr + page_size).unwrap()
5111            );
5112            // Mappings get merged since they are backed by the same memory object.
5113            assert_eq!(mm.get_mapping_count(), 1);
5114
5115            // Write a pattern crossing our two mappings.
5116            let test_addr = (addr + page_size / 2).unwrap();
5117            let data: Vec<u8> = (0..page_size).map(|i| (i % 256) as u8).collect();
5118            ma.write_memory(test_addr, &data).expect("failed to write test data");
5119
5120            // Read it back.
5121            let data_readback =
5122                ma.read_memory_to_vec(test_addr, data.len()).expect("failed to read test data");
5123            assert_eq!(&data, &data_readback);
5124        })
5125        .await;
5126    }
5127
5128    #[::fuchsia::test]
5129    async fn test_read_write_errors() {
5130        spawn_kernel_and_run(async |locked, current_task| {
5131            let ma = current_task.deref();
5132
5133            let page_size = *PAGE_SIZE;
5134            let addr = map_memory(locked, &current_task, UserAddress::default(), page_size);
5135            let buf = vec![0u8; page_size as usize];
5136
5137            // Verify that accessing data that is only partially mapped is an error.
5138            let partial_addr_before = (addr - page_size / 2).unwrap();
5139            assert_eq!(ma.write_memory(partial_addr_before, &buf), error!(EFAULT));
5140            assert_eq!(ma.read_memory_to_vec(partial_addr_before, buf.len()), error!(EFAULT));
5141            let partial_addr_after = (addr + page_size / 2).unwrap();
5142            assert_eq!(ma.write_memory(partial_addr_after, &buf), error!(EFAULT));
5143            assert_eq!(ma.read_memory_to_vec(partial_addr_after, buf.len()), error!(EFAULT));
5144
5145            // Verify that accessing unmapped memory is an error.
5146            let unmapped_addr = (addr - 10 * page_size).unwrap();
5147            assert_eq!(ma.write_memory(unmapped_addr, &buf), error!(EFAULT));
5148            assert_eq!(ma.read_memory_to_vec(unmapped_addr, buf.len()), error!(EFAULT));
5149
5150            // However, accessing zero bytes in unmapped memory is not an error.
5151            ma.write_memory(unmapped_addr, &[]).expect("failed to write no data");
5152            ma.read_memory_to_vec(unmapped_addr, 0).expect("failed to read no data");
5153        })
5154        .await;
5155    }
5156
5157    #[::fuchsia::test]
5158    async fn test_read_c_string_to_vec_large() {
5159        spawn_kernel_and_run(async |locked, current_task| {
5160            let mm = current_task.mm().unwrap();
5161            let ma = current_task.deref();
5162
5163            let page_size = *PAGE_SIZE;
5164            let max_size = 4 * page_size as usize;
5165            let addr = (mm.base_addr + 10 * page_size).unwrap();
5166
5167            assert_eq!(map_memory(locked, &current_task, addr, max_size as u64), addr);
5168
5169            let mut random_data = vec![0; max_size];
5170            starnix_crypto::cprng_draw(&mut random_data);
5171            // Replace any NUL bytes so the string is not terminated early.
5172            for i in 0..random_data.len() {
5173                if random_data[i] == 0 {
5174                    random_data[i] = 1;
5175                }
5176            }
5177            random_data[max_size - 1] = 0;
5178
5179            ma.write_memory(addr, &random_data).expect("failed to write test string");
5180            // We should read the same value minus the last byte (NUL char).
5181            assert_eq!(
5182                ma.read_c_string_to_vec(UserCString::new(current_task, addr), max_size).unwrap(),
5183                random_data[..max_size - 1]
5184            );
5185        })
5186        .await;
5187    }
5188
5189    #[::fuchsia::test]
5190    async fn test_read_c_string_to_vec() {
5191        spawn_kernel_and_run(async |locked, current_task| {
5192            let mm = current_task.mm().unwrap();
5193            let ma = current_task.deref();
5194
5195            let page_size = *PAGE_SIZE;
5196            let max_size = 2 * page_size as usize;
5197            let addr = (mm.base_addr + 10 * page_size).unwrap();
5198
5199            // Map a page at a fixed address and write an unterminated string at the end of it.
5200            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5201            let test_str = b"foo!";
5202            let test_addr =
5203                addr.checked_add(page_size as usize).unwrap().checked_sub(test_str.len()).unwrap();
5204            ma.write_memory(test_addr, test_str).expect("failed to write test string");
5205
5206            // Expect error if the string is not terminated.
5207            assert_eq!(
5208                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size),
5209                error!(ENAMETOOLONG)
5210            );
5211
5212            // Expect success if the string is terminated.
5213            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
5214            assert_eq!(
5215                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
5216                    .unwrap(),
5217                "foo"
5218            );
5219
5220            // Expect success if the string spans over two mappings.
5221            assert_eq!(
5222                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5223                (addr + page_size).unwrap()
5224            );
5225            // TODO: Adjacent private anonymous mappings are collapsed. To cover this case, the
5226            // test would need to provide a distinct backing for the second mapping.
5227            // assert_eq!(mm.get_mapping_count(), 2);
5228            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
5229                .expect("failed to write extra chars");
5230            assert_eq!(
5231                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
5232                    .unwrap(),
5233                "foobar",
5234            );
5235
5236            // Expect an error if the string exceeds the provided maximum length.
5237            assert_eq!(
5238                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), 2),
5239                error!(ENAMETOOLONG)
5240            );
5241
5242            // Expect error if the address is invalid.
5243            assert_eq!(
5244                ma.read_c_string_to_vec(UserCString::null(current_task), max_size),
5245                error!(EFAULT)
5246            );
5247        })
5248        .await;
5249    }
5250
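    /// `read_nul_delimited_c_string_list` parses one or more NUL-terminated strings written
    /// back-to-back into a single region.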
5251    #[::fuchsia::test]
5252    async fn can_read_argv_like_regions() {
5253        spawn_kernel_and_run(async |locked, current_task| {
5254            let ma = current_task.deref();
5255
5256            // Map a page.
5257            let page_size = *PAGE_SIZE;
5258            let addr = map_memory_anywhere(locked, &current_task, page_size);
5259            assert!(!addr.is_null());
5260
5261            // Write an unterminated string.
5262            let mut payload = "first".as_bytes().to_vec();
5263            let mut expected_parses = vec![];
5264            ma.write_memory(addr, &payload).unwrap();
5265
5266            // Expect success if the string is terminated.
5267            expected_parses.push(payload.clone());
5268            payload.push(0);
5269            ma.write_memory(addr, &payload).unwrap();
5270            assert_eq!(
5271                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
5272                expected_parses,
5273            );
5274
5275            // Make sure we can parse multiple strings from the same region.
5276            let second = b"second";
5277            payload.extend(second);
5278            payload.push(0);
5279            expected_parses.push(second.to_vec());
5280
5281            let third = b"third";
5282            payload.extend(third);
5283            payload.push(0);
5284            expected_parses.push(third.to_vec());
5285
5286            ma.write_memory(addr, &payload).unwrap();
5287            assert_eq!(
5288                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
5289                expected_parses,
5290            );
5291        })
5292        .await;
5293    }
5294
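    /// When the provided length cuts the string list short, the final string is truncated rather
    /// than rejected.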
5295    #[::fuchsia::test]
5296    async fn truncate_argv_like_regions() {
5297        spawn_kernel_and_run(async |locked, current_task| {
5298            let ma = current_task.deref();
5299
5300            // Map a page.
5301            let page_size = *PAGE_SIZE;
5302            let addr = map_memory_anywhere(locked, &current_task, page_size);
5303            assert!(!addr.is_null());
5304
5305            let payload = b"first\0second\0third\0";
5306            ma.write_memory(addr, payload).unwrap();
5307            assert_eq!(
5308                ma.read_nul_delimited_c_string_list(addr, payload.len() - 3).unwrap(),
5309                vec![b"first".to_vec(), b"second".to_vec(), b"thi".to_vec()],
5310                "Dropping the last three bytes of the payload should truncate the third string"
5311            );
5312        })
5313        .await;
5314    }
5315
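    /// Same cases as `test_read_c_string_to_vec`, but through the caller-provided buffer variant
    /// `read_c_string`.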
5316    #[::fuchsia::test]
5317    async fn test_read_c_string() {
5318        spawn_kernel_and_run(async |locked, current_task| {
5319            let mm = current_task.mm().unwrap();
5320            let ma = current_task.deref();
5321
5322            let page_size = *PAGE_SIZE;
5323            let buf_cap = 2 * page_size as usize;
5324            let mut buf = Vec::with_capacity(buf_cap);
5325            // We can't use the full `spare_capacity_mut` slice directly because `Vec::with_capacity`
5326            // returns a `Vec` with _at least_ the requested capacity, so trim it to exactly `buf_cap`.
5327            let buf = &mut buf.spare_capacity_mut()[..buf_cap];
5328            let addr = (mm.base_addr + 10 * page_size).unwrap();
5329
5330            // Map a page at a fixed address and write an unterminated string at the end of it.
5331            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5332            let test_str = b"foo!";
5333            let test_addr = (addr + (page_size - test_str.len() as u64)).unwrap();
5334            ma.write_memory(test_addr, test_str).expect("failed to write test string");
5335
5336            // Expect error if the string is not terminated.
5337            assert_eq!(
5338                ma.read_c_string(UserCString::new(current_task, test_addr), buf),
5339                error!(ENAMETOOLONG)
5340            );
5341
5342            // Expect success if the string is terminated.
5343            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
5344            assert_eq!(
5345                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
5346                "foo"
5347            );
5348
5349            // Expect success if the string spans over two mappings.
5350            assert_eq!(
5351                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5352                (addr + page_size).unwrap()
5353            );
5354            // TODO: To keep these as two separate mappings we need to provide a file backing for the
5355            // next page, or the mappings will be collapsed.
5356            // assert_eq!(mm.get_mapping_count(), 2);
5357            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
5358                .expect("failed to write extra chars");
5359            assert_eq!(
5360                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
5361                "foobar"
5362            );
5363
5364            // Expect error if the string does not fit in the provided buffer.
5365            assert_eq!(
5366                ma.read_c_string(
5367                    UserCString::new(current_task, test_addr),
5368                    &mut [MaybeUninit::uninit(); 2]
5369                ),
5370                error!(ENAMETOOLONG)
5371            );
5372
5373            // Expect error if the address is invalid.
5374            assert_eq!(ma.read_c_string(UserCString::null(current_task), buf), error!(EFAULT));
5375        })
5376        .await;
5377    }
5378
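    /// `find_next_unused_range` returns the highest gap below the mmap top that can hold the
    /// requested size, skipping existing mappings.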
5379    #[::fuchsia::test]
5380    async fn test_find_next_unused_range() {
5381        spawn_kernel_and_run(async |locked, current_task| {
5382            let mm = current_task.mm().unwrap();
5383
5384            let mmap_top = mm.state.read().find_next_unused_range(0).unwrap().ptr();
5385            let page_size = *PAGE_SIZE as usize;
5386            assert!(mmap_top <= RESTRICTED_ASPACE_HIGHEST_ADDRESS);
5387
5388            // With no mappings, the top address minus the requested size is available.
5389            assert_eq!(
5390                mm.state.read().find_next_unused_range(page_size).unwrap(),
5391                UserAddress::from_ptr(mmap_top - page_size)
5392            );
5393
5394            // Fill it.
5395            let addr = UserAddress::from_ptr(mmap_top - page_size);
5396            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5397
5398            // The next available range is right before the new mapping.
5399            assert_eq!(
5400                mm.state.read().find_next_unused_range(page_size).unwrap(),
5401                UserAddress::from_ptr(addr.ptr() - page_size)
5402            );
5403
5404            // Allocate an extra page before a one-page gap.
5405            let addr2 = UserAddress::from_ptr(addr.ptr() - 2 * page_size);
5406            assert_eq!(map_memory(locked, &current_task, addr2, *PAGE_SIZE), addr2);
5407
5408            // Searching for a one-page range still gives the same result.
5409            assert_eq!(
5410                mm.state.read().find_next_unused_range(page_size).unwrap(),
5411                UserAddress::from_ptr(addr.ptr() - page_size)
5412            );
5413
5414            // Searching for a bigger range results in the area before the second mapping
5415            assert_eq!(
5416                mm.state.read().find_next_unused_range(2 * page_size).unwrap(),
5417                UserAddress::from_ptr(addr2.ptr() - 2 * page_size)
5418            );
5419
5420            // Searching for more memory than available should fail.
5421            assert_eq!(mm.state.read().find_next_unused_range(mmap_top), None);
5422        })
5423        .await;
5424    }
5425
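    /// `count_possible_placements` counts how many page-aligned positions of the requested size
    /// fit in a subrange, taking existing mappings into account.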
5426    #[::fuchsia::test]
5427    async fn test_count_placements() {
5428        spawn_kernel_and_run(async |locked, current_task| {
5429            let mm = current_task.mm().unwrap();
5430
5431            // ten-page range
5432            let page_size = *PAGE_SIZE as usize;
5433            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5434                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5435
5436            assert_eq!(
5437                mm.state.read().count_possible_placements(11 * page_size, &subrange_ten),
5438                Some(0)
5439            );
5440            assert_eq!(
5441                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
5442                Some(1)
5443            );
5444            assert_eq!(
5445                mm.state.read().count_possible_placements(9 * page_size, &subrange_ten),
5446                Some(2)
5447            );
5448            assert_eq!(
5449                mm.state.read().count_possible_placements(page_size, &subrange_ten),
5450                Some(10)
5451            );
5452
5453            // map 6th page
5454            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
5455            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5456
5457            assert_eq!(
5458                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
5459                Some(0)
5460            );
5461            assert_eq!(
5462                mm.state.read().count_possible_placements(5 * page_size, &subrange_ten),
5463                Some(1)
5464            );
5465            assert_eq!(
5466                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
5467                Some(3)
5468            );
5469            assert_eq!(
5470                mm.state.read().count_possible_placements(page_size, &subrange_ten),
5471                Some(9)
5472            );
5473        })
5474        .await;
5475    }
5476
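    /// `pick_placement` returns the address of the n-th possible placement counted by
    /// `count_possible_placements`.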
5477    #[::fuchsia::test]
5478    async fn test_pick_placement() {
5479        spawn_kernel_and_run(async |locked, current_task| {
5480            let mm = current_task.mm().unwrap();
5481
5482            let page_size = *PAGE_SIZE as usize;
5483            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5484                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5485
5486            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
5487            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5488            assert_eq!(
5489                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
5490                Some(3)
5491            );
5492
5493            assert_eq!(
5494                mm.state.read().pick_placement(4 * page_size, 0, &subrange_ten),
5495                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE))
5496            );
5497            assert_eq!(
5498                mm.state.read().pick_placement(4 * page_size, 1, &subrange_ten),
5499                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + page_size))
5500            );
5501            assert_eq!(
5502                mm.state.read().pick_placement(4 * page_size, 2, &subrange_ten),
5503                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 6 * page_size))
5504            );
5505        })
5506        .await;
5507    }
5508
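    /// `find_random_unused_range` keeps returning unused slots until the subrange is exhausted,
    /// then returns `None`.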
5509    #[::fuchsia::test]
5510    async fn test_find_random_unused_range() {
5511        spawn_kernel_and_run(async |locked, current_task| {
5512            let mm = current_task.mm().unwrap();
5513
5514            // ten-page range
5515            let page_size = *PAGE_SIZE as usize;
5516            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5517                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5518
5519            for _ in 0..10 {
5520                let addr = mm.state.read().find_random_unused_range(page_size, &subrange_ten);
5521                assert!(addr.is_some());
5522                assert_eq!(
5523                    map_memory(locked, &current_task, addr.unwrap(), *PAGE_SIZE),
5524                    addr.unwrap()
5525                );
5526            }
5527            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5528        })
5529        .await;
5530    }
5531
5532    #[::fuchsia::test]
5533    async fn test_grows_down_near_aspace_base() {
5534        spawn_kernel_and_run(async |locked, current_task| {
5535            let mm = current_task.mm().unwrap();
5536
5537            let page_count = 10;
5538
5539            let page_size = *PAGE_SIZE as usize;
5540            let addr =
5541                (UserAddress::from_ptr(RESTRICTED_ASPACE_BASE) + page_count * page_size).unwrap();
5542            assert_eq!(
5543                map_memory_with_flags(
5544                    locked,
5545                    &current_task,
5546                    addr,
5547                    page_size as u64,
5548                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN
5549                ),
5550                addr
5551            );
5552
5553            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)..addr;
5554            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5555        })
5556        .await;
5557    }
5558
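    /// Unmapping part of a single mapping reports exactly one released mapping.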
5559    #[::fuchsia::test]
5560    async fn test_unmap_returned_mappings() {
5561        spawn_kernel_and_run(async |locked, current_task| {
5562            let mm = current_task.mm().unwrap();
5563
5564            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5565
5566            let mut released_mappings = ReleasedMappings::default();
5567            let mut mm_state = mm.state.write();
5568            let unmap_result =
5569                mm_state.unmap(&mm, addr, *PAGE_SIZE as usize, &mut released_mappings);
5570            assert!(unmap_result.is_ok());
5571            assert_eq!(released_mappings.len(), 1);
5572            released_mappings.finalize(mm_state);
5573        })
5574        .await;
5575    }
5576
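    /// Unmapping a range that covers two separate mappings reports both as released.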
5577    #[::fuchsia::test]
5578    async fn test_unmap_returns_multiple_mappings() {
5579        spawn_kernel_and_run(async |locked, current_task| {
5580            let mm = current_task.mm().unwrap();
5581
5582            let addr = mm.state.read().find_next_unused_range(3 * *PAGE_SIZE as usize).unwrap();
5583            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5584            let _ = map_memory(locked, &current_task, (addr + 2 * *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5585
5586            let mut released_mappings = ReleasedMappings::default();
5587            let mut mm_state = mm.state.write();
5588            let unmap_result =
5589                mm_state.unmap(&mm, addr, (*PAGE_SIZE * 3) as usize, &mut released_mappings);
5590            assert!(unmap_result.is_ok());
5591            assert_eq!(released_mappings.len(), 2);
5592            released_mappings.finalize(mm_state);
5593        })
5594        .await;
5595    }
5596
5597    /// Maps two pages in separate mappings next to each other, then unmaps the first page.
5598    /// The second page should not be modified.
5599    #[::fuchsia::test]
5600    async fn test_map_two_unmap_one() {
5601        spawn_kernel_and_run(async |locked, current_task| {
5602            let mm = current_task.mm().unwrap();
5603
5604            // Reserve memory for both pages.
5605            let addr_reserve =
5606                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5607            let addr1 = do_mmap(
5608                locked,
5609                &current_task,
5610                addr_reserve,
5611                *PAGE_SIZE as usize,
5612                PROT_READ, // Map read-only to avoid merging of the two mappings
5613                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5614                FdNumber::from_raw(-1),
5615                0,
5616            )
5617            .expect("failed to mmap");
5618            let addr2 = map_memory_with_flags(
5619                locked,
5620                &current_task,
5621                (addr_reserve + *PAGE_SIZE).unwrap(),
5622                *PAGE_SIZE,
5623                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5624            );
5625            let state = mm.state.read();
5626            let (range1, _) = state.mappings.get(addr1).expect("mapping");
5627            assert_eq!(range1.start, addr1);
5628            assert_eq!(range1.end, (addr1 + *PAGE_SIZE).unwrap());
5629            let (range2, mapping2) = state.mappings.get(addr2).expect("mapping");
5630            assert_eq!(range2.start, addr2);
5631            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5632            let original_memory2 = {
5633                match state.get_mapping_backing(mapping2) {
5634                    MappingBacking::Memory(backing) => {
5635                        assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5636                        backing.memory().clone()
5637                    }
5638                    MappingBacking::PrivateAnonymous => {
5639                        panic!("Unexpected private anonymous mapping")
5640                    }
5641                }
5642            };
5643            std::mem::drop(state);
5644
5645            assert_eq!(mm.unmap(addr1, *PAGE_SIZE as usize), Ok(()));
5646
5647            let state = mm.state.read();
5648
5649            // The first page should be unmapped.
5650            assert!(state.mappings.get(addr1).is_none());
5651
5652            // The second page should remain unchanged.
5653            let (range2, mapping2) = state.mappings.get(addr2).expect("second page");
5654            assert_eq!(range2.start, addr2);
5655            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5656            match state.get_mapping_backing(mapping2) {
5657                MappingBacking::Memory(backing) => {
5658                    assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5659                    assert_eq!(original_memory2.get_koid(), backing.memory().get_koid());
5660                }
5661                MappingBacking::PrivateAnonymous => panic!("Unexpected private anonymous mapping"),
5662            }
5663        })
5664        .await;
5665    }
5666
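    /// Objects written with `write_objects` can be read back with `read_objects_to_vec`.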
5667    #[::fuchsia::test]
5668    async fn test_read_write_objects() {
5669        spawn_kernel_and_run(async |locked, current_task| {
5670            let ma = current_task.deref();
5671            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5672            let items_ref = UserRef::<i32>::new(addr);
5673
5674            let items_written = vec![0, 2, 3, 7, 1];
5675            ma.write_objects(items_ref, &items_written).expect("Failed to write object array.");
5676
5677            let items_read = ma
5678                .read_objects_to_vec(items_ref, items_written.len())
5679                .expect("Failed to read object array.");
5680
5681            assert_eq!(items_written, items_read);
5682        })
5683        .await;
5684    }
5685
5686    #[::fuchsia::test]
5687    async fn test_read_write_objects_null() {
5688        spawn_kernel_and_run(async |_, current_task| {
5689            let ma = current_task.deref();
5690            let items_ref = UserRef::<i32>::new(UserAddress::default());
5691
5692            let items_written = vec![];
5693            ma.write_objects(items_ref, &items_written)
5694                .expect("Failed to write empty object array.");
5695
5696            let items_read = ma
5697                .read_objects_to_vec(items_ref, items_written.len())
5698                .expect("Failed to read empty object array.");
5699
5700            assert_eq!(items_written, items_read);
5701        })
5702        .await;
5703    }
5704
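    /// `read_object_partial` zero-fills the unread tail, allows zero-length reads, and rejects
    /// sizes larger than the object as well as invalid addresses.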
5705    #[::fuchsia::test]
5706    async fn test_read_object_partial() {
5707        #[derive(Debug, Default, Copy, Clone, KnownLayout, FromBytes, Immutable, PartialEq)]
5708        struct Items {
5709            val: [i32; 4],
5710        }
5711
5712        spawn_kernel_and_run(async |locked, current_task| {
5713            let ma = current_task.deref();
5714            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5715            let items_array_ref = UserRef::<i32>::new(addr);
5716
5717            // Populate some values.
5718            let items_written = vec![75, 23, 51, 98];
5719            ma.write_objects(items_array_ref, &items_written)
5720                .expect("Failed to write object array.");
5721
5722            // Full read of all 4 values.
5723            let items_ref = UserRef::<Items>::new(addr);
5724            let items_read = ma
5725                .read_object_partial(items_ref, std::mem::size_of::<Items>())
5726                .expect("Failed to read object");
5727            assert_eq!(items_written, items_read.val);
5728
5729            // Partial read of the first two.
5730            let items_read = ma.read_object_partial(items_ref, 8).expect("Failed to read object");
5731            assert_eq!(vec![75, 23, 0, 0], items_read.val);
5732
5733            // The API currently allows reading 0 bytes (this could be re-evaluated), so test that it
5734            // does the right thing.
5735            let items_read = ma.read_object_partial(items_ref, 0).expect("Failed to read object");
5736            assert_eq!(vec![0, 0, 0, 0], items_read.val);
5737
5738            // Size bigger than the object.
5739            assert_eq!(
5740                ma.read_object_partial(items_ref, std::mem::size_of::<Items>() + 8),
5741                error!(EINVAL)
5742            );
5743
5744            // Bad pointer.
5745            assert_eq!(
5746                ma.read_object_partial(UserRef::<Items>::new(UserAddress::from(1)), 16),
5747                error!(EFAULT)
5748            );
5749        })
5750        .await;
5751    }
5752
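    /// `read_memory_partial_to_vec` returns only the readable prefix when part of the requested
    /// range has had its protections removed.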
5753    #[::fuchsia::test]
5754    async fn test_partial_read() {
5755        spawn_kernel_and_run(async |locked, current_task| {
5756            let mm = current_task.mm().unwrap();
5757            let ma = current_task.deref();
5758
5759            let addr = mm.state.read().find_next_unused_range(2 * *PAGE_SIZE as usize).unwrap();
5760            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5761            let second_map =
5762                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5763
5764            let bytes = vec![0xf; (*PAGE_SIZE * 2) as usize];
5765            assert!(ma.write_memory(addr, &bytes).is_ok());
5766            let mut state = mm.state.write();
5767            let mut released_mappings = ReleasedMappings::default();
5768            state
5769                .protect(
5770                    ma,
5771                    second_map,
5772                    *PAGE_SIZE as usize,
5773                    ProtectionFlags::empty(),
5774                    &mut released_mappings,
5775                )
5776                .unwrap();
5777            released_mappings.finalize(state);
5778            assert_eq!(
5779                ma.read_memory_partial_to_vec(addr, bytes.len()).unwrap().len(),
5780                *PAGE_SIZE as usize,
5781            );
5782        })
5783        .await;
5784    }
5785
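    /// Maps `length` bytes of private anonymous grows-down memory at any available address.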
5786    fn map_memory_growsdown<L>(
5787        locked: &mut Locked<L>,
5788        current_task: &CurrentTask,
5789        length: u64,
5790    ) -> UserAddress
5791    where
5792        L: LockEqualOrBefore<FileOpsCore>,
5793    {
5794        map_memory_with_flags(
5795            locked,
5796            current_task,
5797            UserAddress::default(),
5798            length,
5799            MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN,
5800        )
5801    }
5802
5803    #[::fuchsia::test]
5804    async fn test_grow_mapping_empty_mm() {
5805        spawn_kernel_and_run(async |_, current_task| {
5806            let mm = current_task.mm().unwrap();
5807
5808            let addr = UserAddress::from(0x100000);
5809
5810            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5811        })
5812        .await;
5813    }
5814
5815    #[::fuchsia::test]
5816    async fn test_grow_inside_mapping() {
5817        spawn_kernel_and_run(async |locked, current_task| {
5818            let mm = current_task.mm().unwrap();
5819
5820            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5821
5822            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5823        })
5824        .await;
5825    }
5826
5827    #[::fuchsia::test]
5828    async fn test_grow_write_fault_inside_read_only_mapping() {
5829        spawn_kernel_and_run(async |locked, current_task| {
5830            let mm = current_task.mm().unwrap();
5831
5832            let addr = do_mmap(
5833                locked,
5834                &current_task,
5835                UserAddress::default(),
5836                *PAGE_SIZE as usize,
5837                PROT_READ,
5838                MAP_ANONYMOUS | MAP_PRIVATE,
5839                FdNumber::from_raw(-1),
5840                0,
5841            )
5842            .expect("Could not map memory");
5843
5844            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5845            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5846        })
5847        .await;
5848    }
5849
5850    #[::fuchsia::test]
5851    async fn test_grow_fault_inside_prot_none_mapping() {
5852        spawn_kernel_and_run(async |locked, current_task| {
5853            let mm = current_task.mm().unwrap();
5854
5855            let addr = do_mmap(
5856                locked,
5857                &current_task,
5858                UserAddress::default(),
5859                *PAGE_SIZE as usize,
5860                PROT_NONE,
5861                MAP_ANONYMOUS | MAP_PRIVATE,
5862                FdNumber::from_raw(-1),
5863                0,
5864            )
5865            .expect("Could not map memory");
5866
5867            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5868            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5869        })
5870        .await;
5871    }
5872
5873    #[::fuchsia::test]
5874    async fn test_grow_below_mapping() {
5875        spawn_kernel_and_run(async |locked, current_task| {
5876            let mm = current_task.mm().unwrap();
5877
5878            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) - *PAGE_SIZE;
5879
5880            assert_matches!(mm.extend_growsdown_mapping_to_address(addr.unwrap(), false), Ok(true));
5881        })
5882        .await;
5883    }
5884
5885    #[::fuchsia::test]
5886    async fn test_grow_above_mapping() {
5887        spawn_kernel_and_run(async |locked, current_task| {
5888            let mm = current_task.mm().unwrap();
5889
5890            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) + *PAGE_SIZE;
5891
5892            assert_matches!(
5893                mm.extend_growsdown_mapping_to_address(addr.unwrap(), false),
5894                Ok(false)
5895            );
5896        })
5897        .await;
5898    }
5899
5900    #[::fuchsia::test]
5901    async fn test_grow_write_fault_below_read_only_mapping() {
5902        spawn_kernel_and_run(async |locked, current_task| {
5903            let mm = current_task.mm().unwrap();
5904
5905            let mapped_addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE);
5906
5907            mm.protect(&current_task, mapped_addr, *PAGE_SIZE as usize, ProtectionFlags::READ)
5908                .unwrap();
5909
5910            assert_matches!(
5911                mm.extend_growsdown_mapping_to_address((mapped_addr - *PAGE_SIZE).unwrap(), true),
5912                Ok(false)
5913            );
5914
5915            assert_eq!(mm.get_mapping_count(), 1);
5916        })
5917        .await;
5918    }
5919
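    /// Cloning a task snapshots pager-backed memory: writes made in the parent and the child
    /// after the clone are not visible to each other.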
5920    #[::fuchsia::test]
5921    async fn test_snapshot_paged_memory() {
5922        use zx::sys::zx_page_request_command_t::ZX_PAGER_VMO_READ;
5923
5924        spawn_kernel_and_run(async |locked, current_task| {
5925            let mm = current_task.mm().unwrap();
5926            let ma = current_task.deref();
5927
5928            let port = Arc::new(zx::Port::create());
5929            let port_clone = port.clone();
5930            let pager =
5931                Arc::new(zx::Pager::create(zx::PagerOptions::empty()).expect("create failed"));
5932            let pager_clone = pager.clone();
5933
5934            const VMO_SIZE: u64 = 128 * 1024;
5935            let vmo = Arc::new(
5936                pager
5937                    .create_vmo(zx::VmoOptions::RESIZABLE, &port, 1, VMO_SIZE)
5938                    .expect("create_vmo failed"),
5939            );
5940            let vmo_clone = vmo.clone();
5941
5942            // Create a thread to service the port where we will receive pager requests.
5943            let thread = std::thread::spawn(move || {
5944                loop {
5945                    let packet =
5946                        port_clone.wait(zx::MonotonicInstant::INFINITE).expect("wait failed");
5947                    match packet.contents() {
5948                        zx::PacketContents::Pager(contents) => {
5949                            if contents.command() == ZX_PAGER_VMO_READ {
5950                                let range = contents.range();
5951                                let source_vmo = zx::Vmo::create(range.end - range.start)
5952                                    .expect("create failed");
5953                                pager_clone
5954                                    .supply_pages(&vmo_clone, range, &source_vmo, 0)
5955                                    .expect("supply_pages failed");
5956                            }
5957                        }
5958                        zx::PacketContents::User(_) => break,
5959                        _ => {}
5960                    }
5961                }
5962            });
5963
5964            let child_vmo = vmo
5965                .create_child(zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, VMO_SIZE)
5966                .unwrap();
5967
5968            // Write something to the source VMO.
5969            vmo.write(b"foo", 0).expect("write failed");
5970
5971            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
5972            let addr = mm
5973                .map_memory(
5974                    DesiredAddress::Any,
5975                    Arc::new(MemoryObject::from(child_vmo)),
5976                    0,
5977                    VMO_SIZE as usize,
5978                    prot_flags,
5979                    Access::rwx(),
5980                    MappingOptions::empty(),
5981                    MappingName::None,
5982                )
5983                .expect("map failed");
5984
5985            let target = current_task.clone_task_for_test(locked, 0, None);
5986
5987            // Make sure it has what we wrote.
5988            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
5989            assert_eq!(buf, b"foo");
5990
5991            // Write to the same address in both the parent and the cloned task and make sure the memory is forked.
5992            ma.write_memory(addr, b"bar").expect("write_memory failed");
5993
5994            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
5995            assert_eq!(buf, b"foo");
5996
5997            target.write_memory(addr, b"baz").expect("write_memory failed");
5998            let buf = ma.read_memory_to_vec(addr, 3).expect("read_memory failed");
5999            assert_eq!(buf, b"bar");
6000
6001            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
6002            assert_eq!(buf, b"baz");
6003
6004            port.queue(&zx::Packet::from_user_packet(0, 0, zx::UserPacket::from_u8_array([0; 32])))
6005                .unwrap();
6006            thread.join().unwrap();
6007        })
6008        .await;
6009    }
6010
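    /// PR_SET_VMA_ANON_NAME sets the name reported by `get_mapping_name` for the mapping.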
6011    #[::fuchsia::test]
6012    async fn test_set_vma_name() {
6013        spawn_kernel_and_run(async |locked, mut current_task| {
6014            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6015
6016            let vma_name = "vma name";
6017            current_task.write_memory(name_addr, vma_name.as_bytes()).unwrap();
6018
6019            let mapping_addr =
6020                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6021
6022            sys_prctl(
6023                locked,
6024                &mut current_task,
6025                PR_SET_VMA,
6026                PR_SET_VMA_ANON_NAME as u64,
6027                mapping_addr.ptr() as u64,
6028                *PAGE_SIZE,
6029                name_addr.ptr() as u64,
6030            )
6031            .unwrap();
6032
6033            assert_eq!(
6034                *current_task.mm().unwrap().get_mapping_name(mapping_addr).unwrap().unwrap(),
6035                vma_name
6036            );
6037        })
6038        .await;
6039    }
6040
6041    #[::fuchsia::test]
6042    async fn test_set_vma_name_adjacent_mappings() {
6043        spawn_kernel_and_run(async |locked, mut current_task| {
6044            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6045            current_task
6046                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6047                .unwrap();
6048
6049            let first_mapping_addr =
6050                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6051            let second_mapping_addr = map_memory_with_flags(
6052                locked,
6053                &current_task,
6054                (first_mapping_addr + *PAGE_SIZE).unwrap(),
6055                *PAGE_SIZE,
6056                MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
6057            );
6058
6059            assert_eq!((first_mapping_addr + *PAGE_SIZE).unwrap(), second_mapping_addr);
6060
6061            sys_prctl(
6062                locked,
6063                &mut current_task,
6064                PR_SET_VMA,
6065                PR_SET_VMA_ANON_NAME as u64,
6066                first_mapping_addr.ptr() as u64,
6067                2 * *PAGE_SIZE,
6068                name_addr.ptr() as u64,
6069            )
6070            .unwrap();
6071
6072            {
6073                let mm = current_task.mm().unwrap();
6074                let state = mm.state.read();
6075
6076                // The name should apply to both mappings.
6077                let (_, mapping) = state.mappings.get(first_mapping_addr).unwrap();
6078                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6079
6080                let (_, mapping) = state.mappings.get(second_mapping_addr).unwrap();
6081                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6082            }
6083        })
6084        .await;
6085    }
6086
6087    #[::fuchsia::test]
6088    async fn test_set_vma_name_beyond_end() {
6089        spawn_kernel_and_run(async |locked, mut current_task| {
6090            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6091            current_task
6092                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6093                .unwrap();
6094
6095            let mapping_addr =
6096                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6097
6098            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
6099            current_task.mm().unwrap().unmap(second_page, *PAGE_SIZE as usize).unwrap();
6100
6101            // This should fail with ENOMEM since it extends past the end of the mapping into unmapped memory.
6102            assert_eq!(
6103                sys_prctl(
6104                    locked,
6105                    &mut current_task,
6106                    PR_SET_VMA,
6107                    PR_SET_VMA_ANON_NAME as u64,
6108                    mapping_addr.ptr() as u64,
6109                    2 * *PAGE_SIZE,
6110                    name_addr.ptr() as u64,
6111                ),
6112                error!(ENOMEM)
6113            );
6114
6115            // Despite returning an error, the prctl should still assign a name to the mapping at the start of the range.
6116            {
6117                let mm = current_task.mm().unwrap();
6118                let state = mm.state.read();
6119
6120                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6121                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6122            }
6123        })
6124        .await;
6125    }
6126
6127    #[::fuchsia::test]
6128    async fn test_set_vma_name_before_start() {
6129        spawn_kernel_and_run(async |locked, mut current_task| {
6130            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6131            current_task
6132                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6133                .unwrap();
6134
6135            let mapping_addr =
6136                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6137
6138            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
6139            current_task.mm().unwrap().unmap(mapping_addr, *PAGE_SIZE as usize).unwrap();
6140
6141            // This should fail with ENOMEM since the start of the range is in unmapped memory.
6142            assert_eq!(
6143                sys_prctl(
6144                    locked,
6145                    &mut current_task,
6146                    PR_SET_VMA,
6147                    PR_SET_VMA_ANON_NAME as u64,
6148                    mapping_addr.ptr() as u64,
6149                    2 * *PAGE_SIZE,
6150                    name_addr.ptr() as u64,
6151                ),
6152                error!(ENOMEM)
6153            );
6154
6155            // Unlike a range which starts within a mapping and extends past the end, this should not assign
6156            // a name to any mappings.
6157            {
6158                let mm = current_task.mm().unwrap();
6159                let state = mm.state.read();
6160
6161                let (_, mapping) = state.mappings.get(second_page).unwrap();
6162                assert_eq!(mapping.name(), MappingName::None);
6163            }
6164        })
6165        .await;
6166    }
6167
6168    #[::fuchsia::test]
6169    async fn test_set_vma_name_partial() {
6170        spawn_kernel_and_run(async |locked, mut current_task| {
6171            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6172            current_task
6173                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6174                .unwrap();
6175
6176            let mapping_addr =
6177                map_memory(locked, &current_task, UserAddress::default(), 3 * *PAGE_SIZE);
6178
6179            assert_eq!(
6180                sys_prctl(
6181                    locked,
6182                    &mut current_task,
6183                    PR_SET_VMA,
6184                    PR_SET_VMA_ANON_NAME as u64,
6185                    (mapping_addr + *PAGE_SIZE).unwrap().ptr() as u64,
6186                    *PAGE_SIZE,
6187                    name_addr.ptr() as u64,
6188                ),
6189                Ok(starnix_syscalls::SUCCESS)
6190            );
6191
6192            // This should split the mapping into 3 pieces with the second piece having the name "foo"
6193            {
6194                let mm = current_task.mm().unwrap();
6195                let state = mm.state.read();
6196
6197                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6198                assert_eq!(mapping.name(), MappingName::None);
6199
6200                let (_, mapping) =
6201                    state.mappings.get((mapping_addr + *PAGE_SIZE).unwrap()).unwrap();
6202                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6203
6204                let (_, mapping) =
6205                    state.mappings.get((mapping_addr + (2 * *PAGE_SIZE)).unwrap()).unwrap();
6206                assert_eq!(mapping.name(), MappingName::None);
6207            }
6208        })
6209        .await;
6210    }
6211
6212    #[::fuchsia::test]
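    /// VMA names assigned with PR_SET_VMA_ANON_NAME are preserved when the task is cloned.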
6213    async fn test_preserve_name_snapshot() {
6214        spawn_kernel_and_run(async |locked, mut current_task| {
6215            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6216            current_task
6217                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6218                .unwrap();
6219
6220            let mapping_addr =
6221                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6222
6223            assert_eq!(
6224                sys_prctl(
6225                    locked,
6226                    &mut current_task,
6227                    PR_SET_VMA,
6228                    PR_SET_VMA_ANON_NAME as u64,
6229                    mapping_addr.ptr() as u64,
6230                    *PAGE_SIZE,
6231                    name_addr.ptr() as u64,
6232                ),
6233                Ok(starnix_syscalls::SUCCESS)
6234            );
6235
6236            let target = current_task.clone_task_for_test(locked, 0, None);
6237
6238            {
6239                let mm = target.mm().unwrap();
6240                let state = mm.state.read();
6241
6242                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6243                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6244            }
6245        })
6246        .await;
6247    }
6248}