Skip to main content

starnix_core/mm/
memory_manager.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::mapping::MappingBackingMemory;
7use crate::mm::memory::MemoryObject;
8use crate::mm::private_anonymous_memory_manager::PrivateAnonymousMemoryManager;
9use crate::mm::{
10    FaultRegisterMode, FutexTable, InflightVmsplicedPayloads, MapInfoCache, Mapping,
11    MappingBacking, MappingFlags, MappingMode, MappingName, MappingNameRef, MlockPinFlavor,
12    PrivateFutexKey, ProtectionFlags, UserFault, VMEX_RESOURCE, VmsplicePayload,
13    VmsplicePayloadSegment, read_to_array,
14};
15use crate::security;
16use crate::signals::{SignalDetail, SignalInfo};
17use crate::task::{CurrentTask, ExceptionResult, PageFaultExceptionReport, Task};
18use crate::vfs::aio::AioContext;
19use crate::vfs::pseudo::dynamic_file::{
20    DynamicFile, DynamicFileBuf, DynamicFileSource, SequenceFileSource,
21};
22use crate::vfs::{FsString, NamespaceNode};
23use anyhow::{Error, anyhow};
24use bitflags::bitflags;
25use flyweights::FlyByteStr;
26use linux_uapi::BUS_ADRERR;
27use memory_pinning::PinnedMapping;
28use range_map::RangeMap;
29use smallvec::SmallVec;
30use starnix_ext::map_ext::EntryExt;
31use starnix_lifecycle::DropNotifier;
32use starnix_logging::{CATEGORY_STARNIX_MM, impossible_error, log_error, log_warn, track_stub};
33use starnix_sync::{
34    LockBefore, Locked, MmDumpable, OrderedMutex, RwLock, RwLockWriteGuard, ThreadGroupLimits,
35    Unlocked, UserFaultInner,
36};
37use starnix_types::arch::ArchWidth;
38use starnix_types::futex_address::FutexAddress;
39use starnix_types::math::{round_down_to_system_page_size, round_up_to_system_page_size};
40use starnix_types::user_buffer::{UserBuffer, UserBuffers};
41use starnix_uapi::auth::CAP_IPC_LOCK;
42use starnix_uapi::errors::Errno;
43use starnix_uapi::file_mode::Access;
44use starnix_uapi::range_ext::RangeExt;
45use starnix_uapi::resource_limits::Resource;
46use starnix_uapi::restricted_aspace::{
47    RESTRICTED_ASPACE_BASE, RESTRICTED_ASPACE_HIGHEST_ADDRESS, RESTRICTED_ASPACE_RANGE,
48    RESTRICTED_ASPACE_SIZE,
49};
50use starnix_uapi::signals::{SIGBUS, SIGSEGV};
51use starnix_uapi::user_address::{ArchSpecific, UserAddress};
52use starnix_uapi::{
53    MADV_COLD, MADV_COLLAPSE, MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK,
54    MADV_DONTNEED, MADV_DONTNEED_LOCKED, MADV_FREE, MADV_HUGEPAGE, MADV_HWPOISON, MADV_KEEPONFORK,
55    MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_NORMAL, MADV_PAGEOUT, MADV_POPULATE_READ, MADV_RANDOM,
56    MADV_REMOVE, MADV_SEQUENTIAL, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, MADV_WILLNEED,
57    MADV_WIPEONFORK, MREMAP_DONTUNMAP, MREMAP_FIXED, MREMAP_MAYMOVE, errno, error,
58    from_status_like_fdio,
59};
60use std::collections::HashMap;
61use std::mem::MaybeUninit;
62use std::ops::{ControlFlow, Deref, DerefMut, Range, RangeBounds};
63use std::sync::{Arc, LazyLock, Weak};
64use syncio::zxio::zxio_default_maybe_faultable_copy;
65use zerocopy::IntoBytes;
66use zx::{Rights, VmoChildOptions};
67
68pub const ZX_VM_SPECIFIC_OVERWRITE: zx::VmarFlags =
69    zx::VmarFlags::from_bits_retain(zx::VmarFlagsExtended::SPECIFIC_OVERWRITE.bits());
70
71// We do not create shared processes in unit tests.
72pub(crate) const UNIFIED_ASPACES_ENABLED: bool = cfg!(not(test));
73
74/// Initializes the usercopy utilities.
75///
76/// It is useful to explicitly call this so that the usercopy is initialized
77/// at a known instant. For example, Starnix may want to make sure the usercopy
78/// thread created to support user copying is associated to the Starnix process
79/// and not a restricted-mode process.
80pub fn init_usercopy() {
81    // This call lazily initializes the `Usercopy` instance.
82    let _ = usercopy();
83}
84
thread_local! {
    /// Per-thread record of the most recent mapping generation this thread observed.
    ///
    /// Starts at zero. Used to prevent infinite loops in page fault handling.
    static LAST_SEEN_MAPPING_GENERATION: std::cell::Cell<u64> =
        const { std::cell::Cell::new(0) };
}
90
/// Number of guard pages reserved for a GROWSDOWN mapping.
pub const GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS: usize = 256;

/// Number of ASLR random bits on x86_64.
#[cfg(target_arch = "x86_64")]
const ASLR_RANDOM_BITS: usize = 27;

/// Number of ASLR random bits on aarch64.
#[cfg(target_arch = "aarch64")]
const ASLR_RANDOM_BITS: usize = 28;

/// Number of ASLR random bits on riscv64.
#[cfg(target_arch = "riscv64")]
const ASLR_RANDOM_BITS: usize = 18;

/// Number of bits of entropy for processes running in 32 bits mode.
const ASLR_32_RANDOM_BITS: usize = 8;

/// The biggest we expect a stack to be (512 MiB); increase as needed.
// TODO(https://fxbug.dev/322874791): Once setting RLIMIT_STACK is implemented, we should use it.
const MAX_STACK_SIZE: usize = 512 * 1024 * 1024;

/// Placeholder value (2 MiB) reported temporarily as the VM RSS high water mark.
// TODO(https://fxbug.dev/396221597): Need support from the kernel to track the committed bytes
// high water mark.
const STUB_VM_RSS_HWM: usize = 2 * 1024 * 1024;
113
114fn usercopy() -> Option<&'static usercopy::Usercopy> {
115    static USERCOPY: LazyLock<Option<usercopy::Usercopy>> = LazyLock::new(|| {
116        // We do not create shared processes in unit tests.
117        if UNIFIED_ASPACES_ENABLED {
118            // ASUMPTION: All Starnix managed Linux processes have the same
119            // restricted mode address range.
120            Some(usercopy::Usercopy::new(RESTRICTED_ASPACE_RANGE).unwrap())
121        } else {
122            None
123        }
124    });
125
126    LazyLock::force(&USERCOPY).as_ref()
127}
128
129/// Provides an implementation for zxio's `zxio_maybe_faultable_copy` that supports
130/// catching faults.
131///
132/// See zxio's `zxio_maybe_faultable_copy` documentation for more details.
133///
134/// # Safety
135///
136/// Only one of `src`/`dest` may be an address to a buffer owned by user/restricted-mode
137/// (`ret_dest` indicates whether the user-owned buffer is `dest` when `true`).
138/// The other must be a valid Starnix/normal-mode buffer that will never cause a fault
139/// when the first `count` bytes are read/written.
140#[unsafe(no_mangle)]
141pub unsafe fn zxio_maybe_faultable_copy_impl(
142    dest: *mut u8,
143    src: *const u8,
144    count: usize,
145    ret_dest: bool,
146) -> bool {
147    if let Some(usercopy) = usercopy() {
148        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
149        let ret = unsafe { usercopy.raw_hermetic_copy(dest, src, count, ret_dest) };
150        ret == count
151    } else {
152        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
153        unsafe {
154            zxio_default_maybe_faultable_copy(dest, src, count, ret_dest)
155        }
156    }
157}
158
159pub static PAGE_SIZE: LazyLock<u64> = LazyLock::new(|| zx::system_get_page_size() as u64);
160
161bitflags! {
162    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
163    pub struct MappingOptions: u16 {
164      const SHARED      = 1 << 0;
165      const ANONYMOUS   = 1 << 1;
166      const LOWER_32BIT = 1 << 2;
167      const GROWSDOWN   = 1 << 3;
168      const ELF_BINARY  = 1 << 4;
169      const DONTFORK    = 1 << 5;
170      const WIPEONFORK  = 1 << 6;
171      const DONT_SPLIT  = 1 << 7;
172      const DONT_EXPAND = 1 << 8;
173      const POPULATE    = 1 << 9;
174    }
175}
176
177bitflags! {
178    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
179    pub struct MremapFlags: u32 {
180        const MAYMOVE = MREMAP_MAYMOVE;
181        const FIXED = MREMAP_FIXED;
182        const DONTUNMAP = MREMAP_DONTUNMAP;
183    }
184}
185
186bitflags! {
187    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
188    pub struct MsyncFlags: u32 {
189        const ASYNC = starnix_uapi::MS_ASYNC;
190        const INVALIDATE = starnix_uapi::MS_INVALIDATE;
191        const SYNC = starnix_uapi::MS_SYNC;
192    }
193}
194
/// 64 MiB, in bytes.
// NOTE(review): presumably the maximum size of the program break region; confirm against the
// brk implementation, which is not visible from this part of the file.
const PROGRAM_BREAK_LIMIT: u64 = 64 * 1024 * 1024;
196
197#[derive(Debug, Clone, Eq, PartialEq)]
198struct ProgramBreak {
199    // These base address at which the data segment is mapped.
200    base: UserAddress,
201
202    // The current program break.
203    //
204    // The addresses from [base, current.round_up(*PAGE_SIZE)) are mapped into the
205    // client address space from the underlying |memory|.
206    current: UserAddress,
207}
208
/// Policy controlling whether the address space may be dumped.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DumpPolicy {
    /// Dumping the address space is not permitted.
    ///
    /// Corresponds to SUID_DUMP_DISABLE.
    Disable,

    /// Dumping the address space is permitted.
    ///
    /// Corresponds to SUID_DUMP_USER.
    User,
}
222
/// Supported types of membarriers.
pub enum MembarrierType {
    /// MEMBARRIER_CMD_GLOBAL, etc.
    Memory,
    /// MEMBARRIER_CMD_..._SYNC_CORE
    SyncCore,
}
228
/// Tracks the types of membarriers this address space is registered to receive.
///
/// Both registrations default to `false`.
#[derive(Default, Clone)]
struct MembarrierRegistrations {
    /// Registered for `MembarrierType::Memory` barriers.
    memory: bool,
    /// Registered for `MembarrierType::SyncCore` barriers.
    sync_core: bool,
}
235
236#[derive(Default)]
237struct Mappings {
238    /// The mappings record which object backs each address.
239    map: RangeMap<UserAddress, Mapping>,
240
241    /// Generation counter for mappings. Incremented on any modification to `mappings`.
242    ///
243    /// This is used to detect stale mappings in `handle_page_fault`.
244    generation: u64,
245
246    /// The cached sum of the lengths of all mapped ranges.
247    total_usage: usize,
248}
249
250impl Deref for Mappings {
251    type Target = RangeMap<UserAddress, Mapping>;
252
253    fn deref(&self) -> &Self::Target {
254        &self.map
255    }
256}
257
258impl Mappings {
259    pub fn insert(&mut self, range: std::ops::Range<UserAddress>, value: Mapping) -> Vec<Mapping> {
260        self.generation = self.generation.wrapping_add(1);
261        let range_len = range.end - range.start;
262        let removed_len: usize = self
263            .map
264            .range(range.clone())
265            .map(|(r, _)| {
266                let intersection = r.intersect(&range);
267                intersection.end - intersection.start
268            })
269            .sum();
270        let removed = self.map.insert(range, value);
271        self.total_usage = self.total_usage.saturating_add(range_len).saturating_sub(removed_len);
272        removed
273    }
274
275    pub fn remove(&mut self, range: std::ops::Range<UserAddress>) -> Vec<Mapping> {
276        self.generation = self.generation.wrapping_add(1);
277        let removed_len: usize = self
278            .map
279            .range(range.clone())
280            .map(|(r, _)| {
281                let intersection = r.intersect(&range);
282                intersection.end - intersection.start
283            })
284            .sum();
285        let removed = self.map.remove(range);
286        self.total_usage = self.total_usage.saturating_sub(removed_len);
287        removed
288    }
289
290    pub fn append_non_overlapping(
291        &mut self,
292        range: std::ops::Range<UserAddress>,
293        value: Mapping,
294    ) -> bool {
295        self.generation = self.generation.wrapping_add(1);
296        let range_len = range.end - range.start;
297        if self.map.append_non_overlapping(range, value) {
298            self.total_usage = self.total_usage.saturating_add(range_len);
299            true
300        } else {
301            false
302        }
303    }
304
305    pub fn update_exact<F, E>(
306        &mut self,
307        range: &std::ops::Range<UserAddress>,
308        f: F,
309    ) -> Result<bool, E>
310    where
311        F: FnOnce(&mut Mapping) -> Result<(), E>,
312    {
313        self.generation = self.generation.wrapping_add(1);
314        self.map.update_exact(range, f)
315    }
316}
317
318pub struct MemoryManagerState {
319    /// The memory mappings currently used by this address space.
320    mappings: Mappings,
321
322    /// UserFaults registered with this memory manager.
323    userfaultfds: Vec<Weak<UserFault>>,
324
325    /// Shadow mappings for mlock()'d pages.
326    ///
327    /// Used for MlockPinFlavor::ShadowProcess to keep track of when we need to unmap
328    /// memory from the shadow process.
329    shadow_mappings_for_mlock: RangeMap<UserAddress, Arc<PinnedMapping>>,
330
331    forkable_state: MemoryManagerForkableState,
332}
333
334// 64k under the 4GB
335const LOWER_4GB_LIMIT: UserAddress = UserAddress::const_from(0xffff_0000);
336
337#[derive(Default, Clone)]
338pub struct MemoryManagerForkableState {
339    /// State for the brk and sbrk syscalls.
340    brk: Option<ProgramBreak>,
341
342    /// The namespace node that represents the executable associated with this task.
343    executable_node: Option<NamespaceNode>,
344
345    pub stack_size: usize,
346    pub stack_start: UserAddress,
347    pub auxv_start: UserAddress,
348    pub auxv_end: UserAddress,
349    pub argv_start: UserAddress,
350    pub argv_end: UserAddress,
351    pub environ_start: UserAddress,
352    pub environ_end: UserAddress,
353
354    /// vDSO location
355    pub vdso_base: UserAddress,
356
357    /// Randomized regions:
358    pub mmap_top: UserAddress,
359    pub stack_origin: UserAddress,
360    pub brk_origin: UserAddress,
361
362    // Membarrier registrations
363    membarrier_registrations: MembarrierRegistrations,
364}
365
366impl Deref for MemoryManagerState {
367    type Target = MemoryManagerForkableState;
368    fn deref(&self) -> &Self::Target {
369        &self.forkable_state
370    }
371}
372
373impl DerefMut for MemoryManagerState {
374    fn deref_mut(&mut self) -> &mut Self::Target {
375        &mut self.forkable_state
376    }
377}
378
379#[derive(Debug, Default)]
380struct ReleasedMappings {
381    doomed: Vec<Mapping>,
382    doomed_pins: Vec<Arc<PinnedMapping>>,
383}
384
385impl ReleasedMappings {
386    fn extend(&mut self, mappings: impl IntoIterator<Item = Mapping>) {
387        self.doomed.extend(mappings);
388    }
389
390    fn extend_pins(&mut self, mappings: impl IntoIterator<Item = Arc<PinnedMapping>>) {
391        self.doomed_pins.extend(mappings);
392    }
393
394    fn is_empty(&self) -> bool {
395        self.doomed.is_empty() && self.doomed_pins.is_empty()
396    }
397
398    #[cfg(test)]
399    fn len(&self) -> usize {
400        self.doomed.len() + self.doomed_pins.len()
401    }
402
403    fn finalize(&mut self, mm_state: RwLockWriteGuard<'_, MemoryManagerState>) {
404        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
405        // in `DirEntry`'s `drop`.
406        std::mem::drop(mm_state);
407        std::mem::take(&mut self.doomed);
408        std::mem::take(&mut self.doomed_pins);
409    }
410}
411
412impl Drop for ReleasedMappings {
413    fn drop(&mut self) {
414        assert!(self.is_empty(), "ReleasedMappings::finalize() must be called before drop");
415    }
416}
417
418fn map_in_vmar(
419    vmar: &zx::Vmar,
420    vmar_info: &zx::VmarInfo,
421    addr: SelectedAddress,
422    memory: &MemoryObject,
423    memory_offset: u64,
424    length: usize,
425    flags: MappingFlags,
426    populate: bool,
427) -> Result<(), Errno> {
428    let vmar_offset = addr.addr().checked_sub(vmar_info.base).ok_or_else(|| errno!(ENOMEM))?;
429    let vmar_extra_flags = match addr {
430        SelectedAddress::Fixed(_) => zx::VmarFlags::SPECIFIC,
431        SelectedAddress::FixedOverwrite(_) => ZX_VM_SPECIFIC_OVERWRITE,
432    };
433
434    if populate {
435        let op = if flags.contains(MappingFlags::WRITE) {
436            // Requires ZX_RIGHT_WRITEABLE which we should expect when the mapping is writeable.
437            zx::VmoOp::COMMIT
438        } else {
439            // When we don't expect to have ZX_RIGHT_WRITEABLE, fall back to a VMO op that doesn't
440            // need it.
441            zx::VmoOp::PREFETCH
442        };
443        fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "MmapCommitPages");
444        let _ = memory.op_range(op, memory_offset, length as u64);
445        // "The mmap() call doesn't fail if the mapping cannot be populated."
446    }
447
448    let vmar_maybe_map_range = if populate && !vmar_extra_flags.contains(ZX_VM_SPECIFIC_OVERWRITE) {
449        zx::VmarFlags::MAP_RANGE
450    } else {
451        zx::VmarFlags::empty()
452    };
453    let vmar_flags = flags.access_flags().to_vmar_flags()
454        | zx::VmarFlags::ALLOW_FAULTS
455        | vmar_extra_flags
456        | vmar_maybe_map_range;
457
458    let map_result = memory.map_in_vmar(vmar, vmar_offset.ptr(), memory_offset, length, vmar_flags);
459    let mapped_addr = map_result.map_err(MemoryManager::get_errno_for_map_err)?;
460
461    let expected_addr = addr.addr().ptr();
462    debug_assert_eq!(
463        mapped_addr, expected_addr,
464        "Zircon mapped to a different address than requested!"
465    );
466
467    Ok(())
468}
469
470impl MemoryManagerState {
471    /// Returns occupied address ranges that intersect with the given range.
472    ///
473    /// An address range is "occupied" if (a) there is already a mapping in that range or (b) there
474    /// is a GROWSDOWN mapping <= 256 pages above that range. The 256 pages below a GROWSDOWN
475    /// mapping is the "guard region." The memory manager avoids mapping memory in the guard region
476    /// in some circumstances to preserve space for the GROWSDOWN mapping to grow down.
477    fn get_occupied_address_ranges<'a>(
478        &'a self,
479        subrange: &'a Range<UserAddress>,
480    ) -> impl Iterator<Item = Range<UserAddress>> + 'a {
481        let query_range = subrange.start
482            ..(subrange
483                .end
484                .saturating_add(*PAGE_SIZE as usize * GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS));
485        self.mappings.range(query_range).filter_map(|(range, mapping)| {
486            let occupied_range = mapping.inflate_to_include_guard_pages(range);
487            if occupied_range.start < subrange.end && subrange.start < occupied_range.end {
488                Some(occupied_range)
489            } else {
490                None
491            }
492        })
493    }
494
495    fn count_possible_placements(
496        &self,
497        length: usize,
498        subrange: &Range<UserAddress>,
499    ) -> Option<usize> {
500        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
501        let mut possible_placements = 0;
502        // If the allocation is placed at the first available address, every page that is left
503        // before the next mapping or the end of subrange is +1 potential placement.
504        let mut first_fill_end = subrange.start.checked_add(length)?;
505        while first_fill_end <= subrange.end {
506            let Some(mapping) = occupied_ranges.next() else {
507                possible_placements += (subrange.end - first_fill_end) / (*PAGE_SIZE as usize) + 1;
508                break;
509            };
510            if mapping.start >= first_fill_end {
511                possible_placements += (mapping.start - first_fill_end) / (*PAGE_SIZE as usize) + 1;
512            }
513            first_fill_end = mapping.end.checked_add(length)?;
514        }
515        Some(possible_placements)
516    }
517
518    fn pick_placement(
519        &self,
520        length: usize,
521        mut chosen_placement_idx: usize,
522        subrange: &Range<UserAddress>,
523    ) -> Option<UserAddress> {
524        let mut candidate =
525            Range { start: subrange.start, end: subrange.start.checked_add(length)? };
526        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
527        loop {
528            let Some(mapping) = occupied_ranges.next() else {
529                // No more mappings: treat the rest of the index as an offset.
530                let res =
531                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
532                debug_assert!(res.checked_add(length)? <= subrange.end);
533                return Some(res);
534            };
535            if mapping.start < candidate.end {
536                // doesn't fit, skip
537                candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
538                continue;
539            }
540            let unused_space =
541                (mapping.start.ptr() - candidate.end.ptr()) / (*PAGE_SIZE as usize) + 1;
542            if unused_space > chosen_placement_idx {
543                // Chosen placement is within the range; treat the rest of the index as an offset.
544                let res =
545                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
546                return Some(res);
547            }
548
549            // chosen address is further up, skip
550            chosen_placement_idx -= unused_space;
551            candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
552        }
553    }
554
555    fn find_random_unused_range(
556        &self,
557        length: usize,
558        subrange: &Range<UserAddress>,
559    ) -> Option<UserAddress> {
560        let possible_placements = self.count_possible_placements(length, subrange)?;
561        if possible_placements == 0 {
562            return None;
563        }
564        let chosen_placement_idx = rand::random_range(0..possible_placements);
565        self.pick_placement(length, chosen_placement_idx, subrange)
566    }
567
568    // Find the first unused range of addresses that fits a mapping of `length` bytes, searching
569    // from `mmap_top` downwards.
570    pub fn find_next_unused_range(&self, length: usize) -> Option<UserAddress> {
571        let gap_size = length as u64;
572        let mut upper_bound = self.mmap_top;
573
574        loop {
575            let gap_end = self.mappings.find_gap_end(gap_size, &upper_bound);
576            let candidate = gap_end.checked_sub(length)?;
577
578            // Is there a next mapping? If not, the candidate is already good.
579            let Some((occupied_range, mapping)) = self.mappings.get(gap_end) else {
580                return Some(candidate);
581            };
582            let occupied_range = mapping.inflate_to_include_guard_pages(occupied_range);
583            // If it doesn't overlap, the gap is big enough to fit.
584            if occupied_range.start >= gap_end {
585                return Some(candidate);
586            }
587            // If there was a mapping in the way, use the start of that range as the upper bound.
588            upper_bound = occupied_range.start;
589        }
590    }
591
592    // Accept the hint if the range is unused and within the range available for mapping.
593    fn is_hint_acceptable(&self, hint_addr: UserAddress, length: usize) -> bool {
594        let Some(hint_end) = hint_addr.checked_add(length) else {
595            return false;
596        };
597        if !RESTRICTED_ASPACE_RANGE.contains(&hint_addr.ptr())
598            || !RESTRICTED_ASPACE_RANGE.contains(&hint_end.ptr())
599        {
600            return false;
601        };
602        self.get_occupied_address_ranges(&(hint_addr..hint_end)).next().is_none()
603    }
604
605    fn select_address(
606        &self,
607        addr: DesiredAddress,
608        length: usize,
609        flags: MappingFlags,
610    ) -> Result<SelectedAddress, Errno> {
611        let adjusted_length = round_up_to_system_page_size(length).or_else(|_| error!(ENOMEM))?;
612
613        let find_address = || -> Result<SelectedAddress, Errno> {
614            let new_addr = if flags.contains(MappingFlags::LOWER_32BIT) {
615                // MAP_32BIT specifies that the memory allocated will
616                // be within the first 2 GB of the process address space.
617                self.find_random_unused_range(
618                    adjusted_length,
619                    &(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
620                        ..UserAddress::from_ptr(0x80000000)),
621                )
622                .ok_or_else(|| errno!(ENOMEM))?
623            } else {
624                self.find_next_unused_range(adjusted_length).ok_or_else(|| errno!(ENOMEM))?
625            };
626
627            Ok(SelectedAddress::Fixed(new_addr))
628        };
629
630        Ok(match addr {
631            DesiredAddress::Any => find_address()?,
632            DesiredAddress::Hint(hint_addr) => {
633                // Round down to page size
634                let hint_addr =
635                    UserAddress::from_ptr(hint_addr.ptr() - hint_addr.ptr() % *PAGE_SIZE as usize);
636                if self.is_hint_acceptable(hint_addr, adjusted_length) {
637                    SelectedAddress::Fixed(hint_addr)
638                } else {
639                    find_address()?
640                }
641            }
642            DesiredAddress::Fixed(addr) => SelectedAddress::Fixed(addr),
643            DesiredAddress::FixedOverwrite(addr) => SelectedAddress::FixedOverwrite(addr),
644        })
645    }
646
647    fn validate_addr(&self, addr: DesiredAddress, length: usize) -> Result<(), Errno> {
648        if length > RESTRICTED_ASPACE_SIZE {
649            return error!(ENOMEM);
650        }
651        match addr {
652            DesiredAddress::Fixed(a) | DesiredAddress::FixedOverwrite(a) => {
653                let end = a.checked_add(length).ok_or_else(|| errno!(ENOMEM))?;
654                if end > UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS as usize) {
655                    return error!(ENOMEM);
656                }
657                if self.check_has_unauthorized_splits(a, length) {
658                    return error!(ENOMEM);
659                }
660            }
661            _ => {}
662        }
663        Ok(())
664    }
665
666    fn add_memory_mapping(
667        &mut self,
668        mm: &Arc<MemoryManager>,
669        addr: DesiredAddress,
670        memory: Arc<MemoryObject>,
671        memory_offset: u64,
672        length: usize,
673        flags: MappingFlags,
674        max_access: Access,
675        populate: bool,
676        name: MappingName,
677        mapping_mode: MappingMode,
678        released_mappings: &mut ReleasedMappings,
679    ) -> Result<UserAddress, Errno> {
680        self.validate_addr(addr, length)?;
681
682        let selected_address = self.select_address(addr, length, flags)?;
683        let mapped_addr = selected_address.addr();
684        if mapping_mode == MappingMode::Eager {
685            mm.mapping_context.map_in_user_vmar(
686                selected_address,
687                &memory,
688                memory_offset,
689                length,
690                flags,
691                populate,
692            )?;
693        }
694
695        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
696
697        if let DesiredAddress::FixedOverwrite(addr) = addr {
698            assert_eq!(addr, mapped_addr);
699            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
700        }
701
702        let mapping = Mapping::with_name(
703            self.create_memory_backing(mapped_addr, memory, memory_offset),
704            flags,
705            max_access,
706            name,
707            mapping_mode,
708        );
709        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
710
711        Ok(mapped_addr)
712    }
713
714    fn map_private_anonymous(
715        &mut self,
716        mm: &Arc<MemoryManager>,
717        addr: DesiredAddress,
718        length: usize,
719        prot_flags: ProtectionFlags,
720        options: MappingOptions,
721        populate: bool,
722        name: MappingName,
723        released_mappings: &mut ReleasedMappings,
724    ) -> Result<UserAddress, Errno> {
725        self.validate_addr(addr, length)?;
726
727        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
728        let selected_addr = self.select_address(addr, length, flags)?;
729        let mapped_addr = selected_addr.addr();
730        let backing_memory_offset = selected_addr.addr().ptr();
731
732        mm.mapping_context.map_in_user_vmar(
733            selected_addr,
734            &mm.mapping_context.private_anonymous.backing,
735            backing_memory_offset as u64,
736            length,
737            flags,
738            populate,
739        )?;
740
741        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
742        if let DesiredAddress::FixedOverwrite(addr) = addr {
743            assert_eq!(addr, mapped_addr);
744            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
745        }
746
747        let mapping = Mapping::new_private_anonymous(flags, name, MappingMode::Eager);
748        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));
749
750        Ok(mapped_addr)
751    }
752
753    fn map_anonymous(
754        &mut self,
755        mm: &Arc<MemoryManager>,
756        addr: DesiredAddress,
757        length: usize,
758        prot_flags: ProtectionFlags,
759        options: MappingOptions,
760        name: MappingName,
761        released_mappings: &mut ReleasedMappings,
762    ) -> Result<UserAddress, Errno> {
763        if !options.contains(MappingOptions::SHARED) {
764            return self.map_private_anonymous(
765                mm,
766                addr,
767                length,
768                prot_flags,
769                options,
770                options.contains(MappingOptions::POPULATE),
771                name,
772                released_mappings,
773            );
774        }
775        let memory = create_anonymous_mapping_memory(length as u64)?;
776        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
777        self.add_memory_mapping(
778            mm,
779            addr,
780            memory,
781            0,
782            length,
783            flags,
784            Access::rwx(),
785            options.contains(MappingOptions::POPULATE),
786            name,
787            MappingMode::Eager,
788            released_mappings,
789        )
790    }
791
792    fn any_ranges_lazy<I>(&self, ranges: I) -> bool
793    where
794        I: IntoIterator<Item = (UserAddress, Option<usize>)>,
795    {
796        for (addr, length) in ranges {
797            match length {
798                None => {
799                    if let Some((_, mapping)) = self.mappings.get(addr) {
800                        if mapping.mapping_mode() == MappingMode::Lazy {
801                            return true;
802                        }
803                    }
804                }
805                Some(len) => {
806                    assert!(len > 0);
807                    let end = addr.checked_add(len).expect("address overflowed after validation");
808                    if self
809                        .mappings
810                        .range(addr..end)
811                        .any(|(_, mapping)| mapping.mapping_mode() == MappingMode::Lazy)
812                    {
813                        return true;
814                    }
815                }
816            }
817        }
818        false
819    }
820
821    fn ensure_range_mapped_in_user_vmar(
822        &mut self,
823        addr: UserAddress,
824        length: Option<usize>,
825        context: &MappingContext,
826    ) -> Result<bool, Errno> {
827        self.ensure_ranges_mapped_in_user_vmar(std::iter::once((addr, length)), context)
828    }
829
830    fn ensure_ranges_mapped_in_user_vmar<I>(
831        &mut self,
832        ranges: I,
833        context: &MappingContext,
834    ) -> Result<bool, Errno>
835    where
836        I: IntoIterator<Item = (UserAddress, Option<usize>)>,
837    {
838        // This is most likely to contain one range, so use `SmallVec` to avoid
839        // heap allocation and better performance in the common case.
840        let mut ranges_to_update = SmallVec::<[std::ops::Range<UserAddress>; 1]>::new();
841        for (addr, length) in ranges {
842            match length {
843                None => {
844                    if let Some((range, mapping)) = self.mappings.get(addr) {
845                        if mapping.mapping_mode() == MappingMode::Lazy {
846                            ranges_to_update.push(range.clone());
847                        }
848                    }
849                }
850                Some(len) => {
851                    assert!(len > 0);
852                    let end = addr.checked_add(len).expect("address overflowed after validation");
853                    for (range, mapping) in self.mappings.range(addr..end) {
854                        if mapping.mapping_mode() == MappingMode::Lazy {
855                            ranges_to_update.push(range.clone());
856                        }
857                    }
858                }
859            }
860        }
861
862        if ranges_to_update.is_empty() {
863            return Ok(false);
864        }
865
866        for range in ranges_to_update {
867            let updated = self.mappings.update_exact(&range, |mapping| {
868                let addr = SelectedAddress::FixedOverwrite(range.start);
869                let flags = mapping.flags();
870                let (backing, backing_memory_offset) = match mapping.get_backing_internal() {
871                    MappingBacking::Memory(backing) => {
872                        (backing.memory(), backing.address_to_offset(addr.addr()))
873                    }
874                    MappingBacking::PrivateAnonymous => {
875                        (&context.private_anonymous.backing, addr.addr().ptr() as u64)
876                    }
877                };
878
879                let mapping_length = range.end - range.start;
880                context.map_in_user_vmar(
881                    addr,
882                    backing,
883                    backing_memory_offset,
884                    mapping_length,
885                    flags,
886                    false,
887                )?;
888
889                mapping.set_mapping_mode(MappingMode::Eager);
890                Ok(())
891            })?;
892            assert!(updated, "Expected to update exactly one mapping");
893        }
894
895        Ok(true)
896    }
897
898    fn remap(
899        &mut self,
900        _current_task: &CurrentTask,
901        mm: &Arc<MemoryManager>,
902        old_addr: UserAddress,
903        old_length: usize,
904        new_length: usize,
905        flags: MremapFlags,
906        new_addr: UserAddress,
907        released_mappings: &mut ReleasedMappings,
908    ) -> Result<UserAddress, Errno> {
909        // MREMAP_FIXED moves a mapping, which requires MREMAP_MAYMOVE.
910        if flags.contains(MremapFlags::FIXED) && !flags.contains(MremapFlags::MAYMOVE) {
911            return error!(EINVAL);
912        }
913
914        // MREMAP_DONTUNMAP is always a move, so it requires MREMAP_MAYMOVE.
915        // There is no resizing allowed either.
916        if flags.contains(MremapFlags::DONTUNMAP)
917            && (!flags.contains(MremapFlags::MAYMOVE) || old_length != new_length)
918        {
919            return error!(EINVAL);
920        }
921
922        // In-place copies are invalid.
923        if !flags.contains(MremapFlags::MAYMOVE) && old_length == 0 {
924            return error!(ENOMEM);
925        }
926
927        if new_length == 0 {
928            return error!(EINVAL);
929        }
930
931        // Make sure old_addr is page-aligned.
932        if !old_addr.is_aligned(*PAGE_SIZE) {
933            return error!(EINVAL);
934        }
935
936        let old_length = round_up_to_system_page_size(old_length)?;
937        let new_length = round_up_to_system_page_size(new_length)?;
938
939        if self.check_has_unauthorized_splits(old_addr, old_length) {
940            return error!(EINVAL);
941        }
942
943        if self.check_has_unauthorized_splits(new_addr, new_length) {
944            return error!(EINVAL);
945        }
946
947        if !flags.contains(MremapFlags::DONTUNMAP)
948            && !flags.contains(MremapFlags::FIXED)
949            && old_length != 0
950        {
951            // We are not requested to remap to a specific address, so first we see if we can remap
952            // in-place. In-place copies (old_length == 0) are not allowed.
953            if let Some(new_addr) =
954                self.try_remap_in_place(mm, old_addr, old_length, new_length, released_mappings)?
955            {
956                return Ok(new_addr);
957            }
958        }
959
960        // There is no space to grow in place, or there is an explicit request to move.
961        if flags.contains(MremapFlags::MAYMOVE) {
962            let dst_address =
963                if flags.contains(MremapFlags::FIXED) { Some(new_addr) } else { None };
964            self.remap_move(
965                mm,
966                old_addr,
967                old_length,
968                dst_address,
969                new_length,
970                flags.contains(MremapFlags::DONTUNMAP),
971                released_mappings,
972            )
973        } else {
974            error!(ENOMEM)
975        }
976    }
977
978    /// Attempts to grow or shrink the mapping in-place. Returns `Ok(Some(addr))` if the remap was
979    /// successful. Returns `Ok(None)` if there was no space to grow.
980    fn try_remap_in_place(
981        &mut self,
982        mm: &Arc<MemoryManager>,
983        old_addr: UserAddress,
984        old_length: usize,
985        new_length: usize,
986        released_mappings: &mut ReleasedMappings,
987    ) -> Result<Option<UserAddress>, Errno> {
988        let old_range = old_addr..old_addr.checked_add(old_length).ok_or_else(|| errno!(EINVAL))?;
989        let new_range_in_place =
990            old_addr..old_addr.checked_add(new_length).ok_or_else(|| errno!(EINVAL))?;
991
992        if new_length <= old_length {
993            // Shrink the mapping in-place, which should always succeed.
994            // This is done by unmapping the extraneous region.
995            if new_length != old_length {
996                self.unmap(mm, new_range_in_place.end, old_length - new_length, released_mappings)?;
997            }
998            return Ok(Some(old_addr));
999        }
1000
1001        if self.mappings.range(old_range.end..new_range_in_place.end).next().is_some() {
1002            // There is some mapping in the growth range prevening an in-place growth.
1003            return Ok(None);
1004        }
1005
1006        // There is space to grow in-place. The old range must be one contiguous mapping.
1007        let (original_range, mapping) =
1008            self.mappings.get(old_addr).ok_or_else(|| errno!(EINVAL))?;
1009
1010        if old_range.end > original_range.end {
1011            return error!(EFAULT);
1012        }
1013        let original_range = original_range.clone();
1014        let original_mapping = mapping.clone();
1015
1016        // Compute the new length of the entire mapping once it has grown.
1017        let final_length = (original_range.end - original_range.start) + (new_length - old_length);
1018
1019        match self.get_mapping_backing(&original_mapping) {
1020            MappingBacking::Memory(backing) => {
1021                // Re-map the original range, which may include pages before the requested range.
1022                Ok(Some(self.add_memory_mapping(
1023                    mm,
1024                    DesiredAddress::FixedOverwrite(original_range.start),
1025                    backing.memory().clone(),
1026                    backing.address_to_offset(original_range.start),
1027                    final_length,
1028                    original_mapping.flags(),
1029                    original_mapping.max_access(),
1030                    false,
1031                    original_mapping.name().to_owned(),
1032                    original_mapping.mapping_mode(),
1033                    released_mappings,
1034                )?))
1035            }
1036            MappingBacking::PrivateAnonymous => {
1037                let growth_start = original_range.end;
1038                let growth_length = new_length - old_length;
1039                let final_end = (original_range.start + final_length)?;
1040                // Map new pages to back the growth.
1041                mm.mapping_context.map_in_user_vmar(
1042                    SelectedAddress::FixedOverwrite(growth_start),
1043                    &mm.mapping_context.private_anonymous.backing,
1044                    growth_start.ptr() as u64,
1045                    growth_length,
1046                    original_mapping.flags(),
1047                    false,
1048                )?;
1049                // Overwrite the mapping entry with the new larger size.
1050                released_mappings.extend(
1051                    self.mappings.insert(original_range.start..final_end, original_mapping.clone()),
1052                );
1053                Ok(Some(original_range.start))
1054            }
1055        }
1056    }
1057
    /// Grows or shrinks the mapping while moving it to a new destination.
    ///
    /// - `src_addr` / `src_length`: the source range. A `src_length` of zero requests a copy of
    ///   the mapping rather than a move, which is only accepted for `MappingFlags::SHARED`
    ///   mappings.
    /// - `dst_addr`: `Some(addr)` to place the result at a fixed address (anything mapped in the
    ///   destination range is unmapped first), or `None` to let an address be selected.
    /// - `dst_length`: the length of the resulting mapping.
    /// - `keep_source`: when `true`, the source range is not unmapped after the move.
    /// - `released_mappings`: receives every mapping displaced along the way.
    ///
    /// Returns the address of the new mapping.
    ///
    /// Note that shrinking the source and unmapping a fixed destination happen before the
    /// remaining validity checks, so those side effects persist even when this returns an error.
    ///
    /// # Errors
    ///
    /// - `EINVAL`: address arithmetic overflows, there is no mapping at `src_addr`, a copy is
    ///   requested for a non-shared mapping, or the fixed destination overlaps the source.
    /// - `EFAULT`: the mapping would grow but is flagged `MappingFlags::DONT_EXPAND`, or the
    ///   source range extends past the end of the mapping containing `src_addr`.
    /// - Any error propagated from `unmap`, `select_address`, `move_pages`,
    ///   `map_in_user_vmar`, `map_private_anonymous` or `add_memory_mapping`.
    fn remap_move(
        &mut self,
        mm: &Arc<MemoryManager>,
        src_addr: UserAddress,
        src_length: usize,
        dst_addr: Option<UserAddress>,
        dst_length: usize,
        keep_source: bool,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        let src_range = src_addr..src_addr.checked_add(src_length).ok_or_else(|| errno!(EINVAL))?;
        let (original_range, src_mapping) =
            self.mappings.get(src_addr).ok_or_else(|| errno!(EINVAL))?;
        // Snapshot the source mapping and its range now: the operations below mutate
        // `self.mappings`.
        let original_range = original_range.clone();
        let src_mapping = src_mapping.clone();

        if src_length == 0 && !src_mapping.flags().contains(MappingFlags::SHARED) {
            // src_length == 0 means that the mapping is to be copied. This behavior is only valid
            // with MAP_SHARED mappings.
            return error!(EINVAL);
        }

        // If the destination range is smaller than the source range, we must first shrink
        // the source range in place. This must be done now and visible to processes, even if
        // a later failure causes the remap operation to fail.
        if src_length != 0 && src_length > dst_length {
            self.unmap(mm, (src_addr + dst_length)?, src_length - dst_length, released_mappings)?;
        }

        let dst_addr_for_map = match dst_addr {
            None => DesiredAddress::Any,
            Some(dst_addr) => {
                // The mapping is being moved to a specific address.
                let dst_range =
                    dst_addr..(dst_addr.checked_add(dst_length).ok_or_else(|| errno!(EINVAL))?);
                if !src_range.intersect(&dst_range).is_empty() {
                    return error!(EINVAL);
                }

                // The destination range must be unmapped. This must be done now and visible to
                // processes, even if a later failure causes the remap operation to fail.
                self.unmap(mm, dst_addr, dst_length, released_mappings)?;

                DesiredAddress::Fixed(dst_addr)
            }
        };

        // According to gVisor's aio_test, Linux checks for DONT_EXPAND after unmapping the dst
        // range.
        if dst_length > src_length && src_mapping.flags().contains(MappingFlags::DONT_EXPAND) {
            return error!(EFAULT);
        }

        if src_range.end > original_range.end {
            // The source range is not one contiguous mapping. This check must be done only after
            // the source range is shrunk and the destination unmapped.
            return error!(EFAULT);
        }

        match self.get_mapping_backing(&src_mapping) {
            MappingBacking::PrivateAnonymous => {
                let dst_addr =
                    self.select_address(dst_addr_for_map, dst_length, src_mapping.flags())?.addr();
                let dst_end = (dst_addr + dst_length)?;

                // Only the part common to the source and destination lengths carries existing
                // pages; anything beyond it is growth.
                let length_to_move = std::cmp::min(dst_length, src_length) as u64;
                let growth_start_addr = (dst_addr + length_to_move)?;

                if dst_addr != src_addr {
                    let src_move_end = (src_range.start + length_to_move)?;
                    let range_to_move = src_range.start..src_move_end;
                    // Move the previously mapped pages into their new location.
                    mm.mapping_context.private_anonymous.move_pages(&range_to_move, dst_addr)?;
                }

                // Userfault registration is not preserved by remap
                let new_flags =
                    src_mapping.flags().difference(MappingFlags::UFFD | MappingFlags::UFFD_MISSING);
                // Only eager mappings are mapped into the user VMAR here; for any other mode
                // just the bookkeeping entry below is created.
                if src_mapping.mapping_mode() == MappingMode::Eager {
                    mm.mapping_context.map_in_user_vmar(
                        SelectedAddress::FixedOverwrite(dst_addr),
                        &mm.mapping_context.private_anonymous.backing,
                        dst_addr.ptr() as u64,
                        dst_length,
                        new_flags,
                        false,
                    )?;

                    if dst_length > src_length {
                        // The mapping has grown, map new pages in to cover the growth.
                        let growth_length = dst_length - src_length;

                        self.map_private_anonymous(
                            mm,
                            DesiredAddress::FixedOverwrite(growth_start_addr),
                            growth_length,
                            new_flags.access_flags(),
                            new_flags.options(),
                            false,
                            src_mapping.name().to_owned(),
                            released_mappings,
                        )?;
                    }
                }

                // Record a single mapping entry covering the whole destination range.
                released_mappings.extend(self.mappings.insert(
                    dst_addr..dst_end,
                    Mapping::new_private_anonymous(
                        new_flags,
                        src_mapping.name().to_owned(),
                        src_mapping.mapping_mode(),
                    ),
                ));

                if dst_addr != src_addr && src_length != 0 && !keep_source {
                    self.unmap(mm, src_addr, src_length, released_mappings)?;
                }

                return Ok(dst_addr);
            }
            MappingBacking::Memory(backing) => {
                // This mapping is backed by an FD or is a shared anonymous mapping. Just map the
                // range of the memory object covering the moved pages. If the memory object already
                // had COW semantics, this preserves them.
                let (dst_memory_offset, memory) =
                    (backing.address_to_offset(src_addr), backing.memory().clone());

                let new_address = self.add_memory_mapping(
                    mm,
                    dst_addr_for_map,
                    memory,
                    dst_memory_offset,
                    dst_length,
                    src_mapping.flags(),
                    src_mapping.max_access(),
                    false,
                    src_mapping.name().to_owned(),
                    src_mapping.mapping_mode(),
                    released_mappings,
                )?;

                if src_length != 0 && !keep_source {
                    // Only unmap the source range if this is not a copy and if there was not a specific
                    // request to not unmap. It was checked earlier that in case of src_length == 0
                    // this mapping is MAP_SHARED.
                    self.unmap(mm, src_addr, src_length, released_mappings)?;
                }

                return Ok(new_address);
            }
        };
    }
1211
1212    // Checks if an operation may be performed over the target mapping that may
1213    // result in a split mapping.
1214    //
1215    // An operation may be forbidden if the target mapping only partially covers
1216    // an existing mapping with the `MappingOptions::DONT_SPLIT` flag set.
1217    fn check_has_unauthorized_splits(&self, addr: UserAddress, length: usize) -> bool {
1218        let query_range = addr..addr.saturating_add(length);
1219        let mut intersection = self.mappings.range(query_range.clone());
1220
1221        // A mapping is not OK if it disallows splitting and the target range
1222        // does not fully cover the mapping range.
1223        let check_if_mapping_has_unauthorized_split =
1224            |mapping: Option<(&Range<UserAddress>, &Mapping)>| {
1225                mapping.is_some_and(|(mapping_range, mapping)| {
1226                    mapping.flags().contains(MappingFlags::DONT_SPLIT)
1227                        && (mapping_range.start < query_range.start
1228                            || query_range.end < mapping_range.end)
1229                })
1230            };
1231
1232        // We only check the first and last mappings in the range because naturally,
1233        // the mappings in the middle are fully covered by the target mapping and
1234        // won't be split.
1235        check_if_mapping_has_unauthorized_split(intersection.next())
1236            || check_if_mapping_has_unauthorized_split(intersection.next_back())
1237    }
1238
1239    /// Unmaps the specified range. Unmapped mappings are placed in `released_mappings`.
1240    fn unmap(
1241        &mut self,
1242        mm: &Arc<MemoryManager>,
1243        addr: UserAddress,
1244        length: usize,
1245        released_mappings: &mut ReleasedMappings,
1246    ) -> Result<(), Errno> {
1247        if !addr.is_aligned(*PAGE_SIZE) {
1248            return error!(EINVAL);
1249        }
1250        let length = round_up_to_system_page_size(length)?;
1251        if length == 0 {
1252            return error!(EINVAL);
1253        }
1254
1255        if self.check_has_unauthorized_splits(addr, length) {
1256            return error!(EINVAL);
1257        }
1258
1259        // Unmap the range, including the the tail of any range that would have been split. This
1260        // operation is safe because we're operating on another process.
1261        #[allow(
1262            clippy::undocumented_unsafe_blocks,
1263            reason = "Force documented unsafe blocks in Starnix"
1264        )]
1265        match unsafe { mm.mapping_context.user_vmar.unmap(addr.ptr(), length) } {
1266            Ok(_) => (),
1267            Err(zx::Status::NOT_FOUND) => (),
1268            Err(zx::Status::INVALID_ARGS) => return error!(EINVAL),
1269            Err(status) => {
1270                impossible_error(status);
1271            }
1272        };
1273
1274        self.update_after_unmap(mm, addr, length, released_mappings)?;
1275
1276        Ok(())
1277    }
1278
1279    // Updates `self.mappings` after the specified range was unmaped.
1280    //
1281    // The range to unmap can span multiple mappings, and can split mappings if
1282    // the range start or end falls in the middle of a mapping.
1283    //
1284    // Private anonymous memory is contained in the same memory object; The pages of that object
1285    // that are no longer reachable should be released.
1286    //
1287    // File-backed mappings don't need to have their memory object modified.
1288    //
1289    // Unmapped mappings are placed in `released_mappings`.
1290    fn update_after_unmap(
1291        &mut self,
1292        mm: &Arc<MemoryManager>,
1293        addr: UserAddress,
1294        length: usize,
1295        released_mappings: &mut ReleasedMappings,
1296    ) -> Result<(), Errno> {
1297        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
1298        let unmap_range = addr..end_addr;
1299
1300        // Remove any shadow mappings for mlock()'d pages that are now unmapped.
1301        released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(unmap_range.clone()));
1302
1303        for (range, mapping) in self.mappings.range(unmap_range.clone()) {
1304            // Deallocate any pages in the private, anonymous backing that are now unreachable.
1305            if let MappingBacking::PrivateAnonymous = self.get_mapping_backing(mapping) {
1306                let unmapped_range = &unmap_range.intersect(range);
1307
1308                mm.inflight_vmspliced_payloads.handle_unmapping(
1309                    &mm.mapping_context.private_anonymous.backing,
1310                    unmapped_range,
1311                )?;
1312
1313                mm.mapping_context
1314                    .private_anonymous
1315                    .zero(unmapped_range.start, unmapped_range.end - unmapped_range.start)?;
1316            }
1317        }
1318        released_mappings.extend(self.mappings.remove(unmap_range));
1319        return Ok(());
1320    }
1321
1322    fn protect(
1323        &mut self,
1324        current_task: &CurrentTask,
1325        addr: UserAddress,
1326        length: usize,
1327        prot_flags: ProtectionFlags,
1328        released_mappings: &mut ReleasedMappings,
1329    ) -> Result<(), Errno> {
1330        let vmar_flags = prot_flags.to_vmar_flags();
1331        let page_size = *PAGE_SIZE;
1332        let end = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(page_size)?;
1333
1334        if self.check_has_unauthorized_splits(addr, length) {
1335            return error!(EINVAL);
1336        }
1337
1338        let prot_range = if prot_flags.contains(ProtectionFlags::GROWSDOWN) {
1339            let mut start = addr;
1340            let Some((range, mapping)) = self.mappings.get(start) else {
1341                return error!(EINVAL);
1342            };
1343            // Ensure that the mapping has GROWSDOWN if PROT_GROWSDOWN was specified.
1344            if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1345                return error!(EINVAL);
1346            }
1347            let access_flags = mapping.flags().access_flags();
1348            // From <https://man7.org/linux/man-pages/man2/mprotect.2.html>:
1349            //
1350            //   PROT_GROWSDOWN
1351            //     Apply the protection mode down to the beginning of a
1352            //     mapping that grows downward (which should be a stack
1353            //     segment or a segment mapped with the MAP_GROWSDOWN flag
1354            //     set).
1355            start = range.start;
1356            while let Some((range, mapping)) =
1357                self.mappings.get(start.saturating_sub(page_size as usize))
1358            {
1359                if !mapping.flags().contains(MappingFlags::GROWSDOWN)
1360                    || mapping.flags().access_flags() != access_flags
1361                {
1362                    break;
1363                }
1364                start = range.start;
1365            }
1366            start..end
1367        } else {
1368            addr..end
1369        };
1370
1371        let mut range_list = vec![];
1372        let mapping_context = &current_task.mm()?.mapping_context;
1373        let length = prot_range.end - prot_range.start;
1374        self.ensure_range_mapped_in_user_vmar(prot_range.start, Some(length), mapping_context)?;
1375
1376        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1377            range_list.push((range.clone(), mapping.clone()));
1378        }
1379
1380        let mut start_cursor = prot_range.start;
1381        let mut updates = vec![];
1382        let mut final_result = Ok(());
1383
1384        for (range, mapping) in range_list {
1385            if range.start > start_cursor {
1386                final_result = error!(ENOMEM);
1387                break;
1388            }
1389
1390            let intersection = range.intersect(&prot_range);
1391            if let Err(e) =
1392                security::file_mprotect(current_task, &intersection, &mapping, prot_flags)
1393            {
1394                final_result = Err(e);
1395                break;
1396            }
1397
1398            if mapping.flags().contains(MappingFlags::UFFD) {
1399                track_stub!(
1400                    TODO("https://fxbug.dev/297375964"),
1401                    "mprotect on uffd-registered range should not alter protections"
1402                );
1403                final_result = error!(EINVAL);
1404                break;
1405            }
1406
1407            let mapped_len = intersection.end - intersection.start;
1408
1409            // SAFETY: This is safe because the vmar belongs to a different process.
1410            let protect_result = unsafe {
1411                mapping_context.user_vmar.protect(intersection.start.ptr(), mapped_len, vmar_flags)
1412            }
1413            .map_err(|s| match s {
1414                zx::Status::INVALID_ARGS => errno!(EINVAL),
1415                zx::Status::NOT_FOUND => errno!(ENOMEM),
1416                zx::Status::ACCESS_DENIED => errno!(EACCES),
1417                _ => impossible_error(s),
1418            });
1419
1420            if let Err(e) = protect_result {
1421                final_result = Err(e);
1422                break;
1423            }
1424
1425            let mut new_mapping = mapping.clone();
1426            new_mapping.set_flags(new_mapping.flags().with_access_flags(prot_flags));
1427            let push_range = intersection.clone();
1428            start_cursor = intersection.end;
1429            updates.push((push_range, new_mapping));
1430        }
1431
1432        if final_result.is_ok() && start_cursor < prot_range.end {
1433            final_result = error!(ENOMEM);
1434        }
1435
1436        for (r, m) in updates {
1437            released_mappings.extend(self.mappings.insert(r, m));
1438        }
1439
1440        final_result
1441    }
1442
1443    fn madvise(
1444        &mut self,
1445        context: &MappingContext,
1446        addr: UserAddress,
1447        length: usize,
1448        advice: u32,
1449        released_mappings: &mut ReleasedMappings,
1450    ) -> Result<(), Errno> {
1451        if !addr.is_aligned(*PAGE_SIZE) {
1452            return error!(EINVAL);
1453        }
1454
1455        let end_addr =
1456            addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(*PAGE_SIZE)?;
1457        if end_addr > context.max_address() {
1458            return error!(EFAULT);
1459        }
1460
1461        if advice == MADV_NORMAL {
1462            track_stub!(TODO("https://fxbug.dev/322874202"), "madvise undo hints for MADV_NORMAL");
1463            return Ok(());
1464        }
1465
1466        let mut updates = vec![];
1467        let range_for_op = addr..end_addr;
1468        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
1469            let range_to_zero = range.intersect(&range_for_op);
1470            if range_to_zero.is_empty() {
1471                continue;
1472            }
1473            let start_offset = mapping.address_to_offset(range_to_zero.start);
1474            let end_offset = mapping.address_to_offset(range_to_zero.end);
1475            if advice == MADV_DONTFORK
1476                || advice == MADV_DOFORK
1477                || advice == MADV_WIPEONFORK
1478                || advice == MADV_KEEPONFORK
1479                || advice == MADV_DONTDUMP
1480                || advice == MADV_DODUMP
1481                || advice == MADV_MERGEABLE
1482                || advice == MADV_UNMERGEABLE
1483            {
1484                // WIPEONFORK is only supported on private anonymous mappings per madvise(2).
1485                // KEEPONFORK can be specified on ranges that cover other sorts of mappings. It should
1486                // have no effect on mappings that are not private and anonymous as such mappings cannot
1487                // have the WIPEONFORK option set.
1488                if advice == MADV_WIPEONFORK && !mapping.private_anonymous() {
1489                    return error!(EINVAL);
1490                }
1491                let new_flags = match advice {
1492                    MADV_DONTFORK => mapping.flags() | MappingFlags::DONTFORK,
1493                    MADV_DOFORK => mapping.flags() & MappingFlags::DONTFORK.complement(),
1494                    MADV_WIPEONFORK => mapping.flags() | MappingFlags::WIPEONFORK,
1495                    MADV_KEEPONFORK => mapping.flags() & MappingFlags::WIPEONFORK.complement(),
1496                    MADV_DONTDUMP => {
1497                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTDUMP");
1498                        mapping.flags()
1499                    }
1500                    MADV_DODUMP => {
1501                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DODUMP");
1502                        mapping.flags()
1503                    }
1504                    MADV_MERGEABLE => {
1505                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_MERGEABLE");
1506                        mapping.flags()
1507                    }
1508                    MADV_UNMERGEABLE => {
1509                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_UNMERGEABLE");
1510                        mapping.flags()
1511                    }
1512                    // Only the variants in this match should be reachable given the condition for
1513                    // the containing branch.
1514                    unknown_advice => unreachable!("unknown advice {unknown_advice}"),
1515                };
1516                let mut new_mapping = mapping.clone();
1517                new_mapping.set_flags(new_flags);
1518                updates.push((range_to_zero, new_mapping));
1519            } else {
1520                if mapping.flags().contains(MappingFlags::SHARED) {
1521                    continue;
1522                }
1523                let op = match advice {
1524                    MADV_DONTNEED if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1525                        // Note, we cannot simply implemented MADV_DONTNEED with
1526                        // zx::VmoOp::DONT_NEED because they have different
1527                        // semantics.
1528                        track_stub!(
1529                            TODO("https://fxbug.dev/322874496"),
1530                            "MADV_DONTNEED with file-backed mapping"
1531                        );
1532                        return error!(EINVAL);
1533                    }
1534                    MADV_DONTNEED if mapping.flags().contains(MappingFlags::LOCKED) => {
1535                        return error!(EINVAL);
1536                    }
1537                    MADV_DONTNEED => zx::VmoOp::ZERO,
1538                    MADV_DONTNEED_LOCKED => {
1539                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTNEED_LOCKED");
1540                        return error!(EINVAL);
1541                    }
1542                    MADV_WILLNEED => {
1543                        if mapping.flags().contains(MappingFlags::WRITE) {
1544                            zx::VmoOp::COMMIT
1545                        } else {
1546                            zx::VmoOp::PREFETCH
1547                        }
1548                    }
1549                    MADV_COLD => {
1550                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLD");
1551                        return error!(EINVAL);
1552                    }
1553                    MADV_PAGEOUT => {
1554                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_PAGEOUT");
1555                        return error!(EINVAL);
1556                    }
1557                    MADV_POPULATE_READ => {
1558                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_POPULATE_READ");
1559                        return error!(EINVAL);
1560                    }
1561                    MADV_RANDOM => {
1562                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_RANDOM");
1563                        return error!(EINVAL);
1564                    }
1565                    MADV_SEQUENTIAL => {
1566                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SEQUENTIAL");
1567                        return error!(EINVAL);
1568                    }
1569                    MADV_FREE if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1570                        track_stub!(
1571                            TODO("https://fxbug.dev/411748419"),
1572                            "MADV_FREE with file-backed mapping"
1573                        );
1574                        return error!(EINVAL);
1575                    }
1576                    MADV_FREE if mapping.flags().contains(MappingFlags::LOCKED) => {
1577                        return error!(EINVAL);
1578                    }
1579                    MADV_FREE => {
1580                        track_stub!(TODO("https://fxbug.dev/411748419"), "MADV_FREE");
1581                        // TODO(https://fxbug.dev/411748419) For now, treat MADV_FREE like
1582                        // MADV_DONTNEED as a stopgap until we have proper support.
1583                        zx::VmoOp::ZERO
1584                    }
1585                    MADV_REMOVE => {
1586                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_REMOVE");
1587                        return error!(EINVAL);
1588                    }
1589                    MADV_HWPOISON => {
1590                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HWPOISON");
1591                        return error!(EINVAL);
1592                    }
1593                    MADV_SOFT_OFFLINE => {
1594                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SOFT_OFFLINE");
1595                        return error!(EINVAL);
1596                    }
1597                    MADV_HUGEPAGE => {
1598                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HUGEPAGE");
1599                        return error!(EINVAL);
1600                    }
1601                    MADV_COLLAPSE => {
1602                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLLAPSE");
1603                        return error!(EINVAL);
1604                    }
1605                    MADV_NOHUGEPAGE => return Ok(()),
1606                    advice => {
1607                        track_stub!(TODO("https://fxbug.dev/322874202"), "madvise", advice);
1608                        return error!(EINVAL);
1609                    }
1610                };
1611
1612                let memory = match self.get_mapping_backing(mapping) {
1613                    MappingBacking::Memory(backing) => backing.memory(),
1614                    MappingBacking::PrivateAnonymous => &context.private_anonymous.backing,
1615                };
1616                memory.op_range(op, start_offset, end_offset - start_offset).map_err(
1617                    |s| match s {
1618                        zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1619                        zx::Status::NO_MEMORY => errno!(ENOMEM),
1620                        zx::Status::INVALID_ARGS => errno!(EINVAL),
1621                        zx::Status::ACCESS_DENIED => errno!(EACCES),
1622                        _ => impossible_error(s),
1623                    },
1624                )?;
1625            }
1626        }
1627        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1628        for (range, mapping) in updates {
1629            released_mappings.extend(self.mappings.insert(range, mapping));
1630        }
1631        Ok(())
1632    }
1633
1634    fn mlock<L>(
1635        &mut self,
1636        context: &MappingContext,
1637        current_task: &CurrentTask,
1638        locked: &mut Locked<L>,
1639        desired_addr: UserAddress,
1640        desired_length: usize,
1641        on_fault: bool,
1642        released_mappings: &mut ReleasedMappings,
1643    ) -> Result<(), Errno>
1644    where
1645        L: LockBefore<ThreadGroupLimits>,
1646    {
1647        let desired_end_addr =
1648            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1649        let start_addr = round_down_to_system_page_size(desired_addr)?;
1650        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1651
1652        let mut updates = vec![];
1653        let mut bytes_mapped_in_range = 0;
1654        let mut num_new_locked_bytes = 0;
1655        let mut failed_to_lock = false;
1656        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1657            let mut range = range.clone();
1658            let mut mapping = mapping.clone();
1659
1660            // Handle mappings that start before the region to be locked.
1661            range.start = std::cmp::max(range.start, start_addr);
1662            // Handle mappings that extend past the region to be locked.
1663            range.end = std::cmp::min(range.end, end_addr);
1664
1665            bytes_mapped_in_range += (range.end - range.start) as u64;
1666
1667            // PROT_NONE mappings generate ENOMEM but are left locked.
1668            if !mapping
1669                .flags()
1670                .intersects(MappingFlags::READ | MappingFlags::WRITE | MappingFlags::EXEC)
1671            {
1672                failed_to_lock = true;
1673            }
1674
1675            if !mapping.flags().contains(MappingFlags::LOCKED) {
1676                num_new_locked_bytes += (range.end - range.start) as u64;
1677                let shadow_mapping = match current_task.kernel().features.mlock_pin_flavor {
1678                    // Pin the memory by mapping the backing memory into the high priority vmar.
1679                    MlockPinFlavor::ShadowProcess => {
1680                        let shadow_process =
1681                            current_task.kernel().expando.get_or_try_init(|| {
1682                                memory_pinning::ShadowProcess::new(zx::Name::new_lossy(
1683                                    "starnix_mlock_pins",
1684                                ))
1685                                .map(MlockShadowProcess)
1686                                .map_err(|_| errno!(EPERM))
1687                            })?;
1688
1689                        let (vmo, offset) = match self.get_mapping_backing(&mapping) {
1690                            MappingBacking::Memory(m) => (
1691                                m.memory().as_vmo().ok_or_else(|| errno!(ENOMEM))?,
1692                                m.address_to_offset(range.start),
1693                            ),
1694                            MappingBacking::PrivateAnonymous => (
1695                                context
1696                                    .private_anonymous
1697                                    .backing
1698                                    .as_vmo()
1699                                    .ok_or_else(|| errno!(ENOMEM))?,
1700                                range.start.ptr() as u64,
1701                            ),
1702                        };
1703                        Some(shadow_process.0.pin_pages(vmo, offset, range.end - range.start)?)
1704                    }
1705
1706                    // Relying on VMAR-level operations means just flags are set per-mapping.
1707                    MlockPinFlavor::Noop | MlockPinFlavor::VmarAlwaysNeed => None,
1708                };
1709                mapping.set_mlock();
1710                updates.push((range, mapping, shadow_mapping));
1711            }
1712        }
1713
1714        if bytes_mapped_in_range as usize != end_addr - start_addr {
1715            return error!(ENOMEM);
1716        }
1717
1718        let memlock_rlimit = current_task.thread_group().get_rlimit(locked, Resource::MEMLOCK);
1719        let total_locked = self.num_locked_bytes(
1720            UserAddress::from(context.user_vmar_info.base as u64)
1721                ..UserAddress::from(
1722                    (context.user_vmar_info.base + context.user_vmar_info.len) as u64,
1723                ),
1724        );
1725        if total_locked + num_new_locked_bytes > memlock_rlimit {
1726            if crate::security::check_task_capable(current_task, CAP_IPC_LOCK).is_err() {
1727                let code = if memlock_rlimit > 0 { errno!(ENOMEM) } else { errno!(EPERM) };
1728                return Err(code);
1729            }
1730        }
1731
1732        let op_range_status_to_errno = |e| match e {
1733            zx::Status::BAD_STATE | zx::Status::NOT_SUPPORTED => errno!(ENOMEM),
1734            zx::Status::INVALID_ARGS | zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1735            zx::Status::ACCESS_DENIED => {
1736                unreachable!("user vmar should always have needed rights")
1737            }
1738            zx::Status::BAD_HANDLE => {
1739                unreachable!("user vmar should always be a valid handle")
1740            }
1741            zx::Status::WRONG_TYPE => unreachable!("user vmar handle should be a vmar"),
1742            _ => unreachable!("unknown error from op_range on user vmar for mlock: {e}"),
1743        };
1744
1745        self.ensure_range_mapped_in_user_vmar(start_addr, Some(end_addr - start_addr), context)?;
1746
1747        if !on_fault && !current_task.kernel().features.mlock_always_onfault {
1748            context
1749                .user_vmar
1750                .op_range(zx::VmarOp::PREFETCH, start_addr.ptr(), end_addr - start_addr)
1751                .map_err(op_range_status_to_errno)?;
1752        }
1753
1754        match current_task.kernel().features.mlock_pin_flavor {
1755            MlockPinFlavor::VmarAlwaysNeed => {
1756                context
1757                    .user_vmar
1758                    .op_range(zx::VmarOp::ALWAYS_NEED, start_addr.ptr(), end_addr - start_addr)
1759                    .map_err(op_range_status_to_errno)?;
1760            }
1761            // The shadow process doesn't use any vmar-level operations to pin memory.
1762            MlockPinFlavor::Noop | MlockPinFlavor::ShadowProcess => (),
1763        }
1764
1765        for (range, mapping, shadow_mapping) in updates {
1766            if let Some(shadow_mapping) = shadow_mapping {
1767                released_mappings.extend_pins(
1768                    self.shadow_mappings_for_mlock.insert(range.clone(), shadow_mapping),
1769                );
1770            }
1771            released_mappings.extend(self.mappings.insert(range, mapping));
1772        }
1773
1774        if failed_to_lock { error!(ENOMEM) } else { Ok(()) }
1775    }
1776
1777    fn munlock(
1778        &mut self,
1779        _current_task: &CurrentTask,
1780        desired_addr: UserAddress,
1781        desired_length: usize,
1782        released_mappings: &mut ReleasedMappings,
1783    ) -> Result<(), Errno> {
1784        let desired_end_addr =
1785            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1786        let start_addr = round_down_to_system_page_size(desired_addr)?;
1787        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1788
1789        let mut updates = vec![];
1790        let mut bytes_mapped_in_range = 0;
1791        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1792            let mut range = range.clone();
1793            let mut mapping = mapping.clone();
1794
1795            // Handle mappings that start before the region to be locked.
1796            range.start = std::cmp::max(range.start, start_addr);
1797            // Handle mappings that extend past the region to be locked.
1798            range.end = std::cmp::min(range.end, end_addr);
1799
1800            bytes_mapped_in_range += (range.end - range.start) as u64;
1801
1802            if mapping.flags().contains(MappingFlags::LOCKED) {
1803                // This clears the locking for the shadow process pin flavor. It's not currently
1804                // possible to actually unlock pages that were locked with the
1805                // ZX_VMAR_OP_ALWAYS_NEED pin flavor.
1806                mapping.clear_mlock();
1807                updates.push((range, mapping));
1808            }
1809        }
1810
1811        if bytes_mapped_in_range as usize != end_addr - start_addr {
1812            return error!(ENOMEM);
1813        }
1814
1815        for (range, mapping) in updates {
1816            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
1817            released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(range));
1818        }
1819
1820        Ok(())
1821    }
1822
1823    pub fn num_locked_bytes(&self, range: impl RangeBounds<UserAddress>) -> u64 {
1824        self.mappings
1825            .map
1826            .range(range)
1827            .filter(|(_, mapping)| mapping.flags().contains(MappingFlags::LOCKED))
1828            .map(|(range, _)| (range.end - range.start) as u64)
1829            .sum()
1830    }
1831
1832    fn get_mappings_for_vmsplice(
1833        &self,
1834        mm: &Arc<MemoryManager>,
1835        buffers: &UserBuffers,
1836    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
1837        let mut vmsplice_mappings = Vec::new();
1838
1839        for UserBuffer { mut address, length } in buffers.iter().copied() {
1840            let mappings = self.get_contiguous_mappings_at(address, length, &mm.mapping_context)?;
1841            for (mapping, length) in mappings {
1842                let vmsplice_payload = match self.get_mapping_backing(mapping) {
1843                    MappingBacking::Memory(m) => VmsplicePayloadSegment {
1844                        addr_offset: address,
1845                        length,
1846                        memory: m.memory().clone(),
1847                        memory_offset: m.address_to_offset(address),
1848                        should_snapshot_on_unmap: false,
1849                    },
1850                    MappingBacking::PrivateAnonymous => VmsplicePayloadSegment {
1851                        addr_offset: address,
1852                        length,
1853                        memory: mm.mapping_context.private_anonymous.backing.clone(),
1854                        memory_offset: address.ptr() as u64,
1855                        should_snapshot_on_unmap: true,
1856                    },
1857                };
1858                vmsplice_mappings.push(VmsplicePayload::new(Arc::downgrade(mm), vmsplice_payload));
1859
1860                address = (address + length)?;
1861            }
1862        }
1863
1864        Ok(vmsplice_mappings)
1865    }
1866
1867    /// Returns all the mappings starting at `addr`, and continuing until either `length` bytes have
1868    /// been covered or an unmapped page is reached.
1869    ///
1870    /// Mappings are returned in ascending order along with the number of bytes that intersect the
1871    /// requested range. The returned mappings are guaranteed to be contiguous and the total length
1872    /// corresponds to the number of contiguous mapped bytes starting from `addr`, i.e.:
1873    /// - 0 (empty iterator) if `addr` is not mapped.
1874    /// - exactly `length` if the requested range is fully mapped.
1875    /// - the offset of the first unmapped page (between 0 and `length`) if the requested range is
1876    ///   only partially mapped.
1877    ///
1878    /// Returns EFAULT if the requested range overflows or extends past the end of the vmar.
1879    fn get_contiguous_mappings_at(
1880        &self,
1881        addr: UserAddress,
1882        length: usize,
1883        context: &MappingContext,
1884    ) -> Result<impl Iterator<Item = (&Mapping, usize)>, Errno> {
1885        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EFAULT))?;
1886        if end_addr > context.max_address() {
1887            return error!(EFAULT);
1888        }
1889
1890        // Iterate over all contiguous mappings intersecting the requested range.
1891        let mut mappings = self.mappings.range(addr..end_addr);
1892        let mut prev_range_end = None;
1893        let mut offset = 0;
1894        let result = std::iter::from_fn(move || {
1895            if offset != length {
1896                if let Some((range, mapping)) = mappings.next() {
1897                    return match prev_range_end {
1898                        // If this is the first mapping that we are considering, it may not actually
1899                        // contain `addr` at all.
1900                        None if range.start > addr => None,
1901
1902                        // Subsequent mappings may not be contiguous.
1903                        Some(prev_range_end) if range.start != prev_range_end => None,
1904
1905                        // This mapping can be returned.
1906                        _ => {
1907                            let mapping_length = std::cmp::min(length, range.end - addr) - offset;
1908                            offset += mapping_length;
1909                            prev_range_end = Some(range.end);
1910                            Some((mapping, mapping_length))
1911                        }
1912                    };
1913                }
1914            }
1915
1916            None
1917        });
1918
1919        Ok(result)
1920    }
1921
1922    /// Determines whether a fault at the given address could be covered by extending a growsdown
1923    /// mapping.
1924    ///
1925    /// If the address already belongs to a mapping, this function returns `None`. If the next
1926    /// mapping above the given address has the `MappingFlags::GROWSDOWN` flag, this function
1927    /// returns the address at which that mapping starts and the mapping itself. Otherwise, this
1928    /// function returns `None`.
1929    fn find_growsdown_mapping(&self, addr: UserAddress) -> Option<(UserAddress, &Mapping)> {
1930        match self.mappings.range(addr..).next() {
1931            Some((range, mapping)) => {
1932                if range.contains(&addr) {
1933                    // |addr| is already contained within a mapping, nothing to grow.
1934                    return None;
1935                } else if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1936                    // The next mapping above the given address does not have the
1937                    // `MappingFlags::GROWSDOWN` flag.
1938                    None
1939                } else {
1940                    Some((range.start, mapping))
1941                }
1942            }
1943            None => None,
1944        }
1945    }
1946
1947    /// Determines if an access at a given address could be covered by extending a growsdown mapping
1948    /// and extends it if possible. Returns true if the given address is covered by a mapping.
1949    fn extend_growsdown_mapping_to_address(
1950        &mut self,
1951        mm: &Arc<MemoryManager>,
1952        addr: UserAddress,
1953        is_write: bool,
1954    ) -> Result<bool, Error> {
1955        let Some((mapping_low_addr, mapping_to_grow)) = self.find_growsdown_mapping(addr) else {
1956            return Ok(false);
1957        };
1958        if is_write && !mapping_to_grow.can_write() {
1959            // Don't grow a read-only GROWSDOWN mapping for a write fault, it won't work.
1960            return Ok(false);
1961        }
1962        if !mapping_to_grow.flags().contains(MappingFlags::ANONYMOUS) {
1963            // Currently, we only grow anonymous mappings.
1964            return Ok(false);
1965        }
1966        let low_addr = (addr - (addr.ptr() as u64 % *PAGE_SIZE))?;
1967        let high_addr = mapping_low_addr;
1968
1969        let length = high_addr
1970            .ptr()
1971            .checked_sub(low_addr.ptr())
1972            .ok_or_else(|| anyhow!("Invalid growth range"))?;
1973
1974        let mut released_mappings = ReleasedMappings::default();
1975        self.map_anonymous(
1976            mm,
1977            DesiredAddress::FixedOverwrite(low_addr),
1978            length,
1979            mapping_to_grow.flags().access_flags(),
1980            mapping_to_grow.flags().options(),
1981            mapping_to_grow.name().to_owned(),
1982            &mut released_mappings,
1983        )?;
1984        // We can't have any released mappings because `find_growsdown_mapping` will return None if
1985        // the mapping already exists in this range.
1986        assert!(
1987            released_mappings.is_empty(),
1988            "expected to not remove mappings by inserting, got {released_mappings:#?}"
1989        );
1990        Ok(true)
1991    }
1992
1993    /// Reads exactly `bytes.len()` bytes of memory.
1994    ///
1995    /// # Parameters
1996    /// - `addr`: The address to read data from.
1997    /// - `bytes`: The byte array to read into.
1998    fn read_memory<'a>(
1999        &self,
2000        addr: UserAddress,
2001        bytes: &'a mut [MaybeUninit<u8>],
2002        context: &MappingContext,
2003    ) -> Result<&'a mut [u8], Errno> {
2004        let mut bytes_read = 0;
2005        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2006            let next_offset = bytes_read + len;
2007            self.read_mapping_memory(
2008                (addr + bytes_read)?,
2009                mapping,
2010                &mut bytes[bytes_read..next_offset],
2011                context,
2012            )?;
2013            bytes_read = next_offset;
2014        }
2015
2016        if bytes_read != bytes.len() {
2017            error!(EFAULT)
2018        } else {
2019            // SAFETY: The created slice is properly aligned/sized since it
2020            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
2021            // the same layout as `T`. Also note that `bytes_read` bytes have
2022            // been properly initialized.
2023            let bytes = unsafe {
2024                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
2025            };
2026            Ok(bytes)
2027        }
2028    }
2029
2030    /// Reads exactly `bytes.len()` bytes of memory from `addr`.
2031    ///
2032    /// # Parameters
2033    /// - `addr`: The address to read data from.
2034    /// - `bytes`: The byte array to read into.
2035    fn read_mapping_memory<'a>(
2036        &self,
2037        addr: UserAddress,
2038        mapping: &Mapping,
2039        bytes: &'a mut [MaybeUninit<u8>],
2040        context: &MappingContext,
2041    ) -> Result<&'a mut [u8], Errno> {
2042        if !mapping.can_read() {
2043            return error!(EFAULT, "read_mapping_memory called on unreadable mapping");
2044        }
2045        match self.get_mapping_backing(mapping) {
2046            MappingBacking::Memory(backing) => backing.read_memory(addr, bytes),
2047            MappingBacking::PrivateAnonymous => context.private_anonymous.read_memory(addr, bytes),
2048        }
2049    }
2050
2051    /// Reads bytes starting at `addr`, continuing until either `bytes.len()` bytes have been read
2052    /// or no more bytes can be read.
2053    ///
2054    /// This is used, for example, to read null-terminated strings where the exact length is not
2055    /// known, only the maximum length is.
2056    ///
2057    /// # Parameters
2058    /// - `addr`: The address to read data from.
2059    /// - `bytes`: The byte array to read into.
2060    fn read_memory_partial<'a>(
2061        &self,
2062        addr: UserAddress,
2063        bytes: &'a mut [MaybeUninit<u8>],
2064        context: &MappingContext,
2065    ) -> Result<&'a mut [u8], Errno> {
2066        let mut bytes_read = 0;
2067        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2068            let next_offset = bytes_read + len;
2069            if self
2070                .read_mapping_memory(
2071                    (addr + bytes_read)?,
2072                    mapping,
2073                    &mut bytes[bytes_read..next_offset],
2074                    context,
2075                )
2076                .is_err()
2077            {
2078                break;
2079            }
2080            bytes_read = next_offset;
2081        }
2082
2083        // If at least one byte was requested but we got none, it means that `addr` was invalid.
2084        if !bytes.is_empty() && bytes_read == 0 {
2085            error!(EFAULT)
2086        } else {
2087            // SAFETY: The created slice is properly aligned/sized since it
2088            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
2089            // the same layout as `T`. Also note that `bytes_read` bytes have
2090            // been properly initialized.
2091            let bytes = unsafe {
2092                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
2093            };
2094            Ok(bytes)
2095        }
2096    }
2097
2098    /// Like `read_memory_partial` but only returns the bytes up to and including
2099    /// a null (zero) byte.
2100    fn read_memory_partial_until_null_byte<'a>(
2101        &self,
2102        addr: UserAddress,
2103        bytes: &'a mut [MaybeUninit<u8>],
2104        context: &MappingContext,
2105    ) -> Result<&'a mut [u8], Errno> {
2106        let read_bytes = self.read_memory_partial(addr, bytes, context)?;
2107        let max_len = memchr::memchr(b'\0', read_bytes)
2108            .map_or_else(|| read_bytes.len(), |null_index| null_index + 1);
2109        Ok(&mut read_bytes[..max_len])
2110    }
2111
2112    /// Writes the provided bytes.
2113    ///
2114    /// In case of success, the number of bytes written will always be `bytes.len()`.
2115    ///
2116    /// # Parameters
2117    /// - `addr`: The address to write to.
2118    /// - `bytes`: The bytes to write.
2119    fn write_memory(
2120        &self,
2121        addr: UserAddress,
2122        bytes: &[u8],
2123        context: &MappingContext,
2124    ) -> Result<usize, Errno> {
2125        let mut bytes_written = 0;
2126        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2127            let next_offset = bytes_written + len;
2128            self.write_mapping_memory(
2129                (addr + bytes_written)?,
2130                mapping,
2131                &bytes[bytes_written..next_offset],
2132                context,
2133            )?;
2134            bytes_written = next_offset;
2135        }
2136
2137        if bytes_written != bytes.len() { error!(EFAULT) } else { Ok(bytes.len()) }
2138    }
2139
2140    /// Writes the provided bytes to `addr`.
2141    ///
2142    /// # Parameters
2143    /// - `addr`: The address to write to.
2144    /// - `bytes`: The bytes to write to the memory object.
2145    fn write_mapping_memory(
2146        &self,
2147        addr: UserAddress,
2148        mapping: &Mapping,
2149        bytes: &[u8],
2150        context: &MappingContext,
2151    ) -> Result<(), Errno> {
2152        if !mapping.can_write() {
2153            return error!(EFAULT, "write_mapping_memory called on unwritable memory");
2154        }
2155        match self.get_mapping_backing(mapping) {
2156            MappingBacking::Memory(backing) => backing.write_memory(addr, bytes),
2157            MappingBacking::PrivateAnonymous => context.private_anonymous.write_memory(addr, bytes),
2158        }
2159    }
2160
2161    /// Writes bytes starting at `addr`, continuing until either `bytes.len()` bytes have been
2162    /// written or no more bytes can be written.
2163    ///
2164    /// # Parameters
2165    /// - `addr`: The address to read data from.
2166    /// - `bytes`: The byte array to write from.
2167    fn write_memory_partial(
2168        &self,
2169        addr: UserAddress,
2170        bytes: &[u8],
2171        context: &MappingContext,
2172    ) -> Result<usize, Errno> {
2173        let mut bytes_written = 0;
2174        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2175            let next_offset = bytes_written + len;
2176            if self
2177                .write_mapping_memory(
2178                    (addr + bytes_written)?,
2179                    mapping,
2180                    &bytes[bytes_written..next_offset],
2181                    context,
2182                )
2183                .is_err()
2184            {
2185                break;
2186            }
2187            bytes_written = next_offset;
2188        }
2189
2190        if !bytes.is_empty() && bytes_written == 0 { error!(EFAULT) } else { Ok(bytes.len()) }
2191    }
2192
2193    fn zero(
2194        &self,
2195        addr: UserAddress,
2196        length: usize,
2197        context: &MappingContext,
2198    ) -> Result<usize, Errno> {
2199        let mut bytes_written = 0;
2200        for (mapping, len) in self.get_contiguous_mappings_at(addr, length, context)? {
2201            let next_offset = bytes_written + len;
2202            if self.zero_mapping((addr + bytes_written)?, mapping, len, context).is_err() {
2203                break;
2204            }
2205            bytes_written = next_offset;
2206        }
2207
2208        if length != bytes_written { error!(EFAULT) } else { Ok(length) }
2209    }
2210
2211    fn zero_mapping(
2212        &self,
2213        addr: UserAddress,
2214        mapping: &Mapping,
2215        length: usize,
2216        context: &MappingContext,
2217    ) -> Result<usize, Errno> {
2218        if !mapping.can_write() {
2219            return error!(EFAULT);
2220        }
2221
2222        match self.get_mapping_backing(mapping) {
2223            MappingBacking::Memory(backing) => backing.zero(addr, length),
2224            MappingBacking::PrivateAnonymous => context.private_anonymous.zero(addr, length),
2225        }
2226    }
2227
2228    pub fn create_memory_backing(
2229        &self,
2230        base: UserAddress,
2231        memory: Arc<MemoryObject>,
2232        memory_offset: u64,
2233    ) -> MappingBacking {
2234        MappingBacking::Memory(Box::new(MappingBackingMemory::new(base, memory, memory_offset)))
2235    }
2236
2237    pub fn get_mapping_backing<'a>(&self, mapping: &'a Mapping) -> &'a MappingBacking {
2238        mapping.get_backing_internal()
2239    }
2240
2241    fn get_aio_context(&self, addr: UserAddress) -> Option<(Range<UserAddress>, Arc<AioContext>)> {
2242        let Some((range, mapping)) = self.mappings.get(addr) else {
2243            return None;
2244        };
2245        let MappingNameRef::AioContext(ref aio_context) = mapping.name() else {
2246            return None;
2247        };
2248        if !mapping.can_read() {
2249            return None;
2250        }
2251        Some((range.clone(), Arc::clone(aio_context)))
2252    }
2253
2254    fn find_uffd<L>(&self, locked: &mut Locked<L>, addr: UserAddress) -> Option<Arc<UserFault>>
2255    where
2256        L: LockBefore<UserFaultInner>,
2257    {
2258        for userfault in self.userfaultfds.iter() {
2259            if let Some(userfault) = userfault.upgrade() {
2260                if userfault.contains_addr(locked, addr) {
2261                    return Some(userfault);
2262                }
2263            }
2264        }
2265        None
2266    }
2267
2268    fn cache_flush(
2269        &self,
2270        range: Range<UserAddress>,
2271        context: &MappingContext,
2272    ) -> Result<(), Errno> {
2273        let mut addr = range.start;
2274        let size = range.end - range.start;
2275        for (mapping, len) in self.get_contiguous_mappings_at(addr, size, context)? {
2276            if !mapping.can_read() {
2277                return error!(EFAULT);
2278            }
2279            if mapping.mapping_mode() == MappingMode::Lazy {
2280                addr = (addr + len)?;
2281                continue;
2282            }
2283            // SAFETY: This is operating on a readable restricted mode mapping and will not fault.
2284            zx::Status::ok(unsafe {
2285                zx::sys::zx_cache_flush(
2286                    addr.ptr() as *const u8,
2287                    len,
2288                    zx::sys::ZX_CACHE_FLUSH_DATA | zx::sys::ZX_CACHE_FLUSH_INSN,
2289                )
2290            })
2291            .map_err(impossible_error)?;
2292
2293            addr = (addr + len).unwrap(); // unwrap since we're iterating within the address space.
2294        }
2295        // Did we flush the entire range?
2296        if addr != range.end { error!(EFAULT) } else { Ok(()) }
2297    }
2298
2299    /// Register the address space managed by this memory manager for interest in
2300    /// receiving private expedited memory barriers of the given kind.
2301    pub fn register_membarrier_private_expedited(
2302        &mut self,
2303        mtype: MembarrierType,
2304    ) -> Result<(), Errno> {
2305        let registrations = &mut self.forkable_state.membarrier_registrations;
2306        match mtype {
2307            MembarrierType::Memory => {
2308                registrations.memory = true;
2309            }
2310            MembarrierType::SyncCore => {
2311                registrations.sync_core = true;
2312            }
2313        }
2314        Ok(())
2315    }
2316
2317    /// Checks if the address space managed by this memory manager is registered
2318    /// for interest in private expedited barriers of the given kind.
2319    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2320        let registrations = &self.forkable_state.membarrier_registrations;
2321        match mtype {
2322            MembarrierType::Memory => registrations.memory,
2323            MembarrierType::SyncCore => registrations.sync_core,
2324        }
2325    }
2326
2327    fn force_write_memory(
2328        &mut self,
2329        context: &MappingContext,
2330        addr: UserAddress,
2331        bytes: &[u8],
2332        released_mappings: &mut ReleasedMappings,
2333    ) -> Result<(), Errno> {
2334        let (range, mapping) = {
2335            let (r, m) = self.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
2336            (r.clone(), m.clone())
2337        };
2338        if range.end < addr.saturating_add(bytes.len()) {
2339            track_stub!(
2340                TODO("https://fxbug.dev/445790710"),
2341                "ptrace poke across multiple mappings"
2342            );
2343            return error!(EFAULT);
2344        }
2345
2346        // Don't create CoW copy of shared memory, go through regular syscall writing.
2347        if mapping.flags().contains(MappingFlags::SHARED) {
2348            if !mapping.can_write() {
2349                // Linux returns EIO here instead of EFAULT.
2350                return error!(EIO);
2351            }
2352            return self.write_mapping_memory(addr, &mapping, &bytes, context);
2353        }
2354
2355        let backing = match self.get_mapping_backing(&mapping) {
2356            MappingBacking::PrivateAnonymous => {
2357                // Starnix has a writable handle to private anonymous memory.
2358                return context.private_anonymous.write_memory(addr, &bytes);
2359            }
2360            MappingBacking::Memory(backing) => backing,
2361        };
2362
2363        let vmo = backing.memory().as_vmo().ok_or_else(|| errno!(EFAULT))?;
2364        let addr_offset = backing.address_to_offset(addr);
2365        let can_exec =
2366            vmo.basic_info().expect("get VMO handle info").rights.contains(Rights::EXECUTE);
2367
2368        // Attempt to write to existing VMO
2369        match vmo.write(&bytes, addr_offset) {
2370            Ok(()) => {
2371                if can_exec {
2372                    // Issue a barrier to avoid executing stale instructions.
2373                    system_barrier(BarrierType::InstructionStream);
2374                }
2375                return Ok(());
2376            }
2377
2378            Err(zx::Status::ACCESS_DENIED) => { /* Fall through */ }
2379
2380            Err(status) => {
2381                return Err(MemoryManager::get_errno_for_vmo_err(status));
2382            }
2383        }
2384
2385        // Create a CoW child of the entire VMO and swap with the backing.
2386        let mapping_offset = backing.address_to_offset(range.start);
2387        let len = range.end - range.start;
2388
2389        // 1. Obtain a writable child of the VMO.
2390        let size = vmo.get_size().map_err(MemoryManager::get_errno_for_vmo_err)?;
2391        let child_vmo = vmo
2392            .create_child(VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, size)
2393            .map_err(MemoryManager::get_errno_for_vmo_err)?;
2394
2395        // 2. Modify the memory.
2396        child_vmo.write(&bytes, addr_offset).map_err(MemoryManager::get_errno_for_vmo_err)?;
2397
2398        // 3. If needed, remint the VMO as executable. Zircon flushes instruction caches when
2399        // mapping executable memory below, so a barrier isn't necessary here.
2400        let child_vmo = if can_exec {
2401            child_vmo
2402                .replace_as_executable(&VMEX_RESOURCE)
2403                .map_err(MemoryManager::get_errno_for_vmo_err)?
2404        } else {
2405            child_vmo
2406        };
2407
2408        // Ensure that the mapping that `addr` falls into is mapped in the user VMAR.
2409        // This ensures that the mapping's mode becomes `Eager` (if it was `Lazy`),
2410        // otherwise, we might clone a `Lazy` mapping but map it unconditionally below,
2411        // leading to state drift where a mapping is mapped in Zircon but marked as lazy in Starnix.
2412        self.ensure_range_mapped_in_user_vmar(addr, None, context)?;
2413
2414        // 4. Map the new VMO into user VMAR
2415        let memory = Arc::new(MemoryObject::from(child_vmo));
2416        context.map_in_user_vmar(
2417            SelectedAddress::FixedOverwrite(range.start),
2418            &memory,
2419            mapping_offset,
2420            len,
2421            mapping.flags(),
2422            false,
2423        )?;
2424
2425        // 5. Update mappings
2426        let new_backing = MappingBackingMemory::new(range.start, memory, mapping_offset);
2427
2428        let mut new_mapping = mapping.clone();
2429        new_mapping.set_backing_internal(MappingBacking::Memory(Box::new(new_backing)));
2430
2431        released_mappings.extend(self.mappings.insert(range, new_mapping));
2432
2433        Ok(())
2434    }
2435
2436    fn set_brk<L>(
2437        &mut self,
2438        locked: &mut Locked<L>,
2439        current_task: &CurrentTask,
2440        mm: &Arc<MemoryManager>,
2441        addr: UserAddress,
2442        released_mappings: &mut ReleasedMappings,
2443    ) -> Result<UserAddress, Errno>
2444    where
2445        L: LockBefore<ThreadGroupLimits>,
2446    {
2447        let rlimit_data = std::cmp::min(
2448            PROGRAM_BREAK_LIMIT,
2449            current_task.thread_group().get_rlimit(locked, Resource::DATA),
2450        );
2451
2452        let brk = match self.brk.clone() {
2453            None => {
2454                let brk = ProgramBreak { base: self.brk_origin, current: self.brk_origin };
2455                self.brk = Some(brk.clone());
2456                brk
2457            }
2458            Some(brk) => brk,
2459        };
2460
2461        let Ok(last_address) = brk.base + rlimit_data else {
2462            // The requested program break is out-of-range. We're supposed to simply
2463            // return the current program break.
2464            return Ok(brk.current);
2465        };
2466
2467        if addr < brk.base || addr > last_address {
2468            // The requested program break is out-of-range. We're supposed to simply
2469            // return the current program break.
2470            return Ok(brk.current);
2471        }
2472
2473        let old_end = brk.current.round_up(*PAGE_SIZE).unwrap();
2474        let new_end = addr.round_up(*PAGE_SIZE).unwrap();
2475
2476        match new_end.cmp(&old_end) {
2477            std::cmp::Ordering::Less => {
2478                // Shrinking the program break removes any mapped pages in the
2479                // affected range, regardless of whether they were actually program
2480                // break pages, or other mappings.
2481                let delta = old_end - new_end;
2482
2483                if self.unmap(mm, new_end, delta, released_mappings).is_err() {
2484                    return Ok(brk.current);
2485                }
2486            }
2487            std::cmp::Ordering::Greater => {
2488                let range = old_end..new_end;
2489                let delta = new_end - old_end;
2490
2491                // Check for mappings over the program break region.
2492                if self.mappings.range(range).next().is_some() {
2493                    return Ok(brk.current);
2494                }
2495
2496                if self
2497                    .map_anonymous(
2498                        mm,
2499                        DesiredAddress::FixedOverwrite(old_end),
2500                        delta,
2501                        ProtectionFlags::READ | ProtectionFlags::WRITE,
2502                        MappingOptions::ANONYMOUS,
2503                        MappingName::Heap,
2504                        released_mappings,
2505                    )
2506                    .is_err()
2507                {
2508                    return Ok(brk.current);
2509                }
2510            }
2511            _ => {}
2512        };
2513
2514        // Any required updates to the program break succeeded, so update internal state.
2515        let mut new_brk = brk;
2516        new_brk.current = addr;
2517        self.brk = Some(new_brk);
2518
2519        Ok(addr)
2520    }
2521
2522    fn register_with_uffd<L>(
2523        &mut self,
2524        mm: &MemoryManager,
2525        locked: &mut Locked<L>,
2526        addr: UserAddress,
2527        length: usize,
2528        userfault: &Arc<UserFault>,
2529        mode: FaultRegisterMode,
2530        released_mappings: &mut ReleasedMappings,
2531    ) -> Result<(), Errno>
2532    where
2533        L: LockBefore<UserFaultInner>,
2534    {
2535        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2536        let range_for_op = addr..end_addr;
2537        let mut updates = vec![];
2538
2539        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2540            if !mapping.private_anonymous() {
2541                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2542                return error!(EINVAL);
2543            }
2544            if mapping.flags().contains(MappingFlags::UFFD) {
2545                return error!(EBUSY);
2546            }
2547            let range = range.intersect(&range_for_op);
2548            let mut mapping = mapping.clone();
2549            mapping.set_uffd(mode);
2550            updates.push((range, mapping));
2551        }
2552        if updates.is_empty() {
2553            return error!(EINVAL);
2554        }
2555
2556        mm.protect_vmar_range(addr, length, ProtectionFlags::empty())
2557            .expect("Failed to remove protections on uffd-registered range");
2558
2559        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2560        for (range, mapping) in updates {
2561            released_mappings.extend(self.mappings.insert(range, mapping));
2562        }
2563
2564        userfault.insert_pages(locked, range_for_op, false);
2565
2566        Ok(())
2567    }
2568
2569    fn unregister_range_from_uffd<L>(
2570        &mut self,
2571        mm: &MemoryManager,
2572        locked: &mut Locked<L>,
2573        userfault: &Arc<UserFault>,
2574        addr: UserAddress,
2575        length: usize,
2576        released_mappings: &mut ReleasedMappings,
2577    ) -> Result<(), Errno>
2578    where
2579        L: LockBefore<UserFaultInner>,
2580    {
2581        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
2582        let range_for_op = addr..end_addr;
2583        let mut updates = vec![];
2584
2585        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
2586            if !mapping.private_anonymous() {
2587                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
2588                return error!(EINVAL);
2589            }
2590            if mapping.flags().contains(MappingFlags::UFFD) {
2591                let range = range.intersect(&range_for_op);
2592                if userfault.remove_pages(locked, range.clone()) {
2593                    let mut mapping = mapping.clone();
2594                    mapping.clear_uffd();
2595                    updates.push((range, mapping));
2596                }
2597            }
2598        }
2599        for (range, mapping) in updates {
2600            let length = range.end - range.start;
2601            let restored_flags = mapping.flags().access_flags();
2602
2603            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2604
2605            mm.protect_vmar_range(range.start, length, restored_flags)
2606                .expect("Failed to restore original protection bits on uffd-registered range");
2607        }
2608        Ok(())
2609    }
2610
2611    fn unregister_uffd<L>(
2612        &mut self,
2613        mm: &MemoryManager,
2614        locked: &mut Locked<L>,
2615        userfault: &Arc<UserFault>,
2616        released_mappings: &mut ReleasedMappings,
2617    ) where
2618        L: LockBefore<UserFaultInner>,
2619    {
2620        let mut updates = vec![];
2621
2622        for (range, mapping) in self.mappings.iter() {
2623            if mapping.flags().contains(MappingFlags::UFFD) {
2624                for range in userfault.get_registered_pages_overlapping_range(locked, range.clone())
2625                {
2626                    let mut mapping = mapping.clone();
2627                    mapping.clear_uffd();
2628                    updates.push((range.clone(), mapping));
2629                }
2630            }
2631        }
2632        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
2633        for (range, mapping) in updates {
2634            let length = range.end - range.start;
2635            let restored_flags = mapping.flags().access_flags();
2636            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
2637            // We can't recover from an error here as this is run during the cleanup.
2638            mm.protect_vmar_range(range.start, length, restored_flags)
2639                .expect("Failed to restore original protection bits on uffd-registered range");
2640        }
2641
2642        userfault.remove_pages(
2643            locked,
2644            UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
2645                ..UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS),
2646        );
2647
2648        let weak_userfault = Arc::downgrade(userfault);
2649        self.userfaultfds.retain(|uf| !Weak::ptr_eq(uf, &weak_userfault));
2650    }
2651
2652    fn set_mapping_name(
2653        &mut self,
2654        addr: UserAddress,
2655        length: usize,
2656        name: Option<FsString>,
2657        released_mappings: &mut ReleasedMappings,
2658    ) -> Result<(), Errno> {
2659        if addr.ptr() % *PAGE_SIZE as usize != 0 {
2660            return error!(EINVAL);
2661        }
2662        let end = match addr.checked_add(length) {
2663            Some(addr) => addr.round_up(*PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
2664            None => return error!(EINVAL),
2665        };
2666
2667        let mappings_in_range =
2668            self.mappings.range(addr..end).map(|(r, m)| (r.clone(), m.clone())).collect::<Vec<_>>();
2669
2670        if mappings_in_range.is_empty() {
2671            return error!(EINVAL);
2672        }
2673        if !mappings_in_range.first().unwrap().0.contains(&addr) {
2674            return error!(ENOMEM);
2675        }
2676
2677        let mut last_range_end = None;
2678        // There's no get_mut on RangeMap, because it would be hard to implement correctly in
2679        // combination with merging of adjacent mappings. Instead, make a copy, change the copy,
2680        // and insert the copy.
2681        for (mut range, mut mapping) in mappings_in_range {
2682            if mapping.name().is_file() {
2683                // It's invalid to assign a name to a file-backed mapping.
2684                return error!(EBADF);
2685            }
2686            // Handle mappings that start before the region to be named.
2687            range.start = std::cmp::max(range.start, addr);
2688            // Handle mappings that extend past the region to be named.
2689            range.end = std::cmp::min(range.end, end);
2690
2691            if let Some(last_range_end) = last_range_end {
2692                if last_range_end != range.start {
2693                    // The name must apply to a contiguous range of mapped pages.
2694                    return error!(ENOMEM);
2695                }
2696            }
2697            last_range_end = Some(range.end.round_up(*PAGE_SIZE)?);
2698            // TODO(b/310255065): We have no place to store names in a way visible to programs outside of Starnix
2699            // such as memory analysis tools.
2700            if let MappingBacking::Memory(backing) = self.get_mapping_backing(&mapping) {
2701                match &name {
2702                    Some(memory_name) => {
2703                        backing.memory().set_zx_name(memory_name);
2704                    }
2705                    None => {
2706                        backing.memory().set_zx_name(b"");
2707                    }
2708                }
2709            }
2710            mapping.set_name(match &name {
2711                Some(name) => MappingName::Vma(FlyByteStr::new(name.as_bytes())),
2712                None => MappingName::None,
2713            });
2714            released_mappings.extend(self.mappings.insert(range, mapping));
2715        }
2716        if let Some(last_range_end) = last_range_end {
2717            if last_range_end < end {
2718                // The name must apply to a contiguous range of mapped pages.
2719                return error!(ENOMEM);
2720            }
2721        }
2722        Ok(())
2723    }
2724}
2725
2726/// The memory pinning shadow process used for mlock().
2727///
2728/// Uses its own distinct shadow process so that it doesn't interfere with other uses of memory
2729/// pinning.
2730pub struct MlockShadowProcess(memory_pinning::ShadowProcess);
2731
2732impl MemoryManager {
2733    /// Ensures that any mapping at `addr` is actually mapped at in the user vmar.
2734    ///
2735    /// If `length` is `None`, it will ensure the mapping only on the page `addr` falls into.
2736    /// Returns `true` if any lazy mappings are mapped.
2737    pub fn ensure_range_mapped_in_user_vmar(
2738        &self,
2739        addr: UserAddress,
2740        length: Option<usize>,
2741    ) -> Result<bool, Errno> {
2742        if !self.state.read().any_ranges_lazy(std::iter::once((addr, length))) {
2743            return Ok(false);
2744        }
2745        self.state.write().ensure_ranges_mapped_in_user_vmar(
2746            std::iter::once((addr, length)),
2747            &self.mapping_context,
2748        )
2749    }
2750
2751    /// Ensures that any mappings in the specified ranges are actually mapped in the user vmar.
2752    ///
2753    /// If `length` is `None`, it will ensure the mapping only on the page `addr` falls into.
2754    /// Returns `true` if any lazy mappings are mapped.
2755    pub fn ensure_ranges_mapped_in_user_vmar<I>(&self, ranges: I) -> Result<bool, Errno>
2756    where
2757        I: IntoIterator<Item = (UserAddress, Option<usize>)>,
2758    {
2759        // Collect ranges into a SmallVec with capacity 4 to avoid heap allocations in the common
2760        // case where there are only a few ranges (e.g., socket read/write buffers).
2761        let ranges = ranges.into_iter().collect::<SmallVec<[_; 4]>>();
2762        if !self.state.read().any_ranges_lazy(ranges.iter().cloned()) {
2763            return Ok(false);
2764        }
2765        self.state.write().ensure_ranges_mapped_in_user_vmar(ranges, &self.mapping_context)
2766    }
2767
2768    pub fn mrelease(&self) -> Result<(), Errno> {
2769        self.mapping_context.private_anonymous.zero(
2770            UserAddress::from_ptr(self.mapping_context.user_vmar_info.base),
2771            self.mapping_context.user_vmar_info.len,
2772        )?;
2773        Ok(())
2774    }
2775
2776    pub fn summarize(&self, summary: &mut crate::mm::MappingSummary) {
2777        let state = self.state.read();
2778        for (_, mapping) in state.mappings.iter() {
2779            summary.add(&state, mapping);
2780        }
2781    }
2782
2783    pub fn get_mappings_for_vmsplice(
2784        self: &Arc<MemoryManager>,
2785        buffers: &UserBuffers,
2786    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
2787        self.state.read().get_mappings_for_vmsplice(self, buffers)
2788    }
2789
2790    pub fn has_same_address_space(&self, other: &Self) -> bool {
2791        std::ptr::eq(self, other)
2792    }
2793
2794    fn unified_transfer_loop<F>(
2795        &self,
2796        addr: UserAddress,
2797        len: usize,
2798        mut transfer_fn: F,
2799    ) -> Result<usize, Errno>
2800    where
2801        F: FnMut(UserAddress, usize) -> Result<ControlFlow<usize, usize>, Errno>,
2802    {
2803        let mut copied = 0;
2804        while copied < len {
2805            match transfer_fn((addr + copied)?, copied)? {
2806                ControlFlow::Continue(num_copied) => {
2807                    if num_copied == 0 {
2808                        let fault_addr = (addr + copied)?;
2809                        // If we successfully mapped a lazy mapping, retry the copy.
2810                        // Otherwise, this might be a permission fault or invalid address, so we
2811                        // stop and return the partial result.
2812                        //
2813                        // NOTE: We lazily materialize mappings one page at a time here.
2814                        // An alternative approach would be to materialize the entire range
2815                        // or the first mapping up front. That might avoid bouncing between
2816                        // threads on faults, but adds overhead (locks and range lookups)
2817                        // if the memory is already mapped. We use the reactive approach
2818                        // for now, but this could be tuned in the future.
2819                        if self.ensure_range_mapped_in_user_vmar(fault_addr, None)? {
2820                            continue;
2821                        } else {
2822                            break;
2823                        }
2824                    }
2825                    copied += num_copied;
2826                }
2827                ControlFlow::Break(num_copied) => {
2828                    copied += num_copied;
2829                    break;
2830                }
2831            }
2832        }
2833        Ok(copied)
2834    }
2835
2836    pub fn unified_read_memory<'a>(
2837        &self,
2838        current_task: &CurrentTask,
2839        addr: UserAddress,
2840        bytes: &'a mut [MaybeUninit<u8>],
2841    ) -> Result<&'a mut [u8], Errno> {
2842        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2843
2844        if let Some(usercopy) = usercopy() {
2845            let buf_ptr = bytes.as_mut_ptr();
2846            let buf_len = bytes.len();
2847
2848            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
2849                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
2850                let current_bytes = unsafe {
2851                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
2852                };
2853                let (read_bytes, _unread_bytes) = usercopy.copyin(cur_addr.ptr(), current_bytes);
2854                Ok(ControlFlow::Continue(read_bytes.len()))
2855            })?;
2856            if copied < bytes.len() {
2857                error!(EFAULT)
2858            } else {
2859                // SAFETY: All bytes up to `buf_len` have been initialized.
2860                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, buf_len) })
2861            }
2862        } else {
2863            self.syscall_read_memory(addr, bytes)
2864        }
2865    }
2866
2867    pub fn syscall_read_memory<'a>(
2868        &self,
2869        addr: UserAddress,
2870        bytes: &'a mut [MaybeUninit<u8>],
2871    ) -> Result<&'a mut [u8], Errno> {
2872        self.state.read().read_memory(addr, bytes, &self.mapping_context)
2873    }
2874
2875    pub fn unified_read_memory_partial_until_null_byte<'a>(
2876        &self,
2877        current_task: &CurrentTask,
2878        addr: UserAddress,
2879        bytes: &'a mut [MaybeUninit<u8>],
2880    ) -> Result<&'a mut [u8], Errno> {
2881        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2882
2883        if let Some(usercopy) = usercopy() {
2884            let buf_ptr = bytes.as_mut_ptr();
2885            let buf_len = bytes.len();
2886
2887            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
2888                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
2889                let current_bytes = unsafe {
2890                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
2891                };
2892                let (read_bytes, _unread_bytes) =
2893                    usercopy.copyin_until_null_byte(cur_addr.ptr(), current_bytes);
2894
2895                let num_copied = read_bytes.len();
2896                if read_bytes.last().map(|b| *b == 0).unwrap_or(false) {
2897                    Ok(ControlFlow::Break(num_copied))
2898                } else {
2899                    Ok(ControlFlow::Continue(num_copied))
2900                }
2901            })?;
2902            if copied == 0 && !bytes.is_empty() {
2903                error!(EFAULT)
2904            } else {
2905                // SAFETY: Bytes up to `copied` have been initialized.
2906                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, copied) })
2907            }
2908        } else {
2909            self.syscall_read_memory_partial_until_null_byte(addr, bytes)
2910        }
2911    }
2912
2913    pub fn syscall_read_memory_partial_until_null_byte<'a>(
2914        &self,
2915        addr: UserAddress,
2916        bytes: &'a mut [MaybeUninit<u8>],
2917    ) -> Result<&'a mut [u8], Errno> {
2918        self.state.read().read_memory_partial_until_null_byte(addr, bytes, &self.mapping_context)
2919    }
2920
2921    pub fn unified_read_memory_partial<'a>(
2922        &self,
2923        current_task: &CurrentTask,
2924        addr: UserAddress,
2925        bytes: &'a mut [MaybeUninit<u8>],
2926    ) -> Result<&'a mut [u8], Errno> {
2927        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2928
2929        if let Some(usercopy) = usercopy() {
2930            let buf_ptr = bytes.as_mut_ptr();
2931            let buf_len = bytes.len();
2932
2933            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
2934                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
2935                let current_bytes = unsafe {
2936                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
2937                };
2938                let (read_bytes, _unread_bytes) = usercopy.copyin(cur_addr.ptr(), current_bytes);
2939                Ok(ControlFlow::Continue(read_bytes.len()))
2940            })?;
2941            if copied == 0 && !bytes.is_empty() {
2942                error!(EFAULT)
2943            } else {
2944                // SAFETY: Bytes up to `copied` have been initialized.
2945                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, copied) })
2946            }
2947        } else {
2948            self.syscall_read_memory_partial(addr, bytes)
2949        }
2950    }
2951
2952    pub fn syscall_read_memory_partial<'a>(
2953        &self,
2954        addr: UserAddress,
2955        bytes: &'a mut [MaybeUninit<u8>],
2956    ) -> Result<&'a mut [u8], Errno> {
2957        self.state.read().read_memory_partial(addr, bytes, &self.mapping_context)
2958    }
2959
2960    pub fn unified_write_memory(
2961        &self,
2962        current_task: &CurrentTask,
2963        addr: UserAddress,
2964        bytes: &[u8],
2965    ) -> Result<usize, Errno> {
2966        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2967
2968        if let Some(usercopy) = usercopy() {
2969            let len = bytes.len();
2970            let copied = self.unified_transfer_loop(addr, len, |cur_addr, offset| {
2971                Ok(ControlFlow::Continue(usercopy.copyout(&bytes[offset..], cur_addr.ptr())))
2972            })?;
2973            if copied < bytes.len() { error!(EFAULT) } else { Ok(copied) }
2974        } else {
2975            self.syscall_write_memory(addr, bytes)
2976        }
2977    }
2978
2979    /// Write `bytes` to memory address `addr`, making a copy-on-write child of the VMO backing and
2980    /// replacing the mapping if necessary.
2981    ///
2982    /// NOTE: this bypasses userspace's memory protection configuration and should only be called
2983    /// by codepaths like ptrace which bypass memory protection.
2984    pub fn force_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<(), Errno> {
2985        let mut state = self.state.write();
2986        let mut released_mappings = ReleasedMappings::default();
2987        let result =
2988            state.force_write_memory(&self.mapping_context, addr, bytes, &mut released_mappings);
2989        released_mappings.finalize(state);
2990        result
2991    }
2992
2993    pub fn syscall_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
2994        self.state.read().write_memory(addr, bytes, &self.mapping_context)
2995    }
2996
2997    pub fn unified_write_memory_partial(
2998        &self,
2999        current_task: &CurrentTask,
3000        addr: UserAddress,
3001        bytes: &[u8],
3002    ) -> Result<usize, Errno> {
3003        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
3004
3005        if let Some(usercopy) = usercopy() {
3006            let len = bytes.len();
3007            let copied = self.unified_transfer_loop(addr, len, |cur_addr, offset| {
3008                Ok(ControlFlow::Continue(usercopy.copyout(&bytes[offset..], cur_addr.ptr())))
3009            })?;
3010            if copied == 0 && !bytes.is_empty() { error!(EFAULT) } else { Ok(copied) }
3011        } else {
3012            self.syscall_write_memory_partial(addr, bytes)
3013        }
3014    }
3015
3016    pub fn syscall_write_memory_partial(
3017        &self,
3018        addr: UserAddress,
3019        bytes: &[u8],
3020    ) -> Result<usize, Errno> {
3021        self.state.read().write_memory_partial(addr, bytes, &self.mapping_context)
3022    }
3023
3024    pub fn unified_zero(
3025        &self,
3026        current_task: &CurrentTask,
3027        addr: UserAddress,
3028        length: usize,
3029    ) -> Result<usize, Errno> {
3030        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
3031
3032        {
3033            let page_size = *PAGE_SIZE as usize;
3034            // Get the page boundary immediately following `addr` if `addr` is
3035            // not page aligned.
3036            let next_page_boundary = round_up_to_system_page_size(addr.ptr())?;
3037            // The number of bytes needed to zero at least a full page (not just
3038            // a pages worth of bytes) starting at `addr`.
3039            let length_with_atleast_one_full_page = page_size + (next_page_boundary - addr.ptr());
3040            // If at least one full page is being zeroed, go through the memory object since Zircon
3041            // can swap the mapped pages with the zero page which should be cheaper than zeroing
3042            // out a pages worth of bytes manually.
3043            //
3044            // If we are not zeroing out a full page, then go through usercopy
3045            // if unified aspaces is enabled.
3046            if length >= length_with_atleast_one_full_page {
3047                return self.syscall_zero(addr, length);
3048            }
3049        }
3050
3051        if let Some(usercopy) = usercopy() {
3052            let copied = self.unified_transfer_loop(addr, length, |cur_addr, offset| {
3053                Ok(ControlFlow::Continue(usercopy.zero(cur_addr.ptr(), length - offset)))
3054            })?;
3055            if copied == 0 && length > 0 { error!(EFAULT) } else { Ok(copied) }
3056        } else {
3057            self.syscall_zero(addr, length)
3058        }
3059    }
3060
3061    pub fn syscall_zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
3062        self.state.read().zero(addr, length, &self.mapping_context)
3063    }
3064
3065    /// Performs a data and instruction cache flush over the given address range.
3066    pub fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
3067        self.state.read().cache_flush(range, &self.mapping_context)
3068    }
3069
3070    /// Register the address space managed by this memory manager for interest in
3071    /// receiving private expedited memory barriers of the given type.
3072    pub fn register_membarrier_private_expedited(
3073        &self,
3074        mtype: MembarrierType,
3075    ) -> Result<(), Errno> {
3076        self.state.write().register_membarrier_private_expedited(mtype)
3077    }
3078
3079    /// Checks if the address space managed by this memory manager is registered
3080    /// for interest in private expedited barriers of the given kind.
3081    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
3082        self.state.read().membarrier_private_expedited_registered(mtype)
3083    }
3084}
3085
/// State and resources of the `MemoryManager` that are either immutable after creation
/// or handle their own interior mutability (e.g., `private_anonymous`).
///
/// Unlike `MemoryManagerState`, the fields here can be accessed without acquiring the
/// `MemoryManager`'s main lock, which allows concurrent access to these resources without
/// lock contention.
///
/// This structure primarily holds the Zircon VMAR handle and the manager for private
/// anonymous memory, which are the core primitives used to manipulate the address space.
pub struct MappingContext {
    /// The VMAR in which userspace mappings occur.
    ///
    /// Userspace memory is mapped in this child VMAR so that the entire VMAR can be destroyed
    /// during exec.
    /// For 32-bit tasks, the user_vmar is limited to correspond to the available memory.
    ///
    /// This field is set to `ZX_HANDLE_INVALID` when the address-space has been destroyed (e.g. on
    /// `exec()`), allowing the value to be pro-actively checked for, or the `ZX_ERR_BAD_HANDLE`
    /// status return from Zircon operations handled, to suit the call-site.
    pub user_vmar: zx::Vmar,

    /// Cached `VmarInfo` for `user_vmar`; its `base` and `len` describe the user address range.
    pub user_vmar_info: zx::VmarInfo,

    /// Memory object backing private, anonymous memory allocations in this address space.
    pub private_anonymous: PrivateAnonymousMemoryManager,
}
3113
3114impl MappingContext {
3115    fn map_in_user_vmar(
3116        &self,
3117        addr: SelectedAddress,
3118        memory: &MemoryObject,
3119        memory_offset: u64,
3120        length: usize,
3121        flags: MappingFlags,
3122        populate: bool,
3123    ) -> Result<(), Errno> {
3124        map_in_vmar(
3125            &self.user_vmar,
3126            &self.user_vmar_info,
3127            addr,
3128            memory,
3129            memory_offset,
3130            length,
3131            flags,
3132            populate,
3133        )
3134    }
3135
3136    pub fn max_address(&self) -> UserAddress {
3137        UserAddress::from_ptr(self.user_vmar_info.base + self.user_vmar_info.len)
3138    }
3139}
3140
/// Per-address-space memory management state: the user VMAR, its mappings, futexes and
/// related bookkeeping.
pub struct MemoryManager {
    /// The base address of the root_vmar.
    pub base_addr: UserAddress,

    /// The futexes in this address space.
    pub futex: Arc<FutexTable<PrivateFutexKey>>,

    /// The mapping context for this address space (VMAR handle, cached VMAR info and the
    /// private anonymous memory manager). Accessible without taking the `state` lock.
    pub mapping_context: MappingContext,

    /// Mutable state for the memory manager, guarded by a reader-writer lock.
    pub state: RwLock<MemoryManagerState>,

    /// Whether this address space is dumpable.
    pub dumpable: OrderedMutex<DumpPolicy, MmDumpable>,

    /// Maximum valid user address for this vmar.
    pub maximum_valid_user_address: UserAddress,

    /// In-flight payloads enqueued to a pipe as a consequence of a `vmsplice(2)`
    /// operation.
    ///
    /// For details on why we need to keep track of in-flight vmspliced payloads,
    /// see [`VmsplicePayload`].
    ///
    /// For details on why this isn't under the `RwLock` protected `MemoryManagerState`,
    /// see [`InflightVmsplicedPayloads::payloads`].
    pub inflight_vmspliced_payloads: InflightVmsplicedPayloads,

    /// A mechanism to be notified when this `MemoryManager` is destroyed.
    pub drop_notifier: DropNotifier,

    /// The architecture width of the process.
    pub arch_width: ArchWidth,
}
3176
3177impl ArchSpecific for MemoryManager {
3178    fn is_arch32(&self) -> bool {
3179        self.arch_width.is_arch32()
3180    }
3181}
3182
3183fn check_access_permissions_in_page_fault(
3184    decoded: &PageFaultExceptionReport,
3185    mapping: &Mapping,
3186) -> bool {
3187    let exec_denied = decoded.is_execute && !mapping.can_exec();
3188    let write_denied = decoded.is_write && !mapping.can_write();
3189    let read_denied = (!decoded.is_execute && !decoded.is_write) && !mapping.can_read();
3190    !exec_denied && !write_denied && !read_denied
3191}
3192
3193impl MemoryManager {
3194    /// Returns a new `MemoryManager` suitable for use in tests.
3195    pub fn new_for_test(root_vmar: zx::Unowned<'_, zx::Vmar>, arch_width: ArchWidth) -> Arc<Self> {
3196        Self::new(root_vmar, arch_width, None, None).expect("can create MemoryManager")
3197    }
3198
3199    // Returns details of mappings in the `user_vmar`, or an empty vector if the `user_vmar` has
3200    // been destroyed.
3201    fn with_zx_mappings<R>(
3202        &self,
3203        current_task: &CurrentTask,
3204        op: impl FnOnce(&[zx::MapInfo]) -> R,
3205    ) -> R {
3206        MapInfoCache::get_or_init(current_task)
3207            .expect("must be able to retrieve map info cache")
3208            .with_map_infos(&self.mapping_context.user_vmar, |infos| match infos {
3209                Ok(infos) => op(infos),
3210                Err(_) => op(&[]),
3211            })
3212    }
3213
3214    fn protect_vmar_range(
3215        &self,
3216        addr: UserAddress,
3217        length: usize,
3218        prot_flags: ProtectionFlags,
3219    ) -> Result<(), Errno> {
3220        let vmar_flags = prot_flags.to_vmar_flags();
3221        // SAFETY: Modifying user vmar
3222        unsafe { self.mapping_context.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(
3223            |s| match s {
3224                zx::Status::INVALID_ARGS => errno!(EINVAL),
3225                zx::Status::NOT_FOUND => errno!(ENOMEM),
3226                zx::Status::ACCESS_DENIED => errno!(EACCES),
3227                _ => impossible_error(s),
3228            },
3229        )
3230    }
3231
3232    pub fn total_locked_bytes(&self) -> u64 {
3233        self.state.read().num_locked_bytes(
3234            UserAddress::from(self.mapping_context.user_vmar_info.base as u64)
3235                ..UserAddress::from(
3236                    (self.mapping_context.user_vmar_info.base
3237                        + self.mapping_context.user_vmar_info.len) as u64,
3238                ),
3239        )
3240    }
3241
3242    /// Returns a new `MemoryManager` initialized with a new userspace VMAR matching the specified
3243    /// `arch_width`, under the specified restricted-mode `root_vmar`.  The `executable_node` that
3244    /// the new address-space will execute may optionally be supplied.
3245    fn new(
3246        root_vmar: zx::Unowned<'_, zx::Vmar>,
3247        arch_width: ArchWidth,
3248        executable_node: Option<NamespaceNode>,
3249        private_anonymous: Option<PrivateAnonymousMemoryManager>,
3250    ) -> Result<Arc<Self>, Errno> {
3251        debug_assert!(!root_vmar.is_invalid());
3252
3253        let mut vmar_info = root_vmar.info().map_err(|status| from_status_like_fdio!(status))?;
3254        if arch_width.is_arch32() {
3255            vmar_info.len = (LOWER_4GB_LIMIT.ptr() - vmar_info.base) as usize;
3256        }
3257
3258        let (user_vmar, ptr) = root_vmar
3259            .allocate(
3260                0,
3261                vmar_info.len,
3262                zx::VmarFlags::SPECIFIC
3263                    | zx::VmarFlags::CAN_MAP_SPECIFIC
3264                    | zx::VmarFlags::CAN_MAP_READ
3265                    | zx::VmarFlags::CAN_MAP_WRITE
3266                    | zx::VmarFlags::CAN_MAP_EXECUTE,
3267            )
3268            .map_err(|status| from_status_like_fdio!(status))?;
3269        assert_eq!(ptr, vmar_info.base);
3270
3271        let user_vmar_info = user_vmar.info().map_err(|status| from_status_like_fdio!(status))?;
3272
3273        // Ensure that the `user_vmar_info` matches assumptions for the requested layout.
3274        debug_assert_eq!(RESTRICTED_ASPACE_BASE, user_vmar_info.base);
3275        if arch_width.is_arch32() {
3276            debug_assert_eq!(LOWER_4GB_LIMIT.ptr() - user_vmar_info.base, user_vmar_info.len);
3277        } else {
3278            debug_assert_eq!(RESTRICTED_ASPACE_SIZE, user_vmar_info.len);
3279        }
3280
3281        // The private anonymous backing memory object extend from the user address 0 up to the
3282        // highest mappable address. The pages below `user_vmar_info.base` are never mapped, but
3283        // including them in the memory object makes the math for mapping address to memory object
3284        // offsets simpler.
3285        let backing_size = (user_vmar_info.base + user_vmar_info.len) as u64;
3286
3287        // Place the stack at the end of the address space, subject to ASLR adjustment.
3288        let stack_origin = UserAddress::from_ptr(
3289            user_vmar_info.base + user_vmar_info.len
3290                - MAX_STACK_SIZE
3291                - generate_random_offset_for_aslr(arch_width),
3292        )
3293        .round_up(*PAGE_SIZE)?;
3294
3295        // Set the highest address that `mmap` will assign to the allocations that don't ask for a
3296        // specific address, subject to ASLR adjustment.
3297        let mmap_top = stack_origin
3298            .checked_sub(MAX_STACK_SIZE + generate_random_offset_for_aslr(arch_width))
3299            .ok_or_else(|| errno!(EINVAL))?;
3300
3301        Ok(Arc::new(MemoryManager {
3302            base_addr: UserAddress::from_ptr(user_vmar_info.base),
3303            futex: Arc::<FutexTable<PrivateFutexKey>>::default(),
3304            mapping_context: MappingContext {
3305                user_vmar,
3306                user_vmar_info,
3307                private_anonymous: private_anonymous
3308                    .unwrap_or_else(|| PrivateAnonymousMemoryManager::new(backing_size)),
3309            },
3310            state: RwLock::new(MemoryManagerState {
3311                mappings: Default::default(),
3312                userfaultfds: Default::default(),
3313                shadow_mappings_for_mlock: Default::default(),
3314                forkable_state: MemoryManagerForkableState {
3315                    executable_node,
3316                    stack_origin,
3317                    mmap_top,
3318                    ..Default::default()
3319                },
3320            }),
3321            // TODO(security): Reset to DISABLE, or the value in the fs.suid_dumpable sysctl, under
3322            // certain conditions as specified in the prctl(2) man page.
3323            dumpable: OrderedMutex::new(DumpPolicy::User),
3324            maximum_valid_user_address: UserAddress::from_ptr(
3325                user_vmar_info.base + user_vmar_info.len,
3326            ),
3327            inflight_vmspliced_payloads: Default::default(),
3328            drop_notifier: DropNotifier::default(),
3329            arch_width,
3330        }))
3331    }
3332
3333    pub fn set_brk<L>(
3334        self: &Arc<Self>,
3335        locked: &mut Locked<L>,
3336        current_task: &CurrentTask,
3337        addr: UserAddress,
3338    ) -> Result<UserAddress, Errno>
3339    where
3340        L: LockBefore<ThreadGroupLimits>,
3341    {
3342        let mut state = self.state.write();
3343        let mut released_mappings = ReleasedMappings::default();
3344        let result = state.set_brk(locked, current_task, self, addr, &mut released_mappings);
3345        released_mappings.finalize(state);
3346        result
3347    }
3348
3349    pub fn register_uffd(&self, userfault: &Arc<UserFault>) {
3350        let mut state = self.state.write();
3351        state.userfaultfds.push(Arc::downgrade(userfault));
3352    }
3353
3354    /// Register a given memory range with a userfault object.
3355    pub fn register_with_uffd<L>(
3356        self: &Arc<Self>,
3357        locked: &mut Locked<L>,
3358        addr: UserAddress,
3359        length: usize,
3360        userfault: &Arc<UserFault>,
3361        mode: FaultRegisterMode,
3362    ) -> Result<(), Errno>
3363    where
3364        L: LockBefore<UserFaultInner>,
3365    {
3366        let mut state = self.state.write();
3367        let mut released_mappings = ReleasedMappings::default();
3368        let result = state.register_with_uffd(
3369            self,
3370            locked,
3371            addr,
3372            length,
3373            userfault,
3374            mode,
3375            &mut released_mappings,
3376        );
3377        released_mappings.finalize(state);
3378        result
3379    }
3380
3381    /// Unregister a given range from any userfault objects associated with it.
3382    pub fn unregister_range_from_uffd<L>(
3383        &self,
3384        locked: &mut Locked<L>,
3385        userfault: &Arc<UserFault>,
3386        addr: UserAddress,
3387        length: usize,
3388    ) -> Result<(), Errno>
3389    where
3390        L: LockBefore<UserFaultInner>,
3391    {
3392        let mut state = self.state.write();
3393        let mut released_mappings = ReleasedMappings::default();
3394        let result = state.unregister_range_from_uffd(
3395            self,
3396            locked,
3397            userfault,
3398            addr,
3399            length,
3400            &mut released_mappings,
3401        );
3402        released_mappings.finalize(state);
3403        result
3404    }
3405
3406    /// Unregister any mappings registered with a given userfault object. Used when closing the last
3407    /// file descriptor associated to it.
3408    pub fn unregister_uffd<L>(&self, locked: &mut Locked<L>, userfault: &Arc<UserFault>)
3409    where
3410        L: LockBefore<UserFaultInner>,
3411    {
3412        let mut state = self.state.write();
3413        let mut released_mappings = ReleasedMappings::default();
3414        state.unregister_uffd(self, locked, userfault, &mut released_mappings);
3415        released_mappings.finalize(state);
3416    }
3417
3418    /// Populate a range of pages registered with an userfaulfd according to a `populate` function.
3419    /// This will fail if the pages were not registered with userfaultfd, or if the page at `addr`
3420    /// was already populated. If any page other than the first one was populated, the `length`
3421    /// is adjusted to only include the first N unpopulated pages, and this adjusted length
3422    /// is then passed to `populate`. On success, returns the number of populated bytes.
3423    pub fn populate_from_uffd<F, L>(
3424        &self,
3425        locked: &mut Locked<L>,
3426        addr: UserAddress,
3427        length: usize,
3428        userfault: &Arc<UserFault>,
3429        populate: F,
3430    ) -> Result<usize, Errno>
3431    where
3432        F: FnOnce(&MemoryManagerState, usize) -> Result<usize, Errno>,
3433        L: LockBefore<UserFaultInner>,
3434    {
3435        let state = self.state.read();
3436        // Check that the addr..length range is a contiguous range of mappings which are all
3437        // registered with an userfault object.
3438        let mut bytes_registered_with_uffd = 0;
3439        for (mapping, len) in
3440            state.get_contiguous_mappings_at(addr, length, &self.mapping_context)?
3441        {
3442            if mapping.flags().contains(MappingFlags::UFFD) {
3443                // Check that the mapping is registered with the same uffd. This is not required,
3444                // but we don't support cross-uffd operations yet.
3445                if !userfault.contains_addr(locked, addr) {
3446                    track_stub!(
3447                        TODO("https://fxbug.dev/391599171"),
3448                        "operations across different uffds"
3449                    );
3450                    return error!(ENOTSUP);
3451                };
3452            } else {
3453                return error!(ENOENT);
3454            }
3455            bytes_registered_with_uffd += len;
3456        }
3457        if bytes_registered_with_uffd != length {
3458            return error!(ENOENT);
3459        }
3460
3461        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3462
3463        // Determine how many pages in the requested range are already populated
3464        let first_populated =
3465            userfault.get_first_populated_page_after(locked, addr).ok_or_else(|| errno!(ENOENT))?;
3466        // If the very first page is already populated, uffd operations should just return EEXIST
3467        if first_populated == addr {
3468            return error!(EEXIST);
3469        }
3470        // Otherwise it is possible to do an incomplete operation by only populating pages until
3471        // the first populated one.
3472        let trimmed_end = std::cmp::min(first_populated, end_addr);
3473        let effective_length = trimmed_end - addr;
3474
3475        populate(&state, effective_length)?;
3476        userfault.insert_pages(locked, addr..trimmed_end, true);
3477
3478        // Since we used protection bits to force pagefaults, we now need to reverse this change by
3479        // restoring the protections on the underlying Zircon mappings to the "real" protection bits
3480        // that were kept in the Starnix mappings. This will prevent new pagefaults from being
3481        // generated. Only do this on the pages that were populated by this operation.
3482        for (range, mapping) in state.mappings.range(addr..trimmed_end) {
3483            let range_to_protect = range.intersect(&(addr..trimmed_end));
3484            let restored_flags = mapping.flags().access_flags();
3485            let length = range_to_protect.end - range_to_protect.start;
3486            self.protect_vmar_range(range_to_protect.start, length, restored_flags)
3487                .expect("Failed to restore original protection bits on uffd-registered range");
3488        }
3489        // Return the number of effectively populated bytes, which might be smaller than the
3490        // requested number.
3491        Ok(effective_length)
3492    }
3493
3494    pub fn zero_from_uffd<L>(
3495        &self,
3496        locked: &mut Locked<L>,
3497        addr: UserAddress,
3498        length: usize,
3499        userfault: &Arc<UserFault>,
3500    ) -> Result<usize, Errno>
3501    where
3502        L: LockBefore<UserFaultInner>,
3503    {
3504        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3505            state.zero(addr, effective_length, &self.mapping_context)
3506        })
3507    }
3508
3509    pub fn fill_from_uffd<L>(
3510        &self,
3511        locked: &mut Locked<L>,
3512        addr: UserAddress,
3513        buf: &[u8],
3514        length: usize,
3515        userfault: &Arc<UserFault>,
3516    ) -> Result<usize, Errno>
3517    where
3518        L: LockBefore<UserFaultInner>,
3519    {
3520        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3521            state.write_memory(addr, &buf[..effective_length], &self.mapping_context)
3522        })
3523    }
3524
3525    pub fn copy_from_uffd<L>(
3526        &self,
3527        locked: &mut Locked<L>,
3528        source_addr: UserAddress,
3529        dst_addr: UserAddress,
3530        length: usize,
3531        userfault: &Arc<UserFault>,
3532    ) -> Result<usize, Errno>
3533    where
3534        L: LockBefore<UserFaultInner>,
3535    {
3536        self.populate_from_uffd(locked, dst_addr, length, userfault, |state, effective_length| {
3537            let mut buf = vec![std::mem::MaybeUninit::uninit(); effective_length];
3538            let buf = state.read_memory(source_addr, &mut buf, &self.mapping_context)?;
3539            state.write_memory(dst_addr, &buf[..effective_length], &self.mapping_context)
3540        })
3541    }
3542
3543    /// Returns the new `MemoryManager` for a process, pre-populated with a snapshot of the layout
3544    /// and mappings of `source_mm`.  This is used during `CurrentTask::clone()` operations to
3545    /// create the initial address-space for the cloned child process.
3546    pub fn snapshot_of<L>(
3547        locked: &mut Locked<L>,
3548        source_mm: &Arc<MemoryManager>,
3549        root_vmar: zx::Unowned<'_, zx::Vmar>,
3550        arch_width: ArchWidth,
3551    ) -> Result<Arc<Self>, Errno>
3552    where
3553        L: LockBefore<MmDumpable>,
3554    {
3555        fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "snapshot_of");
3556        let backing_size = (source_mm.mapping_context.user_vmar_info.base
3557            + source_mm.mapping_context.user_vmar_info.len) as u64;
3558        let private_anonymous =
3559            source_mm.mapping_context.private_anonymous.snapshot(backing_size)?;
3560        let target = MemoryManager::new(
3561            root_vmar,
3562            arch_width,
3563            source_mm.executable_node(),
3564            Some(private_anonymous),
3565        )?;
3566
3567        // Hold the lock throughout the operation to uphold memory manager's invariants.
3568        // See mm/README.md.
3569        {
3570            let state: &mut MemoryManagerState = &mut source_mm.state.write();
3571            let mut target_state = target.state.write();
3572            debug_assert_eq!(
3573                source_mm.mapping_context.user_vmar_info,
3574                target.mapping_context.user_vmar_info
3575            );
3576
3577            let mut clone_cache = HashMap::<zx::Koid, Arc<MemoryObject>>::new();
3578
3579            for (range, mapping) in state.mappings.iter() {
3580                if mapping.flags().contains(MappingFlags::DONTFORK) {
3581                    continue;
3582                }
3583                // Locking is not inherited when forking.
3584                let target_mapping_flags = mapping.flags().difference(MappingFlags::LOCKED);
3585                match state.get_mapping_backing(mapping) {
3586                    MappingBacking::Memory(backing) => {
3587                        fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "memory_backing_clone");
3588                        let memory_offset = backing.address_to_offset(range.start);
3589
3590                        let target_memory = if mapping.flags().contains(MappingFlags::SHARED)
3591                            || mapping.name().is_vvar()
3592                        {
3593                            // Note that the Vvar is a special mapping that behaves like a shared mapping but
3594                            // is private to each process.
3595                            backing.memory().clone()
3596                        } else {
3597                            let memory_obj = backing.memory();
3598                            let options = mapping.flags().options();
3599                            let memory =
3600                                clone_cache.entry(memory_obj.get_koid()).or_insert_with_fallible(
3601                                    || memory_obj.clone_memory(memory_obj.get_rights(), options),
3602                                )?;
3603                            memory.clone()
3604                        };
3605
3606                        let mapping = Mapping::with_name(
3607                            MappingBacking::Memory(Box::new(MappingBackingMemory::new(
3608                                range.start,
3609                                target_memory,
3610                                memory_offset,
3611                            ))),
3612                            target_mapping_flags,
3613                            mapping.max_access(),
3614                            mapping.name().to_owned(),
3615                            MappingMode::Lazy,
3616                        );
3617                        assert!(
3618                            target_state.mappings.append_non_overlapping(range.clone(), mapping)
3619                        );
3620                    }
3621                    MappingBacking::PrivateAnonymous => {
3622                        fuchsia_trace::duration!(
3623                            CATEGORY_STARNIX_MM,
3624                            "private_anonymous_backing_clone"
3625                        );
3626                        let length = range.end - range.start;
3627                        if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3628                            target
3629                                .mapping_context
3630                                .private_anonymous
3631                                .zero(range.start, length)
3632                                .map_err(|_| errno!(ENOMEM))?;
3633                        }
3634
3635                        let mapping = Mapping::new_private_anonymous(
3636                            target_mapping_flags,
3637                            mapping.name().to_owned(),
3638                            MappingMode::Lazy,
3639                        );
3640                        assert!(
3641                            target_state.mappings.append_non_overlapping(range.clone(), mapping)
3642                        );
3643                    }
3644                };
3645            }
3646
3647            target_state.forkable_state = state.forkable_state.clone();
3648        }
3649
3650        let self_dumpable = *source_mm.dumpable.lock(locked);
3651        *target.dumpable.lock(locked) = self_dumpable;
3652
3653        Ok(target)
3654    }
3655
3656    /// Returns the replacement `MemoryManager` to be used by the `exec()`ing task.
3657    ///
3658    /// POSIX requires that "a call to any exec function from a process with more than one thread
3659    /// shall result in all threads being terminated and the new executable being loaded and
3660    /// executed. No destructor functions or cleanup handlers shall be called".
3661    /// The caller is responsible for having ensured that this is the only `Task` in the
3662    /// `ThreadGroup`, and thereby the `zx::process`, such that it is safe to tear-down the Zircon
3663    /// userspace VMAR for the current address-space.
3664    pub fn exec(
3665        root_vmar: zx::Unowned<'_, zx::Vmar>,
3666        old_mm: Option<Arc<Self>>,
3667        exe_node: NamespaceNode,
3668        arch_width: ArchWidth,
3669    ) -> Result<Arc<Self>, Errno> {
3670        // To safeguard against concurrent accesses by other tasks through this `MemoryManager`, the
3671        // following steps are performed while holding the write lock on the old MM, if any:
3672        //
3673        // 1. All `mappings` are removed, so that remote `MemoryAccessor` calls will fail.
3674        // 2. The `user_vmar` is `destroy()`ed to free-up the user address-space.
3675        //
3676        // Once these steps are complete it is safe for the old mappings to be dropped.
3677        if let Some(old_mm) = old_mm {
3678            let _old_mappings = {
3679                let mut state = old_mm.state.write();
3680
3681                // SAFETY: This operation is safe because this is the only `Task` active in the address-
3682                // space, and accesses by remote tasks will use syscalls on the `root_vmar`.
3683                unsafe {
3684                    old_mm
3685                        .mapping_context
3686                        .user_vmar
3687                        .destroy()
3688                        .map_err(|status| from_status_like_fdio!(status))?
3689                }
3690
3691                std::mem::replace(&mut state.mappings, Default::default())
3692            };
3693        }
3694
3695        Self::new(root_vmar, arch_width, Some(exe_node), None)
3696    }
3697
3698    pub fn initialize_brk_origin(
3699        &self,
3700        arch_width: ArchWidth,
3701        executable_end: UserAddress,
3702    ) -> Result<(), Errno> {
3703        self.state.write().brk_origin = executable_end
3704            .checked_add(generate_random_offset_for_aslr(arch_width))
3705            .ok_or_else(|| errno!(EINVAL))?;
3706        Ok(())
3707    }
3708
3709    // Get a randomised address for loading a position-independent executable.
3710    pub fn get_random_base_for_executable(
3711        &self,
3712        arch_width: ArchWidth,
3713        length: usize,
3714    ) -> Result<UserAddress, Errno> {
3715        let state = self.state.read();
3716
3717        // Place it at approx. 2/3 of the available mmap space, subject to ASLR adjustment.
3718        let base = round_up_to_system_page_size(2 * state.mmap_top.ptr() / 3).unwrap()
3719            + generate_random_offset_for_aslr(arch_width);
3720        if base.checked_add(length).ok_or_else(|| errno!(EINVAL))? <= state.mmap_top.ptr() {
3721            Ok(UserAddress::from_ptr(base))
3722        } else {
3723            error!(EINVAL)
3724        }
3725    }
3726    pub fn executable_node(&self) -> Option<NamespaceNode> {
3727        self.state.read().executable_node.clone()
3728    }
3729
3730    #[track_caller]
3731    pub fn get_errno_for_map_err(status: zx::Status) -> Errno {
3732        match status {
3733            zx::Status::INVALID_ARGS => errno!(EINVAL),
3734            zx::Status::ACCESS_DENIED => errno!(EPERM),
3735            zx::Status::NOT_SUPPORTED => errno!(ENODEV),
3736            zx::Status::NO_MEMORY => errno!(ENOMEM),
3737            zx::Status::NO_RESOURCES => errno!(ENOMEM),
3738            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
3739            zx::Status::ALREADY_EXISTS => errno!(EEXIST),
3740            zx::Status::BAD_STATE => errno!(EINVAL),
3741            _ => impossible_error(status),
3742        }
3743    }
3744
3745    #[track_caller]
3746    pub fn get_errno_for_vmo_err(status: zx::Status) -> Errno {
3747        match status {
3748            zx::Status::NO_MEMORY => errno!(ENOMEM),
3749            zx::Status::ACCESS_DENIED => errno!(EPERM),
3750            zx::Status::NOT_SUPPORTED => errno!(EIO),
3751            zx::Status::BAD_STATE => errno!(EIO),
3752            _ => return impossible_error(status),
3753        }
3754    }
3755
3756    pub fn map_memory(
3757        self: &Arc<Self>,
3758        addr: DesiredAddress,
3759        memory: Arc<MemoryObject>,
3760        memory_offset: u64,
3761        length: usize,
3762        prot_flags: ProtectionFlags,
3763        max_access: Access,
3764        options: MappingOptions,
3765        name: MappingName,
3766    ) -> Result<UserAddress, Errno> {
3767        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
3768
3769        // Unmapped mappings must be released after the state is unlocked.
3770        let mut released_mappings = ReleasedMappings::default();
3771        // Hold the lock throughout the operation to uphold memory manager's invariants.
3772        // See mm/README.md.
3773        let mut state = self.state.write();
3774        let result = state.add_memory_mapping(
3775            self,
3776            addr,
3777            memory,
3778            memory_offset,
3779            length,
3780            flags,
3781            max_access,
3782            options.contains(MappingOptions::POPULATE),
3783            name,
3784            MappingMode::Eager,
3785            &mut released_mappings,
3786        );
3787
3788        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
3789        // in `DirEntry`'s `drop`.
3790        released_mappings.finalize(state);
3791
3792        result
3793    }
3794
3795    pub fn map_anonymous(
3796        self: &Arc<Self>,
3797        addr: DesiredAddress,
3798        length: usize,
3799        prot_flags: ProtectionFlags,
3800        options: MappingOptions,
3801        name: MappingName,
3802    ) -> Result<UserAddress, Errno> {
3803        let mut released_mappings = ReleasedMappings::default();
3804        // Hold the lock throughout the operation to uphold memory manager's invariants.
3805        // See mm/README.md.
3806        let mut state = self.state.write();
3807        let result = state.map_anonymous(
3808            self,
3809            addr,
3810            length,
3811            prot_flags,
3812            options,
3813            name,
3814            &mut released_mappings,
3815        );
3816
3817        released_mappings.finalize(state);
3818
3819        result
3820    }
3821
3822    /// Map the stack into a pre-selected address region
3823    pub fn map_stack(
3824        self: &Arc<Self>,
3825        length: usize,
3826        prot_flags: ProtectionFlags,
3827    ) -> Result<UserAddress, Errno> {
3828        assert!(length <= MAX_STACK_SIZE);
3829        let addr = self.state.read().stack_origin;
3830        // The address range containing stack_origin should normally be available: it's above the
3831        // mmap_top, and this method is called early enough in the process lifetime that only the
3832        // main ELF and the interpreter are already loaded. However, in the rare case that the
3833        // static position-independent executable is overlapping the chosen address, mapping as Hint
3834        // will make mmap choose a new place for it.
3835        // TODO(https://fxbug.dev/370027241): Consider a more robust approach
3836        let stack_addr = self.map_anonymous(
3837            DesiredAddress::Hint(addr),
3838            length,
3839            prot_flags,
3840            MappingOptions::ANONYMOUS | MappingOptions::GROWSDOWN,
3841            MappingName::Stack,
3842        )?;
3843        if stack_addr != addr {
3844            log_warn!(
3845                "An address designated for stack ({}) was unavailable, mapping at {} instead.",
3846                addr,
3847                stack_addr
3848            );
3849        }
3850        Ok(stack_addr)
3851    }
3852
3853    pub fn remap(
3854        self: &Arc<Self>,
3855        current_task: &CurrentTask,
3856        addr: UserAddress,
3857        old_length: usize,
3858        new_length: usize,
3859        flags: MremapFlags,
3860        new_addr: UserAddress,
3861    ) -> Result<UserAddress, Errno> {
3862        let mut released_mappings = ReleasedMappings::default();
3863        // Hold the lock throughout the operation to uphold memory manager's invariants.
3864        // See mm/README.md.
3865        let mut state = self.state.write();
3866        let result = state.remap(
3867            current_task,
3868            self,
3869            addr,
3870            old_length,
3871            new_length,
3872            flags,
3873            new_addr,
3874            &mut released_mappings,
3875        );
3876
3877        released_mappings.finalize(state);
3878
3879        result
3880    }
3881
3882    pub fn unmap(self: &Arc<Self>, addr: UserAddress, length: usize) -> Result<(), Errno> {
3883        let mut released_mappings = ReleasedMappings::default();
3884        // Hold the lock throughout the operation to uphold memory manager's invariants.
3885        // See mm/README.md.
3886        let mut state = self.state.write();
3887        let result = state.unmap(self, addr, length, &mut released_mappings);
3888
3889        released_mappings.finalize(state);
3890
3891        result
3892    }
3893
3894    pub fn protect(
3895        &self,
3896        current_task: &CurrentTask,
3897        addr: UserAddress,
3898        length: usize,
3899        prot_flags: ProtectionFlags,
3900    ) -> Result<(), Errno> {
3901        let page_size = *PAGE_SIZE;
3902        if !addr.is_aligned(page_size) {
3903            return error!(EINVAL);
3904        }
3905        if length == 0 {
3906            return Ok(());
3907        }
3908        let end = addr.checked_add(length).ok_or_else(|| errno!(ENOMEM))?.round_up(page_size)?;
3909        if end > self.maximum_valid_user_address {
3910            return error!(ENOMEM);
3911        }
3912
3913        // Hold the lock throughout the operation to uphold memory manager's invariants.
3914        // See mm/README.md.
3915        let mut state = self.state.write();
3916        let mut released_mappings = ReleasedMappings::default();
3917        let result = state.protect(current_task, addr, length, prot_flags, &mut released_mappings);
3918        released_mappings.finalize(state);
3919        result
3920    }
3921
3922    pub fn msync(
3923        &self,
3924        _locked: &mut Locked<Unlocked>,
3925        current_task: &CurrentTask,
3926        addr: UserAddress,
3927        length: usize,
3928        flags: MsyncFlags,
3929    ) -> Result<(), Errno> {
3930        // According to POSIX, either MS_SYNC or MS_ASYNC must be specified in flags,
3931        // and indeed failure to include one of these flags will cause msync() to fail
3932        // on some systems.  However, Linux permits a call to msync() that specifies
3933        // neither of these flags, with semantics that are (currently) equivalent to
3934        // specifying MS_ASYNC.
3935
3936        // Both MS_SYNC and MS_ASYNC are set in flags
3937        if flags.contains(MsyncFlags::ASYNC) && flags.contains(MsyncFlags::SYNC) {
3938            return error!(EINVAL);
3939        }
3940
3941        if !addr.is_aligned(*PAGE_SIZE) {
3942            return error!(EINVAL);
3943        }
3944
3945        // We collect the nodes to sync first, release the memory manager lock, and then sync them.
3946        // This avoids holding the lock during blocking I/O operations (sync), which prevents
3947        // stalling other memory operations and avoids potential deadlocks.
3948        // It also allows us to deduplicate nodes, avoiding redundant sync calls for the same file.
3949        let mut nodes_to_sync = {
3950            let mm_state = self.state.read();
3951
3952            let length_rounded = round_up_to_system_page_size(length)?;
3953            let end_addr = addr.checked_add(length_rounded).ok_or_else(|| errno!(EINVAL))?;
3954
3955            let mut last_end = addr;
3956            let mut nodes = vec![];
3957            for (range, mapping) in mm_state.mappings.range(addr..end_addr) {
3958                // Check if there is a gap between the last mapped address and the current mapping.
3959                // msync requires the entire range to be mapped, so any gap results in ENOMEM.
3960                if range.start > last_end {
3961                    return error!(ENOMEM);
3962                }
3963                last_end = range.end;
3964
3965                if flags.contains(MsyncFlags::INVALIDATE)
3966                    && mapping.flags().contains(MappingFlags::LOCKED)
3967                {
3968                    return error!(EBUSY);
3969                }
3970
3971                if flags.contains(MsyncFlags::SYNC) {
3972                    if let MappingNameRef::File(file_mapping) = mapping.name() {
3973                        nodes.push(file_mapping.name.entry.node.clone());
3974                    }
3975                }
3976            }
3977            if last_end < end_addr {
3978                return error!(ENOMEM);
3979            }
3980            nodes
3981        };
3982
3983        // Deduplicate nodes to avoid redundant sync calls.
3984        nodes_to_sync.sort_by_key(|n| Arc::as_ptr(n) as usize);
3985        nodes_to_sync.dedup_by(|a, b| Arc::ptr_eq(a, b));
3986
3987        for node in nodes_to_sync {
3988            // Range-based sync is non-trivial for Fxfs to support due to its complicated
3989            // reservation system (b/322874588#comment5). Naive range-based sync could exhaust
3990            // space reservations if called page-by-page, as transaction costs are based on the
3991            // number of dirty pages rather than file ranges. We use whole-file sync for now
3992            // to ensure data durability without adding excessive complexity.
3993            node.ops().sync(&node, current_task)?;
3994        }
3995        Ok(())
3996    }
3997
3998    pub fn madvise(&self, addr: UserAddress, length: usize, advice: u32) -> Result<(), Errno> {
3999        let mut state = self.state.write();
4000        let mut released_mappings = ReleasedMappings::default();
4001        let result =
4002            state.madvise(&self.mapping_context, addr, length, advice, &mut released_mappings);
4003        released_mappings.finalize(state);
4004        result
4005    }
4006
4007    pub fn mlock<L>(
4008        &self,
4009        current_task: &CurrentTask,
4010        locked: &mut Locked<L>,
4011        desired_addr: UserAddress,
4012        desired_length: usize,
4013        on_fault: bool,
4014    ) -> Result<(), Errno>
4015    where
4016        L: LockBefore<ThreadGroupLimits>,
4017    {
4018        let mut state = self.state.write();
4019        let mut released_mappings = ReleasedMappings::default();
4020        let result = state.mlock(
4021            &self.mapping_context,
4022            current_task,
4023            locked,
4024            desired_addr,
4025            desired_length,
4026            on_fault,
4027            &mut released_mappings,
4028        );
4029        released_mappings.finalize(state);
4030        result
4031    }
4032
4033    pub fn munlock(
4034        &self,
4035        current_task: &CurrentTask,
4036        desired_addr: UserAddress,
4037        desired_length: usize,
4038    ) -> Result<(), Errno> {
4039        let mut state = self.state.write();
4040        let mut released_mappings = ReleasedMappings::default();
4041        let result =
4042            state.munlock(current_task, desired_addr, desired_length, &mut released_mappings);
4043        released_mappings.finalize(state);
4044        result
4045    }
4046
4047    pub fn log_memory_map(&self, task: &Task, fault_address: UserAddress) {
4048        let state = self.state.read();
4049        log_warn!("Memory map for pid={}:", task.thread_group.leader);
4050        let mut last_end = UserAddress::from_ptr(0);
4051        for (range, map) in state.mappings.iter() {
4052            if fault_address >= last_end && fault_address < range.start {
4053                log_warn!("{:08x} <= FAULT", fault_address.ptr());
4054            }
4055
4056            let perms = format!(
4057                "{}{}{}{}",
4058                if map.can_read() { 'r' } else { '-' },
4059                if map.can_write() { 'w' } else { '-' },
4060                if map.can_exec() { 'x' } else { '-' },
4061                if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' }
4062            );
4063
4064            let backing = match state.get_mapping_backing(map) {
4065                MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
4066                MappingBacking::PrivateAnonymous => 0,
4067            };
4068
4069            let name_str = match &map.name() {
4070                MappingNameRef::File(file) => {
4071                    let Ok(running_state) = task.running_state() else {
4072                        log_warn!("Task {} is not running", task.get_tid());
4073                        continue;
4074                    };
4075                    String::from_utf8_lossy(&file.name.path(&running_state.fs())).into_owned()
4076                }
4077                MappingNameRef::None | MappingNameRef::AioContext(_) => {
4078                    if map.flags().contains(MappingFlags::SHARED)
4079                        && map.flags().contains(MappingFlags::ANONYMOUS)
4080                    {
4081                        "/dev/zero (deleted)".to_string()
4082                    } else {
4083                        "".to_string()
4084                    }
4085                }
4086                MappingNameRef::Stack => "[stack]".to_string(),
4087                MappingNameRef::Heap => "[heap]".to_string(),
4088                MappingNameRef::Vdso => "[vdso]".to_string(),
4089                MappingNameRef::Vvar => "[vvar]".to_string(),
4090                _ => format!("{:?}", map.name()),
4091            };
4092
4093            let fault_marker = if range.contains(&fault_address) { " <= FAULT" } else { "" };
4094
4095            log_warn!(
4096                "{:08x}-{:08x} {} {:08x} {}{}",
4097                range.start.ptr(),
4098                range.end.ptr(),
4099                perms,
4100                backing,
4101                name_str,
4102                fault_marker
4103            );
4104            last_end = range.end;
4105        }
4106
4107        if fault_address >= last_end {
4108            log_warn!("{:08x} <= FAULT", fault_address.ptr());
4109        }
4110    }
4111
4112    pub fn handle_page_fault(
4113        self: &Arc<Self>,
4114        locked: &mut Locked<Unlocked>,
4115        decoded: PageFaultExceptionReport,
4116        error_code: zx::Status,
4117    ) -> ExceptionResult {
4118        #[cfg(target_arch = "aarch64")]
4119        // On aarch64, 64-bit processes can use Top Byte Ignore (TBI). We need to mask out the
4120        // top byte of the faulting address to get the actual userspace address.
4121        let addr = if self.is_arch64() {
4122            UserAddress::from(decoded.faulting_address & 0x00FF_FFFF_FFFF_FFFF)
4123        } else {
4124            UserAddress::from(decoded.faulting_address)
4125        };
4126        #[cfg(not(target_arch = "aarch64"))]
4127        let addr = UserAddress::from(decoded.faulting_address);
4128
4129        // On uffd-registered range, handle according to the uffd rules
4130        if error_code == zx::Status::ACCESS_DENIED {
4131            let state = self.state.write();
4132            if let Some((_, mapping)) = state.mappings.get(addr) {
4133                if mapping.flags().contains(MappingFlags::UFFD) {
4134                    // TODO(https://fxbug.dev/391599171): Support other modes
4135                    assert!(mapping.flags().contains(MappingFlags::UFFD_MISSING));
4136
4137                    if let Some(_uffd) = state.find_uffd(locked, addr) {
4138                        // If the SIGBUS feature was set, no event will be sent to the file.
4139                        // Instead, SIGBUS is delivered to the process that triggered the fault.
4140                        // TODO(https://fxbug.dev/391599171): For now we only support this feature,
4141                        // so we assume it is set.
4142                        // Check for the SIGBUS feature when we start supporting running without it.
4143                        return ExceptionResult::Signal(SignalInfo::with_detail(
4144                            SIGBUS,
4145                            BUS_ADRERR as i32,
4146                            SignalDetail::SigFault { addr: decoded.faulting_address },
4147                        ));
4148                    };
4149                }
4150                // There is a data race resulting from uffd unregistration and page fault happening
4151                // at the same time. To detect it, we check if the access was meant to be rejected
4152                // according to Starnix own information about the mapping.
4153                if check_access_permissions_in_page_fault(&decoded, mapping) {
4154                    track_stub!(
4155                        TODO("https://fxbug.dev/435171399"),
4156                        "Inconsistent permission fault"
4157                    );
4158                    return ExceptionResult::Handled;
4159                }
4160            }
4161            std::mem::drop(state);
4162        }
4163
4164        if decoded.not_present {
4165            {
4166                let mut state = self.state.write();
4167                match state.ensure_range_mapped_in_user_vmar(addr, None, &self.mapping_context) {
4168                    Ok(true) => return ExceptionResult::Handled,
4169                    Ok(false) => {
4170                        // If the mapping generation has changed since the last time this thread
4171                        // saw it, we return `Handled` to retry the faulting instruction.
4172                        // This handles cases where the fault was spurious due to a concurrent
4173                        // mapping operation. We update the counter here to ensure we converge and
4174                        // don't loop infinitely.
4175                        let current_gen = state.mappings.generation;
4176                        let old_gen = LAST_SEEN_MAPPING_GENERATION.with(|c| c.replace(current_gen));
4177                        if current_gen != old_gen {
4178                            return ExceptionResult::Handled;
4179                        }
4180                    }
4181                    Err(e) => {
4182                        log_error!("Failed to map lazy memory: {e}")
4183                    }
4184                }
4185            }
4186
4187            // A page fault may be resolved by extending a growsdown mapping to cover the faulting
4188            // address. Mark the exception handled if so. Otherwise let the regular handling proceed.
4189
4190            // We should only attempt growth on a not-present fault and we should only extend if the
4191            // access type matches the protection on the GROWSDOWN mapping.
4192            match self.extend_growsdown_mapping_to_address(
4193                UserAddress::from(decoded.faulting_address),
4194                decoded.is_write,
4195            ) {
4196                Ok(true) => {
4197                    return ExceptionResult::Handled;
4198                }
4199                Err(e) => {
4200                    log_warn!("Error handling page fault: {e}")
4201                }
4202                _ => {}
4203            }
4204        }
4205
4206        // For this exception type, the synth_code field in the exception report's context is the
4207        // error generated by the page fault handler. For us this is used to distinguish between a
4208        // segmentation violation and a bus error. Unfortunately this detail is not documented in
4209        // Zircon's public documentation and is only described in the architecture-specific
4210        // exception definitions such as:
4211        // zircon/kernel/arch/x86/include/arch/x86.h
4212        // zircon/kernel/arch/arm64/include/arch/arm64.h
4213        let (signo, si_code) = match error_code {
4214            zx::Status::OUT_OF_RANGE => (SIGBUS, linux_uapi::BUS_ADRERR as i32),
4215            _ => {
4216                let code = if self.state.read().mappings.get(addr).is_some() {
4217                    linux_uapi::SEGV_ACCERR
4218                } else {
4219                    linux_uapi::SEGV_MAPERR
4220                };
4221                (SIGSEGV, code as i32)
4222            }
4223        };
4224        ExceptionResult::Signal(SignalInfo::with_detail(
4225            signo,
4226            si_code,
4227            SignalDetail::SigFault { addr: decoded.faulting_address },
4228        ))
4229    }
4230
4231    pub fn set_mapping_name(
4232        &self,
4233        addr: UserAddress,
4234        length: usize,
4235        name: Option<FsString>,
4236    ) -> Result<(), Errno> {
4237        let mut state = self.state.write();
4238        let mut released_mappings = ReleasedMappings::default();
4239        let result = state.set_mapping_name(addr, length, name, &mut released_mappings);
4240        released_mappings.finalize(state);
4241        result
4242    }
4243
4244    /// Returns [`Ok`] if the entire range specified by `addr..(addr+length)` contains valid
4245    /// mappings.
4246    ///
4247    /// # Errors
4248    ///
4249    /// Returns [`Err(errno)`] where `errno` is:
4250    ///
4251    ///   - `EINVAL`: `addr` is not page-aligned, or the range is too large,
4252    ///   - `ENOMEM`: one or more pages in the range are not mapped.
4253    pub fn ensure_mapped(&self, addr: UserAddress, length: usize) -> Result<(), Errno> {
4254        if !addr.is_aligned(*PAGE_SIZE) {
4255            return error!(EINVAL);
4256        }
4257
4258        let length = round_up_to_system_page_size(length)?;
4259        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
4260        let state = self.state.read();
4261        let mut last_end = addr;
4262        for (range, _) in state.mappings.range(addr..end_addr) {
4263            if range.start > last_end {
4264                // This mapping does not start immediately after the last.
4265                return error!(ENOMEM);
4266            }
4267            last_end = range.end;
4268        }
4269        if last_end < end_addr {
4270            // There is a gap of no mappings at the end of the range.
4271            error!(ENOMEM)
4272        } else {
4273            Ok(())
4274        }
4275    }
4276
4277    /// Returns the memory object mapped at the address and the offset into the memory object of
4278    /// the address. Intended for implementing futexes.
4279    pub fn get_mapping_memory(
4280        &self,
4281        addr: UserAddress,
4282        perms: ProtectionFlags,
4283    ) -> Result<(Arc<MemoryObject>, u64), Errno> {
4284        let state = self.state.read();
4285        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
4286        if !mapping.flags().access_flags().contains(perms) {
4287            return error!(EACCES);
4288        }
4289        match state.get_mapping_backing(mapping) {
4290            MappingBacking::Memory(backing) => {
4291                Ok((Arc::clone(backing.memory()), mapping.address_to_offset(addr)))
4292            }
4293            MappingBacking::PrivateAnonymous => {
4294                Ok((Arc::clone(&self.mapping_context.private_anonymous.backing), addr.ptr() as u64))
4295            }
4296        }
4297    }
4298
4299    /// Does a rough check that the given address is plausibly in the address space of the
4300    /// application. This does not mean the pointer is valid for any particular purpose or that
4301    /// it will remain so!
4302    ///
4303    /// In some syscalls, Linux seems to do some initial validation of the pointer up front to
4304    /// tell the caller early if it's invalid. For example, in epoll_wait() it's returning a vector
4305    /// of events. If the caller passes an invalid pointer, it wants to fail without dropping any
4306    /// events. Failing later when actually copying the required events to userspace would mean
4307    /// those events will be lost. But holding a lock on the memory manager for an asynchronous
4308    /// wait is not desirable.
4309    ///
4310    /// Testing shows that Linux seems to do some initial plausibility checking of the pointer to
4311    /// be able to report common usage errors before doing any (possibly unreversable) work. This
4312    /// checking is easy to get around if you try, so this function is also not required to
4313    /// be particularly robust. Certainly the more advanced cases of races (the memory could be
4314    /// unmapped after this call but before it's used) are not handled.
4315    ///
4316    /// The buffer_size variable is the size of the data structure that needs to fit
4317    /// in the given memory.
4318    ///
4319    /// Returns the error EFAULT if invalid.
4320    pub fn check_plausible(&self, addr: UserAddress, buffer_size: usize) -> Result<(), Errno> {
4321        let state = self.state.read();
4322
4323        if let Some(range) = state.mappings.last_range() {
4324            if (range.end - buffer_size)? >= addr {
4325                return Ok(());
4326            }
4327        }
4328        error!(EFAULT)
4329    }
4330
4331    pub fn get_aio_context(&self, addr: UserAddress) -> Option<Arc<AioContext>> {
4332        let state = self.state.read();
4333        state.get_aio_context(addr).map(|(_, aio_context)| aio_context)
4334    }
4335
4336    pub fn destroy_aio_context(
4337        self: &Arc<Self>,
4338        addr: UserAddress,
4339    ) -> Result<Arc<AioContext>, Errno> {
4340        let mut released_mappings = ReleasedMappings::default();
4341
4342        // Hold the lock throughout the operation to uphold memory manager's invariants.
4343        // See mm/README.md.
4344        let mut state = self.state.write();
4345
4346        // Validate that this address actually has an AioContext. We need to hold the state lock
4347        // until we actually remove the mappings to ensure that another thread does not manipulate
4348        // the mappings after we've validated that they contain an AioContext.
4349        let Some((range, aio_context)) = state.get_aio_context(addr) else {
4350            return error!(EINVAL);
4351        };
4352
4353        let length = range.end - range.start;
4354        let result = state.unmap(self, range.start, length, &mut released_mappings);
4355
4356        released_mappings.finalize(state);
4357
4358        result.map(|_| aio_context)
4359    }
4360
4361    #[cfg(test)]
4362    pub fn get_mapping_name(
4363        &self,
4364        addr: UserAddress,
4365    ) -> Result<Option<flyweights::FlyByteStr>, Errno> {
4366        let state = self.state.read();
4367        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
4368        if let MappingNameRef::Vma(name) = mapping.name() {
4369            Ok(Some(name.clone()))
4370        } else {
4371            Ok(None)
4372        }
4373    }
4374
4375    #[cfg(test)]
4376    pub fn get_mapping_count(&self) -> usize {
4377        let state = self.state.read();
4378        state.mappings.iter().count()
4379    }
4380
4381    pub fn extend_growsdown_mapping_to_address(
4382        self: &Arc<Self>,
4383        addr: UserAddress,
4384        is_write: bool,
4385    ) -> Result<bool, Error> {
4386        self.state.write().extend_growsdown_mapping_to_address(self, addr, is_write)
4387    }
4388
4389    pub fn get_total_usage(&self) -> usize {
4390        self.state.read().mappings.total_usage
4391    }
4392
4393    pub fn get_stats(&self, current_task: &CurrentTask) -> MemoryStats {
4394        // Grab our state lock before reading zircon mappings so that the two are consistent.
4395        // Other Starnix threads should not make any changes to the Zircon mappings while we hold
4396        // a read lock to the memory manager state.
4397        let state = self.state.read();
4398
4399        let mut stats = MemoryStats::default();
4400        stats.vm_stack = state.stack_size;
4401
4402        self.with_zx_mappings(current_task, |zx_mappings| {
4403            for zx_mapping in zx_mappings {
4404                // We only care about map info for actual mappings.
4405                let zx_details = zx_mapping.details();
4406                let Some(zx_details) = zx_details.as_mapping() else { continue };
4407                let user_address = UserAddress::from(zx_mapping.base as u64);
4408                let (_, mm_mapping) = state
4409                    .mappings
4410                    .get(user_address)
4411                    .unwrap_or_else(|| panic!("mapping bookkeeping must be consistent with zircon's: not found: {user_address:?}"));
4412                debug_assert_eq!(
4413                    match state.get_mapping_backing(mm_mapping) {
4414                        MappingBacking::Memory(m)=>m.memory().get_koid(),
4415                        MappingBacking::PrivateAnonymous=>self.mapping_context.private_anonymous.backing.get_koid(),
4416                    },
4417                    zx_details.vmo_koid,
4418                    "MemoryManager and Zircon must agree on which VMO is mapped in this range",
4419                );
4420
4421                stats.vm_size += zx_mapping.size;
4422
4423                stats.vm_rss += zx_details.committed_bytes;
4424                stats.vm_swap += zx_details.populated_bytes - zx_details.committed_bytes;
4425
4426                if mm_mapping.flags().contains(MappingFlags::SHARED) {
4427                    stats.rss_shared += zx_details.committed_bytes;
4428                } else if mm_mapping.flags().contains(MappingFlags::ANONYMOUS) {
4429                    stats.rss_anonymous += zx_details.committed_bytes;
4430                } else if mm_mapping.name().is_file() {
4431                    stats.rss_file += zx_details.committed_bytes;
4432                }
4433
4434                if mm_mapping.flags().contains(MappingFlags::LOCKED) {
4435                    stats.vm_lck += zx_details.committed_bytes;
4436                }
4437
4438                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
4439                    && mm_mapping.flags().contains(MappingFlags::WRITE)
4440                {
4441                    stats.vm_data += zx_mapping.size;
4442                }
4443
4444                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
4445                    && mm_mapping.flags().contains(MappingFlags::EXEC)
4446                {
4447                    stats.vm_exe += zx_mapping.size;
4448                }
4449            }
4450        });
4451
4452        // TODO(https://fxbug.dev/396221597): Placeholder for now. We need kernel support to track
4453        // the committed bytes high water mark.
4454        stats.vm_rss_hwm = STUB_VM_RSS_HWM;
4455        stats
4456    }
4457
4458    fn run_atomic_op<F, T>(&self, futex_addr: FutexAddress, mut op: F) -> Result<T, Errno>
4459    where
4460        F: FnMut(&usercopy::Usercopy) -> Result<T, ()>,
4461    {
4462        if let Some(usercopy) = usercopy() {
4463            // Try the lock-free fast path first.
4464            // Note: `op` returns `Err(())` strictly on memory access faults. For
4465            // compare-exchange operations, a logical mismatch is wrapped inside a
4466            // successful `Ok(value_or_error)`, meaning we will short-circuit here
4467            // and won't incorrectly retry on logical failures.
4468            if let Ok(val) = op(usercopy) {
4469                return Ok(val);
4470            }
4471            self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None)?;
4472            op(usercopy).map_err(|_| errno!(EFAULT))
4473        } else {
4474            unreachable!("can only control memory ordering of atomics with usercopy");
4475        }
4476    }
4477
4478    pub fn atomic_load_u32_acquire(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
4479        self.run_atomic_op(futex_addr, |uc| uc.atomic_load_u32_acquire(futex_addr.ptr()))
4480    }
4481
4482    pub fn atomic_load_u32_relaxed(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
4483        if usercopy().is_some() {
4484            self.run_atomic_op(futex_addr, |uc| uc.atomic_load_u32_relaxed(futex_addr.ptr()))
4485        } else {
4486            // SAFETY: `self.state.read().read_memory` only returns `Ok` if all
4487            // bytes were read to.
4488            let buf = unsafe {
4489                read_to_array(|buf| {
4490                    self.state
4491                        .read()
4492                        .read_memory(futex_addr.into(), buf, &self.mapping_context)
4493                        .map(|bytes_read| {
4494                            debug_assert_eq!(bytes_read.len(), std::mem::size_of::<u32>())
4495                        })
4496                })
4497            }?;
4498            Ok(u32::from_ne_bytes(buf))
4499        }
4500    }
4501
4502    pub fn atomic_store_u32_relaxed(
4503        &self,
4504        futex_addr: FutexAddress,
4505        value: u32,
4506    ) -> Result<(), Errno> {
4507        if usercopy().is_some() {
4508            self.run_atomic_op(futex_addr, |uc| {
4509                uc.atomic_store_u32_relaxed(futex_addr.ptr(), value)
4510            })
4511        } else {
4512            self.state.read().write_memory(
4513                futex_addr.into(),
4514                value.as_bytes(),
4515                &self.mapping_context,
4516            )?;
4517            Ok(())
4518        }
4519    }
4520
4521    pub fn atomic_compare_exchange_u32_acq_rel(
4522        &self,
4523        futex_addr: FutexAddress,
4524        current: u32,
4525        new: u32,
4526    ) -> CompareExchangeResult<u32> {
4527        CompareExchangeResult::from_usercopy(self.run_atomic_op(futex_addr, |uc| {
4528            uc.atomic_compare_exchange_u32_acq_rel(futex_addr.ptr(), current, new)
4529        }))
4530    }
4531
4532    pub fn atomic_compare_exchange_weak_u32_acq_rel(
4533        &self,
4534        futex_addr: FutexAddress,
4535        current: u32,
4536        new: u32,
4537    ) -> CompareExchangeResult<u32> {
4538        CompareExchangeResult::from_usercopy(self.run_atomic_op(futex_addr, |uc| {
4539            uc.atomic_compare_exchange_weak_u32_acq_rel(futex_addr.ptr(), current, new)
4540        }))
4541    }
4542}
4543
/// The result of an atomic compare/exchange operation on user memory.
///
/// This is the return type of the `atomic_compare_exchange_*` methods in this file. It keeps
/// a logical mismatch ([`CompareExchangeResult::Stale`]) separate from a failure to access the
/// memory at all ([`CompareExchangeResult::Error`]).
#[derive(Debug, Clone)]
pub enum CompareExchangeResult<T> {
    /// The current value provided matched the one observed in memory and the new value provided
    /// was written.
    Success,
    /// The provided current value did not match the current value in memory.
    ///
    /// `observed` carries the value that was found in memory instead.
    Stale { observed: T },
    /// There was a general error while accessing the requested memory.
    Error(Errno),
}
4555
4556impl<T> CompareExchangeResult<T> {
4557    fn from_usercopy(res: Result<Result<T, T>, Errno>) -> Self {
4558        match res {
4559            Ok(Ok(_)) => Self::Success,
4560            Ok(Err(observed)) => Self::Stale { observed },
4561            Err(e) => Self::Error(e),
4562        }
4563    }
4564}
4565
4566impl<T> From<Errno> for CompareExchangeResult<T> {
4567    fn from(e: Errno) -> Self {
4568        Self::Error(e)
4569    }
4570}
4571
/// The user-space address at which a mapping should be placed. Used by [`MemoryManager::map`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DesiredAddress {
    /// Map at any address chosen by the kernel.
    Any,
    /// The address is a hint. If the address overlaps an existing mapping a different address may
    /// be chosen.
    Hint(UserAddress),
    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
    /// overwrite it), mapping fails.
    Fixed(UserAddress),
    /// The address is a requirement. Existing mappings that overlap the requested range should
    /// be unmapped to make room for the new mapping.
    FixedOverwrite(UserAddress),
}
4587
/// The user-space address at which a mapping should be placed. Used by [`map_in_vmar`].
///
/// Unlike [`DesiredAddress`], every variant carries a concrete address; there is no `Any` or
/// `Hint` variant here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SelectedAddress {
    /// See DesiredAddress::Fixed.
    Fixed(UserAddress),
    /// See DesiredAddress::FixedOverwrite.
    FixedOverwrite(UserAddress),
}
4596
4597impl SelectedAddress {
4598    fn addr(&self) -> UserAddress {
4599        match self {
4600            SelectedAddress::Fixed(addr) => *addr,
4601            SelectedAddress::FixedOverwrite(addr) => *addr,
4602        }
4603    }
4604}
4605
4606/// Write one line of the memory map intended for adding to `/proc/self/maps`.
4607fn write_map(
4608    task: &Task,
4609    sink: &mut DynamicFileBuf,
4610    state: &MemoryManagerState,
4611    range: &Range<UserAddress>,
4612    map: &Mapping,
4613) -> Result<(), Errno> {
4614    let line_length = write!(
4615        sink,
4616        "{:08x}-{:08x} {}{}{}{} {:08x} 00:00 {} ",
4617        range.start.ptr(),
4618        range.end.ptr(),
4619        if map.can_read() { 'r' } else { '-' },
4620        if map.can_write() { 'w' } else { '-' },
4621        if map.can_exec() { 'x' } else { '-' },
4622        if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' },
4623        match state.get_mapping_backing(map) {
4624            MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
4625            MappingBacking::PrivateAnonymous => 0,
4626        },
4627        if let MappingNameRef::File(file) = &map.name() { file.name.entry.node.ino } else { 0 }
4628    )?;
4629    let fill_to_name = |sink: &mut DynamicFileBuf| {
4630        // The filename goes at >= the 74th column (73rd when zero indexed)
4631        for _ in line_length..73 {
4632            sink.write(b" ");
4633        }
4634    };
4635    match &map.name() {
4636        MappingNameRef::None | MappingNameRef::AioContext(_) => {
4637            if map.flags().contains(MappingFlags::SHARED)
4638                && map.flags().contains(MappingFlags::ANONYMOUS)
4639            {
4640                // See proc(5), "/proc/[pid]/map_files/"
4641                fill_to_name(sink);
4642                sink.write(b"/dev/zero (deleted)");
4643            }
4644        }
4645        MappingNameRef::Stack => {
4646            fill_to_name(sink);
4647            sink.write(b"[stack]");
4648        }
4649        MappingNameRef::Heap => {
4650            fill_to_name(sink);
4651            sink.write(b"[heap]");
4652        }
4653        MappingNameRef::Vdso => {
4654            fill_to_name(sink);
4655            sink.write(b"[vdso]");
4656        }
4657        MappingNameRef::Vvar => {
4658            fill_to_name(sink);
4659            sink.write(b"[vvar]");
4660        }
4661        MappingNameRef::File(file) => {
4662            fill_to_name(sink);
4663            // File names can have newlines that need to be escaped before printing.
4664            // According to https://man7.org/linux/man-pages/man5/proc.5.html the only
4665            // escaping applied to paths is replacing newlines with an octal sequence.
4666            let path = file.name.path(&task.running_state()?.fs());
4667            sink.write_iter(
4668                path.iter()
4669                    .flat_map(|b| if *b == b'\n' { b"\\012" } else { std::slice::from_ref(b) })
4670                    .copied(),
4671            );
4672        }
4673        MappingNameRef::Vma(name) => {
4674            fill_to_name(sink);
4675            sink.write(b"[anon:");
4676            sink.write(name.as_bytes());
4677            sink.write(b"]");
4678        }
4679        MappingNameRef::Ashmem(name) => {
4680            fill_to_name(sink);
4681            sink.write(b"/dev/ashmem/");
4682            sink.write(name.as_bytes());
4683        }
4684    }
4685    sink.write(b"\n");
4686    Ok(())
4687}
4688
/// Aggregated memory usage counters for an address space.
///
/// All fields default to zero. The counters are sums over mappings of byte quantities reported
/// by Zircon (mapping sizes, committed bytes and populated bytes).
#[derive(Default)]
pub struct MemoryStats {
    /// Sum of the sizes of all mappings.
    pub vm_size: usize,
    /// Sum of the committed bytes of all mappings.
    pub vm_rss: usize,
    /// High water mark of `vm_rss`. Currently filled with a stub placeholder value, see
    /// https://fxbug.dev/396221597.
    pub vm_rss_hwm: usize,
    /// Committed bytes of mappings that are not `SHARED` but are `ANONYMOUS`.
    pub rss_anonymous: usize,
    /// Committed bytes of mappings that are neither `SHARED` nor `ANONYMOUS` and are named after
    /// a file.
    pub rss_file: usize,
    /// Committed bytes of mappings flagged `SHARED`.
    pub rss_shared: usize,
    /// Sum of the sizes of mappings flagged both `ELF_BINARY` and `WRITE`.
    pub vm_data: usize,
    // NOTE(review): presumably the size of the stack mapping(s); the code that fills this field
    // is not next to the other counters — confirm against the producer.
    pub vm_stack: usize,
    /// Sum of the sizes of mappings flagged both `ELF_BINARY` and `EXEC`.
    pub vm_exe: usize,
    /// Sum over all mappings of populated bytes minus committed bytes.
    pub vm_swap: usize,
    /// Committed bytes of mappings flagged `LOCKED`.
    pub vm_lck: usize,
}
4703
/// Implements `/proc/self/maps`.
///
/// The file is produced one mapping at a time, using the end address of the last mapping
/// written as the cursor for the next call.
#[derive(Clone)]
pub struct ProcMapsFile {
    /// The memory manager captured when the file was created. This is an empty `Weak` when the
    /// task had no memory manager at that time, in which case the file is empty.
    mm: Weak<MemoryManager>,
    /// The task whose mappings are reported; used to resolve the paths of file mappings.
    task: Weak<Task>,
}
4710impl ProcMapsFile {
4711    pub fn new(task: Arc<Task>) -> DynamicFile<Self> {
4712        // "maps" is empty for kthreads, rather than inaccessible.
4713        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4714        DynamicFile::new(Self { mm, task: Arc::downgrade(&task) })
4715    }
4716}
4717
4718impl SequenceFileSource for ProcMapsFile {
4719    type Cursor = UserAddress;
4720
4721    fn next(
4722        &self,
4723        _current_task: &CurrentTask,
4724        cursor: UserAddress,
4725        sink: &mut DynamicFileBuf,
4726    ) -> Result<Option<UserAddress>, Errno> {
4727        let task = Task::from_weak(&self.task)?;
4728        // /proc/<pid>/maps is empty for kthreads and tasks whose memory manager has changed.
4729        let Some(mm) = self.mm.upgrade() else {
4730            return Ok(None);
4731        };
4732        let state = mm.state.read();
4733        if let Some((range, map)) = state.mappings.find_at_or_after(cursor) {
4734            write_map(&task, sink, &state, range, map)?;
4735            return Ok(Some(range.end));
4736        }
4737        Ok(None)
4738    }
4739}
4740
/// Source of the "smaps" proc file: for every mapping, the `maps`-style line followed by
/// per-mapping memory statistics.
#[derive(Clone)]
pub struct ProcSmapsFile {
    /// The memory manager captured when the file was created. This is an empty `Weak` when the
    /// task had no memory manager at that time, in which case the file is empty.
    mm: Weak<MemoryManager>,
    /// The task whose mappings are reported; used to resolve the paths of file mappings.
    task: Weak<Task>,
}
4746impl ProcSmapsFile {
4747    pub fn new(task: Arc<Task>) -> DynamicFile<Self> {
4748        // "smaps" is empty for kthreads, rather than inaccessible.
4749        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4750        DynamicFile::new(Self { mm, task: Arc::downgrade(&task) })
4751    }
4752}
4753
4754impl DynamicFileSource for ProcSmapsFile {
4755    fn generate(&self, current_task: &CurrentTask, sink: &mut DynamicFileBuf) -> Result<(), Errno> {
4756        let page_size_kb = *PAGE_SIZE / 1024;
4757        let task = Task::from_weak(&self.task)?;
4758        // /proc/<pid>/smaps is empty for kthreads and tasks whose memory manager has changed.
4759        let Some(mm) = self.mm.upgrade() else {
4760            return Ok(());
4761        };
4762
4763        // Ensure all mappings are mapped into the user vmar.
4764        let max_addr = mm.maximum_valid_user_address;
4765        mm.ensure_range_mapped_in_user_vmar(UserAddress::from(0), Some(max_addr.ptr()))?;
4766
4767        let state = mm.state.read();
4768        let committed_bytes_vec = mm.with_zx_mappings(current_task, |zx_mappings| {
4769            let mut zx_memory_info = RangeMap::<UserAddress, usize>::default();
4770            for idx in 0..zx_mappings.len() {
4771                let zx_mapping = zx_mappings[idx];
4772                // RangeMap uses #[must_use] for its default usecase but this drop is trivial.
4773                let _ = zx_memory_info.insert(
4774                    UserAddress::from_ptr(zx_mapping.base)
4775                        ..UserAddress::from_ptr(zx_mapping.base + zx_mapping.size),
4776                    idx,
4777                );
4778            }
4779
4780            let mut committed_bytes_vec = Vec::new();
4781            for (mm_range, mm_mapping) in state.mappings.iter() {
4782                let mut committed_bytes = 0;
4783
4784                for (zx_range, zx_mapping_idx) in zx_memory_info.range(mm_range.clone()) {
4785                    let intersect_range = zx_range.intersect(mm_range);
4786                    let zx_mapping = zx_mappings[*zx_mapping_idx];
4787                    let zx_details = zx_mapping.details();
4788                    let Some(zx_details) = zx_details.as_mapping() else { continue };
4789                    let zx_committed_bytes = zx_details.committed_bytes;
4790
4791                    // TODO(https://fxbug.dev/419882465): It can happen that the same Zircon mapping
4792                    // is covered by more than one Starnix mapping. In this case we don't have
4793                    // enough granularity to answer the question of how many committed bytes belong
4794                    // to one mapping or another. Make a best-effort approximation by dividing the
4795                    // committed bytes of a Zircon mapping proportionally.
4796                    committed_bytes += if intersect_range != *zx_range {
4797                        let intersection_size =
4798                            intersect_range.end.ptr() - intersect_range.start.ptr();
4799                        let part = intersection_size as f32 / zx_mapping.size as f32;
4800                        let prorated_committed_bytes: f32 = part * zx_committed_bytes as f32;
4801                        prorated_committed_bytes as u64
4802                    } else {
4803                        zx_committed_bytes as u64
4804                    };
4805                    assert_eq!(
4806                        match state.get_mapping_backing(mm_mapping) {
4807                            MappingBacking::Memory(m) => m.memory().get_koid(),
4808                            MappingBacking::PrivateAnonymous =>
4809                                mm.mapping_context.private_anonymous.backing.get_koid(),
4810                        },
4811                        zx_details.vmo_koid,
4812                        "MemoryManager and Zircon must agree on which VMO is mapped in this range",
4813                    );
4814                }
4815                committed_bytes_vec.push(committed_bytes);
4816            }
4817            Ok(committed_bytes_vec)
4818        })?;
4819
4820        for ((mm_range, mm_mapping), committed_bytes) in
4821            state.mappings.iter().zip(committed_bytes_vec.into_iter())
4822        {
4823            write_map(&task, sink, &state, mm_range, mm_mapping)?;
4824
4825            let size_kb = (mm_range.end.ptr() - mm_range.start.ptr()) / 1024;
4826            writeln!(sink, "Size:           {size_kb:>8} kB",)?;
4827            let share_count = match state.get_mapping_backing(mm_mapping) {
4828                MappingBacking::Memory(backing) => {
4829                    let memory = backing.memory();
4830                    if memory.is_clock() {
4831                        // Clock memory mappings are not shared in a meaningful way.
4832                        1
4833                    } else {
4834                        let memory_info = backing.memory().info()?;
4835                        memory_info.share_count as u64
4836                    }
4837                }
4838                MappingBacking::PrivateAnonymous => {
4839                    1 // Private mapping
4840                }
4841            };
4842
4843            let rss_kb = committed_bytes / 1024;
4844            writeln!(sink, "Rss:            {rss_kb:>8} kB")?;
4845
4846            let pss_kb = if mm_mapping.flags().contains(MappingFlags::SHARED) {
4847                rss_kb / share_count
4848            } else {
4849                rss_kb
4850            };
4851            writeln!(sink, "Pss:            {pss_kb:>8} kB")?;
4852
4853            track_stub!(TODO("https://fxbug.dev/322874967"), "smaps dirty pages");
4854            let (shared_dirty_kb, private_dirty_kb) = (0, 0);
4855
4856            let is_shared = share_count > 1;
4857            let shared_clean_kb = if is_shared { rss_kb } else { 0 };
4858            writeln!(sink, "Shared_Clean:   {shared_clean_kb:>8} kB")?;
4859            writeln!(sink, "Shared_Dirty:   {shared_dirty_kb:>8} kB")?;
4860
4861            let private_clean_kb = if is_shared { 0 } else { rss_kb };
4862            writeln!(sink, "Private_Clean:  {private_clean_kb:>8} kB")?;
4863            writeln!(sink, "Private_Dirty:  {private_dirty_kb:>8} kB")?;
4864
4865            let anonymous_kb = if mm_mapping.private_anonymous() { rss_kb } else { 0 };
4866            writeln!(sink, "Anonymous:      {anonymous_kb:>8} kB")?;
4867            writeln!(sink, "KernelPageSize: {page_size_kb:>8} kB")?;
4868            writeln!(sink, "MMUPageSize:    {page_size_kb:>8} kB")?;
4869
4870            let locked_kb =
4871                if mm_mapping.flags().contains(MappingFlags::LOCKED) { rss_kb } else { 0 };
4872            writeln!(sink, "Locked:         {locked_kb:>8} kB")?;
4873            writeln!(sink, "VmFlags: {}", mm_mapping.vm_flags())?;
4874
4875            track_stub!(TODO("https://fxbug.dev/297444691"), "optional smaps fields");
4876        }
4877
4878        Ok(())
4879    }
4880}
4881
4882/// Creates a memory object that can be used in an anonymous mapping for the `mmap` syscall.
4883pub fn create_anonymous_mapping_memory(size: u64) -> Result<Arc<MemoryObject>, Errno> {
4884    // mremap can grow memory regions, so make sure the memory object is resizable.
4885    let mut memory = MemoryObject::from(
4886        zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, size).map_err(|s| match s {
4887            zx::Status::NO_MEMORY => errno!(ENOMEM),
4888            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
4889            _ => impossible_error(s),
4890        })?,
4891    )
4892    .with_zx_name(b"starnix:memory_manager");
4893
4894    memory.set_zx_name(b"starnix-anon");
4895
4896    // TODO(https://fxbug.dev/42056890): Audit replace_as_executable usage
4897    memory = memory.replace_as_executable(&VMEX_RESOURCE).map_err(impossible_error)?;
4898    Ok(Arc::new(memory))
4899}
4900
4901fn generate_random_offset_for_aslr(arch_width: ArchWidth) -> usize {
4902    // Generate a number with ASLR_RANDOM_BITS.
4903    let randomness = {
4904        let random_bits =
4905            if arch_width.is_arch32() { ASLR_32_RANDOM_BITS } else { ASLR_RANDOM_BITS };
4906        let mask = (1 << random_bits) - 1;
4907        let mut bytes = [0; std::mem::size_of::<usize>()];
4908        starnix_crypto::cprng_draw(&mut bytes);
4909        usize::from_le_bytes(bytes) & mask
4910    };
4911
4912    // Transform it into a page-aligned offset.
4913    randomness * (*PAGE_SIZE as usize)
4914}
4915
4916#[cfg(test)]
4917mod tests {
4918    use super::*;
4919    use crate::mm::memory_accessor::{MemoryAccessor, MemoryAccessorExt};
4920    use crate::mm::syscalls::do_mmap;
4921    use crate::task::syscalls::sys_prctl;
4922    use crate::testing::*;
4923    use crate::vfs::FdNumber;
4924    use assert_matches::assert_matches;
4925    use itertools::assert_equal;
4926    use starnix_sync::{FileOpsCore, LockEqualOrBefore};
4927    use starnix_uapi::user_address::{UserCString, UserRef};
4928    use starnix_uapi::{
4929        MAP_ANONYMOUS, MAP_FIXED, MAP_GROWSDOWN, MAP_PRIVATE, MAP_SHARED, PR_SET_VMA,
4930        PR_SET_VMA_ANON_NAME, PROT_NONE, PROT_READ,
4931    };
4932    use std::ffi::CString;
4933    use zerocopy::{FromBytes, Immutable, KnownLayout};
4934
4935    #[::fuchsia::test]
4936    fn test_mapping_flags() {
4937        let options = MappingOptions::ANONYMOUS;
4938        let access_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
4939        let mapping_flags = MappingFlags::from_access_flags_and_options(access_flags, options);
4940        assert_eq!(mapping_flags.access_flags(), access_flags);
4941        assert_eq!(mapping_flags.options(), options);
4942
4943        let new_access_flags = ProtectionFlags::READ | ProtectionFlags::EXEC;
4944        let adusted_mapping_flags = mapping_flags.with_access_flags(new_access_flags);
4945        assert_eq!(adusted_mapping_flags.access_flags(), new_access_flags);
4946        assert_eq!(adusted_mapping_flags.options(), options);
4947    }
4948
    /// Checks that a mapping inserted with `MappingMode::Lazy` is reported by `any_ranges_lazy`
    /// until `ensure_range_mapped_in_user_vmar` has been called on its range.
    #[::fuchsia::test]
    async fn test_any_ranges_lazy() {
        spawn_kernel_and_run(async |_locked, current_task| {
            let mm = current_task.mm().unwrap();
            let page_size = *PAGE_SIZE as usize;
            let addr = (mm.base_addr + 10 * page_size).unwrap();
            let length = page_size;

            let memory = create_anonymous_mapping_memory(length as u64).unwrap();
            let flags = MappingFlags::from_access_flags_and_options(
                ProtectionFlags::READ | ProtectionFlags::WRITE,
                MappingOptions::empty(),
            );

            // Build a one-page lazy mapping by hand.
            let mapping = Mapping::with_name(
                MappingBacking::Memory(Box::new(MappingBackingMemory::new(addr, memory, 0))),
                flags,
                Access::rwx(),
                MappingName::None,
                MappingMode::Lazy,
            );

            // Register the mapping directly in the bookkeeping. The write lock is scoped so that
            // it is released before the state is locked again below.
            {
                let mut state = mm.state.write();
                state.mappings.insert(addr..(addr + length).unwrap(), mapping);
            }

            // The freshly inserted range must be reported as lazy.
            {
                let state = mm.state.read();
                assert!(state.any_ranges_lazy(std::iter::once((addr, Some(length)))));
            }

            // NOTE(review): the `true` return value presumably means that some mapping work was
            // performed — confirm against `ensure_range_mapped_in_user_vmar`.
            assert!(mm.ensure_range_mapped_in_user_vmar(addr, Some(length)).unwrap());

            // After the call, the range must no longer be reported as lazy.
            {
                let state = mm.state.read();
                assert!(!state.any_ranges_lazy(std::iter::once((addr, Some(length)))));
            }
        })
        .await;
    }
4990
    /// Exercises `set_brk`: initializing the program break, growing it within and across page
    /// boundaries, shrinking it, and shrinking it back to its base so that the mapping goes away.
    #[::fuchsia::test]
    async fn test_brk() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            // Look up the given addr in the mappings table.
            let get_range = |addr: UserAddress| {
                let state = mm.state.read();
                state
                    .mappings
                    .map
                    .get(addr)
                    .map(|(range, mapping)| (range.clone(), mapping.clone()))
            };

            // Initialize the program break.
            let base_addr = mm
                .set_brk(locked, &current_task, UserAddress::default())
                .expect("failed to set initial program break");
            assert!(base_addr > UserAddress::default());

            // Page containing the program break address should not be mapped.
            assert_eq!(get_range(base_addr), None);

            // Growing it by a single byte results in that page becoming mapped.
            let addr0 = mm
                .set_brk(locked, &current_task, (base_addr + 1u64).unwrap())
                .expect("failed to grow brk");
            assert!(addr0 > base_addr);
            let (range0, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range0.start, base_addr);
            assert_eq!(range0.end, (base_addr + *PAGE_SIZE).unwrap());

            // Grow the program break by another byte, which won't be enough to cause additional pages to be mapped.
            let addr1 = mm
                .set_brk(locked, &current_task, (base_addr + 2u64).unwrap())
                .expect("failed to grow brk");
            assert_eq!(addr1, (base_addr + 2u64).unwrap());
            let (range1, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range1.start, range0.start);
            assert_eq!(range1.end, range0.end);

            // Grow the program break by a non-trivial amount and observe the larger mapping.
            let addr2 = mm
                .set_brk(locked, &current_task, (base_addr + 24893u64).unwrap())
                .expect("failed to grow brk");
            assert_eq!(addr2, (base_addr + 24893u64).unwrap());
            let (range2, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range2.start, base_addr);
            assert_eq!(range2.end, addr2.round_up(*PAGE_SIZE).unwrap());

            // Shrink the program break and observe the smaller mapping.
            let addr3 = mm
                .set_brk(locked, &current_task, (base_addr + 14832u64).unwrap())
                .expect("failed to shrink brk");
            assert_eq!(addr3, (base_addr + 14832u64).unwrap());
            let (range3, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range3.start, base_addr);
            assert_eq!(range3.end, addr3.round_up(*PAGE_SIZE).unwrap());

            // Shrink the program break close to zero and observe the smaller mapping.
            let addr4 = mm
                .set_brk(locked, &current_task, (base_addr + 3u64).unwrap())
                .expect("failed to drastically shrink brk");
            assert_eq!(addr4, (base_addr + 3u64).unwrap());
            let (range4, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range4.start, base_addr);
            assert_eq!(range4.end, addr4.round_up(*PAGE_SIZE).unwrap());

            // Shrink the program break to zero and observe that the mapping is entirely gone.
            let addr5 = mm
                .set_brk(locked, &current_task, base_addr)
                .expect("failed to drastically shrink brk to zero");
            assert_eq!(addr5, base_addr);
            assert_eq!(get_range(base_addr), None);
        })
        .await;
    }
5069
    /// Checks that after `MemoryManager::exec` the task's new memory manager contains neither
    /// the previous program break mapping nor a previously created mapping, and that both
    /// addresses can be mapped again.
    #[::fuchsia::test]
    async fn test_mm_exec() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            // Reports whether `addr` is covered by a mapping in the memory manager captured
            // above, i.e. the one in place before the exec.
            let has = |addr: UserAddress| -> bool {
                let state = mm.state.read();
                state.mappings.get(addr).is_some()
            };

            let brk_addr = mm
                .set_brk(locked, &current_task, UserAddress::default())
                .expect("failed to set initial program break");
            assert!(brk_addr > UserAddress::default());

            // Allocate a single page of BRK space, so that the break base address is mapped.
            let _ = mm
                .set_brk(locked, &current_task, (brk_addr + 1u64).unwrap())
                .expect("failed to grow program break");
            assert!(has(brk_addr));

            let mapped_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert!(mapped_addr > UserAddress::default());
            assert!(has(mapped_addr));

            // Build a replacement memory manager and install it on the task.
            let node = current_task.lookup_path_from_root(locked, "/".into()).unwrap();
            let new_mm = MemoryManager::exec(
                current_task.thread_group().root_vmar.unowned(),
                current_task.running_state().mm.to_option_arc(),
                node,
                ArchWidth::Arch64,
            )
            .expect("failed to exec memory manager");
            current_task.running_state().mm.update(Some(new_mm));

            assert!(!has(brk_addr));
            assert!(!has(mapped_addr));

            // Check that the old addresses are actually available for mapping.
            let brk_addr2 = map_memory(locked, &current_task, brk_addr, *PAGE_SIZE);
            assert_eq!(brk_addr, brk_addr2);
            let mapped_addr2 = map_memory(locked, &current_task, mapped_addr, *PAGE_SIZE);
            assert_eq!(mapped_addr, mapped_addr2);
        })
        .await;
    }
5116
5117    #[::fuchsia::test]
5118    async fn test_get_contiguous_mappings_at() {
5119        spawn_kernel_and_run(async |locked, current_task| {
5120            let mm = current_task.mm().unwrap();
5121            let context = &mm.mapping_context;
5122
5123            // Create four one-page mappings with a hole between the third one and the fourth one.
5124            let page_size = *PAGE_SIZE as usize;
5125            let addr_a = (mm.base_addr + 10 * page_size).unwrap();
5126            let addr_b = (mm.base_addr + 11 * page_size).unwrap();
5127            let addr_c = (mm.base_addr + 12 * page_size).unwrap();
5128            let addr_d = (mm.base_addr + 14 * page_size).unwrap();
5129            assert_eq!(map_memory(locked, &current_task, addr_a, *PAGE_SIZE), addr_a);
5130            assert_eq!(map_memory(locked, &current_task, addr_b, *PAGE_SIZE), addr_b);
5131            assert_eq!(map_memory(locked, &current_task, addr_c, *PAGE_SIZE), addr_c);
5132            assert_eq!(map_memory(locked, &current_task, addr_d, *PAGE_SIZE), addr_d);
5133
5134            {
5135                let mm_state = mm.state.read();
5136                // Verify that requesting an unmapped address returns an empty iterator.
5137                assert_equal(
5138                    mm_state
5139                        .get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 50, &context)
5140                        .unwrap(),
5141                    vec![],
5142                );
5143                assert_equal(
5144                    mm_state
5145                        .get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 200, &context)
5146                        .unwrap(),
5147                    vec![],
5148                );
5149
5150                // Verify that requesting zero bytes returns an empty iterator.
5151                assert_equal(
5152                    mm_state.get_contiguous_mappings_at(addr_a, 0, &context).unwrap(),
5153                    vec![],
5154                );
5155
5156                // Verify errors.
5157                assert_eq!(
5158                    mm_state
5159                        .get_contiguous_mappings_at(UserAddress::from(100), usize::MAX, &context)
5160                        .err()
5161                        .unwrap(),
5162                    errno!(EFAULT)
5163                );
5164                assert_eq!(
5165                    mm_state
5166                        .get_contiguous_mappings_at(
5167                            (context.max_address() + 1u64).unwrap(),
5168                            0,
5169                            &context
5170                        )
5171                        .err()
5172                        .unwrap(),
5173                    errno!(EFAULT)
5174                );
5175            }
5176
5177            assert_eq!(mm.get_mapping_count(), 2);
5178            let mm_state = mm.state.read();
5179            let (map_a, map_b) = {
5180                let mut it = mm_state.mappings.iter();
5181                (it.next().unwrap().1, it.next().unwrap().1)
5182            };
5183
5184            assert_equal(
5185                mm_state.get_contiguous_mappings_at(addr_a, page_size, &context).unwrap(),
5186                vec![(map_a, page_size)],
5187            );
5188
5189            assert_equal(
5190                mm_state.get_contiguous_mappings_at(addr_a, page_size / 2, &context).unwrap(),
5191                vec![(map_a, page_size / 2)],
5192            );
5193
5194            assert_equal(
5195                mm_state.get_contiguous_mappings_at(addr_a, page_size * 3, &context).unwrap(),
5196                vec![(map_a, page_size * 3)],
5197            );
5198
5199            assert_equal(
5200                mm_state.get_contiguous_mappings_at(addr_b, page_size, &context).unwrap(),
5201                vec![(map_a, page_size)],
5202            );
5203
5204            assert_equal(
5205                mm_state.get_contiguous_mappings_at(addr_d, page_size, &context).unwrap(),
5206                vec![(map_b, page_size)],
5207            );
5208
5209            // Verify that results stop if there is a hole.
5210            assert_equal(
5211                mm_state
5212                    .get_contiguous_mappings_at(
5213                        (addr_a + page_size / 2).unwrap(),
5214                        page_size * 10,
5215                        &context,
5216                    )
5217                    .unwrap(),
5218                vec![(map_a, page_size * 2 + page_size / 2)],
5219            );
5220
5221            // Verify that results stop at the last mapped page.
5222            assert_equal(
5223                mm_state.get_contiguous_mappings_at(addr_d, page_size * 10, &context).unwrap(),
5224                vec![(map_b, page_size)],
5225            );
5226        })
5227        .await;
5228    }
5229
5230    #[::fuchsia::test]
5231    async fn test_read_write_crossing_mappings() {
5232        spawn_kernel_and_run(async |locked, current_task| {
5233            let mm = current_task.mm().unwrap();
5234            let ma = current_task.deref();
5235
5236            // Map two contiguous pages at fixed addresses, but backed by distinct mappings.
5237            let page_size = *PAGE_SIZE;
5238            let addr = (mm.base_addr + 10 * page_size).unwrap();
5239            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5240            assert_eq!(
5241                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5242                (addr + page_size).unwrap()
5243            );
5244            // Mappings get merged since they are baked by the same memory object
5245            assert_eq!(mm.get_mapping_count(), 1);
5246
5247            // Write a pattern crossing our two mappings.
5248            let test_addr = (addr + page_size / 2).unwrap();
5249            let data: Vec<u8> = (0..page_size).map(|i| (i % 256) as u8).collect();
5250            ma.write_memory(test_addr, &data).expect("failed to write test data");
5251
5252            // Read it back.
5253            let data_readback =
5254                ma.read_memory_to_vec(test_addr, data.len()).expect("failed to read test data");
5255            assert_eq!(&data, &data_readback);
5256        })
5257        .await;
5258    }
5259
5260    #[::fuchsia::test]
5261    async fn test_read_write_errors() {
5262        spawn_kernel_and_run(async |locked, current_task| {
5263            let ma = current_task.deref();
5264
5265            let page_size = *PAGE_SIZE;
5266            let addr = map_memory(locked, &current_task, UserAddress::default(), page_size);
5267            let buf = vec![0u8; page_size as usize];
5268
5269            // Verify that accessing data that is only partially mapped is an error.
5270            let partial_addr_before = (addr - page_size / 2).unwrap();
5271            assert_eq!(ma.write_memory(partial_addr_before, &buf), error!(EFAULT));
5272            assert_eq!(ma.read_memory_to_vec(partial_addr_before, buf.len()), error!(EFAULT));
5273            let partial_addr_after = (addr + page_size / 2).unwrap();
5274            assert_eq!(ma.write_memory(partial_addr_after, &buf), error!(EFAULT));
5275            assert_eq!(ma.read_memory_to_vec(partial_addr_after, buf.len()), error!(EFAULT));
5276
5277            // Verify that accessing unmapped memory is an error.
5278            let unmapped_addr = (addr - 10 * page_size).unwrap();
5279            assert_eq!(ma.write_memory(unmapped_addr, &buf), error!(EFAULT));
5280            assert_eq!(ma.read_memory_to_vec(unmapped_addr, buf.len()), error!(EFAULT));
5281
5282            // However, accessing zero bytes in unmapped memory is not an error.
5283            ma.write_memory(unmapped_addr, &[]).expect("failed to write no data");
5284            ma.read_memory_to_vec(unmapped_addr, 0).expect("failed to read no data");
5285        })
5286        .await;
5287    }
5288
5289    #[::fuchsia::test]
5290    async fn test_read_c_string_to_vec_large() {
5291        spawn_kernel_and_run(async |locked, current_task| {
5292            let mm = current_task.mm().unwrap();
5293            let ma = current_task.deref();
5294
5295            let page_size = *PAGE_SIZE;
5296            let max_size = 4 * page_size as usize;
5297            let addr = (mm.base_addr + 10 * page_size).unwrap();
5298
5299            assert_eq!(map_memory(locked, &current_task, addr, max_size as u64), addr);
5300
5301            let mut random_data = vec![0; max_size];
5302            starnix_crypto::cprng_draw(&mut random_data);
5303            // Remove all NUL bytes.
5304            for i in 0..random_data.len() {
5305                if random_data[i] == 0 {
5306                    random_data[i] = 1;
5307                }
5308            }
5309            random_data[max_size - 1] = 0;
5310
5311            ma.write_memory(addr, &random_data).expect("failed to write test string");
5312            // We should read the same value minus the last byte (NUL char).
5313            assert_eq!(
5314                ma.read_c_string_to_vec(UserCString::new(current_task, addr), max_size).unwrap(),
5315                random_data[..max_size - 1]
5316            );
5317        })
5318        .await;
5319    }
5320
5321    #[::fuchsia::test]
5322    async fn test_read_c_string_to_vec() {
5323        spawn_kernel_and_run(async |locked, current_task| {
5324            let mm = current_task.mm().unwrap();
5325            let ma = current_task.deref();
5326
5327            let page_size = *PAGE_SIZE;
5328            let max_size = 2 * page_size as usize;
5329            let addr = (mm.base_addr + 10 * page_size).unwrap();
5330
5331            // Map a page at a fixed address and write an unterminated string at the end of it.
5332            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5333            let test_str = b"foo!";
5334            let test_addr =
5335                addr.checked_add(page_size as usize).unwrap().checked_sub(test_str.len()).unwrap();
5336            ma.write_memory(test_addr, test_str).expect("failed to write test string");
5337
5338            // Expect error if the string is not terminated.
5339            assert_eq!(
5340                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size),
5341                error!(ENAMETOOLONG)
5342            );
5343
5344            // Expect success if the string is terminated.
5345            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
5346            assert_eq!(
5347                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
5348                    .unwrap(),
5349                "foo"
5350            );
5351
5352            // Expect success if the string spans over two mappings.
5353            assert_eq!(
5354                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5355                (addr + page_size).unwrap()
5356            );
5357            // TODO: Adjacent private anonymous mappings are collapsed. To test this case this test needs to
5358            // provide a backing for the second mapping.
5359            // assert_eq!(mm.get_mapping_count(), 2);
5360            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
5361                .expect("failed to write extra chars");
5362            assert_eq!(
5363                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
5364                    .unwrap(),
5365                "foobar",
5366            );
5367
5368            // Expect error if the string exceeds max limit
5369            assert_eq!(
5370                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), 2),
5371                error!(ENAMETOOLONG)
5372            );
5373
5374            // Expect error if the address is invalid.
5375            assert_eq!(
5376                ma.read_c_string_to_vec(UserCString::null(current_task), max_size),
5377                error!(EFAULT)
5378            );
5379        })
5380        .await;
5381    }
5382
5383    #[::fuchsia::test]
5384    async fn can_read_argv_like_regions() {
5385        spawn_kernel_and_run(async |locked, current_task| {
5386            let ma = current_task.deref();
5387
5388            // Map a page.
5389            let page_size = *PAGE_SIZE;
5390            let addr = map_memory_anywhere(locked, &current_task, page_size);
5391            assert!(!addr.is_null());
5392
5393            // Write an unterminated string.
5394            let mut payload = "first".as_bytes().to_vec();
5395            let mut expected_parses = vec![];
5396            ma.write_memory(addr, &payload).unwrap();
5397
5398            // Expect success if the string is terminated.
5399            expected_parses.push(payload.clone());
5400            payload.push(0);
5401            ma.write_memory(addr, &payload).unwrap();
5402            assert_eq!(
5403                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
5404                expected_parses,
5405            );
5406
5407            // Make sure we can parse multiple strings from the same region.
5408            let second = b"second";
5409            payload.extend(second);
5410            payload.push(0);
5411            expected_parses.push(second.to_vec());
5412
5413            let third = b"third";
5414            payload.extend(third);
5415            payload.push(0);
5416            expected_parses.push(third.to_vec());
5417
5418            ma.write_memory(addr, &payload).unwrap();
5419            assert_eq!(
5420                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
5421                expected_parses,
5422            );
5423        })
5424        .await;
5425    }
5426
5427    #[::fuchsia::test]
5428    async fn truncate_argv_like_regions() {
5429        spawn_kernel_and_run(async |locked, current_task| {
5430            let ma = current_task.deref();
5431
5432            // Map a page.
5433            let page_size = *PAGE_SIZE;
5434            let addr = map_memory_anywhere(locked, &current_task, page_size);
5435            assert!(!addr.is_null());
5436
5437            let payload = b"first\0second\0third\0";
5438            ma.write_memory(addr, payload).unwrap();
5439            assert_eq!(
5440                ma.read_nul_delimited_c_string_list(addr, payload.len() - 3).unwrap(),
5441                vec![b"first".to_vec(), b"second".to_vec(), b"thi".to_vec()],
5442                "Skipping last three bytes of payload should skip last two bytes of 3rd string"
5443            );
5444        })
5445        .await;
5446    }
5447
5448    #[::fuchsia::test]
5449    async fn test_read_c_string() {
5450        spawn_kernel_and_run(async |locked, current_task| {
5451            let mm = current_task.mm().unwrap();
5452            let ma = current_task.deref();
5453
5454            let page_size = *PAGE_SIZE;
5455            let buf_cap = 2 * page_size as usize;
5456            let mut buf = Vec::with_capacity(buf_cap);
5457            // We can't just use `spare_capacity_mut` because `Vec::with_capacity`
5458            // returns a `Vec` with _at least_ the requested capacity.
5459            let buf = &mut buf.spare_capacity_mut()[..buf_cap];
5460            let addr = (mm.base_addr + 10 * page_size).unwrap();
5461
5462            // Map a page at a fixed address and write an unterminated string at the end of it..
5463            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5464            let test_str = b"foo!";
5465            let test_addr = (addr + (page_size - test_str.len() as u64)).unwrap();
5466            ma.write_memory(test_addr, test_str).expect("failed to write test string");
5467
5468            // Expect error if the string is not terminated.
5469            assert_eq!(
5470                ma.read_c_string(UserCString::new(current_task, test_addr), buf),
5471                error!(ENAMETOOLONG)
5472            );
5473
5474            // Expect success if the string is terminated.
5475            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
5476            assert_eq!(
5477                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
5478                "foo"
5479            );
5480
5481            // Expect success if the string spans over two mappings.
5482            assert_eq!(
5483                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5484                (addr + page_size).unwrap()
5485            );
5486            // TODO: To be multiple mappings we need to provide a file backing for the next page or the
5487            // mappings will be collapsed.
5488            //assert_eq!(mm.get_mapping_count(), 2);
5489            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
5490                .expect("failed to write extra chars");
5491            assert_eq!(
5492                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
5493                "foobar"
5494            );
5495
5496            // Expect error if the string does not fit in the provided buffer.
5497            assert_eq!(
5498                ma.read_c_string(
5499                    UserCString::new(current_task, test_addr),
5500                    &mut [MaybeUninit::uninit(); 2]
5501                ),
5502                error!(ENAMETOOLONG)
5503            );
5504
5505            // Expect error if the address is invalid.
5506            assert_eq!(ma.read_c_string(UserCString::null(current_task), buf), error!(EFAULT));
5507        })
5508        .await;
5509    }
5510
5511    #[::fuchsia::test]
5512    async fn test_find_next_unused_range() {
5513        spawn_kernel_and_run(async |locked, current_task| {
5514            let mm = current_task.mm().unwrap();
5515
5516            let mmap_top = mm.state.read().find_next_unused_range(0).unwrap().ptr();
5517            let page_size = *PAGE_SIZE as usize;
5518            assert!(mmap_top <= RESTRICTED_ASPACE_HIGHEST_ADDRESS);
5519
5520            // No mappings - top address minus requested size is available
5521            assert_eq!(
5522                mm.state.read().find_next_unused_range(page_size).unwrap(),
5523                UserAddress::from_ptr(mmap_top - page_size)
5524            );
5525
5526            // Fill it.
5527            let addr = UserAddress::from_ptr(mmap_top - page_size);
5528            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5529
5530            // The next available range is right before the new mapping.
5531            assert_eq!(
5532                mm.state.read().find_next_unused_range(page_size).unwrap(),
5533                UserAddress::from_ptr(addr.ptr() - page_size)
5534            );
5535
5536            // Allocate an extra page before a one-page gap.
5537            let addr2 = UserAddress::from_ptr(addr.ptr() - 2 * page_size);
5538            assert_eq!(map_memory(locked, &current_task, addr2, *PAGE_SIZE), addr2);
5539
5540            // Searching for one-page range still gives the same result
5541            assert_eq!(
5542                mm.state.read().find_next_unused_range(page_size).unwrap(),
5543                UserAddress::from_ptr(addr.ptr() - page_size)
5544            );
5545
5546            // Searching for a bigger range results in the area before the second mapping
5547            assert_eq!(
5548                mm.state.read().find_next_unused_range(2 * page_size).unwrap(),
5549                UserAddress::from_ptr(addr2.ptr() - 2 * page_size)
5550            );
5551
5552            // Searching for more memory than available should fail.
5553            assert_eq!(mm.state.read().find_next_unused_range(mmap_top), None);
5554        })
5555        .await;
5556    }
5557
5558    #[::fuchsia::test]
5559    async fn test_count_placements() {
5560        spawn_kernel_and_run(async |locked, current_task| {
5561            let mm = current_task.mm().unwrap();
5562
5563            // ten-page range
5564            let page_size = *PAGE_SIZE as usize;
5565            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5566                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5567
5568            assert_eq!(
5569                mm.state.read().count_possible_placements(11 * page_size, &subrange_ten),
5570                Some(0)
5571            );
5572            assert_eq!(
5573                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
5574                Some(1)
5575            );
5576            assert_eq!(
5577                mm.state.read().count_possible_placements(9 * page_size, &subrange_ten),
5578                Some(2)
5579            );
5580            assert_eq!(
5581                mm.state.read().count_possible_placements(page_size, &subrange_ten),
5582                Some(10)
5583            );
5584
5585            // map 6th page
5586            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
5587            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5588
5589            assert_eq!(
5590                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
5591                Some(0)
5592            );
5593            assert_eq!(
5594                mm.state.read().count_possible_placements(5 * page_size, &subrange_ten),
5595                Some(1)
5596            );
5597            assert_eq!(
5598                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
5599                Some(3)
5600            );
5601            assert_eq!(
5602                mm.state.read().count_possible_placements(page_size, &subrange_ten),
5603                Some(9)
5604            );
5605        })
5606        .await;
5607    }
5608
5609    #[::fuchsia::test]
5610    async fn test_pick_placement() {
5611        spawn_kernel_and_run(async |locked, current_task| {
5612            let mm = current_task.mm().unwrap();
5613
5614            let page_size = *PAGE_SIZE as usize;
5615            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5616                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5617
5618            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
5619            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5620            assert_eq!(
5621                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
5622                Some(3)
5623            );
5624
5625            assert_eq!(
5626                mm.state.read().pick_placement(4 * page_size, 0, &subrange_ten),
5627                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE))
5628            );
5629            assert_eq!(
5630                mm.state.read().pick_placement(4 * page_size, 1, &subrange_ten),
5631                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + page_size))
5632            );
5633            assert_eq!(
5634                mm.state.read().pick_placement(4 * page_size, 2, &subrange_ten),
5635                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 6 * page_size))
5636            );
5637        })
5638        .await;
5639    }
5640
5641    #[::fuchsia::test]
5642    async fn test_find_random_unused_range() {
5643        spawn_kernel_and_run(async |locked, current_task| {
5644            let mm = current_task.mm().unwrap();
5645
5646            // ten-page range
5647            let page_size = *PAGE_SIZE as usize;
5648            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5649                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5650
5651            for _ in 0..10 {
5652                let addr = mm.state.read().find_random_unused_range(page_size, &subrange_ten);
5653                assert!(addr.is_some());
5654                assert_eq!(
5655                    map_memory(locked, &current_task, addr.unwrap(), *PAGE_SIZE),
5656                    addr.unwrap()
5657                );
5658            }
5659            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5660        })
5661        .await;
5662    }
5663
5664    #[::fuchsia::test]
5665    async fn test_grows_down_near_aspace_base() {
5666        spawn_kernel_and_run(async |locked, current_task| {
5667            let mm = current_task.mm().unwrap();
5668
5669            let page_count = 10;
5670
5671            let page_size = *PAGE_SIZE as usize;
5672            let addr =
5673                (UserAddress::from_ptr(RESTRICTED_ASPACE_BASE) + page_count * page_size).unwrap();
5674            assert_eq!(
5675                map_memory_with_flags(
5676                    locked,
5677                    &current_task,
5678                    addr,
5679                    page_size as u64,
5680                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN
5681                ),
5682                addr
5683            );
5684
5685            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)..addr;
5686            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5687        })
5688        .await;
5689    }
5690
5691    #[::fuchsia::test]
5692    async fn test_unmap_returned_mappings() {
5693        spawn_kernel_and_run(async |locked, current_task| {
5694            let mm = current_task.mm().unwrap();
5695
5696            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5697
5698            let mut released_mappings = ReleasedMappings::default();
5699            let mut mm_state = mm.state.write();
5700            let unmap_result =
5701                mm_state.unmap(&mm, addr, *PAGE_SIZE as usize, &mut released_mappings);
5702            assert!(unmap_result.is_ok());
5703            assert_eq!(released_mappings.len(), 1);
5704            released_mappings.finalize(mm_state);
5705        })
5706        .await;
5707    }
5708
5709    #[::fuchsia::test]
5710    async fn test_unmap_returns_multiple_mappings() {
5711        spawn_kernel_and_run(async |locked, current_task| {
5712            let mm = current_task.mm().unwrap();
5713
5714            let addr = mm.state.read().find_next_unused_range(3 * *PAGE_SIZE as usize).unwrap();
5715            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5716            let _ = map_memory(locked, &current_task, (addr + 2 * *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5717
5718            let mut released_mappings = ReleasedMappings::default();
5719            let mut mm_state = mm.state.write();
5720            let unmap_result =
5721                mm_state.unmap(&mm, addr, (*PAGE_SIZE * 3) as usize, &mut released_mappings);
5722            assert!(unmap_result.is_ok());
5723            assert_eq!(released_mappings.len(), 2);
5724            released_mappings.finalize(mm_state);
5725        })
5726        .await;
5727    }
5728
5729    /// Maps two pages in separate mappings next to each other, then unmaps the first page.
5730    /// The second page should not be modified.
5731    #[::fuchsia::test]
5732    async fn test_map_two_unmap_one() {
5733        spawn_kernel_and_run(async |locked, current_task| {
5734            let mm = current_task.mm().unwrap();
5735
5736            // reserve memory for both pages
5737            let addr_reserve =
5738                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5739            let addr1 = do_mmap(
5740                locked,
5741                &current_task,
5742                addr_reserve,
5743                *PAGE_SIZE as usize,
5744                PROT_READ, // Map read-only to avoid merging of the two mappings
5745                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5746                FdNumber::from_raw(-1),
5747                0,
5748            )
5749            .expect("failed to mmap");
5750            let addr2 = map_memory_with_flags(
5751                locked,
5752                &current_task,
5753                (addr_reserve + *PAGE_SIZE).unwrap(),
5754                *PAGE_SIZE,
5755                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5756            );
5757            let state = mm.state.read();
5758            let (range1, _) = state.mappings.get(addr1).expect("mapping");
5759            assert_eq!(range1.start, addr1);
5760            assert_eq!(range1.end, (addr1 + *PAGE_SIZE).unwrap());
5761            let (range2, mapping2) = state.mappings.get(addr2).expect("mapping");
5762            assert_eq!(range2.start, addr2);
5763            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5764            let original_memory2 = {
5765                match state.get_mapping_backing(mapping2) {
5766                    MappingBacking::Memory(backing) => {
5767                        assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5768                        backing.memory().clone()
5769                    }
5770                    MappingBacking::PrivateAnonymous => {
5771                        panic!("Unexpected private anonymous mapping")
5772                    }
5773                }
5774            };
5775            std::mem::drop(state);
5776
5777            assert_eq!(mm.unmap(addr1, *PAGE_SIZE as usize), Ok(()));
5778
5779            let state = mm.state.read();
5780
5781            // The first page should be unmapped.
5782            assert!(state.mappings.get(addr1).is_none());
5783
5784            // The second page should remain unchanged.
5785            let (range2, mapping2) = state.mappings.get(addr2).expect("second page");
5786            assert_eq!(range2.start, addr2);
5787            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5788            match state.get_mapping_backing(mapping2) {
5789                MappingBacking::Memory(backing) => {
5790                    assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5791                    assert_eq!(original_memory2.get_koid(), backing.memory().get_koid());
5792                }
5793                MappingBacking::PrivateAnonymous => panic!("Unexpected private anonymous mapping"),
5794            }
5795        })
5796        .await;
5797    }
5798
5799    #[::fuchsia::test]
5800    async fn test_read_write_objects() {
5801        spawn_kernel_and_run(async |locked, current_task| {
5802            let ma = current_task.deref();
5803            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5804            let items_ref = UserRef::<i32>::new(addr);
5805
5806            let items_written = vec![0, 2, 3, 7, 1];
5807            ma.write_objects(items_ref, &items_written).expect("Failed to write object array.");
5808
5809            let items_read = ma
5810                .read_objects_to_vec(items_ref, items_written.len())
5811                .expect("Failed to read object array.");
5812
5813            assert_eq!(items_written, items_read);
5814        })
5815        .await;
5816    }
5817
5818    #[::fuchsia::test]
5819    async fn test_read_write_objects_null() {
5820        spawn_kernel_and_run(async |_, current_task| {
5821            let ma = current_task.deref();
5822            let items_ref = UserRef::<i32>::new(UserAddress::default());
5823
5824            let items_written = vec![];
5825            ma.write_objects(items_ref, &items_written)
5826                .expect("Failed to write empty object array.");
5827
5828            let items_read = ma
5829                .read_objects_to_vec(items_ref, items_written.len())
5830                .expect("Failed to read empty object array.");
5831
5832            assert_eq!(items_written, items_read);
5833        })
5834        .await;
5835    }
5836
5837    #[::fuchsia::test]
5838    async fn test_read_object_partial() {
5839        #[derive(Debug, Default, Copy, Clone, KnownLayout, FromBytes, Immutable, PartialEq)]
5840        struct Items {
5841            val: [i32; 4],
5842        }
5843
5844        spawn_kernel_and_run(async |locked, current_task| {
5845            let ma = current_task.deref();
5846            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5847            let items_array_ref = UserRef::<i32>::new(addr);
5848
5849            // Populate some values.
5850            let items_written = vec![75, 23, 51, 98];
5851            ma.write_objects(items_array_ref, &items_written)
5852                .expect("Failed to write object array.");
5853
5854            // Full read of all 4 values.
5855            let items_ref = UserRef::<Items>::new(addr);
5856            let items_read = ma
5857                .read_object_partial(items_ref, std::mem::size_of::<Items>())
5858                .expect("Failed to read object");
5859            assert_eq!(items_written, items_read.val);
5860
5861            // Partial read of the first two.
5862            let items_read = ma.read_object_partial(items_ref, 8).expect("Failed to read object");
5863            assert_eq!(vec![75, 23, 0, 0], items_read.val);
5864
5865            // The API currently allows reading 0 bytes (this could be re-evaluated) so test that does
5866            // the right thing.
5867            let items_read = ma.read_object_partial(items_ref, 0).expect("Failed to read object");
5868            assert_eq!(vec![0, 0, 0, 0], items_read.val);
5869
5870            // Size bigger than the object.
5871            assert_eq!(
5872                ma.read_object_partial(items_ref, std::mem::size_of::<Items>() + 8),
5873                error!(EINVAL)
5874            );
5875
5876            // Bad pointer.
5877            assert_eq!(
5878                ma.read_object_partial(UserRef::<Items>::new(UserAddress::from(1)), 16),
5879                error!(EFAULT)
5880            );
5881        })
5882        .await;
5883    }
5884
5885    #[::fuchsia::test]
5886    async fn test_partial_read() {
5887        spawn_kernel_and_run(async |locked, current_task| {
5888            let mm = current_task.mm().unwrap();
5889            let ma = current_task.deref();
5890
5891            let addr = mm.state.read().find_next_unused_range(2 * *PAGE_SIZE as usize).unwrap();
5892            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5893            let second_map =
5894                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5895
5896            let bytes = vec![0xf; (*PAGE_SIZE * 2) as usize];
5897            assert!(ma.write_memory(addr, &bytes).is_ok());
5898            let mut state = mm.state.write();
5899            let mut released_mappings = ReleasedMappings::default();
5900            state
5901                .protect(
5902                    ma,
5903                    second_map,
5904                    *PAGE_SIZE as usize,
5905                    ProtectionFlags::empty(),
5906                    &mut released_mappings,
5907                )
5908                .unwrap();
5909            released_mappings.finalize(state);
5910            assert_eq!(
5911                ma.read_memory_partial_to_vec(addr, bytes.len()).unwrap().len(),
5912                *PAGE_SIZE as usize,
5913            );
5914        })
5915        .await;
5916    }
5917
5918    fn map_memory_growsdown<L>(
5919        locked: &mut Locked<L>,
5920        current_task: &CurrentTask,
5921        length: u64,
5922    ) -> UserAddress
5923    where
5924        L: LockEqualOrBefore<FileOpsCore> + LockBefore<ThreadGroupLimits>,
5925    {
5926        map_memory_with_flags(
5927            locked,
5928            current_task,
5929            UserAddress::default(),
5930            length,
5931            MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN,
5932        )
5933    }
5934
5935    #[::fuchsia::test]
5936    async fn test_grow_mapping_empty_mm() {
5937        spawn_kernel_and_run(async |_, current_task| {
5938            let mm = current_task.mm().unwrap();
5939
5940            let addr = UserAddress::from(0x100000);
5941
5942            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5943        })
5944        .await;
5945    }
5946
5947    #[::fuchsia::test]
5948    async fn test_grow_inside_mapping() {
5949        spawn_kernel_and_run(async |locked, current_task| {
5950            let mm = current_task.mm().unwrap();
5951
5952            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5953
5954            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5955        })
5956        .await;
5957    }
5958
5959    #[::fuchsia::test]
5960    async fn test_grow_write_fault_inside_read_only_mapping() {
5961        spawn_kernel_and_run(async |locked, current_task| {
5962            let mm = current_task.mm().unwrap();
5963
5964            let addr = do_mmap(
5965                locked,
5966                &current_task,
5967                UserAddress::default(),
5968                *PAGE_SIZE as usize,
5969                PROT_READ,
5970                MAP_ANONYMOUS | MAP_PRIVATE,
5971                FdNumber::from_raw(-1),
5972                0,
5973            )
5974            .expect("Could not map memory");
5975
5976            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5977            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5978        })
5979        .await;
5980    }
5981
5982    #[::fuchsia::test]
5983    async fn test_grow_fault_inside_prot_none_mapping() {
5984        spawn_kernel_and_run(async |locked, current_task| {
5985            let mm = current_task.mm().unwrap();
5986
5987            let addr = do_mmap(
5988                locked,
5989                &current_task,
5990                UserAddress::default(),
5991                *PAGE_SIZE as usize,
5992                PROT_NONE,
5993                MAP_ANONYMOUS | MAP_PRIVATE,
5994                FdNumber::from_raw(-1),
5995                0,
5996            )
5997            .expect("Could not map memory");
5998
5999            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
6000            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
6001        })
6002        .await;
6003    }
6004
6005    #[::fuchsia::test]
6006    async fn test_grow_below_mapping() {
6007        spawn_kernel_and_run(async |locked, current_task| {
6008            let mm = current_task.mm().unwrap();
6009
6010            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) - *PAGE_SIZE;
6011
6012            assert_matches!(mm.extend_growsdown_mapping_to_address(addr.unwrap(), false), Ok(true));
6013        })
6014        .await;
6015    }
6016
6017    #[::fuchsia::test]
6018    async fn test_grow_above_mapping() {
6019        spawn_kernel_and_run(async |locked, current_task| {
6020            let mm = current_task.mm().unwrap();
6021
6022            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) + *PAGE_SIZE;
6023
6024            assert_matches!(
6025                mm.extend_growsdown_mapping_to_address(addr.unwrap(), false),
6026                Ok(false)
6027            );
6028        })
6029        .await;
6030    }
6031
6032    #[::fuchsia::test]
6033    async fn test_grow_write_fault_below_read_only_mapping() {
6034        spawn_kernel_and_run(async |locked, current_task| {
6035            let mm = current_task.mm().unwrap();
6036
6037            let mapped_addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE);
6038
6039            mm.protect(&current_task, mapped_addr, *PAGE_SIZE as usize, ProtectionFlags::READ)
6040                .unwrap();
6041
6042            assert_matches!(
6043                mm.extend_growsdown_mapping_to_address((mapped_addr - *PAGE_SIZE).unwrap(), true),
6044                Ok(false)
6045            );
6046
6047            assert_eq!(mm.get_mapping_count(), 1);
6048        })
6049        .await;
6050    }
6051
6052    #[::fuchsia::test]
6053    async fn test_snapshot_paged_memory() {
6054        use zx::sys::zx_page_request_command_t::ZX_PAGER_VMO_READ;
6055
6056        spawn_kernel_and_run(async |locked, current_task| {
6057            let mm = current_task.mm().unwrap();
6058            let ma = current_task.deref();
6059
6060            let port = Arc::new(zx::Port::create());
6061            let port_clone = port.clone();
6062            let pager =
6063                Arc::new(zx::Pager::create(zx::PagerOptions::empty()).expect("create failed"));
6064            let pager_clone = pager.clone();
6065
6066            const VMO_SIZE: u64 = 128 * 1024;
6067            let vmo = Arc::new(
6068                pager
6069                    .create_vmo(zx::VmoOptions::RESIZABLE, &port, 1, VMO_SIZE)
6070                    .expect("create_vmo failed"),
6071            );
6072            let vmo_clone = vmo.clone();
6073
6074            // Create a thread to service the port where we will receive pager requests.
6075            let thread = std::thread::spawn(move || {
6076                loop {
6077                    let packet =
6078                        port_clone.wait(zx::MonotonicInstant::INFINITE).expect("wait failed");
6079                    match packet.contents() {
6080                        zx::PacketContents::Pager(contents) => {
6081                            if contents.command() == ZX_PAGER_VMO_READ {
6082                                let range = contents.range();
6083                                let source_vmo = zx::Vmo::create(range.end - range.start)
6084                                    .expect("create failed");
6085                                pager_clone
6086                                    .supply_pages(&vmo_clone, range, &source_vmo, 0)
6087                                    .expect("supply_pages failed");
6088                            }
6089                        }
6090                        zx::PacketContents::User(_) => break,
6091                        _ => {}
6092                    }
6093                }
6094            });
6095
6096            let child_vmo = vmo
6097                .create_child(zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, VMO_SIZE)
6098                .unwrap();
6099
6100            // Write something to the source VMO.
6101            vmo.write(b"foo", 0).expect("write failed");
6102
6103            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
6104            let addr = mm
6105                .map_memory(
6106                    DesiredAddress::Any,
6107                    Arc::new(MemoryObject::from(child_vmo)),
6108                    0,
6109                    VMO_SIZE as usize,
6110                    prot_flags,
6111                    Access::rwx(),
6112                    MappingOptions::empty(),
6113                    MappingName::None,
6114                )
6115                .expect("map failed");
6116
6117            let target = current_task.clone_task_for_test(locked, 0, None);
6118
6119            // Make sure it has what we wrote.
6120            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
6121            assert_eq!(buf, b"foo");
6122
6123            // Write something to both source and target and make sure they are forked.
6124            ma.write_memory(addr, b"bar").expect("write_memory failed");
6125
6126            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
6127            assert_eq!(buf, b"foo");
6128
6129            target.write_memory(addr, b"baz").expect("write_memory failed");
6130            let buf = ma.read_memory_to_vec(addr, 3).expect("read_memory failed");
6131            assert_eq!(buf, b"bar");
6132
6133            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
6134            assert_eq!(buf, b"baz");
6135
6136            port.queue(&zx::Packet::from_user_packet(0, 0, zx::UserPacket::from_u8_array([0; 32])))
6137                .unwrap();
6138            thread.join().unwrap();
6139        })
6140        .await;
6141    }
6142
6143    #[::fuchsia::test]
6144    async fn test_set_vma_name() {
6145        spawn_kernel_and_run(async |locked, mut current_task| {
6146            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6147
6148            let vma_name = "vma name";
6149            current_task.write_memory(name_addr, vma_name.as_bytes()).unwrap();
6150
6151            let mapping_addr =
6152                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6153
6154            sys_prctl(
6155                locked,
6156                &mut current_task,
6157                PR_SET_VMA,
6158                PR_SET_VMA_ANON_NAME as u64,
6159                mapping_addr.ptr() as u64,
6160                *PAGE_SIZE,
6161                name_addr.ptr() as u64,
6162            )
6163            .unwrap();
6164
6165            assert_eq!(
6166                *current_task.mm().unwrap().get_mapping_name(mapping_addr).unwrap().unwrap(),
6167                vma_name
6168            );
6169        })
6170        .await;
6171    }
6172
6173    #[::fuchsia::test]
6174    async fn test_set_vma_name_adjacent_mappings() {
6175        spawn_kernel_and_run(async |locked, mut current_task| {
6176            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6177            current_task
6178                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6179                .unwrap();
6180
6181            let first_mapping_addr =
6182                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6183            let second_mapping_addr = map_memory_with_flags(
6184                locked,
6185                &current_task,
6186                (first_mapping_addr + *PAGE_SIZE).unwrap(),
6187                *PAGE_SIZE,
6188                MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
6189            );
6190
6191            assert_eq!((first_mapping_addr + *PAGE_SIZE).unwrap(), second_mapping_addr);
6192
6193            sys_prctl(
6194                locked,
6195                &mut current_task,
6196                PR_SET_VMA,
6197                PR_SET_VMA_ANON_NAME as u64,
6198                first_mapping_addr.ptr() as u64,
6199                2 * *PAGE_SIZE,
6200                name_addr.ptr() as u64,
6201            )
6202            .unwrap();
6203
6204            {
6205                let mm = current_task.mm().unwrap();
6206                let state = mm.state.read();
6207
6208                // The name should apply to both mappings.
6209                let (_, mapping) = state.mappings.get(first_mapping_addr).unwrap();
6210                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6211
6212                let (_, mapping) = state.mappings.get(second_mapping_addr).unwrap();
6213                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6214            }
6215        })
6216        .await;
6217    }
6218
6219    #[::fuchsia::test]
6220    async fn test_set_vma_name_beyond_end() {
6221        spawn_kernel_and_run(async |locked, mut current_task| {
6222            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6223            current_task
6224                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6225                .unwrap();
6226
6227            let mapping_addr =
6228                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6229
6230            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
6231            current_task.mm().unwrap().unmap(second_page, *PAGE_SIZE as usize).unwrap();
6232
6233            // This should fail with ENOMEM since it extends past the end of the mapping into unmapped memory.
6234            assert_eq!(
6235                sys_prctl(
6236                    locked,
6237                    &mut current_task,
6238                    PR_SET_VMA,
6239                    PR_SET_VMA_ANON_NAME as u64,
6240                    mapping_addr.ptr() as u64,
6241                    2 * *PAGE_SIZE,
6242                    name_addr.ptr() as u64,
6243                ),
6244                error!(ENOMEM)
6245            );
6246
6247            // Despite returning an error, the prctl should still assign a name to the region at the start of the region.
6248            {
6249                let mm = current_task.mm().unwrap();
6250                let state = mm.state.read();
6251
6252                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6253                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6254            }
6255        })
6256        .await;
6257    }
6258
6259    #[::fuchsia::test]
6260    async fn test_set_vma_name_before_start() {
6261        spawn_kernel_and_run(async |locked, mut current_task| {
6262            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6263            current_task
6264                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6265                .unwrap();
6266
6267            let mapping_addr =
6268                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6269
6270            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
6271            current_task.mm().unwrap().unmap(mapping_addr, *PAGE_SIZE as usize).unwrap();
6272
6273            // This should fail with ENOMEM since the start of the range is in unmapped memory.
6274            assert_eq!(
6275                sys_prctl(
6276                    locked,
6277                    &mut current_task,
6278                    PR_SET_VMA,
6279                    PR_SET_VMA_ANON_NAME as u64,
6280                    mapping_addr.ptr() as u64,
6281                    2 * *PAGE_SIZE,
6282                    name_addr.ptr() as u64,
6283                ),
6284                error!(ENOMEM)
6285            );
6286
6287            // Unlike a range which starts within a mapping and extends past the end, this should not assign
6288            // a name to any mappings.
6289            {
6290                let mm = current_task.mm().unwrap();
6291                let state = mm.state.read();
6292
6293                let (_, mapping) = state.mappings.get(second_page).unwrap();
6294                assert_eq!(mapping.name(), MappingName::None);
6295            }
6296        })
6297        .await;
6298    }
6299
6300    #[::fuchsia::test]
6301    async fn test_set_vma_name_partial() {
6302        spawn_kernel_and_run(async |locked, mut current_task| {
6303            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6304            current_task
6305                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6306                .unwrap();
6307
6308            let mapping_addr =
6309                map_memory(locked, &current_task, UserAddress::default(), 3 * *PAGE_SIZE);
6310
6311            assert_eq!(
6312                sys_prctl(
6313                    locked,
6314                    &mut current_task,
6315                    PR_SET_VMA,
6316                    PR_SET_VMA_ANON_NAME as u64,
6317                    (mapping_addr + *PAGE_SIZE).unwrap().ptr() as u64,
6318                    *PAGE_SIZE,
6319                    name_addr.ptr() as u64,
6320                ),
6321                Ok(starnix_syscalls::SUCCESS)
6322            );
6323
6324            // This should split the mapping into 3 pieces with the second piece having the name "foo"
6325            {
6326                let mm = current_task.mm().unwrap();
6327                let state = mm.state.read();
6328
6329                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6330                assert_eq!(mapping.name(), MappingName::None);
6331
6332                let (_, mapping) =
6333                    state.mappings.get((mapping_addr + *PAGE_SIZE).unwrap()).unwrap();
6334                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6335
6336                let (_, mapping) =
6337                    state.mappings.get((mapping_addr + (2 * *PAGE_SIZE)).unwrap()).unwrap();
6338                assert_eq!(mapping.name(), MappingName::None);
6339            }
6340        })
6341        .await;
6342    }
6343
6344    #[::fuchsia::test]
6345    async fn test_preserve_name_snapshot() {
6346        spawn_kernel_and_run(async |locked, mut current_task| {
6347            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6348            current_task
6349                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6350                .unwrap();
6351
6352            let mapping_addr =
6353                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6354
6355            assert_eq!(
6356                sys_prctl(
6357                    locked,
6358                    &mut current_task,
6359                    PR_SET_VMA,
6360                    PR_SET_VMA_ANON_NAME as u64,
6361                    mapping_addr.ptr() as u64,
6362                    *PAGE_SIZE,
6363                    name_addr.ptr() as u64,
6364                ),
6365                Ok(starnix_syscalls::SUCCESS)
6366            );
6367
6368            let target = current_task.clone_task_for_test(locked, 0, None);
6369
6370            {
6371                let mm = target.mm().unwrap();
6372                let state = mm.state.read();
6373
6374                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6375                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6376            }
6377        })
6378        .await;
6379    }
6380}