Skip to main content

starnix_core/mm/
memory_manager.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::mapping::MappingBackingMemory;
7use crate::mm::memory::MemoryObject;
8use crate::mm::memory_accessor::{MemoryAccessor, TaskMemoryAccessor};
9use crate::mm::private_anonymous_memory_manager::PrivateAnonymousMemoryManager;
10use crate::mm::{
11    FaultRegisterMode, FutexTable, InflightVmsplicedPayloads, MapInfoCache, Mapping,
12    MappingBacking, MappingFlags, MappingMode, MappingName, MappingNameRef, MlockPinFlavor,
13    PrivateFutexKey, ProtectionFlags, UserFault, VMEX_RESOURCE, VmsplicePayload,
14    VmsplicePayloadSegment, read_to_array,
15};
16use crate::security;
17use crate::signals::{SignalDetail, SignalInfo};
18use crate::task::{CurrentTask, ExceptionResult, PageFaultExceptionReport, Task};
19use crate::vfs::aio::AioContext;
20use crate::vfs::pseudo::dynamic_file::{
21    DynamicFile, DynamicFileBuf, DynamicFileSource, SequenceFileSource,
22};
23use crate::vfs::{FsString, NamespaceNode};
24use anyhow::{Error, anyhow};
25use bitflags::bitflags;
26use flyweights::FlyByteStr;
27use linux_uapi::BUS_ADRERR;
28use memory_pinning::PinnedMapping;
29use range_map::RangeMap;
30use smallvec::SmallVec;
31use starnix_ext::map_ext::EntryExt;
32use starnix_lifecycle::DropNotifier;
33use starnix_logging::{
34    CATEGORY_STARNIX_MM, impossible_error, log_error, log_warn, trace_duration, track_stub,
35};
36use starnix_sync::{
37    LockBefore, Locked, MmDumpable, OrderedMutex, RwLock, RwLockWriteGuard, ThreadGroupLimits,
38    Unlocked, UserFaultInner,
39};
40use starnix_types::arch::ArchWidth;
41use starnix_types::futex_address::FutexAddress;
42use starnix_types::math::{round_down_to_system_page_size, round_up_to_system_page_size};
43use starnix_types::user_buffer::{UserBuffer, UserBuffers};
44use starnix_uapi::auth::CAP_IPC_LOCK;
45use starnix_uapi::errors::Errno;
46use starnix_uapi::file_mode::Access;
47use starnix_uapi::range_ext::RangeExt;
48use starnix_uapi::resource_limits::Resource;
49use starnix_uapi::restricted_aspace::{
50    RESTRICTED_ASPACE_BASE, RESTRICTED_ASPACE_HIGHEST_ADDRESS, RESTRICTED_ASPACE_RANGE,
51    RESTRICTED_ASPACE_SIZE,
52};
53use starnix_uapi::signals::{SIGBUS, SIGSEGV};
54use starnix_uapi::user_address::{ArchSpecific, UserAddress};
55use starnix_uapi::{
56    MADV_COLD, MADV_COLLAPSE, MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK,
57    MADV_DONTNEED, MADV_DONTNEED_LOCKED, MADV_FREE, MADV_HUGEPAGE, MADV_HWPOISON, MADV_KEEPONFORK,
58    MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_NORMAL, MADV_PAGEOUT, MADV_POPULATE_READ, MADV_RANDOM,
59    MADV_REMOVE, MADV_SEQUENTIAL, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, MADV_WILLNEED,
60    MADV_WIPEONFORK, MREMAP_DONTUNMAP, MREMAP_FIXED, MREMAP_MAYMOVE, errno, error,
61    from_status_like_fdio,
62};
63use std::collections::HashMap;
64use std::mem::MaybeUninit;
65use std::ops::{ControlFlow, Deref, DerefMut, Range, RangeBounds};
66use std::sync::{Arc, LazyLock, Weak};
67use syncio::zxio::zxio_default_maybe_faultable_copy;
68use zerocopy::IntoBytes;
69use zx::{Rights, VmoChildOptions};
70
/// A `zx::VmarFlags` value carrying the bits of `zx::VmarFlagsExtended::SPECIFIC_OVERWRITE`.
///
/// Built with `from_bits_retain`, so the extended bit pattern is kept verbatim inside the
/// `VmarFlags` type.
pub const ZX_VM_SPECIFIC_OVERWRITE: zx::VmarFlags =
    zx::VmarFlags::from_bits_retain(zx::VmarFlagsExtended::SPECIFIC_OVERWRITE.bits());

/// `true` unless the crate is compiled for tests (`cfg!(not(test))`).
///
/// We do not create shared processes in unit tests.
pub(crate) const UNIFIED_ASPACES_ENABLED: bool = cfg!(not(test));
76
77/// Initializes the usercopy utilities.
78///
79/// It is useful to explicitly call this so that the usercopy is initialized
80/// at a known instant. For example, Starnix may want to make sure the usercopy
81/// thread created to support user copying is associated to the Starnix process
82/// and not a restricted-mode process.
83pub fn init_usercopy() {
84    // This call lazily initializes the `Usercopy` instance.
85    let _ = usercopy();
86}
87
thread_local! {
    /// The last mapping generation seen by this thread.
    /// Used to prevent infinite loops in page fault handling.
    ///
    /// Starts at 0 on every thread.
    // NOTE(review): presumably compared against `Mappings::generation`; the code that
    // reads and writes this cell is not visible here -- confirm.
    static LAST_SEEN_MAPPING_GENERATION: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };
}
93
/// Size, in pages, of the guard region kept below a GROWSDOWN mapping.
///
/// `MemoryManagerState::get_occupied_address_ranges` widens its query by this many pages
/// so that guard regions count as occupied.
pub const GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS: usize = 256;

// NOTE(review): the three `ASLR_RANDOM_BITS` values below are presumably the number of
// bits of ASLR entropy for processes running in 64 bits mode on each architecture (by
// analogy with `ASLR_32_RANDOM_BITS`) -- confirm against the use site.

/// ASLR random bits on x86_64.
#[cfg(target_arch = "x86_64")]
const ASLR_RANDOM_BITS: usize = 27;

/// ASLR random bits on aarch64.
#[cfg(target_arch = "aarch64")]
const ASLR_RANDOM_BITS: usize = 28;

/// ASLR random bits on riscv64.
#[cfg(target_arch = "riscv64")]
const ASLR_RANDOM_BITS: usize = 18;

/// Number of bits of entropy for processes running in 32 bits mode.
const ASLR_32_RANDOM_BITS: usize = 8;

/// The biggest we expect a stack to be (512 MiB); increase as needed.
// TODO(https://fxbug.dev/322874791): Once setting RLIMIT_STACK is implemented, we should use it.
const MAX_STACK_SIZE: usize = 512 * 1024 * 1024;

/// Value (2 MiB) to report temporarily as the VM RSS high water mark.
// TODO(https://fxbug.dev/396221597): Need support from the kernel to track the committed bytes high
// water mark.
const STUB_VM_RSS_HWM: usize = 2 * 1024 * 1024;
116
117fn usercopy() -> Option<&'static usercopy::Usercopy> {
118    static USERCOPY: LazyLock<Option<usercopy::Usercopy>> = LazyLock::new(|| {
119        // We do not create shared processes in unit tests.
120        if UNIFIED_ASPACES_ENABLED {
121            // ASUMPTION: All Starnix managed Linux processes have the same
122            // restricted mode address range.
123            Some(usercopy::Usercopy::new(RESTRICTED_ASPACE_RANGE).unwrap())
124        } else {
125            None
126        }
127    });
128
129    LazyLock::force(&USERCOPY).as_ref()
130}
131
132/// Provides an implementation for zxio's `zxio_maybe_faultable_copy` that supports
133/// catching faults.
134///
135/// See zxio's `zxio_maybe_faultable_copy` documentation for more details.
136///
137/// # Safety
138///
139/// Only one of `src`/`dest` may be an address to a buffer owned by user/restricted-mode
140/// (`ret_dest` indicates whether the user-owned buffer is `dest` when `true`).
141/// The other must be a valid Starnix/normal-mode buffer that will never cause a fault
142/// when the first `count` bytes are read/written.
143#[unsafe(no_mangle)]
144pub unsafe fn zxio_maybe_faultable_copy_impl(
145    dest: *mut u8,
146    src: *const u8,
147    count: usize,
148    ret_dest: bool,
149) -> bool {
150    if let Some(usercopy) = usercopy() {
151        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
152        let ret = unsafe { usercopy.raw_hermetic_copy(dest, src, count, ret_dest) };
153        ret == count
154    } else {
155        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
156        unsafe {
157            zxio_default_maybe_faultable_copy(dest, src, count, ret_dest)
158        }
159    }
160}
161
/// The system page size, queried from `zx::system_get_page_size()` on first access and
/// cached as a `u64`.
pub static PAGE_SIZE: LazyLock<u64> = LazyLock::new(|| zx::system_get_page_size() as u64);
163
bitflags! {
    /// Option bits supplied when creating a mapping.
    ///
    /// They are combined with the protection flags through
    /// `MappingFlags::from_access_flags_and_options`.
    // NOTE(review): the individual bits look like they mirror mmap(2)/madvise(2) concepts
    // (MAP_SHARED, MAP_ANONYMOUS, MAP_32BIT, MAP_GROWSDOWN, MADV_DONTFORK, ...) -- confirm
    // against the callers that build these options.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct MappingOptions: u16 {
      const SHARED      = 1 << 0;
      const ANONYMOUS   = 1 << 1;
      const LOWER_32BIT = 1 << 2;
      const GROWSDOWN   = 1 << 3;
      const ELF_BINARY  = 1 << 4;
      const DONTFORK    = 1 << 5;
      const WIPEONFORK  = 1 << 6;
      const DONT_SPLIT  = 1 << 7;
      const DONT_EXPAND = 1 << 8;
      // Forwarded as the `populate` argument by `map_anonymous`.
      const POPULATE    = 1 << 9;
    }
}
179
bitflags! {
    /// Typed wrapper over the `MREMAP_*` uapi constants; this is the `flags` argument of
    /// `MemoryManagerState::remap`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct MremapFlags: u32 {
        const MAYMOVE = MREMAP_MAYMOVE;
        // MREMAP_FIXED moves a mapping, which requires MREMAP_MAYMOVE.
        const FIXED = MREMAP_FIXED;
        const DONTUNMAP = MREMAP_DONTUNMAP;
    }
}
188
bitflags! {
    /// Typed wrapper over the `MS_*` uapi constants.
    // NOTE(review): presumably the flags argument of msync(2) -- the use site is not
    // visible here.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct MsyncFlags: u32 {
        const ASYNC = starnix_uapi::MS_ASYNC;
        const INVALIDATE = starnix_uapi::MS_INVALIDATE;
        const SYNC = starnix_uapi::MS_SYNC;
    }
}
197
/// 64 MiB.
// NOTE(review): presumably the maximum size the program break (brk) area may reach; the
// use site is not visible here -- confirm.
const PROGRAM_BREAK_LIMIT: u64 = 64 * 1024 * 1024;
199
/// State for the brk and sbrk syscalls (stored in `MemoryManagerForkableState::brk`).
#[derive(Debug, Clone, Eq, PartialEq)]
struct ProgramBreak {
    // The base address at which the data segment is mapped.
    base: UserAddress,

    // The current program break.
    //
    // The addresses from [base, current.round_up(*PAGE_SIZE)) are mapped into the
    // client address space from the underlying |memory|.
    current: UserAddress,
}
211
/// The policy about whether the address space can be dumped.
///
/// The variants correspond to Linux's `SUID_DUMP_*` values, as noted on each one.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DumpPolicy {
    /// The address space cannot be dumped.
    ///
    /// Corresponds to SUID_DUMP_DISABLE.
    Disable,

    /// The address space can be dumped.
    ///
    /// Corresponds to SUID_DUMP_USER.
    User,
}
225
/// Supported types of membarriers.
pub enum MembarrierType {
    /// MEMBARRIER_CMD_GLOBAL, etc.
    Memory,
    /// MEMBARRIER_CMD_..._SYNC_CORE.
    SyncCore,
}
231
/// Tracks the types of membarriers this address space is registered to receive.
///
/// Both flags default to `false`.
// NOTE(review): the two fields presumably line up with `MembarrierType::Memory` and
// `MembarrierType::SyncCore` -- confirm at the registration site.
#[derive(Default, Clone)]
struct MembarrierRegistrations {
    memory: bool,
    sync_core: bool,
}
238
/// A `RangeMap` of mappings bundled with a modification counter and a cached usage total.
#[derive(Default)]
struct Mappings {
    /// The mappings record which object backs each address.
    map: RangeMap<UserAddress, Mapping>,

    /// Generation counter for mappings. Bumped (wrapping) by every mutating method of
    /// `Mappings`, whether or not the call ends up changing `map`.
    ///
    /// This is used to detect stale mappings in `handle_page_fault`.
    generation: u64,

    /// The cached sum of the lengths of all mapped ranges.
    total_usage: usize,
}
252
/// Read-only access to the underlying `RangeMap`.
///
/// No `DerefMut` counterpart is visible in this part of the file; mutation goes through
/// the wrapper methods on `Mappings`, which also maintain `generation` and `total_usage`.
impl Deref for Mappings {
    type Target = RangeMap<UserAddress, Mapping>;

    fn deref(&self) -> &Self::Target {
        &self.map
    }
}
260
261impl Mappings {
262    pub fn insert(&mut self, range: std::ops::Range<UserAddress>, value: Mapping) -> Vec<Mapping> {
263        self.generation = self.generation.wrapping_add(1);
264        let range_len = range.end - range.start;
265        let removed_len: usize = self
266            .map
267            .range(range.clone())
268            .map(|(r, _)| {
269                let intersection = r.intersect(&range);
270                intersection.end - intersection.start
271            })
272            .sum();
273        let removed = self.map.insert(range, value);
274        self.total_usage = self.total_usage.saturating_add(range_len).saturating_sub(removed_len);
275        removed
276    }
277
278    pub fn remove(&mut self, range: std::ops::Range<UserAddress>) -> Vec<Mapping> {
279        self.generation = self.generation.wrapping_add(1);
280        let removed_len: usize = self
281            .map
282            .range(range.clone())
283            .map(|(r, _)| {
284                let intersection = r.intersect(&range);
285                intersection.end - intersection.start
286            })
287            .sum();
288        let removed = self.map.remove(range);
289        self.total_usage = self.total_usage.saturating_sub(removed_len);
290        removed
291    }
292
293    pub fn append_non_overlapping(
294        &mut self,
295        range: std::ops::Range<UserAddress>,
296        value: Mapping,
297    ) -> bool {
298        self.generation = self.generation.wrapping_add(1);
299        let range_len = range.end - range.start;
300        if self.map.append_non_overlapping(range, value) {
301            self.total_usage = self.total_usage.saturating_add(range_len);
302            true
303        } else {
304            false
305        }
306    }
307
308    pub fn update_exact<F, E>(
309        &mut self,
310        range: &std::ops::Range<UserAddress>,
311        f: F,
312    ) -> Result<bool, E>
313    where
314        F: FnOnce(&mut Mapping) -> Result<(), E>,
315    {
316        self.generation = self.generation.wrapping_add(1);
317        self.map.update_exact(range, f)
318    }
319}
320
/// Bookkeeping for one address space: its mappings plus the state in
/// `MemoryManagerForkableState`, which is reachable directly through `Deref`/`DerefMut`.
pub struct MemoryManagerState {
    /// The memory mappings currently used by this address space.
    mappings: Mappings,

    /// UserFaults registered with this memory manager.
    userfaultfds: Vec<Weak<UserFault>>,

    /// Shadow mappings for mlock()'d pages.
    ///
    /// Used for MlockPinFlavor::ShadowProcess to keep track of when we need to unmap
    /// memory from the shadow process.
    shadow_mappings_for_mlock: RangeMap<UserAddress, Arc<PinnedMapping>>,

    /// The `Clone`-able portion of the state.
    forkable_state: MemoryManagerForkableState,
}
336
/// 64 KiB below the 4 GiB boundary (`0xffff_0000`).
const LOWER_4GB_LIMIT: UserAddress = UserAddress::const_from(0xffff_0000);
339
/// The `Clone`-able part of `MemoryManagerState`.
// NOTE(review): going by the name, this is presumably what gets copied into the child
// address space on fork -- confirm at the fork site.
#[derive(Default, Clone)]
pub struct MemoryManagerForkableState {
    /// State for the brk and sbrk syscalls.
    brk: Option<ProgramBreak>,

    /// The namespace node that represents the executable associated with this task.
    executable_node: Option<NamespaceNode>,

    // Stack, auxv, argv and environ locations.
    // NOTE(review): presumably recorded when the program image is set up -- confirm.
    pub stack_size: usize,
    pub stack_start: UserAddress,
    pub auxv_start: UserAddress,
    pub auxv_end: UserAddress,
    pub argv_start: UserAddress,
    pub argv_end: UserAddress,
    pub environ_start: UserAddress,
    pub environ_end: UserAddress,

    /// vDSO location
    pub vdso_base: UserAddress,

    /// Randomized regions:
    ///
    /// `mmap_top` is the address `find_next_unused_range` searches downwards from.
    pub mmap_top: UserAddress,
    pub stack_origin: UserAddress,
    pub brk_origin: UserAddress,

    // Membarrier registrations
    membarrier_registrations: MembarrierRegistrations,
}
368
/// Exposes the fields of `forkable_state` directly on `MemoryManagerState` for reads
/// (e.g. `self.mmap_top`).
impl Deref for MemoryManagerState {
    type Target = MemoryManagerForkableState;
    fn deref(&self) -> &Self::Target {
        &self.forkable_state
    }
}
375
/// Mutable counterpart of the `Deref` impl: writes to forkable fields go straight to
/// `forkable_state`.
impl DerefMut for MemoryManagerState {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.forkable_state
    }
}
381
/// Collects mappings and pinned mappings that have been taken out of the memory manager
/// state so that they can be dropped later, by `finalize`, after the state lock is
/// released.
///
/// Dropping a non-empty `ReleasedMappings` panics; see its `Drop` impl.
#[derive(Debug, Default)]
struct ReleasedMappings {
    /// Mappings waiting to be dropped.
    doomed: Vec<Mapping>,
    /// Pinned mappings waiting to be dropped.
    doomed_pins: Vec<Arc<PinnedMapping>>,
}
387
388impl ReleasedMappings {
389    fn extend(&mut self, mappings: impl IntoIterator<Item = Mapping>) {
390        self.doomed.extend(mappings);
391    }
392
393    fn extend_pins(&mut self, mappings: impl IntoIterator<Item = Arc<PinnedMapping>>) {
394        self.doomed_pins.extend(mappings);
395    }
396
397    fn is_empty(&self) -> bool {
398        self.doomed.is_empty() && self.doomed_pins.is_empty()
399    }
400
401    #[cfg(test)]
402    fn len(&self) -> usize {
403        self.doomed.len() + self.doomed_pins.len()
404    }
405
406    fn finalize(&mut self, mm_state: RwLockWriteGuard<'_, MemoryManagerState>) {
407        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
408        // in `DirEntry`'s `drop`.
409        std::mem::drop(mm_state);
410        std::mem::take(&mut self.doomed);
411        std::mem::take(&mut self.doomed_pins);
412    }
413}
414
415impl Drop for ReleasedMappings {
416    fn drop(&mut self) {
417        assert!(self.is_empty(), "ReleasedMappings::finalize() must be called before drop");
418    }
419}
420
421fn map_in_vmar(
422    vmar: &zx::Vmar,
423    vmar_info: &zx::VmarInfo,
424    addr: SelectedAddress,
425    memory: &MemoryObject,
426    memory_offset: u64,
427    length: usize,
428    flags: MappingFlags,
429    populate: bool,
430) -> Result<(), Errno> {
431    let vmar_offset = addr.addr().checked_sub(vmar_info.base).ok_or_else(|| errno!(ENOMEM))?;
432    let vmar_extra_flags = match addr {
433        SelectedAddress::Fixed(_) => zx::VmarFlags::SPECIFIC,
434        SelectedAddress::FixedOverwrite(_) => ZX_VM_SPECIFIC_OVERWRITE,
435    };
436
437    if populate {
438        let op = if flags.contains(MappingFlags::WRITE) {
439            // Requires ZX_RIGHT_WRITEABLE which we should expect when the mapping is writeable.
440            zx::VmoOp::COMMIT
441        } else {
442            // When we don't expect to have ZX_RIGHT_WRITEABLE, fall back to a VMO op that doesn't
443            // need it.
444            zx::VmoOp::PREFETCH
445        };
446        trace_duration!(CATEGORY_STARNIX_MM, "MmapCommitPages");
447        let _ = memory.op_range(op, memory_offset, length as u64);
448        // "The mmap() call doesn't fail if the mapping cannot be populated."
449    }
450
451    let vmar_maybe_map_range = if populate && !vmar_extra_flags.contains(ZX_VM_SPECIFIC_OVERWRITE) {
452        zx::VmarFlags::MAP_RANGE
453    } else {
454        zx::VmarFlags::empty()
455    };
456    let vmar_flags = flags.access_flags().to_vmar_flags()
457        | zx::VmarFlags::ALLOW_FAULTS
458        | vmar_extra_flags
459        | vmar_maybe_map_range;
460
461    let map_result = memory.map_in_vmar(vmar, vmar_offset.ptr(), memory_offset, length, vmar_flags);
462    let mapped_addr = map_result.map_err(MemoryManager::get_errno_for_map_err)?;
463
464    let expected_addr = addr.addr().ptr();
465    debug_assert_eq!(
466        mapped_addr, expected_addr,
467        "Zircon mapped to a different address than requested!"
468    );
469
470    Ok(())
471}
472
473impl MemoryManagerState {
474    /// Returns occupied address ranges that intersect with the given range.
475    ///
476    /// An address range is "occupied" if (a) there is already a mapping in that range or (b) there
477    /// is a GROWSDOWN mapping <= 256 pages above that range. The 256 pages below a GROWSDOWN
478    /// mapping is the "guard region." The memory manager avoids mapping memory in the guard region
479    /// in some circumstances to preserve space for the GROWSDOWN mapping to grow down.
480    fn get_occupied_address_ranges<'a>(
481        &'a self,
482        subrange: &'a Range<UserAddress>,
483    ) -> impl Iterator<Item = Range<UserAddress>> + 'a {
484        let query_range = subrange.start
485            ..(subrange
486                .end
487                .saturating_add(*PAGE_SIZE as usize * GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS));
488        self.mappings.range(query_range).filter_map(|(range, mapping)| {
489            let occupied_range = mapping.inflate_to_include_guard_pages(range);
490            if occupied_range.start < subrange.end && subrange.start < occupied_range.end {
491                Some(occupied_range)
492            } else {
493                None
494            }
495        })
496    }
497
498    fn count_possible_placements(
499        &self,
500        length: usize,
501        subrange: &Range<UserAddress>,
502    ) -> Option<usize> {
503        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
504        let mut possible_placements = 0;
505        // If the allocation is placed at the first available address, every page that is left
506        // before the next mapping or the end of subrange is +1 potential placement.
507        let mut first_fill_end = subrange.start.checked_add(length)?;
508        while first_fill_end <= subrange.end {
509            let Some(mapping) = occupied_ranges.next() else {
510                possible_placements += (subrange.end - first_fill_end) / (*PAGE_SIZE as usize) + 1;
511                break;
512            };
513            if mapping.start >= first_fill_end {
514                possible_placements += (mapping.start - first_fill_end) / (*PAGE_SIZE as usize) + 1;
515            }
516            first_fill_end = mapping.end.checked_add(length)?;
517        }
518        Some(possible_placements)
519    }
520
521    fn pick_placement(
522        &self,
523        length: usize,
524        mut chosen_placement_idx: usize,
525        subrange: &Range<UserAddress>,
526    ) -> Option<UserAddress> {
527        let mut candidate =
528            Range { start: subrange.start, end: subrange.start.checked_add(length)? };
529        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
530        loop {
531            let Some(mapping) = occupied_ranges.next() else {
532                // No more mappings: treat the rest of the index as an offset.
533                let res =
534                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
535                debug_assert!(res.checked_add(length)? <= subrange.end);
536                return Some(res);
537            };
538            if mapping.start < candidate.end {
539                // doesn't fit, skip
540                candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
541                continue;
542            }
543            let unused_space =
544                (mapping.start.ptr() - candidate.end.ptr()) / (*PAGE_SIZE as usize) + 1;
545            if unused_space > chosen_placement_idx {
546                // Chosen placement is within the range; treat the rest of the index as an offset.
547                let res =
548                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
549                return Some(res);
550            }
551
552            // chosen address is further up, skip
553            chosen_placement_idx -= unused_space;
554            candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
555        }
556    }
557
558    fn find_random_unused_range(
559        &self,
560        length: usize,
561        subrange: &Range<UserAddress>,
562    ) -> Option<UserAddress> {
563        let possible_placements = self.count_possible_placements(length, subrange)?;
564        if possible_placements == 0 {
565            return None;
566        }
567        let chosen_placement_idx = rand::random_range(0..possible_placements);
568        self.pick_placement(length, chosen_placement_idx, subrange)
569    }
570
571    // Find the first unused range of addresses that fits a mapping of `length` bytes, searching
572    // from `mmap_top` downwards.
573    pub fn find_next_unused_range(&self, length: usize) -> Option<UserAddress> {
574        let gap_size = length as u64;
575        let mut upper_bound = self.mmap_top;
576
577        loop {
578            let gap_end = self.mappings.find_gap_end(gap_size, &upper_bound);
579            let candidate = gap_end.checked_sub(length)?;
580
581            // Is there a next mapping? If not, the candidate is already good.
582            let Some((occupied_range, mapping)) = self.mappings.get(gap_end) else {
583                return Some(candidate);
584            };
585            let occupied_range = mapping.inflate_to_include_guard_pages(occupied_range);
586            // If it doesn't overlap, the gap is big enough to fit.
587            if occupied_range.start >= gap_end {
588                return Some(candidate);
589            }
590            // If there was a mapping in the way, use the start of that range as the upper bound.
591            upper_bound = occupied_range.start;
592        }
593    }
594
595    // Accept the hint if the range is unused and within the range available for mapping.
596    fn is_hint_acceptable(&self, hint_addr: UserAddress, length: usize) -> bool {
597        let Some(hint_end) = hint_addr.checked_add(length) else {
598            return false;
599        };
600        if !RESTRICTED_ASPACE_RANGE.contains(&hint_addr.ptr())
601            || !RESTRICTED_ASPACE_RANGE.contains(&hint_end.ptr())
602        {
603            return false;
604        };
605        self.get_occupied_address_ranges(&(hint_addr..hint_end)).next().is_none()
606    }
607
608    fn select_address(
609        &self,
610        addr: DesiredAddress,
611        length: usize,
612        flags: MappingFlags,
613    ) -> Result<SelectedAddress, Errno> {
614        let adjusted_length = round_up_to_system_page_size(length).or_else(|_| error!(ENOMEM))?;
615
616        let find_address = || -> Result<SelectedAddress, Errno> {
617            let new_addr = if flags.contains(MappingFlags::LOWER_32BIT) {
618                // MAP_32BIT specifies that the memory allocated will
619                // be within the first 2 GB of the process address space.
620                self.find_random_unused_range(
621                    adjusted_length,
622                    &(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
623                        ..UserAddress::from_ptr(0x80000000)),
624                )
625                .ok_or_else(|| errno!(ENOMEM))?
626            } else {
627                self.find_next_unused_range(adjusted_length).ok_or_else(|| errno!(ENOMEM))?
628            };
629
630            Ok(SelectedAddress::Fixed(new_addr))
631        };
632
633        Ok(match addr {
634            DesiredAddress::Any => find_address()?,
635            DesiredAddress::Hint(hint_addr) => {
636                // Round down to page size
637                let hint_addr =
638                    UserAddress::from_ptr(hint_addr.ptr() - hint_addr.ptr() % *PAGE_SIZE as usize);
639                if self.is_hint_acceptable(hint_addr, adjusted_length) {
640                    SelectedAddress::Fixed(hint_addr)
641                } else {
642                    find_address()?
643                }
644            }
645            DesiredAddress::Fixed(addr) => SelectedAddress::Fixed(addr),
646            DesiredAddress::FixedOverwrite(addr) => SelectedAddress::FixedOverwrite(addr),
647        })
648    }
649
650    fn validate_addr(&self, addr: DesiredAddress, length: usize) -> Result<(), Errno> {
651        if length > RESTRICTED_ASPACE_SIZE {
652            return error!(ENOMEM);
653        }
654        match addr {
655            DesiredAddress::Fixed(a) | DesiredAddress::FixedOverwrite(a) => {
656                let end = a.checked_add(length).ok_or_else(|| errno!(ENOMEM))?;
657                if end > UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS as usize) {
658                    return error!(ENOMEM);
659                }
660                if self.check_has_unauthorized_splits(a, length) {
661                    return error!(ENOMEM);
662                }
663            }
664            _ => {}
665        }
666        Ok(())
667    }
668
    /// Records a mapping of `memory` in this address space and, for eager mappings, also
    /// installs it in the user VMAR.
    ///
    /// Steps, in order: validate the request, select the address, map into the user VMAR
    /// (only when `mapping_mode` is `MappingMode::Eager`), run `update_after_unmap` over
    /// the covered range for `FixedOverwrite` requests, then insert the new `Mapping`
    /// into `self.mappings`. Mappings returned by the insert are queued in
    /// `released_mappings`.
    ///
    /// Returns the address at which the mapping starts.
    ///
    /// # Errors
    ///
    /// Propagates errors from validation, address selection, the VMAR map call, the end
    /// address computation, and `update_after_unmap`.
    ///
    /// # Panics
    ///
    /// Panics if a `FixedOverwrite` request was resolved to a different address.
    fn add_memory_mapping(
        &mut self,
        mm: &Arc<MemoryManager>,
        addr: DesiredAddress,
        memory: Arc<MemoryObject>,
        memory_offset: u64,
        length: usize,
        flags: MappingFlags,
        max_access: Access,
        populate: bool,
        name: MappingName,
        mapping_mode: MappingMode,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        self.validate_addr(addr, length)?;

        let selected_address = self.select_address(addr, length, flags)?;
        let mapped_addr = selected_address.addr();
        // Mappings in any other mode are only recorded below; see
        // `ensure_ranges_mapped_in_user_vmar` for how `MappingMode::Lazy` ones get mapped.
        if mapping_mode == MappingMode::Eager {
            mm.mapping_context.map_in_user_vmar(
                selected_address,
                &memory,
                memory_offset,
                length,
                flags,
                populate,
            )?;
        }

        // The bookkeeping range is page-granular even when `length` is not.
        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;

        if let DesiredAddress::FixedOverwrite(addr) = addr {
            assert_eq!(addr, mapped_addr);
            // Bring the bookkeeping up to date for whatever previously occupied the range.
            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
        }

        let mapping = Mapping::with_name(
            self.create_memory_backing(mapped_addr, memory, memory_offset),
            flags,
            max_access,
            name,
            mapping_mode,
        );
        // Whatever the insert hands back is dropped later, via `released_mappings`.
        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));

        Ok(mapped_addr)
    }
716
    /// Creates a private anonymous mapping, always in `MappingMode::Eager`.
    ///
    /// The mapping is backed by `mm.mapping_context.private_anonymous.backing`, at a
    /// memory offset equal to the selected address. Steps, in order: validate the
    /// request, select the address, map into the user VMAR, run `update_after_unmap`
    /// over the covered range for `FixedOverwrite` requests, then insert the new
    /// `Mapping` into `self.mappings`. Mappings returned by the insert are queued in
    /// `released_mappings`.
    ///
    /// Returns the address at which the mapping starts.
    ///
    /// # Errors
    ///
    /// Propagates errors from validation, address selection, the VMAR map call, the end
    /// address computation, and `update_after_unmap`.
    ///
    /// # Panics
    ///
    /// Panics if a `FixedOverwrite` request was resolved to a different address.
    fn map_private_anonymous(
        &mut self,
        mm: &Arc<MemoryManager>,
        addr: DesiredAddress,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        populate: bool,
        name: MappingName,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        self.validate_addr(addr, length)?;

        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
        let selected_addr = self.select_address(addr, length, flags)?;
        let mapped_addr = selected_addr.addr();
        // The offset into the backing memory is the user address itself.
        let backing_memory_offset = selected_addr.addr().ptr();

        mm.mapping_context.map_in_user_vmar(
            selected_addr,
            &mm.mapping_context.private_anonymous.backing,
            backing_memory_offset as u64,
            length,
            flags,
            populate,
        )?;

        // The bookkeeping range is page-granular even when `length` is not.
        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
        if let DesiredAddress::FixedOverwrite(addr) = addr {
            assert_eq!(addr, mapped_addr);
            // Bring the bookkeeping up to date for whatever previously occupied the range.
            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
        }

        let mapping = Mapping::new_private_anonymous(flags, name, MappingMode::Eager);
        // Whatever the insert hands back is dropped later, via `released_mappings`.
        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));

        Ok(mapped_addr)
    }
755
756    fn map_anonymous(
757        &mut self,
758        mm: &Arc<MemoryManager>,
759        addr: DesiredAddress,
760        length: usize,
761        prot_flags: ProtectionFlags,
762        options: MappingOptions,
763        name: MappingName,
764        released_mappings: &mut ReleasedMappings,
765    ) -> Result<UserAddress, Errno> {
766        if !options.contains(MappingOptions::SHARED) {
767            return self.map_private_anonymous(
768                mm,
769                addr,
770                length,
771                prot_flags,
772                options,
773                options.contains(MappingOptions::POPULATE),
774                name,
775                released_mappings,
776            );
777        }
778        let memory = create_anonymous_mapping_memory(length as u64)?;
779        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
780        self.add_memory_mapping(
781            mm,
782            addr,
783            memory,
784            0,
785            length,
786            flags,
787            Access::rwx(),
788            options.contains(MappingOptions::POPULATE),
789            name,
790            MappingMode::Eager,
791            released_mappings,
792        )
793    }
794
795    fn ensure_range_mapped_in_user_vmar(
796        &mut self,
797        addr: UserAddress,
798        length: Option<usize>,
799        context: &MappingContext,
800    ) -> Result<bool, Errno> {
801        self.ensure_ranges_mapped_in_user_vmar(std::iter::once((addr, length)), context)
802    }
803
    /// Maps every `MappingMode::Lazy` mapping touched by `ranges` into the user VMAR and
    /// switches it to `MappingMode::Eager`.
    ///
    /// Each item is `(addr, length)`: with `None` only the mapping containing `addr` is
    /// considered, with `Some(len)` every mapping intersecting `[addr, addr + len)` is.
    /// A lazy mapping is always mapped over its whole range, not just the part asked for.
    ///
    /// Returns `Ok(false)` if no lazy mapping was found and `Ok(true)` otherwise.
    ///
    /// # Errors
    ///
    /// Propagates the error of the first failing `map_in_user_vmar` call; mappings
    /// processed before it stay eager.
    ///
    /// # Panics
    ///
    /// Panics if a `Some` length is 0, if `addr + len` overflows, or if a collected range
    /// no longer matches exactly one mapping.
    fn ensure_ranges_mapped_in_user_vmar<I>(
        &mut self,
        ranges: I,
        context: &MappingContext,
    ) -> Result<bool, Errno>
    where
        I: IntoIterator<Item = (UserAddress, Option<usize>)>,
    {
        // This is most likely to contain one range, so use `SmallVec` to avoid
        // heap allocation and get better performance in the common case.
        //
        // The ranges are collected first because updating needs `self.mappings` mutably.
        let mut ranges_to_update = SmallVec::<[std::ops::Range<UserAddress>; 1]>::new();
        for (addr, length) in ranges {
            match length {
                None => {
                    if let Some((range, mapping)) = self.mappings.get(addr) {
                        if mapping.mapping_mode() == MappingMode::Lazy {
                            ranges_to_update.push(range.clone());
                        }
                    }
                }
                Some(len) => {
                    assert!(len > 0);
                    let end = addr.checked_add(len).expect("address overflowed after validation");
                    for (range, mapping) in self.mappings.range(addr..end) {
                        if mapping.mapping_mode() == MappingMode::Lazy {
                            ranges_to_update.push(range.clone());
                        }
                    }
                }
            }
        }

        if ranges_to_update.is_empty() {
            return Ok(false);
        }

        for range in ranges_to_update {
            let updated = self.mappings.update_exact(&range, |mapping| {
                // Map at the exact start of the recorded range, overwriting what is there.
                let addr = SelectedAddress::FixedOverwrite(range.start);
                let flags = mapping.flags();
                let (backing, backing_memory_offset) = match mapping.get_backing_internal() {
                    MappingBacking::Memory(backing) => {
                        (backing.memory(), backing.address_to_offset(addr.addr()))
                    }
                    // Private anonymous memory is addressed by the user address itself.
                    MappingBacking::PrivateAnonymous => {
                        (&context.private_anonymous.backing, addr.addr().ptr() as u64)
                    }
                };

                let mapping_length = range.end - range.start;
                // `false`: do not populate.
                context.map_in_user_vmar(
                    addr,
                    backing,
                    backing_memory_offset,
                    mapping_length,
                    flags,
                    false,
                )?;

                // Only flip the mode once the VMAR mapping exists.
                mapping.set_mapping_mode(MappingMode::Eager);
                Ok(())
            })?;
            assert!(updated, "Expected to update exactly one mapping");
        }

        Ok(true)
    }
871
872    fn remap(
873        &mut self,
874        _current_task: &CurrentTask,
875        mm: &Arc<MemoryManager>,
876        old_addr: UserAddress,
877        old_length: usize,
878        new_length: usize,
879        flags: MremapFlags,
880        new_addr: UserAddress,
881        released_mappings: &mut ReleasedMappings,
882    ) -> Result<UserAddress, Errno> {
883        // MREMAP_FIXED moves a mapping, which requires MREMAP_MAYMOVE.
884        if flags.contains(MremapFlags::FIXED) && !flags.contains(MremapFlags::MAYMOVE) {
885            return error!(EINVAL);
886        }
887
888        // MREMAP_DONTUNMAP is always a move, so it requires MREMAP_MAYMOVE.
889        // There is no resizing allowed either.
890        if flags.contains(MremapFlags::DONTUNMAP)
891            && (!flags.contains(MremapFlags::MAYMOVE) || old_length != new_length)
892        {
893            return error!(EINVAL);
894        }
895
896        // In-place copies are invalid.
897        if !flags.contains(MremapFlags::MAYMOVE) && old_length == 0 {
898            return error!(ENOMEM);
899        }
900
901        if new_length == 0 {
902            return error!(EINVAL);
903        }
904
905        // Make sure old_addr is page-aligned.
906        if !old_addr.is_aligned(*PAGE_SIZE) {
907            return error!(EINVAL);
908        }
909
910        let old_length = round_up_to_system_page_size(old_length)?;
911        let new_length = round_up_to_system_page_size(new_length)?;
912
913        if self.check_has_unauthorized_splits(old_addr, old_length) {
914            return error!(EINVAL);
915        }
916
917        if self.check_has_unauthorized_splits(new_addr, new_length) {
918            return error!(EINVAL);
919        }
920
921        if !flags.contains(MremapFlags::DONTUNMAP)
922            && !flags.contains(MremapFlags::FIXED)
923            && old_length != 0
924        {
925            // We are not requested to remap to a specific address, so first we see if we can remap
926            // in-place. In-place copies (old_length == 0) are not allowed.
927            if let Some(new_addr) =
928                self.try_remap_in_place(mm, old_addr, old_length, new_length, released_mappings)?
929            {
930                return Ok(new_addr);
931            }
932        }
933
934        // There is no space to grow in place, or there is an explicit request to move.
935        if flags.contains(MremapFlags::MAYMOVE) {
936            let dst_address =
937                if flags.contains(MremapFlags::FIXED) { Some(new_addr) } else { None };
938            self.remap_move(
939                mm,
940                old_addr,
941                old_length,
942                dst_address,
943                new_length,
944                flags.contains(MremapFlags::DONTUNMAP),
945                released_mappings,
946            )
947        } else {
948            error!(ENOMEM)
949        }
950    }
951
952    /// Attempts to grow or shrink the mapping in-place. Returns `Ok(Some(addr))` if the remap was
953    /// successful. Returns `Ok(None)` if there was no space to grow.
954    fn try_remap_in_place(
955        &mut self,
956        mm: &Arc<MemoryManager>,
957        old_addr: UserAddress,
958        old_length: usize,
959        new_length: usize,
960        released_mappings: &mut ReleasedMappings,
961    ) -> Result<Option<UserAddress>, Errno> {
962        let old_range = old_addr..old_addr.checked_add(old_length).ok_or_else(|| errno!(EINVAL))?;
963        let new_range_in_place =
964            old_addr..old_addr.checked_add(new_length).ok_or_else(|| errno!(EINVAL))?;
965
966        if new_length <= old_length {
967            // Shrink the mapping in-place, which should always succeed.
968            // This is done by unmapping the extraneous region.
969            if new_length != old_length {
970                self.unmap(mm, new_range_in_place.end, old_length - new_length, released_mappings)?;
971            }
972            return Ok(Some(old_addr));
973        }
974
975        if self.mappings.range(old_range.end..new_range_in_place.end).next().is_some() {
976            // There is some mapping in the growth range prevening an in-place growth.
977            return Ok(None);
978        }
979
980        // There is space to grow in-place. The old range must be one contiguous mapping.
981        let (original_range, mapping) =
982            self.mappings.get(old_addr).ok_or_else(|| errno!(EINVAL))?;
983
984        if old_range.end > original_range.end {
985            return error!(EFAULT);
986        }
987        let original_range = original_range.clone();
988        let original_mapping = mapping.clone();
989
990        // Compute the new length of the entire mapping once it has grown.
991        let final_length = (original_range.end - original_range.start) + (new_length - old_length);
992
993        match self.get_mapping_backing(&original_mapping) {
994            MappingBacking::Memory(backing) => {
995                // Re-map the original range, which may include pages before the requested range.
996                Ok(Some(self.add_memory_mapping(
997                    mm,
998                    DesiredAddress::FixedOverwrite(original_range.start),
999                    backing.memory().clone(),
1000                    backing.address_to_offset(original_range.start),
1001                    final_length,
1002                    original_mapping.flags(),
1003                    original_mapping.max_access(),
1004                    false,
1005                    original_mapping.name().to_owned(),
1006                    original_mapping.mapping_mode(),
1007                    released_mappings,
1008                )?))
1009            }
1010            MappingBacking::PrivateAnonymous => {
1011                let growth_start = original_range.end;
1012                let growth_length = new_length - old_length;
1013                let final_end = (original_range.start + final_length)?;
1014                // Map new pages to back the growth.
1015                mm.mapping_context.map_in_user_vmar(
1016                    SelectedAddress::FixedOverwrite(growth_start),
1017                    &mm.mapping_context.private_anonymous.backing,
1018                    growth_start.ptr() as u64,
1019                    growth_length,
1020                    original_mapping.flags(),
1021                    false,
1022                )?;
1023                // Overwrite the mapping entry with the new larger size.
1024                released_mappings.extend(
1025                    self.mappings.insert(original_range.start..final_end, original_mapping.clone()),
1026                );
1027                Ok(Some(original_range.start))
1028            }
1029        }
1030    }
1031
    /// Grows or shrinks the mapping while moving it to a new destination.
    ///
    /// - `src_addr` / `src_length`: the range to move. A `src_length` of zero requests a copy of
    ///   the mapping rather than a move, which is only accepted for `SHARED` mappings.
    /// - `dst_addr`: `Some(addr)` places the result exactly at `addr` (after unmapping whatever
    ///   was there); `None` lets the address be selected.
    /// - `dst_length`: the length of the resulting mapping.
    /// - `keep_source`: when true, the source range is left mapped after the move.
    /// - `released_mappings`: receives every mapping dropped by this operation.
    ///
    /// Returns the address of the new mapping.
    ///
    /// Note that shrinking the source and unmapping the destination happen before the later
    /// validation steps, and are not undone if those steps fail.
    ///
    /// # Errors
    ///
    /// - `EINVAL` if a range end overflows, if no mapping contains `src_addr`, if a zero-length
    ///   (copy) request targets a non-`SHARED` mapping, or if the source and destination ranges
    ///   overlap.
    /// - `EFAULT` if growth is requested for a `DONT_EXPAND` mapping, or if the source range
    ///   extends past the mapping that contains `src_addr`.
    /// - Any error reported by the unmap, address selection, page move or map steps.
    fn remap_move(
        &mut self,
        mm: &Arc<MemoryManager>,
        src_addr: UserAddress,
        src_length: usize,
        dst_addr: Option<UserAddress>,
        dst_length: usize,
        keep_source: bool,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        let src_range = src_addr..src_addr.checked_add(src_length).ok_or_else(|| errno!(EINVAL))?;
        let (original_range, src_mapping) =
            self.mappings.get(src_addr).ok_or_else(|| errno!(EINVAL))?;
        // Take owned copies so that `self.mappings` can be modified below.
        let original_range = original_range.clone();
        let src_mapping = src_mapping.clone();

        if src_length == 0 && !src_mapping.flags().contains(MappingFlags::SHARED) {
            // src_length == 0 means that the mapping is to be copied. This behavior is only valid
            // with MAP_SHARED mappings.
            return error!(EINVAL);
        }

        // If the destination range is smaller than the source range, we must first shrink
        // the source range in place. This must be done now and visible to processes, even if
        // a later failure causes the remap operation to fail.
        if src_length != 0 && src_length > dst_length {
            self.unmap(mm, (src_addr + dst_length)?, src_length - dst_length, released_mappings)?;
        }

        let dst_addr_for_map = match dst_addr {
            None => DesiredAddress::Any,
            Some(dst_addr) => {
                // The mapping is being moved to a specific address.
                let dst_range =
                    dst_addr..(dst_addr.checked_add(dst_length).ok_or_else(|| errno!(EINVAL))?);
                // The source and destination ranges are not allowed to overlap.
                if !src_range.intersect(&dst_range).is_empty() {
                    return error!(EINVAL);
                }

                // The destination range must be unmapped. This must be done now and visible to
                // processes, even if a later failure causes the remap operation to fail.
                self.unmap(mm, dst_addr, dst_length, released_mappings)?;

                DesiredAddress::Fixed(dst_addr)
            }
        };

        // According to gVisor's aio_test, Linux checks for DONT_EXPAND after unmapping the dst
        // range.
        if dst_length > src_length && src_mapping.flags().contains(MappingFlags::DONT_EXPAND) {
            return error!(EFAULT);
        }

        if src_range.end > original_range.end {
            // The source range is not one contiguous mapping. This check must be done only after
            // the source range is shrunk and the destination unmapped.
            return error!(EFAULT);
        }

        match self.get_mapping_backing(&src_mapping) {
            MappingBacking::PrivateAnonymous => {
                let dst_addr =
                    self.select_address(dst_addr_for_map, dst_length, src_mapping.flags())?.addr();
                let dst_end = (dst_addr + dst_length)?;

                // Only the part common to the source and destination ranges holds existing pages.
                let length_to_move = std::cmp::min(dst_length, src_length) as u64;
                let growth_start_addr = (dst_addr + length_to_move)?;

                if dst_addr != src_addr {
                    let src_move_end = (src_range.start + length_to_move)?;
                    let range_to_move = src_range.start..src_move_end;
                    // Move the previously mapped pages into their new location.
                    mm.mapping_context.private_anonymous.move_pages(&range_to_move, dst_addr)?;
                }

                // Userfault registration is not preserved by remap
                let new_flags =
                    src_mapping.flags().difference(MappingFlags::UFFD | MappingFlags::UFFD_MISSING);
                // Only eager mappings are mapped into the user VMAR here; for any other mode
                // just the bookkeeping entry below is created.
                if src_mapping.mapping_mode() == MappingMode::Eager {
                    mm.mapping_context.map_in_user_vmar(
                        SelectedAddress::FixedOverwrite(dst_addr),
                        &mm.mapping_context.private_anonymous.backing,
                        dst_addr.ptr() as u64,
                        dst_length,
                        new_flags,
                        false,
                    )?;

                    if dst_length > src_length {
                        // The mapping has grown, map new pages in to cover the growth.
                        let growth_length = dst_length - src_length;

                        self.map_private_anonymous(
                            mm,
                            DesiredAddress::FixedOverwrite(growth_start_addr),
                            growth_length,
                            new_flags.access_flags(),
                            new_flags.options(),
                            false,
                            src_mapping.name().to_owned(),
                            released_mappings,
                        )?;
                    }
                }

                // Record a single mapping entry covering the whole destination range.
                released_mappings.extend(self.mappings.insert(
                    dst_addr..dst_end,
                    Mapping::new_private_anonymous(
                        new_flags,
                        src_mapping.name().to_owned(),
                        src_mapping.mapping_mode(),
                    ),
                ));

                // Drop the source unless this was a copy, the caller asked to keep it, or the
                // mapping did not actually change address.
                if dst_addr != src_addr && src_length != 0 && !keep_source {
                    self.unmap(mm, src_addr, src_length, released_mappings)?;
                }

                return Ok(dst_addr);
            }
            MappingBacking::Memory(backing) => {
                // This mapping is backed by an FD or is a shared anonymous mapping. Just map the
                // range of the memory object covering the moved pages. If the memory object already
                // had COW semantics, this preserves them.
                let (dst_memory_offset, memory) =
                    (backing.address_to_offset(src_addr), backing.memory().clone());

                let new_address = self.add_memory_mapping(
                    mm,
                    dst_addr_for_map,
                    memory,
                    dst_memory_offset,
                    dst_length,
                    src_mapping.flags(),
                    src_mapping.max_access(),
                    false,
                    src_mapping.name().to_owned(),
                    src_mapping.mapping_mode(),
                    released_mappings,
                )?;

                if src_length != 0 && !keep_source {
                    // Only unmap the source range if this is not a copy and if there was not a specific
                    // request to not unmap. It was checked earlier that in case of src_length == 0
                    // this mapping is MAP_SHARED.
                    self.unmap(mm, src_addr, src_length, released_mappings)?;
                }

                return Ok(new_address);
            }
        };
    }
1185
1186    // Checks if an operation may be performed over the target mapping that may
1187    // result in a split mapping.
1188    //
1189    // An operation may be forbidden if the target mapping only partially covers
1190    // an existing mapping with the `MappingOptions::DONT_SPLIT` flag set.
1191    fn check_has_unauthorized_splits(&self, addr: UserAddress, length: usize) -> bool {
1192        let query_range = addr..addr.saturating_add(length);
1193        let mut intersection = self.mappings.range(query_range.clone());
1194
1195        // A mapping is not OK if it disallows splitting and the target range
1196        // does not fully cover the mapping range.
1197        let check_if_mapping_has_unauthorized_split =
1198            |mapping: Option<(&Range<UserAddress>, &Mapping)>| {
1199                mapping.is_some_and(|(mapping_range, mapping)| {
1200                    mapping.flags().contains(MappingFlags::DONT_SPLIT)
1201                        && (mapping_range.start < query_range.start
1202                            || query_range.end < mapping_range.end)
1203                })
1204            };
1205
1206        // We only check the first and last mappings in the range because naturally,
1207        // the mappings in the middle are fully covered by the target mapping and
1208        // won't be split.
1209        check_if_mapping_has_unauthorized_split(intersection.next())
1210            || check_if_mapping_has_unauthorized_split(intersection.next_back())
1211    }
1212
1213    /// Unmaps the specified range. Unmapped mappings are placed in `released_mappings`.
1214    fn unmap(
1215        &mut self,
1216        mm: &Arc<MemoryManager>,
1217        addr: UserAddress,
1218        length: usize,
1219        released_mappings: &mut ReleasedMappings,
1220    ) -> Result<(), Errno> {
1221        if !addr.is_aligned(*PAGE_SIZE) {
1222            return error!(EINVAL);
1223        }
1224        let length = round_up_to_system_page_size(length)?;
1225        if length == 0 {
1226            return error!(EINVAL);
1227        }
1228
1229        if self.check_has_unauthorized_splits(addr, length) {
1230            return error!(EINVAL);
1231        }
1232
1233        // Unmap the range, including the the tail of any range that would have been split. This
1234        // operation is safe because we're operating on another process.
1235        #[allow(
1236            clippy::undocumented_unsafe_blocks,
1237            reason = "Force documented unsafe blocks in Starnix"
1238        )]
1239        match unsafe { mm.mapping_context.user_vmar.unmap(addr.ptr(), length) } {
1240            Ok(_) => (),
1241            Err(zx::Status::NOT_FOUND) => (),
1242            Err(zx::Status::INVALID_ARGS) => return error!(EINVAL),
1243            Err(status) => {
1244                impossible_error(status);
1245            }
1246        };
1247
1248        self.update_after_unmap(mm, addr, length, released_mappings)?;
1249
1250        Ok(())
1251    }
1252
1253    // Updates `self.mappings` after the specified range was unmaped.
1254    //
1255    // The range to unmap can span multiple mappings, and can split mappings if
1256    // the range start or end falls in the middle of a mapping.
1257    //
1258    // Private anonymous memory is contained in the same memory object; The pages of that object
1259    // that are no longer reachable should be released.
1260    //
1261    // File-backed mappings don't need to have their memory object modified.
1262    //
1263    // Unmapped mappings are placed in `released_mappings`.
1264    fn update_after_unmap(
1265        &mut self,
1266        mm: &Arc<MemoryManager>,
1267        addr: UserAddress,
1268        length: usize,
1269        released_mappings: &mut ReleasedMappings,
1270    ) -> Result<(), Errno> {
1271        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
1272        let unmap_range = addr..end_addr;
1273
1274        // Remove any shadow mappings for mlock()'d pages that are now unmapped.
1275        released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(unmap_range.clone()));
1276
1277        for (range, mapping) in self.mappings.range(unmap_range.clone()) {
1278            // Deallocate any pages in the private, anonymous backing that are now unreachable.
1279            if let MappingBacking::PrivateAnonymous = self.get_mapping_backing(mapping) {
1280                let unmapped_range = &unmap_range.intersect(range);
1281
1282                mm.inflight_vmspliced_payloads.handle_unmapping(
1283                    &mm.mapping_context.private_anonymous.backing,
1284                    unmapped_range,
1285                )?;
1286
1287                mm.mapping_context
1288                    .private_anonymous
1289                    .zero(unmapped_range.start, unmapped_range.end - unmapped_range.start)?;
1290            }
1291        }
1292        released_mappings.extend(self.mappings.remove(unmap_range));
1293        return Ok(());
1294    }
1295
1296    fn protect(
1297        &mut self,
1298        current_task: &CurrentTask,
1299        addr: UserAddress,
1300        length: usize,
1301        prot_flags: ProtectionFlags,
1302        released_mappings: &mut ReleasedMappings,
1303    ) -> Result<(), Errno> {
1304        let vmar_flags = prot_flags.to_vmar_flags();
1305        let page_size = *PAGE_SIZE;
1306        let end = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(page_size)?;
1307
1308        if self.check_has_unauthorized_splits(addr, length) {
1309            return error!(EINVAL);
1310        }
1311
1312        let prot_range = if prot_flags.contains(ProtectionFlags::GROWSDOWN) {
1313            let mut start = addr;
1314            let Some((range, mapping)) = self.mappings.get(start) else {
1315                return error!(EINVAL);
1316            };
1317            // Ensure that the mapping has GROWSDOWN if PROT_GROWSDOWN was specified.
1318            if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1319                return error!(EINVAL);
1320            }
1321            let access_flags = mapping.flags().access_flags();
1322            // From <https://man7.org/linux/man-pages/man2/mprotect.2.html>:
1323            //
1324            //   PROT_GROWSDOWN
1325            //     Apply the protection mode down to the beginning of a
1326            //     mapping that grows downward (which should be a stack
1327            //     segment or a segment mapped with the MAP_GROWSDOWN flag
1328            //     set).
1329            start = range.start;
1330            while let Some((range, mapping)) =
1331                self.mappings.get(start.saturating_sub(page_size as usize))
1332            {
1333                if !mapping.flags().contains(MappingFlags::GROWSDOWN)
1334                    || mapping.flags().access_flags() != access_flags
1335                {
1336                    break;
1337                }
1338                start = range.start;
1339            }
1340            start..end
1341        } else {
1342            addr..end
1343        };
1344
1345        let mut range_list = vec![];
1346        let mapping_context = &current_task.mm()?.mapping_context;
1347        let length = prot_range.end - prot_range.start;
1348        self.ensure_range_mapped_in_user_vmar(prot_range.start, Some(length), mapping_context)?;
1349
1350        for (range, mapping) in self.mappings.range(prot_range.clone()) {
1351            range_list.push((range.clone(), mapping.clone()));
1352        }
1353
1354        let mut start_cursor = prot_range.start;
1355        let mut updates = vec![];
1356        let mut final_result = Ok(());
1357
1358        for (range, mapping) in range_list {
1359            if range.start > start_cursor {
1360                final_result = error!(ENOMEM);
1361                break;
1362            }
1363
1364            let intersection = range.intersect(&prot_range);
1365            if let Err(e) =
1366                security::file_mprotect(current_task, &intersection, &mapping, prot_flags)
1367            {
1368                final_result = Err(e);
1369                break;
1370            }
1371
1372            if mapping.flags().contains(MappingFlags::UFFD) {
1373                track_stub!(
1374                    TODO("https://fxbug.dev/297375964"),
1375                    "mprotect on uffd-registered range should not alter protections"
1376                );
1377                final_result = error!(EINVAL);
1378                break;
1379            }
1380
1381            let mapped_len = intersection.end - intersection.start;
1382
1383            // SAFETY: This is safe because the vmar belongs to a different process.
1384            let protect_result = unsafe {
1385                mapping_context.user_vmar.protect(intersection.start.ptr(), mapped_len, vmar_flags)
1386            }
1387            .map_err(|s| match s {
1388                zx::Status::INVALID_ARGS => errno!(EINVAL),
1389                zx::Status::NOT_FOUND => errno!(ENOMEM),
1390                zx::Status::ACCESS_DENIED => errno!(EACCES),
1391                _ => impossible_error(s),
1392            });
1393
1394            if let Err(e) = protect_result {
1395                final_result = Err(e);
1396                break;
1397            }
1398
1399            let mut new_mapping = mapping.clone();
1400            new_mapping.set_flags(new_mapping.flags().with_access_flags(prot_flags));
1401            let push_range = intersection.clone();
1402            start_cursor = intersection.end;
1403            updates.push((push_range, new_mapping));
1404        }
1405
1406        if final_result.is_ok() && start_cursor < prot_range.end {
1407            final_result = error!(ENOMEM);
1408        }
1409
1410        for (r, m) in updates {
1411            released_mappings.extend(self.mappings.insert(r, m));
1412        }
1413
1414        final_result
1415    }
1416
1417    fn madvise(
1418        &mut self,
1419        context: &MappingContext,
1420        addr: UserAddress,
1421        length: usize,
1422        advice: u32,
1423        released_mappings: &mut ReleasedMappings,
1424    ) -> Result<(), Errno> {
1425        if !addr.is_aligned(*PAGE_SIZE) {
1426            return error!(EINVAL);
1427        }
1428
1429        let end_addr =
1430            addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(*PAGE_SIZE)?;
1431        if end_addr > context.max_address() {
1432            return error!(EFAULT);
1433        }
1434
1435        if advice == MADV_NORMAL {
1436            track_stub!(TODO("https://fxbug.dev/322874202"), "madvise undo hints for MADV_NORMAL");
1437            return Ok(());
1438        }
1439
1440        let mut updates = vec![];
1441        let range_for_op = addr..end_addr;
1442        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
1443            let range_to_zero = range.intersect(&range_for_op);
1444            if range_to_zero.is_empty() {
1445                continue;
1446            }
1447            let start_offset = mapping.address_to_offset(range_to_zero.start);
1448            let end_offset = mapping.address_to_offset(range_to_zero.end);
1449            if advice == MADV_DONTFORK
1450                || advice == MADV_DOFORK
1451                || advice == MADV_WIPEONFORK
1452                || advice == MADV_KEEPONFORK
1453                || advice == MADV_DONTDUMP
1454                || advice == MADV_DODUMP
1455                || advice == MADV_MERGEABLE
1456                || advice == MADV_UNMERGEABLE
1457            {
1458                // WIPEONFORK is only supported on private anonymous mappings per madvise(2).
1459                // KEEPONFORK can be specified on ranges that cover other sorts of mappings. It should
1460                // have no effect on mappings that are not private and anonymous as such mappings cannot
1461                // have the WIPEONFORK option set.
1462                if advice == MADV_WIPEONFORK && !mapping.private_anonymous() {
1463                    return error!(EINVAL);
1464                }
1465                let new_flags = match advice {
1466                    MADV_DONTFORK => mapping.flags() | MappingFlags::DONTFORK,
1467                    MADV_DOFORK => mapping.flags() & MappingFlags::DONTFORK.complement(),
1468                    MADV_WIPEONFORK => mapping.flags() | MappingFlags::WIPEONFORK,
1469                    MADV_KEEPONFORK => mapping.flags() & MappingFlags::WIPEONFORK.complement(),
1470                    MADV_DONTDUMP => {
1471                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTDUMP");
1472                        mapping.flags()
1473                    }
1474                    MADV_DODUMP => {
1475                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DODUMP");
1476                        mapping.flags()
1477                    }
1478                    MADV_MERGEABLE => {
1479                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_MERGEABLE");
1480                        mapping.flags()
1481                    }
1482                    MADV_UNMERGEABLE => {
1483                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_UNMERGEABLE");
1484                        mapping.flags()
1485                    }
1486                    // Only the variants in this match should be reachable given the condition for
1487                    // the containing branch.
1488                    unknown_advice => unreachable!("unknown advice {unknown_advice}"),
1489                };
1490                let mut new_mapping = mapping.clone();
1491                new_mapping.set_flags(new_flags);
1492                updates.push((range_to_zero, new_mapping));
1493            } else {
1494                if mapping.flags().contains(MappingFlags::SHARED) {
1495                    continue;
1496                }
1497                let op = match advice {
1498                    MADV_DONTNEED if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1499                        // Note, we cannot simply implemented MADV_DONTNEED with
1500                        // zx::VmoOp::DONT_NEED because they have different
1501                        // semantics.
1502                        track_stub!(
1503                            TODO("https://fxbug.dev/322874496"),
1504                            "MADV_DONTNEED with file-backed mapping"
1505                        );
1506                        return error!(EINVAL);
1507                    }
1508                    MADV_DONTNEED if mapping.flags().contains(MappingFlags::LOCKED) => {
1509                        return error!(EINVAL);
1510                    }
1511                    MADV_DONTNEED => zx::VmoOp::ZERO,
1512                    MADV_DONTNEED_LOCKED => {
1513                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTNEED_LOCKED");
1514                        return error!(EINVAL);
1515                    }
1516                    MADV_WILLNEED => {
1517                        if mapping.flags().contains(MappingFlags::WRITE) {
1518                            zx::VmoOp::COMMIT
1519                        } else {
1520                            zx::VmoOp::PREFETCH
1521                        }
1522                    }
1523                    MADV_COLD => {
1524                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLD");
1525                        return error!(EINVAL);
1526                    }
1527                    MADV_PAGEOUT => {
1528                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_PAGEOUT");
1529                        return error!(EINVAL);
1530                    }
1531                    MADV_POPULATE_READ => {
1532                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_POPULATE_READ");
1533                        return error!(EINVAL);
1534                    }
1535                    MADV_RANDOM => {
1536                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_RANDOM");
1537                        return error!(EINVAL);
1538                    }
1539                    MADV_SEQUENTIAL => {
1540                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SEQUENTIAL");
1541                        return error!(EINVAL);
1542                    }
1543                    MADV_FREE if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
1544                        track_stub!(
1545                            TODO("https://fxbug.dev/411748419"),
1546                            "MADV_FREE with file-backed mapping"
1547                        );
1548                        return error!(EINVAL);
1549                    }
1550                    MADV_FREE if mapping.flags().contains(MappingFlags::LOCKED) => {
1551                        return error!(EINVAL);
1552                    }
1553                    MADV_FREE => {
1554                        track_stub!(TODO("https://fxbug.dev/411748419"), "MADV_FREE");
1555                        // TODO(https://fxbug.dev/411748419) For now, treat MADV_FREE like
1556                        // MADV_DONTNEED as a stopgap until we have proper support.
1557                        zx::VmoOp::ZERO
1558                    }
1559                    MADV_REMOVE => {
1560                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_REMOVE");
1561                        return error!(EINVAL);
1562                    }
1563                    MADV_HWPOISON => {
1564                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HWPOISON");
1565                        return error!(EINVAL);
1566                    }
1567                    MADV_SOFT_OFFLINE => {
1568                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SOFT_OFFLINE");
1569                        return error!(EINVAL);
1570                    }
1571                    MADV_HUGEPAGE => {
1572                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HUGEPAGE");
1573                        return error!(EINVAL);
1574                    }
1575                    MADV_COLLAPSE => {
1576                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLLAPSE");
1577                        return error!(EINVAL);
1578                    }
1579                    MADV_NOHUGEPAGE => return Ok(()),
1580                    advice => {
1581                        track_stub!(TODO("https://fxbug.dev/322874202"), "madvise", advice);
1582                        return error!(EINVAL);
1583                    }
1584                };
1585
1586                let memory = match self.get_mapping_backing(mapping) {
1587                    MappingBacking::Memory(backing) => backing.memory(),
1588                    MappingBacking::PrivateAnonymous => &context.private_anonymous.backing,
1589                };
1590                memory.op_range(op, start_offset, end_offset - start_offset).map_err(
1591                    |s| match s {
1592                        zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1593                        zx::Status::NO_MEMORY => errno!(ENOMEM),
1594                        zx::Status::INVALID_ARGS => errno!(EINVAL),
1595                        zx::Status::ACCESS_DENIED => errno!(EACCES),
1596                        _ => impossible_error(s),
1597                    },
1598                )?;
1599            }
1600        }
1601        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
1602        for (range, mapping) in updates {
1603            released_mappings.extend(self.mappings.insert(range, mapping));
1604        }
1605        Ok(())
1606    }
1607
1608    fn mlock<L>(
1609        &mut self,
1610        context: &MappingContext,
1611        current_task: &CurrentTask,
1612        locked: &mut Locked<L>,
1613        desired_addr: UserAddress,
1614        desired_length: usize,
1615        on_fault: bool,
1616        released_mappings: &mut ReleasedMappings,
1617    ) -> Result<(), Errno>
1618    where
1619        L: LockBefore<ThreadGroupLimits>,
1620    {
1621        let desired_end_addr =
1622            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1623        let start_addr = round_down_to_system_page_size(desired_addr)?;
1624        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1625
1626        let mut updates = vec![];
1627        let mut bytes_mapped_in_range = 0;
1628        let mut num_new_locked_bytes = 0;
1629        let mut failed_to_lock = false;
1630        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1631            let mut range = range.clone();
1632            let mut mapping = mapping.clone();
1633
1634            // Handle mappings that start before the region to be locked.
1635            range.start = std::cmp::max(range.start, start_addr);
1636            // Handle mappings that extend past the region to be locked.
1637            range.end = std::cmp::min(range.end, end_addr);
1638
1639            bytes_mapped_in_range += (range.end - range.start) as u64;
1640
1641            // PROT_NONE mappings generate ENOMEM but are left locked.
1642            if !mapping
1643                .flags()
1644                .intersects(MappingFlags::READ | MappingFlags::WRITE | MappingFlags::EXEC)
1645            {
1646                failed_to_lock = true;
1647            }
1648
1649            if !mapping.flags().contains(MappingFlags::LOCKED) {
1650                num_new_locked_bytes += (range.end - range.start) as u64;
1651                let shadow_mapping = match current_task.kernel().features.mlock_pin_flavor {
1652                    // Pin the memory by mapping the backing memory into the high priority vmar.
1653                    MlockPinFlavor::ShadowProcess => {
1654                        let shadow_process =
1655                            current_task.kernel().expando.get_or_try_init(|| {
1656                                memory_pinning::ShadowProcess::new(zx::Name::new_lossy(
1657                                    "starnix_mlock_pins",
1658                                ))
1659                                .map(MlockShadowProcess)
1660                                .map_err(|_| errno!(EPERM))
1661                            })?;
1662
1663                        let (vmo, offset) = match self.get_mapping_backing(&mapping) {
1664                            MappingBacking::Memory(m) => (
1665                                m.memory().as_vmo().ok_or_else(|| errno!(ENOMEM))?,
1666                                m.address_to_offset(range.start),
1667                            ),
1668                            MappingBacking::PrivateAnonymous => (
1669                                context
1670                                    .private_anonymous
1671                                    .backing
1672                                    .as_vmo()
1673                                    .ok_or_else(|| errno!(ENOMEM))?,
1674                                range.start.ptr() as u64,
1675                            ),
1676                        };
1677                        Some(shadow_process.0.pin_pages(vmo, offset, range.end - range.start)?)
1678                    }
1679
1680                    // Relying on VMAR-level operations means just flags are set per-mapping.
1681                    MlockPinFlavor::Noop | MlockPinFlavor::VmarAlwaysNeed => None,
1682                };
1683                mapping.set_mlock();
1684                updates.push((range, mapping, shadow_mapping));
1685            }
1686        }
1687
1688        if bytes_mapped_in_range as usize != end_addr - start_addr {
1689            return error!(ENOMEM);
1690        }
1691
1692        let memlock_rlimit = current_task.thread_group().get_rlimit(locked, Resource::MEMLOCK);
1693        let total_locked = self.num_locked_bytes(
1694            UserAddress::from(context.user_vmar_info.base as u64)
1695                ..UserAddress::from(
1696                    (context.user_vmar_info.base + context.user_vmar_info.len) as u64,
1697                ),
1698        );
1699        if total_locked + num_new_locked_bytes > memlock_rlimit {
1700            if crate::security::check_task_capable(current_task, CAP_IPC_LOCK).is_err() {
1701                let code = if memlock_rlimit > 0 { errno!(ENOMEM) } else { errno!(EPERM) };
1702                return Err(code);
1703            }
1704        }
1705
1706        let op_range_status_to_errno = |e| match e {
1707            zx::Status::BAD_STATE | zx::Status::NOT_SUPPORTED => errno!(ENOMEM),
1708            zx::Status::INVALID_ARGS | zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1709            zx::Status::ACCESS_DENIED => {
1710                unreachable!("user vmar should always have needed rights")
1711            }
1712            zx::Status::BAD_HANDLE => {
1713                unreachable!("user vmar should always be a valid handle")
1714            }
1715            zx::Status::WRONG_TYPE => unreachable!("user vmar handle should be a vmar"),
1716            _ => unreachable!("unknown error from op_range on user vmar for mlock: {e}"),
1717        };
1718
1719        self.ensure_range_mapped_in_user_vmar(start_addr, Some(end_addr - start_addr), context)?;
1720
1721        if !on_fault && !current_task.kernel().features.mlock_always_onfault {
1722            context
1723                .user_vmar
1724                .op_range(zx::VmarOp::PREFETCH, start_addr.ptr(), end_addr - start_addr)
1725                .map_err(op_range_status_to_errno)?;
1726        }
1727
1728        match current_task.kernel().features.mlock_pin_flavor {
1729            MlockPinFlavor::VmarAlwaysNeed => {
1730                context
1731                    .user_vmar
1732                    .op_range(zx::VmarOp::ALWAYS_NEED, start_addr.ptr(), end_addr - start_addr)
1733                    .map_err(op_range_status_to_errno)?;
1734            }
1735            // The shadow process doesn't use any vmar-level operations to pin memory.
1736            MlockPinFlavor::Noop | MlockPinFlavor::ShadowProcess => (),
1737        }
1738
1739        for (range, mapping, shadow_mapping) in updates {
1740            if let Some(shadow_mapping) = shadow_mapping {
1741                released_mappings.extend_pins(
1742                    self.shadow_mappings_for_mlock.insert(range.clone(), shadow_mapping),
1743                );
1744            }
1745            released_mappings.extend(self.mappings.insert(range, mapping));
1746        }
1747
1748        if failed_to_lock { error!(ENOMEM) } else { Ok(()) }
1749    }
1750
1751    fn munlock(
1752        &mut self,
1753        _current_task: &CurrentTask,
1754        desired_addr: UserAddress,
1755        desired_length: usize,
1756        released_mappings: &mut ReleasedMappings,
1757    ) -> Result<(), Errno> {
1758        let desired_end_addr =
1759            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1760        let start_addr = round_down_to_system_page_size(desired_addr)?;
1761        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1762
1763        let mut updates = vec![];
1764        let mut bytes_mapped_in_range = 0;
1765        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1766            let mut range = range.clone();
1767            let mut mapping = mapping.clone();
1768
1769            // Handle mappings that start before the region to be locked.
1770            range.start = std::cmp::max(range.start, start_addr);
1771            // Handle mappings that extend past the region to be locked.
1772            range.end = std::cmp::min(range.end, end_addr);
1773
1774            bytes_mapped_in_range += (range.end - range.start) as u64;
1775
1776            if mapping.flags().contains(MappingFlags::LOCKED) {
1777                // This clears the locking for the shadow process pin flavor. It's not currently
1778                // possible to actually unlock pages that were locked with the
1779                // ZX_VMAR_OP_ALWAYS_NEED pin flavor.
1780                mapping.clear_mlock();
1781                updates.push((range, mapping));
1782            }
1783        }
1784
1785        if bytes_mapped_in_range as usize != end_addr - start_addr {
1786            return error!(ENOMEM);
1787        }
1788
1789        for (range, mapping) in updates {
1790            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
1791            released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(range));
1792        }
1793
1794        Ok(())
1795    }
1796
1797    pub fn num_locked_bytes(&self, range: impl RangeBounds<UserAddress>) -> u64 {
1798        self.mappings
1799            .map
1800            .range(range)
1801            .filter(|(_, mapping)| mapping.flags().contains(MappingFlags::LOCKED))
1802            .map(|(range, _)| (range.end - range.start) as u64)
1803            .sum()
1804    }
1805
1806    fn get_mappings_for_vmsplice(
1807        &self,
1808        mm: &Arc<MemoryManager>,
1809        buffers: &UserBuffers,
1810    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
1811        let mut vmsplice_mappings = Vec::new();
1812
1813        for UserBuffer { mut address, length } in buffers.iter().copied() {
1814            let mappings = self.get_contiguous_mappings_at(address, length, &mm.mapping_context)?;
1815            for (mapping, length) in mappings {
1816                let vmsplice_payload = match self.get_mapping_backing(mapping) {
1817                    MappingBacking::Memory(m) => VmsplicePayloadSegment {
1818                        addr_offset: address,
1819                        length,
1820                        memory: m.memory().clone(),
1821                        memory_offset: m.address_to_offset(address),
1822                    },
1823                    MappingBacking::PrivateAnonymous => VmsplicePayloadSegment {
1824                        addr_offset: address,
1825                        length,
1826                        memory: mm.mapping_context.private_anonymous.backing.clone(),
1827                        memory_offset: address.ptr() as u64,
1828                    },
1829                };
1830                vmsplice_mappings.push(VmsplicePayload::new(Arc::downgrade(mm), vmsplice_payload));
1831
1832                address = (address + length)?;
1833            }
1834        }
1835
1836        Ok(vmsplice_mappings)
1837    }
1838
1839    /// Returns all the mappings starting at `addr`, and continuing until either `length` bytes have
1840    /// been covered or an unmapped page is reached.
1841    ///
1842    /// Mappings are returned in ascending order along with the number of bytes that intersect the
1843    /// requested range. The returned mappings are guaranteed to be contiguous and the total length
1844    /// corresponds to the number of contiguous mapped bytes starting from `addr`, i.e.:
1845    /// - 0 (empty iterator) if `addr` is not mapped.
1846    /// - exactly `length` if the requested range is fully mapped.
1847    /// - the offset of the first unmapped page (between 0 and `length`) if the requested range is
1848    ///   only partially mapped.
1849    ///
1850    /// Returns EFAULT if the requested range overflows or extends past the end of the vmar.
1851    fn get_contiguous_mappings_at(
1852        &self,
1853        addr: UserAddress,
1854        length: usize,
1855        context: &MappingContext,
1856    ) -> Result<impl Iterator<Item = (&Mapping, usize)>, Errno> {
1857        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EFAULT))?;
1858        if end_addr > context.max_address() {
1859            return error!(EFAULT);
1860        }
1861
1862        // Iterate over all contiguous mappings intersecting the requested range.
1863        let mut mappings = self.mappings.range(addr..end_addr);
1864        let mut prev_range_end = None;
1865        let mut offset = 0;
1866        let result = std::iter::from_fn(move || {
1867            if offset != length {
1868                if let Some((range, mapping)) = mappings.next() {
1869                    return match prev_range_end {
1870                        // If this is the first mapping that we are considering, it may not actually
1871                        // contain `addr` at all.
1872                        None if range.start > addr => None,
1873
1874                        // Subsequent mappings may not be contiguous.
1875                        Some(prev_range_end) if range.start != prev_range_end => None,
1876
1877                        // This mapping can be returned.
1878                        _ => {
1879                            let mapping_length = std::cmp::min(length, range.end - addr) - offset;
1880                            offset += mapping_length;
1881                            prev_range_end = Some(range.end);
1882                            Some((mapping, mapping_length))
1883                        }
1884                    };
1885                }
1886            }
1887
1888            None
1889        });
1890
1891        Ok(result)
1892    }
1893
1894    /// Determines whether a fault at the given address could be covered by extending a growsdown
1895    /// mapping.
1896    ///
1897    /// If the address already belongs to a mapping, this function returns `None`. If the next
1898    /// mapping above the given address has the `MappingFlags::GROWSDOWN` flag, this function
1899    /// returns the address at which that mapping starts and the mapping itself. Otherwise, this
1900    /// function returns `None`.
1901    fn find_growsdown_mapping(&self, addr: UserAddress) -> Option<(UserAddress, &Mapping)> {
1902        match self.mappings.range(addr..).next() {
1903            Some((range, mapping)) => {
1904                if range.contains(&addr) {
1905                    // |addr| is already contained within a mapping, nothing to grow.
1906                    return None;
1907                } else if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1908                    // The next mapping above the given address does not have the
1909                    // `MappingFlags::GROWSDOWN` flag.
1910                    None
1911                } else {
1912                    Some((range.start, mapping))
1913                }
1914            }
1915            None => None,
1916        }
1917    }
1918
1919    /// Determines if an access at a given address could be covered by extending a growsdown mapping
1920    /// and extends it if possible. Returns true if the given address is covered by a mapping.
1921    fn extend_growsdown_mapping_to_address(
1922        &mut self,
1923        mm: &Arc<MemoryManager>,
1924        addr: UserAddress,
1925        is_write: bool,
1926    ) -> Result<bool, Error> {
1927        let Some((mapping_low_addr, mapping_to_grow)) = self.find_growsdown_mapping(addr) else {
1928            return Ok(false);
1929        };
1930        if is_write && !mapping_to_grow.can_write() {
1931            // Don't grow a read-only GROWSDOWN mapping for a write fault, it won't work.
1932            return Ok(false);
1933        }
1934        if !mapping_to_grow.flags().contains(MappingFlags::ANONYMOUS) {
1935            // Currently, we only grow anonymous mappings.
1936            return Ok(false);
1937        }
1938        let low_addr = (addr - (addr.ptr() as u64 % *PAGE_SIZE))?;
1939        let high_addr = mapping_low_addr;
1940
1941        let length = high_addr
1942            .ptr()
1943            .checked_sub(low_addr.ptr())
1944            .ok_or_else(|| anyhow!("Invalid growth range"))?;
1945
1946        let mut released_mappings = ReleasedMappings::default();
1947        self.map_anonymous(
1948            mm,
1949            DesiredAddress::FixedOverwrite(low_addr),
1950            length,
1951            mapping_to_grow.flags().access_flags(),
1952            mapping_to_grow.flags().options(),
1953            mapping_to_grow.name().to_owned(),
1954            &mut released_mappings,
1955        )?;
1956        // We can't have any released mappings because `find_growsdown_mapping` will return None if
1957        // the mapping already exists in this range.
1958        assert!(
1959            released_mappings.is_empty(),
1960            "expected to not remove mappings by inserting, got {released_mappings:#?}"
1961        );
1962        Ok(true)
1963    }
1964
1965    /// Reads exactly `bytes.len()` bytes of memory.
1966    ///
1967    /// # Parameters
1968    /// - `addr`: The address to read data from.
1969    /// - `bytes`: The byte array to read into.
1970    fn read_memory<'a>(
1971        &self,
1972        addr: UserAddress,
1973        bytes: &'a mut [MaybeUninit<u8>],
1974        context: &MappingContext,
1975    ) -> Result<&'a mut [u8], Errno> {
1976        let mut bytes_read = 0;
1977        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
1978            let next_offset = bytes_read + len;
1979            self.read_mapping_memory(
1980                (addr + bytes_read)?,
1981                mapping,
1982                &mut bytes[bytes_read..next_offset],
1983                context,
1984            )?;
1985            bytes_read = next_offset;
1986        }
1987
1988        if bytes_read != bytes.len() {
1989            error!(EFAULT)
1990        } else {
1991            // SAFETY: The created slice is properly aligned/sized since it
1992            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
1993            // the same layout as `T`. Also note that `bytes_read` bytes have
1994            // been properly initialized.
1995            let bytes = unsafe {
1996                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
1997            };
1998            Ok(bytes)
1999        }
2000    }
2001
2002    /// Reads exactly `bytes.len()` bytes of memory from `addr`.
2003    ///
2004    /// # Parameters
2005    /// - `addr`: The address to read data from.
2006    /// - `bytes`: The byte array to read into.
2007    fn read_mapping_memory<'a>(
2008        &self,
2009        addr: UserAddress,
2010        mapping: &Mapping,
2011        bytes: &'a mut [MaybeUninit<u8>],
2012        context: &MappingContext,
2013    ) -> Result<&'a mut [u8], Errno> {
2014        if !mapping.can_read() {
2015            return error!(EFAULT, "read_mapping_memory called on unreadable mapping");
2016        }
2017        match self.get_mapping_backing(mapping) {
2018            MappingBacking::Memory(backing) => backing.read_memory(addr, bytes),
2019            MappingBacking::PrivateAnonymous => context.private_anonymous.read_memory(addr, bytes),
2020        }
2021    }
2022
2023    /// Reads bytes starting at `addr`, continuing until either `bytes.len()` bytes have been read
2024    /// or no more bytes can be read.
2025    ///
2026    /// This is used, for example, to read null-terminated strings where the exact length is not
2027    /// known, only the maximum length is.
2028    ///
2029    /// # Parameters
2030    /// - `addr`: The address to read data from.
2031    /// - `bytes`: The byte array to read into.
2032    fn read_memory_partial<'a>(
2033        &self,
2034        addr: UserAddress,
2035        bytes: &'a mut [MaybeUninit<u8>],
2036        context: &MappingContext,
2037    ) -> Result<&'a mut [u8], Errno> {
2038        let mut bytes_read = 0;
2039        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2040            let next_offset = bytes_read + len;
2041            if self
2042                .read_mapping_memory(
2043                    (addr + bytes_read)?,
2044                    mapping,
2045                    &mut bytes[bytes_read..next_offset],
2046                    context,
2047                )
2048                .is_err()
2049            {
2050                break;
2051            }
2052            bytes_read = next_offset;
2053        }
2054
2055        // If at least one byte was requested but we got none, it means that `addr` was invalid.
2056        if !bytes.is_empty() && bytes_read == 0 {
2057            error!(EFAULT)
2058        } else {
2059            // SAFETY: The created slice is properly aligned/sized since it
2060            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
2061            // the same layout as `T`. Also note that `bytes_read` bytes have
2062            // been properly initialized.
2063            let bytes = unsafe {
2064                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
2065            };
2066            Ok(bytes)
2067        }
2068    }
2069
2070    /// Like `read_memory_partial` but only returns the bytes up to and including
2071    /// a null (zero) byte.
2072    fn read_memory_partial_until_null_byte<'a>(
2073        &self,
2074        addr: UserAddress,
2075        bytes: &'a mut [MaybeUninit<u8>],
2076        context: &MappingContext,
2077    ) -> Result<&'a mut [u8], Errno> {
2078        let read_bytes = self.read_memory_partial(addr, bytes, context)?;
2079        let max_len = memchr::memchr(b'\0', read_bytes)
2080            .map_or_else(|| read_bytes.len(), |null_index| null_index + 1);
2081        Ok(&mut read_bytes[..max_len])
2082    }
2083
2084    /// Writes the provided bytes.
2085    ///
2086    /// In case of success, the number of bytes written will always be `bytes.len()`.
2087    ///
2088    /// # Parameters
2089    /// - `addr`: The address to write to.
2090    /// - `bytes`: The bytes to write.
2091    fn write_memory(
2092        &self,
2093        addr: UserAddress,
2094        bytes: &[u8],
2095        context: &MappingContext,
2096    ) -> Result<usize, Errno> {
2097        let mut bytes_written = 0;
2098        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2099            let next_offset = bytes_written + len;
2100            self.write_mapping_memory(
2101                (addr + bytes_written)?,
2102                mapping,
2103                &bytes[bytes_written..next_offset],
2104                context,
2105            )?;
2106            bytes_written = next_offset;
2107        }
2108
2109        if bytes_written != bytes.len() { error!(EFAULT) } else { Ok(bytes.len()) }
2110    }
2111
2112    /// Writes the provided bytes to `addr`.
2113    ///
2114    /// # Parameters
2115    /// - `addr`: The address to write to.
2116    /// - `bytes`: The bytes to write to the memory object.
2117    fn write_mapping_memory(
2118        &self,
2119        addr: UserAddress,
2120        mapping: &Mapping,
2121        bytes: &[u8],
2122        context: &MappingContext,
2123    ) -> Result<(), Errno> {
2124        if !mapping.can_write() {
2125            return error!(EFAULT, "write_mapping_memory called on unwritable memory");
2126        }
2127        match self.get_mapping_backing(mapping) {
2128            MappingBacking::Memory(backing) => backing.write_memory(addr, bytes),
2129            MappingBacking::PrivateAnonymous => context.private_anonymous.write_memory(addr, bytes),
2130        }
2131    }
2132
2133    /// Writes bytes starting at `addr`, continuing until either `bytes.len()` bytes have been
2134    /// written or no more bytes can be written.
2135    ///
2136    /// # Parameters
2137    /// - `addr`: The address to read data from.
2138    /// - `bytes`: The byte array to write from.
2139    fn write_memory_partial(
2140        &self,
2141        addr: UserAddress,
2142        bytes: &[u8],
2143        context: &MappingContext,
2144    ) -> Result<usize, Errno> {
2145        let mut bytes_written = 0;
2146        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len(), context)? {
2147            let next_offset = bytes_written + len;
2148            if self
2149                .write_mapping_memory(
2150                    (addr + bytes_written)?,
2151                    mapping,
2152                    &bytes[bytes_written..next_offset],
2153                    context,
2154                )
2155                .is_err()
2156            {
2157                break;
2158            }
2159            bytes_written = next_offset;
2160        }
2161
2162        if !bytes.is_empty() && bytes_written == 0 { error!(EFAULT) } else { Ok(bytes.len()) }
2163    }
2164
2165    fn zero(
2166        &self,
2167        addr: UserAddress,
2168        length: usize,
2169        context: &MappingContext,
2170    ) -> Result<usize, Errno> {
2171        let mut bytes_written = 0;
2172        for (mapping, len) in self.get_contiguous_mappings_at(addr, length, context)? {
2173            let next_offset = bytes_written + len;
2174            if self.zero_mapping((addr + bytes_written)?, mapping, len, context).is_err() {
2175                break;
2176            }
2177            bytes_written = next_offset;
2178        }
2179
2180        if length != bytes_written { error!(EFAULT) } else { Ok(length) }
2181    }
2182
2183    fn zero_mapping(
2184        &self,
2185        addr: UserAddress,
2186        mapping: &Mapping,
2187        length: usize,
2188        context: &MappingContext,
2189    ) -> Result<usize, Errno> {
2190        if !mapping.can_write() {
2191            return error!(EFAULT);
2192        }
2193
2194        match self.get_mapping_backing(mapping) {
2195            MappingBacking::Memory(backing) => backing.zero(addr, length),
2196            MappingBacking::PrivateAnonymous => context.private_anonymous.zero(addr, length),
2197        }
2198    }
2199
2200    pub fn create_memory_backing(
2201        &self,
2202        base: UserAddress,
2203        memory: Arc<MemoryObject>,
2204        memory_offset: u64,
2205    ) -> MappingBacking {
2206        MappingBacking::Memory(Box::new(MappingBackingMemory::new(base, memory, memory_offset)))
2207    }
2208
2209    pub fn get_mapping_backing<'a>(&self, mapping: &'a Mapping) -> &'a MappingBacking {
2210        mapping.get_backing_internal()
2211    }
2212
2213    fn get_aio_context(&self, addr: UserAddress) -> Option<(Range<UserAddress>, Arc<AioContext>)> {
2214        let Some((range, mapping)) = self.mappings.get(addr) else {
2215            return None;
2216        };
2217        let MappingNameRef::AioContext(ref aio_context) = mapping.name() else {
2218            return None;
2219        };
2220        if !mapping.can_read() {
2221            return None;
2222        }
2223        Some((range.clone(), Arc::clone(aio_context)))
2224    }
2225
2226    fn find_uffd<L>(&self, locked: &mut Locked<L>, addr: UserAddress) -> Option<Arc<UserFault>>
2227    where
2228        L: LockBefore<UserFaultInner>,
2229    {
2230        for userfault in self.userfaultfds.iter() {
2231            if let Some(userfault) = userfault.upgrade() {
2232                if userfault.contains_addr(locked, addr) {
2233                    return Some(userfault);
2234                }
2235            }
2236        }
2237        None
2238    }
2239
    /// Flushes the data and instruction caches for the mappings covering `range`.
    ///
    /// Walks the contiguous mappings starting at `range.start`. Mappings whose mode is
    /// `MappingMode::Lazy` are stepped over without issuing a flush.
    ///
    /// # Errors
    ///
    /// Returns `EFAULT` if a mapping in the range is not readable, or if the contiguous
    /// mappings end before `range.end`. Errors from `get_contiguous_mappings_at` are propagated.
    fn cache_flush(
        &self,
        range: Range<UserAddress>,
        context: &MappingContext,
    ) -> Result<(), Errno> {
        // `addr` tracks how far into `range` we have progressed.
        let mut addr = range.start;
        let size = range.end - range.start;
        for (mapping, len) in self.get_contiguous_mappings_at(addr, size, context)? {
            if !mapping.can_read() {
                return error!(EFAULT);
            }
            if mapping.mapping_mode() == MappingMode::Lazy {
                // Nothing is flushed for a lazy mapping; just advance past it.
                // NOTE(review): overflow is propagated with `?` here but unwrapped below —
                // confirm whether the difference is intentional.
                addr = (addr + len)?;
                continue;
            }
            // SAFETY: This is operating on a readable restricted mode mapping and will not fault.
            zx::Status::ok(unsafe {
                zx::sys::zx_cache_flush(
                    addr.ptr() as *const u8,
                    len,
                    zx::sys::ZX_CACHE_FLUSH_DATA | zx::sys::ZX_CACHE_FLUSH_INSN,
                )
            })
            .map_err(impossible_error)?;

            addr = (addr + len).unwrap(); // unwrap since we're iterating within the address space.
        }
        // Did we flush the entire range?
        if addr != range.end { error!(EFAULT) } else { Ok(()) }
    }
2270
2271    /// Register the address space managed by this memory manager for interest in
2272    /// receiving private expedited memory barriers of the given kind.
2273    pub fn register_membarrier_private_expedited(
2274        &mut self,
2275        mtype: MembarrierType,
2276    ) -> Result<(), Errno> {
2277        let registrations = &mut self.forkable_state.membarrier_registrations;
2278        match mtype {
2279            MembarrierType::Memory => {
2280                registrations.memory = true;
2281            }
2282            MembarrierType::SyncCore => {
2283                registrations.sync_core = true;
2284            }
2285        }
2286        Ok(())
2287    }
2288
2289    /// Checks if the address space managed by this memory manager is registered
2290    /// for interest in private expedited barriers of the given kind.
2291    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2292        let registrations = &self.forkable_state.membarrier_registrations;
2293        match mtype {
2294            MembarrierType::Memory => registrations.memory,
2295            MembarrierType::SyncCore => registrations.sync_core,
2296        }
2297    }
2298
    /// Writes `bytes` at `addr`, replacing the mapping's backing with a copy-on-write child
    /// when the existing VMO cannot be written directly.
    ///
    /// The write must fit inside the single mapping that contains `addr`. Depending on that
    /// mapping:
    /// - shared mappings are written via `write_mapping_memory` and must be writable;
    /// - private anonymous mappings are written via `context.private_anonymous`;
    /// - other memory-backed mappings are written straight to the VMO, and if that is denied
    ///   with `ACCESS_DENIED`, a modified snapshot child of the VMO is mapped in its place.
    ///
    /// Mappings displaced when the new mapping is inserted are appended to
    /// `released_mappings`.
    ///
    /// # Errors
    ///
    /// - `EFAULT` if `addr` is unmapped, the write extends past the end of the mapping, or the
    ///   backing memory is not a VMO.
    /// - `EIO` if the mapping is shared and not writable.
    /// - Errors from the VMO operations, translated by `get_errno_for_vmo_err`.
    ///
    /// # Panics
    ///
    /// Panics if the VMO's basic info cannot be queried.
    fn force_write_memory(
        &mut self,
        context: &MappingContext,
        addr: UserAddress,
        bytes: &[u8],
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno> {
        // Clone the range and mapping so `self.mappings` is not borrowed while it is modified
        // further down.
        let (range, mapping) = {
            let (r, m) = self.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
            (r.clone(), m.clone())
        };
        if range.end < addr.saturating_add(bytes.len()) {
            track_stub!(
                TODO("https://fxbug.dev/445790710"),
                "ptrace poke across multiple mappings"
            );
            return error!(EFAULT);
        }

        // Don't create CoW copy of shared memory, go through regular syscall writing.
        if mapping.flags().contains(MappingFlags::SHARED) {
            if !mapping.can_write() {
                // Linux returns EIO here instead of EFAULT.
                return error!(EIO);
            }
            return self.write_mapping_memory(addr, &mapping, &bytes, context);
        }

        let backing = match self.get_mapping_backing(&mapping) {
            MappingBacking::PrivateAnonymous => {
                // Starnix has a writable handle to private anonymous memory.
                return context.private_anonymous.write_memory(addr, &bytes);
            }
            MappingBacking::Memory(backing) => backing,
        };

        let vmo = backing.memory().as_vmo().ok_or_else(|| errno!(EFAULT))?;
        let addr_offset = backing.address_to_offset(addr);
        // Remember whether the handle is executable so the replacement can be made executable
        // as well.
        let can_exec =
            vmo.basic_info().expect("get VMO handle info").rights.contains(Rights::EXECUTE);

        // Attempt to write to existing VMO
        match vmo.write(&bytes, addr_offset) {
            Ok(()) => {
                if can_exec {
                    // Issue a barrier to avoid executing stale instructions.
                    system_barrier(BarrierType::InstructionStream);
                }
                return Ok(());
            }

            Err(zx::Status::ACCESS_DENIED) => { /* Fall through */ }

            Err(status) => {
                return Err(MemoryManager::get_errno_for_vmo_err(status));
            }
        }

        // Create a CoW child of the entire VMO and swap with the backing.
        let mapping_offset = backing.address_to_offset(range.start);
        let len = range.end - range.start;

        // 1. Obtain a writable child of the VMO.
        let size = vmo.get_size().map_err(MemoryManager::get_errno_for_vmo_err)?;
        let child_vmo = vmo
            .create_child(VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, size)
            .map_err(MemoryManager::get_errno_for_vmo_err)?;

        // 2. Modify the memory.
        child_vmo.write(&bytes, addr_offset).map_err(MemoryManager::get_errno_for_vmo_err)?;

        // 3. If needed, remint the VMO as executable. Zircon flushes instruction caches when
        // mapping executable memory below, so a barrier isn't necessary here.
        let child_vmo = if can_exec {
            child_vmo
                .replace_as_executable(&VMEX_RESOURCE)
                .map_err(MemoryManager::get_errno_for_vmo_err)?
        } else {
            child_vmo
        };

        // Ensure that the mapping that `addr` falls into is mapped in the user VMAR.
        // This ensures that the mapping's mode becomes `Eager` (if it was `Lazy`),
        // otherwise, we might clone a `Lazy` mapping but map it unconditionally below,
        // leading to state drift where a mapping is mapped in Zircon but marked as lazy in Starnix.
        self.ensure_range_mapped_in_user_vmar(addr, None, context)?;

        // 4. Map the new VMO into user VMAR
        let memory = Arc::new(MemoryObject::from(child_vmo));
        context.map_in_user_vmar(
            SelectedAddress::FixedOverwrite(range.start),
            &memory,
            mapping_offset,
            len,
            mapping.flags(),
            false,
        )?;

        // 5. Update mappings
        let new_backing = MappingBackingMemory::new(range.start, memory, mapping_offset);

        let mut new_mapping = mapping.clone();
        new_mapping.set_backing_internal(MappingBacking::Memory(Box::new(new_backing)));

        // Inserting over `range` may displace existing entries; hand them to the caller.
        released_mappings.extend(self.mappings.insert(range, new_mapping));

        Ok(())
    }
2407
    /// Moves the program break to `addr` and returns the resulting program break.
    ///
    /// The break is initialized at `brk_origin` on first use. The permitted range is
    /// `[base, base + limit]`, where `limit` is the smaller of `PROGRAM_BREAK_LIMIT` and the
    /// thread group's `Resource::DATA` rlimit.
    ///
    /// If the request cannot be satisfied (out-of-range `addr`, an existing mapping in the
    /// region the break would grow into, or a failed map/unmap), the break is left unchanged
    /// and the current break is returned as `Ok`. No visible path returns `Err`.
    ///
    /// Mappings removed or displaced along the way are appended to `released_mappings`.
    ///
    /// # Panics
    ///
    /// Panics if rounding the current or requested break up to a page boundary fails.
    fn set_brk<L>(
        &mut self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        mm: &Arc<MemoryManager>,
        addr: UserAddress,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno>
    where
        L: LockBefore<ThreadGroupLimits>,
    {
        let rlimit_data = std::cmp::min(
            PROGRAM_BREAK_LIMIT,
            current_task.thread_group().get_rlimit(locked, Resource::DATA),
        );

        // Lazily create the program break record the first time brk is used.
        let brk = match self.brk.clone() {
            None => {
                let brk = ProgramBreak { base: self.brk_origin, current: self.brk_origin };
                self.brk = Some(brk.clone());
                brk
            }
            Some(brk) => brk,
        };

        let Ok(last_address) = brk.base + rlimit_data else {
            // The requested program break is out-of-range. We're supposed to simply
            // return the current program break.
            return Ok(brk.current);
        };

        if addr < brk.base || addr > last_address {
            // The requested program break is out-of-range. We're supposed to simply
            // return the current program break.
            return Ok(brk.current);
        }

        // Mapping changes happen at page granularity even though the break itself is
        // byte-granular.
        let old_end = brk.current.round_up(*PAGE_SIZE).unwrap();
        let new_end = addr.round_up(*PAGE_SIZE).unwrap();

        match new_end.cmp(&old_end) {
            std::cmp::Ordering::Less => {
                // Shrinking the program break removes any mapped pages in the
                // affected range, regardless of whether they were actually program
                // break pages, or other mappings.
                let delta = old_end - new_end;

                if self.unmap(mm, new_end, delta, released_mappings).is_err() {
                    return Ok(brk.current);
                }
            }
            std::cmp::Ordering::Greater => {
                let range = old_end..new_end;
                let delta = new_end - old_end;

                // Check for mappings over the program break region.
                if self.mappings.range(range).next().is_some() {
                    return Ok(brk.current);
                }

                if self
                    .map_anonymous(
                        mm,
                        DesiredAddress::FixedOverwrite(old_end),
                        delta,
                        ProtectionFlags::READ | ProtectionFlags::WRITE,
                        MappingOptions::ANONYMOUS,
                        MappingName::Heap,
                        released_mappings,
                    )
                    .is_err()
                {
                    return Ok(brk.current);
                }
            }
            // Same page-rounded end: no mapping change is needed.
            _ => {}
        };

        // Any required updates to the program break succeeded, so update internal state.
        let mut new_brk = brk;
        new_brk.current = addr;
        self.brk = Some(new_brk);

        Ok(addr)
    }
2493
    /// Registers the private anonymous mappings intersecting `addr..addr + length` with
    /// `userfault`.
    ///
    /// Each affected mapping, clipped to the requested range, is re-inserted with its uffd
    /// state set to `mode`. All access protections are then removed from the VMAR range and
    /// the range is recorded in `userfault`. Entries displaced by the re-insertion are appended
    /// to `released_mappings`.
    ///
    /// # Errors
    ///
    /// - `EINVAL` if `addr + length` overflows, a mapping in the range is not private
    ///   anonymous, or no mapping intersects the range.
    /// - `EBUSY` if a mapping in the range already has `MappingFlags::UFFD` set.
    ///
    /// # Panics
    ///
    /// Panics if `mm.protect_vmar_range` fails.
    fn register_with_uffd<L>(
        &mut self,
        mm: &MemoryManager,
        locked: &mut Locked<L>,
        addr: UserAddress,
        length: usize,
        userfault: &Arc<UserFault>,
        mode: FaultRegisterMode,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno>
    where
        L: LockBefore<UserFaultInner>,
    {
        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
        let range_for_op = addr..end_addr;
        let mut updates = vec![];

        // First pass: validate every mapping and stage the updated copies. Nothing is modified
        // until validation of the whole range has succeeded.
        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
            if !mapping.private_anonymous() {
                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
                return error!(EINVAL);
            }
            if mapping.flags().contains(MappingFlags::UFFD) {
                return error!(EBUSY);
            }
            let range = range.intersect(&range_for_op);
            let mut mapping = mapping.clone();
            mapping.set_uffd(mode);
            updates.push((range, mapping));
        }
        if updates.is_empty() {
            return error!(EINVAL);
        }

        // NOTE(review): this covers the whole requested range, including any unmapped gaps
        // between mappings — confirm `protect_vmar_range` cannot fail (and panic) in that case.
        mm.protect_vmar_range(addr, length, ProtectionFlags::empty())
            .expect("Failed to remove protections on uffd-registered range");

        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
        for (range, mapping) in updates {
            released_mappings.extend(self.mappings.insert(range, mapping));
        }

        userfault.insert_pages(locked, range_for_op, false);

        Ok(())
    }
2540
    /// Unregisters the parts of `addr..addr + length` that are registered with `userfault`.
    ///
    /// For every mapping in the range flagged `MappingFlags::UFFD`, the intersecting pages are
    /// removed from `userfault`. When `remove_pages` returns `true`, the mapping's uffd state
    /// is cleared and the mapping's access flags are re-applied to the VMAR range. Entries
    /// displaced by the re-insertion are appended to `released_mappings`.
    ///
    /// # Errors
    ///
    /// Returns `EINVAL` if `addr + length` overflows or a mapping in the range is not private
    /// anonymous.
    ///
    /// # Panics
    ///
    /// Panics if `mm.protect_vmar_range` fails while restoring protections.
    fn unregister_range_from_uffd<L>(
        &mut self,
        mm: &MemoryManager,
        locked: &mut Locked<L>,
        userfault: &Arc<UserFault>,
        addr: UserAddress,
        length: usize,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno>
    where
        L: LockBefore<UserFaultInner>,
    {
        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
        let range_for_op = addr..end_addr;
        let mut updates = vec![];

        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
            if !mapping.private_anonymous() {
                // NOTE(review): this early return can happen after `remove_pages` has already
                // run for earlier mappings in the range, whose staged updates are then
                // dropped — confirm that is acceptable.
                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
                return error!(EINVAL);
            }
            if mapping.flags().contains(MappingFlags::UFFD) {
                let range = range.intersect(&range_for_op);
                if userfault.remove_pages(locked, range.clone()) {
                    let mut mapping = mapping.clone();
                    mapping.clear_uffd();
                    updates.push((range, mapping));
                }
            }
        }
        // Apply the staged updates in a separate loop so the mappings structure is not mutated
        // while it is being iterated.
        for (range, mapping) in updates {
            let length = range.end - range.start;
            let restored_flags = mapping.flags().access_flags();

            released_mappings.extend(self.mappings.insert(range.clone(), mapping));

            mm.protect_vmar_range(range.start, length, restored_flags)
                .expect("Failed to restore original protection bits on uffd-registered range");
        }
        Ok(())
    }
2582
    /// Removes every registration held by `userfault` from this address space.
    ///
    /// For each mapping flagged `MappingFlags::UFFD`, the sub-ranges that `userfault` reports
    /// as registered get their uffd state cleared and their access flags re-applied to the
    /// VMAR. Afterwards all pages in the restricted address space range are removed from
    /// `userfault`, and `userfault` is dropped from `userfaultfds`. Entries displaced by the
    /// re-insertion are appended to `released_mappings`.
    ///
    /// # Panics
    ///
    /// Panics if `mm.protect_vmar_range` fails while restoring protections.
    fn unregister_uffd<L>(
        &mut self,
        mm: &MemoryManager,
        locked: &mut Locked<L>,
        userfault: &Arc<UserFault>,
        released_mappings: &mut ReleasedMappings,
    ) where
        L: LockBefore<UserFaultInner>,
    {
        let mut updates = vec![];

        for (range, mapping) in self.mappings.iter() {
            if mapping.flags().contains(MappingFlags::UFFD) {
                // Only the portions registered with this particular `userfault` are cleared.
                for range in userfault.get_registered_pages_overlapping_range(locked, range.clone())
                {
                    let mut mapping = mapping.clone();
                    mapping.clear_uffd();
                    updates.push((range.clone(), mapping));
                }
            }
        }
        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
        for (range, mapping) in updates {
            let length = range.end - range.start;
            let restored_flags = mapping.flags().access_flags();
            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
            // We can't recover from an error here as this is run during the cleanup.
            mm.protect_vmar_range(range.start, length, restored_flags)
                .expect("Failed to restore original protection bits on uffd-registered range");
        }

        userfault.remove_pages(
            locked,
            UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
                ..UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS),
        );

        // Forget this userfault object; entries are compared by pointer identity.
        let weak_userfault = Arc::downgrade(userfault);
        self.userfaultfds.retain(|uf| !Weak::ptr_eq(uf, &weak_userfault));
    }
2623
    /// Sets or clears the name of the mappings covering `addr..addr + length`.
    ///
    /// The end of the range is rounded up to a page boundary. `Some(name)` assigns
    /// `MappingName::Vma` with that name; `None` resets the name to `MappingName::None`. For
    /// memory-backed mappings the name (or an empty name) is also set on the memory object.
    /// Entries displaced by re-inserting the renamed mappings are appended to
    /// `released_mappings`.
    ///
    /// # Errors
    ///
    /// - `EINVAL` if `addr` is not page-aligned, `addr + length` overflows, or no mapping
    ///   intersects the range.
    /// - `ENOMEM` if rounding the end up fails, `addr` itself is not inside the first mapping,
    ///   or the mappings do not cover the range contiguously.
    /// - `EBADF` if a mapping in the range is file-backed.
    ///
    /// Errors detected inside the loop are returned after earlier mappings in the range have
    /// already been renamed.
    fn set_mapping_name(
        &mut self,
        addr: UserAddress,
        length: usize,
        name: Option<FsString>,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno> {
        if addr.ptr() % *PAGE_SIZE as usize != 0 {
            return error!(EINVAL);
        }
        let end = match addr.checked_add(length) {
            Some(addr) => addr.round_up(*PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
            None => return error!(EINVAL),
        };

        // Snapshot the affected mappings so `self.mappings` can be modified in the loop below.
        let mappings_in_range =
            self.mappings.range(addr..end).map(|(r, m)| (r.clone(), m.clone())).collect::<Vec<_>>();

        if mappings_in_range.is_empty() {
            return error!(EINVAL);
        }
        if !mappings_in_range.first().unwrap().0.contains(&addr) {
            return error!(ENOMEM);
        }

        // End of the previously processed mapping, used to detect gaps between mappings.
        let mut last_range_end = None;
        // There's no get_mut on RangeMap, because it would be hard to implement correctly in
        // combination with merging of adjacent mappings. Instead, make a copy, change the copy,
        // and insert the copy.
        for (mut range, mut mapping) in mappings_in_range {
            if mapping.name().is_file() {
                // It's invalid to assign a name to a file-backed mapping.
                return error!(EBADF);
            }
            // Handle mappings that start before the region to be named.
            range.start = std::cmp::max(range.start, addr);
            // Handle mappings that extend past the region to be named.
            range.end = std::cmp::min(range.end, end);

            if let Some(last_range_end) = last_range_end {
                if last_range_end != range.start {
                    // The name must apply to a contiguous range of mapped pages.
                    return error!(ENOMEM);
                }
            }
            last_range_end = Some(range.end.round_up(*PAGE_SIZE)?);
            // TODO(b/310255065): We have no place to store names in a way visible to programs outside of Starnix
            // such as memory analysis tools.
            if let MappingBacking::Memory(backing) = self.get_mapping_backing(&mapping) {
                match &name {
                    Some(memory_name) => {
                        backing.memory().set_zx_name(memory_name);
                    }
                    None => {
                        backing.memory().set_zx_name(b"");
                    }
                }
            }
            mapping.set_name(match &name {
                Some(name) => MappingName::Vma(FlyByteStr::new(name.as_bytes())),
                None => MappingName::None,
            });
            released_mappings.extend(self.mappings.insert(range, mapping));
        }
        // The last mapping must reach the end of the requested range.
        if let Some(last_range_end) = last_range_end {
            if last_range_end < end {
                // The name must apply to a contiguous range of mapped pages.
                return error!(ENOMEM);
            }
        }
        Ok(())
    }
2696}
2697
/// The memory pinning shadow process used for mlock().
///
/// Uses its own distinct shadow process so that it doesn't interfere with other uses of memory
/// pinning.
///
/// This is a newtype wrapper around `memory_pinning::ShadowProcess`; the wrapped value is
/// private to this module.
pub struct MlockShadowProcess(memory_pinning::ShadowProcess);
2703
/// A memory manager for another thread.
///
/// When accessing memory through this object, we use less efficient codepaths that work across
/// address spaces.
pub struct RemoteMemoryManager {
    // The memory manager whose address space is accessed through this wrapper.
    mm: Arc<MemoryManager>,
}
2711
impl RemoteMemoryManager {
    /// Wraps `mm` so that its memory is accessed through the `syscall_*` code paths.
    fn new(mm: Arc<MemoryManager>) -> Self {
        Self { mm }
    }
}
2717
// If we just have a MemoryManager, we cannot assume that its address space is current, which means
// we need to use the slower "syscall" mechanism to access its memory.
//
// Every method below forwards to the `syscall_*` method of the same name on the wrapped
// `MemoryManager`.
impl MemoryAccessor for RemoteMemoryManager {
    /// Forwards to `MemoryManager::syscall_read_memory`.
    fn read_memory<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm.syscall_read_memory(addr, bytes)
    }

    /// Forwards to `MemoryManager::syscall_read_memory_partial_until_null_byte`.
    fn read_memory_partial_until_null_byte<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm.syscall_read_memory_partial_until_null_byte(addr, bytes)
    }

    /// Forwards to `MemoryManager::syscall_read_memory_partial`.
    fn read_memory_partial<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm.syscall_read_memory_partial(addr, bytes)
    }

    /// Forwards to `MemoryManager::syscall_write_memory`.
    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        self.mm.syscall_write_memory(addr, bytes)
    }

    /// Forwards to `MemoryManager::syscall_write_memory_partial`.
    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        self.mm.syscall_write_memory_partial(addr, bytes)
    }

    /// Forwards to `MemoryManager::syscall_zero`.
    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
        self.mm.syscall_zero(addr, length)
    }
}
2757
impl TaskMemoryAccessor for RemoteMemoryManager {
    /// Returns the wrapped memory manager's `maximum_valid_user_address`; always `Some`.
    fn maximum_valid_address(&self) -> Option<UserAddress> {
        Some(self.mm.maximum_valid_user_address)
    }
}
2763
2764impl MemoryManager {
2765    /// Ensures that any mapping at `addr` is actually mapped at in the user vmar.
2766    ///
2767    /// If `length` is `None`, it will ensure the mapping only on the page `addr` falls into.
2768    /// Returns `true` if any lazy mappings are mapped.
2769    pub fn ensure_range_mapped_in_user_vmar(
2770        &self,
2771        addr: UserAddress,
2772        length: Option<usize>,
2773    ) -> Result<bool, Errno> {
2774        self.state.write().ensure_ranges_mapped_in_user_vmar(
2775            std::iter::once((addr, length)),
2776            &self.mapping_context,
2777        )
2778    }
2779
    /// Ensures that any mappings in the specified ranges are actually mapped in the user vmar.
    ///
    /// Each item of `ranges` is an `(addr, length)` pair. If `length` is `None`, it will ensure
    /// the mapping only on the page `addr` falls into.
    /// Returns `true` if any lazy mappings are mapped.
    ///
    /// Takes the state write lock for the duration of the call.
    pub fn ensure_ranges_mapped_in_user_vmar<I>(&self, ranges: I) -> Result<bool, Errno>
    where
        I: IntoIterator<Item = (UserAddress, Option<usize>)>,
    {
        self.state.write().ensure_ranges_mapped_in_user_vmar(ranges, &self.mapping_context)
    }
2790
2791    pub fn mrelease(&self) -> Result<(), Errno> {
2792        self.mapping_context.private_anonymous.zero(
2793            UserAddress::from_ptr(self.mapping_context.user_vmar_info.base),
2794            self.mapping_context.user_vmar_info.len,
2795        )?;
2796        Ok(())
2797    }
2798
2799    pub fn summarize(&self, summary: &mut crate::mm::MappingSummary) {
2800        let state = self.state.read();
2801        for (_, mapping) in state.mappings.iter() {
2802            summary.add(&state, mapping);
2803        }
2804    }
2805
    /// Returns the vmsplice payloads for `buffers`.
    ///
    /// Takes the state read lock and forwards to the state's `get_mappings_for_vmsplice`,
    /// passing this memory manager along.
    pub fn get_mappings_for_vmsplice(
        self: &Arc<MemoryManager>,
        buffers: &UserBuffers,
    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
        self.state.read().get_mappings_for_vmsplice(self, buffers)
    }
2812
2813    pub fn has_same_address_space(&self, other: &Self) -> bool {
2814        std::ptr::eq(self, other)
2815    }
2816
    /// Drives a chunked transfer of up to `len` bytes of user memory starting at `addr`.
    ///
    /// `transfer_fn(cur_addr, offset)` is called with the next user address and the number of
    /// bytes transferred so far, and reports how many bytes it moved:
    /// - `ControlFlow::Continue(n)` with `n > 0` advances by `n` and continues;
    /// - `ControlFlow::Continue(0)` triggers an attempt to map the page at the current
    ///   address; the transfer is retried if that mapped something, and stops otherwise;
    /// - `ControlFlow::Break(n)` advances by `n` and stops.
    ///
    /// Returns the total number of bytes transferred, which may be less than `len`. Errors
    /// from `transfer_fn`, from address arithmetic, and from
    /// `ensure_range_mapped_in_user_vmar` are propagated.
    fn unified_transfer_loop<F>(
        &self,
        addr: UserAddress,
        len: usize,
        mut transfer_fn: F,
    ) -> Result<usize, Errno>
    where
        F: FnMut(UserAddress, usize) -> Result<ControlFlow<usize, usize>, Errno>,
    {
        let mut copied = 0;
        while copied < len {
            match transfer_fn((addr + copied)?, copied)? {
                ControlFlow::Continue(num_copied) => {
                    if num_copied == 0 {
                        let fault_addr = (addr + copied)?;
                        // If we successfully mapped a lazy mapping, retry the copy.
                        // Otherwise, this might be a permission fault or invalid address, so we
                        // stop and return the partial result.
                        //
                        // NOTE: We lazily materialize mappings one page at a time here.
                        // An alternative approach would be to materialize the entire range
                        // or the first mapping up front. That might avoid bouncing between
                        // threads on faults, but adds overhead (locks and range lookups)
                        // if the memory is already mapped. We use the reactive approach
                        // for now, but this could be tuned in the future.
                        if self.ensure_range_mapped_in_user_vmar(fault_addr, None)? {
                            continue;
                        } else {
                            break;
                        }
                    }
                    copied += num_copied;
                }
                ControlFlow::Break(num_copied) => {
                    copied += num_copied;
                    break;
                }
            }
        }
        Ok(copied)
    }
2858
    /// Reads exactly `bytes.len()` bytes from `addr` into `bytes`.
    ///
    /// Uses `usercopy` when it is available and falls back to `syscall_read_memory`
    /// otherwise. Debug builds assert that `current_task` uses this address space.
    ///
    /// Returns the initialized buffer, or `EFAULT` if fewer than `bytes.len()` bytes could be
    /// copied.
    pub fn unified_read_memory<'a>(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        if let Some(usercopy) = usercopy() {
            let buf_ptr = bytes.as_mut_ptr();
            let buf_len = bytes.len();

            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
                let current_bytes = unsafe {
                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
                };
                let (read_bytes, _unread_bytes) = usercopy.copyin(cur_addr.ptr(), current_bytes);
                Ok(ControlFlow::Continue(read_bytes.len()))
            })?;
            // A full read is required; a short copy is reported as a fault.
            if copied < bytes.len() {
                error!(EFAULT)
            } else {
                // SAFETY: All bytes up to `buf_len` have been initialized.
                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, buf_len) })
            }
        } else {
            self.syscall_read_memory(addr, bytes)
        }
    }
2889
    /// Reads memory at `addr` into `bytes` through the state's `read_memory`.
    ///
    /// Takes the state read lock for the duration of the call.
    pub fn syscall_read_memory<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.state.read().read_memory(addr, bytes, &self.mapping_context)
    }
2897
    /// Reads from `addr` into `bytes` until a null byte has been copied, the buffer is full,
    /// or no further bytes can be read.
    ///
    /// Uses `usercopy` when it is available and falls back to
    /// `syscall_read_memory_partial_until_null_byte` otherwise. Debug builds assert that
    /// `current_task` uses this address space.
    ///
    /// Returns the initialized prefix of `bytes`, which includes the null byte when one was
    /// found. Returns `EFAULT` if nothing could be read into a non-empty buffer.
    pub fn unified_read_memory_partial_until_null_byte<'a>(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        if let Some(usercopy) = usercopy() {
            let buf_ptr = bytes.as_mut_ptr();
            let buf_len = bytes.len();

            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
                let current_bytes = unsafe {
                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
                };
                let (read_bytes, _unread_bytes) =
                    usercopy.copyin_until_null_byte(cur_addr.ptr(), current_bytes);

                let num_copied = read_bytes.len();
                // Stop the transfer once the chunk just read ends with the null byte.
                if read_bytes.last().map(|b| *b == 0).unwrap_or(false) {
                    Ok(ControlFlow::Break(num_copied))
                } else {
                    Ok(ControlFlow::Continue(num_copied))
                }
            })?;
            if copied == 0 && !bytes.is_empty() {
                error!(EFAULT)
            } else {
                // SAFETY: Bytes up to `copied` have been initialized.
                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, copied) })
            }
        } else {
            self.syscall_read_memory_partial_until_null_byte(addr, bytes)
        }
    }
2935
    /// Reads memory at `addr` into `bytes` through the state's
    /// `read_memory_partial_until_null_byte`.
    ///
    /// Takes the state read lock for the duration of the call.
    pub fn syscall_read_memory_partial_until_null_byte<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.state.read().read_memory_partial_until_null_byte(addr, bytes, &self.mapping_context)
    }
2943
    /// Reads as many bytes as possible, up to `bytes.len()`, from `addr` into `bytes`.
    ///
    /// Uses `usercopy` when it is available and falls back to `syscall_read_memory_partial`
    /// otherwise. Debug builds assert that `current_task` uses this address space.
    ///
    /// Returns the initialized prefix of `bytes`. Returns `EFAULT` if nothing could be read
    /// into a non-empty buffer.
    pub fn unified_read_memory_partial<'a>(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        if let Some(usercopy) = usercopy() {
            let buf_ptr = bytes.as_mut_ptr();
            let buf_len = bytes.len();

            let copied = self.unified_transfer_loop(addr, buf_len, |cur_addr, offset| {
                // SAFETY: Exclusive access to `bytes` for the lifetime of this function.
                let current_bytes = unsafe {
                    std::slice::from_raw_parts_mut(buf_ptr.add(offset), buf_len - offset)
                };
                let (read_bytes, _unread_bytes) = usercopy.copyin(cur_addr.ptr(), current_bytes);
                Ok(ControlFlow::Continue(read_bytes.len()))
            })?;
            // A short read is fine here; only a completely failed read is an error.
            if copied == 0 && !bytes.is_empty() {
                error!(EFAULT)
            } else {
                // SAFETY: Bytes up to `copied` have been initialized.
                Ok(unsafe { std::slice::from_raw_parts_mut(buf_ptr as *mut u8, copied) })
            }
        } else {
            self.syscall_read_memory_partial(addr, bytes)
        }
    }
2974
    /// Reads memory at `addr` into `bytes` through the state's `read_memory_partial`.
    ///
    /// Takes the state read lock for the duration of the call.
    pub fn syscall_read_memory_partial<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.state.read().read_memory_partial(addr, bytes, &self.mapping_context)
    }
2982
2983    pub fn unified_write_memory(
2984        &self,
2985        current_task: &CurrentTask,
2986        addr: UserAddress,
2987        bytes: &[u8],
2988    ) -> Result<usize, Errno> {
2989        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
2990
2991        if let Some(usercopy) = usercopy() {
2992            let len = bytes.len();
2993            let copied = self.unified_transfer_loop(addr, len, |cur_addr, offset| {
2994                Ok(ControlFlow::Continue(usercopy.copyout(&bytes[offset..], cur_addr.ptr())))
2995            })?;
2996            if copied < bytes.len() { error!(EFAULT) } else { Ok(copied) }
2997        } else {
2998            self.syscall_write_memory(addr, bytes)
2999        }
3000    }
3001
    /// Write `bytes` to memory address `addr`, making a copy-on-write child of the VMO backing and
    /// replacing the mapping if necessary.
    ///
    /// NOTE: this bypasses userspace's memory protection configuration and should only be called
    /// by codepaths like ptrace which bypass memory protection.
    ///
    /// Takes the state write lock for the duration of the write.
    pub fn force_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<(), Errno> {
        let mut state = self.state.write();
        let mut released_mappings = ReleasedMappings::default();
        let result =
            state.force_write_memory(&self.mapping_context, addr, bytes, &mut released_mappings);
        // The write guard is handed to `finalize` whether or not the write succeeded.
        released_mappings.finalize(state);
        result
    }
3015
    /// Writes `bytes` at `addr` through the state's `write_memory`.
    ///
    /// Takes the state read lock for the duration of the call.
    pub fn syscall_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        self.state.read().write_memory(addr, bytes, &self.mapping_context)
    }
3019
3020    pub fn unified_write_memory_partial(
3021        &self,
3022        current_task: &CurrentTask,
3023        addr: UserAddress,
3024        bytes: &[u8],
3025    ) -> Result<usize, Errno> {
3026        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
3027
3028        if let Some(usercopy) = usercopy() {
3029            let len = bytes.len();
3030            let copied = self.unified_transfer_loop(addr, len, |cur_addr, offset| {
3031                Ok(ControlFlow::Continue(usercopy.copyout(&bytes[offset..], cur_addr.ptr())))
3032            })?;
3033            if copied == 0 && !bytes.is_empty() { error!(EFAULT) } else { Ok(copied) }
3034        } else {
3035            self.syscall_write_memory_partial(addr, bytes)
3036        }
3037    }
3038
3039    pub fn syscall_write_memory_partial(
3040        &self,
3041        addr: UserAddress,
3042        bytes: &[u8],
3043    ) -> Result<usize, Errno> {
3044        self.state.read().write_memory_partial(addr, bytes, &self.mapping_context)
3045    }
3046
3047    pub fn unified_zero(
3048        &self,
3049        current_task: &CurrentTask,
3050        addr: UserAddress,
3051        length: usize,
3052    ) -> Result<usize, Errno> {
3053        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));
3054
3055        {
3056            let page_size = *PAGE_SIZE as usize;
3057            // Get the page boundary immediately following `addr` if `addr` is
3058            // not page aligned.
3059            let next_page_boundary = round_up_to_system_page_size(addr.ptr())?;
3060            // The number of bytes needed to zero at least a full page (not just
3061            // a pages worth of bytes) starting at `addr`.
3062            let length_with_atleast_one_full_page = page_size + (next_page_boundary - addr.ptr());
3063            // If at least one full page is being zeroed, go through the memory object since Zircon
3064            // can swap the mapped pages with the zero page which should be cheaper than zeroing
3065            // out a pages worth of bytes manually.
3066            //
3067            // If we are not zeroing out a full page, then go through usercopy
3068            // if unified aspaces is enabled.
3069            if length >= length_with_atleast_one_full_page {
3070                return self.syscall_zero(addr, length);
3071            }
3072        }
3073
3074        if let Some(usercopy) = usercopy() {
3075            let copied = self.unified_transfer_loop(addr, length, |cur_addr, offset| {
3076                Ok(ControlFlow::Continue(usercopy.zero(cur_addr.ptr(), length - offset)))
3077            })?;
3078            if copied == 0 && length > 0 { error!(EFAULT) } else { Ok(copied) }
3079        } else {
3080            self.syscall_zero(addr, length)
3081        }
3082    }
3083
3084    pub fn syscall_zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
3085        self.state.read().zero(addr, length, &self.mapping_context)
3086    }
3087
3088    /// Obtain a reference to this memory manager that can be used from another thread.
3089    pub fn as_remote(self: &Arc<Self>) -> RemoteMemoryManager {
3090        RemoteMemoryManager::new(self.clone())
3091    }
3092
3093    /// Performs a data and instruction cache flush over the given address range.
3094    pub fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
3095        self.state.read().cache_flush(range, &self.mapping_context)
3096    }
3097
3098    /// Register the address space managed by this memory manager for interest in
3099    /// receiving private expedited memory barriers of the given type.
3100    pub fn register_membarrier_private_expedited(
3101        &self,
3102        mtype: MembarrierType,
3103    ) -> Result<(), Errno> {
3104        self.state.write().register_membarrier_private_expedited(mtype)
3105    }
3106
3107    /// Checks if the address space managed by this memory manager is registered
3108    /// for interest in private expedited barriers of the given kind.
3109    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
3110        self.state.read().membarrier_private_expedited_registered(mtype)
3111    }
3112}
3113
/// State and resources of the `MemoryManager` that are either immutable after creation
/// or handle their own interior mutability (e.g., `private_anonymous`).
///
/// This is distinct from `MemoryManagerState` in that the fields here do not require
/// acquisition of the `MemoryManager`'s main lock for access. This allows concurrent
/// access to these resources without lock contention.
///
/// This structure primarily holds the Zircon VMAR handle and the manager for private
/// anonymous memory, which are the core primitives used to manipulate the address space.
pub struct MappingContext {
    /// The VMAR in which userspace mappings occur.
    ///
    /// We map userspace memory in this child VMAR so that we can destroy the
    /// entire VMAR during exec.
    /// For 32-bit tasks, we limit the user_vmar to correspond to the available memory.
    ///
    /// This field is set to `ZX_HANDLE_INVALID` when the address-space has been destroyed (e.g. on
    /// `exec()`), allowing the value to be pro-actively checked for, or the `ZX_ERR_BAD_HANDLE`
    /// status return from Zircon operations handled, to suit the call-site.
    pub user_vmar: zx::Vmar,

    /// Cached `VmarInfo` for `user_vmar`, captured once by `MemoryManager::new` right after the
    /// VMAR is allocated. `base` and `len` delimit the range of valid user addresses.
    pub user_vmar_info: zx::VmarInfo,

    /// Memory object backing private, anonymous memory allocations in this address space.
    ///
    /// Either supplied by the caller of `MemoryManager::new` (e.g. a snapshot taken when
    /// cloning an address space) or freshly created there, sized to span user addresses from
    /// 0 up to the end of `user_vmar`.
    pub private_anonymous: PrivateAnonymousMemoryManager,
}
3141
3142impl MappingContext {
3143    fn map_in_user_vmar(
3144        &self,
3145        addr: SelectedAddress,
3146        memory: &MemoryObject,
3147        memory_offset: u64,
3148        length: usize,
3149        flags: MappingFlags,
3150        populate: bool,
3151    ) -> Result<(), Errno> {
3152        map_in_vmar(
3153            &self.user_vmar,
3154            &self.user_vmar_info,
3155            addr,
3156            memory,
3157            memory_offset,
3158            length,
3159            flags,
3160            populate,
3161        )
3162    }
3163
3164    pub fn max_address(&self) -> UserAddress {
3165        UserAddress::from_ptr(self.user_vmar_info.base + self.user_vmar_info.len)
3166    }
3167}
3168
/// Per-address-space memory management state.
///
/// Bundles the user VMAR and private anonymous memory (`mapping_context`), the lock-protected
/// mapping bookkeeping (`state`), and assorted per-address-space data such as the private futex
/// table and the dump policy.
pub struct MemoryManager {
    /// The base address of the root_vmar.
    ///
    /// `MemoryManager::new` sets this from the base of the user VMAR, which it asserts to be
    /// allocated at the base of the root VMAR.
    pub base_addr: UserAddress,

    /// The futexes in this address space.
    pub futex: Arc<FutexTable<PrivateFutexKey>>,

    /// The mapping context for this address space.
    pub mapping_context: MappingContext,

    /// Mutable state for the memory manager.
    pub state: RwLock<MemoryManagerState>,

    /// Whether this address space is dumpable.
    ///
    /// Initialized to `DumpPolicy::User` by `MemoryManager::new`.
    pub dumpable: OrderedMutex<DumpPolicy, MmDumpable>,

    /// Maximum valid user address for this vmar.
    ///
    /// `MemoryManager::new` sets this to `base + len` of the user VMAR, i.e. the address one
    /// past its end.
    pub maximum_valid_user_address: UserAddress,

    /// In-flight payloads enqueued to a pipe as a consequence of a `vmsplice(2)`
    /// operation.
    ///
    /// For details on why we need to keep track of in-flight vmspliced payloads,
    /// see [`VmsplicePayload`].
    ///
    /// For details on why this isn't under the `RwLock` protected `MemoryManagerState`,
    /// See [`InflightVmsplicedPayloads::payloads`].
    pub inflight_vmspliced_payloads: InflightVmsplicedPayloads,

    /// A mechanism to be notified when this `MemoryManager` is destroyed.
    pub drop_notifier: DropNotifier,
}
3201
3202fn check_access_permissions_in_page_fault(
3203    decoded: &PageFaultExceptionReport,
3204    mapping: &Mapping,
3205) -> bool {
3206    let exec_denied = decoded.is_execute && !mapping.can_exec();
3207    let write_denied = decoded.is_write && !mapping.can_write();
3208    let read_denied = (!decoded.is_execute && !decoded.is_write) && !mapping.can_read();
3209    !exec_denied && !write_denied && !read_denied
3210}
3211
3212impl MemoryManager {
3213    /// Returns a new `MemoryManager` suitable for use in tests.
3214    pub fn new_for_test(root_vmar: zx::Unowned<'_, zx::Vmar>, arch_width: ArchWidth) -> Arc<Self> {
3215        Self::new(root_vmar, arch_width, None, None).expect("can create MemoryManager")
3216    }
3217
3218    // Returns details of mappings in the `user_vmar`, or an empty vector if the `user_vmar` has
3219    // been destroyed.
3220    fn with_zx_mappings<R>(
3221        &self,
3222        current_task: &CurrentTask,
3223        op: impl FnOnce(&[zx::MapInfo]) -> R,
3224    ) -> R {
3225        MapInfoCache::get_or_init(current_task)
3226            .expect("must be able to retrieve map info cache")
3227            .with_map_infos(&self.mapping_context.user_vmar, |infos| match infos {
3228                Ok(infos) => op(infos),
3229                Err(_) => op(&[]),
3230            })
3231    }
3232
3233    fn protect_vmar_range(
3234        &self,
3235        addr: UserAddress,
3236        length: usize,
3237        prot_flags: ProtectionFlags,
3238    ) -> Result<(), Errno> {
3239        let vmar_flags = prot_flags.to_vmar_flags();
3240        // SAFETY: Modifying user vmar
3241        unsafe { self.mapping_context.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(
3242            |s| match s {
3243                zx::Status::INVALID_ARGS => errno!(EINVAL),
3244                zx::Status::NOT_FOUND => errno!(ENOMEM),
3245                zx::Status::ACCESS_DENIED => errno!(EACCES),
3246                _ => impossible_error(s),
3247            },
3248        )
3249    }
3250
3251    pub fn total_locked_bytes(&self) -> u64 {
3252        self.state.read().num_locked_bytes(
3253            UserAddress::from(self.mapping_context.user_vmar_info.base as u64)
3254                ..UserAddress::from(
3255                    (self.mapping_context.user_vmar_info.base
3256                        + self.mapping_context.user_vmar_info.len) as u64,
3257                ),
3258        )
3259    }
3260
3261    /// Returns a new `MemoryManager` initialized with a new userspace VMAR matching the specified
3262    /// `arch_width`, under the specified restricted-mode `root_vmar`.  The `executable_node` that
3263    /// the new address-space will execute may optionally be supplied.
3264    fn new(
3265        root_vmar: zx::Unowned<'_, zx::Vmar>,
3266        arch_width: ArchWidth,
3267        executable_node: Option<NamespaceNode>,
3268        private_anonymous: Option<PrivateAnonymousMemoryManager>,
3269    ) -> Result<Arc<Self>, Errno> {
3270        debug_assert!(!root_vmar.is_invalid());
3271
3272        let mut vmar_info = root_vmar.info().map_err(|status| from_status_like_fdio!(status))?;
3273        if arch_width.is_arch32() {
3274            vmar_info.len = (LOWER_4GB_LIMIT.ptr() - vmar_info.base) as usize;
3275        }
3276
3277        let (user_vmar, ptr) = root_vmar
3278            .allocate(
3279                0,
3280                vmar_info.len,
3281                zx::VmarFlags::SPECIFIC
3282                    | zx::VmarFlags::CAN_MAP_SPECIFIC
3283                    | zx::VmarFlags::CAN_MAP_READ
3284                    | zx::VmarFlags::CAN_MAP_WRITE
3285                    | zx::VmarFlags::CAN_MAP_EXECUTE,
3286            )
3287            .map_err(|status| from_status_like_fdio!(status))?;
3288        assert_eq!(ptr, vmar_info.base);
3289
3290        let user_vmar_info = user_vmar.info().map_err(|status| from_status_like_fdio!(status))?;
3291
3292        // Ensure that the `user_vmar_info` matches assumptions for the requested layout.
3293        debug_assert_eq!(RESTRICTED_ASPACE_BASE, user_vmar_info.base);
3294        if arch_width.is_arch32() {
3295            debug_assert_eq!(LOWER_4GB_LIMIT.ptr() - user_vmar_info.base, user_vmar_info.len);
3296        } else {
3297            debug_assert_eq!(RESTRICTED_ASPACE_SIZE, user_vmar_info.len);
3298        }
3299
3300        // The private anonymous backing memory object extend from the user address 0 up to the
3301        // highest mappable address. The pages below `user_vmar_info.base` are never mapped, but
3302        // including them in the memory object makes the math for mapping address to memory object
3303        // offsets simpler.
3304        let backing_size = (user_vmar_info.base + user_vmar_info.len) as u64;
3305
3306        // Place the stack at the end of the address space, subject to ASLR adjustment.
3307        let stack_origin = UserAddress::from_ptr(
3308            user_vmar_info.base + user_vmar_info.len
3309                - MAX_STACK_SIZE
3310                - generate_random_offset_for_aslr(arch_width),
3311        )
3312        .round_up(*PAGE_SIZE)?;
3313
3314        // Set the highest address that `mmap` will assign to the allocations that don't ask for a
3315        // specific address, subject to ASLR adjustment.
3316        let mmap_top = stack_origin
3317            .checked_sub(MAX_STACK_SIZE + generate_random_offset_for_aslr(arch_width))
3318            .ok_or_else(|| errno!(EINVAL))?;
3319
3320        Ok(Arc::new(MemoryManager {
3321            base_addr: UserAddress::from_ptr(user_vmar_info.base),
3322            futex: Arc::<FutexTable<PrivateFutexKey>>::default(),
3323            mapping_context: MappingContext {
3324                user_vmar,
3325                user_vmar_info,
3326                private_anonymous: private_anonymous
3327                    .unwrap_or_else(|| PrivateAnonymousMemoryManager::new(backing_size)),
3328            },
3329            state: RwLock::new(MemoryManagerState {
3330                mappings: Default::default(),
3331                userfaultfds: Default::default(),
3332                shadow_mappings_for_mlock: Default::default(),
3333                forkable_state: MemoryManagerForkableState {
3334                    executable_node,
3335                    stack_origin,
3336                    mmap_top,
3337                    ..Default::default()
3338                },
3339            }),
3340            // TODO(security): Reset to DISABLE, or the value in the fs.suid_dumpable sysctl, under
3341            // certain conditions as specified in the prctl(2) man page.
3342            dumpable: OrderedMutex::new(DumpPolicy::User),
3343            maximum_valid_user_address: UserAddress::from_ptr(
3344                user_vmar_info.base + user_vmar_info.len,
3345            ),
3346            inflight_vmspliced_payloads: Default::default(),
3347            drop_notifier: DropNotifier::default(),
3348        }))
3349    }
3350
3351    pub fn set_brk<L>(
3352        self: &Arc<Self>,
3353        locked: &mut Locked<L>,
3354        current_task: &CurrentTask,
3355        addr: UserAddress,
3356    ) -> Result<UserAddress, Errno>
3357    where
3358        L: LockBefore<ThreadGroupLimits>,
3359    {
3360        let mut state = self.state.write();
3361        let mut released_mappings = ReleasedMappings::default();
3362        let result = state.set_brk(locked, current_task, self, addr, &mut released_mappings);
3363        released_mappings.finalize(state);
3364        result
3365    }
3366
3367    pub fn register_uffd(&self, userfault: &Arc<UserFault>) {
3368        let mut state = self.state.write();
3369        state.userfaultfds.push(Arc::downgrade(userfault));
3370    }
3371
3372    /// Register a given memory range with a userfault object.
3373    pub fn register_with_uffd<L>(
3374        self: &Arc<Self>,
3375        locked: &mut Locked<L>,
3376        addr: UserAddress,
3377        length: usize,
3378        userfault: &Arc<UserFault>,
3379        mode: FaultRegisterMode,
3380    ) -> Result<(), Errno>
3381    where
3382        L: LockBefore<UserFaultInner>,
3383    {
3384        let mut state = self.state.write();
3385        let mut released_mappings = ReleasedMappings::default();
3386        let result = state.register_with_uffd(
3387            self,
3388            locked,
3389            addr,
3390            length,
3391            userfault,
3392            mode,
3393            &mut released_mappings,
3394        );
3395        released_mappings.finalize(state);
3396        result
3397    }
3398
3399    /// Unregister a given range from any userfault objects associated with it.
3400    pub fn unregister_range_from_uffd<L>(
3401        &self,
3402        locked: &mut Locked<L>,
3403        userfault: &Arc<UserFault>,
3404        addr: UserAddress,
3405        length: usize,
3406    ) -> Result<(), Errno>
3407    where
3408        L: LockBefore<UserFaultInner>,
3409    {
3410        let mut state = self.state.write();
3411        let mut released_mappings = ReleasedMappings::default();
3412        let result = state.unregister_range_from_uffd(
3413            self,
3414            locked,
3415            userfault,
3416            addr,
3417            length,
3418            &mut released_mappings,
3419        );
3420        released_mappings.finalize(state);
3421        result
3422    }
3423
3424    /// Unregister any mappings registered with a given userfault object. Used when closing the last
3425    /// file descriptor associated to it.
3426    pub fn unregister_uffd<L>(&self, locked: &mut Locked<L>, userfault: &Arc<UserFault>)
3427    where
3428        L: LockBefore<UserFaultInner>,
3429    {
3430        let mut state = self.state.write();
3431        let mut released_mappings = ReleasedMappings::default();
3432        state.unregister_uffd(self, locked, userfault, &mut released_mappings);
3433        released_mappings.finalize(state);
3434    }
3435
3436    /// Populate a range of pages registered with an userfaulfd according to a `populate` function.
3437    /// This will fail if the pages were not registered with userfaultfd, or if the page at `addr`
3438    /// was already populated. If any page other than the first one was populated, the `length`
3439    /// is adjusted to only include the first N unpopulated pages, and this adjusted length
3440    /// is then passed to `populate`. On success, returns the number of populated bytes.
3441    pub fn populate_from_uffd<F, L>(
3442        &self,
3443        locked: &mut Locked<L>,
3444        addr: UserAddress,
3445        length: usize,
3446        userfault: &Arc<UserFault>,
3447        populate: F,
3448    ) -> Result<usize, Errno>
3449    where
3450        F: FnOnce(&MemoryManagerState, usize) -> Result<usize, Errno>,
3451        L: LockBefore<UserFaultInner>,
3452    {
3453        let state = self.state.read();
3454        // Check that the addr..length range is a contiguous range of mappings which are all
3455        // registered with an userfault object.
3456        let mut bytes_registered_with_uffd = 0;
3457        for (mapping, len) in
3458            state.get_contiguous_mappings_at(addr, length, &self.mapping_context)?
3459        {
3460            if mapping.flags().contains(MappingFlags::UFFD) {
3461                // Check that the mapping is registered with the same uffd. This is not required,
3462                // but we don't support cross-uffd operations yet.
3463                if !userfault.contains_addr(locked, addr) {
3464                    track_stub!(
3465                        TODO("https://fxbug.dev/391599171"),
3466                        "operations across different uffds"
3467                    );
3468                    return error!(ENOTSUP);
3469                };
3470            } else {
3471                return error!(ENOENT);
3472            }
3473            bytes_registered_with_uffd += len;
3474        }
3475        if bytes_registered_with_uffd != length {
3476            return error!(ENOENT);
3477        }
3478
3479        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3480
3481        // Determine how many pages in the requested range are already populated
3482        let first_populated =
3483            userfault.get_first_populated_page_after(locked, addr).ok_or_else(|| errno!(ENOENT))?;
3484        // If the very first page is already populated, uffd operations should just return EEXIST
3485        if first_populated == addr {
3486            return error!(EEXIST);
3487        }
3488        // Otherwise it is possible to do an incomplete operation by only populating pages until
3489        // the first populated one.
3490        let trimmed_end = std::cmp::min(first_populated, end_addr);
3491        let effective_length = trimmed_end - addr;
3492
3493        populate(&state, effective_length)?;
3494        userfault.insert_pages(locked, addr..trimmed_end, true);
3495
3496        // Since we used protection bits to force pagefaults, we now need to reverse this change by
3497        // restoring the protections on the underlying Zircon mappings to the "real" protection bits
3498        // that were kept in the Starnix mappings. This will prevent new pagefaults from being
3499        // generated. Only do this on the pages that were populated by this operation.
3500        for (range, mapping) in state.mappings.range(addr..trimmed_end) {
3501            let range_to_protect = range.intersect(&(addr..trimmed_end));
3502            let restored_flags = mapping.flags().access_flags();
3503            let length = range_to_protect.end - range_to_protect.start;
3504            self.protect_vmar_range(range_to_protect.start, length, restored_flags)
3505                .expect("Failed to restore original protection bits on uffd-registered range");
3506        }
3507        // Return the number of effectively populated bytes, which might be smaller than the
3508        // requested number.
3509        Ok(effective_length)
3510    }
3511
3512    pub fn zero_from_uffd<L>(
3513        &self,
3514        locked: &mut Locked<L>,
3515        addr: UserAddress,
3516        length: usize,
3517        userfault: &Arc<UserFault>,
3518    ) -> Result<usize, Errno>
3519    where
3520        L: LockBefore<UserFaultInner>,
3521    {
3522        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3523            state.zero(addr, effective_length, &self.mapping_context)
3524        })
3525    }
3526
3527    pub fn fill_from_uffd<L>(
3528        &self,
3529        locked: &mut Locked<L>,
3530        addr: UserAddress,
3531        buf: &[u8],
3532        length: usize,
3533        userfault: &Arc<UserFault>,
3534    ) -> Result<usize, Errno>
3535    where
3536        L: LockBefore<UserFaultInner>,
3537    {
3538        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3539            state.write_memory(addr, &buf[..effective_length], &self.mapping_context)
3540        })
3541    }
3542
3543    pub fn copy_from_uffd<L>(
3544        &self,
3545        locked: &mut Locked<L>,
3546        source_addr: UserAddress,
3547        dst_addr: UserAddress,
3548        length: usize,
3549        userfault: &Arc<UserFault>,
3550    ) -> Result<usize, Errno>
3551    where
3552        L: LockBefore<UserFaultInner>,
3553    {
3554        self.populate_from_uffd(locked, dst_addr, length, userfault, |state, effective_length| {
3555            let mut buf = vec![std::mem::MaybeUninit::uninit(); effective_length];
3556            let buf = state.read_memory(source_addr, &mut buf, &self.mapping_context)?;
3557            state.write_memory(dst_addr, &buf[..effective_length], &self.mapping_context)
3558        })
3559    }
3560
3561    /// Returns the new `MemoryManager` for a process, pre-populated with a snapshot of the layout
3562    /// and mappings of `source_mm`.  This is used during `CurrentTask::clone()` operations to
3563    /// create the initial address-space for the cloned child process.
3564    pub fn snapshot_of<L>(
3565        locked: &mut Locked<L>,
3566        source_mm: &Arc<MemoryManager>,
3567        root_vmar: zx::Unowned<'_, zx::Vmar>,
3568        arch_width: ArchWidth,
3569    ) -> Result<Arc<Self>, Errno>
3570    where
3571        L: LockBefore<MmDumpable>,
3572    {
3573        trace_duration!(CATEGORY_STARNIX_MM, "snapshot_of");
3574        let backing_size = (source_mm.mapping_context.user_vmar_info.base
3575            + source_mm.mapping_context.user_vmar_info.len) as u64;
3576        let private_anonymous =
3577            source_mm.mapping_context.private_anonymous.snapshot(backing_size)?;
3578        let target = MemoryManager::new(
3579            root_vmar,
3580            arch_width,
3581            source_mm.executable_node(),
3582            Some(private_anonymous),
3583        )?;
3584
3585        // Hold the lock throughout the operation to uphold memory manager's invariants.
3586        // See mm/README.md.
3587        {
3588            let state: &mut MemoryManagerState = &mut source_mm.state.write();
3589            let mut target_state = target.state.write();
3590            debug_assert_eq!(
3591                source_mm.mapping_context.user_vmar_info,
3592                target.mapping_context.user_vmar_info
3593            );
3594
3595            let mut clone_cache = HashMap::<zx::Koid, Arc<MemoryObject>>::new();
3596
3597            for (range, mapping) in state.mappings.iter() {
3598                if mapping.flags().contains(MappingFlags::DONTFORK) {
3599                    continue;
3600                }
3601                // Locking is not inherited when forking.
3602                let target_mapping_flags = mapping.flags().difference(MappingFlags::LOCKED);
3603                match state.get_mapping_backing(mapping) {
3604                    MappingBacking::Memory(backing) => {
3605                        trace_duration!(CATEGORY_STARNIX_MM, "memory_backing_clone");
3606                        let memory_offset = backing.address_to_offset(range.start);
3607
3608                        let target_memory = if mapping.flags().contains(MappingFlags::SHARED)
3609                            || mapping.name().is_vvar()
3610                        {
3611                            // Note that the Vvar is a special mapping that behaves like a shared mapping but
3612                            // is private to each process.
3613                            backing.memory().clone()
3614                        } else {
3615                            let memory_obj = backing.memory();
3616                            let options = mapping.flags().options();
3617                            let memory =
3618                                clone_cache.entry(memory_obj.get_koid()).or_insert_with_fallible(
3619                                    || memory_obj.clone_memory(memory_obj.get_rights(), options),
3620                                )?;
3621                            memory.clone()
3622                        };
3623
3624                        let mapping = Mapping::with_name(
3625                            MappingBacking::Memory(Box::new(MappingBackingMemory::new(
3626                                range.start,
3627                                target_memory,
3628                                memory_offset,
3629                            ))),
3630                            target_mapping_flags,
3631                            mapping.max_access(),
3632                            mapping.name().to_owned(),
3633                            MappingMode::Lazy,
3634                        );
3635                        assert!(
3636                            target_state.mappings.append_non_overlapping(range.clone(), mapping)
3637                        );
3638                    }
3639                    MappingBacking::PrivateAnonymous => {
3640                        trace_duration!(CATEGORY_STARNIX_MM, "private_anonymous_backing_clone");
3641                        let length = range.end - range.start;
3642                        if mapping.flags().contains(MappingFlags::WIPEONFORK) {
3643                            target
3644                                .mapping_context
3645                                .private_anonymous
3646                                .zero(range.start, length)
3647                                .map_err(|_| errno!(ENOMEM))?;
3648                        }
3649
3650                        let mapping = Mapping::new_private_anonymous(
3651                            target_mapping_flags,
3652                            mapping.name().to_owned(),
3653                            MappingMode::Lazy,
3654                        );
3655                        assert!(
3656                            target_state.mappings.append_non_overlapping(range.clone(), mapping)
3657                        );
3658                    }
3659                };
3660            }
3661
3662            target_state.forkable_state = state.forkable_state.clone();
3663        }
3664
3665        let self_dumpable = *source_mm.dumpable.lock(locked);
3666        *target.dumpable.lock(locked) = self_dumpable;
3667
3668        Ok(target)
3669    }
3670
3671    /// Returns the replacement `MemoryManager` to be used by the `exec()`ing task.
3672    ///
3673    /// POSIX requires that "a call to any exec function from a process with more than one thread
3674    /// shall result in all threads being terminated and the new executable being loaded and
3675    /// executed. No destructor functions or cleanup handlers shall be called".
3676    /// The caller is responsible for having ensured that this is the only `Task` in the
3677    /// `ThreadGroup`, and thereby the `zx::process`, such that it is safe to tear-down the Zircon
3678    /// userspace VMAR for the current address-space.
3679    pub fn exec(
3680        root_vmar: zx::Unowned<'_, zx::Vmar>,
3681        old_mm: Option<Arc<Self>>,
3682        exe_node: NamespaceNode,
3683        arch_width: ArchWidth,
3684    ) -> Result<Arc<Self>, Errno> {
3685        // To safeguard against concurrent accesses by other tasks through this `MemoryManager`, the
3686        // following steps are performed while holding the write lock on the old MM, if any:
3687        //
3688        // 1. All `mappings` are removed, so that remote `MemoryAccessor` calls will fail.
3689        // 2. The `user_vmar` is `destroy()`ed to free-up the user address-space.
3690        //
3691        // Once these steps are complete it is safe for the old mappings to be dropped.
3692        if let Some(old_mm) = old_mm {
3693            let _old_mappings = {
3694                let mut state = old_mm.state.write();
3695
3696                // SAFETY: This operation is safe because this is the only `Task` active in the address-
3697                // space, and accesses by remote tasks will use syscalls on the `root_vmar`.
3698                unsafe {
3699                    old_mm
3700                        .mapping_context
3701                        .user_vmar
3702                        .destroy()
3703                        .map_err(|status| from_status_like_fdio!(status))?
3704                }
3705
3706                std::mem::replace(&mut state.mappings, Default::default())
3707            };
3708        }
3709
3710        Self::new(root_vmar, arch_width, Some(exe_node), None)
3711    }
3712
3713    pub fn initialize_brk_origin(
3714        &self,
3715        arch_width: ArchWidth,
3716        executable_end: UserAddress,
3717    ) -> Result<(), Errno> {
3718        self.state.write().brk_origin = executable_end
3719            .checked_add(generate_random_offset_for_aslr(arch_width))
3720            .ok_or_else(|| errno!(EINVAL))?;
3721        Ok(())
3722    }
3723
3724    // Get a randomised address for loading a position-independent executable.
3725    pub fn get_random_base_for_executable(
3726        &self,
3727        arch_width: ArchWidth,
3728        length: usize,
3729    ) -> Result<UserAddress, Errno> {
3730        let state = self.state.read();
3731
3732        // Place it at approx. 2/3 of the available mmap space, subject to ASLR adjustment.
3733        let base = round_up_to_system_page_size(2 * state.mmap_top.ptr() / 3).unwrap()
3734            + generate_random_offset_for_aslr(arch_width);
3735        if base.checked_add(length).ok_or_else(|| errno!(EINVAL))? <= state.mmap_top.ptr() {
3736            Ok(UserAddress::from_ptr(base))
3737        } else {
3738            error!(EINVAL)
3739        }
3740    }
3741    pub fn executable_node(&self) -> Option<NamespaceNode> {
3742        self.state.read().executable_node.clone()
3743    }
3744
3745    #[track_caller]
3746    pub fn get_errno_for_map_err(status: zx::Status) -> Errno {
3747        match status {
3748            zx::Status::INVALID_ARGS => errno!(EINVAL),
3749            zx::Status::ACCESS_DENIED => errno!(EPERM),
3750            zx::Status::NOT_SUPPORTED => errno!(ENODEV),
3751            zx::Status::NO_MEMORY => errno!(ENOMEM),
3752            zx::Status::NO_RESOURCES => errno!(ENOMEM),
3753            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
3754            zx::Status::ALREADY_EXISTS => errno!(EEXIST),
3755            zx::Status::BAD_STATE => errno!(EINVAL),
3756            _ => impossible_error(status),
3757        }
3758    }
3759
3760    #[track_caller]
3761    pub fn get_errno_for_vmo_err(status: zx::Status) -> Errno {
3762        match status {
3763            zx::Status::NO_MEMORY => errno!(ENOMEM),
3764            zx::Status::ACCESS_DENIED => errno!(EPERM),
3765            zx::Status::NOT_SUPPORTED => errno!(EIO),
3766            zx::Status::BAD_STATE => errno!(EIO),
3767            _ => return impossible_error(status),
3768        }
3769    }
3770
3771    pub fn map_memory(
3772        self: &Arc<Self>,
3773        addr: DesiredAddress,
3774        memory: Arc<MemoryObject>,
3775        memory_offset: u64,
3776        length: usize,
3777        prot_flags: ProtectionFlags,
3778        max_access: Access,
3779        options: MappingOptions,
3780        name: MappingName,
3781    ) -> Result<UserAddress, Errno> {
3782        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
3783
3784        // Unmapped mappings must be released after the state is unlocked.
3785        let mut released_mappings = ReleasedMappings::default();
3786        // Hold the lock throughout the operation to uphold memory manager's invariants.
3787        // See mm/README.md.
3788        let mut state = self.state.write();
3789        let result = state.add_memory_mapping(
3790            self,
3791            addr,
3792            memory,
3793            memory_offset,
3794            length,
3795            flags,
3796            max_access,
3797            options.contains(MappingOptions::POPULATE),
3798            name,
3799            MappingMode::Eager,
3800            &mut released_mappings,
3801        );
3802
3803        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
3804        // in `DirEntry`'s `drop`.
3805        released_mappings.finalize(state);
3806
3807        result
3808    }
3809
3810    pub fn map_anonymous(
3811        self: &Arc<Self>,
3812        addr: DesiredAddress,
3813        length: usize,
3814        prot_flags: ProtectionFlags,
3815        options: MappingOptions,
3816        name: MappingName,
3817    ) -> Result<UserAddress, Errno> {
3818        let mut released_mappings = ReleasedMappings::default();
3819        // Hold the lock throughout the operation to uphold memory manager's invariants.
3820        // See mm/README.md.
3821        let mut state = self.state.write();
3822        let result = state.map_anonymous(
3823            self,
3824            addr,
3825            length,
3826            prot_flags,
3827            options,
3828            name,
3829            &mut released_mappings,
3830        );
3831
3832        released_mappings.finalize(state);
3833
3834        result
3835    }
3836
3837    /// Map the stack into a pre-selected address region
3838    pub fn map_stack(
3839        self: &Arc<Self>,
3840        length: usize,
3841        prot_flags: ProtectionFlags,
3842    ) -> Result<UserAddress, Errno> {
3843        assert!(length <= MAX_STACK_SIZE);
3844        let addr = self.state.read().stack_origin;
3845        // The address range containing stack_origin should normally be available: it's above the
3846        // mmap_top, and this method is called early enough in the process lifetime that only the
3847        // main ELF and the interpreter are already loaded. However, in the rare case that the
3848        // static position-independent executable is overlapping the chosen address, mapping as Hint
3849        // will make mmap choose a new place for it.
3850        // TODO(https://fxbug.dev/370027241): Consider a more robust approach
3851        let stack_addr = self.map_anonymous(
3852            DesiredAddress::Hint(addr),
3853            length,
3854            prot_flags,
3855            MappingOptions::ANONYMOUS | MappingOptions::GROWSDOWN,
3856            MappingName::Stack,
3857        )?;
3858        if stack_addr != addr {
3859            log_warn!(
3860                "An address designated for stack ({}) was unavailable, mapping at {} instead.",
3861                addr,
3862                stack_addr
3863            );
3864        }
3865        Ok(stack_addr)
3866    }
3867
3868    pub fn remap(
3869        self: &Arc<Self>,
3870        current_task: &CurrentTask,
3871        addr: UserAddress,
3872        old_length: usize,
3873        new_length: usize,
3874        flags: MremapFlags,
3875        new_addr: UserAddress,
3876    ) -> Result<UserAddress, Errno> {
3877        let mut released_mappings = ReleasedMappings::default();
3878        // Hold the lock throughout the operation to uphold memory manager's invariants.
3879        // See mm/README.md.
3880        let mut state = self.state.write();
3881        let result = state.remap(
3882            current_task,
3883            self,
3884            addr,
3885            old_length,
3886            new_length,
3887            flags,
3888            new_addr,
3889            &mut released_mappings,
3890        );
3891
3892        released_mappings.finalize(state);
3893
3894        result
3895    }
3896
3897    pub fn unmap(self: &Arc<Self>, addr: UserAddress, length: usize) -> Result<(), Errno> {
3898        let mut released_mappings = ReleasedMappings::default();
3899        // Hold the lock throughout the operation to uphold memory manager's invariants.
3900        // See mm/README.md.
3901        let mut state = self.state.write();
3902        let result = state.unmap(self, addr, length, &mut released_mappings);
3903
3904        released_mappings.finalize(state);
3905
3906        result
3907    }
3908
3909    pub fn protect(
3910        &self,
3911        current_task: &CurrentTask,
3912        addr: UserAddress,
3913        length: usize,
3914        prot_flags: ProtectionFlags,
3915    ) -> Result<(), Errno> {
3916        let page_size = *PAGE_SIZE;
3917        if !addr.is_aligned(page_size) {
3918            return error!(EINVAL);
3919        }
3920        if length == 0 {
3921            return Ok(());
3922        }
3923        let end = addr.checked_add(length).ok_or_else(|| errno!(ENOMEM))?.round_up(page_size)?;
3924        if end > self.maximum_valid_user_address {
3925            return error!(ENOMEM);
3926        }
3927
3928        // Hold the lock throughout the operation to uphold memory manager's invariants.
3929        // See mm/README.md.
3930        let mut state = self.state.write();
3931        let mut released_mappings = ReleasedMappings::default();
3932        let result = state.protect(current_task, addr, length, prot_flags, &mut released_mappings);
3933        released_mappings.finalize(state);
3934        result
3935    }
3936
3937    pub fn msync(
3938        &self,
3939        _locked: &mut Locked<Unlocked>,
3940        current_task: &CurrentTask,
3941        addr: UserAddress,
3942        length: usize,
3943        flags: MsyncFlags,
3944    ) -> Result<(), Errno> {
3945        // According to POSIX, either MS_SYNC or MS_ASYNC must be specified in flags,
3946        // and indeed failure to include one of these flags will cause msync() to fail
3947        // on some systems.  However, Linux permits a call to msync() that specifies
3948        // neither of these flags, with semantics that are (currently) equivalent to
3949        // specifying MS_ASYNC.
3950
3951        // Both MS_SYNC and MS_ASYNC are set in flags
3952        if flags.contains(MsyncFlags::ASYNC) && flags.contains(MsyncFlags::SYNC) {
3953            return error!(EINVAL);
3954        }
3955
3956        if !addr.is_aligned(*PAGE_SIZE) {
3957            return error!(EINVAL);
3958        }
3959
3960        // We collect the nodes to sync first, release the memory manager lock, and then sync them.
3961        // This avoids holding the lock during blocking I/O operations (sync), which prevents
3962        // stalling other memory operations and avoids potential deadlocks.
3963        // It also allows us to deduplicate nodes, avoiding redundant sync calls for the same file.
3964        let mut nodes_to_sync = {
3965            let mm_state = self.state.read();
3966
3967            let length_rounded = round_up_to_system_page_size(length)?;
3968            let end_addr = addr.checked_add(length_rounded).ok_or_else(|| errno!(EINVAL))?;
3969
3970            let mut last_end = addr;
3971            let mut nodes = vec![];
3972            for (range, mapping) in mm_state.mappings.range(addr..end_addr) {
3973                // Check if there is a gap between the last mapped address and the current mapping.
3974                // msync requires the entire range to be mapped, so any gap results in ENOMEM.
3975                if range.start > last_end {
3976                    return error!(ENOMEM);
3977                }
3978                last_end = range.end;
3979
3980                if flags.contains(MsyncFlags::INVALIDATE)
3981                    && mapping.flags().contains(MappingFlags::LOCKED)
3982                {
3983                    return error!(EBUSY);
3984                }
3985
3986                if flags.contains(MsyncFlags::SYNC) {
3987                    if let MappingNameRef::File(file_mapping) = mapping.name() {
3988                        nodes.push(file_mapping.name.entry.node.clone());
3989                    }
3990                }
3991            }
3992            if last_end < end_addr {
3993                return error!(ENOMEM);
3994            }
3995            nodes
3996        };
3997
3998        // Deduplicate nodes to avoid redundant sync calls.
3999        nodes_to_sync.sort_by_key(|n| Arc::as_ptr(n) as usize);
4000        nodes_to_sync.dedup_by(|a, b| Arc::ptr_eq(a, b));
4001
4002        for node in nodes_to_sync {
4003            // Range-based sync is non-trivial for Fxfs to support due to its complicated
4004            // reservation system (b/322874588#comment5). Naive range-based sync could exhaust
4005            // space reservations if called page-by-page, as transaction costs are based on the
4006            // number of dirty pages rather than file ranges. We use whole-file sync for now
4007            // to ensure data durability without adding excessive complexity.
4008            node.ops().sync(&node, current_task)?;
4009        }
4010        Ok(())
4011    }
4012
4013    pub fn madvise(&self, addr: UserAddress, length: usize, advice: u32) -> Result<(), Errno> {
4014        let mut state = self.state.write();
4015        let mut released_mappings = ReleasedMappings::default();
4016        let result =
4017            state.madvise(&self.mapping_context, addr, length, advice, &mut released_mappings);
4018        released_mappings.finalize(state);
4019        result
4020    }
4021
4022    pub fn mlock<L>(
4023        &self,
4024        current_task: &CurrentTask,
4025        locked: &mut Locked<L>,
4026        desired_addr: UserAddress,
4027        desired_length: usize,
4028        on_fault: bool,
4029    ) -> Result<(), Errno>
4030    where
4031        L: LockBefore<ThreadGroupLimits>,
4032    {
4033        let mut state = self.state.write();
4034        let mut released_mappings = ReleasedMappings::default();
4035        let result = state.mlock(
4036            &self.mapping_context,
4037            current_task,
4038            locked,
4039            desired_addr,
4040            desired_length,
4041            on_fault,
4042            &mut released_mappings,
4043        );
4044        released_mappings.finalize(state);
4045        result
4046    }
4047
4048    pub fn munlock(
4049        &self,
4050        current_task: &CurrentTask,
4051        desired_addr: UserAddress,
4052        desired_length: usize,
4053    ) -> Result<(), Errno> {
4054        let mut state = self.state.write();
4055        let mut released_mappings = ReleasedMappings::default();
4056        let result =
4057            state.munlock(current_task, desired_addr, desired_length, &mut released_mappings);
4058        released_mappings.finalize(state);
4059        result
4060    }
4061
4062    pub fn log_memory_map(&self, task: &Task, fault_address: UserAddress) {
4063        let state = self.state.read();
4064        log_warn!("Memory map for pid={}:", task.thread_group.leader);
4065        let mut last_end = UserAddress::from_ptr(0);
4066        for (range, map) in state.mappings.iter() {
4067            if fault_address >= last_end && fault_address < range.start {
4068                log_warn!("{:08x} <= FAULT", fault_address.ptr());
4069            }
4070
4071            let perms = format!(
4072                "{}{}{}{}",
4073                if map.can_read() { 'r' } else { '-' },
4074                if map.can_write() { 'w' } else { '-' },
4075                if map.can_exec() { 'x' } else { '-' },
4076                if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' }
4077            );
4078
4079            let backing = match state.get_mapping_backing(map) {
4080                MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
4081                MappingBacking::PrivateAnonymous => 0,
4082            };
4083
4084            let name_str = match &map.name() {
4085                MappingNameRef::File(file) => {
4086                    let Ok(live) = task.live() else {
4087                        log_warn!("Task {} is not live", task.get_tid());
4088                        continue;
4089                    };
4090                    String::from_utf8_lossy(&file.name.path(&live.fs())).into_owned()
4091                }
4092                MappingNameRef::None | MappingNameRef::AioContext(_) => {
4093                    if map.flags().contains(MappingFlags::SHARED)
4094                        && map.flags().contains(MappingFlags::ANONYMOUS)
4095                    {
4096                        "/dev/zero (deleted)".to_string()
4097                    } else {
4098                        "".to_string()
4099                    }
4100                }
4101                MappingNameRef::Stack => "[stack]".to_string(),
4102                MappingNameRef::Heap => "[heap]".to_string(),
4103                MappingNameRef::Vdso => "[vdso]".to_string(),
4104                MappingNameRef::Vvar => "[vvar]".to_string(),
4105                _ => format!("{:?}", map.name()),
4106            };
4107
4108            let fault_marker = if range.contains(&fault_address) { " <= FAULT" } else { "" };
4109
4110            log_warn!(
4111                "{:08x}-{:08x} {} {:08x} {}{}",
4112                range.start.ptr(),
4113                range.end.ptr(),
4114                perms,
4115                backing,
4116                name_str,
4117                fault_marker
4118            );
4119            last_end = range.end;
4120        }
4121
4122        if fault_address >= last_end {
4123            log_warn!("{:08x} <= FAULT", fault_address.ptr());
4124        }
4125    }
4126
    /// Decides how a user page fault at `decoded.faulting_address` is resolved.
    ///
    /// The steps are tried in this order:
    ///
    ///   1. When `error_code` is `ACCESS_DENIED` and the faulting address is mapped: a mapping
    ///      flagged `UFFD` for which `find_uffd` returns a userfaultfd yields a `SIGBUS`
    ///      (`BUS_ADRERR`); otherwise, if `check_access_permissions_in_page_fault` returns
    ///      `true`, the fault is reported as `Handled`.
    ///   2. When `decoded.not_present` is set: the range is mapped into the user VMAR through
    ///      `ensure_range_mapped_in_user_vmar`, the fault is retried if the mapping generation
    ///      changed since this thread last recorded it, and finally an extension of a
    ///      growsdown mapping is attempted.
    ///   3. Otherwise a signal is produced: `SIGBUS` (`BUS_ADRERR`) for `OUT_OF_RANGE`, else
    ///      `SIGSEGV` with `SEGV_ACCERR` when a mapping exists at the address and
    ///      `SEGV_MAPERR` when none does.
    ///
    /// Returns `ExceptionResult::Handled` when the faulting instruction should be retried and
    /// `ExceptionResult::Signal` otherwise.
    ///
    /// # Panics
    ///
    /// Panics if a mapping flagged `UFFD` is not also flagged `UFFD_MISSING`.
    pub fn handle_page_fault(
        self: &Arc<Self>,
        locked: &mut Locked<Unlocked>,
        decoded: PageFaultExceptionReport,
        error_code: zx::Status,
    ) -> ExceptionResult {
        let addr = UserAddress::from(decoded.faulting_address);

        // On uffd-registered range, handle according to the uffd rules
        if error_code == zx::Status::ACCESS_DENIED {
            let state = self.state.write();
            if let Some((_, mapping)) = state.mappings.get(addr) {
                if mapping.flags().contains(MappingFlags::UFFD) {
                    // TODO(https://fxbug.dev/391599171): Support other modes
                    assert!(mapping.flags().contains(MappingFlags::UFFD_MISSING));

                    if let Some(_uffd) = state.find_uffd(locked, addr) {
                        // If the SIGBUS feature was set, no event will be sent to the file.
                        // Instead, SIGBUS is delivered to the process that triggered the fault.
                        // TODO(https://fxbug.dev/391599171): For now we only support this feature,
                        // so we assume it is set.
                        // Check for the SIGBUS feature when we start supporting running without it.
                        return ExceptionResult::Signal(SignalInfo::with_detail(
                            SIGBUS,
                            BUS_ADRERR as i32,
                            SignalDetail::SigFault { addr: decoded.faulting_address },
                        ));
                    };
                }
                // There is a data race resulting from uffd unregistration and page fault happening
                // at the same time. To detect it, we check if the access was meant to be rejected
                // according to Starnix own information about the mapping.
                if check_access_permissions_in_page_fault(&decoded, mapping) {
                    track_stub!(
                        TODO("https://fxbug.dev/435171399"),
                        "Inconsistent permission fault"
                    );
                    return ExceptionResult::Handled;
                }
            }
            // Release the lock explicitly: the steps below acquire the state lock again.
            std::mem::drop(state);
        }

        if decoded.not_present {
            {
                let mut state = self.state.write();
                match state.ensure_range_mapped_in_user_vmar(addr, None, &self.mapping_context) {
                    Ok(true) => return ExceptionResult::Handled,
                    Ok(false) => {
                        // If the mapping generation has changed since the last time this thread
                        // saw it, we return `Handled` to retry the faulting instruction.
                        // This handles cases where the fault was spurious due to a concurrent
                        // mapping operation. We update the counter here to ensure we converge and
                        // don't loop infinitely.
                        let current_gen = state.mappings.generation;
                        let old_gen = LAST_SEEN_MAPPING_GENERATION.with(|c| c.replace(current_gen));
                        if current_gen != old_gen {
                            return ExceptionResult::Handled;
                        }
                    }
                    Err(e) => {
                        log_error!("Failed to map lazy memory: {e}")
                    }
                }
            }

            // A page fault may be resolved by extending a growsdown mapping to cover the faulting
            // address. Mark the exception handled if so. Otherwise let the regular handling proceed.

            // We should only attempt growth on a not-present fault and we should only extend if the
            // access type matches the protection on the GROWSDOWN mapping.
            match self.extend_growsdown_mapping_to_address(
                UserAddress::from(decoded.faulting_address),
                decoded.is_write,
            ) {
                Ok(true) => {
                    return ExceptionResult::Handled;
                }
                Err(e) => {
                    log_warn!("Error handling page fault: {e}")
                }
                _ => {}
            }
        }

        // For this exception type, the synth_code field in the exception report's context is the
        // error generated by the page fault handler. For us this is used to distinguish between a
        // segmentation violation and a bus error. Unfortunately this detail is not documented in
        // Zircon's public documentation and is only described in the architecture-specific
        // exception definitions such as:
        // zircon/kernel/arch/x86/include/arch/x86.h
        // zircon/kernel/arch/arm64/include/arch/arm64.h
        let (signo, si_code) = match error_code {
            zx::Status::OUT_OF_RANGE => (SIGBUS, linux_uapi::BUS_ADRERR as i32),
            _ => {
                let code = if self.state.read().mappings.get(addr).is_some() {
                    linux_uapi::SEGV_ACCERR
                } else {
                    linux_uapi::SEGV_MAPERR
                };
                (SIGSEGV, code as i32)
            }
        };
        ExceptionResult::Signal(SignalInfo::with_detail(
            signo,
            si_code,
            SignalDetail::SigFault { addr: decoded.faulting_address },
        ))
    }
4236
4237    pub fn set_mapping_name(
4238        &self,
4239        addr: UserAddress,
4240        length: usize,
4241        name: Option<FsString>,
4242    ) -> Result<(), Errno> {
4243        let mut state = self.state.write();
4244        let mut released_mappings = ReleasedMappings::default();
4245        let result = state.set_mapping_name(addr, length, name, &mut released_mappings);
4246        released_mappings.finalize(state);
4247        result
4248    }
4249
4250    /// Returns [`Ok`] if the entire range specified by `addr..(addr+length)` contains valid
4251    /// mappings.
4252    ///
4253    /// # Errors
4254    ///
4255    /// Returns [`Err(errno)`] where `errno` is:
4256    ///
4257    ///   - `EINVAL`: `addr` is not page-aligned, or the range is too large,
4258    ///   - `ENOMEM`: one or more pages in the range are not mapped.
4259    pub fn ensure_mapped(&self, addr: UserAddress, length: usize) -> Result<(), Errno> {
4260        if !addr.is_aligned(*PAGE_SIZE) {
4261            return error!(EINVAL);
4262        }
4263
4264        let length = round_up_to_system_page_size(length)?;
4265        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
4266        let state = self.state.read();
4267        let mut last_end = addr;
4268        for (range, _) in state.mappings.range(addr..end_addr) {
4269            if range.start > last_end {
4270                // This mapping does not start immediately after the last.
4271                return error!(ENOMEM);
4272            }
4273            last_end = range.end;
4274        }
4275        if last_end < end_addr {
4276            // There is a gap of no mappings at the end of the range.
4277            error!(ENOMEM)
4278        } else {
4279            Ok(())
4280        }
4281    }
4282
4283    /// Returns the memory object mapped at the address and the offset into the memory object of
4284    /// the address. Intended for implementing futexes.
4285    pub fn get_mapping_memory(
4286        &self,
4287        addr: UserAddress,
4288        perms: ProtectionFlags,
4289    ) -> Result<(Arc<MemoryObject>, u64), Errno> {
4290        let state = self.state.read();
4291        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
4292        if !mapping.flags().access_flags().contains(perms) {
4293            return error!(EACCES);
4294        }
4295        match state.get_mapping_backing(mapping) {
4296            MappingBacking::Memory(backing) => {
4297                Ok((Arc::clone(backing.memory()), mapping.address_to_offset(addr)))
4298            }
4299            MappingBacking::PrivateAnonymous => {
4300                Ok((Arc::clone(&self.mapping_context.private_anonymous.backing), addr.ptr() as u64))
4301            }
4302        }
4303    }
4304
4305    /// Does a rough check that the given address is plausibly in the address space of the
4306    /// application. This does not mean the pointer is valid for any particular purpose or that
4307    /// it will remain so!
4308    ///
4309    /// In some syscalls, Linux seems to do some initial validation of the pointer up front to
4310    /// tell the caller early if it's invalid. For example, in epoll_wait() it's returning a vector
4311    /// of events. If the caller passes an invalid pointer, it wants to fail without dropping any
4312    /// events. Failing later when actually copying the required events to userspace would mean
4313    /// those events will be lost. But holding a lock on the memory manager for an asynchronous
4314    /// wait is not desirable.
4315    ///
4316    /// Testing shows that Linux seems to do some initial plausibility checking of the pointer to
4317    /// be able to report common usage errors before doing any (possibly unreversable) work. This
4318    /// checking is easy to get around if you try, so this function is also not required to
4319    /// be particularly robust. Certainly the more advanced cases of races (the memory could be
4320    /// unmapped after this call but before it's used) are not handled.
4321    ///
4322    /// The buffer_size variable is the size of the data structure that needs to fit
4323    /// in the given memory.
4324    ///
4325    /// Returns the error EFAULT if invalid.
4326    pub fn check_plausible(&self, addr: UserAddress, buffer_size: usize) -> Result<(), Errno> {
4327        let state = self.state.read();
4328
4329        if let Some(range) = state.mappings.last_range() {
4330            if (range.end - buffer_size)? >= addr {
4331                return Ok(());
4332            }
4333        }
4334        error!(EFAULT)
4335    }
4336
4337    pub fn get_aio_context(&self, addr: UserAddress) -> Option<Arc<AioContext>> {
4338        let state = self.state.read();
4339        state.get_aio_context(addr).map(|(_, aio_context)| aio_context)
4340    }
4341
4342    pub fn destroy_aio_context(
4343        self: &Arc<Self>,
4344        addr: UserAddress,
4345    ) -> Result<Arc<AioContext>, Errno> {
4346        let mut released_mappings = ReleasedMappings::default();
4347
4348        // Hold the lock throughout the operation to uphold memory manager's invariants.
4349        // See mm/README.md.
4350        let mut state = self.state.write();
4351
4352        // Validate that this address actually has an AioContext. We need to hold the state lock
4353        // until we actually remove the mappings to ensure that another thread does not manipulate
4354        // the mappings after we've validated that they contain an AioContext.
4355        let Some((range, aio_context)) = state.get_aio_context(addr) else {
4356            return error!(EINVAL);
4357        };
4358
4359        let length = range.end - range.start;
4360        let result = state.unmap(self, range.start, length, &mut released_mappings);
4361
4362        released_mappings.finalize(state);
4363
4364        result.map(|_| aio_context)
4365    }
4366
4367    #[cfg(test)]
4368    pub fn get_mapping_name(
4369        &self,
4370        addr: UserAddress,
4371    ) -> Result<Option<flyweights::FlyByteStr>, Errno> {
4372        let state = self.state.read();
4373        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
4374        if let MappingNameRef::Vma(name) = mapping.name() {
4375            Ok(Some(name.clone()))
4376        } else {
4377            Ok(None)
4378        }
4379    }
4380
4381    #[cfg(test)]
4382    pub fn get_mapping_count(&self) -> usize {
4383        let state = self.state.read();
4384        state.mappings.iter().count()
4385    }
4386
4387    pub fn extend_growsdown_mapping_to_address(
4388        self: &Arc<Self>,
4389        addr: UserAddress,
4390        is_write: bool,
4391    ) -> Result<bool, Error> {
4392        self.state.write().extend_growsdown_mapping_to_address(self, addr, is_write)
4393    }
4394
4395    pub fn get_total_usage(&self) -> usize {
4396        self.state.read().mappings.total_usage
4397    }
4398
    /// Builds a `MemoryStats` snapshot for this address space.
    ///
    /// The Zircon mappings supplied by `with_zx_mappings` are walked while the state read lock
    /// is held. Sizes and committed/populated byte counts come from Zircon's mapping details;
    /// the classification (shared / anonymous / file, locked, ELF data / ELF executable) comes
    /// from the flags and name of the matching entry in the memory manager's own `mappings`.
    ///
    /// # Panics
    ///
    /// Panics if the base address of a Zircon mapping has no entry in `mappings`. In builds
    /// with debug assertions it also panics if the two sides disagree on the mapped VMO's koid.
    pub fn get_stats(&self, current_task: &CurrentTask) -> MemoryStats {
        // Grab our state lock before reading zircon mappings so that the two are consistent.
        // Other Starnix threads should not make any changes to the Zircon mappings while we hold
        // a read lock to the memory manager state.
        let state = self.state.read();

        let mut stats = MemoryStats::default();
        stats.vm_stack = state.stack_size;

        self.with_zx_mappings(current_task, |zx_mappings| {
            for zx_mapping in zx_mappings {
                // We only care about map info for actual mappings.
                let zx_details = zx_mapping.details();
                let Some(zx_details) = zx_details.as_mapping() else { continue };
                let user_address = UserAddress::from(zx_mapping.base as u64);
                let (_, mm_mapping) = state
                    .mappings
                    .get(user_address)
                    .unwrap_or_else(|| panic!("mapping bookkeeping must be consistent with zircon's: not found: {user_address:?}"));
                debug_assert_eq!(
                    match state.get_mapping_backing(mm_mapping) {
                        MappingBacking::Memory(m)=>m.memory().get_koid(),
                        MappingBacking::PrivateAnonymous=>self.mapping_context.private_anonymous.backing.get_koid(),
                    },
                    zx_details.vmo_koid,
                    "MemoryManager and Zircon must agree on which VMO is mapped in this range",
                );

                stats.vm_size += zx_mapping.size;

                stats.vm_rss += zx_details.committed_bytes;
                // Bytes that are populated but not committed are accounted as swap.
                stats.vm_swap += zx_details.populated_bytes - zx_details.committed_bytes;

                // Each mapping's committed bytes land in at most one RSS bucket; the shared
                // check takes precedence over anonymous, which takes precedence over file.
                if mm_mapping.flags().contains(MappingFlags::SHARED) {
                    stats.rss_shared += zx_details.committed_bytes;
                } else if mm_mapping.flags().contains(MappingFlags::ANONYMOUS) {
                    stats.rss_anonymous += zx_details.committed_bytes;
                } else if mm_mapping.name().is_file() {
                    stats.rss_file += zx_details.committed_bytes;
                }

                if mm_mapping.flags().contains(MappingFlags::LOCKED) {
                    stats.vm_lck += zx_details.committed_bytes;
                }

                // Writable ELF mappings count towards the data size.
                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
                    && mm_mapping.flags().contains(MappingFlags::WRITE)
                {
                    stats.vm_data += zx_mapping.size;
                }

                // Executable ELF mappings count towards the executable size.
                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
                    && mm_mapping.flags().contains(MappingFlags::EXEC)
                {
                    stats.vm_exe += zx_mapping.size;
                }
            }
        });

        // TODO(https://fxbug.dev/396221597): Placeholder for now. We need kernel support to track
        // the committed bytes high water mark.
        stats.vm_rss_hwm = STUB_VM_RSS_HWM;
        stats
    }
4463
4464    pub fn atomic_load_u32_acquire(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
4465        if let Some(usercopy) = usercopy() {
4466            self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None)?;
4467            usercopy.atomic_load_u32_acquire(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
4468        } else {
4469            unreachable!("can only control memory ordering of atomics with usercopy");
4470        }
4471    }
4472
4473    pub fn atomic_load_u32_relaxed(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
4474        if let Some(usercopy) = usercopy() {
4475            self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None)?;
4476            usercopy.atomic_load_u32_relaxed(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
4477        } else {
4478            // SAFETY: `self.state.read().read_memory` only returns `Ok` if all
4479            // bytes were read to.
4480            let buf = unsafe {
4481                read_to_array(|buf| {
4482                    self.state
4483                        .read()
4484                        .read_memory(futex_addr.into(), buf, &self.mapping_context)
4485                        .map(|bytes_read| {
4486                            debug_assert_eq!(bytes_read.len(), std::mem::size_of::<u32>())
4487                        })
4488                })
4489            }?;
4490            Ok(u32::from_ne_bytes(buf))
4491        }
4492    }
4493
4494    pub fn atomic_store_u32_relaxed(
4495        &self,
4496        futex_addr: FutexAddress,
4497        value: u32,
4498    ) -> Result<(), Errno> {
4499        if let Some(usercopy) = usercopy() {
4500            self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None)?;
4501            usercopy.atomic_store_u32_relaxed(futex_addr.ptr(), value).map_err(|_| errno!(EFAULT))
4502        } else {
4503            self.state.read().write_memory(
4504                futex_addr.into(),
4505                value.as_bytes(),
4506                &self.mapping_context,
4507            )?;
4508            Ok(())
4509        }
4510    }
4511
4512    pub fn atomic_compare_exchange_u32_acq_rel(
4513        &self,
4514        futex_addr: FutexAddress,
4515        current: u32,
4516        new: u32,
4517    ) -> CompareExchangeResult<u32> {
4518        if let Err(e) = self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None) {
4519            return CompareExchangeResult::Error(e);
4520        }
4521        let Some(usercopy) = usercopy() else {
4522            unreachable!("Atomic compare/exchange requires usercopy.");
4523        };
4524        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_u32_acq_rel(
4525            futex_addr.ptr(),
4526            current,
4527            new,
4528        ))
4529    }
4530
4531    pub fn atomic_compare_exchange_weak_u32_acq_rel(
4532        &self,
4533        futex_addr: FutexAddress,
4534        current: u32,
4535        new: u32,
4536    ) -> CompareExchangeResult<u32> {
4537        if let Err(e) = self.ensure_range_mapped_in_user_vmar(futex_addr.into(), None) {
4538            return CompareExchangeResult::Error(e);
4539        }
4540        let Some(usercopy) = usercopy() else {
4541            unreachable!("Atomic compare/exchange requires usercopy.");
4542        };
4543        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_weak_u32_acq_rel(
4544            futex_addr.ptr(),
4545            current,
4546            new,
4547        ))
4548    }
4549}
4550
4551/// The result of an atomic compare/exchange operation on user memory.
4552#[derive(Debug, Clone)]
4553pub enum CompareExchangeResult<T> {
4554    /// The current value provided matched the one observed in memory and the new value provided
4555    /// was written.
4556    Success,
4557    /// The provided current value did not match the current value in memory.
4558    Stale { observed: T },
4559    /// There was a general error while accessing the requested memory.
4560    Error(Errno),
4561}
4562
4563impl<T> CompareExchangeResult<T> {
4564    fn from_usercopy(usercopy_res: Result<Result<T, T>, ()>) -> Self {
4565        match usercopy_res {
4566            Ok(Ok(_)) => Self::Success,
4567            Ok(Err(observed)) => Self::Stale { observed },
4568            Err(()) => Self::Error(errno!(EFAULT)),
4569        }
4570    }
4571}
4572
4573impl<T> From<Errno> for CompareExchangeResult<T> {
4574    fn from(e: Errno) -> Self {
4575        Self::Error(e)
4576    }
4577}
4578
4579/// The user-space address at which a mapping should be placed. Used by [`MemoryManager::map`].
4580#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4581pub enum DesiredAddress {
4582    /// Map at any address chosen by the kernel.
4583    Any,
4584    /// The address is a hint. If the address overlaps an existing mapping a different address may
4585    /// be chosen.
4586    Hint(UserAddress),
4587    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
4588    /// overwrite it), mapping fails.
4589    Fixed(UserAddress),
4590    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
4591    /// overwrite it), they should be unmapped.
4592    FixedOverwrite(UserAddress),
4593}
4594
4595/// The user-space address at which a mapping should be placed. Used by [`map_in_vmar`].
4596#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4597enum SelectedAddress {
4598    /// See DesiredAddress::Fixed.
4599    Fixed(UserAddress),
4600    /// See DesiredAddress::FixedOverwrite.
4601    FixedOverwrite(UserAddress),
4602}
4603
4604impl SelectedAddress {
4605    fn addr(&self) -> UserAddress {
4606        match self {
4607            SelectedAddress::Fixed(addr) => *addr,
4608            SelectedAddress::FixedOverwrite(addr) => *addr,
4609        }
4610    }
4611}
4612
4613/// Write one line of the memory map intended for adding to `/proc/self/maps`.
4614fn write_map(
4615    task: &Task,
4616    sink: &mut DynamicFileBuf,
4617    state: &MemoryManagerState,
4618    range: &Range<UserAddress>,
4619    map: &Mapping,
4620) -> Result<(), Errno> {
4621    let line_length = write!(
4622        sink,
4623        "{:08x}-{:08x} {}{}{}{} {:08x} 00:00 {} ",
4624        range.start.ptr(),
4625        range.end.ptr(),
4626        if map.can_read() { 'r' } else { '-' },
4627        if map.can_write() { 'w' } else { '-' },
4628        if map.can_exec() { 'x' } else { '-' },
4629        if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' },
4630        match state.get_mapping_backing(map) {
4631            MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
4632            MappingBacking::PrivateAnonymous => 0,
4633        },
4634        if let MappingNameRef::File(file) = &map.name() { file.name.entry.node.ino } else { 0 }
4635    )?;
4636    let fill_to_name = |sink: &mut DynamicFileBuf| {
4637        // The filename goes at >= the 74th column (73rd when zero indexed)
4638        for _ in line_length..73 {
4639            sink.write(b" ");
4640        }
4641    };
4642    match &map.name() {
4643        MappingNameRef::None | MappingNameRef::AioContext(_) => {
4644            if map.flags().contains(MappingFlags::SHARED)
4645                && map.flags().contains(MappingFlags::ANONYMOUS)
4646            {
4647                // See proc(5), "/proc/[pid]/map_files/"
4648                fill_to_name(sink);
4649                sink.write(b"/dev/zero (deleted)");
4650            }
4651        }
4652        MappingNameRef::Stack => {
4653            fill_to_name(sink);
4654            sink.write(b"[stack]");
4655        }
4656        MappingNameRef::Heap => {
4657            fill_to_name(sink);
4658            sink.write(b"[heap]");
4659        }
4660        MappingNameRef::Vdso => {
4661            fill_to_name(sink);
4662            sink.write(b"[vdso]");
4663        }
4664        MappingNameRef::Vvar => {
4665            fill_to_name(sink);
4666            sink.write(b"[vvar]");
4667        }
4668        MappingNameRef::File(file) => {
4669            fill_to_name(sink);
4670            // File names can have newlines that need to be escaped before printing.
4671            // According to https://man7.org/linux/man-pages/man5/proc.5.html the only
4672            // escaping applied to paths is replacing newlines with an octal sequence.
4673            let path = file.name.path(&task.live()?.fs());
4674            sink.write_iter(
4675                path.iter()
4676                    .flat_map(|b| if *b == b'\n' { b"\\012" } else { std::slice::from_ref(b) })
4677                    .copied(),
4678            );
4679        }
4680        MappingNameRef::Vma(name) => {
4681            fill_to_name(sink);
4682            sink.write(b"[anon:");
4683            sink.write(name.as_bytes());
4684            sink.write(b"]");
4685        }
4686        MappingNameRef::Ashmem(name) => {
4687            fill_to_name(sink);
4688            sink.write(b"/dev/ashmem/");
4689            sink.write(name.as_bytes());
4690        }
4691    }
4692    sink.write(b"\n");
4693    Ok(())
4694}
4695
/// Aggregate memory statistics for an address space.
///
/// The field descriptions below reflect how the stats-gathering code in this file fills them
/// in. All values are byte counts.
#[derive(Default)]
pub struct MemoryStats {
    /// Sum of the sizes of the Zircon mappings that were visited.
    pub vm_size: usize,
    /// Sum of the committed bytes of the visited mappings.
    pub vm_rss: usize,
    /// High water mark of `vm_rss`. Currently a stub value, pending kernel support
    /// (https://fxbug.dev/396221597).
    pub vm_rss_hwm: usize,
    /// Populated bytes that are not committed.
    pub vm_swap: usize,
    /// Committed bytes in mappings flagged `LOCKED`.
    pub vm_lck: usize,
    /// Size of `ELF_BINARY` mappings that are writable.
    pub vm_data: usize,
    /// NOTE(review): presumably the size of stack mappings; nothing visible in this part of
    /// the file sets it, so confirm against the code that fills it in.
    pub vm_stack: usize,
    /// Size of `ELF_BINARY` mappings that are executable.
    pub vm_exe: usize,
    /// Committed bytes in mappings that are not shared and are flagged `ANONYMOUS`.
    pub rss_anonymous: usize,
    /// Committed bytes in mappings that are neither shared nor anonymous and are named after
    /// a file.
    pub rss_file: usize,
    /// Committed bytes in mappings flagged `SHARED`.
    pub rss_shared: usize,
}
4710
4711/// Implements `/proc/self/maps`.
4712#[derive(Clone)]
4713pub struct ProcMapsFile {
4714    mm: Weak<MemoryManager>,
4715    task: Weak<Task>,
4716}
4717impl ProcMapsFile {
4718    pub fn new(task: Arc<Task>) -> DynamicFile<Self> {
4719        // "maps" is empty for kthreads, rather than inaccessible.
4720        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4721        DynamicFile::new(Self { mm, task: Arc::downgrade(&task) })
4722    }
4723}
4724
4725impl SequenceFileSource for ProcMapsFile {
4726    type Cursor = UserAddress;
4727
4728    fn next(
4729        &self,
4730        _current_task: &CurrentTask,
4731        cursor: UserAddress,
4732        sink: &mut DynamicFileBuf,
4733    ) -> Result<Option<UserAddress>, Errno> {
4734        let task = Task::from_weak(&self.task)?;
4735        // /proc/<pid>/maps is empty for kthreads and tasks whose memory manager has changed.
4736        let Some(mm) = self.mm.upgrade() else {
4737            return Ok(None);
4738        };
4739        let state = mm.state.read();
4740        if let Some((range, map)) = state.mappings.find_at_or_after(cursor) {
4741            write_map(&task, sink, &state, range, map)?;
4742            return Ok(Some(range.end));
4743        }
4744        Ok(None)
4745    }
4746}
4747
4748#[derive(Clone)]
4749pub struct ProcSmapsFile {
4750    mm: Weak<MemoryManager>,
4751    task: Weak<Task>,
4752}
4753impl ProcSmapsFile {
4754    pub fn new(task: Arc<Task>) -> DynamicFile<Self> {
4755        // "smaps" is empty for kthreads, rather than inaccessible.
4756        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4757        DynamicFile::new(Self { mm, task: Arc::downgrade(&task) })
4758    }
4759}
4760
4761impl DynamicFileSource for ProcSmapsFile {
4762    fn generate(&self, current_task: &CurrentTask, sink: &mut DynamicFileBuf) -> Result<(), Errno> {
4763        let page_size_kb = *PAGE_SIZE / 1024;
4764        let task = Task::from_weak(&self.task)?;
4765        // /proc/<pid>/smaps is empty for kthreads and tasks whose memory manager has changed.
4766        let Some(mm) = self.mm.upgrade() else {
4767            return Ok(());
4768        };
4769
4770        // Ensure all mappings are mapped into the user vmar.
4771        let max_addr = mm.maximum_valid_user_address;
4772        mm.ensure_range_mapped_in_user_vmar(UserAddress::from(0), Some(max_addr.ptr()))?;
4773
4774        let state = mm.state.read();
4775        let committed_bytes_vec = mm.with_zx_mappings(current_task, |zx_mappings| {
4776            let mut zx_memory_info = RangeMap::<UserAddress, usize>::default();
4777            for idx in 0..zx_mappings.len() {
4778                let zx_mapping = zx_mappings[idx];
4779                // RangeMap uses #[must_use] for its default usecase but this drop is trivial.
4780                let _ = zx_memory_info.insert(
4781                    UserAddress::from_ptr(zx_mapping.base)
4782                        ..UserAddress::from_ptr(zx_mapping.base + zx_mapping.size),
4783                    idx,
4784                );
4785            }
4786
4787            let mut committed_bytes_vec = Vec::new();
4788            for (mm_range, mm_mapping) in state.mappings.iter() {
4789                let mut committed_bytes = 0;
4790
4791                for (zx_range, zx_mapping_idx) in zx_memory_info.range(mm_range.clone()) {
4792                    let intersect_range = zx_range.intersect(mm_range);
4793                    let zx_mapping = zx_mappings[*zx_mapping_idx];
4794                    let zx_details = zx_mapping.details();
4795                    let Some(zx_details) = zx_details.as_mapping() else { continue };
4796                    let zx_committed_bytes = zx_details.committed_bytes;
4797
4798                    // TODO(https://fxbug.dev/419882465): It can happen that the same Zircon mapping
4799                    // is covered by more than one Starnix mapping. In this case we don't have
4800                    // enough granularity to answer the question of how many committed bytes belong
4801                    // to one mapping or another. Make a best-effort approximation by dividing the
4802                    // committed bytes of a Zircon mapping proportionally.
4803                    committed_bytes += if intersect_range != *zx_range {
4804                        let intersection_size =
4805                            intersect_range.end.ptr() - intersect_range.start.ptr();
4806                        let part = intersection_size as f32 / zx_mapping.size as f32;
4807                        let prorated_committed_bytes: f32 = part * zx_committed_bytes as f32;
4808                        prorated_committed_bytes as u64
4809                    } else {
4810                        zx_committed_bytes as u64
4811                    };
4812                    assert_eq!(
4813                        match state.get_mapping_backing(mm_mapping) {
4814                            MappingBacking::Memory(m) => m.memory().get_koid(),
4815                            MappingBacking::PrivateAnonymous =>
4816                                mm.mapping_context.private_anonymous.backing.get_koid(),
4817                        },
4818                        zx_details.vmo_koid,
4819                        "MemoryManager and Zircon must agree on which VMO is mapped in this range",
4820                    );
4821                }
4822                committed_bytes_vec.push(committed_bytes);
4823            }
4824            Ok(committed_bytes_vec)
4825        })?;
4826
4827        for ((mm_range, mm_mapping), committed_bytes) in
4828            state.mappings.iter().zip(committed_bytes_vec.into_iter())
4829        {
4830            write_map(&task, sink, &state, mm_range, mm_mapping)?;
4831
4832            let size_kb = (mm_range.end.ptr() - mm_range.start.ptr()) / 1024;
4833            writeln!(sink, "Size:           {size_kb:>8} kB",)?;
4834            let share_count = match state.get_mapping_backing(mm_mapping) {
4835                MappingBacking::Memory(backing) => {
4836                    let memory = backing.memory();
4837                    if memory.is_clock() {
4838                        // Clock memory mappings are not shared in a meaningful way.
4839                        1
4840                    } else {
4841                        let memory_info = backing.memory().info()?;
4842                        memory_info.share_count as u64
4843                    }
4844                }
4845                MappingBacking::PrivateAnonymous => {
4846                    1 // Private mapping
4847                }
4848            };
4849
4850            let rss_kb = committed_bytes / 1024;
4851            writeln!(sink, "Rss:            {rss_kb:>8} kB")?;
4852
4853            let pss_kb = if mm_mapping.flags().contains(MappingFlags::SHARED) {
4854                rss_kb / share_count
4855            } else {
4856                rss_kb
4857            };
4858            writeln!(sink, "Pss:            {pss_kb:>8} kB")?;
4859
4860            track_stub!(TODO("https://fxbug.dev/322874967"), "smaps dirty pages");
4861            let (shared_dirty_kb, private_dirty_kb) = (0, 0);
4862
4863            let is_shared = share_count > 1;
4864            let shared_clean_kb = if is_shared { rss_kb } else { 0 };
4865            writeln!(sink, "Shared_Clean:   {shared_clean_kb:>8} kB")?;
4866            writeln!(sink, "Shared_Dirty:   {shared_dirty_kb:>8} kB")?;
4867
4868            let private_clean_kb = if is_shared { 0 } else { rss_kb };
4869            writeln!(sink, "Private_Clean:  {private_clean_kb:>8} kB")?;
4870            writeln!(sink, "Private_Dirty:  {private_dirty_kb:>8} kB")?;
4871
4872            let anonymous_kb = if mm_mapping.private_anonymous() { rss_kb } else { 0 };
4873            writeln!(sink, "Anonymous:      {anonymous_kb:>8} kB")?;
4874            writeln!(sink, "KernelPageSize: {page_size_kb:>8} kB")?;
4875            writeln!(sink, "MMUPageSize:    {page_size_kb:>8} kB")?;
4876
4877            let locked_kb =
4878                if mm_mapping.flags().contains(MappingFlags::LOCKED) { rss_kb } else { 0 };
4879            writeln!(sink, "Locked:         {locked_kb:>8} kB")?;
4880            writeln!(sink, "VmFlags: {}", mm_mapping.vm_flags())?;
4881
4882            track_stub!(TODO("https://fxbug.dev/297444691"), "optional smaps fields");
4883        }
4884
4885        Ok(())
4886    }
4887}
4888
4889/// Creates a memory object that can be used in an anonymous mapping for the `mmap` syscall.
4890pub fn create_anonymous_mapping_memory(size: u64) -> Result<Arc<MemoryObject>, Errno> {
4891    // mremap can grow memory regions, so make sure the memory object is resizable.
4892    let mut memory = MemoryObject::from(
4893        zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, size).map_err(|s| match s {
4894            zx::Status::NO_MEMORY => errno!(ENOMEM),
4895            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
4896            _ => impossible_error(s),
4897        })?,
4898    )
4899    .with_zx_name(b"starnix:memory_manager");
4900
4901    memory.set_zx_name(b"starnix-anon");
4902
4903    // TODO(https://fxbug.dev/42056890): Audit replace_as_executable usage
4904    memory = memory.replace_as_executable(&VMEX_RESOURCE).map_err(impossible_error)?;
4905    Ok(Arc::new(memory))
4906}
4907
4908fn generate_random_offset_for_aslr(arch_width: ArchWidth) -> usize {
4909    // Generate a number with ASLR_RANDOM_BITS.
4910    let randomness = {
4911        let random_bits =
4912            if arch_width.is_arch32() { ASLR_32_RANDOM_BITS } else { ASLR_RANDOM_BITS };
4913        let mask = (1 << random_bits) - 1;
4914        let mut bytes = [0; std::mem::size_of::<usize>()];
4915        starnix_crypto::cprng_draw(&mut bytes);
4916        usize::from_le_bytes(bytes) & mask
4917    };
4918
4919    // Transform it into a page-aligned offset.
4920    randomness * (*PAGE_SIZE as usize)
4921}
4922
4923#[cfg(test)]
4924mod tests {
4925    use super::*;
4926    use crate::mm::memory_accessor::MemoryAccessorExt;
4927    use crate::mm::syscalls::do_mmap;
4928    use crate::task::syscalls::sys_prctl;
4929    use crate::testing::*;
4930    use crate::vfs::FdNumber;
4931    use assert_matches::assert_matches;
4932    use itertools::assert_equal;
4933    use starnix_sync::{FileOpsCore, LockEqualOrBefore};
4934    use starnix_uapi::user_address::{UserCString, UserRef};
4935    use starnix_uapi::{
4936        MAP_ANONYMOUS, MAP_FIXED, MAP_GROWSDOWN, MAP_PRIVATE, MAP_SHARED, PR_SET_VMA,
4937        PR_SET_VMA_ANON_NAME, PROT_NONE, PROT_READ,
4938    };
4939    use std::ffi::CString;
4940    use zerocopy::{FromBytes, Immutable, KnownLayout};
4941
4942    #[::fuchsia::test]
4943    fn test_mapping_flags() {
4944        let options = MappingOptions::ANONYMOUS;
4945        let access_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
4946        let mapping_flags = MappingFlags::from_access_flags_and_options(access_flags, options);
4947        assert_eq!(mapping_flags.access_flags(), access_flags);
4948        assert_eq!(mapping_flags.options(), options);
4949
4950        let new_access_flags = ProtectionFlags::READ | ProtectionFlags::EXEC;
4951        let adusted_mapping_flags = mapping_flags.with_access_flags(new_access_flags);
4952        assert_eq!(adusted_mapping_flags.access_flags(), new_access_flags);
4953        assert_eq!(adusted_mapping_flags.options(), options);
4954    }
4955
4956    #[::fuchsia::test]
4957    async fn test_brk() {
4958        spawn_kernel_and_run(async |locked, current_task| {
4959            let mm = current_task.mm().unwrap();
4960
4961            // Look up the given addr in the mappings table.
4962            let get_range = |addr: UserAddress| {
4963                let state = mm.state.read();
4964                state
4965                    .mappings
4966                    .map
4967                    .get(addr)
4968                    .map(|(range, mapping)| (range.clone(), mapping.clone()))
4969            };
4970
4971            // Initialize the program break.
4972            let base_addr = mm
4973                .set_brk(locked, &current_task, UserAddress::default())
4974                .expect("failed to set initial program break");
4975            assert!(base_addr > UserAddress::default());
4976
4977            // Page containing the program break address should not be mapped.
4978            assert_eq!(get_range(base_addr), None);
4979
4980            // Growing it by a single byte results in that page becoming mapped.
4981            let addr0 = mm
4982                .set_brk(locked, &current_task, (base_addr + 1u64).unwrap())
4983                .expect("failed to grow brk");
4984            assert!(addr0 > base_addr);
4985            let (range0, _) = get_range(base_addr).expect("base_addr should be mapped");
4986            assert_eq!(range0.start, base_addr);
4987            assert_eq!(range0.end, (base_addr + *PAGE_SIZE).unwrap());
4988
4989            // Grow the program break by another byte, which won't be enough to cause additional pages to be mapped.
4990            let addr1 = mm
4991                .set_brk(locked, &current_task, (base_addr + 2u64).unwrap())
4992                .expect("failed to grow brk");
4993            assert_eq!(addr1, (base_addr + 2u64).unwrap());
4994            let (range1, _) = get_range(base_addr).expect("base_addr should be mapped");
4995            assert_eq!(range1.start, range0.start);
4996            assert_eq!(range1.end, range0.end);
4997
4998            // Grow the program break by a non-trival amount and observe the larger mapping.
4999            let addr2 = mm
5000                .set_brk(locked, &current_task, (base_addr + 24893u64).unwrap())
5001                .expect("failed to grow brk");
5002            assert_eq!(addr2, (base_addr + 24893u64).unwrap());
5003            let (range2, _) = get_range(base_addr).expect("base_addr should be mapped");
5004            assert_eq!(range2.start, base_addr);
5005            assert_eq!(range2.end, addr2.round_up(*PAGE_SIZE).unwrap());
5006
5007            // Shrink the program break and observe the smaller mapping.
5008            let addr3 = mm
5009                .set_brk(locked, &current_task, (base_addr + 14832u64).unwrap())
5010                .expect("failed to shrink brk");
5011            assert_eq!(addr3, (base_addr + 14832u64).unwrap());
5012            let (range3, _) = get_range(base_addr).expect("base_addr should be mapped");
5013            assert_eq!(range3.start, base_addr);
5014            assert_eq!(range3.end, addr3.round_up(*PAGE_SIZE).unwrap());
5015
5016            // Shrink the program break close to zero and observe the smaller mapping.
5017            let addr4 = mm
5018                .set_brk(locked, &current_task, (base_addr + 3u64).unwrap())
5019                .expect("failed to drastically shrink brk");
5020            assert_eq!(addr4, (base_addr + 3u64).unwrap());
5021            let (range4, _) = get_range(base_addr).expect("base_addr should be mapped");
5022            assert_eq!(range4.start, base_addr);
5023            assert_eq!(range4.end, addr4.round_up(*PAGE_SIZE).unwrap());
5024
5025            // Shrink the program break to zero and observe that the mapping is entirely gone.
5026            let addr5 = mm
5027                .set_brk(locked, &current_task, base_addr)
5028                .expect("failed to drastically shrink brk to zero");
5029            assert_eq!(addr5, base_addr);
5030            assert_eq!(get_range(base_addr), None);
5031        })
5032        .await;
5033    }
5034
5035    #[::fuchsia::test]
5036    async fn test_mm_exec() {
5037        spawn_kernel_and_run(async |locked, current_task| {
5038            let mm = current_task.mm().unwrap();
5039
5040            let has = |addr: UserAddress| -> bool {
5041                let state = mm.state.read();
5042                state.mappings.get(addr).is_some()
5043            };
5044
5045            let brk_addr = mm
5046                .set_brk(locked, &current_task, UserAddress::default())
5047                .expect("failed to set initial program break");
5048            assert!(brk_addr > UserAddress::default());
5049
5050            // Allocate a single page of BRK space, so that the break base address is mapped.
5051            let _ = mm
5052                .set_brk(locked, &current_task, (brk_addr + 1u64).unwrap())
5053                .expect("failed to grow program break");
5054            assert!(has(brk_addr));
5055
5056            let mapped_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5057            assert!(mapped_addr > UserAddress::default());
5058            assert!(has(mapped_addr));
5059
5060            let node = current_task.lookup_path_from_root(locked, "/".into()).unwrap();
5061            let new_mm = MemoryManager::exec(
5062                current_task.thread_group().root_vmar.unowned(),
5063                current_task.live().mm.to_option_arc(),
5064                node,
5065                ArchWidth::Arch64,
5066            )
5067            .expect("failed to exec memory manager");
5068            current_task.live().mm.update(Some(new_mm));
5069
5070            assert!(!has(brk_addr));
5071            assert!(!has(mapped_addr));
5072
5073            // Check that the old addresses are actually available for mapping.
5074            let brk_addr2 = map_memory(locked, &current_task, brk_addr, *PAGE_SIZE);
5075            assert_eq!(brk_addr, brk_addr2);
5076            let mapped_addr2 = map_memory(locked, &current_task, mapped_addr, *PAGE_SIZE);
5077            assert_eq!(mapped_addr, mapped_addr2);
5078        })
5079        .await;
5080    }
5081
5082    #[::fuchsia::test]
5083    async fn test_get_contiguous_mappings_at() {
5084        spawn_kernel_and_run(async |locked, current_task| {
5085            let mm = current_task.mm().unwrap();
5086            let context = &mm.mapping_context;
5087
5088            // Create four one-page mappings with a hole between the third one and the fourth one.
5089            let page_size = *PAGE_SIZE as usize;
5090            let addr_a = (mm.base_addr + 10 * page_size).unwrap();
5091            let addr_b = (mm.base_addr + 11 * page_size).unwrap();
5092            let addr_c = (mm.base_addr + 12 * page_size).unwrap();
5093            let addr_d = (mm.base_addr + 14 * page_size).unwrap();
5094            assert_eq!(map_memory(locked, &current_task, addr_a, *PAGE_SIZE), addr_a);
5095            assert_eq!(map_memory(locked, &current_task, addr_b, *PAGE_SIZE), addr_b);
5096            assert_eq!(map_memory(locked, &current_task, addr_c, *PAGE_SIZE), addr_c);
5097            assert_eq!(map_memory(locked, &current_task, addr_d, *PAGE_SIZE), addr_d);
5098
5099            {
5100                let mm_state = mm.state.read();
5101                // Verify that requesting an unmapped address returns an empty iterator.
5102                assert_equal(
5103                    mm_state
5104                        .get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 50, &context)
5105                        .unwrap(),
5106                    vec![],
5107                );
5108                assert_equal(
5109                    mm_state
5110                        .get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 200, &context)
5111                        .unwrap(),
5112                    vec![],
5113                );
5114
5115                // Verify that requesting zero bytes returns an empty iterator.
5116                assert_equal(
5117                    mm_state.get_contiguous_mappings_at(addr_a, 0, &context).unwrap(),
5118                    vec![],
5119                );
5120
5121                // Verify errors.
5122                assert_eq!(
5123                    mm_state
5124                        .get_contiguous_mappings_at(UserAddress::from(100), usize::MAX, &context)
5125                        .err()
5126                        .unwrap(),
5127                    errno!(EFAULT)
5128                );
5129                assert_eq!(
5130                    mm_state
5131                        .get_contiguous_mappings_at(
5132                            (context.max_address() + 1u64).unwrap(),
5133                            0,
5134                            &context
5135                        )
5136                        .err()
5137                        .unwrap(),
5138                    errno!(EFAULT)
5139                );
5140            }
5141
5142            assert_eq!(mm.get_mapping_count(), 2);
5143            let mm_state = mm.state.read();
5144            let (map_a, map_b) = {
5145                let mut it = mm_state.mappings.iter();
5146                (it.next().unwrap().1, it.next().unwrap().1)
5147            };
5148
5149            assert_equal(
5150                mm_state.get_contiguous_mappings_at(addr_a, page_size, &context).unwrap(),
5151                vec![(map_a, page_size)],
5152            );
5153
5154            assert_equal(
5155                mm_state.get_contiguous_mappings_at(addr_a, page_size / 2, &context).unwrap(),
5156                vec![(map_a, page_size / 2)],
5157            );
5158
5159            assert_equal(
5160                mm_state.get_contiguous_mappings_at(addr_a, page_size * 3, &context).unwrap(),
5161                vec![(map_a, page_size * 3)],
5162            );
5163
5164            assert_equal(
5165                mm_state.get_contiguous_mappings_at(addr_b, page_size, &context).unwrap(),
5166                vec![(map_a, page_size)],
5167            );
5168
5169            assert_equal(
5170                mm_state.get_contiguous_mappings_at(addr_d, page_size, &context).unwrap(),
5171                vec![(map_b, page_size)],
5172            );
5173
5174            // Verify that results stop if there is a hole.
5175            assert_equal(
5176                mm_state
5177                    .get_contiguous_mappings_at(
5178                        (addr_a + page_size / 2).unwrap(),
5179                        page_size * 10,
5180                        &context,
5181                    )
5182                    .unwrap(),
5183                vec![(map_a, page_size * 2 + page_size / 2)],
5184            );
5185
5186            // Verify that results stop at the last mapped page.
5187            assert_equal(
5188                mm_state.get_contiguous_mappings_at(addr_d, page_size * 10, &context).unwrap(),
5189                vec![(map_b, page_size)],
5190            );
5191        })
5192        .await;
5193    }
5194
5195    #[::fuchsia::test]
5196    async fn test_read_write_crossing_mappings() {
5197        spawn_kernel_and_run(async |locked, current_task| {
5198            let mm = current_task.mm().unwrap();
5199            let ma = current_task.deref();
5200
5201            // Map two contiguous pages at fixed addresses, but backed by distinct mappings.
5202            let page_size = *PAGE_SIZE;
5203            let addr = (mm.base_addr + 10 * page_size).unwrap();
5204            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5205            assert_eq!(
5206                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5207                (addr + page_size).unwrap()
5208            );
5209            // Mappings get merged since they are baked by the same memory object
5210            assert_eq!(mm.get_mapping_count(), 1);
5211
5212            // Write a pattern crossing our two mappings.
5213            let test_addr = (addr + page_size / 2).unwrap();
5214            let data: Vec<u8> = (0..page_size).map(|i| (i % 256) as u8).collect();
5215            ma.write_memory(test_addr, &data).expect("failed to write test data");
5216
5217            // Read it back.
5218            let data_readback =
5219                ma.read_memory_to_vec(test_addr, data.len()).expect("failed to read test data");
5220            assert_eq!(&data, &data_readback);
5221        })
5222        .await;
5223    }
5224
5225    #[::fuchsia::test]
5226    async fn test_read_write_errors() {
5227        spawn_kernel_and_run(async |locked, current_task| {
5228            let ma = current_task.deref();
5229
5230            let page_size = *PAGE_SIZE;
5231            let addr = map_memory(locked, &current_task, UserAddress::default(), page_size);
5232            let buf = vec![0u8; page_size as usize];
5233
5234            // Verify that accessing data that is only partially mapped is an error.
5235            let partial_addr_before = (addr - page_size / 2).unwrap();
5236            assert_eq!(ma.write_memory(partial_addr_before, &buf), error!(EFAULT));
5237            assert_eq!(ma.read_memory_to_vec(partial_addr_before, buf.len()), error!(EFAULT));
5238            let partial_addr_after = (addr + page_size / 2).unwrap();
5239            assert_eq!(ma.write_memory(partial_addr_after, &buf), error!(EFAULT));
5240            assert_eq!(ma.read_memory_to_vec(partial_addr_after, buf.len()), error!(EFAULT));
5241
5242            // Verify that accessing unmapped memory is an error.
5243            let unmapped_addr = (addr - 10 * page_size).unwrap();
5244            assert_eq!(ma.write_memory(unmapped_addr, &buf), error!(EFAULT));
5245            assert_eq!(ma.read_memory_to_vec(unmapped_addr, buf.len()), error!(EFAULT));
5246
5247            // However, accessing zero bytes in unmapped memory is not an error.
5248            ma.write_memory(unmapped_addr, &[]).expect("failed to write no data");
5249            ma.read_memory_to_vec(unmapped_addr, 0).expect("failed to read no data");
5250        })
5251        .await;
5252    }
5253
5254    #[::fuchsia::test]
5255    async fn test_read_c_string_to_vec_large() {
5256        spawn_kernel_and_run(async |locked, current_task| {
5257            let mm = current_task.mm().unwrap();
5258            let ma = current_task.deref();
5259
5260            let page_size = *PAGE_SIZE;
5261            let max_size = 4 * page_size as usize;
5262            let addr = (mm.base_addr + 10 * page_size).unwrap();
5263
5264            assert_eq!(map_memory(locked, &current_task, addr, max_size as u64), addr);
5265
5266            let mut random_data = vec![0; max_size];
5267            starnix_crypto::cprng_draw(&mut random_data);
5268            // Remove all NUL bytes.
5269            for i in 0..random_data.len() {
5270                if random_data[i] == 0 {
5271                    random_data[i] = 1;
5272                }
5273            }
5274            random_data[max_size - 1] = 0;
5275
5276            ma.write_memory(addr, &random_data).expect("failed to write test string");
5277            // We should read the same value minus the last byte (NUL char).
5278            assert_eq!(
5279                ma.read_c_string_to_vec(UserCString::new(current_task, addr), max_size).unwrap(),
5280                random_data[..max_size - 1]
5281            );
5282        })
5283        .await;
5284    }
5285
5286    #[::fuchsia::test]
5287    async fn test_read_c_string_to_vec() {
5288        spawn_kernel_and_run(async |locked, current_task| {
5289            let mm = current_task.mm().unwrap();
5290            let ma = current_task.deref();
5291
5292            let page_size = *PAGE_SIZE;
5293            let max_size = 2 * page_size as usize;
5294            let addr = (mm.base_addr + 10 * page_size).unwrap();
5295
5296            // Map a page at a fixed address and write an unterminated string at the end of it.
5297            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5298            let test_str = b"foo!";
5299            let test_addr =
5300                addr.checked_add(page_size as usize).unwrap().checked_sub(test_str.len()).unwrap();
5301            ma.write_memory(test_addr, test_str).expect("failed to write test string");
5302
5303            // Expect error if the string is not terminated.
5304            assert_eq!(
5305                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size),
5306                error!(ENAMETOOLONG)
5307            );
5308
5309            // Expect success if the string is terminated.
5310            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
5311            assert_eq!(
5312                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
5313                    .unwrap(),
5314                "foo"
5315            );
5316
5317            // Expect success if the string spans over two mappings.
5318            assert_eq!(
5319                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5320                (addr + page_size).unwrap()
5321            );
5322            // TODO: Adjacent private anonymous mappings are collapsed. To test this case this test needs to
5323            // provide a backing for the second mapping.
5324            // assert_eq!(mm.get_mapping_count(), 2);
5325            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
5326                .expect("failed to write extra chars");
5327            assert_eq!(
5328                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
5329                    .unwrap(),
5330                "foobar",
5331            );
5332
5333            // Expect error if the string exceeds max limit
5334            assert_eq!(
5335                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), 2),
5336                error!(ENAMETOOLONG)
5337            );
5338
5339            // Expect error if the address is invalid.
5340            assert_eq!(
5341                ma.read_c_string_to_vec(UserCString::null(current_task), max_size),
5342                error!(EFAULT)
5343            );
5344        })
5345        .await;
5346    }
5347
5348    #[::fuchsia::test]
5349    async fn can_read_argv_like_regions() {
5350        spawn_kernel_and_run(async |locked, current_task| {
5351            let ma = current_task.deref();
5352
5353            // Map a page.
5354            let page_size = *PAGE_SIZE;
5355            let addr = map_memory_anywhere(locked, &current_task, page_size);
5356            assert!(!addr.is_null());
5357
5358            // Write an unterminated string.
5359            let mut payload = "first".as_bytes().to_vec();
5360            let mut expected_parses = vec![];
5361            ma.write_memory(addr, &payload).unwrap();
5362
5363            // Expect success if the string is terminated.
5364            expected_parses.push(payload.clone());
5365            payload.push(0);
5366            ma.write_memory(addr, &payload).unwrap();
5367            assert_eq!(
5368                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
5369                expected_parses,
5370            );
5371
5372            // Make sure we can parse multiple strings from the same region.
5373            let second = b"second";
5374            payload.extend(second);
5375            payload.push(0);
5376            expected_parses.push(second.to_vec());
5377
5378            let third = b"third";
5379            payload.extend(third);
5380            payload.push(0);
5381            expected_parses.push(third.to_vec());
5382
5383            ma.write_memory(addr, &payload).unwrap();
5384            assert_eq!(
5385                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
5386                expected_parses,
5387            );
5388        })
5389        .await;
5390    }
5391
5392    #[::fuchsia::test]
5393    async fn truncate_argv_like_regions() {
5394        spawn_kernel_and_run(async |locked, current_task| {
5395            let ma = current_task.deref();
5396
5397            // Map a page.
5398            let page_size = *PAGE_SIZE;
5399            let addr = map_memory_anywhere(locked, &current_task, page_size);
5400            assert!(!addr.is_null());
5401
5402            let payload = b"first\0second\0third\0";
5403            ma.write_memory(addr, payload).unwrap();
5404            assert_eq!(
5405                ma.read_nul_delimited_c_string_list(addr, payload.len() - 3).unwrap(),
5406                vec![b"first".to_vec(), b"second".to_vec(), b"thi".to_vec()],
5407                "Skipping last three bytes of payload should skip last two bytes of 3rd string"
5408            );
5409        })
5410        .await;
5411    }
5412
5413    #[::fuchsia::test]
5414    async fn test_read_c_string() {
5415        spawn_kernel_and_run(async |locked, current_task| {
5416            let mm = current_task.mm().unwrap();
5417            let ma = current_task.deref();
5418
5419            let page_size = *PAGE_SIZE;
5420            let buf_cap = 2 * page_size as usize;
5421            let mut buf = Vec::with_capacity(buf_cap);
5422            // We can't just use `spare_capacity_mut` because `Vec::with_capacity`
5423            // returns a `Vec` with _at least_ the requested capacity.
5424            let buf = &mut buf.spare_capacity_mut()[..buf_cap];
5425            let addr = (mm.base_addr + 10 * page_size).unwrap();
5426
5427            // Map a page at a fixed address and write an unterminated string at the end of it..
5428            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
5429            let test_str = b"foo!";
5430            let test_addr = (addr + (page_size - test_str.len() as u64)).unwrap();
5431            ma.write_memory(test_addr, test_str).expect("failed to write test string");
5432
5433            // Expect error if the string is not terminated.
5434            assert_eq!(
5435                ma.read_c_string(UserCString::new(current_task, test_addr), buf),
5436                error!(ENAMETOOLONG)
5437            );
5438
5439            // Expect success if the string is terminated.
5440            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
5441            assert_eq!(
5442                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
5443                "foo"
5444            );
5445
5446            // Expect success if the string spans over two mappings.
5447            assert_eq!(
5448                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
5449                (addr + page_size).unwrap()
5450            );
5451            // TODO: To be multiple mappings we need to provide a file backing for the next page or the
5452            // mappings will be collapsed.
5453            //assert_eq!(mm.get_mapping_count(), 2);
5454            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
5455                .expect("failed to write extra chars");
5456            assert_eq!(
5457                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
5458                "foobar"
5459            );
5460
5461            // Expect error if the string does not fit in the provided buffer.
5462            assert_eq!(
5463                ma.read_c_string(
5464                    UserCString::new(current_task, test_addr),
5465                    &mut [MaybeUninit::uninit(); 2]
5466                ),
5467                error!(ENAMETOOLONG)
5468            );
5469
5470            // Expect error if the address is invalid.
5471            assert_eq!(ma.read_c_string(UserCString::null(current_task), buf), error!(EFAULT));
5472        })
5473        .await;
5474    }
5475
5476    #[::fuchsia::test]
5477    async fn test_find_next_unused_range() {
5478        spawn_kernel_and_run(async |locked, current_task| {
5479            let mm = current_task.mm().unwrap();
5480
5481            let mmap_top = mm.state.read().find_next_unused_range(0).unwrap().ptr();
5482            let page_size = *PAGE_SIZE as usize;
5483            assert!(mmap_top <= RESTRICTED_ASPACE_HIGHEST_ADDRESS);
5484
5485            // No mappings - top address minus requested size is available
5486            assert_eq!(
5487                mm.state.read().find_next_unused_range(page_size).unwrap(),
5488                UserAddress::from_ptr(mmap_top - page_size)
5489            );
5490
5491            // Fill it.
5492            let addr = UserAddress::from_ptr(mmap_top - page_size);
5493            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5494
5495            // The next available range is right before the new mapping.
5496            assert_eq!(
5497                mm.state.read().find_next_unused_range(page_size).unwrap(),
5498                UserAddress::from_ptr(addr.ptr() - page_size)
5499            );
5500
5501            // Allocate an extra page before a one-page gap.
5502            let addr2 = UserAddress::from_ptr(addr.ptr() - 2 * page_size);
5503            assert_eq!(map_memory(locked, &current_task, addr2, *PAGE_SIZE), addr2);
5504
5505            // Searching for one-page range still gives the same result
5506            assert_eq!(
5507                mm.state.read().find_next_unused_range(page_size).unwrap(),
5508                UserAddress::from_ptr(addr.ptr() - page_size)
5509            );
5510
5511            // Searching for a bigger range results in the area before the second mapping
5512            assert_eq!(
5513                mm.state.read().find_next_unused_range(2 * page_size).unwrap(),
5514                UserAddress::from_ptr(addr2.ptr() - 2 * page_size)
5515            );
5516
5517            // Searching for more memory than available should fail.
5518            assert_eq!(mm.state.read().find_next_unused_range(mmap_top), None);
5519        })
5520        .await;
5521    }
5522
5523    #[::fuchsia::test]
5524    async fn test_count_placements() {
5525        spawn_kernel_and_run(async |locked, current_task| {
5526            let mm = current_task.mm().unwrap();
5527
5528            // ten-page range
5529            let page_size = *PAGE_SIZE as usize;
5530            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5531                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5532
5533            assert_eq!(
5534                mm.state.read().count_possible_placements(11 * page_size, &subrange_ten),
5535                Some(0)
5536            );
5537            assert_eq!(
5538                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
5539                Some(1)
5540            );
5541            assert_eq!(
5542                mm.state.read().count_possible_placements(9 * page_size, &subrange_ten),
5543                Some(2)
5544            );
5545            assert_eq!(
5546                mm.state.read().count_possible_placements(page_size, &subrange_ten),
5547                Some(10)
5548            );
5549
5550            // map 6th page
5551            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
5552            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5553
5554            assert_eq!(
5555                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
5556                Some(0)
5557            );
5558            assert_eq!(
5559                mm.state.read().count_possible_placements(5 * page_size, &subrange_ten),
5560                Some(1)
5561            );
5562            assert_eq!(
5563                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
5564                Some(3)
5565            );
5566            assert_eq!(
5567                mm.state.read().count_possible_placements(page_size, &subrange_ten),
5568                Some(9)
5569            );
5570        })
5571        .await;
5572    }
5573
5574    #[::fuchsia::test]
5575    async fn test_pick_placement() {
5576        spawn_kernel_and_run(async |locked, current_task| {
5577            let mm = current_task.mm().unwrap();
5578
5579            let page_size = *PAGE_SIZE as usize;
5580            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5581                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5582
5583            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
5584            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5585            assert_eq!(
5586                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
5587                Some(3)
5588            );
5589
5590            assert_eq!(
5591                mm.state.read().pick_placement(4 * page_size, 0, &subrange_ten),
5592                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE))
5593            );
5594            assert_eq!(
5595                mm.state.read().pick_placement(4 * page_size, 1, &subrange_ten),
5596                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + page_size))
5597            );
5598            assert_eq!(
5599                mm.state.read().pick_placement(4 * page_size, 2, &subrange_ten),
5600                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 6 * page_size))
5601            );
5602        })
5603        .await;
5604    }
5605
5606    #[::fuchsia::test]
5607    async fn test_find_random_unused_range() {
5608        spawn_kernel_and_run(async |locked, current_task| {
5609            let mm = current_task.mm().unwrap();
5610
5611            // ten-page range
5612            let page_size = *PAGE_SIZE as usize;
5613            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5614                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5615
5616            for _ in 0..10 {
5617                let addr = mm.state.read().find_random_unused_range(page_size, &subrange_ten);
5618                assert!(addr.is_some());
5619                assert_eq!(
5620                    map_memory(locked, &current_task, addr.unwrap(), *PAGE_SIZE),
5621                    addr.unwrap()
5622                );
5623            }
5624            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5625        })
5626        .await;
5627    }
5628
5629    #[::fuchsia::test]
5630    async fn test_grows_down_near_aspace_base() {
5631        spawn_kernel_and_run(async |locked, current_task| {
5632            let mm = current_task.mm().unwrap();
5633
5634            let page_count = 10;
5635
5636            let page_size = *PAGE_SIZE as usize;
5637            let addr =
5638                (UserAddress::from_ptr(RESTRICTED_ASPACE_BASE) + page_count * page_size).unwrap();
5639            assert_eq!(
5640                map_memory_with_flags(
5641                    locked,
5642                    &current_task,
5643                    addr,
5644                    page_size as u64,
5645                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN
5646                ),
5647                addr
5648            );
5649
5650            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)..addr;
5651            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5652        })
5653        .await;
5654    }
5655
5656    #[::fuchsia::test]
5657    async fn test_unmap_returned_mappings() {
5658        spawn_kernel_and_run(async |locked, current_task| {
5659            let mm = current_task.mm().unwrap();
5660
5661            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5662
5663            let mut released_mappings = ReleasedMappings::default();
5664            let mut mm_state = mm.state.write();
5665            let unmap_result =
5666                mm_state.unmap(&mm, addr, *PAGE_SIZE as usize, &mut released_mappings);
5667            assert!(unmap_result.is_ok());
5668            assert_eq!(released_mappings.len(), 1);
5669            released_mappings.finalize(mm_state);
5670        })
5671        .await;
5672    }
5673
5674    #[::fuchsia::test]
5675    async fn test_unmap_returns_multiple_mappings() {
5676        spawn_kernel_and_run(async |locked, current_task| {
5677            let mm = current_task.mm().unwrap();
5678
5679            let addr = mm.state.read().find_next_unused_range(3 * *PAGE_SIZE as usize).unwrap();
5680            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5681            let _ = map_memory(locked, &current_task, (addr + 2 * *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5682
5683            let mut released_mappings = ReleasedMappings::default();
5684            let mut mm_state = mm.state.write();
5685            let unmap_result =
5686                mm_state.unmap(&mm, addr, (*PAGE_SIZE * 3) as usize, &mut released_mappings);
5687            assert!(unmap_result.is_ok());
5688            assert_eq!(released_mappings.len(), 2);
5689            released_mappings.finalize(mm_state);
5690        })
5691        .await;
5692    }
5693
5694    /// Maps two pages in separate mappings next to each other, then unmaps the first page.
5695    /// The second page should not be modified.
5696    #[::fuchsia::test]
5697    async fn test_map_two_unmap_one() {
5698        spawn_kernel_and_run(async |locked, current_task| {
5699            let mm = current_task.mm().unwrap();
5700
5701            // reserve memory for both pages
5702            let addr_reserve =
5703                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5704            let addr1 = do_mmap(
5705                locked,
5706                &current_task,
5707                addr_reserve,
5708                *PAGE_SIZE as usize,
5709                PROT_READ, // Map read-only to avoid merging of the two mappings
5710                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5711                FdNumber::from_raw(-1),
5712                0,
5713            )
5714            .expect("failed to mmap");
5715            let addr2 = map_memory_with_flags(
5716                locked,
5717                &current_task,
5718                (addr_reserve + *PAGE_SIZE).unwrap(),
5719                *PAGE_SIZE,
5720                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
5721            );
5722            let state = mm.state.read();
5723            let (range1, _) = state.mappings.get(addr1).expect("mapping");
5724            assert_eq!(range1.start, addr1);
5725            assert_eq!(range1.end, (addr1 + *PAGE_SIZE).unwrap());
5726            let (range2, mapping2) = state.mappings.get(addr2).expect("mapping");
5727            assert_eq!(range2.start, addr2);
5728            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5729            let original_memory2 = {
5730                match state.get_mapping_backing(mapping2) {
5731                    MappingBacking::Memory(backing) => {
5732                        assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5733                        backing.memory().clone()
5734                    }
5735                    MappingBacking::PrivateAnonymous => {
5736                        panic!("Unexpected private anonymous mapping")
5737                    }
5738                }
5739            };
5740            std::mem::drop(state);
5741
5742            assert_eq!(mm.unmap(addr1, *PAGE_SIZE as usize), Ok(()));
5743
5744            let state = mm.state.read();
5745
5746            // The first page should be unmapped.
5747            assert!(state.mappings.get(addr1).is_none());
5748
5749            // The second page should remain unchanged.
5750            let (range2, mapping2) = state.mappings.get(addr2).expect("second page");
5751            assert_eq!(range2.start, addr2);
5752            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
5753            match state.get_mapping_backing(mapping2) {
5754                MappingBacking::Memory(backing) => {
5755                    assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
5756                    assert_eq!(original_memory2.get_koid(), backing.memory().get_koid());
5757                }
5758                MappingBacking::PrivateAnonymous => panic!("Unexpected private anonymous mapping"),
5759            }
5760        })
5761        .await;
5762    }
5763
5764    #[::fuchsia::test]
5765    async fn test_read_write_objects() {
5766        spawn_kernel_and_run(async |locked, current_task| {
5767            let ma = current_task.deref();
5768            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5769            let items_ref = UserRef::<i32>::new(addr);
5770
5771            let items_written = vec![0, 2, 3, 7, 1];
5772            ma.write_objects(items_ref, &items_written).expect("Failed to write object array.");
5773
5774            let items_read = ma
5775                .read_objects_to_vec(items_ref, items_written.len())
5776                .expect("Failed to read object array.");
5777
5778            assert_eq!(items_written, items_read);
5779        })
5780        .await;
5781    }
5782
5783    #[::fuchsia::test]
5784    async fn test_read_write_objects_null() {
5785        spawn_kernel_and_run(async |_, current_task| {
5786            let ma = current_task.deref();
5787            let items_ref = UserRef::<i32>::new(UserAddress::default());
5788
5789            let items_written = vec![];
5790            ma.write_objects(items_ref, &items_written)
5791                .expect("Failed to write empty object array.");
5792
5793            let items_read = ma
5794                .read_objects_to_vec(items_ref, items_written.len())
5795                .expect("Failed to read empty object array.");
5796
5797            assert_eq!(items_written, items_read);
5798        })
5799        .await;
5800    }
5801
5802    #[::fuchsia::test]
5803    async fn test_read_object_partial() {
5804        #[derive(Debug, Default, Copy, Clone, KnownLayout, FromBytes, Immutable, PartialEq)]
5805        struct Items {
5806            val: [i32; 4],
5807        }
5808
5809        spawn_kernel_and_run(async |locked, current_task| {
5810            let ma = current_task.deref();
5811            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5812            let items_array_ref = UserRef::<i32>::new(addr);
5813
5814            // Populate some values.
5815            let items_written = vec![75, 23, 51, 98];
5816            ma.write_objects(items_array_ref, &items_written)
5817                .expect("Failed to write object array.");
5818
5819            // Full read of all 4 values.
5820            let items_ref = UserRef::<Items>::new(addr);
5821            let items_read = ma
5822                .read_object_partial(items_ref, std::mem::size_of::<Items>())
5823                .expect("Failed to read object");
5824            assert_eq!(items_written, items_read.val);
5825
5826            // Partial read of the first two.
5827            let items_read = ma.read_object_partial(items_ref, 8).expect("Failed to read object");
5828            assert_eq!(vec![75, 23, 0, 0], items_read.val);
5829
5830            // The API currently allows reading 0 bytes (this could be re-evaluated) so test that does
5831            // the right thing.
5832            let items_read = ma.read_object_partial(items_ref, 0).expect("Failed to read object");
5833            assert_eq!(vec![0, 0, 0, 0], items_read.val);
5834
5835            // Size bigger than the object.
5836            assert_eq!(
5837                ma.read_object_partial(items_ref, std::mem::size_of::<Items>() + 8),
5838                error!(EINVAL)
5839            );
5840
5841            // Bad pointer.
5842            assert_eq!(
5843                ma.read_object_partial(UserRef::<Items>::new(UserAddress::from(1)), 16),
5844                error!(EFAULT)
5845            );
5846        })
5847        .await;
5848    }
5849
5850    #[::fuchsia::test]
5851    async fn test_partial_read() {
5852        spawn_kernel_and_run(async |locked, current_task| {
5853            let mm = current_task.mm().unwrap();
5854            let ma = current_task.deref();
5855
5856            let addr = mm.state.read().find_next_unused_range(2 * *PAGE_SIZE as usize).unwrap();
5857            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5858            let second_map =
5859                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5860
5861            let bytes = vec![0xf; (*PAGE_SIZE * 2) as usize];
5862            assert!(ma.write_memory(addr, &bytes).is_ok());
5863            let mut state = mm.state.write();
5864            let mut released_mappings = ReleasedMappings::default();
5865            state
5866                .protect(
5867                    ma,
5868                    second_map,
5869                    *PAGE_SIZE as usize,
5870                    ProtectionFlags::empty(),
5871                    &mut released_mappings,
5872                )
5873                .unwrap();
5874            released_mappings.finalize(state);
5875            assert_eq!(
5876                ma.read_memory_partial_to_vec(addr, bytes.len()).unwrap().len(),
5877                *PAGE_SIZE as usize,
5878            );
5879        })
5880        .await;
5881    }
5882
5883    fn map_memory_growsdown<L>(
5884        locked: &mut Locked<L>,
5885        current_task: &CurrentTask,
5886        length: u64,
5887    ) -> UserAddress
5888    where
5889        L: LockEqualOrBefore<FileOpsCore> + LockBefore<ThreadGroupLimits>,
5890    {
5891        map_memory_with_flags(
5892            locked,
5893            current_task,
5894            UserAddress::default(),
5895            length,
5896            MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN,
5897        )
5898    }
5899
5900    #[::fuchsia::test]
5901    async fn test_grow_mapping_empty_mm() {
5902        spawn_kernel_and_run(async |_, current_task| {
5903            let mm = current_task.mm().unwrap();
5904
5905            let addr = UserAddress::from(0x100000);
5906
5907            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5908        })
5909        .await;
5910    }
5911
5912    #[::fuchsia::test]
5913    async fn test_grow_inside_mapping() {
5914        spawn_kernel_and_run(async |locked, current_task| {
5915            let mm = current_task.mm().unwrap();
5916
5917            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5918
5919            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5920        })
5921        .await;
5922    }
5923
5924    #[::fuchsia::test]
5925    async fn test_grow_write_fault_inside_read_only_mapping() {
5926        spawn_kernel_and_run(async |locked, current_task| {
5927            let mm = current_task.mm().unwrap();
5928
5929            let addr = do_mmap(
5930                locked,
5931                &current_task,
5932                UserAddress::default(),
5933                *PAGE_SIZE as usize,
5934                PROT_READ,
5935                MAP_ANONYMOUS | MAP_PRIVATE,
5936                FdNumber::from_raw(-1),
5937                0,
5938            )
5939            .expect("Could not map memory");
5940
5941            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5942            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5943        })
5944        .await;
5945    }
5946
5947    #[::fuchsia::test]
5948    async fn test_grow_fault_inside_prot_none_mapping() {
5949        spawn_kernel_and_run(async |locked, current_task| {
5950            let mm = current_task.mm().unwrap();
5951
5952            let addr = do_mmap(
5953                locked,
5954                &current_task,
5955                UserAddress::default(),
5956                *PAGE_SIZE as usize,
5957                PROT_NONE,
5958                MAP_ANONYMOUS | MAP_PRIVATE,
5959                FdNumber::from_raw(-1),
5960                0,
5961            )
5962            .expect("Could not map memory");
5963
5964            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5965            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5966        })
5967        .await;
5968    }
5969
5970    #[::fuchsia::test]
5971    async fn test_grow_below_mapping() {
5972        spawn_kernel_and_run(async |locked, current_task| {
5973            let mm = current_task.mm().unwrap();
5974
5975            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) - *PAGE_SIZE;
5976
5977            assert_matches!(mm.extend_growsdown_mapping_to_address(addr.unwrap(), false), Ok(true));
5978        })
5979        .await;
5980    }
5981
5982    #[::fuchsia::test]
5983    async fn test_grow_above_mapping() {
5984        spawn_kernel_and_run(async |locked, current_task| {
5985            let mm = current_task.mm().unwrap();
5986
5987            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) + *PAGE_SIZE;
5988
5989            assert_matches!(
5990                mm.extend_growsdown_mapping_to_address(addr.unwrap(), false),
5991                Ok(false)
5992            );
5993        })
5994        .await;
5995    }
5996
5997    #[::fuchsia::test]
5998    async fn test_grow_write_fault_below_read_only_mapping() {
5999        spawn_kernel_and_run(async |locked, current_task| {
6000            let mm = current_task.mm().unwrap();
6001
6002            let mapped_addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE);
6003
6004            mm.protect(&current_task, mapped_addr, *PAGE_SIZE as usize, ProtectionFlags::READ)
6005                .unwrap();
6006
6007            assert_matches!(
6008                mm.extend_growsdown_mapping_to_address((mapped_addr - *PAGE_SIZE).unwrap(), true),
6009                Ok(false)
6010            );
6011
6012            assert_eq!(mm.get_mapping_count(), 1);
6013        })
6014        .await;
6015    }
6016
6017    #[::fuchsia::test]
6018    async fn test_snapshot_paged_memory() {
6019        use zx::sys::zx_page_request_command_t::ZX_PAGER_VMO_READ;
6020
6021        spawn_kernel_and_run(async |locked, current_task| {
6022            let mm = current_task.mm().unwrap();
6023            let ma = current_task.deref();
6024
6025            let port = Arc::new(zx::Port::create());
6026            let port_clone = port.clone();
6027            let pager =
6028                Arc::new(zx::Pager::create(zx::PagerOptions::empty()).expect("create failed"));
6029            let pager_clone = pager.clone();
6030
6031            const VMO_SIZE: u64 = 128 * 1024;
6032            let vmo = Arc::new(
6033                pager
6034                    .create_vmo(zx::VmoOptions::RESIZABLE, &port, 1, VMO_SIZE)
6035                    .expect("create_vmo failed"),
6036            );
6037            let vmo_clone = vmo.clone();
6038
6039            // Create a thread to service the port where we will receive pager requests.
6040            let thread = std::thread::spawn(move || {
6041                loop {
6042                    let packet =
6043                        port_clone.wait(zx::MonotonicInstant::INFINITE).expect("wait failed");
6044                    match packet.contents() {
6045                        zx::PacketContents::Pager(contents) => {
6046                            if contents.command() == ZX_PAGER_VMO_READ {
6047                                let range = contents.range();
6048                                let source_vmo = zx::Vmo::create(range.end - range.start)
6049                                    .expect("create failed");
6050                                pager_clone
6051                                    .supply_pages(&vmo_clone, range, &source_vmo, 0)
6052                                    .expect("supply_pages failed");
6053                            }
6054                        }
6055                        zx::PacketContents::User(_) => break,
6056                        _ => {}
6057                    }
6058                }
6059            });
6060
6061            let child_vmo = vmo
6062                .create_child(zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, VMO_SIZE)
6063                .unwrap();
6064
6065            // Write something to the source VMO.
6066            vmo.write(b"foo", 0).expect("write failed");
6067
6068            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
6069            let addr = mm
6070                .map_memory(
6071                    DesiredAddress::Any,
6072                    Arc::new(MemoryObject::from(child_vmo)),
6073                    0,
6074                    VMO_SIZE as usize,
6075                    prot_flags,
6076                    Access::rwx(),
6077                    MappingOptions::empty(),
6078                    MappingName::None,
6079                )
6080                .expect("map failed");
6081
6082            let target = current_task.clone_task_for_test(locked, 0, None);
6083
6084            // Make sure it has what we wrote.
6085            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
6086            assert_eq!(buf, b"foo");
6087
6088            // Write something to both source and target and make sure they are forked.
6089            ma.write_memory(addr, b"bar").expect("write_memory failed");
6090
6091            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
6092            assert_eq!(buf, b"foo");
6093
6094            target.write_memory(addr, b"baz").expect("write_memory failed");
6095            let buf = ma.read_memory_to_vec(addr, 3).expect("read_memory failed");
6096            assert_eq!(buf, b"bar");
6097
6098            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
6099            assert_eq!(buf, b"baz");
6100
6101            port.queue(&zx::Packet::from_user_packet(0, 0, zx::UserPacket::from_u8_array([0; 32])))
6102                .unwrap();
6103            thread.join().unwrap();
6104        })
6105        .await;
6106    }
6107
6108    #[::fuchsia::test]
6109    async fn test_set_vma_name() {
6110        spawn_kernel_and_run(async |locked, mut current_task| {
6111            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6112
6113            let vma_name = "vma name";
6114            current_task.write_memory(name_addr, vma_name.as_bytes()).unwrap();
6115
6116            let mapping_addr =
6117                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6118
6119            sys_prctl(
6120                locked,
6121                &mut current_task,
6122                PR_SET_VMA,
6123                PR_SET_VMA_ANON_NAME as u64,
6124                mapping_addr.ptr() as u64,
6125                *PAGE_SIZE,
6126                name_addr.ptr() as u64,
6127            )
6128            .unwrap();
6129
6130            assert_eq!(
6131                *current_task.mm().unwrap().get_mapping_name(mapping_addr).unwrap().unwrap(),
6132                vma_name
6133            );
6134        })
6135        .await;
6136    }
6137
6138    #[::fuchsia::test]
6139    async fn test_set_vma_name_adjacent_mappings() {
6140        spawn_kernel_and_run(async |locked, mut current_task| {
6141            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6142            current_task
6143                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6144                .unwrap();
6145
6146            let first_mapping_addr =
6147                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6148            let second_mapping_addr = map_memory_with_flags(
6149                locked,
6150                &current_task,
6151                (first_mapping_addr + *PAGE_SIZE).unwrap(),
6152                *PAGE_SIZE,
6153                MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
6154            );
6155
6156            assert_eq!((first_mapping_addr + *PAGE_SIZE).unwrap(), second_mapping_addr);
6157
6158            sys_prctl(
6159                locked,
6160                &mut current_task,
6161                PR_SET_VMA,
6162                PR_SET_VMA_ANON_NAME as u64,
6163                first_mapping_addr.ptr() as u64,
6164                2 * *PAGE_SIZE,
6165                name_addr.ptr() as u64,
6166            )
6167            .unwrap();
6168
6169            {
6170                let mm = current_task.mm().unwrap();
6171                let state = mm.state.read();
6172
6173                // The name should apply to both mappings.
6174                let (_, mapping) = state.mappings.get(first_mapping_addr).unwrap();
6175                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6176
6177                let (_, mapping) = state.mappings.get(second_mapping_addr).unwrap();
6178                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6179            }
6180        })
6181        .await;
6182    }
6183
6184    #[::fuchsia::test]
6185    async fn test_set_vma_name_beyond_end() {
6186        spawn_kernel_and_run(async |locked, mut current_task| {
6187            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6188            current_task
6189                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6190                .unwrap();
6191
6192            let mapping_addr =
6193                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6194
6195            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
6196            current_task.mm().unwrap().unmap(second_page, *PAGE_SIZE as usize).unwrap();
6197
6198            // This should fail with ENOMEM since it extends past the end of the mapping into unmapped memory.
6199            assert_eq!(
6200                sys_prctl(
6201                    locked,
6202                    &mut current_task,
6203                    PR_SET_VMA,
6204                    PR_SET_VMA_ANON_NAME as u64,
6205                    mapping_addr.ptr() as u64,
6206                    2 * *PAGE_SIZE,
6207                    name_addr.ptr() as u64,
6208                ),
6209                error!(ENOMEM)
6210            );
6211
6212            // Despite returning an error, the prctl should still assign a name to the region at the start of the region.
6213            {
6214                let mm = current_task.mm().unwrap();
6215                let state = mm.state.read();
6216
6217                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6218                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6219            }
6220        })
6221        .await;
6222    }
6223
6224    #[::fuchsia::test]
6225    async fn test_set_vma_name_before_start() {
6226        spawn_kernel_and_run(async |locked, mut current_task| {
6227            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6228            current_task
6229                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6230                .unwrap();
6231
6232            let mapping_addr =
6233                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
6234
6235            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
6236            current_task.mm().unwrap().unmap(mapping_addr, *PAGE_SIZE as usize).unwrap();
6237
6238            // This should fail with ENOMEM since the start of the range is in unmapped memory.
6239            assert_eq!(
6240                sys_prctl(
6241                    locked,
6242                    &mut current_task,
6243                    PR_SET_VMA,
6244                    PR_SET_VMA_ANON_NAME as u64,
6245                    mapping_addr.ptr() as u64,
6246                    2 * *PAGE_SIZE,
6247                    name_addr.ptr() as u64,
6248                ),
6249                error!(ENOMEM)
6250            );
6251
6252            // Unlike a range which starts within a mapping and extends past the end, this should not assign
6253            // a name to any mappings.
6254            {
6255                let mm = current_task.mm().unwrap();
6256                let state = mm.state.read();
6257
6258                let (_, mapping) = state.mappings.get(second_page).unwrap();
6259                assert_eq!(mapping.name(), MappingName::None);
6260            }
6261        })
6262        .await;
6263    }
6264
6265    #[::fuchsia::test]
6266    async fn test_set_vma_name_partial() {
6267        spawn_kernel_and_run(async |locked, mut current_task| {
6268            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6269            current_task
6270                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6271                .unwrap();
6272
6273            let mapping_addr =
6274                map_memory(locked, &current_task, UserAddress::default(), 3 * *PAGE_SIZE);
6275
6276            assert_eq!(
6277                sys_prctl(
6278                    locked,
6279                    &mut current_task,
6280                    PR_SET_VMA,
6281                    PR_SET_VMA_ANON_NAME as u64,
6282                    (mapping_addr + *PAGE_SIZE).unwrap().ptr() as u64,
6283                    *PAGE_SIZE,
6284                    name_addr.ptr() as u64,
6285                ),
6286                Ok(starnix_syscalls::SUCCESS)
6287            );
6288
6289            // This should split the mapping into 3 pieces with the second piece having the name "foo"
6290            {
6291                let mm = current_task.mm().unwrap();
6292                let state = mm.state.read();
6293
6294                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6295                assert_eq!(mapping.name(), MappingName::None);
6296
6297                let (_, mapping) =
6298                    state.mappings.get((mapping_addr + *PAGE_SIZE).unwrap()).unwrap();
6299                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6300
6301                let (_, mapping) =
6302                    state.mappings.get((mapping_addr + (2 * *PAGE_SIZE)).unwrap()).unwrap();
6303                assert_eq!(mapping.name(), MappingName::None);
6304            }
6305        })
6306        .await;
6307    }
6308
6309    #[::fuchsia::test]
6310    async fn test_preserve_name_snapshot() {
6311        spawn_kernel_and_run(async |locked, mut current_task| {
6312            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6313            current_task
6314                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
6315                .unwrap();
6316
6317            let mapping_addr =
6318                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
6319
6320            assert_eq!(
6321                sys_prctl(
6322                    locked,
6323                    &mut current_task,
6324                    PR_SET_VMA,
6325                    PR_SET_VMA_ANON_NAME as u64,
6326                    mapping_addr.ptr() as u64,
6327                    *PAGE_SIZE,
6328                    name_addr.ptr() as u64,
6329                ),
6330                Ok(starnix_syscalls::SUCCESS)
6331            );
6332
6333            let target = current_task.clone_task_for_test(locked, 0, None);
6334
6335            {
6336                let mm = target.mm().unwrap();
6337                let state = mm.state.read();
6338
6339                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
6340                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
6341            }
6342        })
6343        .await;
6344    }
6345}