// starnix_core/mm/memory_manager.rs
1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::barrier::{BarrierType, system_barrier};
6use crate::mm::mapping::MappingBackingMemory;
7use crate::mm::memory::MemoryObject;
8use crate::mm::memory_accessor::{MemoryAccessor, TaskMemoryAccessor};
9use crate::mm::private_anonymous_memory_manager::PrivateAnonymousMemoryManager;
10use crate::mm::{
11    FaultRegisterMode, FutexTable, InflightVmsplicedPayloads, MapInfoCache, Mapping,
12    MappingBacking, MappingFlags, MappingName, MappingNameRef, MlockPinFlavor, PrivateFutexKey,
13    ProtectionFlags, UserFault, VMEX_RESOURCE, VmsplicePayload, VmsplicePayloadSegment,
14    read_to_array,
15};
16use crate::security;
17use crate::signals::{SignalDetail, SignalInfo};
18use crate::task::{CurrentTask, ExceptionResult, PageFaultExceptionReport, Task};
19use crate::vfs::aio::AioContext;
20use crate::vfs::pseudo::dynamic_file::{
21    DynamicFile, DynamicFileBuf, DynamicFileSource, SequenceFileSource,
22};
23use crate::vfs::{FsString, NamespaceNode};
24use anyhow::{Error, anyhow};
25use bitflags::bitflags;
26use flyweights::FlyByteStr;
27use linux_uapi::BUS_ADRERR;
28use memory_pinning::PinnedMapping;
29use range_map::RangeMap;
30use starnix_ext::map_ext::EntryExt;
31use starnix_lifecycle::DropNotifier;
32use starnix_logging::{
33    CATEGORY_STARNIX_MM, impossible_error, log_warn, trace_duration, track_stub,
34};
35use starnix_sync::{
36    LockBefore, Locked, MmDumpable, OrderedMutex, RwLock, RwLockWriteGuard, ThreadGroupLimits,
37    Unlocked, UserFaultInner,
38};
39use starnix_types::arch::ArchWidth;
40use starnix_types::futex_address::FutexAddress;
41use starnix_types::math::{round_down_to_system_page_size, round_up_to_system_page_size};
42use starnix_types::ownership::{TempRef, WeakRef};
43use starnix_types::user_buffer::{UserBuffer, UserBuffers};
44use starnix_uapi::auth::CAP_IPC_LOCK;
45use starnix_uapi::errors::Errno;
46use starnix_uapi::file_mode::Access;
47use starnix_uapi::range_ext::RangeExt;
48use starnix_uapi::resource_limits::Resource;
49use starnix_uapi::restricted_aspace::{
50    RESTRICTED_ASPACE_BASE, RESTRICTED_ASPACE_HIGHEST_ADDRESS, RESTRICTED_ASPACE_RANGE,
51    RESTRICTED_ASPACE_SIZE,
52};
53use starnix_uapi::signals::{SIGBUS, SIGSEGV};
54use starnix_uapi::user_address::{ArchSpecific, UserAddress};
55use starnix_uapi::{
56    MADV_COLD, MADV_COLLAPSE, MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK,
57    MADV_DONTNEED, MADV_DONTNEED_LOCKED, MADV_FREE, MADV_HUGEPAGE, MADV_HWPOISON, MADV_KEEPONFORK,
58    MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_NORMAL, MADV_PAGEOUT, MADV_POPULATE_READ, MADV_RANDOM,
59    MADV_REMOVE, MADV_SEQUENTIAL, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, MADV_WILLNEED,
60    MADV_WIPEONFORK, MREMAP_DONTUNMAP, MREMAP_FIXED, MREMAP_MAYMOVE, SI_KERNEL, errno, error,
61    from_status_like_fdio,
62};
63use std::collections::HashMap;
64use std::mem::MaybeUninit;
65use std::ops::{Deref, DerefMut, Range, RangeBounds};
66use std::sync::{Arc, LazyLock, Weak};
67use syncio::zxio::zxio_default_maybe_faultable_copy;
68use zerocopy::IntoBytes;
69use zx::{Rights, VmoChildOptions};
70
/// `SPECIFIC_OVERWRITE` re-expressed as a `zx::VmarFlags` value.
///
/// `SPECIFIC_OVERWRITE` lives in the separate `VmarFlagsExtended` bitflags type; re-exposing the
/// raw bits here lets it be OR'd together with the other `VmarFlags` used for mappings below.
pub const ZX_VM_SPECIFIC_OVERWRITE: zx::VmarFlags =
    zx::VmarFlags::from_bits_retain(zx::VmarFlagsExtended::SPECIFIC_OVERWRITE.bits());

/// Whether Linux tasks share ("unify") their address space with the Starnix process.
// We do not create shared processes in unit tests.
pub(crate) const UNIFIED_ASPACES_ENABLED: bool = cfg!(not(test));
76
77/// Initializes the usercopy utilities.
78///
79/// It is useful to explicitly call this so that the usercopy is initialized
80/// at a known instant. For example, Starnix may want to make sure the usercopy
81/// thread created to support user copying is associated to the Starnix process
82/// and not a restricted-mode process.
83pub fn init_usercopy() {
84    // This call lazily initializes the `Usercopy` instance.
85    let _ = usercopy();
86}
87
/// Number of pages reserved as a "guard region" below a GROWSDOWN mapping; see
/// `MemoryManagerState::get_occupied_address_ranges`.
pub const GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS: usize = 256;

// Per-architecture entropy, in bits, for address-space layout randomization.
#[cfg(target_arch = "x86_64")]
const ASLR_RANDOM_BITS: usize = 27;

#[cfg(target_arch = "aarch64")]
const ASLR_RANDOM_BITS: usize = 28;

#[cfg(target_arch = "riscv64")]
const ASLR_RANDOM_BITS: usize = 18;

/// Number of bits of entropy for processes running in 32 bits mode.
const ASLR_32_RANDOM_BITS: usize = 8;

// The biggest we expect stack to be; increase as needed
// TODO(https://fxbug.dev/322874791): Once setting RLIMIT_STACK is implemented, we should use it.
const MAX_STACK_SIZE: usize = 512 * 1024 * 1024;

// Value to report temporarily as the VM RSS HWM.
// TODO(https://fxbug.dev/396221597): Need support from the kernel to track the committed bytes high
// water mark.
const STUB_VM_RSS_HWM: usize = 2 * 1024 * 1024;
110
/// Returns the process-wide `usercopy::Usercopy` instance, lazily creating it on first use.
///
/// Returns `None` when unified address spaces are disabled (i.e. in unit tests).
fn usercopy() -> Option<&'static usercopy::Usercopy> {
    static USERCOPY: LazyLock<Option<usercopy::Usercopy>> = LazyLock::new(|| {
        // We do not create shared processes in unit tests.
        if UNIFIED_ASPACES_ENABLED {
            // ASSUMPTION: All Starnix managed Linux processes have the same
            // restricted mode address range.
            Some(usercopy::Usercopy::new(RESTRICTED_ASPACE_RANGE).unwrap())
        } else {
            None
        }
    });

    LazyLock::force(&USERCOPY).as_ref()
}
125
/// Provides an implementation for zxio's `zxio_maybe_faultable_copy` that supports
/// catching faults.
///
/// See zxio's `zxio_maybe_faultable_copy` documentation for more details.
///
/// Returns `true` if all `count` bytes were copied, `false` if a fault cut the copy short.
///
/// # Safety
///
/// Only one of `src`/`dest` may be an address to a buffer owned by user/restricted-mode
/// (`ret_dest` indicates whether the user-owned buffer is `dest` when `true`).
/// The other must be a valid Starnix/normal-mode buffer that will never cause a fault
/// when the first `count` bytes are read/written.
#[unsafe(no_mangle)]
pub unsafe fn zxio_maybe_faultable_copy_impl(
    dest: *mut u8,
    src: *const u8,
    count: usize,
    ret_dest: bool,
) -> bool {
    if let Some(usercopy) = usercopy() {
        // Fault-catching hermetic copy; a short return value means a fault was taken.
        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
        let ret = unsafe { usercopy.raw_hermetic_copy(dest, src, count, ret_dest) };
        ret == count
    } else {
        // No usercopy support (unit tests): fall back to zxio's default implementation.
        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
        unsafe {
            zxio_default_maybe_faultable_copy(dest, src, count, ret_dest)
        }
    }
}
155
/// The system page size in bytes, fetched once from the kernel.
pub static PAGE_SIZE: LazyLock<u64> = LazyLock::new(|| zx::system_get_page_size() as u64);
157
bitflags! {
    /// Per-mapping option bits recorded by the memory manager, largely mirroring mmap()-level
    /// attributes (MAP_SHARED, MAP_ANONYMOUS, MAP_32BIT, MAP_GROWSDOWN, MAP_POPULATE, ...).
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct MappingOptions: u16 {
      const SHARED      = 1 << 0;
      const ANONYMOUS   = 1 << 1;
      const LOWER_32BIT = 1 << 2;
      const GROWSDOWN   = 1 << 3;
      const ELF_BINARY  = 1 << 4;
      const DONTFORK    = 1 << 5;
      const WIPEONFORK  = 1 << 6;
      const DONT_SPLIT  = 1 << 7;
      const DONT_EXPAND = 1 << 8;
      const POPULATE    = 1 << 9;
    }
}
173
bitflags! {
    /// Flags accepted by mremap(); the valid combinations are enforced in
    /// `MemoryManagerState::remap`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct MremapFlags: u32 {
        const MAYMOVE = MREMAP_MAYMOVE;
        const FIXED = MREMAP_FIXED;
        const DONTUNMAP = MREMAP_DONTUNMAP;
    }
}
182
bitflags! {
    /// Flags accepted by msync(), mirroring the Linux MS_* constants.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct MsyncFlags: u32 {
        const ASYNC = starnix_uapi::MS_ASYNC;
        const INVALIDATE = starnix_uapi::MS_INVALIDATE;
        const SYNC = starnix_uapi::MS_SYNC;
    }
}
191
/// Maximum distance, in bytes, that the program break may grow above its base.
const PROGRAM_BREAK_LIMIT: u64 = 64 * 1024 * 1024;

/// Tracks the brk()/sbrk() data segment for an address space.
#[derive(Debug, Clone, Eq, PartialEq)]
struct ProgramBreak {
    // The base address at which the data segment is mapped.
    base: UserAddress,

    // The current program break.
    //
    // The addresses from [base, current.round_up(*PAGE_SIZE)) are mapped into the
    // client address space from the underlying |memory|.
    current: UserAddress,
}
205
/// The policy about whether the address space can be dumped.
///
/// NOTE(review): these correspond to the SUID_DUMP_* values used with prctl(PR_SET_DUMPABLE);
/// SUID_DUMP_ROOT is not represented here — confirm that is intentional.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DumpPolicy {
    /// The address space cannot be dumped.
    ///
    /// Corresponds to SUID_DUMP_DISABLE.
    Disable,

    /// The address space can be dumped.
    ///
    /// Corresponds to SUID_DUMP_USER.
    User,
}
219
/// Supported types of membarriers.
pub enum MembarrierType {
    /// MEMBARRIER_CMD_GLOBAL, etc — memory-ordering barriers.
    Memory,
    /// MEMBARRIER_CMD_..._SYNC_CORE — barriers that also synchronize the instruction stream.
    SyncCore,
}

/// Tracks the types of membarriers this address space is registered to receive.
#[derive(Default, Clone)]
struct MembarrierRegistrations {
    // Registered for memory-ordering membarrier commands.
    memory: bool,
    // Registered for SYNC_CORE membarrier commands.
    sync_core: bool,
}
232
/// The mutable state of a `MemoryManager`: the user VMAR, the mapping bookkeeping, and
/// ancillary per-address-space records. Held behind a lock by the owning `MemoryManager`.
pub struct MemoryManagerState {
    /// The VMAR in which userspace mappings occur.
    ///
    /// We map userspace memory in this child VMAR so that we can destroy the
    /// entire VMAR during exec.
    /// For 32-bit tasks, we limit the user_vmar to correspond to the available memory.
    ///
    /// This field is set to `ZX_HANDLE_INVALID` when the address-space has been destroyed (e.g. on
    /// `exec()`), allowing the value to be pro-actively checked for, or the `ZX_ERR_BAD_HANDLE`
    /// status return from Zircon operations handled, to suit the call-site.
    user_vmar: zx::Vmar,

    /// Cached VmarInfo for user_vmar.
    user_vmar_info: zx::VmarInfo,

    /// The memory mappings currently used by this address space.
    ///
    /// The mappings record which object backs each address.
    mappings: RangeMap<UserAddress, Mapping>,

    /// Memory object backing private, anonymous memory allocations in this address space.
    private_anonymous: PrivateAnonymousMemoryManager,

    /// UserFaults registered with this memory manager.
    userfaultfds: Vec<Weak<UserFault>>,

    /// Shadow mappings for mlock()'d pages.
    ///
    /// Used for MlockPinFlavor::ShadowProcess to keep track of when we need to unmap
    /// memory from the shadow process.
    shadow_mappings_for_mlock: RangeMap<UserAddress, Arc<PinnedMapping>>,

    /// The portion of the state that is cloned into a forked address space; accessed
    /// transparently through the `Deref`/`DerefMut` impls below.
    forkable_state: MemoryManagerForkableState,
}

// 64KiB under the 4GiB boundary.
const LOWER_4GB_LIMIT: UserAddress = UserAddress::const_from(0xffff_0000);
270
/// Address-space state that is cloned into the child on fork().
#[derive(Default, Clone)]
pub struct MemoryManagerForkableState {
    /// State for the brk and sbrk syscalls.
    brk: Option<ProgramBreak>,

    /// The namespace node that represents the executable associated with this task.
    executable_node: Option<NamespaceNode>,

    // Layout of the initial stack and of the argv/auxv/environ blocks placed on it.
    // NOTE(review): presumably recorded at exec time for /proc-style reporting — verify at
    // the call sites that populate these.
    pub stack_size: usize,
    pub stack_start: UserAddress,
    pub auxv_start: UserAddress,
    pub auxv_end: UserAddress,
    pub argv_start: UserAddress,
    pub argv_end: UserAddress,
    pub environ_start: UserAddress,
    pub environ_end: UserAddress,

    /// vDSO location
    pub vdso_base: UserAddress,

    /// Randomized regions:
    pub mmap_top: UserAddress,
    pub stack_origin: UserAddress,
    pub brk_origin: UserAddress,

    // Membarrier registrations
    membarrier_registrations: MembarrierRegistrations,
}
299
// `MemoryManagerState` exposes its forkable portion through Deref/DerefMut so call sites can
// read and write fields like `mmap_top` or `brk` directly on the state.
impl Deref for MemoryManagerState {
    type Target = MemoryManagerForkableState;
    fn deref(&self) -> &Self::Target {
        &self.forkable_state
    }
}

impl DerefMut for MemoryManagerState {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.forkable_state
    }
}
312
/// Mappings and mlock pins removed from a `MemoryManagerState` whose destruction must be
/// deferred until after the state lock is released (dropping a mapping can take other locks).
/// Callers must drain this via `finalize()`; dropping it non-empty panics.
#[derive(Debug, Default)]
struct ReleasedMappings {
    doomed: Vec<Mapping>,
    doomed_pins: Vec<Arc<PinnedMapping>>,
}
318
impl ReleasedMappings {
    /// Queues mappings removed from the address space for deferred destruction.
    fn extend(&mut self, mappings: impl IntoIterator<Item = Mapping>) {
        self.doomed.extend(mappings);
    }

    /// Queues mlock pins removed from the address space for deferred destruction.
    fn extend_pins(&mut self, mappings: impl IntoIterator<Item = Arc<PinnedMapping>>) {
        self.doomed_pins.extend(mappings);
    }

    /// Returns true when nothing is queued for release.
    fn is_empty(&self) -> bool {
        self.doomed.is_empty() && self.doomed_pins.is_empty()
    }

    /// Total number of queued mappings and pins (test-only helper).
    #[cfg(test)]
    fn len(&self) -> usize {
        self.doomed.len() + self.doomed_pins.len()
    }

    /// Releases `mm_state` and then drops everything queued, in that order.
    fn finalize(&mut self, mm_state: RwLockWriteGuard<'_, MemoryManagerState>) {
        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
        // in `DirEntry`'s `drop`.
        std::mem::drop(mm_state);
        std::mem::take(&mut self.doomed);
        std::mem::take(&mut self.doomed_pins);
    }
}
345
impl Drop for ReleasedMappings {
    fn drop(&mut self) {
        // Enforce the finalize() contract: queued mappings must be dropped via finalize()
        // (i.e. after the MM state lock has been released), never implicitly here.
        assert!(self.is_empty(), "ReleasedMappings::finalize() must be called before drop");
    }
}
351
/// Maps `length` bytes of `memory`, starting at `memory_offset`, into `vmar` at the address
/// carried by `addr`, translating failures into Linux-style `Errno`s.
///
/// When `populate` is set, the backing pages are committed/prefetched first and the mapping is
/// created with `MAP_RANGE` where possible; population failures are deliberately ignored,
/// matching mmap() semantics.
fn map_in_vmar(
    vmar: &zx::Vmar,
    vmar_info: &zx::VmarInfo,
    addr: SelectedAddress,
    memory: &MemoryObject,
    memory_offset: u64,
    length: usize,
    flags: MappingFlags,
    populate: bool,
) -> Result<UserAddress, Errno> {
    // Zircon takes an offset relative to the VMAR base, not an absolute address.
    let vmar_offset = addr.addr().checked_sub(vmar_info.base).ok_or_else(|| errno!(ENOMEM))?;
    let vmar_extra_flags = match addr {
        SelectedAddress::Fixed(_) => zx::VmarFlags::SPECIFIC,
        SelectedAddress::FixedOverwrite(_) => ZX_VM_SPECIFIC_OVERWRITE,
    };

    if populate {
        let op = if flags.contains(MappingFlags::WRITE) {
            // Requires ZX_RIGHT_WRITEABLE which we should expect when the mapping is writeable.
            zx::VmoOp::COMMIT
        } else {
            // When we don't expect to have ZX_RIGHT_WRITEABLE, fall back to a VMO op that doesn't
            // need it.
            zx::VmoOp::PREFETCH
        };
        trace_duration!(CATEGORY_STARNIX_MM, "MmapCommitPages");
        let _ = memory.op_range(op, memory_offset, length as u64);
        // "The mmap() call doesn't fail if the mapping cannot be populated."
    }

    // MAP_RANGE is only requested when not overwriting in place — presumably the two flags
    // are incompatible; confirm against the zx_vmar_map documentation.
    let vmar_maybe_map_range = if populate && !vmar_extra_flags.contains(ZX_VM_SPECIFIC_OVERWRITE) {
        zx::VmarFlags::MAP_RANGE
    } else {
        zx::VmarFlags::empty()
    };
    let vmar_flags = flags.access_flags().to_vmar_flags()
        | zx::VmarFlags::ALLOW_FAULTS
        | vmar_extra_flags
        | vmar_maybe_map_range;

    let map_result = memory.map_in_vmar(vmar, vmar_offset.ptr(), memory_offset, length, vmar_flags);
    let mapped_addr = map_result.map_err(MemoryManager::get_errno_for_map_err)?;

    Ok(UserAddress::from_ptr(mapped_addr))
}
397
398impl MemoryManagerState {
    /// Returns occupied address ranges that intersect with the given range.
    ///
    /// An address range is "occupied" if (a) there is already a mapping in that range or (b) there
    /// is a GROWSDOWN mapping <= 256 pages above that range. The 256 pages below a GROWSDOWN
    /// mapping is the "guard region." The memory manager avoids mapping memory in the guard region
    /// in some circumstances to preserve space for the GROWSDOWN mapping to grow down.
    fn get_occupied_address_ranges<'a>(
        &'a self,
        subrange: &'a Range<UserAddress>,
    ) -> impl Iterator<Item = Range<UserAddress>> + 'a {
        // Extend the query upwards so GROWSDOWN mappings that start up to 256 pages above
        // `subrange.end` (whose guard regions reach down into `subrange`) are also visited.
        let query_range = subrange.start
            ..(subrange
                .end
                .saturating_add(*PAGE_SIZE as usize * GUARD_PAGE_COUNT_FOR_GROWSDOWN_MAPPINGS));
        self.mappings.range(query_range).filter_map(|(range, mapping)| {
            // Inflate each mapping to cover its guard pages, then keep only the inflated
            // ranges that actually overlap `subrange`.
            let occupied_range = mapping.inflate_to_include_guard_pages(range);
            if occupied_range.start < subrange.end && subrange.start < occupied_range.end {
                Some(occupied_range)
            } else {
                None
            }
        })
    }
422
    /// Counts the distinct page-aligned start addresses within `subrange` at which a new
    /// mapping of `length` bytes would fit without touching any occupied range.
    ///
    /// Returns `None` if the address arithmetic overflows.
    fn count_possible_placements(
        &self,
        length: usize,
        subrange: &Range<UserAddress>,
    ) -> Option<usize> {
        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
        let mut possible_placements = 0;
        // If the allocation is placed at the first available address, every page that is left
        // before the next mapping or the end of subrange is +1 potential placement.
        let mut first_fill_end = subrange.start.checked_add(length)?;
        while first_fill_end <= subrange.end {
            let Some(mapping) = occupied_ranges.next() else {
                // No occupied range left: every remaining page up to `subrange.end` is one
                // more placement.
                possible_placements += (subrange.end - first_fill_end) / (*PAGE_SIZE as usize) + 1;
                break;
            };
            if mapping.start >= first_fill_end {
                possible_placements += (mapping.start - first_fill_end) / (*PAGE_SIZE as usize) + 1;
            }
            // Restart the accounting just above this occupied range.
            first_fill_end = mapping.end.checked_add(length)?;
        }
        Some(possible_placements)
    }
445
    /// Returns the start address of the `chosen_placement_idx`-th possible placement (0-based,
    /// in ascending address order) of a `length`-byte mapping inside `subrange`, as enumerated
    /// by `count_possible_placements`.
    ///
    /// Returns `None` on address-arithmetic overflow.
    fn pick_placement(
        &self,
        length: usize,
        mut chosen_placement_idx: usize,
        subrange: &Range<UserAddress>,
    ) -> Option<UserAddress> {
        let mut candidate =
            Range { start: subrange.start, end: subrange.start.checked_add(length)? };
        let mut occupied_ranges = self.get_occupied_address_ranges(subrange);
        loop {
            let Some(mapping) = occupied_ranges.next() else {
                // No more mappings: treat the rest of the index as an offset.
                let res =
                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
                debug_assert!(res.checked_add(length)? <= subrange.end);
                return Some(res);
            };
            if mapping.start < candidate.end {
                // doesn't fit, skip
                candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
                continue;
            }
            // Number of distinct placements that fit between the candidate and this occupied
            // range (mirrors the accounting in `count_possible_placements`).
            let unused_space =
                (mapping.start.ptr() - candidate.end.ptr()) / (*PAGE_SIZE as usize) + 1;
            if unused_space > chosen_placement_idx {
                // Chosen placement is within the range; treat the rest of the index as an offset.
                let res =
                    candidate.start.checked_add(chosen_placement_idx * *PAGE_SIZE as usize)?;
                return Some(res);
            }

            // chosen address is further up, skip
            chosen_placement_idx -= unused_space;
            candidate = Range { start: mapping.end, end: mapping.end.checked_add(length)? };
        }
    }
482
483    fn find_random_unused_range(
484        &self,
485        length: usize,
486        subrange: &Range<UserAddress>,
487    ) -> Option<UserAddress> {
488        let possible_placements = self.count_possible_placements(length, subrange)?;
489        if possible_placements == 0 {
490            return None;
491        }
492        let chosen_placement_idx = rand::random_range(0..possible_placements);
493        self.pick_placement(length, chosen_placement_idx, subrange)
494    }
495
    /// Finds the first unused range of addresses that fits a mapping of `length` bytes,
    /// searching from `mmap_top` downwards. Returns the start of the found range, or `None`
    /// if no gap is large enough.
    pub fn find_next_unused_range(&self, length: usize) -> Option<UserAddress> {
        let gap_size = length as u64;
        let mut upper_bound = self.mmap_top;

        loop {
            // End of the highest gap of at least `gap_size` bytes below `upper_bound`.
            let gap_end = self.mappings.find_gap_end(gap_size, &upper_bound);
            let candidate = gap_end.checked_sub(length)?;

            // Is there a next mapping? If not, the candidate is already good.
            let Some((occupied_range, mapping)) = self.mappings.get(gap_end) else {
                return Some(candidate);
            };
            // Account for guard pages of GROWSDOWN mappings, which the raw gap search above
            // does not consider.
            let occupied_range = mapping.inflate_to_include_guard_pages(occupied_range);
            // If it doesn't overlap, the gap is big enough to fit.
            if occupied_range.start >= gap_end {
                return Some(candidate);
            }
            // If there was a mapping in the way, use the start of that range as the upper bound.
            upper_bound = occupied_range.start;
        }
    }
519
520    // Accept the hint if the range is unused and within the range available for mapping.
521    fn is_hint_acceptable(&self, hint_addr: UserAddress, length: usize) -> bool {
522        let Some(hint_end) = hint_addr.checked_add(length) else {
523            return false;
524        };
525        if !RESTRICTED_ASPACE_RANGE.contains(&hint_addr.ptr())
526            || !RESTRICTED_ASPACE_RANGE.contains(&hint_end.ptr())
527        {
528            return false;
529        };
530        self.get_occupied_address_ranges(&(hint_addr..hint_end)).next().is_none()
531    }
532
    /// Chooses the concrete address for a new `length`-byte mapping according to the caller's
    /// `DesiredAddress`: searches for a free range for `Any`, honors an acceptable `Hint`
    /// (falling back to a search), and passes `Fixed`/`FixedOverwrite` through unchanged.
    fn select_address(
        &self,
        addr: DesiredAddress,
        length: usize,
        flags: MappingFlags,
    ) -> Result<SelectedAddress, Errno> {
        let adjusted_length = round_up_to_system_page_size(length).or_else(|_| error!(ENOMEM))?;

        let find_address = || -> Result<SelectedAddress, Errno> {
            let new_addr = if flags.contains(MappingFlags::LOWER_32BIT) {
                // MAP_32BIT specifies that the memory allocated will
                // be within the first 2 GB of the process address space.
                self.find_random_unused_range(
                    adjusted_length,
                    &(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
                        ..UserAddress::from_ptr(0x80000000)),
                )
                .ok_or_else(|| errno!(ENOMEM))?
            } else {
                self.find_next_unused_range(adjusted_length).ok_or_else(|| errno!(ENOMEM))?
            };

            Ok(SelectedAddress::Fixed(new_addr))
        };

        Ok(match addr {
            DesiredAddress::Any => find_address()?,
            DesiredAddress::Hint(hint_addr) => {
                // Round down to page size
                let hint_addr =
                    UserAddress::from_ptr(hint_addr.ptr() - hint_addr.ptr() % *PAGE_SIZE as usize);
                if self.is_hint_acceptable(hint_addr, adjusted_length) {
                    SelectedAddress::Fixed(hint_addr)
                } else {
                    find_address()?
                }
            }
            DesiredAddress::Fixed(addr) => SelectedAddress::Fixed(addr),
            DesiredAddress::FixedOverwrite(addr) => SelectedAddress::FixedOverwrite(addr),
        })
    }
574
    /// Maps into this address space's user VMAR without updating `self.mappings`; callers are
    /// responsible for recording the resulting mapping themselves.
    fn map_in_user_vmar(
        &self,
        addr: SelectedAddress,
        memory: &MemoryObject,
        memory_offset: u64,
        length: usize,
        flags: MappingFlags,
        populate: bool,
    ) -> Result<UserAddress, Errno> {
        map_in_vmar(
            &self.user_vmar,
            &self.user_vmar_info,
            addr,
            memory,
            memory_offset,
            length,
            flags,
            populate,
        )
    }
596
597    fn validate_addr(&self, addr: DesiredAddress, length: usize) -> Result<(), Errno> {
598        if let DesiredAddress::FixedOverwrite(addr) = addr {
599            if self.check_has_unauthorized_splits(addr, length) {
600                return error!(ENOMEM);
601            }
602        }
603        Ok(())
604    }
605
    /// Maps `length` bytes of `memory` at `memory_offset` into this address space and records
    /// the resulting `Mapping` in `self.mappings`, returning the mapped address.
    ///
    /// For `DesiredAddress::FixedOverwrite`, previously-overlapping mappings are unmapped and
    /// queued on `released_mappings` so they are dropped only after the state lock is released.
    fn map_memory(
        &mut self,
        mm: &Arc<MemoryManager>,
        addr: DesiredAddress,
        memory: Arc<MemoryObject>,
        memory_offset: u64,
        length: usize,
        flags: MappingFlags,
        max_access: Access,
        populate: bool,
        name: MappingName,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        self.validate_addr(addr, length)?;

        let selected_address = self.select_address(addr, length, flags)?;
        let mapped_addr = self.map_in_user_vmar(
            selected_address,
            &memory,
            memory_offset,
            length,
            flags,
            populate,
        )?;

        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;

        if let DesiredAddress::FixedOverwrite(addr) = addr {
            // The VMAR overwrite already replaced the pages; bring the bookkeeping in line.
            assert_eq!(addr, mapped_addr);
            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
        }

        let mapping = Mapping::with_name(
            self.create_memory_backing(mapped_addr, memory, memory_offset),
            flags,
            max_access,
            name,
        );
        // `insert` returns any mappings displaced by the new range; defer dropping them.
        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));

        Ok(mapped_addr)
    }
648
    /// Maps `length` bytes of private anonymous memory and records the mapping.
    ///
    /// The mapping is backed by this address space's shared private-anonymous memory object;
    /// the offset into that object is the selected user address itself, so backing offsets
    /// mirror user addresses.
    fn map_private_anonymous(
        &mut self,
        mm: &Arc<MemoryManager>,
        addr: DesiredAddress,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        populate: bool,
        name: MappingName,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        self.validate_addr(addr, length)?;

        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
        let selected_addr = self.select_address(addr, length, flags)?;
        // Offsets into the private-anonymous backing object are the user addresses themselves.
        let backing_memory_offset = selected_addr.addr().ptr();

        let mapped_addr = self.map_in_user_vmar(
            selected_addr,
            &self.private_anonymous.backing,
            backing_memory_offset as u64,
            length,
            flags,
            populate,
        )?;

        let end = (mapped_addr + length)?.round_up(*PAGE_SIZE)?;
        if let DesiredAddress::FixedOverwrite(addr) = addr {
            // The VMAR overwrite already replaced the pages; bring the bookkeeping in line.
            assert_eq!(addr, mapped_addr);
            self.update_after_unmap(mm, addr, end - addr, released_mappings)?;
        }

        let mapping = Mapping::new_private_anonymous(flags, name);
        // `insert` returns any mappings displaced by the new range; defer dropping them.
        released_mappings.extend(self.mappings.insert(mapped_addr..end, mapping));

        Ok(mapped_addr)
    }
686
    /// Maps anonymous memory.
    ///
    /// Private (non-MAP_SHARED) requests are delegated to `map_private_anonymous`; MAP_SHARED
    /// requests get a dedicated anonymous memory object.
    fn map_anonymous(
        &mut self,
        mm: &Arc<MemoryManager>,
        addr: DesiredAddress,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        name: MappingName,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        if !options.contains(MappingOptions::SHARED) {
            return self.map_private_anonymous(
                mm,
                addr,
                length,
                prot_flags,
                options,
                options.contains(MappingOptions::POPULATE),
                name,
                released_mappings,
            );
        }
        let memory = create_anonymous_mapping_memory(length as u64)?;
        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
        self.map_memory(
            mm,
            addr,
            memory,
            0,
            length,
            flags,
            // NOTE(review): shared anonymous memory gets full rwx max_access — presumably
            // because there is no backing file to restrict it; confirm.
            Access::rwx(),
            options.contains(MappingOptions::POPULATE),
            name,
            released_mappings,
        )
    }
724
    /// Implements mremap(): validates the flag combination, then either resizes the mapping in
    /// place or moves it, returning the (possibly new) address of the mapping.
    fn remap(
        &mut self,
        _current_task: &CurrentTask,
        mm: &Arc<MemoryManager>,
        old_addr: UserAddress,
        old_length: usize,
        new_length: usize,
        flags: MremapFlags,
        new_addr: UserAddress,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        // MREMAP_FIXED moves a mapping, which requires MREMAP_MAYMOVE.
        if flags.contains(MremapFlags::FIXED) && !flags.contains(MremapFlags::MAYMOVE) {
            return error!(EINVAL);
        }

        // MREMAP_DONTUNMAP is always a move, so it requires MREMAP_MAYMOVE.
        // There is no resizing allowed either.
        if flags.contains(MremapFlags::DONTUNMAP)
            && (!flags.contains(MremapFlags::MAYMOVE) || old_length != new_length)
        {
            return error!(EINVAL);
        }

        // In-place copies are invalid.
        if !flags.contains(MremapFlags::MAYMOVE) && old_length == 0 {
            return error!(ENOMEM);
        }

        if new_length == 0 {
            return error!(EINVAL);
        }

        // Make sure old_addr is page-aligned.
        if !old_addr.is_aligned(*PAGE_SIZE) {
            return error!(EINVAL);
        }

        let old_length = round_up_to_system_page_size(old_length)?;
        let new_length = round_up_to_system_page_size(new_length)?;

        // Neither the source nor the destination range may split mappings that forbid it.
        if self.check_has_unauthorized_splits(old_addr, old_length) {
            return error!(EINVAL);
        }

        if self.check_has_unauthorized_splits(new_addr, new_length) {
            return error!(EINVAL);
        }

        if !flags.contains(MremapFlags::DONTUNMAP)
            && !flags.contains(MremapFlags::FIXED)
            && old_length != 0
        {
            // We are not requested to remap to a specific address, so first we see if we can remap
            // in-place. In-place copies (old_length == 0) are not allowed.
            if let Some(new_addr) =
                self.try_remap_in_place(mm, old_addr, old_length, new_length, released_mappings)?
            {
                return Ok(new_addr);
            }
        }

        // There is no space to grow in place, or there is an explicit request to move.
        if flags.contains(MremapFlags::MAYMOVE) {
            let dst_address =
                if flags.contains(MremapFlags::FIXED) { Some(new_addr) } else { None };
            self.remap_move(
                mm,
                old_addr,
                old_length,
                dst_address,
                new_length,
                flags.contains(MremapFlags::DONTUNMAP),
                released_mappings,
            )
        } else {
            error!(ENOMEM)
        }
    }
804
    /// Attempts to grow or shrink the mapping in-place. Returns `Ok(Some(addr))` if the remap was
    /// successful. Returns `Ok(None)` if there was no space to grow.
    ///
    /// Shrinking always succeeds: the excess tail is simply unmapped. Growing succeeds only when
    /// the pages immediately after the old range are entirely unmapped and the old range lies
    /// within a single contiguous mapping.
    ///
    /// Mappings removed or replaced along the way are appended to `released_mappings` so the
    /// caller can drop them after releasing locks.
    fn try_remap_in_place(
        &mut self,
        mm: &Arc<MemoryManager>,
        old_addr: UserAddress,
        old_length: usize,
        new_length: usize,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<Option<UserAddress>, Errno> {
        // Overflow in either end address means the caller passed an invalid range.
        let old_range = old_addr..old_addr.checked_add(old_length).ok_or_else(|| errno!(EINVAL))?;
        let new_range_in_place =
            old_addr..old_addr.checked_add(new_length).ok_or_else(|| errno!(EINVAL))?;

        if new_length <= old_length {
            // Shrink the mapping in-place, which should always succeed.
            // This is done by unmapping the extraneous region.
            if new_length != old_length {
                self.unmap(mm, new_range_in_place.end, old_length - new_length, released_mappings)?;
            }
            return Ok(Some(old_addr));
        }

        if self.mappings.range(old_range.end..new_range_in_place.end).next().is_some() {
            // There is some mapping in the growth range preventing an in-place growth.
            return Ok(None);
        }

        // There is space to grow in-place. The old range must be one contiguous mapping.
        let (original_range, mapping) =
            self.mappings.get(old_addr).ok_or_else(|| errno!(EINVAL))?;

        if old_range.end > original_range.end {
            return error!(EFAULT);
        }
        // Clone out of `self.mappings` so the map can be mutated below.
        let original_range = original_range.clone();
        let original_mapping = mapping.clone();

        // Compute the new length of the entire mapping once it has grown.
        let final_length = (original_range.end - original_range.start) + (new_length - old_length);

        match self.get_mapping_backing(&original_mapping) {
            MappingBacking::Memory(backing) => {
                // Re-map the original range, which may include pages before the requested range.
                Ok(Some(self.map_memory(
                    mm,
                    DesiredAddress::FixedOverwrite(original_range.start),
                    backing.memory().clone(),
                    backing.address_to_offset(original_range.start),
                    final_length,
                    original_mapping.flags(),
                    original_mapping.max_access(),
                    false,
                    original_mapping.name().to_owned(),
                    released_mappings,
                )?))
            }
            MappingBacking::PrivateAnonymous => {
                let growth_start = original_range.end;
                let growth_length = new_length - old_length;
                let final_end = (original_range.start + final_length)?;
                // Map new pages to back the growth.
                self.map_in_user_vmar(
                    SelectedAddress::FixedOverwrite(growth_start),
                    &self.private_anonymous.backing,
                    growth_start.ptr() as u64,
                    growth_length,
                    original_mapping.flags(),
                    false,
                )?;
                // Overwrite the mapping entry with the new larger size.
                released_mappings.extend(
                    self.mappings.insert(original_range.start..final_end, original_mapping.clone()),
                );
                Ok(Some(original_range.start))
            }
        }
    }
883
    /// Grows or shrinks the mapping while moving it to a new destination.
    ///
    /// If `dst_addr` is `None` the destination is chosen by the address selector; otherwise the
    /// destination range is unmapped first and the mapping is placed exactly there. When
    /// `keep_source` is set (MREMAP_DONTUNMAP) the source mapping is left in place after the
    /// move. `src_length == 0` requests a copy rather than a move, which is only permitted for
    /// MAP_SHARED mappings.
    ///
    /// NOTE: Several steps here (shrinking the source, unmapping the destination) intentionally
    /// take effect and become visible to the process even if a later step fails, matching Linux
    /// semantics.
    ///
    /// Returns the address of the relocated mapping.
    fn remap_move(
        &mut self,
        mm: &Arc<MemoryManager>,
        src_addr: UserAddress,
        src_length: usize,
        dst_addr: Option<UserAddress>,
        dst_length: usize,
        keep_source: bool,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno> {
        let src_range = src_addr..src_addr.checked_add(src_length).ok_or_else(|| errno!(EINVAL))?;
        let (original_range, src_mapping) =
            self.mappings.get(src_addr).ok_or_else(|| errno!(EINVAL))?;
        // Clone out of `self.mappings` so the map can be mutated below.
        let original_range = original_range.clone();
        let src_mapping = src_mapping.clone();

        if src_length == 0 && !src_mapping.flags().contains(MappingFlags::SHARED) {
            // src_length == 0 means that the mapping is to be copied. This behavior is only valid
            // with MAP_SHARED mappings.
            return error!(EINVAL);
        }

        // If the destination range is smaller than the source range, we must first shrink
        // the source range in place. This must be done now and visible to processes, even if
        // a later failure causes the remap operation to fail.
        if src_length != 0 && src_length > dst_length {
            self.unmap(mm, (src_addr + dst_length)?, src_length - dst_length, released_mappings)?;
        }

        let dst_addr_for_map = match dst_addr {
            None => DesiredAddress::Any,
            Some(dst_addr) => {
                // The mapping is being moved to a specific address.
                let dst_range =
                    dst_addr..(dst_addr.checked_add(dst_length).ok_or_else(|| errno!(EINVAL))?);
                // Source and destination must not overlap.
                if !src_range.intersect(&dst_range).is_empty() {
                    return error!(EINVAL);
                }

                // The destination range must be unmapped. This must be done now and visible to
                // processes, even if a later failure causes the remap operation to fail.
                self.unmap(mm, dst_addr, dst_length, released_mappings)?;

                DesiredAddress::Fixed(dst_addr)
            }
        };

        // According to gVisor's aio_test, Linux checks for DONT_EXPAND after unmapping the dst
        // range.
        if dst_length > src_length && src_mapping.flags().contains(MappingFlags::DONT_EXPAND) {
            return error!(EFAULT);
        }

        if src_range.end > original_range.end {
            // The source range is not one contiguous mapping. This check must be done only after
            // the source range is shrunk and the destination unmapped.
            return error!(EFAULT);
        }

        match self.get_mapping_backing(&src_mapping) {
            MappingBacking::PrivateAnonymous => {
                let dst_addr =
                    self.select_address(dst_addr_for_map, dst_length, src_mapping.flags())?.addr();
                let dst_end = (dst_addr + dst_length)?;

                let length_to_move = std::cmp::min(dst_length, src_length) as u64;
                let growth_start_addr = (dst_addr + length_to_move)?;

                if dst_addr != src_addr {
                    let src_move_end = (src_range.start + length_to_move)?;
                    let range_to_move = src_range.start..src_move_end;
                    // Move the previously mapped pages into their new location.
                    self.private_anonymous.move_pages(&range_to_move, dst_addr)?;
                }

                // Userfault registration is not preserved by remap
                let new_flags =
                    src_mapping.flags().difference(MappingFlags::UFFD | MappingFlags::UFFD_MISSING);
                self.map_in_user_vmar(
                    SelectedAddress::FixedOverwrite(dst_addr),
                    &self.private_anonymous.backing,
                    dst_addr.ptr() as u64,
                    dst_length,
                    new_flags,
                    false,
                )?;

                if dst_length > src_length {
                    // The mapping has grown, map new pages in to cover the growth.
                    let growth_length = dst_length - src_length;

                    self.map_private_anonymous(
                        mm,
                        DesiredAddress::FixedOverwrite(growth_start_addr),
                        growth_length,
                        new_flags.access_flags(),
                        new_flags.options(),
                        false,
                        src_mapping.name().to_owned(),
                        released_mappings,
                    )?;
                }

                // Record the new mapping entry covering the full destination range.
                released_mappings.extend(self.mappings.insert(
                    dst_addr..dst_end,
                    Mapping::new_private_anonymous(new_flags, src_mapping.name().to_owned()),
                ));

                if dst_addr != src_addr && src_length != 0 && !keep_source {
                    self.unmap(mm, src_addr, src_length, released_mappings)?;
                }

                return Ok(dst_addr);
            }
            MappingBacking::Memory(backing) => {
                // This mapping is backed by an FD or is a shared anonymous mapping. Just map the
                // range of the memory object covering the moved pages. If the memory object already
                // had COW semantics, this preserves them.
                let (dst_memory_offset, memory) =
                    (backing.address_to_offset(src_addr), backing.memory().clone());

                let new_address = self.map_memory(
                    mm,
                    dst_addr_for_map,
                    memory,
                    dst_memory_offset,
                    dst_length,
                    src_mapping.flags(),
                    src_mapping.max_access(),
                    false,
                    src_mapping.name().to_owned(),
                    released_mappings,
                )?;

                if src_length != 0 && !keep_source {
                    // Only unmap the source range if this is not a copy and if there was not a specific
                    // request to not unmap. It was checked earlier that in case of src_length == 0
                    // this mapping is MAP_SHARED.
                    self.unmap(mm, src_addr, src_length, released_mappings)?;
                }

                return Ok(new_address);
            }
        };
    }
1030
1031    // Checks if an operation may be performed over the target mapping that may
1032    // result in a split mapping.
1033    //
1034    // An operation may be forbidden if the target mapping only partially covers
1035    // an existing mapping with the `MappingOptions::DONT_SPLIT` flag set.
1036    fn check_has_unauthorized_splits(&self, addr: UserAddress, length: usize) -> bool {
1037        let query_range = addr..addr.saturating_add(length);
1038        let mut intersection = self.mappings.range(query_range.clone());
1039
1040        // A mapping is not OK if it disallows splitting and the target range
1041        // does not fully cover the mapping range.
1042        let check_if_mapping_has_unauthorized_split =
1043            |mapping: Option<(&Range<UserAddress>, &Mapping)>| {
1044                mapping.is_some_and(|(mapping_range, mapping)| {
1045                    mapping.flags().contains(MappingFlags::DONT_SPLIT)
1046                        && (mapping_range.start < query_range.start
1047                            || query_range.end < mapping_range.end)
1048                })
1049            };
1050
1051        // We only check the first and last mappings in the range because naturally,
1052        // the mappings in the middle are fully covered by the target mapping and
1053        // won't be split.
1054        check_if_mapping_has_unauthorized_split(intersection.next())
1055            || check_if_mapping_has_unauthorized_split(intersection.next_back())
1056    }
1057
    /// Unmaps the specified range. Unmapped mappings are placed in `released_mappings`.
    ///
    /// `addr` must be page-aligned; `length` is rounded up to a whole number of pages and must
    /// be non-zero after rounding. Fails with EINVAL if the range would partially cover a
    /// mapping marked `DONT_SPLIT`.
    fn unmap(
        &mut self,
        mm: &Arc<MemoryManager>,
        addr: UserAddress,
        length: usize,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno> {
        if !addr.is_aligned(*PAGE_SIZE) {
            return error!(EINVAL);
        }
        let length = round_up_to_system_page_size(length)?;
        if length == 0 {
            return error!(EINVAL);
        }

        if self.check_has_unauthorized_splits(addr, length) {
            return error!(EINVAL);
        }

        // Unmap the range, including the tail of any range that would have been split. This
        // operation is safe because we're operating on another process.
        #[allow(
            clippy::undocumented_unsafe_blocks,
            reason = "Force documented unsafe blocks in Starnix"
        )]
        match unsafe { self.user_vmar.unmap(addr.ptr(), length) } {
            // NOT_FOUND is tolerated: the range may already be (partially) unmapped.
            Ok(_) => (),
            Err(zx::Status::NOT_FOUND) => (),
            Err(zx::Status::INVALID_ARGS) => return error!(EINVAL),
            Err(status) => {
                impossible_error(status);
            }
        };

        // Keep our bookkeeping (`self.mappings` etc.) in sync with the VMAR change.
        self.update_after_unmap(mm, addr, length, released_mappings)?;

        Ok(())
    }
1097
1098    // Updates `self.mappings` after the specified range was unmaped.
1099    //
1100    // The range to unmap can span multiple mappings, and can split mappings if
1101    // the range start or end falls in the middle of a mapping.
1102    //
1103    // Private anonymous memory is contained in the same memory object; The pages of that object
1104    // that are no longer reachable should be released.
1105    //
1106    // File-backed mappings don't need to have their memory object modified.
1107    //
1108    // Unmapped mappings are placed in `released_mappings`.
1109    fn update_after_unmap(
1110        &mut self,
1111        mm: &Arc<MemoryManager>,
1112        addr: UserAddress,
1113        length: usize,
1114        released_mappings: &mut ReleasedMappings,
1115    ) -> Result<(), Errno> {
1116        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
1117        let unmap_range = addr..end_addr;
1118
1119        // Remove any shadow mappings for mlock()'d pages that are now unmapped.
1120        released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(unmap_range.clone()));
1121
1122        for (range, mapping) in self.mappings.range(unmap_range.clone()) {
1123            // Deallocate any pages in the private, anonymous backing that are now unreachable.
1124            if let MappingBacking::PrivateAnonymous = self.get_mapping_backing(mapping) {
1125                let unmapped_range = &unmap_range.intersect(range);
1126
1127                mm.inflight_vmspliced_payloads
1128                    .handle_unmapping(&self.private_anonymous.backing, unmapped_range)?;
1129
1130                self.private_anonymous
1131                    .zero(unmapped_range.start, unmapped_range.end - unmapped_range.start)?;
1132            }
1133        }
1134        released_mappings.extend(self.mappings.remove(unmap_range));
1135        return Ok(());
1136    }
1137
1138    fn protect_vmar_range(
1139        &self,
1140        addr: UserAddress,
1141        length: usize,
1142        prot_flags: ProtectionFlags,
1143    ) -> Result<(), Errno> {
1144        let vmar_flags = prot_flags.to_vmar_flags();
1145        // SAFETY: Modifying user vmar
1146        unsafe { self.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(|s| match s {
1147            zx::Status::INVALID_ARGS => errno!(EINVAL),
1148            zx::Status::NOT_FOUND => errno!(ENOMEM),
1149            zx::Status::ACCESS_DENIED => errno!(EACCES),
1150            _ => impossible_error(s),
1151        })
1152    }
1153
    /// Implements `mprotect` for `[addr, addr + length)`.
    ///
    /// With `PROT_GROWSDOWN` the protected range is extended downward through all contiguous
    /// GROWSDOWN mappings that share the same access flags, per mprotect(2). SELinux hooks are
    /// consulted for every mapping in the range before any protection change is made; the VMAR
    /// protections are then updated in a single call and the per-mapping flags are updated
    /// afterwards. Replaced mapping entries are placed in `released_mappings`.
    fn protect(
        &mut self,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
        prot_flags: ProtectionFlags,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno> {
        let vmar_flags = prot_flags.to_vmar_flags();
        let page_size = *PAGE_SIZE;
        let end = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(page_size)?;

        if self.check_has_unauthorized_splits(addr, length) {
            return error!(EINVAL);
        }

        let prot_range = if prot_flags.contains(ProtectionFlags::GROWSDOWN) {
            let mut start = addr;
            let Some((range, mapping)) = self.mappings.get(start) else {
                return error!(EINVAL);
            };
            // Ensure that the mapping has GROWSDOWN if PROT_GROWSDOWN was specified.
            if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
                return error!(EINVAL);
            }
            let access_flags = mapping.flags().access_flags();
            // From <https://man7.org/linux/man-pages/man2/mprotect.2.html>:
            //
            //   PROT_GROWSDOWN
            //     Apply the protection mode down to the beginning of a
            //     mapping that grows downward (which should be a stack
            //     segment or a segment mapped with the MAP_GROWSDOWN flag
            //     set).
            start = range.start;
            // Walk downward one page at a time through adjacent GROWSDOWN mappings with
            // identical access flags, extending the range to each mapping's start.
            while let Some((range, mapping)) =
                self.mappings.get(start.saturating_sub(page_size as usize))
            {
                if !mapping.flags().contains(MappingFlags::GROWSDOWN)
                    || mapping.flags().access_flags() != access_flags
                {
                    break;
                }
                start = range.start;
            }
            start..end
        } else {
            addr..end
        };

        let addr = prot_range.start;
        let length = prot_range.end - prot_range.start;

        // TODO: We should check the max_access flags on all the mappings in this range.
        //       There are cases where max_access is more restrictive than the Zircon rights
        //       we hold on the underlying VMOs.

        // TODO(https://fxbug.dev/411617451): `mprotect` should apply the protection flags
        // until it encounters a mapping that doesn't allow it, rather than not apply the protection
        // flags at all if a single mapping doesn't allow it.
        for (range, mapping) in self.mappings.range(prot_range.clone()) {
            security::file_mprotect(current_task, range, mapping, prot_flags)?;
        }

        // Make one call to mprotect to update all the zircon protections.
        // SAFETY: This is safe because the vmar belongs to a different process.
        unsafe { self.user_vmar.protect(addr.ptr(), length, vmar_flags) }.map_err(|s| match s {
            zx::Status::INVALID_ARGS => errno!(EINVAL),
            zx::Status::NOT_FOUND => {
                track_stub!(
                    TODO("https://fxbug.dev/322875024"),
                    "mprotect: succeed and update prot after NOT_FOUND"
                );
                errno!(EINVAL)
            }
            zx::Status::ACCESS_DENIED => errno!(EACCES),
            _ => impossible_error(s),
        })?;

        // Update the flags on each mapping in the range.
        let mut updates = vec![];
        for (range, mapping) in self.mappings.range(prot_range.clone()) {
            if mapping.flags().contains(MappingFlags::UFFD) {
                track_stub!(
                    TODO("https://fxbug.dev/297375964"),
                    "mprotect on uffd-registered range should not alter protections"
                );
                return error!(EINVAL);
            }
            let range = range.intersect(&prot_range);
            let mut mapping = mapping.clone();
            mapping.set_flags(mapping.flags().with_access_flags(prot_flags));
            updates.push((range, mapping));
        }
        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
        for (range, mapping) in updates {
            released_mappings.extend(self.mappings.insert(range, mapping));
        }
        Ok(())
    }
1253
    /// Implements `madvise` for `[addr, addr + length)` (end rounded up to a page boundary).
    ///
    /// Flag-style advice (DONTFORK/DOFORK, WIPEONFORK/KEEPONFORK, etc.) updates per-mapping
    /// flags; operational advice (DONTNEED, WILLNEED, FREE) is translated into a VMO op on the
    /// mapping's backing memory. Shared mappings are skipped for operational advice. Replaced
    /// mapping entries are placed in `released_mappings`.
    fn madvise(
        &mut self,
        _current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
        advice: u32,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno> {
        if !addr.is_aligned(*PAGE_SIZE) {
            return error!(EINVAL);
        }

        let end_addr =
            addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?.round_up(*PAGE_SIZE)?;
        if end_addr > self.max_address() {
            return error!(EFAULT);
        }

        if advice == MADV_NORMAL {
            track_stub!(TODO("https://fxbug.dev/322874202"), "madvise undo hints for MADV_NORMAL");
            return Ok(());
        }

        let mut updates = vec![];
        let range_for_op = addr..end_addr;
        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
            // Clamp to the part of this mapping that the request actually covers.
            let range_to_zero = range.intersect(&range_for_op);
            if range_to_zero.is_empty() {
                continue;
            }
            let start_offset = mapping.address_to_offset(range_to_zero.start);
            let end_offset = mapping.address_to_offset(range_to_zero.end);
            if advice == MADV_DONTFORK
                || advice == MADV_DOFORK
                || advice == MADV_WIPEONFORK
                || advice == MADV_KEEPONFORK
                || advice == MADV_DONTDUMP
                || advice == MADV_DODUMP
                || advice == MADV_MERGEABLE
                || advice == MADV_UNMERGEABLE
            {
                // WIPEONFORK is only supported on private anonymous mappings per madvise(2).
                // KEEPONFORK can be specified on ranges that cover other sorts of mappings. It should
                // have no effect on mappings that are not private and anonymous as such mappings cannot
                // have the WIPEONFORK option set.
                if advice == MADV_WIPEONFORK && !mapping.private_anonymous() {
                    return error!(EINVAL);
                }
                let new_flags = match advice {
                    MADV_DONTFORK => mapping.flags() | MappingFlags::DONTFORK,
                    MADV_DOFORK => mapping.flags() & MappingFlags::DONTFORK.complement(),
                    MADV_WIPEONFORK => mapping.flags() | MappingFlags::WIPEONFORK,
                    MADV_KEEPONFORK => mapping.flags() & MappingFlags::WIPEONFORK.complement(),
                    MADV_DONTDUMP => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTDUMP");
                        mapping.flags()
                    }
                    MADV_DODUMP => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DODUMP");
                        mapping.flags()
                    }
                    MADV_MERGEABLE => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_MERGEABLE");
                        mapping.flags()
                    }
                    MADV_UNMERGEABLE => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_UNMERGEABLE");
                        mapping.flags()
                    }
                    // Only the variants in this match should be reachable given the condition for
                    // the containing branch.
                    unknown_advice => unreachable!("unknown advice {unknown_advice}"),
                };
                let mut new_mapping = mapping.clone();
                new_mapping.set_flags(new_flags);
                updates.push((range_to_zero, new_mapping));
            } else {
                // Operational advice does not apply to shared mappings here.
                if mapping.flags().contains(MappingFlags::SHARED) {
                    continue;
                }
                let op = match advice {
                    MADV_DONTNEED if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
                        // Note: we cannot simply implement MADV_DONTNEED with
                        // zx::VmoOp::DONT_NEED because they have different
                        // semantics.
                        track_stub!(
                            TODO("https://fxbug.dev/322874496"),
                            "MADV_DONTNEED with file-backed mapping"
                        );
                        return error!(EINVAL);
                    }
                    MADV_DONTNEED if mapping.flags().contains(MappingFlags::LOCKED) => {
                        return error!(EINVAL);
                    }
                    MADV_DONTNEED => zx::VmoOp::ZERO,
                    MADV_DONTNEED_LOCKED => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_DONTNEED_LOCKED");
                        return error!(EINVAL);
                    }
                    MADV_WILLNEED => {
                        // Writable mappings need pages committed; read-only ones only prefetched.
                        if mapping.flags().contains(MappingFlags::WRITE) {
                            zx::VmoOp::COMMIT
                        } else {
                            zx::VmoOp::PREFETCH
                        }
                    }
                    MADV_COLD => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLD");
                        return error!(EINVAL);
                    }
                    MADV_PAGEOUT => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_PAGEOUT");
                        return error!(EINVAL);
                    }
                    MADV_POPULATE_READ => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_POPULATE_READ");
                        return error!(EINVAL);
                    }
                    MADV_RANDOM => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_RANDOM");
                        return error!(EINVAL);
                    }
                    MADV_SEQUENTIAL => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SEQUENTIAL");
                        return error!(EINVAL);
                    }
                    MADV_FREE if !mapping.flags().contains(MappingFlags::ANONYMOUS) => {
                        track_stub!(
                            TODO("https://fxbug.dev/411748419"),
                            "MADV_FREE with file-backed mapping"
                        );
                        return error!(EINVAL);
                    }
                    MADV_FREE if mapping.flags().contains(MappingFlags::LOCKED) => {
                        return error!(EINVAL);
                    }
                    MADV_FREE => {
                        track_stub!(TODO("https://fxbug.dev/411748419"), "MADV_FREE");
                        // TODO(https://fxbug.dev/411748419) For now, treat MADV_FREE like
                        // MADV_DONTNEED as a stopgap until we have proper support.
                        zx::VmoOp::ZERO
                    }
                    MADV_REMOVE => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_REMOVE");
                        return error!(EINVAL);
                    }
                    MADV_HWPOISON => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HWPOISON");
                        return error!(EINVAL);
                    }
                    MADV_SOFT_OFFLINE => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_SOFT_OFFLINE");
                        return error!(EINVAL);
                    }
                    MADV_HUGEPAGE => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_HUGEPAGE");
                        return error!(EINVAL);
                    }
                    MADV_COLLAPSE => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "MADV_COLLAPSE");
                        return error!(EINVAL);
                    }
                    MADV_NOHUGEPAGE => return Ok(()),
                    advice => {
                        track_stub!(TODO("https://fxbug.dev/322874202"), "madvise", advice);
                        return error!(EINVAL);
                    }
                };

                // Apply the VMO op to whichever object backs this mapping.
                let memory = match self.get_mapping_backing(mapping) {
                    MappingBacking::Memory(backing) => backing.memory(),
                    MappingBacking::PrivateAnonymous => &self.private_anonymous.backing,
                };
                memory.op_range(op, start_offset, end_offset - start_offset).map_err(
                    |s| match s {
                        zx::Status::OUT_OF_RANGE => errno!(EINVAL),
                        zx::Status::NO_MEMORY => errno!(ENOMEM),
                        zx::Status::INVALID_ARGS => errno!(EINVAL),
                        zx::Status::ACCESS_DENIED => errno!(EACCES),
                        _ => impossible_error(s),
                    },
                )?;
            }
        }
        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
        for (range, mapping) in updates {
            released_mappings.extend(self.mappings.insert(range, mapping));
        }
        Ok(())
    }
1444
1445    fn mlock<L>(
1446        &mut self,
1447        current_task: &CurrentTask,
1448        locked: &mut Locked<L>,
1449        desired_addr: UserAddress,
1450        desired_length: usize,
1451        on_fault: bool,
1452        released_mappings: &mut ReleasedMappings,
1453    ) -> Result<(), Errno>
1454    where
1455        L: LockBefore<ThreadGroupLimits>,
1456    {
1457        let desired_end_addr =
1458            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
1459        let start_addr = round_down_to_system_page_size(desired_addr)?;
1460        let end_addr = round_up_to_system_page_size(desired_end_addr)?;
1461
1462        let mut updates = vec![];
1463        let mut bytes_mapped_in_range = 0;
1464        let mut num_new_locked_bytes = 0;
1465        let mut failed_to_lock = false;
1466        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
1467            let mut range = range.clone();
1468            let mut mapping = mapping.clone();
1469
1470            // Handle mappings that start before the region to be locked.
1471            range.start = std::cmp::max(range.start, start_addr);
1472            // Handle mappings that extend past the region to be locked.
1473            range.end = std::cmp::min(range.end, end_addr);
1474
1475            bytes_mapped_in_range += (range.end - range.start) as u64;
1476
1477            // PROT_NONE mappings generate ENOMEM but are left locked.
1478            if !mapping
1479                .flags()
1480                .intersects(MappingFlags::READ | MappingFlags::WRITE | MappingFlags::EXEC)
1481            {
1482                failed_to_lock = true;
1483            }
1484
1485            if !mapping.flags().contains(MappingFlags::LOCKED) {
1486                num_new_locked_bytes += (range.end - range.start) as u64;
1487                let shadow_mapping = match current_task.kernel().features.mlock_pin_flavor {
1488                    // Pin the memory by mapping the backing memory into the high priority vmar.
1489                    MlockPinFlavor::ShadowProcess => {
1490                        let shadow_process =
1491                            current_task.kernel().expando.get_or_try_init(|| {
1492                                memory_pinning::ShadowProcess::new(zx::Name::new_lossy(
1493                                    "starnix_mlock_pins",
1494                                ))
1495                                .map(MlockShadowProcess)
1496                                .map_err(|_| errno!(EPERM))
1497                            })?;
1498
1499                        let (vmo, offset) = match self.get_mapping_backing(&mapping) {
1500                            MappingBacking::Memory(m) => (
1501                                m.memory().as_vmo().ok_or_else(|| errno!(ENOMEM))?,
1502                                m.address_to_offset(range.start),
1503                            ),
1504                            MappingBacking::PrivateAnonymous => (
1505                                self.private_anonymous
1506                                    .backing
1507                                    .as_vmo()
1508                                    .ok_or_else(|| errno!(ENOMEM))?,
1509                                range.start.ptr() as u64,
1510                            ),
1511                        };
1512                        Some(shadow_process.0.pin_pages(vmo, offset, range.end - range.start)?)
1513                    }
1514
1515                    // Relying on VMAR-level operations means just flags are set per-mapping.
1516                    MlockPinFlavor::Noop | MlockPinFlavor::VmarAlwaysNeed => None,
1517                };
1518                mapping.set_mlock();
1519                updates.push((range, mapping, shadow_mapping));
1520            }
1521        }
1522
1523        if bytes_mapped_in_range as usize != end_addr - start_addr {
1524            return error!(ENOMEM);
1525        }
1526
1527        let memlock_rlimit = current_task.thread_group().get_rlimit(locked, Resource::MEMLOCK);
1528        if self.total_locked_bytes() + num_new_locked_bytes > memlock_rlimit {
1529            if crate::security::check_task_capable(current_task, CAP_IPC_LOCK).is_err() {
1530                let code = if memlock_rlimit > 0 { errno!(ENOMEM) } else { errno!(EPERM) };
1531                return Err(code);
1532            }
1533        }
1534
1535        let op_range_status_to_errno = |e| match e {
1536            zx::Status::BAD_STATE | zx::Status::NOT_SUPPORTED => errno!(ENOMEM),
1537            zx::Status::INVALID_ARGS | zx::Status::OUT_OF_RANGE => errno!(EINVAL),
1538            zx::Status::ACCESS_DENIED => {
1539                unreachable!("user vmar should always have needed rights")
1540            }
1541            zx::Status::BAD_HANDLE => {
1542                unreachable!("user vmar should always be a valid handle")
1543            }
1544            zx::Status::WRONG_TYPE => unreachable!("user vmar handle should be a vmar"),
1545            _ => unreachable!("unknown error from op_range on user vmar for mlock: {e}"),
1546        };
1547
1548        if !on_fault && !current_task.kernel().features.mlock_always_onfault {
1549            self.user_vmar
1550                .op_range(zx::VmarOp::PREFETCH, start_addr.ptr(), end_addr - start_addr)
1551                .map_err(op_range_status_to_errno)?;
1552        }
1553
1554        match current_task.kernel().features.mlock_pin_flavor {
1555            MlockPinFlavor::VmarAlwaysNeed => {
1556                self.user_vmar
1557                    .op_range(zx::VmarOp::ALWAYS_NEED, start_addr.ptr(), end_addr - start_addr)
1558                    .map_err(op_range_status_to_errno)?;
1559            }
1560            // The shadow process doesn't use any vmar-level operations to pin memory.
1561            MlockPinFlavor::Noop | MlockPinFlavor::ShadowProcess => (),
1562        }
1563
1564        for (range, mapping, shadow_mapping) in updates {
1565            if let Some(shadow_mapping) = shadow_mapping {
1566                released_mappings.extend_pins(
1567                    self.shadow_mappings_for_mlock.insert(range.clone(), shadow_mapping),
1568                );
1569            }
1570            released_mappings.extend(self.mappings.insert(range, mapping));
1571        }
1572
1573        if failed_to_lock { error!(ENOMEM) } else { Ok(()) }
1574    }
1575
    /// Clears the `LOCKED` flag on all mappings intersecting the requested range.
    ///
    /// The `desired_addr`/`desired_length` range is rounded out to page boundaries.
    /// Returns EINVAL if the range overflows, and ENOMEM if any page in the
    /// rounded range is unmapped (the whole range must be mapped, as with mlock).
    fn munlock(
        &mut self,
        _current_task: &CurrentTask,
        desired_addr: UserAddress,
        desired_length: usize,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno> {
        let desired_end_addr =
            desired_addr.checked_add(desired_length).ok_or_else(|| errno!(EINVAL))?;
        let start_addr = round_down_to_system_page_size(desired_addr)?;
        let end_addr = round_up_to_system_page_size(desired_end_addr)?;

        let mut updates = vec![];
        let mut bytes_mapped_in_range = 0;
        for (range, mapping) in self.mappings.range(start_addr..end_addr) {
            let mut range = range.clone();
            let mut mapping = mapping.clone();

            // Handle mappings that start before the region to be unlocked.
            range.start = std::cmp::max(range.start, start_addr);
            // Handle mappings that extend past the region to be unlocked.
            range.end = std::cmp::min(range.end, end_addr);

            bytes_mapped_in_range += (range.end - range.start) as u64;

            if mapping.flags().contains(MappingFlags::LOCKED) {
                // This clears the locking for the shadow process pin flavor. It's not currently
                // possible to actually unlock pages that were locked with the
                // ZX_VMAR_OP_ALWAYS_NEED pin flavor.
                mapping.clear_mlock();
                updates.push((range, mapping));
            }
        }

        // The entire rounded range must have been covered by mappings.
        if bytes_mapped_in_range as usize != end_addr - start_addr {
            return error!(ENOMEM);
        }

        for (range, mapping) in updates {
            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
            // Drop any shadow-process pin covering this range so its pages can be evicted.
            released_mappings.extend_pins(self.shadow_mappings_for_mlock.remove(range));
        }

        Ok(())
    }
1621
1622    pub fn total_locked_bytes(&self) -> u64 {
1623        self.num_locked_bytes(
1624            UserAddress::from(self.user_vmar_info.base as u64)
1625                ..UserAddress::from((self.user_vmar_info.base + self.user_vmar_info.len) as u64),
1626        )
1627    }
1628
1629    pub fn num_locked_bytes(&self, range: impl RangeBounds<UserAddress>) -> u64 {
1630        self.mappings
1631            .range(range)
1632            .filter(|(_, mapping)| mapping.flags().contains(MappingFlags::LOCKED))
1633            .map(|(range, _)| (range.end - range.start) as u64)
1634            .sum()
1635    }
1636
1637    fn max_address(&self) -> UserAddress {
1638        UserAddress::from_ptr(self.user_vmar_info.base + self.user_vmar_info.len)
1639    }
1640
    /// Builds vmsplice payloads describing the memory that backs `buffers`.
    ///
    /// Each user buffer is split into one segment per underlying mapping; a
    /// segment records the backing memory object plus the offset within it, so
    /// the pages can be accessed later without re-resolving the mappings.
    ///
    /// Returns EFAULT if a buffer overflows or extends past the end of the
    /// address space (via `get_contiguous_mappings_at`). A buffer that is only
    /// partially mapped yields segments for its contiguous mapped prefix only.
    fn get_mappings_for_vmsplice(
        &self,
        mm: &Arc<MemoryManager>,
        buffers: &UserBuffers,
    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
        let mut vmsplice_mappings = Vec::new();

        for UserBuffer { mut address, length } in buffers.iter().copied() {
            let mappings = self.get_contiguous_mappings_at(address, length)?;
            for (mapping, length) in mappings {
                let vmsplice_payload = match self.get_mapping_backing(mapping) {
                    MappingBacking::Memory(m) => VmsplicePayloadSegment {
                        addr_offset: address,
                        length,
                        memory: m.memory().clone(),
                        memory_offset: m.address_to_offset(address),
                    },
                    // Private anonymous memory is backed at an offset equal to the
                    // user address itself.
                    MappingBacking::PrivateAnonymous => VmsplicePayloadSegment {
                        addr_offset: address,
                        length,
                        memory: self.private_anonymous.backing.clone(),
                        memory_offset: address.ptr() as u64,
                    },
                };
                vmsplice_mappings.push(VmsplicePayload::new(Arc::downgrade(mm), vmsplice_payload));

                // Advance to the start of the next mapping's segment.
                address = (address + length)?;
            }
        }

        Ok(vmsplice_mappings)
    }
1673
    /// Returns all the mappings starting at `addr`, and continuing until either `length` bytes have
    /// been covered or an unmapped page is reached.
    ///
    /// Mappings are returned in ascending order along with the number of bytes that intersect the
    /// requested range. The returned mappings are guaranteed to be contiguous and the total length
    /// corresponds to the number of contiguous mapped bytes starting from `addr`, i.e.:
    /// - 0 (empty iterator) if `addr` is not mapped.
    /// - exactly `length` if the requested range is fully mapped.
    /// - the offset of the first unmapped page (between 0 and `length`) if the requested range is
    ///   only partially mapped.
    ///
    /// Returns EFAULT if the requested range overflows or extends past the end of the vmar.
    fn get_contiguous_mappings_at(
        &self,
        addr: UserAddress,
        length: usize,
    ) -> Result<impl Iterator<Item = (&Mapping, usize)>, Errno> {
        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EFAULT))?;
        if end_addr > self.max_address() {
            return error!(EFAULT);
        }

        // Iterate over all contiguous mappings intersecting the requested range.
        // `offset` tracks how many bytes have been yielded so far; `prev_range_end`
        // remembers where the previous mapping ended so gaps terminate the iterator.
        let mut mappings = self.mappings.range(addr..end_addr);
        let mut prev_range_end = None;
        let mut offset = 0;
        let result = std::iter::from_fn(move || {
            if offset != length {
                if let Some((range, mapping)) = mappings.next() {
                    return match prev_range_end {
                        // If this is the first mapping that we are considering, it may not actually
                        // contain `addr` at all.
                        None if range.start > addr => None,

                        // Subsequent mappings may not be contiguous.
                        Some(prev_range_end) if range.start != prev_range_end => None,

                        // This mapping can be returned.
                        _ => {
                            // Clamp to the requested range end and subtract what was
                            // already yielded to get this mapping's contribution.
                            let mapping_length = std::cmp::min(length, range.end - addr) - offset;
                            offset += mapping_length;
                            prev_range_end = Some(range.end);
                            Some((mapping, mapping_length))
                        }
                    };
                }
            }

            None
        });

        Ok(result)
    }
1727
1728    /// Determines whether a fault at the given address could be covered by extending a growsdown
1729    /// mapping.
1730    ///
1731    /// If the address already belongs to a mapping, this function returns `None`. If the next
1732    /// mapping above the given address has the `MappingFlags::GROWSDOWN` flag, this function
1733    /// returns the address at which that mapping starts and the mapping itself. Otherwise, this
1734    /// function returns `None`.
1735    fn find_growsdown_mapping(&self, addr: UserAddress) -> Option<(UserAddress, &Mapping)> {
1736        match self.mappings.range(addr..).next() {
1737            Some((range, mapping)) => {
1738                if range.contains(&addr) {
1739                    // |addr| is already contained within a mapping, nothing to grow.
1740                    return None;
1741                } else if !mapping.flags().contains(MappingFlags::GROWSDOWN) {
1742                    // The next mapping above the given address does not have the
1743                    // `MappingFlags::GROWSDOWN` flag.
1744                    None
1745                } else {
1746                    Some((range.start, mapping))
1747                }
1748            }
1749            None => None,
1750        }
1751    }
1752
    /// Determines if an access at a given address could be covered by extending a growsdown mapping
    /// and extends it if possible. Returns true if the given address is covered by a mapping.
    fn extend_growsdown_mapping_to_address(
        &mut self,
        mm: &Arc<MemoryManager>,
        addr: UserAddress,
        is_write: bool,
    ) -> Result<bool, Error> {
        let Some((mapping_low_addr, mapping_to_grow)) = self.find_growsdown_mapping(addr) else {
            return Ok(false);
        };
        if is_write && !mapping_to_grow.can_write() {
            // Don't grow a read-only GROWSDOWN mapping for a write fault, it won't work.
            return Ok(false);
        }
        if !mapping_to_grow.flags().contains(MappingFlags::ANONYMOUS) {
            // Currently, we only grow anonymous mappings.
            return Ok(false);
        }
        // Grow from the page containing the faulting address up to the current low end of
        // the growsdown mapping.
        let low_addr = (addr - (addr.ptr() as u64 % *PAGE_SIZE))?;
        let high_addr = mapping_low_addr;

        let length = high_addr
            .ptr()
            .checked_sub(low_addr.ptr())
            .ok_or_else(|| anyhow!("Invalid growth range"))?;

        // The new pages inherit the grown mapping's access flags, options, and name.
        let mut released_mappings = ReleasedMappings::default();
        self.map_anonymous(
            mm,
            DesiredAddress::FixedOverwrite(low_addr),
            length,
            mapping_to_grow.flags().access_flags(),
            mapping_to_grow.flags().options(),
            mapping_to_grow.name().to_owned(),
            &mut released_mappings,
        )?;
        // We can't have any released mappings because `find_growsdown_mapping` will return None if
        // the mapping already exists in this range.
        assert!(
            released_mappings.is_empty(),
            "expected to not remove mappings by inserting, got {released_mappings:#?}"
        );
        Ok(true)
    }
1798
    /// Reads exactly `bytes.len()` bytes of memory.
    ///
    /// Fails with EFAULT unless the entire requested range is mapped and
    /// readable; a partial read is reported as a failure (although some prefix
    /// of `bytes` may have been filled in).
    ///
    /// # Parameters
    /// - `addr`: The address to read data from.
    /// - `bytes`: The byte array to read into.
    fn read_memory<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        let mut bytes_read = 0;
        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
            let next_offset = bytes_read + len;
            self.read_mapping_memory(
                (addr + bytes_read)?,
                mapping,
                &mut bytes[bytes_read..next_offset],
            )?;
            bytes_read = next_offset;
        }

        // `get_contiguous_mappings_at` stops at the first unmapped page, so a short
        // total means part of the requested range was not mapped.
        if bytes_read != bytes.len() {
            error!(EFAULT)
        } else {
            // SAFETY: The created slice is properly aligned/sized since it
            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
            // the same layout as `T`. Also note that `bytes_read` bytes have
            // been properly initialized.
            let bytes = unsafe {
                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
            };
            Ok(bytes)
        }
    }
1833
1834    /// Reads exactly `bytes.len()` bytes of memory from `addr`.
1835    ///
1836    /// # Parameters
1837    /// - `addr`: The address to read data from.
1838    /// - `bytes`: The byte array to read into.
1839    fn read_mapping_memory<'a>(
1840        &self,
1841        addr: UserAddress,
1842        mapping: &Mapping,
1843        bytes: &'a mut [MaybeUninit<u8>],
1844    ) -> Result<&'a mut [u8], Errno> {
1845        if !mapping.can_read() {
1846            return error!(EFAULT, "read_mapping_memory called on unreadable mapping");
1847        }
1848        match self.get_mapping_backing(mapping) {
1849            MappingBacking::Memory(backing) => backing.read_memory(addr, bytes),
1850            MappingBacking::PrivateAnonymous => self.private_anonymous.read_memory(addr, bytes),
1851        }
1852    }
1853
    /// Reads bytes starting at `addr`, continuing until either `bytes.len()` bytes have been read
    /// or no more bytes can be read.
    ///
    /// This is used, for example, to read null-terminated strings where the exact length is not
    /// known, only the maximum length is.
    ///
    /// # Parameters
    /// - `addr`: The address to read data from.
    /// - `bytes`: The byte array to read into.
    fn read_memory_partial<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        let mut bytes_read = 0;
        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
            let next_offset = bytes_read + len;
            // Unlike `read_memory`, a failed read here just ends the walk; whatever
            // was read so far is still returned.
            if self
                .read_mapping_memory(
                    (addr + bytes_read)?,
                    mapping,
                    &mut bytes[bytes_read..next_offset],
                )
                .is_err()
            {
                break;
            }
            bytes_read = next_offset;
        }

        // If at least one byte was requested but we got none, it means that `addr` was invalid.
        if !bytes.is_empty() && bytes_read == 0 {
            error!(EFAULT)
        } else {
            // SAFETY: The created slice is properly aligned/sized since it
            // is a subset of the `bytes` slice. Note that `MaybeUninit<T>` has
            // the same layout as `T`. Also note that `bytes_read` bytes have
            // been properly initialized.
            let bytes = unsafe {
                std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u8, bytes_read)
            };
            Ok(bytes)
        }
    }
1898
1899    /// Like `read_memory_partial` but only returns the bytes up to and including
1900    /// a null (zero) byte.
1901    fn read_memory_partial_until_null_byte<'a>(
1902        &self,
1903        addr: UserAddress,
1904        bytes: &'a mut [MaybeUninit<u8>],
1905    ) -> Result<&'a mut [u8], Errno> {
1906        let read_bytes = self.read_memory_partial(addr, bytes)?;
1907        let max_len = memchr::memchr(b'\0', read_bytes)
1908            .map_or_else(|| read_bytes.len(), |null_index| null_index + 1);
1909        Ok(&mut read_bytes[..max_len])
1910    }
1911
1912    /// Writes the provided bytes.
1913    ///
1914    /// In case of success, the number of bytes written will always be `bytes.len()`.
1915    ///
1916    /// # Parameters
1917    /// - `addr`: The address to write to.
1918    /// - `bytes`: The bytes to write.
1919    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1920        let mut bytes_written = 0;
1921        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1922            let next_offset = bytes_written + len;
1923            self.write_mapping_memory(
1924                (addr + bytes_written)?,
1925                mapping,
1926                &bytes[bytes_written..next_offset],
1927            )?;
1928            bytes_written = next_offset;
1929        }
1930
1931        if bytes_written != bytes.len() { error!(EFAULT) } else { Ok(bytes.len()) }
1932    }
1933
1934    /// Writes the provided bytes to `addr`.
1935    ///
1936    /// # Parameters
1937    /// - `addr`: The address to write to.
1938    /// - `bytes`: The bytes to write to the memory object.
1939    fn write_mapping_memory(
1940        &self,
1941        addr: UserAddress,
1942        mapping: &Mapping,
1943        bytes: &[u8],
1944    ) -> Result<(), Errno> {
1945        if !mapping.can_write() {
1946            return error!(EFAULT, "write_mapping_memory called on unwritable memory");
1947        }
1948        match self.get_mapping_backing(mapping) {
1949            MappingBacking::Memory(backing) => backing.write_memory(addr, bytes),
1950            MappingBacking::PrivateAnonymous => self.private_anonymous.write_memory(addr, bytes),
1951        }
1952    }
1953
1954    /// Writes bytes starting at `addr`, continuing until either `bytes.len()` bytes have been
1955    /// written or no more bytes can be written.
1956    ///
1957    /// # Parameters
1958    /// - `addr`: The address to read data from.
1959    /// - `bytes`: The byte array to write from.
1960    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
1961        let mut bytes_written = 0;
1962        for (mapping, len) in self.get_contiguous_mappings_at(addr, bytes.len())? {
1963            let next_offset = bytes_written + len;
1964            if self
1965                .write_mapping_memory(
1966                    (addr + bytes_written)?,
1967                    mapping,
1968                    &bytes[bytes_written..next_offset],
1969                )
1970                .is_err()
1971            {
1972                break;
1973            }
1974            bytes_written = next_offset;
1975        }
1976
1977        if !bytes.is_empty() && bytes_written == 0 { error!(EFAULT) } else { Ok(bytes.len()) }
1978    }
1979
1980    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
1981        let mut bytes_written = 0;
1982        for (mapping, len) in self.get_contiguous_mappings_at(addr, length)? {
1983            let next_offset = bytes_written + len;
1984            if self.zero_mapping((addr + bytes_written)?, mapping, len).is_err() {
1985                break;
1986            }
1987            bytes_written = next_offset;
1988        }
1989
1990        if length != bytes_written { error!(EFAULT) } else { Ok(length) }
1991    }
1992
1993    fn zero_mapping(
1994        &self,
1995        addr: UserAddress,
1996        mapping: &Mapping,
1997        length: usize,
1998    ) -> Result<usize, Errno> {
1999        if !mapping.can_write() {
2000            return error!(EFAULT);
2001        }
2002
2003        match self.get_mapping_backing(mapping) {
2004            MappingBacking::Memory(backing) => backing.zero(addr, length),
2005            MappingBacking::PrivateAnonymous => self.private_anonymous.zero(addr, length),
2006        }
2007    }
2008
2009    pub fn create_memory_backing(
2010        &self,
2011        base: UserAddress,
2012        memory: Arc<MemoryObject>,
2013        memory_offset: u64,
2014    ) -> MappingBacking {
2015        MappingBacking::Memory(Box::new(MappingBackingMemory::new(base, memory, memory_offset)))
2016    }
2017
    /// Returns the backing store of `mapping`. Thin accessor over the mapping's
    /// internal backing representation.
    pub fn get_mapping_backing<'a>(&self, mapping: &'a Mapping) -> &'a MappingBacking {
        mapping.get_backing_internal()
    }
2021
2022    fn get_aio_context(&self, addr: UserAddress) -> Option<(Range<UserAddress>, Arc<AioContext>)> {
2023        let Some((range, mapping)) = self.mappings.get(addr) else {
2024            return None;
2025        };
2026        let MappingNameRef::AioContext(ref aio_context) = mapping.name() else {
2027            return None;
2028        };
2029        if !mapping.can_read() {
2030            return None;
2031        }
2032        Some((range.clone(), Arc::clone(aio_context)))
2033    }
2034
2035    fn find_uffd<L>(&self, locked: &mut Locked<L>, addr: UserAddress) -> Option<Arc<UserFault>>
2036    where
2037        L: LockBefore<UserFaultInner>,
2038    {
2039        for userfault in self.userfaultfds.iter() {
2040            if let Some(userfault) = userfault.upgrade() {
2041                if userfault.contains_addr(locked, addr) {
2042                    return Some(userfault);
2043                }
2044            }
2045        }
2046        None
2047    }
2048
2049    pub fn mrelease(&self) -> Result<(), Errno> {
2050        self.private_anonymous
2051            .zero(UserAddress::from_ptr(self.user_vmar_info.base), self.user_vmar_info.len)?;
2052        return Ok(());
2053    }
2054
    /// Flushes the data and instruction caches for `range`.
    ///
    /// Fails with EFAULT if any part of the range is unmapped or unreadable.
    fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
        let mut addr = range.start;
        let size = range.end - range.start;
        for (mapping, len) in self.get_contiguous_mappings_at(addr, size)? {
            if !mapping.can_read() {
                return error!(EFAULT);
            }
            // SAFETY: This is operating on a readable restricted mode mapping and will not fault.
            zx::Status::ok(unsafe {
                zx::sys::zx_cache_flush(
                    addr.ptr() as *const u8,
                    len,
                    zx::sys::ZX_CACHE_FLUSH_DATA | zx::sys::ZX_CACHE_FLUSH_INSN,
                )
            })
            .map_err(impossible_error)?;

            addr = (addr + len).unwrap(); // unwrap since we're iterating within the address space.
        }
        // Did we flush the entire range?
        if addr != range.end { error!(EFAULT) } else { Ok(()) }
    }
2077
2078    // Returns details of mappings in the `user_vmar`, or an empty vector if the `user_vmar` has
2079    // been destroyed.
2080    fn with_zx_mappings<R>(
2081        &self,
2082        current_task: &CurrentTask,
2083        op: impl FnOnce(&[zx::MapInfo]) -> R,
2084    ) -> R {
2085        if self.user_vmar.is_invalid() {
2086            return op(&[]);
2087        };
2088
2089        MapInfoCache::get_or_init(current_task)
2090            .expect("must be able to retrieve map info cache")
2091            .with_map_infos(&self.user_vmar, |infos| {
2092                // No other https://fuchsia.dev/reference/syscalls/object_get_info?hl=en#errors
2093                // are possible, because we created the VMAR and the `zx` crate ensures that the
2094                // info query is well-formed.
2095                op(infos.expect("must be able to query mappings for private user VMAR"))
2096            })
2097    }
2098
2099    /// Register the address space managed by this memory manager for interest in
2100    /// receiving private expedited memory barriers of the given kind.
2101    pub fn register_membarrier_private_expedited(
2102        &mut self,
2103        mtype: MembarrierType,
2104    ) -> Result<(), Errno> {
2105        let registrations = &mut self.forkable_state.membarrier_registrations;
2106        match mtype {
2107            MembarrierType::Memory => {
2108                registrations.memory = true;
2109            }
2110            MembarrierType::SyncCore => {
2111                registrations.sync_core = true;
2112            }
2113        }
2114        Ok(())
2115    }
2116
2117    /// Checks if the address space managed by this memory manager is registered
2118    /// for interest in private expedited barriers of the given kind.
2119    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
2120        let registrations = &self.forkable_state.membarrier_registrations;
2121        match mtype {
2122            MembarrierType::Memory => registrations.memory,
2123            MembarrierType::SyncCore => registrations.sync_core,
2124        }
2125    }
2126
2127    fn force_write_memory(
2128        &mut self,
2129        addr: UserAddress,
2130        bytes: &[u8],
2131        released_mappings: &mut ReleasedMappings,
2132    ) -> Result<(), Errno> {
2133        let (range, mapping) = self.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
2134        if range.end < addr.saturating_add(bytes.len()) {
2135            track_stub!(
2136                TODO("https://fxbug.dev/445790710"),
2137                "ptrace poke across multiple mappings"
2138            );
2139            return error!(EFAULT);
2140        }
2141
2142        // Don't create CoW copy of shared memory, go through regular syscall writing.
2143        if mapping.flags().contains(MappingFlags::SHARED) {
2144            if !mapping.can_write() {
2145                // Linux returns EIO here instead of EFAULT.
2146                return error!(EIO);
2147            }
2148            return self.write_mapping_memory(addr, mapping, &bytes);
2149        }
2150
2151        let backing = match self.get_mapping_backing(mapping) {
2152            MappingBacking::PrivateAnonymous => {
2153                // Starnix has a writable handle to private anonymous memory.
2154                return self.private_anonymous.write_memory(addr, &bytes);
2155            }
2156            MappingBacking::Memory(backing) => backing,
2157        };
2158
2159        let vmo = backing.memory().as_vmo().ok_or_else(|| errno!(EFAULT))?;
2160        let addr_offset = backing.address_to_offset(addr);
2161        let can_exec =
2162            vmo.basic_info().expect("get VMO handle info").rights.contains(Rights::EXECUTE);
2163
2164        // Attempt to write to existing VMO
2165        match vmo.write(&bytes, addr_offset) {
2166            Ok(()) => {
2167                if can_exec {
2168                    // Issue a barrier to avoid executing stale instructions.
2169                    system_barrier(BarrierType::InstructionStream);
2170                }
2171                return Ok(());
2172            }
2173
2174            Err(zx::Status::ACCESS_DENIED) => { /* Fall through */ }
2175
2176            Err(status) => {
2177                return Err(MemoryManager::get_errno_for_vmo_err(status));
2178            }
2179        }
2180
2181        // Create a CoW child of the entire VMO and swap with the backing.
2182        let mapping_offset = backing.address_to_offset(range.start);
2183        let len = range.end - range.start;
2184
2185        // 1. Obtain a writable child of the VMO.
2186        let size = vmo.get_size().map_err(MemoryManager::get_errno_for_vmo_err)?;
2187        let child_vmo = vmo
2188            .create_child(VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, size)
2189            .map_err(MemoryManager::get_errno_for_vmo_err)?;
2190
2191        // 2. Modify the memory.
2192        child_vmo.write(&bytes, addr_offset).map_err(MemoryManager::get_errno_for_vmo_err)?;
2193
2194        // 3. If needed, remint the VMO as executable. Zircon flushes instruction caches when
2195        // mapping executable memory below, so a barrier isn't necessary here.
2196        let child_vmo = if can_exec {
2197            child_vmo
2198                .replace_as_executable(&VMEX_RESOURCE)
2199                .map_err(MemoryManager::get_errno_for_vmo_err)?
2200        } else {
2201            child_vmo
2202        };
2203
2204        // 4. Map the new VMO into user VMAR
2205        let memory = Arc::new(MemoryObject::from(child_vmo));
2206        let mapped_addr = self.map_in_user_vmar(
2207            SelectedAddress::FixedOverwrite(range.start),
2208            &memory,
2209            mapping_offset,
2210            len,
2211            mapping.flags(),
2212            false,
2213        )?;
2214        assert_eq!(mapped_addr, range.start);
2215
2216        // 5. Update mappings
2217        let new_backing = MappingBackingMemory::new(range.start, memory, mapping_offset);
2218
2219        let mut new_mapping = mapping.clone();
2220        new_mapping.set_backing_internal(MappingBacking::Memory(Box::new(new_backing)));
2221
2222        let range = range.clone();
2223        released_mappings.extend(self.mappings.insert(range, new_mapping));
2224
2225        Ok(())
2226    }
2227
    /// Adjusts the program break to `addr`, implementing `brk(2)` semantics.
    ///
    /// Following Linux behavior, an out-of-range or failed request does not
    /// return an error: the current (unchanged) program break is returned
    /// instead. On success, returns the new break (`addr`).
    fn set_brk<L>(
        &mut self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        mm: &Arc<MemoryManager>,
        addr: UserAddress,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<UserAddress, Errno>
    where
        L: LockBefore<ThreadGroupLimits>,
    {
        // The break may grow at most to the lesser of RLIMIT_DATA and the
        // fixed program-break limit.
        let rlimit_data = std::cmp::min(
            PROGRAM_BREAK_LIMIT,
            current_task.thread_group().get_rlimit(locked, Resource::DATA),
        );

        // Lazily initialize the break at its origin on first use.
        let brk = match self.brk.clone() {
            None => {
                let brk = ProgramBreak { base: self.brk_origin, current: self.brk_origin };
                self.brk = Some(brk.clone());
                brk
            }
            Some(brk) => brk,
        };

        let Ok(last_address) = brk.base + rlimit_data else {
            // The requested program break is out-of-range. We're supposed to simply
            // return the current program break.
            return Ok(brk.current);
        };

        if addr < brk.base || addr > last_address {
            // The requested program break is out-of-range. We're supposed to simply
            // return the current program break.
            return Ok(brk.current);
        }

        // Mapping changes happen at page granularity, while the break itself
        // is byte-granular; compare the page-rounded ends to decide whether
        // any pages must be mapped or unmapped.
        let old_end = brk.current.round_up(*PAGE_SIZE).unwrap();
        let new_end = addr.round_up(*PAGE_SIZE).unwrap();

        match new_end.cmp(&old_end) {
            std::cmp::Ordering::Less => {
                // Shrinking the program break removes any mapped pages in the
                // affected range, regardless of whether they were actually program
                // break pages, or other mappings.
                let delta = old_end - new_end;

                if self.unmap(mm, new_end, delta, released_mappings).is_err() {
                    return Ok(brk.current);
                }
            }
            std::cmp::Ordering::Greater => {
                let range = old_end..new_end;
                let delta = new_end - old_end;

                // Check for mappings over the program break region.
                if self.mappings.range(range).next().is_some() {
                    return Ok(brk.current);
                }

                if self
                    .map_anonymous(
                        mm,
                        DesiredAddress::FixedOverwrite(old_end),
                        delta,
                        ProtectionFlags::READ | ProtectionFlags::WRITE,
                        MappingOptions::ANONYMOUS,
                        MappingName::Heap,
                        released_mappings,
                    )
                    .is_err()
                {
                    return Ok(brk.current);
                }
            }
            _ => {}
        };

        // Any required updates to the program break succeeded, so update internal state.
        let mut new_brk = brk;
        new_brk.current = addr;
        self.brk = Some(new_brk);

        Ok(addr)
    }
2313
    /// Registers the `[addr, addr + length)` range with `userfault` in the
    /// given `mode`, marking the affected mappings with `MappingFlags::UFFD`.
    ///
    /// Fails with `EINVAL` if the range overflows, touches a non-private-
    /// anonymous mapping, or covers no mappings at all; fails with `EBUSY` if
    /// any overlapping mapping is already uffd-registered.
    fn register_with_uffd<L>(
        &mut self,
        locked: &mut Locked<L>,
        addr: UserAddress,
        length: usize,
        userfault: &Arc<UserFault>,
        mode: FaultRegisterMode,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno>
    where
        L: LockBefore<UserFaultInner>,
    {
        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
        let range_for_op = addr..end_addr;
        // First pass: validate every overlapping mapping and stage copies with
        // the UFFD flag set; nothing is committed until validation succeeds.
        let mut updates = vec![];

        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
            if !mapping.private_anonymous() {
                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
                return error!(EINVAL);
            }
            if mapping.flags().contains(MappingFlags::UFFD) {
                return error!(EBUSY);
            }
            // Only the portion of the mapping inside the requested range is
            // re-flagged; mappings extending outside the range are split.
            let range = range.intersect(&range_for_op);
            let mut mapping = mapping.clone();
            mapping.set_uffd(mode);
            updates.push((range, mapping));
        }
        if updates.is_empty() {
            return error!(EINVAL);
        }

        // Drop all access protections on the range so that accesses fault. The
        // expect is justified: the mappings were validated above.
        self.protect_vmar_range(addr, length, ProtectionFlags::empty())
            .expect("Failed to remove protections on uffd-registered range");

        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
        for (range, mapping) in updates {
            released_mappings.extend(self.mappings.insert(range, mapping));
        }

        userfault.insert_pages(locked, range_for_op, false);

        Ok(())
    }
2359
    /// Unregisters the `[addr, addr + length)` range from `userfault`,
    /// clearing `MappingFlags::UFFD` from affected mappings and restoring
    /// their original access protections.
    ///
    /// Ranges that overlap non-private-anonymous mappings fail with `EINVAL`;
    /// portions not actually registered are silently skipped.
    fn unregister_range_from_uffd<L>(
        &mut self,
        locked: &mut Locked<L>,
        userfault: &Arc<UserFault>,
        addr: UserAddress,
        length: usize,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno>
    where
        L: LockBefore<UserFaultInner>,
    {
        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
        let range_for_op = addr..end_addr;
        // Stage the updated mappings first; the map itself is only mutated in
        // the second loop, after iteration is complete.
        let mut updates = vec![];

        for (range, mapping) in self.mappings.range(range_for_op.clone()) {
            if !mapping.private_anonymous() {
                track_stub!(TODO("https://fxbug.dev/391599171"), "uffd for shmem and hugetlbfs");
                return error!(EINVAL);
            }
            if mapping.flags().contains(MappingFlags::UFFD) {
                let range = range.intersect(&range_for_op);
                // Only clear the flag if this range was actually tracked by
                // this userfault object.
                if userfault.remove_pages(locked, range.clone()) {
                    let mut mapping = mapping.clone();
                    mapping.clear_uffd();
                    updates.push((range, mapping));
                }
            }
        }
        for (range, mapping) in updates {
            let length = range.end - range.start;
            // Registration dropped all protections; restore the access bits
            // recorded in the mapping's flags.
            let restored_flags = mapping.flags().access_flags();

            released_mappings.extend(self.mappings.insert(range.clone(), mapping));

            self.protect_vmar_range(range.start, length, restored_flags)
                .expect("Failed to restore original protection bits on uffd-registered range");
        }
        Ok(())
    }
2400
    /// Unregisters every mapping associated with `userfault` and drops the
    /// manager's weak reference to it. Used when the last file descriptor for
    /// the userfault object is closed, so errors cannot be propagated.
    fn unregister_uffd<L>(
        &mut self,
        locked: &mut Locked<L>,
        userfault: &Arc<UserFault>,
        released_mappings: &mut ReleasedMappings,
    ) where
        L: LockBefore<UserFaultInner>,
    {
        // Stage updated mappings: for each UFFD-flagged mapping, clear the
        // flag on every sub-range actually registered with this userfault.
        let mut updates = vec![];

        for (range, mapping) in self.mappings.iter() {
            if mapping.flags().contains(MappingFlags::UFFD) {
                for range in userfault.get_registered_pages_overlapping_range(locked, range.clone())
                {
                    let mut mapping = mapping.clone();
                    mapping.clear_uffd();
                    updates.push((range.clone(), mapping));
                }
            }
        }
        // Use a separate loop to avoid mutating the mappings structure while iterating over it.
        for (range, mapping) in updates {
            let length = range.end - range.start;
            // Restore the access bits that were dropped at registration time.
            let restored_flags = mapping.flags().access_flags();
            released_mappings.extend(self.mappings.insert(range.clone(), mapping));
            // We can't recover from an error here as this is run during the cleanup.
            self.protect_vmar_range(range.start, length, restored_flags)
                .expect("Failed to restore original protection bits on uffd-registered range");
        }

        // Clear the userfault's page bookkeeping across the entire restricted
        // address space in one call.
        userfault.remove_pages(
            locked,
            UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
                ..UserAddress::from_ptr(RESTRICTED_ASPACE_HIGHEST_ADDRESS),
        );

        // Drop our weak reference so the list doesn't accumulate dead entries.
        let weak_userfault = Arc::downgrade(userfault);
        self.userfaultfds.retain(|uf| !Weak::ptr_eq(uf, &weak_userfault));
    }
2440
    /// Applies (or clears, when `name` is `None`) a VMA name over the
    /// page-aligned range `[addr, addr + length)`, as for
    /// `prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ...)`.
    ///
    /// Errors: `EINVAL` for an unaligned/overflowing range or an unmapped
    /// start; `ENOMEM` if the range is not contiguously mapped; `EBADF` if any
    /// affected mapping is file-backed.
    fn set_mapping_name(
        &mut self,
        addr: UserAddress,
        length: usize,
        name: Option<FsString>,
        released_mappings: &mut ReleasedMappings,
    ) -> Result<(), Errno> {
        // The start of the range must be page-aligned.
        if addr.ptr() % *PAGE_SIZE as usize != 0 {
            return error!(EINVAL);
        }
        let end = match addr.checked_add(length) {
            Some(addr) => addr.round_up(*PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
            None => return error!(EINVAL),
        };

        let mappings_in_range =
            self.mappings.range(addr..end).map(|(r, m)| (r.clone(), m.clone())).collect::<Vec<_>>();

        if mappings_in_range.is_empty() {
            return error!(EINVAL);
        }
        // The first mapping must cover the start of the requested range.
        if !mappings_in_range.first().unwrap().0.contains(&addr) {
            return error!(ENOMEM);
        }

        // Tracks the end of the previous mapping to detect gaps in coverage.
        let mut last_range_end = None;
        // There's no get_mut on RangeMap, because it would be hard to implement correctly in
        // combination with merging of adjacent mappings. Instead, make a copy, change the copy,
        // and insert the copy.
        for (mut range, mut mapping) in mappings_in_range {
            if mapping.name().is_file() {
                // It's invalid to assign a name to a file-backed mapping.
                return error!(EBADF);
            }
            // Handle mappings that start before the region to be named.
            range.start = std::cmp::max(range.start, addr);
            // Handle mappings that extend past the region to be named.
            range.end = std::cmp::min(range.end, end);

            if let Some(last_range_end) = last_range_end {
                if last_range_end != range.start {
                    // The name must apply to a contiguous range of mapped pages.
                    return error!(ENOMEM);
                }
            }
            last_range_end = Some(range.end.round_up(*PAGE_SIZE)?);
            // TODO(b/310255065): We have no place to store names in a way visible to programs outside of Starnix
            // such as memory analysis tools.
            if let MappingBacking::Memory(backing) = self.get_mapping_backing(&mapping) {
                // Mirror the name onto the backing Zircon memory object so it
                // shows up in Zircon-level diagnostics; clearing uses "".
                match &name {
                    Some(memory_name) => {
                        backing.memory().set_zx_name(memory_name);
                    }
                    None => {
                        backing.memory().set_zx_name(b"");
                    }
                }
            }
            mapping.set_name(match &name {
                Some(name) => MappingName::Vma(FlyByteStr::new(name.as_bytes())),
                None => MappingName::None,
            });
            released_mappings.extend(self.mappings.insert(range, mapping));
        }
        if let Some(last_range_end) = last_range_end {
            if last_range_end < end {
                // The name must apply to a contiguous range of mapped pages.
                return error!(ENOMEM);
            }
        }
        Ok(())
    }
2513}
2514
/// The memory pinning shadow process used for mlock().
///
/// Uses its own distinct shadow process so that it doesn't interfere with other uses of memory
/// pinning.
///
/// Newtype wrapper around [`memory_pinning::ShadowProcess`].
pub struct MlockShadowProcess(memory_pinning::ShadowProcess);
2520
/// A memory manager for another thread.
///
/// When accessing memory through this object, we use less efficient codepaths that work across
/// address spaces.
pub struct RemoteMemoryManager {
    // The target address space whose memory is accessed remotely.
    mm: Arc<MemoryManager>,
}
2528
2529impl RemoteMemoryManager {
2530    fn new(mm: Arc<MemoryManager>) -> Self {
2531        Self { mm }
2532    }
2533}
2534
// If we just have a MemoryManager, we cannot assume that its address space is current, which means
// we need to use the slower "syscall" mechanism to access its memory.
//
// Every method below therefore delegates to the corresponding `syscall_*`
// accessor on the wrapped `MemoryManager`.
impl MemoryAccessor for RemoteMemoryManager {
    fn read_memory<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm.syscall_read_memory(addr, bytes)
    }

    fn read_memory_partial_until_null_byte<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm.syscall_read_memory_partial_until_null_byte(addr, bytes)
    }

    fn read_memory_partial<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm.syscall_read_memory_partial(addr, bytes)
    }

    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        self.mm.syscall_write_memory(addr, bytes)
    }

    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        self.mm.syscall_write_memory_partial(addr, bytes)
    }

    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
        self.mm.syscall_zero(addr, length)
    }
}
2574
impl TaskMemoryAccessor for RemoteMemoryManager {
    /// Upper bound of valid user addresses in the wrapped address space.
    fn maximum_valid_address(&self) -> Option<UserAddress> {
        Some(self.mm.maximum_valid_user_address)
    }
}
2580
// Memory-access entry points. The `unified_*` variants assume the calling
// task's address space is this one (checked in debug builds) and use the
// direct `usercopy` fast path when available; the `syscall_*` variants go
// through the mapping bookkeeping and work from any address space.
impl MemoryManager {
    /// Folds a summary entry for every mapping in this address space into
    /// `summary`.
    pub fn summarize(&self, summary: &mut crate::mm::MappingSummary) {
        let state = self.state.read();
        for (_, mapping) in state.mappings.iter() {
            summary.add(&state, mapping);
        }
    }

    /// Collects payloads for the memory backing `buffers` so they can be
    /// enqueued to a pipe by `vmsplice(2)`.
    pub fn get_mappings_for_vmsplice(
        self: &Arc<MemoryManager>,
        buffers: &UserBuffers,
    ) -> Result<Vec<Arc<VmsplicePayload>>, Errno> {
        self.state.read().get_mappings_for_vmsplice(self, buffers)
    }

    /// Two `MemoryManager`s share an address space only if they are the same
    /// object, so pointer identity suffices.
    pub fn has_same_address_space(&self, other: &Self) -> bool {
        std::ptr::eq(self, other)
    }

    /// Reads exactly `bytes.len()` bytes at `addr`; any shortfall is EFAULT.
    pub fn unified_read_memory<'a>(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        if let Some(usercopy) = usercopy() {
            let (read_bytes, unread_bytes) = usercopy.copyin(addr.ptr(), bytes);
            if unread_bytes.is_empty() { Ok(read_bytes) } else { error!(EFAULT) }
        } else {
            self.syscall_read_memory(addr, bytes)
        }
    }

    pub fn syscall_read_memory<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.state.read().read_memory(addr, bytes)
    }

    /// Reads up to `bytes.len()` bytes at `addr`, stopping at a NUL byte.
    /// EFAULT only if nothing at all could be read.
    pub fn unified_read_memory_partial_until_null_byte<'a>(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        if let Some(usercopy) = usercopy() {
            let (read_bytes, unread_bytes) = usercopy.copyin_until_null_byte(addr.ptr(), bytes);
            // Partial reads succeed; only a completely failed read is EFAULT.
            if read_bytes.is_empty() && !unread_bytes.is_empty() {
                error!(EFAULT)
            } else {
                Ok(read_bytes)
            }
        } else {
            self.syscall_read_memory_partial_until_null_byte(addr, bytes)
        }
    }

    pub fn syscall_read_memory_partial_until_null_byte<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.state.read().read_memory_partial_until_null_byte(addr, bytes)
    }

    /// Reads up to `bytes.len()` bytes at `addr`. EFAULT only if nothing at
    /// all could be read.
    pub fn unified_read_memory_partial<'a>(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        if let Some(usercopy) = usercopy() {
            let (read_bytes, unread_bytes) = usercopy.copyin(addr.ptr(), bytes);
            // Partial reads succeed; only a completely failed read is EFAULT.
            if read_bytes.is_empty() && !unread_bytes.is_empty() {
                error!(EFAULT)
            } else {
                Ok(read_bytes)
            }
        } else {
            self.syscall_read_memory_partial(addr, bytes)
        }
    }

    pub fn syscall_read_memory_partial<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.state.read().read_memory_partial(addr, bytes)
    }

    /// Writes all of `bytes` to `addr`; a short write is EFAULT.
    pub fn unified_write_memory(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        bytes: &[u8],
    ) -> Result<usize, Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        if let Some(usercopy) = usercopy() {
            let num_copied = usercopy.copyout(bytes, addr.ptr());
            if num_copied != bytes.len() {
                error!(
                    EFAULT,
                    format!("expected {:?} bytes, copied {:?} bytes", bytes.len(), num_copied)
                )
            } else {
                Ok(num_copied)
            }
        } else {
            self.syscall_write_memory(addr, bytes)
        }
    }

    /// Write `bytes` to memory address `addr`, making a copy-on-write child of the VMO backing and
    /// replacing the mapping if necessary.
    ///
    /// NOTE: this bypasses userspace's memory protection configuration and should only be called
    /// by codepaths like ptrace which bypass memory protection.
    pub fn force_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<(), Errno> {
        let mut state = self.state.write();
        let mut released_mappings = ReleasedMappings::default();
        let result = state.force_write_memory(addr, bytes, &mut released_mappings);
        // Release unmapped regions after the state lock is dropped.
        released_mappings.finalize(state);
        result
    }

    pub fn syscall_write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        self.state.read().write_memory(addr, bytes)
    }

    /// Writes up to `bytes.len()` bytes to `addr`. EFAULT only if nothing at
    /// all could be written (and `bytes` is non-empty).
    pub fn unified_write_memory_partial(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        bytes: &[u8],
    ) -> Result<usize, Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        if let Some(usercopy) = usercopy() {
            let num_copied = usercopy.copyout(bytes, addr.ptr());
            if num_copied == 0 && !bytes.is_empty() { error!(EFAULT) } else { Ok(num_copied) }
        } else {
            self.syscall_write_memory_partial(addr, bytes)
        }
    }

    pub fn syscall_write_memory_partial(
        &self,
        addr: UserAddress,
        bytes: &[u8],
    ) -> Result<usize, Errno> {
        self.state.read().write_memory_partial(addr, bytes)
    }

    /// Zeroes `length` bytes at `addr`, choosing between the memory-object
    /// path and direct usercopy based on how many whole pages are covered.
    pub fn unified_zero(
        &self,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
    ) -> Result<usize, Errno> {
        debug_assert!(self.has_same_address_space(&current_task.mm().unwrap()));

        {
            let page_size = *PAGE_SIZE as usize;
            // Get the page boundary immediately following `addr` if `addr` is
            // not page aligned.
            let next_page_boundary = round_up_to_system_page_size(addr.ptr())?;
            // The number of bytes needed to zero at least a full page (not just
            // a pages worth of bytes) starting at `addr`.
            let length_with_atleast_one_full_page = page_size + (next_page_boundary - addr.ptr());
            // If at least one full page is being zeroed, go through the memory object since Zircon
            // can swap the mapped pages with the zero page which should be cheaper than zeroing
            // out a pages worth of bytes manually.
            //
            // If we are not zeroing out a full page, then go through usercopy
            // if unified aspaces is enabled.
            if length >= length_with_atleast_one_full_page {
                return self.syscall_zero(addr, length);
            }
        }

        if let Some(usercopy) = usercopy() {
            if usercopy.zero(addr.ptr(), length) == length { Ok(length) } else { error!(EFAULT) }
        } else {
            self.syscall_zero(addr, length)
        }
    }

    pub fn syscall_zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
        self.state.read().zero(addr, length)
    }

    /// Obtain a reference to this memory manager that can be used from another thread.
    pub fn as_remote(self: &Arc<Self>) -> RemoteMemoryManager {
        RemoteMemoryManager::new(self.clone())
    }

    /// Performs a data and instruction cache flush over the given address range.
    pub fn cache_flush(&self, range: Range<UserAddress>) -> Result<(), Errno> {
        self.state.read().cache_flush(range)
    }

    /// Register the address space managed by this memory manager for interest in
    /// receiving private expedited memory barriers of the given type.
    pub fn register_membarrier_private_expedited(
        &self,
        mtype: MembarrierType,
    ) -> Result<(), Errno> {
        self.state.write().register_membarrier_private_expedited(mtype)
    }

    /// Checks if the address space managed by this memory manager is registered
    /// for interest in private expedited barriers of the given kind.
    pub fn membarrier_private_expedited_registered(&self, mtype: MembarrierType) -> bool {
        self.state.read().membarrier_private_expedited_registered(mtype)
    }
}
2807
/// Manages a single Linux user address space, backed by a Zircon VMAR.
pub struct MemoryManager {
    /// The base address of the root_vmar.
    pub base_addr: UserAddress,

    /// The futexes in this address space.
    pub futex: Arc<FutexTable<PrivateFutexKey>>,

    /// Mutable state for the memory manager.
    pub state: RwLock<MemoryManagerState>,

    /// Whether this address space is dumpable.
    pub dumpable: OrderedMutex<DumpPolicy, MmDumpable>,

    /// Maximum valid user address for this vmar.
    pub maximum_valid_user_address: UserAddress,

    /// In-flight payloads enqueued to a pipe as a consequence of a `vmsplice(2)`
    /// operation.
    ///
    /// For details on why we need to keep track of in-flight vmspliced payloads,
    /// see [`VmsplicePayload`].
    ///
    /// For details on why this isn't under the `RwLock` protected `MemoryManagerState`,
    /// See [`InflightVmsplicedPayloads::payloads`].
    pub inflight_vmspliced_payloads: InflightVmsplicedPayloads,

    /// A mechanism to be notified when this `MemoryManager` is destroyed.
    pub drop_notifier: DropNotifier,
}
2837
2838impl MemoryManager {
2839    /// Returns a new `MemoryManager` suitable for use in tests.
2840    pub fn new_for_test(root_vmar: zx::Unowned<'_, zx::Vmar>, arch_width: ArchWidth) -> Arc<Self> {
2841        Self::new(root_vmar, arch_width, None).expect("can create MemoryManager")
2842    }
2843
    /// Returns a new `MemoryManager` initialized with a new userspace VMAR matching the specified
    /// `arch_width`, under the specified restricted-mode `root_vmar`.  The `executable_node` that
    /// the new address-space will execute may optionally be supplied.
    fn new(
        root_vmar: zx::Unowned<'_, zx::Vmar>,
        arch_width: ArchWidth,
        executable_node: Option<NamespaceNode>,
    ) -> Result<Arc<Self>, Errno> {
        debug_assert!(!root_vmar.is_invalid());

        let mut vmar_info = root_vmar.info().map_err(|status| from_status_like_fdio!(status))?;
        // 32-bit address spaces are limited to mapping below the 4GB boundary.
        if arch_width.is_arch32() {
            vmar_info.len = (LOWER_4GB_LIMIT.ptr() - vmar_info.base) as usize;
        }

        // Carve the entire (possibly clamped) restricted range out as the
        // user VMAR, at a specific address so the layout is predictable.
        let (user_vmar, ptr) = root_vmar
            .allocate(
                0,
                vmar_info.len,
                zx::VmarFlags::SPECIFIC
                    | zx::VmarFlags::CAN_MAP_SPECIFIC
                    | zx::VmarFlags::CAN_MAP_READ
                    | zx::VmarFlags::CAN_MAP_WRITE
                    | zx::VmarFlags::CAN_MAP_EXECUTE,
            )
            .map_err(|status| from_status_like_fdio!(status))?;
        assert_eq!(ptr, vmar_info.base);

        let user_vmar_info = user_vmar.info().map_err(|status| from_status_like_fdio!(status))?;

        // Ensure that the `user_vmar_info` matches assumptions for the requested layout.
        debug_assert_eq!(RESTRICTED_ASPACE_BASE, user_vmar_info.base);
        if arch_width.is_arch32() {
            debug_assert_eq!(LOWER_4GB_LIMIT.ptr() - user_vmar_info.base, user_vmar_info.len);
        } else {
            debug_assert_eq!(RESTRICTED_ASPACE_SIZE, user_vmar_info.len);
        }

        // The private anonymous backing memory object extend from the user address 0 up to the
        // highest mappable address. The pages below `user_vmar_info.base` are never mapped, but
        // including them in the memory object makes the math for mapping address to memory object
        // offsets simpler.
        let backing_size = (user_vmar_info.base + user_vmar_info.len) as u64;

        // Place the stack at the end of the address space, subject to ASLR adjustment.
        let stack_origin = UserAddress::from_ptr(
            user_vmar_info.base + user_vmar_info.len
                - MAX_STACK_SIZE
                - generate_random_offset_for_aslr(arch_width),
        )
        .round_up(*PAGE_SIZE)?;

        // Set the highest address that `mmap` will assign to the allocations that don't ask for a
        // specific address, subject to ASLR adjustment.
        let mmap_top = stack_origin
            .checked_sub(generate_random_offset_for_aslr(arch_width))
            .ok_or_else(|| errno!(EINVAL))?;

        Ok(Arc::new(MemoryManager {
            base_addr: UserAddress::from_ptr(user_vmar_info.base),
            futex: Arc::<FutexTable<PrivateFutexKey>>::default(),
            state: RwLock::new(MemoryManagerState {
                user_vmar: user_vmar,
                user_vmar_info,
                mappings: Default::default(),
                private_anonymous: PrivateAnonymousMemoryManager::new(backing_size),
                userfaultfds: Default::default(),
                shadow_mappings_for_mlock: Default::default(),
                forkable_state: MemoryManagerForkableState {
                    executable_node,
                    stack_origin,
                    mmap_top,
                    ..Default::default()
                },
            }),
            // TODO(security): Reset to DISABLE, or the value in the fs.suid_dumpable sysctl, under
            // certain conditions as specified in the prctl(2) man page.
            dumpable: OrderedMutex::new(DumpPolicy::User),
            maximum_valid_user_address: UserAddress::from_ptr(
                user_vmar_info.base + user_vmar_info.len,
            ),
            inflight_vmspliced_payloads: Default::default(),
            drop_notifier: DropNotifier::default(),
        }))
    }
2929
2930    pub fn set_brk<L>(
2931        self: &Arc<Self>,
2932        locked: &mut Locked<L>,
2933        current_task: &CurrentTask,
2934        addr: UserAddress,
2935    ) -> Result<UserAddress, Errno>
2936    where
2937        L: LockBefore<ThreadGroupLimits>,
2938    {
2939        let mut state = self.state.write();
2940        let mut released_mappings = ReleasedMappings::default();
2941        let result = state.set_brk(locked, current_task, self, addr, &mut released_mappings);
2942        released_mappings.finalize(state);
2943        result
2944    }
2945
2946    pub fn register_uffd(&self, userfault: &Arc<UserFault>) {
2947        let mut state = self.state.write();
2948        state.userfaultfds.push(Arc::downgrade(userfault));
2949    }
2950
2951    /// Register a given memory range with a userfault object.
2952    pub fn register_with_uffd<L>(
2953        self: &Arc<Self>,
2954        locked: &mut Locked<L>,
2955        addr: UserAddress,
2956        length: usize,
2957        userfault: &Arc<UserFault>,
2958        mode: FaultRegisterMode,
2959    ) -> Result<(), Errno>
2960    where
2961        L: LockBefore<UserFaultInner>,
2962    {
2963        let mut state = self.state.write();
2964        let mut released_mappings = ReleasedMappings::default();
2965        let result =
2966            state.register_with_uffd(locked, addr, length, userfault, mode, &mut released_mappings);
2967        released_mappings.finalize(state);
2968        result
2969    }
2970
2971    /// Unregister a given range from any userfault objects associated with it.
2972    pub fn unregister_range_from_uffd<L>(
2973        &self,
2974        locked: &mut Locked<L>,
2975        userfault: &Arc<UserFault>,
2976        addr: UserAddress,
2977        length: usize,
2978    ) -> Result<(), Errno>
2979    where
2980        L: LockBefore<UserFaultInner>,
2981    {
2982        let mut state = self.state.write();
2983        let mut released_mappings = ReleasedMappings::default();
2984        let result = state.unregister_range_from_uffd(
2985            locked,
2986            userfault,
2987            addr,
2988            length,
2989            &mut released_mappings,
2990        );
2991        released_mappings.finalize(state);
2992        result
2993    }
2994
2995    /// Unregister any mappings registered with a given userfault object. Used when closing the last
2996    /// file descriptor associated to it.
2997    pub fn unregister_uffd<L>(&self, locked: &mut Locked<L>, userfault: &Arc<UserFault>)
2998    where
2999        L: LockBefore<UserFaultInner>,
3000    {
3001        let mut state = self.state.write();
3002        let mut released_mappings = ReleasedMappings::default();
3003        state.unregister_uffd(locked, userfault, &mut released_mappings);
3004        released_mappings.finalize(state);
3005    }
3006
    /// Populate a range of pages registered with an userfaulfd according to a `populate` function.
    /// This will fail if the pages were not registered with userfaultfd, or if the page at `addr`
    /// was already populated. If any page other than the first one was populated, the `length`
    /// is adjusted to only include the first N unpopulated pages, and this adjusted length
    /// is then passed to `populate`. On success, returns the number of populated bytes.
    ///
    /// Errors:
    /// - `ENOENT` if any part of `addr..addr+length` is unmapped or not uffd-registered.
    /// - `ENOTSUP` if the range is registered with a different uffd (cross-uffd not supported).
    /// - `EEXIST` if the first page of the range is already populated.
    pub fn populate_from_uffd<F, L>(
        &self,
        locked: &mut Locked<L>,
        addr: UserAddress,
        length: usize,
        userfault: &Arc<UserFault>,
        populate: F,
    ) -> Result<usize, Errno>
    where
        F: FnOnce(&MemoryManagerState, usize) -> Result<usize, Errno>,
        L: LockBefore<UserFaultInner>,
    {
        let state = self.state.read();

        // Check that the addr..length range is a contiguous range of mappings which are all
        // registered with an userfault object.
        let mut bytes_registered_with_uffd = 0;
        for (mapping, len) in state.get_contiguous_mappings_at(addr, length)? {
            if mapping.flags().contains(MappingFlags::UFFD) {
                // Check that the mapping is registered with the same uffd. This is not required,
                // but we don't support cross-uffd operations yet.
                // NOTE(review): this always queries `addr` (the start of the whole range) rather
                // than an address inside the mapping currently being iterated — confirm that is
                // intentional when the range spans multiple mappings.
                if !userfault.contains_addr(locked, addr) {
                    track_stub!(
                        TODO("https://fxbug.dev/391599171"),
                        "operations across different uffds"
                    );
                    return error!(ENOTSUP);
                };
            } else {
                // Part of the range is mapped but not uffd-registered.
                return error!(ENOENT);
            }
            bytes_registered_with_uffd += len;
        }
        // If the covered bytes fall short of `length`, part of the range is unmapped.
        if bytes_registered_with_uffd != length {
            return error!(ENOENT);
        }

        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;

        // Determine how many pages in the requested range are already populated
        let first_populated =
            userfault.get_first_populated_page_after(locked, addr).ok_or_else(|| errno!(ENOENT))?;
        // If the very first page is already populated, uffd operations should just return EEXIST
        if first_populated == addr {
            return error!(EEXIST);
        }
        // Otherwise it is possible to do an incomplete operation by only populating pages until
        // the first populated one.
        let trimmed_end = std::cmp::min(first_populated, end_addr);
        let effective_length = trimmed_end - addr;

        populate(&state, effective_length)?;
        // Record the newly populated pages so later operations on this uffd see them as present.
        userfault.insert_pages(locked, addr..trimmed_end, true);

        // Since we used protection bits to force pagefaults, we now need to reverse this change by
        // restoring the protections on the underlying Zircon mappings to the "real" protection bits
        // that were kept in the Starnix mappings. This will prevent new pagefaults from being
        // generated. Only do this on the pages that were populated by this operation.
        for (range, mapping) in state.mappings.range(addr..trimmed_end) {
            // A mapping may extend past the populated span; clamp to the populated part.
            let range_to_protect = range.intersect(&(addr..trimmed_end));
            let restored_flags = mapping.flags().access_flags();
            let length = range_to_protect.end - range_to_protect.start;
            state
                .protect_vmar_range(range_to_protect.start, length, restored_flags)
                .expect("Failed to restore original protection bits on uffd-registered range");
        }
        // Return the number of effectively populated bytes, which might be smaller than the
        // requested number.
        Ok(effective_length)
    }
3082
3083    pub fn zero_from_uffd<L>(
3084        &self,
3085        locked: &mut Locked<L>,
3086        addr: UserAddress,
3087        length: usize,
3088        userfault: &Arc<UserFault>,
3089    ) -> Result<usize, Errno>
3090    where
3091        L: LockBefore<UserFaultInner>,
3092    {
3093        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3094            state.zero(addr, effective_length)
3095        })
3096    }
3097
3098    pub fn fill_from_uffd<L>(
3099        &self,
3100        locked: &mut Locked<L>,
3101        addr: UserAddress,
3102        buf: &[u8],
3103        length: usize,
3104        userfault: &Arc<UserFault>,
3105    ) -> Result<usize, Errno>
3106    where
3107        L: LockBefore<UserFaultInner>,
3108    {
3109        self.populate_from_uffd(locked, addr, length, userfault, |state, effective_length| {
3110            state.write_memory(addr, &buf[..effective_length])
3111        })
3112    }
3113
3114    pub fn copy_from_uffd<L>(
3115        &self,
3116        locked: &mut Locked<L>,
3117        source_addr: UserAddress,
3118        dst_addr: UserAddress,
3119        length: usize,
3120        userfault: &Arc<UserFault>,
3121    ) -> Result<usize, Errno>
3122    where
3123        L: LockBefore<UserFaultInner>,
3124    {
3125        self.populate_from_uffd(locked, dst_addr, length, userfault, |state, effective_length| {
3126            let mut buf = vec![std::mem::MaybeUninit::uninit(); effective_length];
3127            let buf = state.read_memory(source_addr, &mut buf)?;
3128            state.write_memory(dst_addr, &buf[..effective_length])
3129        })
3130    }
3131
    /// Returns the new `MemoryManager` for a process, pre-populated with a snapshot of the layout
    /// and mappings of `source_mm`.  This is used during `CurrentTask::clone()` operations to
    /// create the initial address-space for the cloned child process.
    ///
    /// Mappings flagged `DONTFORK` are skipped entirely; `WIPEONFORK` ranges appear zeroed in
    /// the child; `LOCKED` is stripped since locking is not inherited across a fork.
    pub fn snapshot_of<L>(
        locked: &mut Locked<L>,
        source_mm: &Arc<MemoryManager>,
        root_vmar: zx::Unowned<'_, zx::Vmar>,
        arch_width: ArchWidth,
    ) -> Result<Arc<Self>, Errno>
    where
        L: LockBefore<MmDumpable>,
    {
        trace_duration!(CATEGORY_STARNIX_MM, "snapshot_of");
        let target = MemoryManager::new(root_vmar, arch_width, source_mm.executable_node())?;

        // Hold the lock throughout the operation to uphold memory manager's invariants.
        // See mm/README.md.
        {
            let state: &mut MemoryManagerState = &mut source_mm.state.write();
            let mut target_state = target.state.write();
            debug_assert_eq!(state.user_vmar_info, target_state.user_vmar_info);

            // Cache of cloned memory objects keyed by the source object's koid, so a memory
            // object mapped several times in the source is cloned only once for the target.
            let mut clone_cache = HashMap::<zx::Koid, Arc<MemoryObject>>::new();

            // The private-anonymous backing covers the whole user address range; snapshot it
            // in one operation rather than per-mapping.
            let backing_size =
                (target_state.user_vmar_info.base + target_state.user_vmar_info.len) as u64;
            target_state.private_anonymous = state.private_anonymous.snapshot(backing_size)?;

            for (range, mapping) in state.mappings.iter() {
                // Mappings marked DONTFORK are simply absent from the child.
                if mapping.flags().contains(MappingFlags::DONTFORK) {
                    continue;
                }
                // Locking is not inherited when forking.
                let target_mapping_flags = mapping.flags().difference(MappingFlags::LOCKED);
                match state.get_mapping_backing(mapping) {
                    MappingBacking::Memory(backing) => {
                        trace_duration!(CATEGORY_STARNIX_MM, "memory_backing_clone");
                        let memory_offset = backing.address_to_offset(range.start);
                        let length = range.end - range.start;

                        let target_memory = if mapping.flags().contains(MappingFlags::SHARED)
                            || mapping.name().is_vvar()
                        {
                            // Note that the Vvar is a special mapping that behaves like a shared mapping but
                            // is private to each process.
                            backing.memory().clone()
                        } else if mapping.flags().contains(MappingFlags::WIPEONFORK) {
                            // WIPEONFORK: the child sees fresh zeroed memory rather than a copy.
                            create_anonymous_mapping_memory(length as u64)?
                        } else {
                            // Private mapping: clone the memory object (copy-on-write semantics
                            // are handled by `clone_memory`), reusing a prior clone if the same
                            // object was already seen.
                            let basic_info = backing.memory().basic_info();
                            let options = mapping.flags().options();
                            let memory =
                                clone_cache.entry(basic_info.koid).or_insert_with_fallible(
                                    || backing.memory().clone_memory(basic_info.rights, options),
                                )?;
                            memory.clone()
                        };

                        let mut released_mappings = ReleasedMappings::default();
                        target_state.map_memory(
                            &target,
                            DesiredAddress::Fixed(range.start),
                            target_memory,
                            memory_offset,
                            length,
                            target_mapping_flags,
                            mapping.max_access(),
                            false,
                            mapping.name().to_owned(),
                            &mut released_mappings,
                        )?;
                        // The target address-space is brand new, so nothing can be displaced.
                        assert!(
                            released_mappings.is_empty(),
                            "target mm must be empty when cloning, got {released_mappings:#?}"
                        );
                    }
                    MappingBacking::PrivateAnonymous => {
                        trace_duration!(CATEGORY_STARNIX_MM, "private_anonymous_backing_clone");
                        let length = range.end - range.start;
                        // The private-anonymous backing was snapshotted above; for WIPEONFORK
                        // ranges, zero the child's copy instead of keeping the snapshot data.
                        if mapping.flags().contains(MappingFlags::WIPEONFORK) {
                            target_state
                                .private_anonymous
                                .zero(range.start, length)
                                .map_err(|_| errno!(ENOMEM))?;
                        }

                        // Private-anonymous backing offsets mirror the user address directly.
                        let target_memory_offset = range.start.ptr() as u64;
                        target_state.map_in_user_vmar(
                            SelectedAddress::FixedOverwrite(range.start),
                            &target_state.private_anonymous.backing,
                            target_memory_offset,
                            length,
                            target_mapping_flags,
                            false,
                        )?;
                        let removed_mappings = target_state.mappings.insert(
                            range.clone(),
                            Mapping::new_private_anonymous(
                                target_mapping_flags,
                                mapping.name().to_owned(),
                            ),
                        );
                        // As above, inserting into a fresh address-space must displace nothing.
                        assert!(
                            removed_mappings.is_empty(),
                            "target mm must be empty when cloning, got {removed_mappings:#?}"
                        );
                    }
                };
            }

            target_state.forkable_state = state.forkable_state.clone();
        }

        // Propagate the dumpable setting from the source to the child.
        let self_dumpable = *source_mm.dumpable.lock(locked);
        *target.dumpable.lock(locked) = self_dumpable;

        Ok(target)
    }
3250
3251    /// Returns the replacement `MemoryManager` to be used by the `exec()`ing task.
3252    ///
3253    /// POSIX requires that "a call to any exec function from a process with more than one thread
3254    /// shall result in all threads being terminated and the new executable being loaded and
3255    /// executed. No destructor functions or cleanup handlers shall be called".
3256    /// The caller is responsible for having ensured that this is the only `Task` in the
3257    /// `ThreadGroup`, and thereby the `zx::process`, such that it is safe to tear-down the Zircon
3258    /// userspace VMAR for the current address-space.
3259    pub fn exec(
3260        root_vmar: zx::Unowned<'_, zx::Vmar>,
3261        old_mm: Option<Arc<Self>>,
3262        exe_node: NamespaceNode,
3263        arch_width: ArchWidth,
3264    ) -> Result<Arc<Self>, Errno> {
3265        // To safeguard against concurrent accesses by other tasks through this `MemoryManager`, the
3266        // following steps are performed while holding the write lock on the old MM, if any:
3267        //
3268        // 1. All `mappings` are removed, so that remote `MemoryAccessor` calls will fail.
3269        // 2. The `user_vmar` is `destroy()`ed to free-up the user address-space.
3270        //
3271        // Once these steps are complete it is safe for the old mappings to be dropped.
3272        if let Some(old_mm) = old_mm {
3273            let _old_mappings = {
3274                let mut state = old_mm.state.write();
3275
3276                // SAFETY: This operation is safe because this is the only `Task` active in the address-
3277                // space, and accesses by remote tasks will use syscalls on the `root_vmar`.
3278                unsafe {
3279                    state.user_vmar.destroy().map_err(|status| from_status_like_fdio!(status))?
3280                }
3281                state.user_vmar = zx::Vmar::invalid();
3282
3283                std::mem::replace(&mut state.mappings, Default::default())
3284            };
3285        }
3286
3287        Self::new(root_vmar, arch_width, Some(exe_node))
3288    }
3289
3290    pub fn initialize_brk_origin(
3291        &self,
3292        arch_width: ArchWidth,
3293        executable_end: UserAddress,
3294    ) -> Result<(), Errno> {
3295        self.state.write().brk_origin = executable_end
3296            .checked_add(generate_random_offset_for_aslr(arch_width))
3297            .ok_or_else(|| errno!(EINVAL))?;
3298        Ok(())
3299    }
3300
3301    // Get a randomised address for loading a position-independent executable.
3302    pub fn get_random_base_for_executable(
3303        &self,
3304        arch_width: ArchWidth,
3305        length: usize,
3306    ) -> Result<UserAddress, Errno> {
3307        let state = self.state.read();
3308
3309        // Place it at approx. 2/3 of the available mmap space, subject to ASLR adjustment.
3310        let base = round_up_to_system_page_size(2 * state.mmap_top.ptr() / 3).unwrap()
3311            + generate_random_offset_for_aslr(arch_width);
3312        if base.checked_add(length).ok_or_else(|| errno!(EINVAL))? <= state.mmap_top.ptr() {
3313            Ok(UserAddress::from_ptr(base))
3314        } else {
3315            error!(EINVAL)
3316        }
3317    }
3318    pub fn executable_node(&self) -> Option<NamespaceNode> {
3319        self.state.read().executable_node.clone()
3320    }
3321
3322    #[track_caller]
3323    pub fn get_errno_for_map_err(status: zx::Status) -> Errno {
3324        match status {
3325            zx::Status::INVALID_ARGS => errno!(EINVAL),
3326            zx::Status::ACCESS_DENIED => errno!(EPERM),
3327            zx::Status::NOT_SUPPORTED => errno!(ENODEV),
3328            zx::Status::NO_MEMORY => errno!(ENOMEM),
3329            zx::Status::NO_RESOURCES => errno!(ENOMEM),
3330            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
3331            zx::Status::ALREADY_EXISTS => errno!(EEXIST),
3332            zx::Status::BAD_STATE => errno!(EINVAL),
3333            _ => impossible_error(status),
3334        }
3335    }
3336
3337    #[track_caller]
3338    pub fn get_errno_for_vmo_err(status: zx::Status) -> Errno {
3339        match status {
3340            zx::Status::NO_MEMORY => errno!(ENOMEM),
3341            zx::Status::ACCESS_DENIED => errno!(EPERM),
3342            zx::Status::NOT_SUPPORTED => errno!(EIO),
3343            zx::Status::BAD_STATE => errno!(EIO),
3344            _ => return impossible_error(status),
3345        }
3346    }
3347
3348    pub fn map_memory(
3349        self: &Arc<Self>,
3350        addr: DesiredAddress,
3351        memory: Arc<MemoryObject>,
3352        memory_offset: u64,
3353        length: usize,
3354        prot_flags: ProtectionFlags,
3355        max_access: Access,
3356        options: MappingOptions,
3357        name: MappingName,
3358    ) -> Result<UserAddress, Errno> {
3359        let flags = MappingFlags::from_access_flags_and_options(prot_flags, options);
3360
3361        // Unmapped mappings must be released after the state is unlocked.
3362        let mut released_mappings = ReleasedMappings::default();
3363        // Hold the lock throughout the operation to uphold memory manager's invariants.
3364        // See mm/README.md.
3365        let mut state = self.state.write();
3366        let result = state.map_memory(
3367            self,
3368            addr,
3369            memory,
3370            memory_offset,
3371            length,
3372            flags,
3373            max_access,
3374            options.contains(MappingOptions::POPULATE),
3375            name,
3376            &mut released_mappings,
3377        );
3378
3379        // Drop the state before the unmapped mappings, since dropping a mapping may acquire a lock
3380        // in `DirEntry`'s `drop`.
3381        released_mappings.finalize(state);
3382
3383        result
3384    }
3385
3386    pub fn map_anonymous(
3387        self: &Arc<Self>,
3388        addr: DesiredAddress,
3389        length: usize,
3390        prot_flags: ProtectionFlags,
3391        options: MappingOptions,
3392        name: MappingName,
3393    ) -> Result<UserAddress, Errno> {
3394        let mut released_mappings = ReleasedMappings::default();
3395        // Hold the lock throughout the operation to uphold memory manager's invariants.
3396        // See mm/README.md.
3397        let mut state = self.state.write();
3398        let result = state.map_anonymous(
3399            self,
3400            addr,
3401            length,
3402            prot_flags,
3403            options,
3404            name,
3405            &mut released_mappings,
3406        );
3407
3408        released_mappings.finalize(state);
3409
3410        result
3411    }
3412
3413    /// Map the stack into a pre-selected address region
3414    pub fn map_stack(
3415        self: &Arc<Self>,
3416        length: usize,
3417        prot_flags: ProtectionFlags,
3418    ) -> Result<UserAddress, Errno> {
3419        assert!(length <= MAX_STACK_SIZE);
3420        let addr = self.state.read().stack_origin;
3421        // The address range containing stack_origin should normally be available: it's above the
3422        // mmap_top, and this method is called early enough in the process lifetime that only the
3423        // main ELF and the interpreter are already loaded. However, in the rare case that the
3424        // static position-independent executable is overlapping the chosen address, mapping as Hint
3425        // will make mmap choose a new place for it.
3426        // TODO(https://fxbug.dev/370027241): Consider a more robust approach
3427        let stack_addr = self.map_anonymous(
3428            DesiredAddress::Hint(addr),
3429            length,
3430            prot_flags,
3431            MappingOptions::ANONYMOUS | MappingOptions::GROWSDOWN,
3432            MappingName::Stack,
3433        )?;
3434        if stack_addr != addr {
3435            log_warn!(
3436                "An address designated for stack ({}) was unavailable, mapping at {} instead.",
3437                addr,
3438                stack_addr
3439            );
3440        }
3441        Ok(stack_addr)
3442    }
3443
3444    pub fn remap(
3445        self: &Arc<Self>,
3446        current_task: &CurrentTask,
3447        addr: UserAddress,
3448        old_length: usize,
3449        new_length: usize,
3450        flags: MremapFlags,
3451        new_addr: UserAddress,
3452    ) -> Result<UserAddress, Errno> {
3453        let mut released_mappings = ReleasedMappings::default();
3454        // Hold the lock throughout the operation to uphold memory manager's invariants.
3455        // See mm/README.md.
3456        let mut state = self.state.write();
3457        let result = state.remap(
3458            current_task,
3459            self,
3460            addr,
3461            old_length,
3462            new_length,
3463            flags,
3464            new_addr,
3465            &mut released_mappings,
3466        );
3467
3468        released_mappings.finalize(state);
3469
3470        result
3471    }
3472
3473    pub fn unmap(self: &Arc<Self>, addr: UserAddress, length: usize) -> Result<(), Errno> {
3474        let mut released_mappings = ReleasedMappings::default();
3475        // Hold the lock throughout the operation to uphold memory manager's invariants.
3476        // See mm/README.md.
3477        let mut state = self.state.write();
3478        let result = state.unmap(self, addr, length, &mut released_mappings);
3479
3480        released_mappings.finalize(state);
3481
3482        result
3483    }
3484
3485    pub fn protect(
3486        &self,
3487        current_task: &CurrentTask,
3488        addr: UserAddress,
3489        length: usize,
3490        prot_flags: ProtectionFlags,
3491    ) -> Result<(), Errno> {
3492        // Hold the lock throughout the operation to uphold memory manager's invariants.
3493        // See mm/README.md.
3494        let mut state = self.state.write();
3495        let mut released_mappings = ReleasedMappings::default();
3496        let result = state.protect(current_task, addr, length, prot_flags, &mut released_mappings);
3497        released_mappings.finalize(state);
3498        result
3499    }
3500
    /// Synchronize a mapped memory range with its backing files.
    ///
    /// Validates the flags and the page-aligned range, verifies the entire range is mapped
    /// (`ENOMEM` on any gap), then — for `MS_SYNC` — syncs the backing node of every file
    /// mapping in the range. Sync happens outside the memory-manager lock.
    pub fn msync(
        &self,
        _locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        addr: UserAddress,
        length: usize,
        flags: MsyncFlags,
    ) -> Result<(), Errno> {
        // According to POSIX, either MS_SYNC or MS_ASYNC must be specified in flags,
        // and indeed failure to include one of these flags will cause msync() to fail
        // on some systems.  However, Linux permits a call to msync() that specifies
        // neither of these flags, with semantics that are (currently) equivalent to
        // specifying MS_ASYNC.

        // Both MS_SYNC and MS_ASYNC are set in flags
        if flags.contains(MsyncFlags::ASYNC) && flags.contains(MsyncFlags::SYNC) {
            return error!(EINVAL);
        }

        // The start address must be page-aligned; the length is rounded up below.
        if !addr.is_aligned(*PAGE_SIZE) {
            return error!(EINVAL);
        }

        // We collect the nodes to sync first, release the memory manager lock, and then sync them.
        // This avoids holding the lock during blocking I/O operations (sync), which prevents
        // stalling other memory operations and avoids potential deadlocks.
        // It also allows us to deduplicate nodes, avoiding redundant sync calls for the same file.
        let mut nodes_to_sync = {
            let mm_state = self.state.read();

            let length_rounded = round_up_to_system_page_size(length)?;
            let end_addr = addr.checked_add(length_rounded).ok_or_else(|| errno!(EINVAL))?;

            // `last_end` tracks how far the mappings have covered the range so far.
            let mut last_end = addr;
            let mut nodes = vec![];
            for (range, mapping) in mm_state.mappings.range(addr..end_addr) {
                // Check if there is a gap between the last mapped address and the current mapping.
                // msync requires the entire range to be mapped, so any gap results in ENOMEM.
                if range.start > last_end {
                    return error!(ENOMEM);
                }
                last_end = range.end;

                // MS_INVALIDATE on a locked mapping is rejected with EBUSY.
                if flags.contains(MsyncFlags::INVALIDATE)
                    && mapping.flags().contains(MappingFlags::LOCKED)
                {
                    return error!(EBUSY);
                }

                if flags.contains(MsyncFlags::SYNC) {
                    if let MappingNameRef::File(file_mapping) = mapping.name() {
                        nodes.push(file_mapping.name.entry.node.clone());
                    }
                }
            }
            // A trailing gap (mappings end before the requested range does) is also ENOMEM.
            if last_end < end_addr {
                return error!(ENOMEM);
            }
            nodes
        };

        // Deduplicate nodes to avoid redundant sync calls.
        nodes_to_sync.sort_by_key(|n| Arc::as_ptr(n) as usize);
        nodes_to_sync.dedup_by(|a, b| Arc::ptr_eq(a, b));

        for node in nodes_to_sync {
            // Range-based sync is non-trivial for Fxfs to support due to its complicated
            // reservation system (b/322874588#comment5). Naive range-based sync could exhaust
            // space reservations if called page-by-page, as transaction costs are based on the
            // number of dirty pages rather than file ranges. We use whole-file sync for now
            // to ensure data durability without adding excessive complexity.
            node.ops().sync(&node, current_task)?;
        }
        Ok(())
    }
3576
3577    pub fn madvise(
3578        &self,
3579        current_task: &CurrentTask,
3580        addr: UserAddress,
3581        length: usize,
3582        advice: u32,
3583    ) -> Result<(), Errno> {
3584        let mut state = self.state.write();
3585        let mut released_mappings = ReleasedMappings::default();
3586        let result = state.madvise(current_task, addr, length, advice, &mut released_mappings);
3587        released_mappings.finalize(state);
3588        result
3589    }
3590
3591    pub fn mlock<L>(
3592        &self,
3593        current_task: &CurrentTask,
3594        locked: &mut Locked<L>,
3595        desired_addr: UserAddress,
3596        desired_length: usize,
3597        on_fault: bool,
3598    ) -> Result<(), Errno>
3599    where
3600        L: LockBefore<ThreadGroupLimits>,
3601    {
3602        let mut state = self.state.write();
3603        let mut released_mappings = ReleasedMappings::default();
3604        let result = state.mlock(
3605            current_task,
3606            locked,
3607            desired_addr,
3608            desired_length,
3609            on_fault,
3610            &mut released_mappings,
3611        );
3612        released_mappings.finalize(state);
3613        result
3614    }
3615
3616    pub fn munlock(
3617        &self,
3618        current_task: &CurrentTask,
3619        desired_addr: UserAddress,
3620        desired_length: usize,
3621    ) -> Result<(), Errno> {
3622        let mut state = self.state.write();
3623        let mut released_mappings = ReleasedMappings::default();
3624        let result =
3625            state.munlock(current_task, desired_addr, desired_length, &mut released_mappings);
3626        released_mappings.finalize(state);
3627        result
3628    }
3629
    /// Log (via `log_warn!`) a human-readable dump of this address space, one line per mapping
    /// in a /proc/pid/maps-like format, marking where `fault_address` falls. Used as a
    /// diagnostic aid when reporting a fault.
    pub fn log_memory_map(&self, task: &Task, fault_address: UserAddress) {
        let state = self.state.read();
        log_warn!("Memory map for pid={}:", task.thread_group.leader);
        let mut last_end = UserAddress::from_ptr(0);
        for (range, map) in state.mappings.iter() {
            // If the fault lies in the gap before this mapping, print the marker here so
            // the output stays in address order.
            if fault_address >= last_end && fault_address < range.start {
                log_warn!("{:08x} <= FAULT", fault_address.ptr());
            }

            // rwx plus "s" (shared) or "p" (private), mirroring /proc/pid/maps.
            let perms = format!(
                "{}{}{}{}",
                if map.can_read() { 'r' } else { '-' },
                if map.can_write() { 'w' } else { '-' },
                if map.can_exec() { 'x' } else { '-' },
                if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' }
            );

            // Offset into the backing memory object; private-anonymous mappings report 0.
            let backing = match state.get_mapping_backing(map) {
                MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
                MappingBacking::PrivateAnonymous => 0,
            };

            let name_str = match &map.name() {
                MappingNameRef::File(file) => {
                    // Resolving the file path requires the task to still be live.
                    let Ok(live) = task.live() else {
                        log_warn!("Task {} is not live", task.get_tid());
                        continue;
                    };
                    String::from_utf8_lossy(&file.name.path(&live.fs())).into_owned()
                }
                MappingNameRef::None | MappingNameRef::AioContext(_) => {
                    if map.flags().contains(MappingFlags::SHARED)
                        && map.flags().contains(MappingFlags::ANONYMOUS)
                    {
                        "/dev/zero (deleted)".to_string()
                    } else {
                        "".to_string()
                    }
                }
                MappingNameRef::Stack => "[stack]".to_string(),
                MappingNameRef::Heap => "[heap]".to_string(),
                MappingNameRef::Vdso => "[vdso]".to_string(),
                MappingNameRef::Vvar => "[vvar]".to_string(),
                // Any remaining name variants fall back to their Debug representation.
                _ => format!("{:?}", map.name()),
            };

            let fault_marker = if range.contains(&fault_address) { " <= FAULT" } else { "" };

            log_warn!(
                "{:08x}-{:08x} {} {:08x} {}{}",
                range.start.ptr(),
                range.end.ptr(),
                perms,
                backing,
                name_str,
                fault_marker
            );
            last_end = range.end;
        }

        // The fault may lie beyond the last mapping; print the marker at the end.
        if fault_address >= last_end {
            log_warn!("{:08x} <= FAULT", fault_address.ptr());
        }
    }
3694
    /// Handles a page-fault exception reported against this address space.
    ///
    /// `decoded` carries the faulting address and access kind (read/write/
    /// execute, present/not-present); `error_code` is the status produced by
    /// Zircon's page-fault handler.
    ///
    /// Returns [`ExceptionResult::Handled`] when the fault is resolved here
    /// (a uffd-unregistration race, or a successful growsdown extension),
    /// otherwise an [`ExceptionResult::Signal`] carrying the SIGBUS/SIGSEGV
    /// to deliver to the faulting task.
    pub fn handle_page_fault(
        self: &Arc<Self>,
        locked: &mut Locked<Unlocked>,
        decoded: PageFaultExceptionReport,
        error_code: zx::Status,
    ) -> ExceptionResult {
        let addr = UserAddress::from(decoded.faulting_address);
        // On uffd-registered range, handle according to the uffd rules
        if error_code == zx::Status::ACCESS_DENIED {
            let state = self.state.write();
            if let Some((_, mapping)) = state.mappings.get(addr) {
                if mapping.flags().contains(MappingFlags::UFFD) {
                    // TODO(https://fxbug.dev/391599171): Support other modes
                    assert!(mapping.flags().contains(MappingFlags::UFFD_MISSING));

                    if let Some(_uffd) = state.find_uffd(locked, addr) {
                        // If the SIGBUS feature was set, no event will be sent to the file.
                        // Instead, SIGBUS is delivered to the process that triggered the fault.
                        // TODO(https://fxbug.dev/391599171): For now we only support this feature,
                        // so we assume it is set.
                        // Check for the SIGBUS feature when we start supporting running without it.
                        return ExceptionResult::Signal(SignalInfo::with_detail(
                            SIGBUS,
                            BUS_ADRERR as i32,
                            SignalDetail::SigFault { addr: decoded.faulting_address },
                        ));
                    };
                }
                // Determine whether the access should genuinely have been denied
                // according to the mapping's own protection flags.
                let exec_denied = decoded.is_execute && !mapping.can_exec();
                let write_denied = decoded.is_write && !mapping.can_write();
                let read_denied = (!decoded.is_execute && !decoded.is_write) && !mapping.can_read();
                // There is a data race resulting from uffd unregistration and page fault happening
                // at the same time. To detect it, we check if the access was meant to be rejected
                // according to Starnix own information about the mapping.
                let false_reject = !exec_denied && !write_denied && !read_denied;
                if false_reject {
                    track_stub!(
                        TODO("https://fxbug.dev/435171399"),
                        "Inconsistent permission fault"
                    );
                    return ExceptionResult::Handled;
                }
            }
            std::mem::drop(state);
        }

        if decoded.not_present {
            // A page fault may be resolved by extending a growsdown mapping to cover the faulting
            // address. Mark the exception handled if so. Otherwise let the regular handling proceed.

            // We should only attempt growth on a not-present fault and we should only extend if the
            // access type matches the protection on the GROWSDOWN mapping.
            match self.extend_growsdown_mapping_to_address(
                UserAddress::from(decoded.faulting_address),
                decoded.is_write,
            ) {
                Ok(true) => {
                    return ExceptionResult::Handled;
                }
                Err(e) => {
                    log_warn!("Error handling page fault: {e}")
                }
                _ => {}
            }
        }
        // For this exception type, the synth_code field in the exception report's context is the
        // error generated by the page fault handler. For us this is used to distinguish between a
        // segmentation violation and a bus error. Unfortunately this detail is not documented in
        // Zircon's public documentation and is only described in the architecture-specific
        // exception definitions such as:
        // zircon/kernel/arch/x86/include/arch/x86.h
        // zircon/kernel/arch/arm64/include/arch/arm64.h
        let signo = match error_code {
            zx::Status::OUT_OF_RANGE => SIGBUS,
            _ => SIGSEGV,
        };
        ExceptionResult::Signal(SignalInfo::with_detail(
            signo,
            SI_KERNEL as i32,
            SignalDetail::SigFault { addr: decoded.faulting_address },
        ))
    }
3777
3778    pub fn set_mapping_name(
3779        &self,
3780        addr: UserAddress,
3781        length: usize,
3782        name: Option<FsString>,
3783    ) -> Result<(), Errno> {
3784        let mut state = self.state.write();
3785        let mut released_mappings = ReleasedMappings::default();
3786        let result = state.set_mapping_name(addr, length, name, &mut released_mappings);
3787        released_mappings.finalize(state);
3788        result
3789    }
3790
3791    /// Returns [`Ok`] if the entire range specified by `addr..(addr+length)` contains valid
3792    /// mappings.
3793    ///
3794    /// # Errors
3795    ///
3796    /// Returns [`Err(errno)`] where `errno` is:
3797    ///
3798    ///   - `EINVAL`: `addr` is not page-aligned, or the range is too large,
3799    ///   - `ENOMEM`: one or more pages in the range are not mapped.
3800    pub fn ensure_mapped(&self, addr: UserAddress, length: usize) -> Result<(), Errno> {
3801        if !addr.is_aligned(*PAGE_SIZE) {
3802            return error!(EINVAL);
3803        }
3804
3805        let length = round_up_to_system_page_size(length)?;
3806        let end_addr = addr.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
3807        let state = self.state.read();
3808        let mut last_end = addr;
3809        for (range, _) in state.mappings.range(addr..end_addr) {
3810            if range.start > last_end {
3811                // This mapping does not start immediately after the last.
3812                return error!(ENOMEM);
3813            }
3814            last_end = range.end;
3815        }
3816        if last_end < end_addr {
3817            // There is a gap of no mappings at the end of the range.
3818            error!(ENOMEM)
3819        } else {
3820            Ok(())
3821        }
3822    }
3823
3824    /// Returns the memory object mapped at the address and the offset into the memory object of
3825    /// the address. Intended for implementing futexes.
3826    pub fn get_mapping_memory(
3827        &self,
3828        addr: UserAddress,
3829        perms: ProtectionFlags,
3830    ) -> Result<(Arc<MemoryObject>, u64), Errno> {
3831        let state = self.state.read();
3832        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
3833        if !mapping.flags().access_flags().contains(perms) {
3834            return error!(EACCES);
3835        }
3836        match state.get_mapping_backing(mapping) {
3837            MappingBacking::Memory(backing) => {
3838                Ok((Arc::clone(backing.memory()), mapping.address_to_offset(addr)))
3839            }
3840            MappingBacking::PrivateAnonymous => {
3841                Ok((Arc::clone(&state.private_anonymous.backing), addr.ptr() as u64))
3842            }
3843        }
3844    }
3845
3846    /// Does a rough check that the given address is plausibly in the address space of the
3847    /// application. This does not mean the pointer is valid for any particular purpose or that
3848    /// it will remain so!
3849    ///
3850    /// In some syscalls, Linux seems to do some initial validation of the pointer up front to
3851    /// tell the caller early if it's invalid. For example, in epoll_wait() it's returning a vector
3852    /// of events. If the caller passes an invalid pointer, it wants to fail without dropping any
3853    /// events. Failing later when actually copying the required events to userspace would mean
3854    /// those events will be lost. But holding a lock on the memory manager for an asynchronous
3855    /// wait is not desirable.
3856    ///
3857    /// Testing shows that Linux seems to do some initial plausibility checking of the pointer to
3858    /// be able to report common usage errors before doing any (possibly unreversable) work. This
3859    /// checking is easy to get around if you try, so this function is also not required to
3860    /// be particularly robust. Certainly the more advanced cases of races (the memory could be
3861    /// unmapped after this call but before it's used) are not handled.
3862    ///
3863    /// The buffer_size variable is the size of the data structure that needs to fit
3864    /// in the given memory.
3865    ///
3866    /// Returns the error EFAULT if invalid.
3867    pub fn check_plausible(&self, addr: UserAddress, buffer_size: usize) -> Result<(), Errno> {
3868        let state = self.state.read();
3869
3870        if let Some(range) = state.mappings.last_range() {
3871            if (range.end - buffer_size)? >= addr {
3872                return Ok(());
3873            }
3874        }
3875        error!(EFAULT)
3876    }
3877
3878    pub fn get_aio_context(&self, addr: UserAddress) -> Option<Arc<AioContext>> {
3879        let state = self.state.read();
3880        state.get_aio_context(addr).map(|(_, aio_context)| aio_context)
3881    }
3882
    /// Tears down the AIO context mapped at `addr`: unmaps its backing range
    /// and returns the [`AioContext`] on success.
    ///
    /// # Errors
    ///
    /// Returns `EINVAL` when no AIO context is registered at `addr`.
    pub fn destroy_aio_context(
        self: &Arc<Self>,
        addr: UserAddress,
    ) -> Result<Arc<AioContext>, Errno> {
        let mut released_mappings = ReleasedMappings::default();

        // Hold the lock throughout the operation to uphold memory manager's invariants.
        // See mm/README.md.
        let mut state = self.state.write();

        // Validate that this address actually has an AioContext. We need to hold the state lock
        // until we actually remove the mappings to ensure that another thread does not manipulate
        // the mappings after we've validated that they contain an AioContext.
        let Some((range, aio_context)) = state.get_aio_context(addr) else {
            return error!(EINVAL);
        };

        let length = range.end - range.start;
        let result = state.unmap(self, range.start, length, &mut released_mappings);

        // Finalizing consumes the state guard and releases the unmapped mappings.
        released_mappings.finalize(state);

        result.map(|_| aio_context)
    }
3907
3908    #[cfg(test)]
3909    pub fn get_mapping_name(
3910        &self,
3911        addr: UserAddress,
3912    ) -> Result<Option<flyweights::FlyByteStr>, Errno> {
3913        let state = self.state.read();
3914        let (_, mapping) = state.mappings.get(addr).ok_or_else(|| errno!(EFAULT))?;
3915        if let MappingNameRef::Vma(name) = mapping.name() {
3916            Ok(Some(name.clone()))
3917        } else {
3918            Ok(None)
3919        }
3920    }
3921
3922    #[cfg(test)]
3923    pub fn get_mapping_count(&self) -> usize {
3924        let state = self.state.read();
3925        state.mappings.iter().count()
3926    }
3927
3928    pub fn extend_growsdown_mapping_to_address(
3929        self: &Arc<Self>,
3930        addr: UserAddress,
3931        is_write: bool,
3932    ) -> Result<bool, Error> {
3933        self.state.write().extend_growsdown_mapping_to_address(self, addr, is_write)
3934    }
3935
    /// Computes memory usage statistics for this address space by walking
    /// Zircon's mapping info and correlating each entry with the memory
    /// manager's own bookkeeping.
    pub fn get_stats(&self, current_task: &CurrentTask) -> MemoryStats {
        // Grab our state lock before reading zircon mappings so that the two are consistent.
        // Other Starnix threads should not make any changes to the Zircon mappings while we hold
        // a read lock to the memory manager state.
        let state = self.state.read();

        let mut stats = MemoryStats::default();
        stats.vm_stack = state.stack_size;

        state.with_zx_mappings(current_task, |zx_mappings| {
            for zx_mapping in zx_mappings {
                // We only care about map info for actual mappings.
                let zx_details = zx_mapping.details();
                let Some(zx_details) = zx_details.as_mapping() else { continue };
                let user_address = UserAddress::from(zx_mapping.base as u64);
                // Every Zircon mapping must have a corresponding entry in our
                // bookkeeping; a miss means the two views have diverged.
                let (_, mm_mapping) = state
                    .mappings
                    .get(user_address)
                    .unwrap_or_else(|| panic!("mapping bookkeeping must be consistent with zircon's: not found: {user_address:?}"));
                debug_assert_eq!(
                    match state.get_mapping_backing(mm_mapping) {
                        MappingBacking::Memory(m)=>m.memory().get_koid(),
                        MappingBacking::PrivateAnonymous=>state.private_anonymous.backing.get_koid(),
                    },
                    zx_details.vmo_koid,
                    "MemoryManager and Zircon must agree on which VMO is mapped in this range",
                );

                stats.vm_size += zx_mapping.size;

                // Committed bytes count as resident; populated-but-uncommitted
                // bytes are attributed to swap.
                stats.vm_rss += zx_details.committed_bytes;
                stats.vm_swap += zx_details.populated_bytes - zx_details.committed_bytes;

                // Attribute resident bytes to shared, anonymous, or file-backed
                // RSS buckets (first matching category wins).
                if mm_mapping.flags().contains(MappingFlags::SHARED) {
                    stats.rss_shared += zx_details.committed_bytes;
                } else if mm_mapping.flags().contains(MappingFlags::ANONYMOUS) {
                    stats.rss_anonymous += zx_details.committed_bytes;
                } else if mm_mapping.name().is_file() {
                    stats.rss_file += zx_details.committed_bytes;
                }

                if mm_mapping.flags().contains(MappingFlags::LOCKED) {
                    stats.vm_lck += zx_details.committed_bytes;
                }

                // Writable ELF segments are counted as data, executable ones as text.
                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
                    && mm_mapping.flags().contains(MappingFlags::WRITE)
                {
                    stats.vm_data += zx_mapping.size;
                }

                if mm_mapping.flags().contains(MappingFlags::ELF_BINARY)
                    && mm_mapping.flags().contains(MappingFlags::EXEC)
                {
                    stats.vm_exe += zx_mapping.size;
                }
            }
        });

        // TODO(https://fxbug.dev/396221597): Placeholder for now. We need kernel support to track
        // the committed bytes high water mark.
        stats.vm_rss_hwm = STUB_VM_RSS_HWM;
        stats
    }
4000
4001    pub fn atomic_load_u32_acquire(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
4002        if let Some(usercopy) = usercopy() {
4003            usercopy.atomic_load_u32_acquire(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
4004        } else {
4005            unreachable!("can only control memory ordering of atomics with usercopy");
4006        }
4007    }
4008
    /// Atomically loads a `u32` from `futex_addr` with relaxed ordering.
    ///
    /// When usercopy is unavailable, falls back to reading the four bytes
    /// through the memory manager's state.
    ///
    /// # Errors
    ///
    /// Returns `EFAULT` when the address cannot be read.
    pub fn atomic_load_u32_relaxed(&self, futex_addr: FutexAddress) -> Result<u32, Errno> {
        if let Some(usercopy) = usercopy() {
            usercopy.atomic_load_u32_relaxed(futex_addr.ptr()).map_err(|_| errno!(EFAULT))
        } else {
            // SAFETY: `self.state.read().read_memory` only returns `Ok` if all
            // bytes were read to.
            let buf = unsafe {
                read_to_array(|buf| {
                    self.state.read().read_memory(futex_addr.into(), buf).map(|bytes_read| {
                        debug_assert_eq!(bytes_read.len(), std::mem::size_of::<u32>())
                    })
                })
            }?;
            Ok(u32::from_ne_bytes(buf))
        }
    }
4025
4026    pub fn atomic_store_u32_relaxed(
4027        &self,
4028        futex_addr: FutexAddress,
4029        value: u32,
4030    ) -> Result<(), Errno> {
4031        if let Some(usercopy) = usercopy() {
4032            usercopy.atomic_store_u32_relaxed(futex_addr.ptr(), value).map_err(|_| errno!(EFAULT))
4033        } else {
4034            self.state.read().write_memory(futex_addr.into(), value.as_bytes())?;
4035            Ok(())
4036        }
4037    }
4038
4039    pub fn atomic_compare_exchange_u32_acq_rel(
4040        &self,
4041        futex_addr: FutexAddress,
4042        current: u32,
4043        new: u32,
4044    ) -> CompareExchangeResult<u32> {
4045        let Some(usercopy) = usercopy() else {
4046            unreachable!("Atomic compare/exchange requires usercopy.");
4047        };
4048        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_u32_acq_rel(
4049            futex_addr.ptr(),
4050            current,
4051            new,
4052        ))
4053    }
4054
4055    pub fn atomic_compare_exchange_weak_u32_acq_rel(
4056        &self,
4057        futex_addr: FutexAddress,
4058        current: u32,
4059        new: u32,
4060    ) -> CompareExchangeResult<u32> {
4061        let Some(usercopy) = usercopy() else {
4062            unreachable!("Atomic compare/exchange requires usercopy.");
4063        };
4064        CompareExchangeResult::from_usercopy(usercopy.atomic_compare_exchange_weak_u32_acq_rel(
4065            futex_addr.ptr(),
4066            current,
4067            new,
4068        ))
4069    }
4070}
4071
4072/// The result of an atomic compare/exchange operation on user memory.
4073#[derive(Debug, Clone)]
4074pub enum CompareExchangeResult<T> {
4075    /// The current value provided matched the one observed in memory and the new value provided
4076    /// was written.
4077    Success,
4078    /// The provided current value did not match the current value in memory.
4079    Stale { observed: T },
4080    /// There was a general error while accessing the requested memory.
4081    Error(Errno),
4082}
4083
4084impl<T> CompareExchangeResult<T> {
4085    fn from_usercopy(usercopy_res: Result<Result<T, T>, ()>) -> Self {
4086        match usercopy_res {
4087            Ok(Ok(_)) => Self::Success,
4088            Ok(Err(observed)) => Self::Stale { observed },
4089            Err(()) => Self::Error(errno!(EFAULT)),
4090        }
4091    }
4092}
4093
4094impl<T> From<Errno> for CompareExchangeResult<T> {
4095    fn from(e: Errno) -> Self {
4096        Self::Error(e)
4097    }
4098}
4099
/// The user-space address at which a mapping should be placed. Used by [`MemoryManager::map`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DesiredAddress {
    /// Map at any address chosen by the kernel.
    Any,
    /// The address is a hint. If the address overlaps an existing mapping a different address may
    /// be chosen.
    Hint(UserAddress),
    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
    /// overwrite it), mapping fails.
    Fixed(UserAddress),
    /// The address is a requirement. If the address overlaps an existing mapping (and cannot
    /// overwrite it), the overlapping mappings should be unmapped first.
    FixedOverwrite(UserAddress),
}
4115
4116/// The user-space address at which a mapping should be placed. Used by [`map_in_vmar`].
4117#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4118enum SelectedAddress {
4119    /// See DesiredAddress::Fixed.
4120    Fixed(UserAddress),
4121    /// See DesiredAddress::FixedOverwrite.
4122    FixedOverwrite(UserAddress),
4123}
4124
4125impl SelectedAddress {
4126    fn addr(&self) -> UserAddress {
4127        match self {
4128            SelectedAddress::Fixed(addr) => *addr,
4129            SelectedAddress::FixedOverwrite(addr) => *addr,
4130        }
4131    }
4132}
4133
/// Write one line of the memory map intended for adding to `/proc/self/maps`.
///
/// Emits the maps(5)-style record `start-end perms offset dev inode  name`.
/// The device field is always reported as `00:00`, and the name column is
/// padded to start at column 74.
fn write_map(
    task: &Task,
    sink: &mut DynamicFileBuf,
    state: &MemoryManagerState,
    range: &Range<UserAddress>,
    map: &Mapping,
) -> Result<(), Errno> {
    // NOTE: `DynamicFileBuf`'s `write!` evaluates to the number of bytes
    // written; it is used below to pad out to the name column.
    let line_length = write!(
        sink,
        "{:08x}-{:08x} {}{}{}{} {:08x} 00:00 {} ",
        range.start.ptr(),
        range.end.ptr(),
        if map.can_read() { 'r' } else { '-' },
        if map.can_write() { 'w' } else { '-' },
        if map.can_exec() { 'x' } else { '-' },
        if map.flags().contains(MappingFlags::SHARED) { 's' } else { 'p' },
        // File offset of the start of this mapping; private anonymous
        // mappings always report 0.
        match state.get_mapping_backing(map) {
            MappingBacking::Memory(backing) => backing.address_to_offset(range.start),
            MappingBacking::PrivateAnonymous => 0,
        },
        // Inode number for file-backed mappings; 0 otherwise.
        if let MappingNameRef::File(file) = &map.name() { file.name.entry.node.ino } else { 0 }
    )?;
    let fill_to_name = |sink: &mut DynamicFileBuf| {
        // The filename goes at >= the 74th column (73rd when zero indexed)
        for _ in line_length..73 {
            sink.write(b" ");
        }
    };
    // Emit the name column according to the mapping's kind.
    match &map.name() {
        MappingNameRef::None | MappingNameRef::AioContext(_) => {
            if map.flags().contains(MappingFlags::SHARED)
                && map.flags().contains(MappingFlags::ANONYMOUS)
            {
                // See proc(5), "/proc/[pid]/map_files/"
                fill_to_name(sink);
                sink.write(b"/dev/zero (deleted)");
            }
        }
        MappingNameRef::Stack => {
            fill_to_name(sink);
            sink.write(b"[stack]");
        }
        MappingNameRef::Heap => {
            fill_to_name(sink);
            sink.write(b"[heap]");
        }
        MappingNameRef::Vdso => {
            fill_to_name(sink);
            sink.write(b"[vdso]");
        }
        MappingNameRef::Vvar => {
            fill_to_name(sink);
            sink.write(b"[vvar]");
        }
        MappingNameRef::File(file) => {
            fill_to_name(sink);
            // File names can have newlines that need to be escaped before printing.
            // According to https://man7.org/linux/man-pages/man5/proc.5.html the only
            // escaping applied to paths is replacing newlines with an octal sequence.
            let path = file.name.path(&task.live()?.fs());
            sink.write_iter(
                path.iter()
                    .flat_map(|b| if *b == b'\n' { b"\\012" } else { std::slice::from_ref(b) })
                    .copied(),
            );
        }
        MappingNameRef::Vma(name) => {
            fill_to_name(sink);
            sink.write(b"[anon:");
            sink.write(name.as_bytes());
            sink.write(b"]");
        }
        MappingNameRef::Ashmem(name) => {
            fill_to_name(sink);
            sink.write(b"/dev/ashmem/");
            sink.write(name.as_bytes());
        }
    }
    sink.write(b"\n");
    Ok(())
}
4216
/// Memory usage counters for an address space, populated by
/// [`MemoryManager::get_stats`]. All values are in bytes.
#[derive(Default)]
pub struct MemoryStats {
    // Total size of all mappings.
    pub vm_size: usize,
    // Resident (committed) bytes across all mappings.
    pub vm_rss: usize,
    // High-water mark for `vm_rss`; currently a stub constant
    // (see https://fxbug.dev/396221597).
    pub vm_rss_hwm: usize,
    // Resident bytes in anonymous (non-shared) mappings.
    pub rss_anonymous: usize,
    // Resident bytes in file-backed (non-shared, non-anonymous) mappings.
    pub rss_file: usize,
    // Resident bytes in shared mappings.
    pub rss_shared: usize,
    // Total size of writable ELF-binary mappings.
    pub vm_data: usize,
    // Stack size, taken from the memory manager's recorded stack size.
    pub vm_stack: usize,
    // Total size of executable ELF-binary mappings.
    pub vm_exe: usize,
    // Populated-but-uncommitted bytes (reported as swap).
    pub vm_swap: usize,
    // Resident bytes in mlock()ed mappings.
    pub vm_lck: usize,
}
4231
4232/// Implements `/proc/self/maps`.
4233#[derive(Clone)]
4234pub struct ProcMapsFile {
4235    mm: Weak<MemoryManager>,
4236    task: WeakRef<Task>,
4237}
4238impl ProcMapsFile {
4239    pub fn new(task: TempRef<'_, Task>) -> DynamicFile<Self> {
4240        // "maps" is empty for kthreads, rather than inaccessible.
4241        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4242        let task = task.into();
4243        DynamicFile::new(Self { mm, task })
4244    }
4245}
4246
4247impl SequenceFileSource for ProcMapsFile {
4248    type Cursor = UserAddress;
4249
4250    fn next(
4251        &self,
4252        _current_task: &CurrentTask,
4253        cursor: UserAddress,
4254        sink: &mut DynamicFileBuf,
4255    ) -> Result<Option<UserAddress>, Errno> {
4256        let task = Task::from_weak(&self.task)?;
4257        // /proc/<pid>/maps is empty for kthreads and tasks whose memory manager has changed.
4258        let Some(mm) = self.mm.upgrade() else {
4259            return Ok(None);
4260        };
4261        let state = mm.state.read();
4262        if let Some((range, map)) = state.mappings.find_at_or_after(cursor) {
4263            write_map(&task, sink, &state, range, map)?;
4264            return Ok(Some(range.end));
4265        }
4266        Ok(None)
4267    }
4268}
4269
4270#[derive(Clone)]
4271pub struct ProcSmapsFile {
4272    mm: Weak<MemoryManager>,
4273    task: WeakRef<Task>,
4274}
4275impl ProcSmapsFile {
4276    pub fn new(task: TempRef<'_, Task>) -> DynamicFile<Self> {
4277        // "smaps" is empty for kthreads, rather than inaccessible.
4278        let mm = task.mm().map_or_else(|_| Weak::default(), |mm| Arc::downgrade(&mm));
4279        DynamicFile::new(Self { mm, task: task.into() })
4280    }
4281}
4282
impl DynamicFileSource for ProcSmapsFile {
    /// Generates the full smaps content: for each mapping, a maps-style header
    /// line followed by per-mapping statistics (Size, Rss, Pss, …) derived
    /// from Zircon's committed-page accounting.
    fn generate(&self, current_task: &CurrentTask, sink: &mut DynamicFileBuf) -> Result<(), Errno> {
        let page_size_kb = *PAGE_SIZE / 1024;
        let task = Task::from_weak(&self.task)?;
        // /proc/<pid>/smaps is empty for kthreads and tasks whose memory manager has changed.
        let Some(mm) = self.mm.upgrade() else {
            return Ok(());
        };
        let state = mm.state.read();
        // First pass: compute committed bytes for each Starnix mapping, in
        // iteration order, while holding the state read lock so the two views
        // stay consistent.
        let committed_bytes_vec = state.with_zx_mappings(current_task, |zx_mappings| {
            // Index Zircon's mappings by address range so the entries
            // overlapping each Starnix mapping can be queried below.
            let mut zx_memory_info = RangeMap::<UserAddress, usize>::default();
            for idx in 0..zx_mappings.len() {
                let zx_mapping = zx_mappings[idx];
                // RangeMap uses #[must_use] for its default usecase but this drop is trivial.
                let _ = zx_memory_info.insert(
                    UserAddress::from_ptr(zx_mapping.base)
                        ..UserAddress::from_ptr(zx_mapping.base + zx_mapping.size),
                    idx,
                );
            }

            let mut committed_bytes_vec = Vec::new();
            for (mm_range, mm_mapping) in state.mappings.iter() {
                let mut committed_bytes = 0;

                for (zx_range, zx_mapping_idx) in zx_memory_info.range(mm_range.clone()) {
                    let intersect_range = zx_range.intersect(mm_range);
                    let zx_mapping = zx_mappings[*zx_mapping_idx];
                    let zx_details = zx_mapping.details();
                    // Skip info entries that are not actual mappings.
                    let Some(zx_details) = zx_details.as_mapping() else { continue };
                    let zx_committed_bytes = zx_details.committed_bytes;

                    // TODO(https://fxbug.dev/419882465): It can happen that the same Zircon mapping
                    // is covered by more than one Starnix mapping. In this case we don't have
                    // enough granularity to answer the question of how many committed bytes belong
                    // to one mapping or another. Make a best-effort approximation by dividing the
                    // committed bytes of a Zircon mapping proportionally.
                    committed_bytes += if intersect_range != *zx_range {
                        let intersection_size =
                            intersect_range.end.ptr() - intersect_range.start.ptr();
                        // NOTE: f32 has only 24 bits of mantissa, so the
                        // prorated value is approximate for large mappings.
                        let part = intersection_size as f32 / zx_mapping.size as f32;
                        let prorated_committed_bytes: f32 = part * zx_committed_bytes as f32;
                        prorated_committed_bytes as u64
                    } else {
                        zx_committed_bytes as u64
                    };
                    assert_eq!(
                        match state.get_mapping_backing(mm_mapping) {
                            MappingBacking::Memory(m) => m.memory().get_koid(),
                            MappingBacking::PrivateAnonymous =>
                                state.private_anonymous.backing.get_koid(),
                        },
                        zx_details.vmo_koid,
                        "MemoryManager and Zircon must agree on which VMO is mapped in this range",
                    );
                }
                committed_bytes_vec.push(committed_bytes);
            }
            Ok(committed_bytes_vec)
        })?;

        // Second pass: emit one stanza per mapping, zipping each mapping with
        // the committed-byte count computed above (same iteration order).
        for ((mm_range, mm_mapping), committed_bytes) in
            state.mappings.iter().zip(committed_bytes_vec.into_iter())
        {
            write_map(&task, sink, &state, mm_range, mm_mapping)?;

            let size_kb = (mm_range.end.ptr() - mm_range.start.ptr()) / 1024;
            writeln!(sink, "Size:           {size_kb:>8} kB",)?;
            let share_count = match state.get_mapping_backing(mm_mapping) {
                MappingBacking::Memory(backing) => {
                    let memory = backing.memory();
                    if memory.is_clock() {
                        // Clock memory mappings are not shared in a meaningful way.
                        1
                    } else {
                        let memory_info = backing.memory().info()?;
                        memory_info.share_count as u64
                    }
                }
                MappingBacking::PrivateAnonymous => {
                    1 // Private mapping
                }
            };

            let rss_kb = committed_bytes / 1024;
            writeln!(sink, "Rss:            {rss_kb:>8} kB")?;

            // Pss divides resident bytes of shared mappings evenly by the
            // share count; private mappings report their full Rss.
            let pss_kb = if mm_mapping.flags().contains(MappingFlags::SHARED) {
                rss_kb / share_count
            } else {
                rss_kb
            };
            writeln!(sink, "Pss:            {pss_kb:>8} kB")?;

            track_stub!(TODO("https://fxbug.dev/322874967"), "smaps dirty pages");
            let (shared_dirty_kb, private_dirty_kb) = (0, 0);

            // Without dirty-page tracking, all resident bytes are reported as
            // clean, attributed to shared or private based on the share count.
            let is_shared = share_count > 1;
            let shared_clean_kb = if is_shared { rss_kb } else { 0 };
            writeln!(sink, "Shared_Clean:   {shared_clean_kb:>8} kB")?;
            writeln!(sink, "Shared_Dirty:   {shared_dirty_kb:>8} kB")?;

            let private_clean_kb = if is_shared { 0 } else { rss_kb };
            writeln!(sink, "Private_Clean:  {private_clean_kb:>8} kB")?;
            writeln!(sink, "Private_Dirty:  {private_dirty_kb:>8} kB")?;

            let anonymous_kb = if mm_mapping.private_anonymous() { rss_kb } else { 0 };
            writeln!(sink, "Anonymous:      {anonymous_kb:>8} kB")?;
            writeln!(sink, "KernelPageSize: {page_size_kb:>8} kB")?;
            writeln!(sink, "MMUPageSize:    {page_size_kb:>8} kB")?;

            let locked_kb =
                if mm_mapping.flags().contains(MappingFlags::LOCKED) { rss_kb } else { 0 };
            writeln!(sink, "Locked:         {locked_kb:>8} kB")?;
            writeln!(sink, "VmFlags: {}", mm_mapping.vm_flags())?;

            track_stub!(TODO("https://fxbug.dev/297444691"), "optional smaps fields");
        }

        Ok(())
    }
}
4405
4406/// Creates a memory object that can be used in an anonymous mapping for the `mmap` syscall.
4407pub fn create_anonymous_mapping_memory(size: u64) -> Result<Arc<MemoryObject>, Errno> {
4408    // mremap can grow memory regions, so make sure the memory object is resizable.
4409    let mut memory = MemoryObject::from(
4410        zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, size).map_err(|s| match s {
4411            zx::Status::NO_MEMORY => errno!(ENOMEM),
4412            zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
4413            _ => impossible_error(s),
4414        })?,
4415    )
4416    .with_zx_name(b"starnix:memory_manager");
4417
4418    memory.set_zx_name(b"starnix-anon");
4419
4420    // TODO(https://fxbug.dev/42056890): Audit replace_as_executable usage
4421    memory = memory.replace_as_executable(&VMEX_RESOURCE).map_err(impossible_error)?;
4422    Ok(Arc::new(memory))
4423}
4424
4425fn generate_random_offset_for_aslr(arch_width: ArchWidth) -> usize {
4426    // Generate a number with ASLR_RANDOM_BITS.
4427    let randomness = {
4428        let random_bits =
4429            if arch_width.is_arch32() { ASLR_32_RANDOM_BITS } else { ASLR_RANDOM_BITS };
4430        let mask = (1 << random_bits) - 1;
4431        let mut bytes = [0; std::mem::size_of::<usize>()];
4432        starnix_crypto::cprng_draw(&mut bytes);
4433        usize::from_le_bytes(bytes) & mask
4434    };
4435
4436    // Transform it into a page-aligned offset.
4437    randomness * (*PAGE_SIZE as usize)
4438}
4439
4440#[cfg(test)]
4441mod tests {
4442    use super::*;
4443    use crate::mm::memory_accessor::MemoryAccessorExt;
4444    use crate::mm::syscalls::do_mmap;
4445    use crate::task::syscalls::sys_prctl;
4446    use crate::testing::*;
4447    use crate::vfs::FdNumber;
4448    use assert_matches::assert_matches;
4449    use itertools::assert_equal;
4450    use starnix_sync::{FileOpsCore, LockEqualOrBefore};
4451    use starnix_uapi::user_address::{UserCString, UserRef};
4452    use starnix_uapi::{
4453        MAP_ANONYMOUS, MAP_FIXED, MAP_GROWSDOWN, MAP_PRIVATE, MAP_SHARED, PR_SET_VMA,
4454        PR_SET_VMA_ANON_NAME, PROT_NONE, PROT_READ,
4455    };
4456    use std::ffi::CString;
4457    use zerocopy::{FromBytes, Immutable, KnownLayout};
4458
4459    #[::fuchsia::test]
4460    fn test_mapping_flags() {
4461        let options = MappingOptions::ANONYMOUS;
4462        let access_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
4463        let mapping_flags = MappingFlags::from_access_flags_and_options(access_flags, options);
4464        assert_eq!(mapping_flags.access_flags(), access_flags);
4465        assert_eq!(mapping_flags.options(), options);
4466
4467        let new_access_flags = ProtectionFlags::READ | ProtectionFlags::EXEC;
4468        let adusted_mapping_flags = mapping_flags.with_access_flags(new_access_flags);
4469        assert_eq!(adusted_mapping_flags.access_flags(), new_access_flags);
4470        assert_eq!(adusted_mapping_flags.options(), options);
4471    }
4472
    /// Exercises `set_brk`: initializing the program break, growing it by sub-page and
    /// multi-page amounts, and shrinking it back down until the mapping disappears.
    #[::fuchsia::test]
    async fn test_brk() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            // Look up the given addr in the mappings table.
            let get_range = |addr: UserAddress| {
                let state = mm.state.read();
                state.mappings.get(addr).map(|(range, mapping)| (range.clone(), mapping.clone()))
            };

            // Initialize the program break.
            let base_addr = mm
                .set_brk(locked, &current_task, UserAddress::default())
                .expect("failed to set initial program break");
            assert!(base_addr > UserAddress::default());

            // Page containing the program break address should not be mapped.
            assert_eq!(get_range(base_addr), None);

            // Growing it by a single byte results in that page becoming mapped.
            let addr0 = mm
                .set_brk(locked, &current_task, (base_addr + 1u64).unwrap())
                .expect("failed to grow brk");
            assert!(addr0 > base_addr);
            let (range0, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range0.start, base_addr);
            assert_eq!(range0.end, (base_addr + *PAGE_SIZE).unwrap());

            // Grow the program break by another byte, which won't be enough to cause additional pages to be mapped.
            let addr1 = mm
                .set_brk(locked, &current_task, (base_addr + 2u64).unwrap())
                .expect("failed to grow brk");
            assert_eq!(addr1, (base_addr + 2u64).unwrap());
            let (range1, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range1.start, range0.start);
            assert_eq!(range1.end, range0.end);

            // Grow the program break by a non-trivial amount and observe the larger mapping.
            let addr2 = mm
                .set_brk(locked, &current_task, (base_addr + 24893u64).unwrap())
                .expect("failed to grow brk");
            assert_eq!(addr2, (base_addr + 24893u64).unwrap());
            let (range2, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range2.start, base_addr);
            // The mapping is page-granular: its end is the break rounded up to a page boundary.
            assert_eq!(range2.end, addr2.round_up(*PAGE_SIZE).unwrap());

            // Shrink the program break and observe the smaller mapping.
            let addr3 = mm
                .set_brk(locked, &current_task, (base_addr + 14832u64).unwrap())
                .expect("failed to shrink brk");
            assert_eq!(addr3, (base_addr + 14832u64).unwrap());
            let (range3, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range3.start, base_addr);
            assert_eq!(range3.end, addr3.round_up(*PAGE_SIZE).unwrap());

            // Shrink the program break close to zero and observe the smaller mapping.
            let addr4 = mm
                .set_brk(locked, &current_task, (base_addr + 3u64).unwrap())
                .expect("failed to drastically shrink brk");
            assert_eq!(addr4, (base_addr + 3u64).unwrap());
            let (range4, _) = get_range(base_addr).expect("base_addr should be mapped");
            assert_eq!(range4.start, base_addr);
            assert_eq!(range4.end, addr4.round_up(*PAGE_SIZE).unwrap());

            // Shrink the program break to zero and observe that the mapping is entirely gone.
            let addr5 = mm
                .set_brk(locked, &current_task, base_addr)
                .expect("failed to drastically shrink brk to zero");
            assert_eq!(addr5, base_addr);
            assert_eq!(get_range(base_addr), None);
        })
        .await;
    }
4547
    /// Verifies that `MemoryManager::exec` produces a fresh address space: existing brk
    /// and mmap mappings are gone afterwards, and their addresses become reusable.
    #[::fuchsia::test]
    async fn test_mm_exec() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            // Returns whether `addr` is covered by any mapping in the current mm state.
            let has = |addr: UserAddress| -> bool {
                let state = mm.state.read();
                state.mappings.get(addr).is_some()
            };

            let brk_addr = mm
                .set_brk(locked, &current_task, UserAddress::default())
                .expect("failed to set initial program break");
            assert!(brk_addr > UserAddress::default());

            // Allocate a single page of BRK space, so that the break base address is mapped.
            let _ = mm
                .set_brk(locked, &current_task, (brk_addr + 1u64).unwrap())
                .expect("failed to grow program break");
            assert!(has(brk_addr));

            let mapped_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
            assert!(mapped_addr > UserAddress::default());
            assert!(has(mapped_addr));

            // Build a replacement memory manager, as an exec() would, and install it.
            let node = current_task.lookup_path_from_root(locked, "/".into()).unwrap();
            let new_mm = MemoryManager::exec(
                current_task.thread_group().root_vmar.unowned(),
                current_task.live().mm.to_option_arc(),
                node,
                ArchWidth::Arch64,
            )
            .expect("failed to exec memory manager");
            current_task.live().mm.update(Some(new_mm));

            // Both pre-exec mappings must have been dropped.
            assert!(!has(brk_addr));
            assert!(!has(mapped_addr));

            // Check that the old addresses are actually available for mapping.
            let brk_addr2 = map_memory(locked, &current_task, brk_addr, *PAGE_SIZE);
            assert_eq!(brk_addr, brk_addr2);
            let mapped_addr2 = map_memory(locked, &current_task, mapped_addr, *PAGE_SIZE);
            assert_eq!(mapped_addr, mapped_addr2);
        })
        .await;
    }
4594
    /// Exercises `get_contiguous_mappings_at`: empty results for unmapped/zero-length
    /// requests, EFAULT on invalid ranges, clamping at holes, and clamping at the last
    /// mapped page.
    #[::fuchsia::test]
    async fn test_get_contiguous_mappings_at() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            // Create four one-page mappings with a hole between the third one and the fourth one.
            let page_size = *PAGE_SIZE as usize;
            let addr_a = (mm.base_addr + 10 * page_size).unwrap();
            let addr_b = (mm.base_addr + 11 * page_size).unwrap();
            let addr_c = (mm.base_addr + 12 * page_size).unwrap();
            let addr_d = (mm.base_addr + 14 * page_size).unwrap();
            assert_eq!(map_memory(locked, &current_task, addr_a, *PAGE_SIZE), addr_a);
            assert_eq!(map_memory(locked, &current_task, addr_b, *PAGE_SIZE), addr_b);
            assert_eq!(map_memory(locked, &current_task, addr_c, *PAGE_SIZE), addr_c);
            assert_eq!(map_memory(locked, &current_task, addr_d, *PAGE_SIZE), addr_d);

            {
                let mm_state = mm.state.read();
                // Verify that requesting an unmapped address returns an empty iterator.
                assert_equal(
                    mm_state.get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 50).unwrap(),
                    vec![],
                );
                assert_equal(
                    mm_state.get_contiguous_mappings_at((addr_a - 100u64).unwrap(), 200).unwrap(),
                    vec![],
                );

                // Verify that requesting zero bytes returns an empty iterator.
                assert_equal(mm_state.get_contiguous_mappings_at(addr_a, 0).unwrap(), vec![]);

                // Verify errors.
                assert_eq!(
                    mm_state
                        .get_contiguous_mappings_at(UserAddress::from(100), usize::MAX)
                        .err()
                        .unwrap(),
                    errno!(EFAULT)
                );
                assert_eq!(
                    mm_state
                        .get_contiguous_mappings_at((mm_state.max_address() + 1u64).unwrap(), 0)
                        .err()
                        .unwrap(),
                    errno!(EFAULT)
                );
            }

            // The adjacent a/b/c pages merged into a single mapping; d stands alone.
            assert_eq!(mm.get_mapping_count(), 2);
            let mm_state = mm.state.read();
            let (map_a, map_b) = {
                let mut it = mm_state.mappings.iter();
                (it.next().unwrap().1, it.next().unwrap().1)
            };

            assert_equal(
                mm_state.get_contiguous_mappings_at(addr_a, page_size).unwrap(),
                vec![(map_a, page_size)],
            );

            assert_equal(
                mm_state.get_contiguous_mappings_at(addr_a, page_size / 2).unwrap(),
                vec![(map_a, page_size / 2)],
            );

            assert_equal(
                mm_state.get_contiguous_mappings_at(addr_a, page_size * 3).unwrap(),
                vec![(map_a, page_size * 3)],
            );

            assert_equal(
                mm_state.get_contiguous_mappings_at(addr_b, page_size).unwrap(),
                vec![(map_a, page_size)],
            );

            assert_equal(
                mm_state.get_contiguous_mappings_at(addr_d, page_size).unwrap(),
                vec![(map_b, page_size)],
            );

            // Verify that results stop if there is a hole.
            assert_equal(
                mm_state
                    .get_contiguous_mappings_at((addr_a + page_size / 2).unwrap(), page_size * 10)
                    .unwrap(),
                vec![(map_a, page_size * 2 + page_size / 2)],
            );

            // Verify that results stop at the last mapped page.
            assert_equal(
                mm_state.get_contiguous_mappings_at(addr_d, page_size * 10).unwrap(),
                vec![(map_b, page_size)],
            );
        })
        .await;
    }
4691
4692    #[::fuchsia::test]
4693    async fn test_read_write_crossing_mappings() {
4694        spawn_kernel_and_run(async |locked, current_task| {
4695            let mm = current_task.mm().unwrap();
4696            let ma = current_task.deref();
4697
4698            // Map two contiguous pages at fixed addresses, but backed by distinct mappings.
4699            let page_size = *PAGE_SIZE;
4700            let addr = (mm.base_addr + 10 * page_size).unwrap();
4701            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
4702            assert_eq!(
4703                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
4704                (addr + page_size).unwrap()
4705            );
4706            // Mappings get merged since they are baked by the same memory object
4707            assert_eq!(mm.get_mapping_count(), 1);
4708
4709            // Write a pattern crossing our two mappings.
4710            let test_addr = (addr + page_size / 2).unwrap();
4711            let data: Vec<u8> = (0..page_size).map(|i| (i % 256) as u8).collect();
4712            ma.write_memory(test_addr, &data).expect("failed to write test data");
4713
4714            // Read it back.
4715            let data_readback =
4716                ma.read_memory_to_vec(test_addr, data.len()).expect("failed to read test data");
4717            assert_eq!(&data, &data_readback);
4718        })
4719        .await;
4720    }
4721
4722    #[::fuchsia::test]
4723    async fn test_read_write_errors() {
4724        spawn_kernel_and_run(async |locked, current_task| {
4725            let ma = current_task.deref();
4726
4727            let page_size = *PAGE_SIZE;
4728            let addr = map_memory(locked, &current_task, UserAddress::default(), page_size);
4729            let buf = vec![0u8; page_size as usize];
4730
4731            // Verify that accessing data that is only partially mapped is an error.
4732            let partial_addr_before = (addr - page_size / 2).unwrap();
4733            assert_eq!(ma.write_memory(partial_addr_before, &buf), error!(EFAULT));
4734            assert_eq!(ma.read_memory_to_vec(partial_addr_before, buf.len()), error!(EFAULT));
4735            let partial_addr_after = (addr + page_size / 2).unwrap();
4736            assert_eq!(ma.write_memory(partial_addr_after, &buf), error!(EFAULT));
4737            assert_eq!(ma.read_memory_to_vec(partial_addr_after, buf.len()), error!(EFAULT));
4738
4739            // Verify that accessing unmapped memory is an error.
4740            let unmapped_addr = (addr - 10 * page_size).unwrap();
4741            assert_eq!(ma.write_memory(unmapped_addr, &buf), error!(EFAULT));
4742            assert_eq!(ma.read_memory_to_vec(unmapped_addr, buf.len()), error!(EFAULT));
4743
4744            // However, accessing zero bytes in unmapped memory is not an error.
4745            ma.write_memory(unmapped_addr, &[]).expect("failed to write no data");
4746            ma.read_memory_to_vec(unmapped_addr, 0).expect("failed to read no data");
4747        })
4748        .await;
4749    }
4750
4751    #[::fuchsia::test]
4752    async fn test_read_c_string_to_vec_large() {
4753        spawn_kernel_and_run(async |locked, current_task| {
4754            let mm = current_task.mm().unwrap();
4755            let ma = current_task.deref();
4756
4757            let page_size = *PAGE_SIZE;
4758            let max_size = 4 * page_size as usize;
4759            let addr = (mm.base_addr + 10 * page_size).unwrap();
4760
4761            assert_eq!(map_memory(locked, &current_task, addr, max_size as u64), addr);
4762
4763            let mut random_data = vec![0; max_size];
4764            starnix_crypto::cprng_draw(&mut random_data);
4765            // Remove all NUL bytes.
4766            for i in 0..random_data.len() {
4767                if random_data[i] == 0 {
4768                    random_data[i] = 1;
4769                }
4770            }
4771            random_data[max_size - 1] = 0;
4772
4773            ma.write_memory(addr, &random_data).expect("failed to write test string");
4774            // We should read the same value minus the last byte (NUL char).
4775            assert_eq!(
4776                ma.read_c_string_to_vec(UserCString::new(current_task, addr), max_size).unwrap(),
4777                random_data[..max_size - 1]
4778            );
4779        })
4780        .await;
4781    }
4782
    /// Exercises `read_c_string_to_vec`: unterminated strings, strings spanning two
    /// mappings, length limits, and invalid addresses.
    #[::fuchsia::test]
    async fn test_read_c_string_to_vec() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();
            let ma = current_task.deref();

            let page_size = *PAGE_SIZE;
            let max_size = 2 * page_size as usize;
            let addr = (mm.base_addr + 10 * page_size).unwrap();

            // Map a page at a fixed address and write an unterminated string at the end of it.
            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
            let test_str = b"foo!";
            let test_addr =
                addr.checked_add(page_size as usize).unwrap().checked_sub(test_str.len()).unwrap();
            ma.write_memory(test_addr, test_str).expect("failed to write test string");

            // Expect error if the string is not terminated.
            assert_eq!(
                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size),
                error!(ENAMETOOLONG)
            );

            // Expect success if the string is terminated. Overwriting the trailing '!'
            // with NUL turns the contents into "foo\0".
            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
            assert_eq!(
                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
                    .unwrap(),
                "foo"
            );

            // Expect success if the string spans over two mappings.
            assert_eq!(
                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
                (addr + page_size).unwrap()
            );
            // TODO: Adjacent private anonymous mappings are collapsed. To test this case this test needs to
            // provide a backing for the second mapping.
            // assert_eq!(mm.get_mapping_count(), 2);
            // Writing "bar\0" over the last byte of page one extends the string into page two.
            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
                .expect("failed to write extra chars");
            assert_eq!(
                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), max_size)
                    .unwrap(),
                "foobar",
            );

            // Expect error if the string exceeds max limit
            assert_eq!(
                ma.read_c_string_to_vec(UserCString::new(current_task, test_addr), 2),
                error!(ENAMETOOLONG)
            );

            // Expect error if the address is invalid.
            assert_eq!(
                ma.read_c_string_to_vec(UserCString::null(current_task), max_size),
                error!(EFAULT)
            );
        })
        .await;
    }
4844
    /// Verifies that `read_nul_delimited_c_string_list` parses one or more NUL-terminated
    /// strings, as used for argv/envp-style regions.
    #[::fuchsia::test]
    async fn can_read_argv_like_regions() {
        spawn_kernel_and_run(async |locked, current_task| {
            let ma = current_task.deref();

            // Map a page.
            let page_size = *PAGE_SIZE;
            let addr = map_memory_anywhere(locked, &current_task, page_size);
            assert!(!addr.is_null());

            // Write an unterminated string.
            let mut payload = "first".as_bytes().to_vec();
            let mut expected_parses = vec![];
            ma.write_memory(addr, &payload).unwrap();

            // Expect success if the string is terminated.
            expected_parses.push(payload.clone());
            payload.push(0);
            ma.write_memory(addr, &payload).unwrap();
            assert_eq!(
                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
                expected_parses,
            );

            // Make sure we can parse multiple strings from the same region.
            // Each appended string is NUL-terminated and should parse as its own entry.
            let second = b"second";
            payload.extend(second);
            payload.push(0);
            expected_parses.push(second.to_vec());

            let third = b"third";
            payload.extend(third);
            payload.push(0);
            expected_parses.push(third.to_vec());

            ma.write_memory(addr, &payload).unwrap();
            assert_eq!(
                ma.read_nul_delimited_c_string_list(addr, payload.len()).unwrap(),
                expected_parses,
            );
        })
        .await;
    }
4888
4889    #[::fuchsia::test]
4890    async fn truncate_argv_like_regions() {
4891        spawn_kernel_and_run(async |locked, current_task| {
4892            let ma = current_task.deref();
4893
4894            // Map a page.
4895            let page_size = *PAGE_SIZE;
4896            let addr = map_memory_anywhere(locked, &current_task, page_size);
4897            assert!(!addr.is_null());
4898
4899            let payload = b"first\0second\0third\0";
4900            ma.write_memory(addr, payload).unwrap();
4901            assert_eq!(
4902                ma.read_nul_delimited_c_string_list(addr, payload.len() - 3).unwrap(),
4903                vec![b"first".to_vec(), b"second".to_vec(), b"thi".to_vec()],
4904                "Skipping last three bytes of payload should skip last two bytes of 3rd string"
4905            );
4906        })
4907        .await;
4908    }
4909
    /// Exercises the buffer-based `read_c_string`: unterminated strings, strings spanning
    /// two mappings, undersized buffers, and invalid addresses.
    #[::fuchsia::test]
    async fn test_read_c_string() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();
            let ma = current_task.deref();

            let page_size = *PAGE_SIZE;
            let buf_cap = 2 * page_size as usize;
            let mut buf = Vec::with_capacity(buf_cap);
            // We can't just use `spare_capacity_mut` because `Vec::with_capacity`
            // returns a `Vec` with _at least_ the requested capacity.
            let buf = &mut buf.spare_capacity_mut()[..buf_cap];
            let addr = (mm.base_addr + 10 * page_size).unwrap();

            // Map a page at a fixed address and write an unterminated string at the end of it.
            assert_eq!(map_memory(locked, &current_task, addr, page_size), addr);
            let test_str = b"foo!";
            let test_addr = (addr + (page_size - test_str.len() as u64)).unwrap();
            ma.write_memory(test_addr, test_str).expect("failed to write test string");

            // Expect error if the string is not terminated.
            assert_eq!(
                ma.read_c_string(UserCString::new(current_task, test_addr), buf),
                error!(ENAMETOOLONG)
            );

            // Expect success if the string is terminated. Overwriting the trailing '!'
            // with NUL turns the contents into "foo\0".
            ma.write_memory((addr + (page_size - 1)).unwrap(), b"\0").expect("failed to write nul");
            assert_eq!(
                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
                "foo"
            );

            // Expect success if the string spans over two mappings.
            assert_eq!(
                map_memory(locked, &current_task, (addr + page_size).unwrap(), page_size),
                (addr + page_size).unwrap()
            );
            // TODO: To be multiple mappings we need to provide a file backing for the next page or the
            // mappings will be collapsed.
            //assert_eq!(mm.get_mapping_count(), 2);
            // Writing "bar\0" over the last byte of page one extends the string into page two.
            ma.write_memory((addr + (page_size - 1)).unwrap(), b"bar\0")
                .expect("failed to write extra chars");
            assert_eq!(
                ma.read_c_string(UserCString::new(current_task, test_addr), buf).unwrap(),
                "foobar"
            );

            // Expect error if the string does not fit in the provided buffer.
            assert_eq!(
                ma.read_c_string(
                    UserCString::new(current_task, test_addr),
                    &mut [MaybeUninit::uninit(); 2]
                ),
                error!(ENAMETOOLONG)
            );

            // Expect error if the address is invalid.
            assert_eq!(ma.read_c_string(UserCString::null(current_task), buf), error!(EFAULT));
        })
        .await;
    }
4972
    /// Exercises `find_next_unused_range`: it searches downward from the top of the
    /// mmap area, skips over existing mappings, honors the requested size, and fails
    /// when no gap is large enough.
    #[::fuchsia::test]
    async fn test_find_next_unused_range() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            // A zero-sized request yields the very top of the mmap area.
            let mmap_top = mm.state.read().find_next_unused_range(0).unwrap().ptr();
            let page_size = *PAGE_SIZE as usize;
            assert!(mmap_top <= RESTRICTED_ASPACE_HIGHEST_ADDRESS);

            // No mappings - top address minus requested size is available
            assert_eq!(
                mm.state.read().find_next_unused_range(page_size).unwrap(),
                UserAddress::from_ptr(mmap_top - page_size)
            );

            // Fill it.
            let addr = UserAddress::from_ptr(mmap_top - page_size);
            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);

            // The next available range is right before the new mapping.
            assert_eq!(
                mm.state.read().find_next_unused_range(page_size).unwrap(),
                UserAddress::from_ptr(addr.ptr() - page_size)
            );

            // Allocate an extra page before a one-page gap.
            let addr2 = UserAddress::from_ptr(addr.ptr() - 2 * page_size);
            assert_eq!(map_memory(locked, &current_task, addr2, *PAGE_SIZE), addr2);

            // Searching for one-page range still gives the same result
            assert_eq!(
                mm.state.read().find_next_unused_range(page_size).unwrap(),
                UserAddress::from_ptr(addr.ptr() - page_size)
            );

            // Searching for a bigger range results in the area before the second mapping
            assert_eq!(
                mm.state.read().find_next_unused_range(2 * page_size).unwrap(),
                UserAddress::from_ptr(addr2.ptr() - 2 * page_size)
            );

            // Searching for more memory than available should fail.
            assert_eq!(mm.state.read().find_next_unused_range(mmap_top), None);
        })
        .await;
    }
5019
5020    #[::fuchsia::test]
5021    async fn test_count_placements() {
5022        spawn_kernel_and_run(async |locked, current_task| {
5023            let mm = current_task.mm().unwrap();
5024
5025            // ten-page range
5026            let page_size = *PAGE_SIZE as usize;
5027            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5028                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5029
5030            assert_eq!(
5031                mm.state.read().count_possible_placements(11 * page_size, &subrange_ten),
5032                Some(0)
5033            );
5034            assert_eq!(
5035                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
5036                Some(1)
5037            );
5038            assert_eq!(
5039                mm.state.read().count_possible_placements(9 * page_size, &subrange_ten),
5040                Some(2)
5041            );
5042            assert_eq!(
5043                mm.state.read().count_possible_placements(page_size, &subrange_ten),
5044                Some(10)
5045            );
5046
5047            // map 6th page
5048            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
5049            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5050
5051            assert_eq!(
5052                mm.state.read().count_possible_placements(10 * page_size, &subrange_ten),
5053                Some(0)
5054            );
5055            assert_eq!(
5056                mm.state.read().count_possible_placements(5 * page_size, &subrange_ten),
5057                Some(1)
5058            );
5059            assert_eq!(
5060                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
5061                Some(3)
5062            );
5063            assert_eq!(
5064                mm.state.read().count_possible_placements(page_size, &subrange_ten),
5065                Some(9)
5066            );
5067        })
5068        .await;
5069    }
5070
5071    #[::fuchsia::test]
5072    async fn test_pick_placement() {
5073        spawn_kernel_and_run(async |locked, current_task| {
5074            let mm = current_task.mm().unwrap();
5075
5076            let page_size = *PAGE_SIZE as usize;
5077            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5078                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5079
5080            let addr = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 5 * page_size);
5081            assert_eq!(map_memory(locked, &current_task, addr, *PAGE_SIZE), addr);
5082            assert_eq!(
5083                mm.state.read().count_possible_placements(4 * page_size, &subrange_ten),
5084                Some(3)
5085            );
5086
5087            assert_eq!(
5088                mm.state.read().pick_placement(4 * page_size, 0, &subrange_ten),
5089                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE))
5090            );
5091            assert_eq!(
5092                mm.state.read().pick_placement(4 * page_size, 1, &subrange_ten),
5093                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + page_size))
5094            );
5095            assert_eq!(
5096                mm.state.read().pick_placement(4 * page_size, 2, &subrange_ten),
5097                Some(UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 6 * page_size))
5098            );
5099        })
5100        .await;
5101    }
5102
5103    #[::fuchsia::test]
5104    async fn test_find_random_unused_range() {
5105        spawn_kernel_and_run(async |locked, current_task| {
5106            let mm = current_task.mm().unwrap();
5107
5108            // ten-page range
5109            let page_size = *PAGE_SIZE as usize;
5110            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)
5111                ..UserAddress::from_ptr(RESTRICTED_ASPACE_BASE + 10 * page_size);
5112
5113            for _ in 0..10 {
5114                let addr = mm.state.read().find_random_unused_range(page_size, &subrange_ten);
5115                assert!(addr.is_some());
5116                assert_eq!(
5117                    map_memory(locked, &current_task, addr.unwrap(), *PAGE_SIZE),
5118                    addr.unwrap()
5119                );
5120            }
5121            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5122        })
5123        .await;
5124    }
5125
5126    #[::fuchsia::test]
5127    async fn test_grows_down_near_aspace_base() {
5128        spawn_kernel_and_run(async |locked, current_task| {
5129            let mm = current_task.mm().unwrap();
5130
5131            let page_count = 10;
5132
5133            let page_size = *PAGE_SIZE as usize;
5134            let addr =
5135                (UserAddress::from_ptr(RESTRICTED_ASPACE_BASE) + page_count * page_size).unwrap();
5136            assert_eq!(
5137                map_memory_with_flags(
5138                    locked,
5139                    &current_task,
5140                    addr,
5141                    page_size as u64,
5142                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN
5143                ),
5144                addr
5145            );
5146
5147            let subrange_ten = UserAddress::from_ptr(RESTRICTED_ASPACE_BASE)..addr;
5148            assert_eq!(mm.state.read().find_random_unused_range(page_size, &subrange_ten), None);
5149        })
5150        .await;
5151    }
5152
5153    #[::fuchsia::test]
5154    async fn test_unmap_returned_mappings() {
5155        spawn_kernel_and_run(async |locked, current_task| {
5156            let mm = current_task.mm().unwrap();
5157
5158            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
5159
5160            let mut released_mappings = ReleasedMappings::default();
5161            let mut mm_state = mm.state.write();
5162            let unmap_result =
5163                mm_state.unmap(&mm, addr, *PAGE_SIZE as usize, &mut released_mappings);
5164            assert!(unmap_result.is_ok());
5165            assert_eq!(released_mappings.len(), 1);
5166            released_mappings.finalize(mm_state);
5167        })
5168        .await;
5169    }
5170
5171    #[::fuchsia::test]
5172    async fn test_unmap_returns_multiple_mappings() {
5173        spawn_kernel_and_run(async |locked, current_task| {
5174            let mm = current_task.mm().unwrap();
5175
5176            let addr = mm.state.read().find_next_unused_range(3 * *PAGE_SIZE as usize).unwrap();
5177            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5178            let _ = map_memory(locked, &current_task, (addr + 2 * *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5179
5180            let mut released_mappings = ReleasedMappings::default();
5181            let mut mm_state = mm.state.write();
5182            let unmap_result =
5183                mm_state.unmap(&mm, addr, (*PAGE_SIZE * 3) as usize, &mut released_mappings);
5184            assert!(unmap_result.is_ok());
5185            assert_eq!(released_mappings.len(), 2);
5186            released_mappings.finalize(mm_state);
5187        })
5188        .await;
5189    }
5190
    /// Maps two pages in separate mappings next to each other, then unmaps the first page.
    /// The second page should not be modified.
    #[::fuchsia::test]
    async fn test_map_two_unmap_one() {
        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();

            // reserve memory for both pages
            let addr_reserve =
                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE * 2);
            // Overwrite the reservation with two distinct one-page mappings via MAP_FIXED.
            let addr1 = do_mmap(
                locked,
                &current_task,
                addr_reserve,
                *PAGE_SIZE as usize,
                PROT_READ, // Map read-only to avoid merging of the two mappings
                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
                FdNumber::from_raw(-1),
                0,
            )
            .expect("failed to mmap");
            let addr2 = map_memory_with_flags(
                locked,
                &current_task,
                (addr_reserve + *PAGE_SIZE).unwrap(),
                *PAGE_SIZE,
                MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
            );
            // Both mappings should be exactly one page, back to back.
            let state = mm.state.read();
            let (range1, _) = state.mappings.get(addr1).expect("mapping");
            assert_eq!(range1.start, addr1);
            assert_eq!(range1.end, (addr1 + *PAGE_SIZE).unwrap());
            let (range2, mapping2) = state.mappings.get(addr2).expect("mapping");
            assert_eq!(range2.start, addr2);
            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
            // Remember the memory object backing the second mapping so we can
            // verify later that the unmap of the first page did not disturb it.
            let original_memory2 = {
                match state.get_mapping_backing(mapping2) {
                    MappingBacking::Memory(backing) => {
                        assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
                        backing.memory().clone()
                    }
                    MappingBacking::PrivateAnonymous => {
                        panic!("Unexpected private anonymous mapping")
                    }
                }
            };
            // Release the read guard before mutating the address space below.
            std::mem::drop(state);

            assert_eq!(mm.unmap(addr1, *PAGE_SIZE as usize), Ok(()));

            let state = mm.state.read();

            // The first page should be unmapped.
            assert!(state.mappings.get(addr1).is_none());

            // The second page should remain unchanged.
            let (range2, mapping2) = state.mappings.get(addr2).expect("second page");
            assert_eq!(range2.start, addr2);
            assert_eq!(range2.end, (addr2 + *PAGE_SIZE).unwrap());
            match state.get_mapping_backing(mapping2) {
                MappingBacking::Memory(backing) => {
                    assert_eq!(backing.memory().get_size(), *PAGE_SIZE);
                    // Matching koids: the very same memory object still backs the page.
                    assert_eq!(original_memory2.get_koid(), backing.memory().get_koid());
                }
                MappingBacking::PrivateAnonymous => panic!("Unexpected private anonymous mapping"),
            }
        })
        .await;
    }
5260
5261    #[::fuchsia::test]
5262    async fn test_read_write_objects() {
5263        spawn_kernel_and_run(async |locked, current_task| {
5264            let ma = current_task.deref();
5265            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5266            let items_ref = UserRef::<i32>::new(addr);
5267
5268            let items_written = vec![0, 2, 3, 7, 1];
5269            ma.write_objects(items_ref, &items_written).expect("Failed to write object array.");
5270
5271            let items_read = ma
5272                .read_objects_to_vec(items_ref, items_written.len())
5273                .expect("Failed to read object array.");
5274
5275            assert_eq!(items_written, items_read);
5276        })
5277        .await;
5278    }
5279
5280    #[::fuchsia::test]
5281    async fn test_read_write_objects_null() {
5282        spawn_kernel_and_run(async |_, current_task| {
5283            let ma = current_task.deref();
5284            let items_ref = UserRef::<i32>::new(UserAddress::default());
5285
5286            let items_written = vec![];
5287            ma.write_objects(items_ref, &items_written)
5288                .expect("Failed to write empty object array.");
5289
5290            let items_read = ma
5291                .read_objects_to_vec(items_ref, items_written.len())
5292                .expect("Failed to read empty object array.");
5293
5294            assert_eq!(items_written, items_read);
5295        })
5296        .await;
5297    }
5298
5299    #[::fuchsia::test]
5300    async fn test_read_object_partial() {
5301        #[derive(Debug, Default, Copy, Clone, KnownLayout, FromBytes, Immutable, PartialEq)]
5302        struct Items {
5303            val: [i32; 4],
5304        }
5305
5306        spawn_kernel_and_run(async |locked, current_task| {
5307            let ma = current_task.deref();
5308            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5309            let items_array_ref = UserRef::<i32>::new(addr);
5310
5311            // Populate some values.
5312            let items_written = vec![75, 23, 51, 98];
5313            ma.write_objects(items_array_ref, &items_written)
5314                .expect("Failed to write object array.");
5315
5316            // Full read of all 4 values.
5317            let items_ref = UserRef::<Items>::new(addr);
5318            let items_read = ma
5319                .read_object_partial(items_ref, std::mem::size_of::<Items>())
5320                .expect("Failed to read object");
5321            assert_eq!(items_written, items_read.val);
5322
5323            // Partial read of the first two.
5324            let items_read = ma.read_object_partial(items_ref, 8).expect("Failed to read object");
5325            assert_eq!(vec![75, 23, 0, 0], items_read.val);
5326
5327            // The API currently allows reading 0 bytes (this could be re-evaluated) so test that does
5328            // the right thing.
5329            let items_read = ma.read_object_partial(items_ref, 0).expect("Failed to read object");
5330            assert_eq!(vec![0, 0, 0, 0], items_read.val);
5331
5332            // Size bigger than the object.
5333            assert_eq!(
5334                ma.read_object_partial(items_ref, std::mem::size_of::<Items>() + 8),
5335                error!(EINVAL)
5336            );
5337
5338            // Bad pointer.
5339            assert_eq!(
5340                ma.read_object_partial(UserRef::<Items>::new(UserAddress::from(1)), 16),
5341                error!(EFAULT)
5342            );
5343        })
5344        .await;
5345    }
5346
5347    #[::fuchsia::test]
5348    async fn test_partial_read() {
5349        spawn_kernel_and_run(async |locked, current_task| {
5350            let mm = current_task.mm().unwrap();
5351            let ma = current_task.deref();
5352
5353            let addr = mm.state.read().find_next_unused_range(2 * *PAGE_SIZE as usize).unwrap();
5354            let addr = map_memory(locked, &current_task, addr, *PAGE_SIZE);
5355            let second_map =
5356                map_memory(locked, &current_task, (addr + *PAGE_SIZE).unwrap(), *PAGE_SIZE);
5357
5358            let bytes = vec![0xf; (*PAGE_SIZE * 2) as usize];
5359            assert!(ma.write_memory(addr, &bytes).is_ok());
5360            let mut state = mm.state.write();
5361            let mut released_mappings = ReleasedMappings::default();
5362            state
5363                .protect(
5364                    ma,
5365                    second_map,
5366                    *PAGE_SIZE as usize,
5367                    ProtectionFlags::empty(),
5368                    &mut released_mappings,
5369                )
5370                .unwrap();
5371            released_mappings.finalize(state);
5372            assert_eq!(
5373                ma.read_memory_partial_to_vec(addr, bytes.len()).unwrap().len(),
5374                *PAGE_SIZE as usize,
5375            );
5376        })
5377        .await;
5378    }
5379
5380    fn map_memory_growsdown<L>(
5381        locked: &mut Locked<L>,
5382        current_task: &CurrentTask,
5383        length: u64,
5384    ) -> UserAddress
5385    where
5386        L: LockEqualOrBefore<FileOpsCore>,
5387    {
5388        map_memory_with_flags(
5389            locked,
5390            current_task,
5391            UserAddress::default(),
5392            length,
5393            MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN,
5394        )
5395    }
5396
5397    #[::fuchsia::test]
5398    async fn test_grow_mapping_empty_mm() {
5399        spawn_kernel_and_run(async |_, current_task| {
5400            let mm = current_task.mm().unwrap();
5401
5402            let addr = UserAddress::from(0x100000);
5403
5404            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5405        })
5406        .await;
5407    }
5408
5409    #[::fuchsia::test]
5410    async fn test_grow_inside_mapping() {
5411        spawn_kernel_and_run(async |locked, current_task| {
5412            let mm = current_task.mm().unwrap();
5413
5414            let addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5415
5416            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5417        })
5418        .await;
5419    }
5420
5421    #[::fuchsia::test]
5422    async fn test_grow_write_fault_inside_read_only_mapping() {
5423        spawn_kernel_and_run(async |locked, current_task| {
5424            let mm = current_task.mm().unwrap();
5425
5426            let addr = do_mmap(
5427                locked,
5428                &current_task,
5429                UserAddress::default(),
5430                *PAGE_SIZE as usize,
5431                PROT_READ,
5432                MAP_ANONYMOUS | MAP_PRIVATE,
5433                FdNumber::from_raw(-1),
5434                0,
5435            )
5436            .expect("Could not map memory");
5437
5438            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5439            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5440        })
5441        .await;
5442    }
5443
5444    #[::fuchsia::test]
5445    async fn test_grow_fault_inside_prot_none_mapping() {
5446        spawn_kernel_and_run(async |locked, current_task| {
5447            let mm = current_task.mm().unwrap();
5448
5449            let addr = do_mmap(
5450                locked,
5451                &current_task,
5452                UserAddress::default(),
5453                *PAGE_SIZE as usize,
5454                PROT_NONE,
5455                MAP_ANONYMOUS | MAP_PRIVATE,
5456                FdNumber::from_raw(-1),
5457                0,
5458            )
5459            .expect("Could not map memory");
5460
5461            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, false), Ok(false));
5462            assert_matches!(mm.extend_growsdown_mapping_to_address(addr, true), Ok(false));
5463        })
5464        .await;
5465    }
5466
5467    #[::fuchsia::test]
5468    async fn test_grow_below_mapping() {
5469        spawn_kernel_and_run(async |locked, current_task| {
5470            let mm = current_task.mm().unwrap();
5471
5472            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) - *PAGE_SIZE;
5473
5474            assert_matches!(mm.extend_growsdown_mapping_to_address(addr.unwrap(), false), Ok(true));
5475        })
5476        .await;
5477    }
5478
5479    #[::fuchsia::test]
5480    async fn test_grow_above_mapping() {
5481        spawn_kernel_and_run(async |locked, current_task| {
5482            let mm = current_task.mm().unwrap();
5483
5484            let addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE) + *PAGE_SIZE;
5485
5486            assert_matches!(
5487                mm.extend_growsdown_mapping_to_address(addr.unwrap(), false),
5488                Ok(false)
5489            );
5490        })
5491        .await;
5492    }
5493
5494    #[::fuchsia::test]
5495    async fn test_grow_write_fault_below_read_only_mapping() {
5496        spawn_kernel_and_run(async |locked, current_task| {
5497            let mm = current_task.mm().unwrap();
5498
5499            let mapped_addr = map_memory_growsdown(locked, &current_task, *PAGE_SIZE);
5500
5501            mm.protect(&current_task, mapped_addr, *PAGE_SIZE as usize, ProtectionFlags::READ)
5502                .unwrap();
5503
5504            assert_matches!(
5505                mm.extend_growsdown_mapping_to_address((mapped_addr - *PAGE_SIZE).unwrap(), true),
5506                Ok(false)
5507            );
5508
5509            assert_eq!(mm.get_mapping_count(), 1);
5510        })
5511        .await;
5512    }
5513
    #[::fuchsia::test]
    async fn test_snapshot_paged_memory() {
        use zx::sys::zx_page_request_command_t::ZX_PAGER_VMO_READ;

        spawn_kernel_and_run(async |locked, current_task| {
            let mm = current_task.mm().unwrap();
            let ma = current_task.deref();

            let port = Arc::new(zx::Port::create());
            let port_clone = port.clone();
            let pager =
                Arc::new(zx::Pager::create(zx::PagerOptions::empty()).expect("create failed"));
            let pager_clone = pager.clone();

            // Pager-backed VMO: its pages are supplied on demand by the service
            // thread below rather than being committed up front.
            const VMO_SIZE: u64 = 128 * 1024;
            let vmo = Arc::new(
                pager
                    .create_vmo(zx::VmoOptions::RESIZABLE, &port, 1, VMO_SIZE)
                    .expect("create_vmo failed"),
            );
            let vmo_clone = vmo.clone();

            // Create a thread to service the port where we will receive pager requests.
            // It answers every read request with zero-filled pages and exits when a
            // user packet arrives.
            let thread = std::thread::spawn(move || {
                loop {
                    let packet =
                        port_clone.wait(zx::MonotonicInstant::INFINITE).expect("wait failed");
                    match packet.contents() {
                        zx::PacketContents::Pager(contents) => {
                            if contents.command() == ZX_PAGER_VMO_READ {
                                let range = contents.range();
                                // Fresh VMO contents are zeroed; copy them in to
                                // satisfy the request.
                                let source_vmo = zx::Vmo::create(range.end - range.start)
                                    .expect("create failed");
                                pager_clone
                                    .supply_pages(&vmo_clone, range, &source_vmo, 0)
                                    .expect("supply_pages failed");
                            }
                        }
                        zx::PacketContents::User(_) => break,
                        _ => {}
                    }
                }
            });

            // Map a copy-on-write child of the pager-backed VMO into this task.
            let child_vmo = vmo
                .create_child(zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE, 0, VMO_SIZE)
                .unwrap();

            // Write something to the source VMO.
            vmo.write(b"foo", 0).expect("write failed");

            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
            let addr = mm
                .map_memory(
                    DesiredAddress::Any,
                    Arc::new(MemoryObject::from(child_vmo)),
                    0,
                    VMO_SIZE as usize,
                    prot_flags,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::None,
                )
                .expect("map failed");

            // Snapshot the address space into a new task (fork-style clone).
            let target = current_task.clone_task_for_test(locked, 0, None);

            // Make sure it has what we wrote.
            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"foo");

            // Write something to both source and target and make sure they are forked.
            ma.write_memory(addr, b"bar").expect("write_memory failed");

            // The target must not observe the source's post-snapshot write.
            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"foo");

            target.write_memory(addr, b"baz").expect("write_memory failed");
            let buf = ma.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"bar");

            let buf = target.read_memory_to_vec(addr, 3).expect("read_memory failed");
            assert_eq!(buf, b"baz");

            // Tell the pager service thread to exit, then wait for it.
            port.queue(&zx::Packet::from_user_packet(0, 0, zx::UserPacket::from_u8_array([0; 32])))
                .unwrap();
            thread.join().unwrap();
        })
        .await;
    }
5604
5605    #[::fuchsia::test]
5606    async fn test_set_vma_name() {
5607        spawn_kernel_and_run(async |locked, mut current_task| {
5608            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5609
5610            let vma_name = "vma name";
5611            current_task.write_memory(name_addr, vma_name.as_bytes()).unwrap();
5612
5613            let mapping_addr =
5614                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5615
5616            sys_prctl(
5617                locked,
5618                &mut current_task,
5619                PR_SET_VMA,
5620                PR_SET_VMA_ANON_NAME as u64,
5621                mapping_addr.ptr() as u64,
5622                *PAGE_SIZE,
5623                name_addr.ptr() as u64,
5624            )
5625            .unwrap();
5626
5627            assert_eq!(
5628                *current_task.mm().unwrap().get_mapping_name(mapping_addr).unwrap().unwrap(),
5629                vma_name
5630            );
5631        })
5632        .await;
5633    }
5634
5635    #[::fuchsia::test]
5636    async fn test_set_vma_name_adjacent_mappings() {
5637        spawn_kernel_and_run(async |locked, mut current_task| {
5638            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5639            current_task
5640                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5641                .unwrap();
5642
5643            let first_mapping_addr =
5644                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
5645            let second_mapping_addr = map_memory_with_flags(
5646                locked,
5647                &current_task,
5648                (first_mapping_addr + *PAGE_SIZE).unwrap(),
5649                *PAGE_SIZE,
5650                MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
5651            );
5652
5653            assert_eq!((first_mapping_addr + *PAGE_SIZE).unwrap(), second_mapping_addr);
5654
5655            sys_prctl(
5656                locked,
5657                &mut current_task,
5658                PR_SET_VMA,
5659                PR_SET_VMA_ANON_NAME as u64,
5660                first_mapping_addr.ptr() as u64,
5661                2 * *PAGE_SIZE,
5662                name_addr.ptr() as u64,
5663            )
5664            .unwrap();
5665
5666            {
5667                let mm = current_task.mm().unwrap();
5668                let state = mm.state.read();
5669
5670                // The name should apply to both mappings.
5671                let (_, mapping) = state.mappings.get(first_mapping_addr).unwrap();
5672                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5673
5674                let (_, mapping) = state.mappings.get(second_mapping_addr).unwrap();
5675                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5676            }
5677        })
5678        .await;
5679    }
5680
5681    #[::fuchsia::test]
5682    async fn test_set_vma_name_beyond_end() {
5683        spawn_kernel_and_run(async |locked, mut current_task| {
5684            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5685            current_task
5686                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5687                .unwrap();
5688
5689            let mapping_addr =
5690                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
5691
5692            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
5693            current_task.mm().unwrap().unmap(second_page, *PAGE_SIZE as usize).unwrap();
5694
5695            // This should fail with ENOMEM since it extends past the end of the mapping into unmapped memory.
5696            assert_eq!(
5697                sys_prctl(
5698                    locked,
5699                    &mut current_task,
5700                    PR_SET_VMA,
5701                    PR_SET_VMA_ANON_NAME as u64,
5702                    mapping_addr.ptr() as u64,
5703                    2 * *PAGE_SIZE,
5704                    name_addr.ptr() as u64,
5705                ),
5706                error!(ENOMEM)
5707            );
5708
5709            // Despite returning an error, the prctl should still assign a name to the region at the start of the region.
5710            {
5711                let mm = current_task.mm().unwrap();
5712                let state = mm.state.read();
5713
5714                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
5715                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5716            }
5717        })
5718        .await;
5719    }
5720
5721    #[::fuchsia::test]
5722    async fn test_set_vma_name_before_start() {
5723        spawn_kernel_and_run(async |locked, mut current_task| {
5724            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5725            current_task
5726                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5727                .unwrap();
5728
5729            let mapping_addr =
5730                map_memory(locked, &current_task, UserAddress::default(), 2 * *PAGE_SIZE);
5731
5732            let second_page = (mapping_addr + *PAGE_SIZE).unwrap();
5733            current_task.mm().unwrap().unmap(mapping_addr, *PAGE_SIZE as usize).unwrap();
5734
5735            // This should fail with ENOMEM since the start of the range is in unmapped memory.
5736            assert_eq!(
5737                sys_prctl(
5738                    locked,
5739                    &mut current_task,
5740                    PR_SET_VMA,
5741                    PR_SET_VMA_ANON_NAME as u64,
5742                    mapping_addr.ptr() as u64,
5743                    2 * *PAGE_SIZE,
5744                    name_addr.ptr() as u64,
5745                ),
5746                error!(ENOMEM)
5747            );
5748
5749            // Unlike a range which starts within a mapping and extends past the end, this should not assign
5750            // a name to any mappings.
5751            {
5752                let mm = current_task.mm().unwrap();
5753                let state = mm.state.read();
5754
5755                let (_, mapping) = state.mappings.get(second_page).unwrap();
5756                assert_eq!(mapping.name(), MappingName::None);
5757            }
5758        })
5759        .await;
5760    }
5761
5762    #[::fuchsia::test]
5763    async fn test_set_vma_name_partial() {
5764        spawn_kernel_and_run(async |locked, mut current_task| {
5765            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5766            current_task
5767                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5768                .unwrap();
5769
5770            let mapping_addr =
5771                map_memory(locked, &current_task, UserAddress::default(), 3 * *PAGE_SIZE);
5772
5773            assert_eq!(
5774                sys_prctl(
5775                    locked,
5776                    &mut current_task,
5777                    PR_SET_VMA,
5778                    PR_SET_VMA_ANON_NAME as u64,
5779                    (mapping_addr + *PAGE_SIZE).unwrap().ptr() as u64,
5780                    *PAGE_SIZE,
5781                    name_addr.ptr() as u64,
5782                ),
5783                Ok(starnix_syscalls::SUCCESS)
5784            );
5785
5786            // This should split the mapping into 3 pieces with the second piece having the name "foo"
5787            {
5788                let mm = current_task.mm().unwrap();
5789                let state = mm.state.read();
5790
5791                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
5792                assert_eq!(mapping.name(), MappingName::None);
5793
5794                let (_, mapping) =
5795                    state.mappings.get((mapping_addr + *PAGE_SIZE).unwrap()).unwrap();
5796                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5797
5798                let (_, mapping) =
5799                    state.mappings.get((mapping_addr + (2 * *PAGE_SIZE)).unwrap()).unwrap();
5800                assert_eq!(mapping.name(), MappingName::None);
5801            }
5802        })
5803        .await;
5804    }
5805
5806    #[::fuchsia::test]
5807    async fn test_preserve_name_snapshot() {
5808        spawn_kernel_and_run(async |locked, mut current_task| {
5809            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5810            current_task
5811                .write_memory(name_addr, CString::new("foo").unwrap().as_bytes_with_nul())
5812                .unwrap();
5813
5814            let mapping_addr =
5815                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
5816
5817            assert_eq!(
5818                sys_prctl(
5819                    locked,
5820                    &mut current_task,
5821                    PR_SET_VMA,
5822                    PR_SET_VMA_ANON_NAME as u64,
5823                    mapping_addr.ptr() as u64,
5824                    *PAGE_SIZE,
5825                    name_addr.ptr() as u64,
5826                ),
5827                Ok(starnix_syscalls::SUCCESS)
5828            );
5829
5830            let target = current_task.clone_task_for_test(locked, 0, None);
5831
5832            {
5833                let mm = target.mm().unwrap();
5834                let state = mm.state.read();
5835
5836                let (_, mapping) = state.mappings.get(mapping_addr).unwrap();
5837                assert_eq!(mapping.name(), MappingName::Vma("foo".into()));
5838            }
5839        })
5840        .await;
5841    }
5842}