Skip to main content

starnix_core/mm/
vmsplice.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::MemoryManager;
6use crate::mm::memory::MemoryObject;
7use crate::vfs::buffers::{InputBuffer, MessageData, OutputBuffer};
8use crate::vfs::with_iovec_segments;
9
10use smallvec::SmallVec;
11use starnix_sync::{LockDepMutex, Mutex, VmspliceSegmentsLock};
12use starnix_uapi::errors::Errno;
13use starnix_uapi::range_ext::RangeExt as _;
14use starnix_uapi::user_address::UserAddress;
15use starnix_uapi::{errno, error};
16use std::mem::MaybeUninit;
17use std::ops::Range;
18use std::sync::{Arc, Weak};
19
/// A single segment of a `VmsplicePayload`.
///
/// A segment describes `length` bytes that start at user address
/// `addr_offset` and are stored in `memory` starting at `memory_offset`.
#[derive(Clone, Debug)]
pub struct VmsplicePayloadSegment {
    /// The user address at which this segment starts. Compared against the
    /// unmapped address range in `InflightVmsplicedPayloads::handle_unmapping`.
    pub addr_offset: UserAddress,
    /// The number of bytes covered by this segment.
    pub length: usize,
    /// The `MemoryObject` that contains the memory used in this mapping.
    pub memory: Arc<MemoryObject>,
    /// The offset in the `MemoryObject` that corresponds to the base address.
    pub memory_offset: u64,
    /// Whether this segment should be snapshotted on unmap.
    ///
    /// When set, `InflightVmsplicedPayloads::handle_unmapping` replaces
    /// `memory` with a `SNAPSHOT_MODIFIED | NO_WRITE` child covering the
    /// unmapped bytes; when clear, the segment keeps referencing the original
    /// `MemoryObject`.
    pub should_snapshot_on_unmap: bool,
}
32
33impl VmsplicePayloadSegment {
34    fn split_off(&mut self, index: usize) -> Option<Self> {
35        if index >= self.length {
36            return None;
37        }
38
39        let mut mapping = self.clone();
40        mapping.length = self.length - index;
41        mapping.addr_offset = match mapping.addr_offset + index {
42            Ok(new_addr) => new_addr,
43            Err(_) => return None,
44        };
45        mapping.memory_offset += index as u64;
46
47        self.length = index;
48        Some(mapping)
49    }
50
51    fn truncate(&mut self, limit: usize) {
52        // TODO(https://fxbug.dev/335701084): Truncating like this may leave
53        // unreachable memory in the VMO that is free to reclaim. We should
54        // reclaim the truncated memory if we can guarantee that it is no
55        // longer reachable by other means (e.g. other mappings, files, shared
56        // memory, etc.).
57        self.length = std::cmp::min(self.length, limit);
58    }
59
60    fn read_uninit(&self, data: &mut [MaybeUninit<u8>]) -> Result<(), zx::Status> {
61        self.memory.read_uninit(data, self.memory_offset)?;
62        Ok(())
63    }
64
65    /// Reads from the backing memory.
66    ///
67    /// # Safety
68    ///
69    /// Callers must guarantee that the buffer is valid to write to.
70    unsafe fn raw_read(&self, buffer: *mut u8, buffer_length: usize) -> Result<(), zx::Status> {
71        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
72        unsafe {
73            self.memory.read_raw(buffer, buffer_length, self.memory_offset)
74        }
75    }
76}
77
/// A single payload that may sit in a pipe as a consequence of a `vmsplice(2)`
/// to a pipe.
///
/// A `VmsplicePayload` originally starts with a single segment. The payload
/// may be split up into multiple segments as the payload sits in the pipe.
/// This can happen when a mapping that is also backing a vmsplice-ed payload
/// is modified such that the original segment is partially unmapped.
///
/// When the `VmsplicePayload` is created, it will be appended to its associated
/// memory manager's [`InflightVmsplicedPayloads`]. The list cleans itself when
/// `handle_unmapping` is run.
#[derive(Debug, Default)]
pub struct VmsplicePayload {
    /// The memory manager this payload is associated with. It is held weakly;
    /// if it can no longer be upgraded when a payload is created, the payload
    /// is simply not registered as inflight.
    mapping: Weak<MemoryManager>,
    /// The segments that make up the payload, in payload order. Guarded by its
    /// own lock because `handle_unmapping` rewrites the list in place.
    segments: LockDepMutex<SmallVec<[VmsplicePayloadSegment; 1]>, VmspliceSegmentsLock>,
}
94
95impl VmsplicePayload {
96    pub fn new(mapping: Weak<MemoryManager>, segment: VmsplicePayloadSegment) -> Arc<Self> {
97        Self::new_with_segments(mapping, [segment].into())
98    }
99
100    fn new_with_segments(
101        mapping: Weak<MemoryManager>,
102        segments: SmallVec<[VmsplicePayloadSegment; 1]>,
103    ) -> Arc<Self> {
104        let mapping_strong = mapping.upgrade();
105        let payload = Arc::new(Self { mapping, segments: LockDepMutex::new(segments) });
106        if let Some(mapping) = mapping_strong {
107            mapping.inflight_vmspliced_payloads.handle_new_payload(&payload);
108        }
109        payload
110    }
111}
112
113impl MessageData for Arc<VmsplicePayload> {
114    fn copy_from_user(_data: &mut dyn InputBuffer, _limit: usize) -> Result<Self, Errno> {
115        error!(ENOTSUP)
116    }
117
118    fn ptr(&self) -> Result<*const u8, Errno> {
119        error!(ENOTSUP)
120    }
121
122    fn with_bytes<O, F: FnMut(&[u8]) -> Result<O, Errno>>(&self, mut f: F) -> Result<O, Errno> {
123        let v = {
124            let segments = self.segments.lock();
125            let mut v = Vec::with_capacity(segments.iter().map(|s| s.length).sum());
126            for segment in segments.iter() {
127                segment
128                    .read_uninit(&mut v.spare_capacity_mut()[..segment.length])
129                    .map_err(|_| errno!(EFAULT))?;
130                // SAFETY: The read above succeeded.
131                unsafe { v.set_len(v.len() + segment.length) }
132            }
133            v
134        };
135        // Don't hold the lock because the callback may perform work which
136        // requires taking memory manager or mapping locks. Note that
137        // VmsplicePayload is modified while such locks are held (e.g. unmap).
138        f(&v)
139    }
140
141    fn len(&self) -> usize {
142        self.segments.lock().iter().map(|s| s.length).sum()
143    }
144
145    fn split_off(&mut self, mut limit: usize) -> Option<Self> {
146        let new_segments = {
147            let mut segments = self.segments.lock();
148
149            let mut split_at = 0;
150            for segment in segments.iter() {
151                if limit >= segment.length {
152                    limit -= segment.length;
153                    split_at += 1;
154                } else {
155                    break;
156                }
157            }
158
159            let mut new_segments = SmallVec::new();
160            if limit != 0 && split_at < segments.len() {
161                new_segments.push(segments[split_at].split_off(limit).unwrap());
162                split_at += 1;
163            };
164            if split_at <= segments.len() {
165                new_segments.extend(segments.drain(split_at..));
166            }
167            new_segments
168        };
169
170        if new_segments.is_empty() {
171            None
172        } else {
173            Some(VmsplicePayload::new_with_segments(self.mapping.clone(), new_segments))
174        }
175    }
176
177    fn truncate(&mut self, mut limit: usize) {
178        let mut segments = self.segments.lock();
179
180        segments.retain_mut(|segment| {
181            if limit >= segment.length {
182                limit -= segment.length;
183                true
184            } else if limit != 0 {
185                segment.truncate(limit);
186                limit = 0;
187                true
188            } else {
189                false
190            }
191        })
192    }
193
194    fn clone_at_most(&self, limit: usize) -> Self {
195        let mut payload =
196            VmsplicePayload::new_with_segments(self.mapping.clone(), self.segments.lock().clone());
197        payload.truncate(limit);
198        payload
199    }
200
201    fn copy_to_user(&self, data: &mut dyn OutputBuffer) -> Result<usize, Errno> {
202        let result = with_iovec_segments(data, |iovecs: &mut [syncio::zxio::zx_iovec]| {
203            let segments = self.segments.lock();
204            let length: usize = segments.iter().map(|s| s.length).sum();
205
206            let mut segments = segments.iter();
207            let mut current_segment = segments.next().cloned();
208            let mut copied = 0;
209
210            for iovec in iovecs {
211                if length == copied {
212                    break;
213                }
214
215                iovec.capacity = std::cmp::min(iovec.capacity, length - copied);
216
217                let mut iovec_pos = 0;
218                while let Some(segment) = &mut current_segment {
219                    if iovec_pos == iovec.capacity {
220                        break;
221                    }
222
223                    let to_read = std::cmp::min(segment.length, iovec.capacity - iovec_pos);
224                    let after_read = segment.split_off(to_read);
225                    #[allow(
226                        clippy::undocumented_unsafe_blocks,
227                        reason = "Force documented unsafe blocks in Starnix"
228                    )]
229                    unsafe { segment.raw_read(iovec.buffer as *mut u8, to_read) }
230                        .map_err(|_| errno!(EFAULT))?;
231                    copied += to_read;
232                    iovec_pos += to_read;
233
234                    if let Some(after_read) = after_read {
235                        *segment = after_read;
236                    } else {
237                        current_segment = segments.next().cloned();
238                    }
239                }
240            }
241            Ok(copied)
242        });
243        match result {
244            Some(result) => {
245                let copied = result?;
246                // SAFETY: We just successfully read `copied` bytes from the `MemoryObject`
247                // to `data`.
248                unsafe { data.advance(copied)? };
249                Ok(copied)
250            }
251            None => self.with_bytes(|bytes| data.write(bytes)),
252        }
253    }
254}
255
/// Keeps track of inflight vmsplice-ed payloads.
///
/// This is needed so that when a mapping is unmapped, inflight vmspliced payloads
/// are updated to hold the (unmapped) bytes without being affected by any writes
/// to the payload's backing `MemoryObject`.
#[derive(Debug, Default)]
pub struct InflightVmsplicedPayloads {
    /// The inflight vmspliced payloads.
    ///
    /// Payloads are tracked through weak references, so tracking a payload
    /// does not keep it alive. Entries whose payload has been dropped are
    /// removed the next time `handle_unmapping` walks the list.
    ///
    /// Except when a [`VmsplicePayload`] is dropped, this is modified when the
    /// memory manager's read lock is held. To allow a `vmsplice` operation to a
    /// pipe to be performed without taking the memory manager's lock exclusively,
    /// this is protected by its own `Mutex` instead of relying on the memory
    /// manager's `RwLock`.
    payloads: Mutex<Vec<Weak<VmsplicePayload>>>,
}
272
273impl InflightVmsplicedPayloads {
274    fn handle_new_payload(&self, payload: &Arc<VmsplicePayload>) {
275        self.payloads.lock().push(Arc::downgrade(payload));
276    }
277
278    pub fn handle_unmapping(
279        &self,
280        unmapped_memory: &Arc<MemoryObject>,
281        unmapped_range: &Range<UserAddress>,
282    ) -> Result<(), Errno> {
283        // Iterate over payloads while removing any deleted payload.
284        let mut payloads = self.payloads.lock();
285        let mut index = 0;
286        while index < payloads.len() {
287            let Some(payload) = payloads[index].upgrade() else {
288                payloads.swap_remove(index);
289                continue;
290            };
291            index += 1;
292
293            let mut segments = payload.segments.lock();
294            let mut new_segments = SmallVec::new();
295
296            for segment in segments.iter() {
297                let mut segment = segment.clone();
298                let segment_end = (segment.addr_offset + segment.length)?;
299                let segment_range = segment.addr_offset..segment_end;
300                let segment_unmapped_range = unmapped_range.intersect(&segment_range);
301
302                if &segment.memory != unmapped_memory || segment_unmapped_range.is_empty() {
303                    // This can happen when say a partial unmapping was performed
304                    // on a `VmsplicePayloadSegment` which split it into a mapped
305                    // and unmapped set of payloads.
306                    new_segments.push(segment);
307                    continue;
308                }
309
310                // Keep the mapped head.
311                if segment_unmapped_range.start != segment_range.start {
312                    if let Some(tail) =
313                        segment.split_off(segment_unmapped_range.start - segment_range.start)
314                    {
315                        new_segments.push(segment);
316                        segment = tail;
317                    }
318                }
319
320                // Keep the mapped tail.
321                let tail = segment
322                    .split_off(segment.length - (segment_range.end - segment_unmapped_range.end));
323
324                // Snapshot the middle, actually unmapped, region if it should be snapshotted.
325                // For file-backed mappings, we want to keep referencing the shared VMO so that
326                // subsequent writes are visible.
327                //
328                // NB: we can't use `zx_vmo_transfer_data` because
329                // there may be multiple vmsplice payloads mapped
330                // to the same VMO region.
331                if segment.should_snapshot_on_unmap {
332                    let memory = segment
333                        .memory
334                        .create_child(
335                            zx::VmoChildOptions::SNAPSHOT_MODIFIED | zx::VmoChildOptions::NO_WRITE,
336                            segment.memory_offset,
337                            segment.length as u64,
338                        )
339                        .map_err(|_| errno!(EFAULT))?;
340
341                    segment.memory = Arc::new(memory);
342                    segment.memory_offset = 0;
343                }
344                new_segments.push(segment);
345
346                if let Some(tail) = tail {
347                    new_segments.push(tail);
348                }
349            }
350
351            *segments = new_segments;
352        }
353
354        Ok(())
355    }
356}
357
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::PAGE_SIZE;
    use crate::testing::spawn_kernel_and_run;
    use crate::vfs::VecOutputBuffer;

    /// Walks one payload through registration, an unrelated unmap, a partial
    /// unmap, a read-back, and removal from the inflight list once dropped.
    #[::fuchsia::test]
    async fn lifecycle() {
        spawn_kernel_and_run(async |_, current_task| {
            const NUM_PAGES: u64 = 3;
            let page_size = *PAGE_SIZE;
            let memory_size = page_size * NUM_PAGES;

            let mm = current_task.mm().unwrap();

            assert!(mm.inflight_vmspliced_payloads.payloads.lock().is_empty());

            // Give every page its own fill byte ('A', 'B', 'C', ...) so that
            // misplaced data is visible in the final comparison.
            let memory = Arc::new(MemoryObject::from(zx::Vmo::create(memory_size).unwrap()));
            let mut bytes = vec![0u8; memory_size as usize];
            for (page, fill) in bytes.chunks_mut(page_size as usize).zip(b'A'..) {
                page.fill(fill);
            }
            memory.write(&bytes, 0).unwrap();

            let segment = VmsplicePayloadSegment {
                addr_offset: UserAddress::NULL,
                length: memory_size as usize,
                memory: Arc::clone(&memory),
                memory_offset: 0,
                should_snapshot_on_unmap: true,
            };
            let payload = VmsplicePayload::new(Arc::downgrade(&mm), segment);
            assert_eq!(mm.inflight_vmspliced_payloads.payloads.lock().len(), 1);
            assert_eq!(payload.segments.lock().len(), 1);

            // Unmapping a different `MemoryObject` should do nothing.
            {
                let memory = Arc::new(MemoryObject::from(zx::Vmo::create(page_size).unwrap()));
                mm.inflight_vmspliced_payloads
                    .handle_unmapping(&memory, &(UserAddress::NULL..(u64::MAX.into())))
                    .unwrap();
                assert_eq!(payload.segments.lock().len(), 1);
            }

            // Unmapping the first page splits the payload into a snapshotted
            // first page and a tail that still references the original memory.
            let first_page = UserAddress::NULL..page_size.into();
            mm.inflight_vmspliced_payloads.handle_unmapping(&memory, &first_page).unwrap();
            {
                let segments = payload.segments.lock();
                assert_eq!(segments.len(), 2);
                assert!(!Arc::ptr_eq(&segments[0].memory, &memory));
                assert!(Arc::ptr_eq(&segments[1].memory, &memory));
            }

            // The payload contents must be unaffected by the split.
            let mut got = VecOutputBuffer::new(memory_size as usize);
            payload.copy_to_user(&mut got).unwrap();
            assert_eq!(got.data(), &bytes);

            std::mem::drop(payload);

            // Run the unmapping again to ensure payload is dropped.
            mm.inflight_vmspliced_payloads.handle_unmapping(&memory, &first_page).unwrap();

            assert!(mm.inflight_vmspliced_payloads.payloads.lock().is_empty());
        })
        .await;
    }
}