starnix_core/mm/vmsplice.rs

// Copyright 2024 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use crate::mm::MemoryManager;
use crate::mm::memory::MemoryObject;
use crate::vfs::buffers::{InputBuffer, MessageData, OutputBuffer};
use crate::vfs::with_iovec_segments;

use smallvec::SmallVec;
use starnix_sync::Mutex;
use starnix_uapi::errors::Errno;
use starnix_uapi::range_ext::RangeExt as _;
use starnix_uapi::user_address::UserAddress;
use starnix_uapi::{errno, error};
use std::mem::MaybeUninit;
use std::ops::Range;
use std::sync::{Arc, Weak};

/// A single segment of a `VmsplicePayload`.
#[derive(Clone, Debug)]
pub struct VmsplicePayloadSegment {
    /// The user address at which this segment starts.
    pub addr_offset: UserAddress,
    /// The length of this segment in bytes.
    pub length: usize,
    /// The `MemoryObject` that contains the memory used in this mapping.
    pub memory: Arc<MemoryObject>,
    /// The offset in the `MemoryObject` that corresponds to the base address.
    pub memory_offset: u64,
}

impl VmsplicePayloadSegment {
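    /// Splits the segment at `index` bytes: the first `index` bytes remain in
    /// `self` and the remainder is returned as a new segment. Returns `None`
    /// if `index` is at or past the end of the segment or if the split
    /// address cannot be computed.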
    fn split_off(&mut self, index: usize) -> Option<Self> {
        if index >= self.length {
            return None;
        }

        let mut mapping = self.clone();
        mapping.length = self.length - index;
        mapping.addr_offset = match mapping.addr_offset + index {
            Ok(new_addr) => new_addr,
            Err(_) => return None,
        };
        mapping.memory_offset += index as u64;

        self.length = index;
        Some(mapping)
    }

    fn truncate(&mut self, limit: usize) {
        // TODO(https://fxbug.dev/335701084): Truncating like this may leave
        // unreachable memory in the VMO that is free to reclaim. We should
        // reclaim the truncated memory if we can guarantee that it is no
        // longer reachable by other means (e.g. other mappings, files, shared
        // memory, etc.).
        self.length = std::cmp::min(self.length, limit);
    }

    fn read_uninit(&self, data: &mut [MaybeUninit<u8>]) -> Result<(), zx::Status> {
        self.memory.read_uninit(data, self.memory_offset)?;
        Ok(())
    }

    /// Reads from the backing memory.
    ///
    /// # Safety
    ///
    /// Callers must guarantee that the buffer is valid to write to.
    unsafe fn raw_read(&self, buffer: *mut u8, buffer_length: usize) -> Result<(), zx::Status> {
        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
        unsafe {
            self.memory.read_raw(buffer, buffer_length, self.memory_offset)
        }
    }
}

/// A single payload that may sit in a pipe as a consequence of a `vmsplice(2)`
/// to a pipe.
///
/// A `VmsplicePayload` starts with a single segment. The payload may be split
/// into multiple segments while it sits in the pipe. This can happen when a
/// mapping that is also backing a vmspliced payload is modified such that the
/// original segment is partially unmapped.
///
/// When a `VmsplicePayload` is created, it is appended to its associated
/// memory manager's [`InflightVmsplicedPayloads`]. The list prunes entries for
/// dropped payloads when `handle_unmapping` runs.
#[derive(Debug, Default)]
pub struct VmsplicePayload {
    mapping: Weak<MemoryManager>,
    segments: Mutex<SmallVec<[VmsplicePayloadSegment; 1]>>,
}

impl VmsplicePayload {
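    /// Creates a payload with a single segment and, if the owning memory
    /// manager is still alive, registers the payload with the manager's
    /// [`InflightVmsplicedPayloads`] list.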
    pub fn new(mapping: Weak<MemoryManager>, segment: VmsplicePayloadSegment) -> Arc<Self> {
        Self::new_with_segments(mapping, [segment].into())
    }

    fn new_with_segments(
        mapping: Weak<MemoryManager>,
        segments: SmallVec<[VmsplicePayloadSegment; 1]>,
    ) -> Arc<Self> {
        let mapping_strong = mapping.upgrade();
        let payload = Arc::new(Self { mapping, segments: Mutex::new(segments) });
        if let Some(mapping) = mapping_strong {
            mapping.inflight_vmspliced_payloads.handle_new_payload(&payload);
        }
        payload
    }
}

impl MessageData for Arc<VmsplicePayload> {
    fn copy_from_user(_data: &mut dyn InputBuffer, _limit: usize) -> Result<Self, Errno> {
        error!(ENOTSUP)
    }

    fn ptr(&self) -> Result<*const u8, Errno> {
        error!(ENOTSUP)
    }

    fn with_bytes<O, F: FnMut(&[u8]) -> Result<O, Errno>>(&self, mut f: F) -> Result<O, Errno> {
        let v = {
            let segments = self.segments.lock();
            let mut v = Vec::with_capacity(segments.iter().map(|s| s.length).sum());
            for segment in segments.iter() {
                segment
                    .read_uninit(&mut v.spare_capacity_mut()[..segment.length])
                    .map_err(|_| errno!(EFAULT))?;
                // SAFETY: `read_uninit` just initialized `segment.length` bytes of the
                // spare capacity.
                unsafe { v.set_len(v.len() + segment.length) }
            }
            v
        };
        // Don't hold the lock while running the callback because it may perform
        // work that requires taking memory manager or mapping locks. Note that
        // `VmsplicePayload` is modified while such locks are held (e.g. unmap).
        f(&v)
    }

    fn len(&self) -> usize {
        self.segments.lock().iter().map(|s| s.length).sum()
    }

    fn split_off(&mut self, mut limit: usize) -> Option<Self> {
        let new_segments = {
            let mut segments = self.segments.lock();

            let mut split_at = 0;
            for segment in segments.iter() {
                if limit >= segment.length {
                    limit -= segment.length;
                    split_at += 1;
                } else {
                    break;
                }
            }

            let mut new_segments = SmallVec::new();
            if limit != 0 && split_at < segments.len() {
                new_segments.push(segments[split_at].split_off(limit).unwrap());
                split_at += 1;
            }
            if split_at <= segments.len() {
                new_segments.extend(segments.drain(split_at..));
            }
            new_segments
        };

        if new_segments.is_empty() {
            None
        } else {
            Some(VmsplicePayload::new_with_segments(self.mapping.clone(), new_segments))
        }
    }

    fn truncate(&mut self, mut limit: usize) {
        let mut segments = self.segments.lock();

        segments.retain_mut(|segment| {
            if limit >= segment.length {
                limit -= segment.length;
                true
            } else if limit != 0 {
                segment.truncate(limit);
                limit = 0;
                true
            } else {
                false
            }
        })
    }

    fn clone_at_most(&self, limit: usize) -> Self {
        let mut payload =
            VmsplicePayload::new_with_segments(self.mapping.clone(), self.segments.lock().clone());
        payload.truncate(limit);
        payload
    }

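    /// Copies the payload into `data`, reading directly into the output's
    /// iovec segments when possible and falling back to an intermediate
    /// buffer (via `with_bytes`) when raw iovec access is unavailable.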
    fn copy_to_user(&self, data: &mut dyn OutputBuffer) -> Result<usize, Errno> {
        let result = with_iovec_segments(data, |iovecs: &mut [syncio::zxio::zx_iovec]| {
            let segments = self.segments.lock();
            let length: usize = segments.iter().map(|s| s.length).sum();

            let mut segments = segments.iter();
            let mut current_segment = segments.next().cloned();
            let mut copied = 0;

            for iovec in iovecs {
                if length == copied {
                    break;
                }

                iovec.capacity = std::cmp::min(iovec.capacity, length - copied);

                let mut iovec_pos = 0;
                while let Some(segment) = &mut current_segment {
                    if iovec_pos == iovec.capacity {
                        break;
                    }

                    let to_read = std::cmp::min(segment.length, iovec.capacity - iovec_pos);
                    let after_read = segment.split_off(to_read);
                    #[allow(
                        clippy::undocumented_unsafe_blocks,
                        reason = "Force documented unsafe blocks in Starnix"
                    )]
                    unsafe { segment.raw_read(iovec.buffer as *mut u8, to_read) }
                        .map_err(|_| errno!(EFAULT))?;
                    copied += to_read;
                    iovec_pos += to_read;

                    if let Some(after_read) = after_read {
                        *segment = after_read;
                    } else {
                        current_segment = segments.next().cloned();
                    }
                }
            }
            Ok(copied)
        });
        match result {
            Some(result) => {
                let copied = result?;
                // SAFETY: We just successfully read `copied` bytes from the `MemoryObject`
                // to `data`.
                unsafe { data.advance(copied)? };
                Ok(copied)
            }
            None => self.with_bytes(|bytes| data.write(bytes)),
        }
    }
}

/// Keeps track of inflight vmspliced payloads.
///
/// This is needed so that when a mapping is unmapped, inflight vmspliced
/// payloads are updated to hold the (now unmapped) bytes without being
/// affected by any later writes to the payload's backing `MemoryObject`.
#[derive(Debug, Default)]
pub struct InflightVmsplicedPayloads {
    /// The inflight vmspliced payloads.
    ///
    /// Except when a [`VmsplicePayload`] is dropped, this is modified while the
    /// memory manager's read lock is held. To allow a `vmsplice` operation to a
    /// pipe to be performed without taking the memory manager's lock exclusively,
    /// this is protected by its own `Mutex` instead of relying on the memory
    /// manager's `RwLock`.
    payloads: Mutex<Vec<Weak<VmsplicePayload>>>,
}

impl InflightVmsplicedPayloads {
    fn handle_new_payload(&self, payload: &Arc<VmsplicePayload>) {
        self.payloads.lock().push(Arc::downgrade(payload));
    }

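    /// Detaches any inflight payload bytes backed by `unmapped_memory` within
    /// `unmapped_range`.
    ///
    /// Overlapping segments are split so that a still-mapped head and tail
    /// keep referencing the original `MemoryObject`, while the unmapped middle
    /// is re-pointed at a read-only snapshot child so that later writes to the
    /// original memory do not change what a pipe reader observes. Entries for
    /// payloads that have since been dropped are pruned as a side effect.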
    pub fn handle_unmapping(
        &self,
        unmapped_memory: &Arc<MemoryObject>,
        unmapped_range: &Range<UserAddress>,
    ) -> Result<(), Errno> {
        // Iterate over the payloads, pruning any that have since been dropped.
        let mut payloads = self.payloads.lock();
        let mut index = 0;
        while index < payloads.len() {
            let Some(payload) = payloads[index].upgrade() else {
                payloads.swap_remove(index);
                continue;
            };
            index += 1;

            let mut segments = payload.segments.lock();
            let mut new_segments = SmallVec::new();

            for segment in segments.iter() {
                let mut segment = segment.clone();
                let segment_end = (segment.addr_offset + segment.length)?;
                let segment_range = segment.addr_offset..segment_end;
                let segment_unmapped_range = unmapped_range.intersect(&segment_range);

                if &segment.memory != unmapped_memory || segment_unmapped_range.is_empty() {
                    // This can happen when, for example, an earlier partial unmapping
                    // split a `VmsplicePayloadSegment` into mapped and unmapped
                    // segments.
                    new_segments.push(segment);
                    continue;
                }

                // Keep the mapped head.
                if segment_unmapped_range.start != segment_range.start {
                    if let Some(tail) =
                        segment.split_off(segment_unmapped_range.start - segment_range.start)
                    {
                        new_segments.push(segment);
                        segment = tail;
                    }
                }

                // Keep the mapped tail.
                let tail = segment
                    .split_off(segment.length - (segment_range.end - segment_unmapped_range.end));

                // Snapshot the middle, actually unmapped, region.
                //
                // NB: we can't use `zx_vmo_transfer_data` because
                // there may be multiple vmsplice payloads mapped
                // to the same VMO region.
                let memory = segment
                    .memory
                    .create_child(
                        zx::VmoChildOptions::SNAPSHOT_MODIFIED | zx::VmoChildOptions::NO_WRITE,
                        segment.memory_offset,
                        segment.length as u64,
                    )
                    .map_err(|_| errno!(EFAULT))?;

                segment.memory = Arc::new(memory);
                segment.memory_offset = 0;
                new_segments.push(segment);

                if let Some(tail) = tail {
                    new_segments.push(tail);
                }
            }

            *segments = new_segments;
        }

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::PAGE_SIZE;
    use crate::testing::spawn_kernel_and_run;
    use crate::vfs::VecOutputBuffer;

    #[::fuchsia::test]
    async fn lifecycle() {
        spawn_kernel_and_run(async |_, current_task| {
            const NUM_PAGES: u64 = 3;
            let page_size = *PAGE_SIZE;

            let mm = current_task.mm().unwrap();

            assert!(mm.inflight_vmspliced_payloads.payloads.lock().is_empty());

            let memory_size = page_size * NUM_PAGES;
            let memory = Arc::new(MemoryObject::Vmo(zx::Vmo::create(memory_size).unwrap()));
            let mut bytes = vec![0; memory_size as usize];
            for i in 0..NUM_PAGES {
                bytes[(page_size * i) as usize..][..(page_size as usize)].fill(b'A' + i as u8);
            }
            memory.write(&bytes, 0).unwrap();

            let payload = VmsplicePayload::new(
                Arc::downgrade(&mm),
                VmsplicePayloadSegment {
                    addr_offset: UserAddress::NULL,
                    length: (page_size * NUM_PAGES) as usize,
                    memory: Arc::clone(&memory),
                    memory_offset: 0,
                },
            );
            assert_eq!(mm.inflight_vmspliced_payloads.payloads.lock().len(), 1);
            assert_eq!(payload.segments.lock().len(), 1);

            // Unmapping a different `MemoryObject` should do nothing.
            {
                let memory = Arc::new(MemoryObject::Vmo(zx::Vmo::create(page_size).unwrap()));
                mm.inflight_vmspliced_payloads
                    .handle_unmapping(&memory, &(UserAddress::NULL..(u64::MAX.into())))
                    .unwrap();
                assert_eq!(payload.segments.lock().len(), 1);
            }

            mm.inflight_vmspliced_payloads
                .handle_unmapping(&memory, &(UserAddress::NULL..page_size.into()))
                .unwrap();
            {
                let segments = payload.segments.lock();
                assert_eq!(segments.len(), 2);
                assert!(!Arc::ptr_eq(&segments[0].memory, &memory));
                assert!(Arc::ptr_eq(&segments[1].memory, &memory));
            }
            let mut got = VecOutputBuffer::new(memory_size as usize);
            payload.copy_to_user(&mut got).unwrap();
            assert_eq!(got.data(), &bytes);

            std::mem::drop(payload);

            // Run the unmapping again so the entry for the dropped payload is
            // pruned from the list.
            mm.inflight_vmspliced_payloads
                .handle_unmapping(&memory, &(UserAddress::NULL..page_size.into()))
                .unwrap();

            assert!(mm.inflight_vmspliced_payloads.payloads.lock().is_empty());
        })
        .await;
    }
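
    // The tests below are illustrative sketches of the lower-level splitting
    // and truncation primitives. They assume the same `#[::fuchsia::test]`
    // harness and Fuchsia `zx` runtime as `lifecycle` above; the test names
    // and scenarios are additions, not part of the original suite.
    #[::fuchsia::test]
    fn segment_split_off_and_truncate() {
        let page_size = *PAGE_SIZE;
        let memory = Arc::new(MemoryObject::Vmo(zx::Vmo::create(page_size * 2).unwrap()));

        let mut segment = VmsplicePayloadSegment {
            addr_offset: UserAddress::NULL,
            length: (page_size * 2) as usize,
            memory: Arc::clone(&memory),
            memory_offset: 0,
        };

        // Splitting in range leaves the head in place and returns a tail whose
        // memory offset is advanced by the split index.
        let tail = segment.split_off(page_size as usize).unwrap();
        assert_eq!(segment.length, page_size as usize);
        assert_eq!(tail.length, page_size as usize);
        assert_eq!(tail.memory_offset, page_size);
        assert!(Arc::ptr_eq(&tail.memory, &memory));

        // Splitting at or past the end returns `None` and leaves the segment
        // untouched.
        assert!(segment.split_off(segment.length).is_none());
        assert_eq!(segment.length, page_size as usize);

        // Truncation never grows a segment.
        segment.truncate(usize::MAX);
        assert_eq!(segment.length, page_size as usize);
        segment.truncate(1);
        assert_eq!(segment.length, 1);
    }

    // A sketch of the `MessageData` splitting/truncation paths that does not
    // need a running kernel: a dangling `Weak` means the payload is never
    // registered with a memory manager.
    #[::fuchsia::test]
    fn payload_split_off_and_truncate() {
        let page_size = *PAGE_SIZE;
        let memory = Arc::new(MemoryObject::Vmo(zx::Vmo::create(page_size * 2).unwrap()));

        let mut payload = VmsplicePayload::new(
            Weak::new(),
            VmsplicePayloadSegment {
                addr_offset: UserAddress::NULL,
                length: (page_size * 2) as usize,
                memory,
                memory_offset: 0,
            },
        );
        assert_eq!(payload.len(), (page_size * 2) as usize);

        // Splitting in the middle of the single segment leaves the head in
        // `payload` and returns a new payload holding the tail.
        let mut tail = payload.split_off(page_size as usize).unwrap();
        assert_eq!(payload.len(), page_size as usize);
        assert_eq!(tail.len(), page_size as usize);

        // Truncating below the current length shrinks the payload; a larger
        // limit is a no-op.
        tail.truncate(1);
        assert_eq!(tail.len(), 1);
        tail.truncate(page_size as usize);
        assert_eq!(tail.len(), 1);
    }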
}