starnix_core/vfs/
memory_regular.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::fs::tmpfs::TmpFs;
6use crate::mm::memory::MemoryObject;
7use crate::mm::{PAGE_SIZE, ProtectionFlags, VMEX_RESOURCE};
8use crate::security;
9use crate::signals::{SignalInfo, send_standard_signal};
10use crate::task::CurrentTask;
11use crate::vfs::buffers::{InputBuffer, OutputBuffer};
12use crate::vfs::{
13    AppendLockGuard, DirEntry, FallocMode, FileHandle, FileObject, FileOps, FileSystemHandle,
14    FsNode, FsNodeInfo, FsNodeLinkBehavior, FsNodeOps, FsString, MAX_LFS_FILESIZE,
15    MemoryXattrStorage, Mount, MountInfo, NamespaceNode, WhatToMount, XattrStorage as _,
16    default_ioctl, fileops_impl_noop_sync, fs_node_impl_not_dir, fs_node_impl_xattr_delegate,
17};
18use linux_uapi::{ASHMEM_GET_SIZE, ASHMEM_SET_SIZE};
19use starnix_logging::{impossible_error, track_stub};
20use starnix_sync::{FileOpsCore, Locked, Unlocked};
21use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
22use starnix_types::math::round_up_to_system_page_size;
23use starnix_uapi::errors::Errno;
24use starnix_uapi::file_mode::{AccessCheck, mode};
25use starnix_uapi::open_flags::OpenFlags;
26use starnix_uapi::resource_limits::Resource;
27use starnix_uapi::seal_flags::SealFlags;
28use starnix_uapi::signals::SIGXFSZ;
29use starnix_uapi::{errno, error};
30use std::sync::Arc;
31
32pub struct MemoryRegularNode {
33    /// The memory that backs this file.
34    memory: Arc<MemoryObject>,
35    xattrs: MemoryXattrStorage,
36}
37
38impl MemoryRegularNode {
39    /// Create a new writable file node based on a blank VMO.
40    pub fn new() -> Result<Self, Errno> {
41        let vmo =
42            zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, 0).map_err(|_| errno!(ENOMEM))?;
43        Ok(Self {
44            memory: Arc::new(MemoryObject::from(vmo).with_zx_name(b"starnix:vfs")),
45            xattrs: MemoryXattrStorage::default(),
46        })
47    }
48
49    /// Create a new file node based on an existing VMO.
50    /// Attempts to open the file for writing will fail unless [`memory`] has both
51    /// the `WRITE` and `RESIZE` rights.
52    pub fn from_memory(memory: Arc<MemoryObject>) -> Self {
53        Self { memory, xattrs: MemoryXattrStorage::default() }
54    }
55}
56
57impl FsNodeOps for MemoryRegularNode {
58    fs_node_impl_not_dir!();
59    fs_node_impl_xattr_delegate!(self, self.xattrs);
60
61    fn initial_info(&self, info: &mut FsNodeInfo) {
62        info.size = self.memory.get_content_size() as usize;
63    }
64
65    fn create_file_ops(
66        &self,
67        _locked: &mut Locked<FileOpsCore>,
68        node: &FsNode,
69        _current_task: &CurrentTask,
70        flags: OpenFlags,
71    ) -> Result<Box<dyn FileOps>, Errno> {
72        if flags.contains(OpenFlags::TRUNC) {
73            // Truncating to zero length must pass the shrink seal check.
74            node.write_guard_state.lock().check_no_seal(SealFlags::SHRINK)?;
75        }
76
77        // Produce a VMO handle with rights reduced to those requested in |flags|.
78        let mut desired_rights = zx::Rights::VMO_DEFAULT | zx::Rights::RESIZE;
79        if !flags.can_read() {
80            desired_rights.remove(zx::Rights::READ);
81        }
82        if !flags.can_write() {
83            desired_rights.remove(zx::Rights::WRITE | zx::Rights::RESIZE);
84        }
85        let scoped_memory =
86            Arc::new(self.memory.duplicate_handle(desired_rights).map_err(|_e| errno!(EIO))?);
87        let file_object = MemoryRegularFile::new(scoped_memory);
88
89        Ok(Box::new(file_object))
90    }
91
92    fn truncate(
93        &self,
94        _locked: &mut Locked<FileOpsCore>,
95        _guard: &AppendLockGuard<'_>,
96        node: &FsNode,
97        _current_task: &CurrentTask,
98        length: u64,
99    ) -> Result<(), Errno> {
100        let length = length as usize;
101
102        node.update_info(|info| {
103            if info.size == length {
104                // The file size remains unaffected.
105                return Ok(());
106            }
107
108            // We must hold the lock till the end of the operation to guarantee that
109            // there is no change to the seals.
110            let state = node.write_guard_state.lock();
111
112            if info.size > length {
113                // A decrease in file size must pass the shrink seal check.
114                state.check_no_seal(SealFlags::SHRINK)?;
115            } else {
116                // An increase in file size must pass the grow seal check.
117                state.check_no_seal(SealFlags::GROW)?;
118            }
119
120            let memory_size = update_memory_file_size(&self.memory, info, length)?;
121            info.size = length;
122
123            // Zero unused parts of the VMO.
124            if memory_size > length {
125                self.memory
126                    .op_range(zx::VmoOp::ZERO, length as u64, (memory_size - length) as u64)
127                    .map_err(impossible_error)?;
128            }
129
130            Ok(())
131        })
132    }
133
134    fn allocate(
135        &self,
136        _locked: &mut Locked<FileOpsCore>,
137        _guard: &AppendLockGuard<'_>,
138        node: &FsNode,
139        _current_task: &CurrentTask,
140        mode: FallocMode,
141        offset: u64,
142        length: u64,
143    ) -> Result<(), Errno> {
144        match mode {
145            FallocMode::PunchHole => {
146                // Lock `info()` before acquiring the `write_guard_state` lock to ensure consistent
147                // lock ordering.
148                let info = node.info();
149
150                // Check write seal. Hold the lock to ensure seals don't change.
151                let state = node.write_guard_state.lock();
152                state.check_no_seal(SealFlags::WRITE | SealFlags::FUTURE_WRITE)?;
153
154                let mut end = offset.checked_add(length).ok_or_else(|| errno!(EINVAL))? as usize;
155
156                let memory_size = info.blksize * info.blocks;
157                if offset as usize >= memory_size {
158                    return Ok(());
159                }
160
161                // If punching hole at the end of the file then zero all the
162                // way to the end of the VMO to avoid keeping any pages for the tail.
163                if end >= info.size {
164                    end = memory_size;
165                }
166
167                self.memory
168                    .op_range(zx::VmoOp::ZERO, offset, end as u64 - offset)
169                    .map_err(impossible_error)?;
170
171                Ok(())
172            }
173
174            FallocMode::Allocate { keep_size } => {
175                node.update_info(|info| {
176                    let new_size = (offset + length) as usize;
177                    if new_size > info.size {
178                        // Check GROW seal (even with `keep_size=true`). Hold the lock to ensure
179                        // seals don't change.
180                        let state = node.write_guard_state.lock();
181                        state.check_no_seal(SealFlags::GROW)?;
182
183                        update_memory_file_size(&self.memory, info, new_size)?;
184
185                        if !keep_size {
186                            info.size = new_size;
187                        }
188                    }
189                    Ok(())
190                })
191            }
192
193            _ => error!(EOPNOTSUPP),
194        }
195    }
196}
197
198pub struct MemoryRegularFile {
199    pub memory: Arc<MemoryObject>,
200}
201
202impl MemoryRegularFile {
203    /// Create a file object based on a VMO.
204    pub fn new(memory: Arc<MemoryObject>) -> Self {
205        MemoryRegularFile { memory }
206    }
207}
208
209impl MemoryRegularFile {
210    pub fn read(
211        memory: &Arc<MemoryObject>,
212        file: &FileObject,
213        offset: usize,
214        data: &mut dyn OutputBuffer,
215    ) -> Result<usize, Errno> {
216        let actual = {
217            let info = file.node().info();
218            let file_length = info.size;
219            let want_read = data.available();
220            if offset < file_length {
221                let to_read =
222                    if file_length < offset + want_read { file_length - offset } else { want_read };
223                let buf =
224                    memory.read_to_vec(offset as u64, to_read as u64).map_err(|_| errno!(EIO))?;
225                drop(info);
226                data.write_all(&buf[..])?;
227                to_read
228            } else {
229                0
230            }
231        };
232        Ok(actual)
233    }
234
235    pub fn write(
236        locked: &mut Locked<FileOpsCore>,
237        memory: &Arc<MemoryObject>,
238        file: &FileObject,
239        current_task: &CurrentTask,
240        offset: usize,
241        data: &mut dyn InputBuffer,
242    ) -> Result<usize, Errno> {
243        let mut want_write = data.available();
244        let buf = data.peek_all()?;
245
246        file.node().update_info(|info| {
247            let mut write_end = offset + want_write;
248            let mut update_content_size = false;
249
250            // We must hold the lock till the end of the operation to guarantee that
251            // there is no change to the seals.
252            let state = file.name.entry.node.write_guard_state.lock();
253
254            // Non-zero writes must pass the write seal check.
255            if want_write != 0 {
256                state.check_no_seal(SealFlags::WRITE | SealFlags::FUTURE_WRITE)?;
257            }
258
259            // Writing past the file size
260            if write_end > info.size {
261                // The grow seal check failed.
262                if let Err(e) = state.check_no_seal(SealFlags::GROW) {
263                    if offset >= info.size {
264                        // Write starts outside the file.
265                        // Forbid because nothing can be written without growing.
266                        return Err(e);
267                    } else if info.size == info.storage_size() {
268                        // Write starts inside file and EOF page does not need to grow.
269                        // End write at EOF.
270                        write_end = info.size;
271                        want_write = write_end - offset;
272                    } else {
273                        // Write starts inside file and EOF page needs to grow.
274                        let eof_page_start = info.storage_size() - (*PAGE_SIZE as usize);
275
276                        if offset >= eof_page_start {
277                            // Write starts in EOF page.
278                            // Forbid because EOF page cannot grow.
279                            return Err(e);
280                        }
281
282                        // End write at page before EOF.
283                        write_end = eof_page_start;
284                        want_write = write_end - offset;
285                    }
286                }
287            }
288
289            // Check against the FSIZE limt
290            let fsize_limit =
291                current_task.thread_group().get_rlimit(locked, Resource::FSIZE) as usize;
292            if write_end > fsize_limit {
293                if offset >= fsize_limit {
294                    // Write starts beyond the FSIZE limt.
295                    send_standard_signal(locked, current_task, SignalInfo::default(SIGXFSZ));
296                    return error!(EFBIG);
297                }
298
299                // End write at FSIZE limit.
300                write_end = fsize_limit;
301                want_write = write_end - offset;
302            }
303
304            if write_end > info.size {
305                if write_end > info.storage_size() {
306                    update_memory_file_size(memory, info, write_end)?;
307                }
308                update_content_size = true;
309            }
310            memory.write(&buf[..want_write], offset as u64).map_err(|_| errno!(EIO))?;
311
312            if update_content_size {
313                info.size = write_end;
314            }
315            data.advance(want_write)?;
316            Ok(want_write)
317        })
318    }
319
320    pub fn get_memory(
321        memory: &Arc<MemoryObject>,
322        file: &FileObject,
323        _current_task: &CurrentTask,
324        prot: ProtectionFlags,
325    ) -> Result<Arc<MemoryObject>, Errno> {
326        // In MemoryFileNode::create_file_ops, we downscoped the rights
327        // on the VMO to match the rights on the file object. If the caller
328        // wants more rights than exist on the file object, return an error
329        // instead of returning a MemoryObject that does not conform to
330        // the FileOps::get_memory contract.
331        if prot.contains(ProtectionFlags::READ) && !file.can_read() {
332            return error!(EACCES);
333        }
334        if prot.contains(ProtectionFlags::WRITE) && !file.can_write() {
335            return error!(EACCES);
336        }
337        let mut memory = Arc::clone(memory);
338        if prot.contains(ProtectionFlags::EXEC) {
339            memory = Arc::new(
340                memory
341                    .duplicate_handle(zx::Rights::SAME_RIGHTS)
342                    .map_err(impossible_error)?
343                    .replace_as_executable(&VMEX_RESOURCE)
344                    .map_err(impossible_error)?,
345            );
346        }
347        Ok(memory)
348    }
349}
350
/// Implements the `read`, `write` and `get_memory` methods of a `FileOps` impl by forwarding
/// to the associated functions of `MemoryRegularFile`, and invokes `fileops_impl_seekable!`.
///
/// * `$self` - the receiver identifier to use in the generated methods (normally `self`).
/// * `$memory` - an expression, which may refer to `$self`, evaluating to the
///   `&Arc<MemoryObject>` that backs the file.
///
/// Every type in the generated signatures is written with a full path, so the macro does not
/// depend on what is imported at the expansion site.
#[macro_export]
macro_rules! fileops_impl_memory {
    ($self:ident, $memory:expr) => {
        $crate::fileops_impl_seekable!();

        fn read(
            &$self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $crate::vfs::MemoryRegularFile::read($memory, file, offset, data)
        }

        fn write(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $crate::vfs::MemoryRegularFile::write(locked, $memory, file, current_task, offset, data)
        }

        fn get_memory(
            &$self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            _length: Option<usize>,
            prot: $crate::mm::ProtectionFlags,
        ) -> Result<std::sync::Arc<$crate::mm::memory::MemoryObject>, starnix_uapi::errors::Errno>
        {
            $crate::vfs::MemoryRegularFile::get_memory($memory, file, current_task, prot)
        }
    }
}
390pub use fileops_impl_memory;
391
392impl FileOps for MemoryRegularFile {
393    fileops_impl_memory!(self, &self.memory);
394    fileops_impl_noop_sync!();
395
396    fn readahead(
397        &self,
398        _file: &FileObject,
399        _current_task: &CurrentTask,
400        _offset: usize,
401        _length: usize,
402    ) -> Result<(), Errno> {
403        track_stub!(TODO("https://fxbug.dev/42082608"), "paged VMO readahead");
404        Ok(())
405    }
406
407    fn ioctl(
408        &self,
409        locked: &mut Locked<Unlocked>,
410        file: &FileObject,
411        current_task: &CurrentTask,
412        request: u32,
413        arg: SyscallArg,
414    ) -> Result<SyscallResult, Errno> {
415        match request {
416            ASHMEM_GET_SIZE => {
417                track_stub!(TODO("https://fxbug.dev/389102161"), "ashmem get_size on memfd");
418                Ok(self.memory.get_size().into())
419            }
420            ASHMEM_SET_SIZE => {
421                track_stub!(TODO("https://fxbug.dev/389102161"), "ashmem set_size on memfd");
422                self.memory.set_size(arg.into()).map_err(|_| errno!(EINVAL))?;
423                Ok(SUCCESS)
424            }
425            _ => default_ioctl(file, locked, current_task, request, arg),
426        }
427    }
428}
429
430pub fn new_memfd(
431    locked: &mut Locked<Unlocked>,
432    current_task: &CurrentTask,
433    mut name: FsString,
434    seals: SealFlags,
435    flags: OpenFlags,
436) -> Result<FileHandle, Errno> {
437    struct MemFdTmpfs {
438        tmpfs: FileSystemHandle,
439        mount: Arc<Mount>,
440    }
441
442    let fs = current_task.kernel().expando.get_or_init(|| {
443        let tmpfs = TmpFs::new_fs(locked, current_task.kernel());
444        security::file_system_resolve_security(locked, &current_task, &tmpfs)
445            .expect("resolve fs security");
446        let mount = Mount::new(WhatToMount::Fs(tmpfs.clone()), Default::default());
447        MemFdTmpfs { tmpfs, mount }
448    });
449
450    // Create the node as a kernel-internal operation, to skip the filesystem access-checks.
451    // TODO: https://fxbug.dev/455785957 - Validate whether any access-checks should be performed
452    // during "memfd" creation.
453    let fs_node = current_task.override_creds(security::creds_start_internal_operation, || {
454        let node = fs.tmpfs.root().node.create_tmpfile(
455            locked,
456            current_task,
457            &MountInfo::detached(),
458            mode!(IFREG, 0o600),
459            current_task.current_fscred(),
460            FsNodeLinkBehavior::Disallowed,
461        )?;
462        security::fs_node_init_memfd(current_task, &node);
463        Ok(node)
464    })?;
465    fs_node.write_guard_state.lock().enable_sealing(seals);
466
467    // memfd instances appear in /proc[pid]/fd as though they are O_TMPFILE files with names of
468    // the form "memfd:[name]".
469    let mut local_name = FsString::from("memfd:");
470    local_name.append(&mut name);
471    let dir_entry = DirEntry::new_deleted(fs_node, Some(fs.tmpfs.root().clone()), local_name);
472    security::fs_node_init_with_dentry(locked, current_task, &dir_entry)?;
473
474    let name = NamespaceNode::new(fs.mount.clone(), dir_entry);
475    name.open(locked, current_task, flags, AccessCheck::skip())
476}
477
478/// Sets memory size to `min_size` rounded to whole pages. Returns the new size of the VMO in bytes.
479fn update_memory_file_size(
480    memory: &MemoryObject,
481    node_info: &mut FsNodeInfo,
482    requested_size: usize,
483) -> Result<usize, Errno> {
484    assert!(requested_size <= MAX_LFS_FILESIZE);
485    let size = round_up_to_system_page_size(requested_size)?;
486    memory.set_size(size as u64).map_err(|status| match status {
487        zx::Status::NO_MEMORY => errno!(ENOMEM),
488        zx::Status::OUT_OF_RANGE => errno!(ENOMEM),
489        _ => impossible_error(status),
490    })?;
491    node_info.blocks = size / node_info.blksize;
492    Ok(size)
493}