Skip to main content

starnix_core/vfs/
fd_table.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::security;
6use crate::task::{CurrentTask, CurrentTaskAndLocked, register_delayed_release};
7use crate::vfs::{FdNumber, FileHandle, FileReleaser};
8use bitflags::bitflags;
9use fuchsia_rcu::subtle::{RcuPtrRef, rcu_ptr_to_arc};
10use fuchsia_rcu::{RcuArc, RcuReadScope, rcu_drop};
11use fuchsia_rcu_collections::rcu_array::RcuArray;
12use linux_uapi::{FD_CLOEXEC, FIOCLEX, FIONCLEX};
13use starnix_sync::{
14    FileOpsCore, LockBefore, LockEqualOrBefore, Locked, Mutex, MutexGuard, ThreadGroupLimits,
15    Unlocked,
16};
17use starnix_syscalls::SyscallResult;
18use starnix_types::ownership::Releasable;
19use starnix_uapi::errors::Errno;
20use starnix_uapi::open_flags::OpenFlags;
21use starnix_uapi::resource_limits::Resource;
22use starnix_uapi::{errno, error};
23use static_assertions::const_assert;
24use std::sync::Arc;
25use std::sync::atomic::{AtomicI32, AtomicUsize, Ordering};
26
bitflags! {
    /// Per-descriptor flags stored next to each file handle in an `FdTable`.
    ///
    /// The raw representation is a `u32` whose bit values come from the Linux UAPI constants.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct FdFlags: u32 {
        /// Whether the file descriptor should be closed when the process execs.
        const CLOEXEC = FD_CLOEXEC;
    }
}
34
35impl std::convert::From<FdFlags> for SyscallResult {
36    fn from(value: FdFlags) -> Self {
37        value.bits().into()
38    }
39}
40
/// An identifier for an `FdTable`.
///
/// Used by flock to drop file locks when a file descriptor is closed.
///
/// The wrapped value is the address of the table's `FdTableInner`, converted to a `usize`.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct FdTableId(usize);
46
47impl FdTableId {
48    fn new(id: *const FdTableInner) -> Self {
49        Self(id as usize)
50    }
51
52    pub fn raw(&self) -> usize {
53        self.0
54    }
55}
56
/// We store the CLOEXEC bit and the address of the `FileObject` in a single `usize` so that we can
/// operate on an FdTable entry atomically. This mask is used to select the CLOEXEC bit.
///
/// Only the lowest bit is covered by the mask; every other bit of an encoded value belongs to the
/// pointer.
const FLAGS_MASK: usize = 0x1;
60
/// An encoded entry in an `FdTable`.
///
/// Encodes both the `FileHandle` and the CLOEXEC bit. Can either hold an entry or be empty.
///
/// The derived `Default` produces an empty entry, because the default `AtomicUsize` is zero.
#[derive(Debug, Default)]
struct EncodedEntry {
    /// Rather than using a separate "flags" field, we encode the table entry into a single usize.
    ///
    /// If `value` is zero, the entry is empty.
    ///
    /// The lowest bit of `value` is the CLOEXEC bit.
    ///
    /// The remaining bits of `value` are a `FileHandle` converted to a raw pointer.
    value: AtomicUsize,
}
75
76// An assert to ensure that the lowest bit of the `FileHandle` is available to store the CLOEXEC
77// bit.
78const_assert!(std::mem::align_of::<*const FileReleaser>() >= 1 << FLAGS_MASK);
79
impl EncodedEntry {
    /// Encodes a `FileHandle` and `FdFlags` into a single `usize`.
    ///
    /// The returned value holds a reference to the `FileObject` and must be released to avoid a
    /// memory leak.
    fn encode(file: FileHandle, flags: FdFlags) -> usize {
        // `Arc::into_raw` hands the strong reference over to the returned integer.
        let ptr = Arc::into_raw(file) as usize;
        let flags = (flags.bits() as usize) & FLAGS_MASK;
        ptr | flags
    }

    /// Releases the `FileHandle` for a previously encoded value.
    ///
    /// A `value` whose pointer part is null (for example zero, the empty entry) is ignored.
    ///
    /// # Safety
    ///
    /// `value` must have been encoded by `Self::encode`.
    unsafe fn release(id: FdTableId, value: usize) {
        let ptr = Self::decode_ptr(value);
        if !ptr.is_null() {
            // SAFETY: The caller guarantees that `value` was produced by `Self::encode`, so a
            // non-null pointer part came from `Arc::into_raw`.
            let file = unsafe { Arc::from_raw(ptr) };
            // Concurrent readers expect the `FileHandle` to be retained for the entire RCU grace
            // period. `FlushedFile` delayed release may be processed before the grace period
            // expires. We must defer a reference to RCU to ensure delayed release does not drop the
            // last reference and free the file before RCU readers are done with it.
            register_delayed_release(FlushedFile(file.clone(), id));
            rcu_drop(file)
        }
    }

    /// Decodes the `FdFlags` from an encoded `usize`.
    fn decode_flags(value: usize) -> FdFlags {
        FdFlags::from_bits_truncate((value & FLAGS_MASK) as u32)
    }

    /// Decodes the `FileHandle` from an encoded `usize`.
    ///
    /// The result is a raw pointer; no reference count is touched. It is null when the pointer
    /// part of `value` is zero.
    fn decode_ptr(value: usize) -> *const FileReleaser {
        (value & !FLAGS_MASK) as *const _
    }

    /// Creates a new `EncodedEntry` from a `FdTableEntry`.
    fn new(entry: FdTableEntry) -> Self {
        Self { value: AtomicUsize::new(Self::encode(entry.file, entry.flags)) }
    }

    /// Whether this entry contains a valid `FileHandle`.
    fn is_some(&self) -> bool {
        let value = self.value.load(Ordering::Acquire);
        value != 0
    }

    /// Whether this entry is empty.
    fn is_none(&self) -> bool {
        !self.is_some()
    }

    /// Sets the `FdFlags` for this entry, preserving the `FileHandle`.
    ///
    /// # Panics
    ///
    /// Panics if the entry is empty.
    fn set_flags(&self, flags: FdFlags) {
        // Retry until the compare-exchange succeeds against the value the new one was built from.
        loop {
            let old_value = self.value.load(Ordering::Relaxed);
            assert!(old_value != 0);
            // Keep the pointer bits and replace only the bits selected by `FLAGS_MASK`.
            let new_value = old_value & !FLAGS_MASK | (flags.bits() as usize) & FLAGS_MASK;
            if self
                .value
                .compare_exchange_weak(old_value, new_value, Ordering::AcqRel, Ordering::Relaxed)
                .is_ok()
            {
                return;
            }
        }
    }

    /// Sets the `FileHandle` for this entry, preserving the `FdFlags`.
    ///
    /// The previously stored `FileHandle` is released via `Self::release`.
    ///
    /// # Panics
    ///
    /// Panics if the entry is empty.
    fn set_file(&self, id: FdTableId, file: FileHandle) {
        // The strong reference of `file` is transferred into the stored value.
        let ptr = Arc::into_raw(file) as usize;
        loop {
            let old_value = self.value.load(Ordering::Relaxed);
            assert!(old_value != 0);
            let flags = old_value & FLAGS_MASK;
            let new_value = ptr | flags;
            if self
                .value
                .compare_exchange_weak(old_value, new_value, Ordering::AcqRel, Ordering::Relaxed)
                .is_ok()
            {
                // SAFETY: The value was previously encoded by `Self::encode`.
                unsafe { Self::release(id, old_value) };
                return;
            }
        }
    }

    /// Reads the entry, returning a guard that maintains a consistent view of it.
    ///
    /// Returns `None` when the entry is empty. The pointer and the flags in the guard are decoded
    /// from the same single load of `value`.
    fn read<'a>(&self, scope: &'a RcuReadScope) -> Option<FdTableEntryGuard<'a>> {
        let value = self.value.load(Ordering::Acquire);
        if value == 0 {
            return None;
        }
        let ptr = Self::decode_ptr(value);
        let flags = Self::decode_flags(value);
        // SAFETY: The pointer is valid because it was encoded in `self.value`.
        let file = unsafe { RcuPtrRef::new(scope, ptr) };
        Some(FdTableEntryGuard { file, flags })
    }

    /// Sets the `FileHandle` and `FdFlags` for this entry.
    ///
    /// Returns whether the entry held a value before the call; that old value is released.
    fn set_entry(&self, id: FdTableId, entry: FdTableEntry) -> bool {
        // SAFETY: The value is encoded by `Self::encode`.
        unsafe { self.set(id, Self::encode(entry.file, entry.flags)) }
    }

    /// Makes the entry empty.
    ///
    /// Returns whether the entry held a value before the call; that old value is released.
    fn clear(&self, id: FdTableId) -> bool {
        // SAFETY: The value is zero.
        unsafe { self.set(id, 0) }
    }

    /// Sets the value of this entry to the given value.
    ///
    /// Returns `true` if the entry was non-empty before the call, in which case the old value is
    /// released via `Self::release`, and `false` otherwise.
    ///
    /// Most clients should call `set_entry` or `clear` instead.
    ///
    /// # Safety
    ///
    /// The value must be encoded by `Self::encode` or be zero.
    unsafe fn set(&self, id: FdTableId, value: usize) -> bool {
        let old_value = self.value.swap(value, Ordering::AcqRel);
        if old_value != 0 {
            // SAFETY: The value was previously encoded by `Self::encode`.
            unsafe { Self::release(id, old_value) };
            true
        } else {
            false
        }
    }
}
215
216impl Clone for EncodedEntry {
217    fn clone(&self) -> Self {
218        if let Some(guard) = self.read(&RcuReadScope::new()) {
219            Self::new(guard.to_entry())
220        } else {
221            Self::default()
222        }
223    }
224}
225
226impl Drop for EncodedEntry {
227    fn drop(&mut self) {
228        let value = self.value.load(Ordering::Acquire);
229        let ptr = Self::decode_ptr(value);
230        if !ptr.is_null() {
231            // SAFETY: The pointer is valid because it was encoded in `self.value`.
232            let _file = unsafe { Arc::from_raw(ptr) };
233        }
234    }
235}
236
/// An entry in the `FdTable`.
///
/// This is the decoded, owned form of an `EncodedEntry`: it holds a strong `FileHandle` together
/// with the descriptor flags.
#[derive(Debug, Clone)]
struct FdTableEntry {
    /// The file handle.
    file: FileHandle,

    /// The flags associated with the file handle.
    flags: FdFlags,
}
246
/// A guard for reading an `FdTableEntry`.
///
/// This provides memory-safe access to decoded `FdTableEntry` data, which is guarded by RCU.
///
/// The lifetime `'a` is that of the `RcuReadScope` the entry was read under.
struct FdTableEntryGuard<'a> {
    /// The pointer to the file handle.
    file: RcuPtrRef<'a, FileReleaser>,

    /// The flags associated with the file handle.
    flags: FdFlags,
}
257
258impl<'a> FdTableEntryGuard<'a> {
259    fn flags(&self) -> FdFlags {
260        self.flags
261    }
262
263    /// Acquire a strong reference to the file handle.
264    fn to_handle(&self) -> FileHandle {
265        // SAFETY: We can pass `self.file` to `rcu_ptr_to_arc` because it was obtained from
266        // `Arc::into_raw` via `EncodedEntry::encode` and `EncodedEntry::decode_ptr`.
267        unsafe { rcu_ptr_to_arc(self.file) }
268    }
269
270    /// Upgrade this guard to a full `FdTableEntry` independent of the guard lifetime.
271    fn to_entry(&self) -> FdTableEntry {
272        FdTableEntry { file: self.to_handle(), flags: self.flags }
273    }
274}
275
/// A `FileHandle` that has been closed and is waiting to be flushed.
///
/// The `FdTableId` identifies the table the file was removed from; it is passed to `flush` when
/// the value is released.
struct FlushedFile(FileHandle, FdTableId);
278
279impl Releasable for FlushedFile {
280    type Context<'a> = CurrentTaskAndLocked<'a>;
281    fn release<'a>(self, context: Self::Context<'a>) {
282        let (locked, current_task) = context;
283        let FlushedFile(file, id) = self;
284        file.flush(locked, current_task, id);
285    }
286}
287
/// A read-only view of an `FdTable`.
///
/// When reading an `FdTable`, we use an `FdTableView` to have a coherent view of the table even
/// though the table can be modified by other threads concurrently.
///
/// The actual entries in the slice can still be modified by other threads. However, the view
/// provided by the `FdTableView` is protected by an RCU read lock.
struct FdTableView<'a> {
    /// The entries in the table.
    ///
    /// The slice is indexed by the raw value of an `FdNumber`.
    slice: &'a [EncodedEntry],
}
299
300impl<'a> FdTableView<'a> {
301    /// Returns the number of entries in the table.
302    fn len(&self) -> usize {
303        self.slice.len()
304    }
305
306    /// Whether the view contains a given `FdNumber`.
307    fn is_some(&self, fd: FdNumber) -> bool {
308        self.slice.get(fd.raw() as usize).map_or(false, |entry| entry.is_some())
309    }
310
311    /// Whether the view does not contain a given `FdNumber`.
312    fn is_none(&self, fd: FdNumber) -> bool {
313        !self.is_some(fd)
314    }
315
316    /// Returns the `FileHandle` for a given `FdNumber`, if any.
317    fn get_file(&self, scope: &RcuReadScope, fd: FdNumber) -> Option<FileHandle> {
318        self.slice
319            .get(fd.raw() as usize)
320            .and_then(|entry| entry.read(scope))
321            .map(|guard| guard.to_handle())
322    }
323
324    /// Returns the `FdTableEntry` for a given `FdNumber`, if any.
325    fn get_entry(&self, scope: &RcuReadScope, fd: FdNumber) -> Option<FdTableEntry> {
326        self.slice
327            .get(fd.raw() as usize)
328            .and_then(|entry| entry.read(scope))
329            .map(|guard| guard.to_entry())
330    }
331}
332
/// Write access to an `FdTableInner`.
///
/// Created by `FdTableInner::write`, which locks the table's `writer_queue` mutex and stores the
/// resulting guard here. All mutating operations on the table go through this type.
struct FdTableWriteGuard<'a> {
    /// The table being modified.
    store: &'a FdTableInner,
    /// The guard obtained from locking `writer_queue`; it is never read, only held.
    _write_guard: MutexGuard<'a, ()>,
}
337
338impl<'a> FdTableWriteGuard<'a> {
339    /// The lowest available `FdNumber`.
340    fn next_fd(&self) -> FdNumber {
341        self.store.next_fd.get()
342    }
343
344    /// Recalculates the lowest available FD >= minfd based on the contents of the map.
345    fn calculate_lowest_available_fd(&self, view: &FdTableView<'_>, minfd: &FdNumber) -> FdNumber {
346        let mut fd: FdNumber = *minfd;
347        while view.is_some(fd) {
348            fd = FdNumber::from_raw(fd.raw() + 1);
349        }
350        fd
351    }
352
353    // Returns the (possibly memoized) lowest available FD >= minfd in this map.
354    fn get_lowest_available_fd(&self, scope: &RcuReadScope, minfd: FdNumber) -> FdNumber {
355        if minfd > self.store.next_fd.get() {
356            let view = self.store.read(scope);
357            return self.calculate_lowest_available_fd(&view, &minfd);
358        }
359        self.store.next_fd.get()
360    }
361
362    /// Returns the `FileHandle` for a given `FdNumber`, if any.
363    fn get_file(&self, scope: &RcuReadScope, fd: FdNumber) -> Option<FileHandle> {
364        self.store.read(scope).get_file(scope, fd)
365    }
366
367    /// Inserts a new entry into the `FdTable`.
368    ///
369    /// Returns whether the `FdTable` previously contained an entry for the given `FdNumber`.
370    fn insert_entry(
371        &self,
372        scope: &RcuReadScope,
373        fd: FdNumber,
374        rlimit: u64,
375        entry: FdTableEntry,
376    ) -> Result<bool, Errno> {
377        let raw_fd = fd.raw();
378        if raw_fd < 0 {
379            return error!(EBADF);
380        }
381        if raw_fd as u64 >= rlimit {
382            return error!(EMFILE);
383        }
384        let mut view = self.store.read(scope);
385        if raw_fd == self.store.next_fd.get().raw() {
386            self.store
387                .next_fd
388                .set(self.calculate_lowest_available_fd(&view, &FdNumber::from_raw(raw_fd + 1)));
389        }
390        let raw_fd = raw_fd as usize;
391        if view.len() <= raw_fd {
392            // SAFETY: The write guard excludes concurrent writers.
393            unsafe { self.store.entries.ensure_at_least(raw_fd + 1) };
394            view = self.store.read(scope);
395        }
396        let id = self.store.id();
397        Ok(view.slice[raw_fd].set_entry(id, entry))
398    }
399
400    /// Removes an entry from the `FdTable`.
401    ///
402    /// Returns whether the `FdTable` previously contained an entry for the given `FdNumber`.
403    fn remove_entry(&self, scope: &RcuReadScope, fd: &FdNumber) -> bool {
404        let raw_fd = fd.raw() as usize;
405        let view = self.store.read(scope);
406        if raw_fd >= view.len() {
407            return false;
408        }
409        let id = self.store.id();
410        let removed = view.slice[raw_fd].clear(id);
411        if removed && raw_fd < self.store.next_fd.get().raw() as usize {
412            self.store.next_fd.set(*fd);
413        }
414        removed
415    }
416
417    /// Sets the flags for a given `FdNumber`.
418    ///
419    /// Returns `Errno` if the `FdTable` does not contain an entry for the given `FdNumber`.
420    fn set_fd_flags(
421        &self,
422        scope: &RcuReadScope,
423        fd: FdNumber,
424        flags: FdFlags,
425    ) -> Result<(), Errno> {
426        let view = self.store.read(scope);
427        if view.is_none(fd) {
428            return error!(EBADF);
429        }
430        let raw_fd = fd.raw() as usize;
431        view.slice[raw_fd].set_flags(flags);
432        Ok(())
433    }
434
435    /// Retains only the entries for which the given predicate returns `true`.
436    ///
437    /// The predicate is called with the `FdNumber` and a mutable reference to the `FdFlags` for
438    /// each entry in the `FdTable`. If the predicate returns `false`, the entry is removed from
439    /// the `FdTable`. Otherwise, the `FdFlags` are updated to the value modified by the predicate.
440    fn retain<F>(&self, scope: &RcuReadScope, mut predicate: F)
441    where
442        F: FnMut(FdNumber, &mut FdFlags) -> bool,
443    {
444        let id = self.store.id();
445        let view = self.store.read(scope);
446        for (index, encoded_entry) in view.slice.iter().enumerate() {
447            let fd = FdNumber::from_raw(index as i32);
448            if let Some(guard) = encoded_entry.read(scope) {
449                let mut modified_flags = guard.flags();
450                if !predicate(fd, &mut modified_flags) {
451                    encoded_entry.clear(id);
452                } else if modified_flags != guard.flags() {
453                    encoded_entry.set_flags(modified_flags);
454                }
455            }
456        }
457        self.store.next_fd.set(self.calculate_lowest_available_fd(&view, &FdNumber::from_raw(0)));
458    }
459
460    /// Replaces the `FileHandle` for each entry in the `FdTable` with the result of the given
461    /// predicate.
462    ///
463    /// The predicate is called with the `FileHandle` for each entry in the `FdTable`. If the
464    /// predicate returns `Some(file)`, the entry is updated with the new `FileHandle`. Otherwise,
465    /// the entry is left unchanged.
466    fn remap<F>(&self, scope: &RcuReadScope, predicate: F)
467    where
468        F: Fn(&FileHandle) -> Option<FileHandle>,
469    {
470        let id = self.store.id();
471        let view = self.store.read(scope);
472        for encoded_entry in view.slice.iter() {
473            if let Some(guard) = encoded_entry.read(scope) {
474                let file = guard.to_handle();
475                if let Some(replacement_file) = predicate(&file) {
476                    encoded_entry.set_file(id, replacement_file);
477                }
478            }
479        }
480    }
481}
482
/// An `FdNumber` that can be atomically updated.
///
/// Used for the `next_fd` field of `FdTableInner`, which is only modified by the `FdTable` when
/// holding the `writer_queue` lock.
///
/// The derived `Default` starts at raw value zero.
#[derive(Debug, Default)]
struct AtomicFdNumber {
    /// The raw value of the `FdNumber`.
    value: AtomicI32,
}
492
493impl AtomicFdNumber {
494    /// Returns the current value of the `FdNumber`.
495    ///
496    /// Uses `Ordering::Relaxed`.
497    fn get(&self) -> FdNumber {
498        FdNumber::from_raw(self.value.load(Ordering::Relaxed))
499    }
500
501    /// Sets the value of the `FdNumber`.
502    ///
503    /// Uses `Ordering::Relaxed`.
504    fn set(&self, value: FdNumber) {
505        self.value.store(value.raw(), Ordering::Relaxed);
506    }
507}
508
509impl Clone for AtomicFdNumber {
510    fn clone(&self) -> Self {
511        Self { value: AtomicI32::new(self.value.load(Ordering::Relaxed)) }
512    }
513}
514
/// The state of an `FdTable` that is shared between tasks.
///
/// The `writer_queue` is used to serialize concurrent writers to the `FdTable`, and to prevent
/// writers from being blocked by readers.
///
/// The address of this struct is what identifies the table (see `FdTableInner::id`).
#[derive(Debug)]
struct FdTableInner {
    /// The entries of the `FdTable`.
    ///
    /// Indexed by the raw value of an `FdNumber`; empty slots are encoded as zero.
    entries: RcuArray<EncodedEntry>,

    /// The next available `FdNumber`.
    ///
    /// This is the memoized lowest `FdNumber` that has no entry.
    next_fd: AtomicFdNumber,

    /// A mutex used to serialize concurrent writers to the `FdTable`, and to prevent writers from
    /// being blocked by readers.
    writer_queue: Mutex<()>,
}
531
532impl Default for FdTableInner {
533    fn default() -> Self {
534        FdTableInner {
535            entries: Default::default(),
536            next_fd: AtomicFdNumber::default(),
537            writer_queue: Mutex::new(()),
538        }
539    }
540}
541
542impl Clone for FdTableInner {
543    fn clone(&self) -> Self {
544        let _guard = self.writer_queue.lock();
545        Self {
546            entries: self.entries.clone(),
547            next_fd: self.next_fd.clone(),
548            writer_queue: Mutex::new(()),
549        }
550    }
551}
552
553impl Drop for FdTableInner {
554    fn drop(&mut self) {
555        let id = self.id();
556        let scope = RcuReadScope::new();
557        let view = self.read(&scope);
558        for entry in view.slice.iter() {
559            entry.clear(id);
560        }
561    }
562}
563
564impl FdTableInner {
565    /// Returns the `FdTableId` of the `FdTableInner`.
566    fn id(&self) -> FdTableId {
567        FdTableId::new(self as *const Self)
568    }
569
570    /// Returns an `Arc<FdTableInner>` that is a snapshot of the state of the `FdTableInner`.
571    fn unshare(&self) -> Arc<Self> {
572        Arc::new(self.clone())
573    }
574
575    /// Returns a `FdTableView` that provides read-only access to the state of the `FdTableInner`.
576    fn read<'a>(&self, scope: &'a RcuReadScope) -> FdTableView<'a> {
577        let slice = self.entries.as_slice(scope);
578        FdTableView { slice }
579    }
580
581    /// Returns a `FdTableWriteGuard` that provides exclusive access to the state of the
582    /// `FdTableInner`.
583    fn write(&self) -> FdTableWriteGuard<'_> {
584        FdTableWriteGuard { store: self, _write_guard: self.writer_queue.lock() }
585    }
586}
587
/// An `FdTable` is a table of file descriptors.
///
/// Cloning an `FdTable` clones the `inner` handle, while `fork` and `unshare` install a snapshot
/// copy of the current `FdTableInner`.
#[derive(Debug, Default)]
pub struct FdTable {
    /// The state of the `FdTable` that is shared between tasks.
    inner: RcuArc<FdTableInner>,
}
594
/// The target `FdNumber` for a duplicated file descriptor.
pub enum TargetFdNumber {
    /// The duplicated `FdNumber` will be the smallest available `FdNumber`.
    Default,

    /// The duplicated `FdNumber` should be this specific `FdNumber`.
    ///
    /// Any entry already stored at this `FdNumber` is removed first.
    Specific(FdNumber),

    /// The duplicated `FdNumber` should be the smallest available `FdNumber` that is greater than
    /// or equal to this `FdNumber`.
    Minimum(FdNumber),
}
606
607impl FdTable {
608    /// Returns the `FdTableId` of the `FdTable`.
609    pub fn id(&self) -> FdTableId {
610        self.inner.read().id()
611    }
612
613    /// Returns new unshared `FdTable` that is a snapshot of the state of the `FdTable`.
614    pub fn fork(&self) -> FdTable {
615        let unshared = self.inner.read().unshare();
616        FdTable { inner: RcuArc::new(unshared) }
617    }
618
619    /// Ensures that this `FdTable` is not shared by any other `FdTable` instances.
620    pub fn unshare(&self) {
621        let unshared = self.inner.read().unshare();
622        self.inner.update(unshared);
623    }
624
625    /// Releases the `FdTable`, closing any files opened exclusively by this table.
626    pub fn release(&self) {
627        self.inner.update(Default::default());
628    }
629
630    /// Trims close-on-exec file descriptors from the table.
631    pub fn exec(&self, locked: &mut Locked<Unlocked>, current_task: &CurrentTask) {
632        self.retain(locked, current_task, |_fd, flags| !flags.contains(FdFlags::CLOEXEC));
633    }
634
635    /// Inserts a file descriptor into the table.
636    pub fn insert<L>(
637        &self,
638        locked: &mut Locked<L>,
639        current_task: &CurrentTask,
640        fd: FdNumber,
641        file: FileHandle,
642    ) -> Result<(), Errno>
643    where
644        L: LockBefore<ThreadGroupLimits>,
645    {
646        let flags = FdFlags::empty();
647        let rlimit = current_task.thread_group().get_rlimit(locked, Resource::NOFILE);
648        let inner = self.inner.read();
649        let guard = inner.write();
650        guard.insert_entry(inner.scope(), fd, rlimit, FdTableEntry { file, flags })?;
651        Ok(())
652    }
653
654    /// Adds a file descriptor to the table.
655    ///
656    /// The file descriptor will be assigned the next available number.
657    ///
658    /// Returns the assigned file descriptor number.
659    ///
660    /// This function is the most common way to add a file descriptor to the table.
661    pub fn add<L>(
662        &self,
663        locked: &mut Locked<L>,
664        current_task: &CurrentTask,
665        file: FileHandle,
666        flags: FdFlags,
667    ) -> Result<FdNumber, Errno>
668    where
669        L: LockEqualOrBefore<FileOpsCore>,
670    {
671        let locked = locked.cast_locked::<FileOpsCore>();
672        let rlimit = current_task.thread_group().get_rlimit(locked, Resource::NOFILE);
673        let inner = self.inner.read();
674        let guard = inner.write();
675        let fd = guard.next_fd();
676        guard.insert_entry(inner.scope(), fd, rlimit, FdTableEntry { file, flags })?;
677        Ok(fd)
678    }
679
680    /// Duplicates a file descriptor.
681    ///
682    /// If `target` is `TargetFdNumber::Minimum`, a new `FdNumber` is allocated. Returns the new
683    /// `FdNumber`.
684    pub fn duplicate<L>(
685        &self,
686        locked: &mut Locked<L>,
687        current_task: &CurrentTask,
688        oldfd: FdNumber,
689        target: TargetFdNumber,
690        flags: FdFlags,
691    ) -> Result<FdNumber, Errno>
692    where
693        L: LockBefore<ThreadGroupLimits>,
694    {
695        let rlimit = current_task.thread_group().get_rlimit(locked, Resource::NOFILE);
696        let inner = self.inner.read();
697        let guard = inner.write();
698        let file = guard.get_file(inner.scope(), oldfd).ok_or_else(|| errno!(EBADF))?;
699
700        let fd = match target {
701            TargetFdNumber::Specific(fd) => {
702                // We need to check the rlimit before we remove the entry from state
703                // because we cannot error out after removing the entry.
704                if fd.raw() as u64 >= rlimit {
705                    // ltp_dup201 shows that we're supposed to return EBADF in this
706                    // situation, instead of EMFILE, which is what we normally return
707                    // when we're past the rlimit.
708                    return error!(EBADF);
709                }
710                guard.remove_entry(inner.scope(), &fd);
711                fd
712            }
713            TargetFdNumber::Minimum(fd) => guard.get_lowest_available_fd(inner.scope(), fd),
714            TargetFdNumber::Default => {
715                guard.get_lowest_available_fd(inner.scope(), FdNumber::from_raw(0))
716            }
717        };
718        let existing_entry =
719            guard.insert_entry(inner.scope(), fd, rlimit, FdTableEntry { file, flags })?;
720        assert!(!existing_entry);
721        Ok(fd)
722    }
723
724    /// Returns the file handle associated with the given file descriptor.
725    ///
726    /// Returns the file handle even if the file was opened with `O_PATH`.
727    ///
728    /// This operation is uncommon. Most clients should use `get` instead, which fails if the file
729    /// was opened with `O_PATH`.
730    pub fn get_allowing_opath(&self, fd: FdNumber) -> Result<FileHandle, Errno> {
731        self.get_allowing_opath_with_flags(fd).map(|(file, _flags)| file)
732    }
733
734    /// Returns the file handle and flags associated with the given file descriptor.
735    ///
736    /// Returns the file handle even if the file was opened with `O_PATH`.
737    ///
738    /// This operation is uncommon. Most clients should use `get` instead, which fails if the file
739    /// was opened with `O_PATH`.
740    pub fn get_allowing_opath_with_flags(
741        &self,
742        fd: FdNumber,
743    ) -> Result<(FileHandle, FdFlags), Errno> {
744        let inner = self.inner.read();
745        let view = inner.read(inner.scope());
746        view.get_entry(inner.scope(), fd)
747            .map(|entry| (entry.file, entry.flags))
748            .ok_or_else(|| errno!(EBADF))
749    }
750
751    /// Returns the file handle associated with the given file descriptor.
752    ///
753    /// This operation fails if the file was opened with `O_PATH`.
754    pub fn get(&self, fd: FdNumber) -> Result<FileHandle, Errno> {
755        let file = self.get_allowing_opath(fd)?;
756        if file.flags().contains(OpenFlags::PATH) {
757            return error!(EBADF);
758        }
759        Ok(file)
760    }
761
762    /// Closes the file descriptor associated with the given file descriptor.
763    ///
764    /// This operation fails if the file descriptor is not valid.
765    pub fn close(&self, fd: FdNumber) -> Result<(), Errno> {
766        let inner = self.inner.read();
767        let guard = inner.write();
768        if guard.remove_entry(inner.scope(), &fd) { Ok(()) } else { error!(EBADF) }
769    }
770
771    /// Returns the flags associated with the given file descriptor.
772    ///
773    /// Returns the flags even if the file was opened with `O_PATH`.
774    pub fn get_fd_flags_allowing_opath(&self, fd: FdNumber) -> Result<FdFlags, Errno> {
775        self.get_allowing_opath_with_flags(fd).map(|(_file, flags)| flags)
776    }
777
778    /// Updates the flags of the specified FD with the `request`ed change.
779    ///
780    /// This operation fails if the file descriptor was opened with `O_PATH` or is not valid.
781    pub fn ioctl_fd_flags(
782        &self,
783        current_task: &CurrentTask,
784        fd: FdNumber,
785        request: u32,
786    ) -> Result<(), Errno> {
787        let inner = self.inner.read();
788        let guard = inner.write();
789        let file = guard.get_file(inner.scope(), fd).ok_or_else(|| errno!(EBADF))?;
790        if file.flags().contains(OpenFlags::PATH) {
791            return error!(EBADF);
792        }
793        let flags = match request {
794            FIOCLEX => FdFlags::CLOEXEC,
795            FIONCLEX => FdFlags::empty(),
796            _ => {
797                return error!(EINVAL);
798            }
799        };
800        security::check_file_ioctl_access(current_task, &file, request)?;
801        guard.set_fd_flags(inner.scope(), fd, flags)
802    }
803
804    /// Sets the flags associated with the given file descriptor.
805    ///
806    /// This operation fails if the file descriptor is not valid.
807    pub fn set_fd_flags_allowing_opath(&self, fd: FdNumber, flags: FdFlags) -> Result<(), Errno> {
808        let inner = self.inner.read();
809        let guard = inner.write();
810        guard.set_fd_flags(inner.scope(), fd, flags)
811    }
812
813    /// Retains only the FDs matching the given `predicate`.
814    ///
815    /// The predicate is called with the `FdNumber` and a mutable reference to the `FdFlags` for
816    /// each entry in the `FdTable`. If the predicate returns `false`, the entry is removed from
817    /// the `FdTable`. Otherwise, the `FdFlags` are updated to the value modified by the predicate.
818    pub fn retain<L, F>(&self, _locked: &mut Locked<L>, _current_task: &CurrentTask, predicate: F)
819    where
820        L: LockEqualOrBefore<FileOpsCore>,
821        F: Fn(FdNumber, &mut FdFlags) -> bool,
822    {
823        let inner = self.inner.read();
824        let guard = inner.write();
825        guard.retain(inner.scope(), predicate);
826    }
827
828    /// Returns a vector of all current file descriptors in the table.
829    pub fn get_all_fds(&self) -> Vec<FdNumber> {
830        let inner = self.inner.read();
831        let view = inner.read(inner.scope());
832        view.slice
833            .iter()
834            .enumerate()
835            .filter_map(|(index, encoded_entry)| {
836                if encoded_entry.is_none() { None } else { Some(FdNumber::from_raw(index as i32)) }
837            })
838            .collect()
839    }
840
841    /// Executes `predicate(file) => maybe_replacement` on every non-empty table entry.
842    ///
843    /// Replaces `file` with `replacement_file` in the table when
844    /// `maybe_replacement == Some(replacement_file)`.
845    pub fn remap<L, F: Fn(&FileHandle) -> Option<FileHandle>>(
846        &self,
847        _locked: &mut Locked<L>,
848        _current_task: &CurrentTask,
849        predicate: F,
850    ) where
851        L: LockEqualOrBefore<FileOpsCore>,
852    {
853        let inner = self.inner.read();
854        let guard = inner.write();
855        guard.remap(inner.scope(), predicate);
856    }
857}
858
859impl Clone for FdTable {
860    fn clone(&self) -> Self {
861        FdTable { inner: self.inner.clone() }
862    }
863}
864
#[cfg(test)]
mod test {
    use super::*;
    use crate::fs::fuchsia::SyslogFile;
    use crate::testing::*;

    // Installs `file` into `files` with no descriptor flags set and returns the result of
    // `FdTable::add`.
    fn add(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        files: &FdTable,
        file: FileHandle,
    ) -> Result<FdNumber, Errno> {
        let no_flags = FdFlags::empty();
        files.add(locked, current_task, file, no_flags)
    }

    // Files added to an empty table receive descriptors 0 and 1 and can be looked up again.
    #[::fuchsia::test]
    async fn test_fd_table_install() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            let first = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(first.raw(), 0);
            let second = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(second.raw(), 1);

            // Both descriptors must resolve to the very file that was installed.
            for fd in [first, second] {
                assert!(Arc::ptr_eq(&table.get(fd).unwrap(), &syslog));
            }

            // The descriptor right after the last installed one was never handed out.
            let unused = FdNumber::from_raw(second.raw() + 1);
            assert_eq!(table.get(unused).map(|_| ()), error!(EBADF));

            table.release();
        })
        .await;
    }

    // A forked table sees the same files, but flag changes made afterwards on the original
    // do not show up in the fork.
    #[::fuchsia::test]
    async fn test_fd_table_fork() {
        spawn_kernel_and_run(async |locked, current_task| {
            let parent = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            let first = add(locked, &current_task, &parent, syslog.clone()).unwrap();
            let second = add(locked, &current_task, &parent, syslog).unwrap();
            let never_added = FdNumber::from_raw(2);

            let child = parent.fork();

            // Every descriptor present before the fork points at the same file in both tables.
            for fd in [first, second] {
                let in_parent = parent.get(fd).unwrap();
                let in_child = child.get(fd).unwrap();
                assert_eq!(Arc::as_ptr(&in_parent), Arc::as_ptr(&in_child));
            }

            // A descriptor that was never added is missing from both tables.
            assert!(parent.get(never_added).is_err());
            assert!(child.get(never_added).is_err());

            // Setting CLOEXEC on the original after the fork must not leak into the fork.
            parent.set_fd_flags_allowing_opath(first, FdFlags::CLOEXEC).unwrap();
            assert_eq!(FdFlags::CLOEXEC, parent.get_fd_flags_allowing_opath(first).unwrap());
            assert_ne!(FdFlags::CLOEXEC, child.get_fd_flags_allowing_opath(first).unwrap());

            child.release();
            parent.release();
        })
        .await;
    }

    // `exec` drops the descriptor marked CLOEXEC and keeps the unmarked one.
    #[::fuchsia::test]
    async fn test_fd_table_exec() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            let marked = add(locked, &current_task, &table, syslog.clone()).unwrap();
            let unmarked = add(locked, &current_task, &table, syslog).unwrap();

            table.set_fd_flags_allowing_opath(marked, FdFlags::CLOEXEC).unwrap();

            // Before exec, both descriptors are usable.
            for fd in [marked, unmarked] {
                assert!(table.get(fd).is_ok());
            }

            table.exec(locked, &current_task);

            // After exec, only the descriptor without CLOEXEC is left.
            assert!(table.get(marked).is_err());
            assert!(table.get(unmarked).is_ok());

            table.release();
        })
        .await;
    }

    // A closed descriptor number is handed out again by the next insertion.
    #[::fuchsia::test]
    async fn test_fd_table_pack_values() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            // Start with two descriptors, 0 and 1.
            let first = add(locked, &current_task, &table, syslog.clone()).unwrap();
            let second = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(first.raw(), 0);
            assert_eq!(second.raw(), 1);

            // Closing descriptor 0 works once; the second attempt reports an error.
            assert!(table.close(first).is_ok());
            assert!(table.close(first).is_err());
            // Lookups of the closed descriptor fail from now on.
            assert!(table.get(first).is_err());

            // The next insertion reuses the gap left at 0.
            let reused = add(locked, &current_task, &table, syslog).unwrap();
            assert_eq!(reused.raw(), 0);

            table.release();
        })
        .await;
    }
}