starnix_core/vfs/
fd_table.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::task::{CurrentTask, CurrentTaskAndLocked, register_delayed_release};
6use crate::vfs::{FdNumber, FileHandle, FileReleaser};
7use bitflags::bitflags;
8use fuchsia_rcu::RcuReadScope;
9use fuchsia_rcu::rcu_arc::RcuArc;
10use fuchsia_rcu_collections::rcu_array::RcuArray;
11use starnix_sync::{
12    FileOpsCore, LockBefore, LockEqualOrBefore, Locked, Mutex, MutexGuard, ThreadGroupLimits,
13    Unlocked,
14};
15use starnix_syscalls::SyscallResult;
16use starnix_types::ownership::Releasable;
17use starnix_uapi::errors::Errno;
18use starnix_uapi::open_flags::OpenFlags;
19use starnix_uapi::resource_limits::Resource;
20use starnix_uapi::{FD_CLOEXEC, errno, error};
21use static_assertions::const_assert;
22use std::sync::Arc;
23use std::sync::atomic::{AtomicI32, AtomicUsize, Ordering};
24
25bitflags! {
26    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
27    pub struct FdFlags: u32 {
28        /// Whether the file descriptor should be closed when the process execs.
29        const CLOEXEC = FD_CLOEXEC;
30    }
31}
32
33impl std::convert::From<FdFlags> for SyscallResult {
34    fn from(value: FdFlags) -> Self {
35        value.bits().into()
36    }
37}
38
39/// An identifier for an `FdTable`.
40///
41/// Used by flock to drop file locks when a file descriptor is closed.
42#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
43pub struct FdTableId(usize);
44
45impl FdTableId {
46    fn new(id: *const FdTableInner) -> Self {
47        Self(id as usize)
48    }
49
50    pub fn raw(&self) -> usize {
51        self.0
52    }
53}
54
/// Mask selecting the CLOEXEC bit of an encoded `FdTable` entry.
///
/// The CLOEXEC bit and the address of the `FileObject` share a single `usize` so that an FdTable
/// entry can be operated on atomically.
const FLAGS_MASK: usize = 0b1;
58
/// One slot of an `FdTable` in its packed form.
///
/// A slot is either empty or holds both a `FileHandle` and its CLOEXEC bit.
#[derive(Default, Debug)]
struct EncodedEntry {
    /// The packed contents of the slot, kept in a single `usize` rather than in separate fields.
    ///
    /// * A `value` of zero means the slot is empty.
    /// * The lowest bit of `value` is the CLOEXEC bit.
    /// * The remaining bits of `value` are a `FileHandle` converted to a raw pointer.
    value: AtomicUsize,
}
73
74// An assert to ensure that the lowest bit of the `FileHandle` is available to store the CLOEXEC
75// bit.
76const_assert!(std::mem::align_of::<*const FileReleaser>() >= 1 << FLAGS_MASK);
77
78impl EncodedEntry {
79    /// Encodes a `FileHandle` and `FdFlags` into a single `usize`.
80    ///
81    /// The returned value holds a reference to the `FileObject` and must be released to avoid a
82    /// memory leak.
83    fn encode(file: FileHandle, flags: FdFlags) -> usize {
84        let ptr = Arc::into_raw(file) as usize;
85        let flags = (flags.bits() as usize) & FLAGS_MASK;
86        ptr | flags
87    }
88
89    /// Releases the `FileHandle` for a previously encoded value.
90    ///
91    /// # Safety
92    ///
93    /// `value` must have been encoded by `Self::encode`.
94    unsafe fn release(id: FdTableId, value: usize) {
95        let ptr = Self::decode_ptr(value);
96        if !ptr.is_null() {
97            // SAFETY: The pointer is valid because it was encoded in `self.value`.
98            let file = unsafe { Arc::from_raw(ptr) };
99            register_delayed_release(FlushedFile(file, id));
100        }
101    }
102
103    /// Decodes the `FdFlags` from an encoded `usize`.
104    fn decode_flags(value: usize) -> FdFlags {
105        FdFlags::from_bits_truncate((value & FLAGS_MASK) as u32)
106    }
107
108    /// Decodes the `FileHandle` from an encoded `usize`.
109    fn decode_ptr(value: usize) -> *const FileReleaser {
110        (value & !FLAGS_MASK) as *const _
111    }
112
113    /// Creates a new `EncodedEntry` from a `FdTableEntry`.
114    fn new(entry: FdTableEntry) -> Self {
115        Self { value: AtomicUsize::new(Self::encode(entry.file, entry.flags)) }
116    }
117
118    /// Whether this entry contains a valid `FileHandle`.
119    fn is_some(&self) -> bool {
120        let value = self.value.load(Ordering::Acquire);
121        value != 0
122    }
123
124    /// Whether this entry is empty.
125    fn is_none(&self) -> bool {
126        !self.is_some()
127    }
128
129    /// Returns the `FdFlags` for this entry, if any.
130    fn flags(&self) -> Option<FdFlags> {
131        let value = self.value.load(Ordering::Acquire);
132        if value == 0 {
133            return None;
134        }
135        Some(Self::decode_flags(value))
136    }
137
138    /// Sets the `FdFlags` for this entry, preserving the `FileHandle`.
139    fn set_flags(&self, flags: FdFlags) {
140        loop {
141            let old_value = self.value.load(Ordering::Relaxed);
142            assert!(old_value != 0);
143            let new_value = old_value & !FLAGS_MASK | (flags.bits() as usize) & FLAGS_MASK;
144            if self
145                .value
146                .compare_exchange_weak(old_value, new_value, Ordering::AcqRel, Ordering::Relaxed)
147                .is_ok()
148            {
149                return;
150            }
151        }
152    }
153
154    /// Returns the `FileHandle` for this entry, if any.
155    fn file(&self) -> Option<FileHandle> {
156        self.to_entry().map(|entry| entry.file)
157    }
158
159    /// Sets the `FileHandle` for this entry, preserving the `FdFlags`.
160    fn set_file(&self, id: FdTableId, file: FileHandle) {
161        let ptr = Arc::into_raw(file) as usize;
162        loop {
163            let old_value = self.value.load(Ordering::Relaxed);
164            assert!(old_value != 0);
165            let flags = old_value & FLAGS_MASK;
166            let new_value = ptr | flags;
167            if self
168                .value
169                .compare_exchange_weak(old_value, new_value, Ordering::AcqRel, Ordering::Relaxed)
170                .is_ok()
171            {
172                // SAFETY: The value was previously encoded by `Self::encode`.
173                unsafe { Self::release(id, old_value) };
174                return;
175            }
176        }
177    }
178
179    /// Returns the `FileHandle` and `FdFlags` for this entry, if any.
180    fn to_entry(&self) -> Option<FdTableEntry> {
181        let value = self.value.load(Ordering::Acquire);
182        if value == 0 {
183            return None;
184        }
185        let flags = Self::decode_flags(value);
186        let ptr = Self::decode_ptr(value);
187        // SAFETY: The pointer is valid because it was encoded in `self.value`.
188        let file = unsafe {
189            Arc::increment_strong_count(ptr);
190            Arc::from_raw(ptr)
191        };
192        Some(FdTableEntry { file, flags })
193    }
194
195    /// Sets the `FileHandle` and `FdFlags` for this entry.
196    fn set_entry(&self, id: FdTableId, entry: FdTableEntry) -> bool {
197        // SAFETY: The value is encoded by `Self::encode`.
198        unsafe { self.set(id, Self::encode(entry.file, entry.flags)) }
199    }
200
201    /// Makes the entry empty.
202    fn clear(&self, id: FdTableId) -> bool {
203        // SAFETY: The value is zero.
204        unsafe { self.set(id, 0) }
205    }
206
207    /// Sets the value of this entry to the given value.
208    ///
209    /// Most clients should call `set_entry` or `clear` instead.
210    ///
211    /// # Safety
212    ///
213    /// The value must be encoded by `Self::encode` or be zero.
214    unsafe fn set(&self, id: FdTableId, value: usize) -> bool {
215        let old_value = self.value.swap(value, Ordering::AcqRel);
216        if old_value != 0 {
217            // SAFETY: The value was previously encoded by `Self::encode`.
218            unsafe { Self::release(id, old_value) };
219            true
220        } else {
221            false
222        }
223    }
224}
225
226impl Clone for EncodedEntry {
227    fn clone(&self) -> Self {
228        if let Some(entry) = self.to_entry() { Self::new(entry) } else { Self::default() }
229    }
230}
231
232impl Drop for EncodedEntry {
233    fn drop(&mut self) {
234        let value = self.value.load(Ordering::Acquire);
235        let ptr = Self::decode_ptr(value);
236        if !ptr.is_null() {
237            // SAFETY: The pointer is valid because it was encoded in `self.value`.
238            let _file = unsafe { Arc::from_raw(ptr) };
239        }
240    }
241}
242
243/// An entry in the `FdTable`.
244#[derive(Debug, Clone)]
245struct FdTableEntry {
246    /// The file handle.
247    file: FileHandle,
248
249    /// The flags associated with the file handle.
250    flags: FdFlags,
251}
252
253/// A `FileHandle` that has been closed and is waiting to be flushed.
254struct FlushedFile(FileHandle, FdTableId);
255
256impl Releasable for FlushedFile {
257    type Context<'a> = CurrentTaskAndLocked<'a>;
258    fn release<'a>(self, context: Self::Context<'a>) {
259        let (locked, current_task) = context;
260        let FlushedFile(file, id) = self;
261        file.flush(locked, current_task, id);
262    }
263}
264
265/// A read-only view of an `FdTable`.
266///
267/// When reading an `FdTable`, we use an `FdTableView` to have a coherent view of the table even
268/// though the table can be modified by other threads concurrently.
269///
270/// The actual entries in the slice can still be modified by other threads. However, the view
271/// provided by the `FdTableView` is protected by an RCU read lock.
272struct FdTableView<'a> {
273    /// The entries in the table.
274    slice: &'a [EncodedEntry],
275}
276
277impl<'a> FdTableView<'a> {
278    /// Returns the number of entries in the table.
279    fn len(&self) -> usize {
280        self.slice.len()
281    }
282
283    /// Whether the view contains a given `FdNumber`.
284    fn is_some(&self, fd: FdNumber) -> bool {
285        self.slice.get(fd.raw() as usize).map_or(false, |entry| entry.is_some())
286    }
287
288    /// Whether the view does not contain a given `FdNumber`.
289    fn is_none(&self, fd: FdNumber) -> bool {
290        !self.is_some(fd)
291    }
292
293    /// Returns the `FileHandle` for a given `FdNumber`, if any.
294    fn get_file(&self, fd: FdNumber) -> Option<FileHandle> {
295        self.slice.get(fd.raw() as usize).and_then(|entry| entry.file())
296    }
297
298    /// Returns the `FdTableEntry` for a given `FdNumber`, if any.
299    fn get_entry(&self, fd: FdNumber) -> Option<FdTableEntry> {
300        self.slice.get(fd.raw() as usize).and_then(|entry| entry.to_entry())
301    }
302}
303
304struct FdTableWriteGuard<'a> {
305    store: &'a FdTableInner,
306    _write_guard: MutexGuard<'a, ()>,
307}
308
309impl<'a> FdTableWriteGuard<'a> {
310    /// The lowest available `FdNumber`.
311    fn next_fd(&self) -> FdNumber {
312        self.store.next_fd.get()
313    }
314
315    /// Recalculates the lowest available FD >= minfd based on the contents of the map.
316    fn calculate_lowest_available_fd(&self, view: &FdTableView<'_>, minfd: &FdNumber) -> FdNumber {
317        let mut fd: FdNumber = *minfd;
318        while view.is_some(fd) {
319            fd = FdNumber::from_raw(fd.raw() + 1);
320        }
321        fd
322    }
323
324    // Returns the (possibly memoized) lowest available FD >= minfd in this map.
325    fn get_lowest_available_fd(&self, scope: &RcuReadScope, minfd: FdNumber) -> FdNumber {
326        if minfd > self.store.next_fd.get() {
327            let view = self.store.read(scope);
328            return self.calculate_lowest_available_fd(&view, &minfd);
329        }
330        self.store.next_fd.get()
331    }
332
333    /// Returns the `FileHandle` for a given `FdNumber`, if any.
334    fn get_file(&self, scope: &RcuReadScope, fd: FdNumber) -> Option<FileHandle> {
335        self.store.read(scope).get_file(fd)
336    }
337
338    /// Inserts a new entry into the `FdTable`.
339    ///
340    /// Returns whether the `FdTable` previously contained an entry for the given `FdNumber`.
341    fn insert_entry(
342        &self,
343        scope: &RcuReadScope,
344        fd: FdNumber,
345        rlimit: u64,
346        entry: FdTableEntry,
347    ) -> Result<bool, Errno> {
348        let raw_fd = fd.raw();
349        if raw_fd < 0 {
350            return error!(EBADF);
351        }
352        if raw_fd as u64 >= rlimit {
353            return error!(EMFILE);
354        }
355        let mut view = self.store.read(scope);
356        if raw_fd == self.store.next_fd.get().raw() {
357            self.store
358                .next_fd
359                .set(self.calculate_lowest_available_fd(&view, &FdNumber::from_raw(raw_fd + 1)));
360        }
361        let raw_fd = raw_fd as usize;
362        if view.len() <= raw_fd {
363            // SAFETY: The write guard excludes concurrent writers.
364            unsafe { self.store.entries.ensure_at_least(raw_fd + 1) };
365            view = self.store.read(scope);
366        }
367        let id = self.store.id();
368        Ok(view.slice[raw_fd].set_entry(id, entry))
369    }
370
371    /// Removes an entry from the `FdTable`.
372    ///
373    /// Returns whether the `FdTable` previously contained an entry for the given `FdNumber`.
374    fn remove_entry(&self, scope: &RcuReadScope, fd: &FdNumber) -> bool {
375        let raw_fd = fd.raw() as usize;
376        let view = self.store.read(scope);
377        if raw_fd >= view.len() {
378            return false;
379        }
380        let id = self.store.id();
381        let removed = view.slice[raw_fd].clear(id);
382        if removed && raw_fd < self.store.next_fd.get().raw() as usize {
383            self.store.next_fd.set(*fd);
384        }
385        removed
386    }
387
388    /// Sets the flags for a given `FdNumber`.
389    ///
390    /// Returns `Errno` if the `FdTable` does not contain an entry for the given `FdNumber`.
391    fn set_fd_flags(
392        &self,
393        scope: &RcuReadScope,
394        fd: FdNumber,
395        flags: FdFlags,
396    ) -> Result<(), Errno> {
397        let view = self.store.read(scope);
398        if view.is_none(fd) {
399            return error!(EBADF);
400        }
401        let raw_fd = fd.raw() as usize;
402        view.slice[raw_fd].set_flags(flags);
403        Ok(())
404    }
405
406    /// Retains only the entries for which the given predicate returns `true`.
407    ///
408    /// The predicate is called with the `FdNumber` and a mutable reference to the `FdFlags` for
409    /// each entry in the `FdTable`. If the predicate returns `false`, the entry is removed from
410    /// the `FdTable`. Otherwise, the `FdFlags` are updated to the value modified by the predicate.
411    fn retain<F>(&self, scope: &RcuReadScope, mut predicate: F)
412    where
413        F: FnMut(FdNumber, &mut FdFlags) -> bool,
414    {
415        let id = self.store.id();
416        let view = self.store.read(scope);
417        for (index, encoded_entry) in view.slice.iter().enumerate() {
418            let fd = FdNumber::from_raw(index as i32);
419            if let Some(flags) = encoded_entry.flags() {
420                let mut modified_flags = flags;
421                if !predicate(fd, &mut modified_flags) {
422                    encoded_entry.clear(id);
423                } else if modified_flags != flags {
424                    encoded_entry.set_flags(modified_flags);
425                }
426            }
427        }
428        self.store.next_fd.set(self.calculate_lowest_available_fd(&view, &FdNumber::from_raw(0)));
429    }
430
431    /// Replaces the `FileHandle` for each entry in the `FdTable` with the result of the given
432    /// predicate.
433    ///
434    /// The predicate is called with the `FileHandle` for each entry in the `FdTable`. If the
435    /// predicate returns `Some(file)`, the entry is updated with the new `FileHandle`. Otherwise,
436    /// the entry is left unchanged.
437    fn remap<F>(&self, scope: &RcuReadScope, predicate: F)
438    where
439        F: Fn(&FileHandle) -> Option<FileHandle>,
440    {
441        let id = self.store.id();
442        let view = self.store.read(scope);
443        for encoded_entry in view.slice.iter() {
444            if let Some(file) = encoded_entry.file() {
445                if let Some(replacement_file) = predicate(&file) {
446                    encoded_entry.set_file(id, replacement_file);
447                }
448            }
449        }
450    }
451}
452
453/// An `FdNumber` that can be atomically updated.
454///
455/// Used for the `next_fd` field of `FdTableInner`, which is only modified by the `FdTable` when
456/// holding the `writer_queue` lock.
457#[derive(Debug, Default)]
458struct AtomicFdNumber {
459    /// The raw value of the `FdNumber`.
460    value: AtomicI32,
461}
462
463impl AtomicFdNumber {
464    /// Returns the current value of the `FdNumber`.
465    ///
466    /// Uses `Ordering::Relaxed`.
467    fn get(&self) -> FdNumber {
468        FdNumber::from_raw(self.value.load(Ordering::Relaxed))
469    }
470
471    /// Sets the value of the `FdNumber`.
472    ///
473    /// Uses `Ordering::Relaxed`.
474    fn set(&self, value: FdNumber) {
475        self.value.store(value.raw(), Ordering::Relaxed);
476    }
477}
478
479impl Clone for AtomicFdNumber {
480    fn clone(&self) -> Self {
481        Self { value: AtomicI32::new(self.value.load(Ordering::Relaxed)) }
482    }
483}
484
485/// The state of an `FdTable` that is shared between tasks.
486///
487/// The `writer_queue` is used to serialize concurrent writers to the `FdTable`, and to prevent
488/// writers from being blocked by readers.
489#[derive(Debug)]
490struct FdTableInner {
491    /// The entries of the `FdTable`.
492    entries: RcuArray<EncodedEntry>,
493
494    /// The next available `FdNumber`.
495    next_fd: AtomicFdNumber,
496
497    /// A mutex used to serialize concurrent writers to the `FdTable`, and to prevent writers from
498    /// being blocked by readers.
499    writer_queue: Mutex<()>,
500}
501
502impl Default for FdTableInner {
503    fn default() -> Self {
504        FdTableInner {
505            entries: Default::default(),
506            next_fd: AtomicFdNumber::default(),
507            writer_queue: Mutex::new(()),
508        }
509    }
510}
511
512impl Clone for FdTableInner {
513    fn clone(&self) -> Self {
514        let _guard = self.writer_queue.lock();
515        Self {
516            entries: self.entries.clone(),
517            next_fd: self.next_fd.clone(),
518            writer_queue: Mutex::new(()),
519        }
520    }
521}
522
523impl Drop for FdTableInner {
524    fn drop(&mut self) {
525        let id = self.id();
526        let scope = RcuReadScope::new();
527        let view = self.read(&scope);
528        for entry in view.slice.iter() {
529            entry.clear(id);
530        }
531    }
532}
533
534impl FdTableInner {
535    /// Returns the `FdTableId` of the `FdTableInner`.
536    fn id(&self) -> FdTableId {
537        FdTableId::new(self as *const Self)
538    }
539
540    /// Returns an `Arc<FdTableInner>` that is a snapshot of the state of the `FdTableInner`.
541    fn unshare(&self) -> Arc<Self> {
542        Arc::new(self.clone())
543    }
544
545    /// Returns a `FdTableView` that provides read-only access to the state of the `FdTableInner`.
546    fn read<'a>(&self, scope: &'a RcuReadScope) -> FdTableView<'a> {
547        let slice = self.entries.as_slice(scope);
548        FdTableView { slice }
549    }
550
551    /// Returns a `FdTableWriteGuard` that provides exclusive access to the state of the
552    /// `FdTableInner`.
553    fn write(&self) -> FdTableWriteGuard<'_> {
554        FdTableWriteGuard { store: self, _write_guard: self.writer_queue.lock() }
555    }
556}
557
558/// An `FdTable` is a table of file descriptors.
559#[derive(Debug, Default)]
560pub struct FdTable {
561    /// The state of the `FdTable` that is shared between tasks.
562    inner: RcuArc<FdTableInner>,
563}
564
565/// The target `FdNumber` for a duplicated file descriptor.
566pub enum TargetFdNumber {
567    /// The duplicated `FdNumber` will be the smallest available `FdNumber`.
568    Default,
569
570    /// The duplicated `FdNumber` should be this specific `FdNumber`.
571    Specific(FdNumber),
572
573    /// The duplicated `FdNumber` should be greater than this `FdNumber`.
574    Minimum(FdNumber),
575}
576
577impl FdTable {
578    /// Returns the `FdTableId` of the `FdTable`.
579    pub fn id(&self) -> FdTableId {
580        self.inner.read().id()
581    }
582
583    /// Returns new unshared `FdTable` that is a snapshot of the state of the `FdTable`.
584    pub fn fork(&self) -> FdTable {
585        let unshared = self.inner.read().unshare();
586        FdTable { inner: RcuArc::new(unshared) }
587    }
588
589    /// Ensures that this `FdTable` is not shared by any other `FdTable` instances.
590    pub fn unshare(&self) {
591        let unshared = self.inner.read().unshare();
592        self.inner.update(unshared);
593    }
594
595    /// Releases the `FdTable`, closing any files opened exclusively by this table.
596    pub fn release(&self) {
597        self.inner.update(Default::default());
598    }
599
600    /// Trims close-on-exec file descriptors from the table.
601    pub fn exec(&self, locked: &mut Locked<Unlocked>, current_task: &CurrentTask) {
602        self.retain(locked, current_task, |_fd, flags| !flags.contains(FdFlags::CLOEXEC));
603    }
604
605    /// Inserts a file descriptor into the table.
606    pub fn insert<L>(
607        &self,
608        locked: &mut Locked<L>,
609        current_task: &CurrentTask,
610        fd: FdNumber,
611        file: FileHandle,
612    ) -> Result<(), Errno>
613    where
614        L: LockBefore<ThreadGroupLimits>,
615    {
616        let flags = FdFlags::empty();
617        let rlimit = current_task.thread_group().get_rlimit(locked, Resource::NOFILE);
618        let inner = self.inner.read();
619        let guard = inner.write();
620        guard.insert_entry(&inner.scope, fd, rlimit, FdTableEntry { file, flags })?;
621        Ok(())
622    }
623
624    /// Adds a file descriptor to the table.
625    ///
626    /// The file descriptor will be assigned the next available number.
627    ///
628    /// Returns the assigned file descriptor number.
629    ///
630    /// This function is the most common way to add a file descriptor to the table.
631    pub fn add<L>(
632        &self,
633        locked: &mut Locked<L>,
634        current_task: &CurrentTask,
635        file: FileHandle,
636        flags: FdFlags,
637    ) -> Result<FdNumber, Errno>
638    where
639        L: LockEqualOrBefore<FileOpsCore>,
640    {
641        let locked = locked.cast_locked::<FileOpsCore>();
642        let rlimit = current_task.thread_group().get_rlimit(locked, Resource::NOFILE);
643        let inner = self.inner.read();
644        let guard = inner.write();
645        let fd = guard.next_fd();
646        guard.insert_entry(&inner.scope, fd, rlimit, FdTableEntry { file, flags })?;
647        Ok(fd)
648    }
649
650    /// Duplicates a file descriptor.
651    ///
652    /// If `target` is `TargetFdNumber::Minimum`, a new `FdNumber` is allocated. Returns the new
653    /// `FdNumber`.
654    pub fn duplicate<L>(
655        &self,
656        locked: &mut Locked<L>,
657        current_task: &CurrentTask,
658        oldfd: FdNumber,
659        target: TargetFdNumber,
660        flags: FdFlags,
661    ) -> Result<FdNumber, Errno>
662    where
663        L: LockBefore<ThreadGroupLimits>,
664    {
665        let rlimit = current_task.thread_group().get_rlimit(locked, Resource::NOFILE);
666        let inner = self.inner.read();
667        let guard = inner.write();
668        let file = guard.get_file(&inner.scope, oldfd).ok_or_else(|| errno!(EBADF))?;
669
670        let fd = match target {
671            TargetFdNumber::Specific(fd) => {
672                // We need to check the rlimit before we remove the entry from state
673                // because we cannot error out after removing the entry.
674                if fd.raw() as u64 >= rlimit {
675                    // ltp_dup201 shows that we're supposed to return EBADF in this
676                    // situation, instead of EMFILE, which is what we normally return
677                    // when we're past the rlimit.
678                    return error!(EBADF);
679                }
680                guard.remove_entry(&inner.scope, &fd);
681                fd
682            }
683            TargetFdNumber::Minimum(fd) => guard.get_lowest_available_fd(&inner.scope, fd),
684            TargetFdNumber::Default => {
685                guard.get_lowest_available_fd(&inner.scope, FdNumber::from_raw(0))
686            }
687        };
688        let existing_entry =
689            guard.insert_entry(&inner.scope, fd, rlimit, FdTableEntry { file, flags })?;
690        assert!(!existing_entry);
691        Ok(fd)
692    }
693
694    /// Returns the file handle associated with the given file descriptor.
695    ///
696    /// Returns the file handle even if the file was opened with `O_PATH`.
697    ///
698    /// This operation is uncommon. Most clients should use `get` instead, which fails if the file
699    /// was opened with `O_PATH`.
700    pub fn get_allowing_opath(&self, fd: FdNumber) -> Result<FileHandle, Errno> {
701        self.get_allowing_opath_with_flags(fd).map(|(file, _flags)| file)
702    }
703
704    /// Returns the file handle and flags associated with the given file descriptor.
705    ///
706    /// Returns the file handle even if the file was opened with `O_PATH`.
707    ///
708    /// This operation is uncommon. Most clients should use `get` instead, which fails if the file
709    /// was opened with `O_PATH`.
710    pub fn get_allowing_opath_with_flags(
711        &self,
712        fd: FdNumber,
713    ) -> Result<(FileHandle, FdFlags), Errno> {
714        let inner = self.inner.read();
715        let view = inner.read(&inner.scope);
716        view.get_entry(fd).map(|entry| (entry.file, entry.flags)).ok_or_else(|| errno!(EBADF))
717    }
718
719    /// Returns the file handle associated with the given file descriptor.
720    ///
721    /// This operation fails if the file was opened with `O_PATH`.
722    pub fn get(&self, fd: FdNumber) -> Result<FileHandle, Errno> {
723        let file = self.get_allowing_opath(fd)?;
724        if file.flags().contains(OpenFlags::PATH) {
725            return error!(EBADF);
726        }
727        Ok(file)
728    }
729
730    /// Closes the file descriptor associated with the given file descriptor.
731    ///
732    /// This operation fails if the file descriptor is not valid.
733    pub fn close(&self, fd: FdNumber) -> Result<(), Errno> {
734        let inner = self.inner.read();
735        let guard = inner.write();
736        if guard.remove_entry(&inner.scope, &fd) { Ok(()) } else { error!(EBADF) }
737    }
738
739    /// Returns the flags associated with the given file descriptor.
740    ///
741    /// Returns the flags even if the file was opened with `O_PATH`.
742    pub fn get_fd_flags_allowing_opath(&self, fd: FdNumber) -> Result<FdFlags, Errno> {
743        self.get_allowing_opath_with_flags(fd).map(|(_file, flags)| flags)
744    }
745
746    /// Sets the flags associated with the given file descriptor.
747    ///
748    /// This operation fails if the file descriptor was opened with `O_PATH` or is not valid.
749    pub fn set_fd_flags(&self, fd: FdNumber, flags: FdFlags) -> Result<(), Errno> {
750        let inner = self.inner.read();
751        let guard = inner.write();
752        let file = guard.get_file(&inner.scope, fd).ok_or_else(|| errno!(EBADF))?;
753        if file.flags().contains(OpenFlags::PATH) {
754            return error!(EBADF);
755        }
756        guard.set_fd_flags(&inner.scope, fd, flags)
757    }
758
759    /// Sets the flags associated with the given file descriptor.
760    ///
761    /// This operation fails if the file descriptor is not valid.
762    pub fn set_fd_flags_allowing_opath(&self, fd: FdNumber, flags: FdFlags) -> Result<(), Errno> {
763        let inner = self.inner.read();
764        let guard = inner.write();
765        guard.set_fd_flags(&inner.scope, fd, flags)
766    }
767
768    /// Retains only the FDs matching the given `predicate`.
769    ///
770    /// The predicate is called with the `FdNumber` and a mutable reference to the `FdFlags` for
771    /// each entry in the `FdTable`. If the predicate returns `false`, the entry is removed from
772    /// the `FdTable`. Otherwise, the `FdFlags` are updated to the value modified by the predicate.
773    pub fn retain<L, F>(&self, _locked: &mut Locked<L>, _current_task: &CurrentTask, predicate: F)
774    where
775        L: LockEqualOrBefore<FileOpsCore>,
776        F: Fn(FdNumber, &mut FdFlags) -> bool,
777    {
778        let inner = self.inner.read();
779        let guard = inner.write();
780        guard.retain(&inner.scope, predicate);
781    }
782
783    /// Returns a vector of all current file descriptors in the table.
784    pub fn get_all_fds(&self) -> Vec<FdNumber> {
785        let inner = self.inner.read();
786        let view = inner.read(&inner.scope);
787        view.slice
788            .iter()
789            .enumerate()
790            .filter_map(|(index, encoded_entry)| {
791                if encoded_entry.is_none() { None } else { Some(FdNumber::from_raw(index as i32)) }
792            })
793            .collect()
794    }
795
796    /// Executes `predicate(file) => maybe_replacement` on every non-empty table entry.
797    ///
798    /// Replaces `file` with `replacement_file` in the table when
799    /// `maybe_replacement == Some(replacement_file)`.
800    pub fn remap<L, F: Fn(&FileHandle) -> Option<FileHandle>>(
801        &self,
802        _locked: &mut Locked<L>,
803        _current_task: &CurrentTask,
804        predicate: F,
805    ) where
806        L: LockEqualOrBefore<FileOpsCore>,
807    {
808        let inner = self.inner.read();
809        let guard = inner.write();
810        guard.remap(&inner.scope, predicate);
811    }
812}
813
814impl Clone for FdTable {
815    fn clone(&self) -> Self {
816        FdTable { inner: self.inner.clone() }
817    }
818}
819
#[cfg(test)]
mod test {
    use super::*;
    use crate::fs::fuchsia::SyslogFile;
    use crate::testing::*;

    /// Adds `file` to `files` with no descriptor flags set.
    fn add(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        files: &FdTable,
        file: FileHandle,
    ) -> Result<FdNumber, Errno> {
        let no_flags = FdFlags::empty();
        files.add(locked, current_task, file, no_flags)
    }

    #[::fuchsia::test]
    async fn test_fd_table_install() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            // Descriptors are handed out in ascending order, starting at zero.
            let first = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(first.raw(), 0);
            let second = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(second.raw(), 1);

            // Both descriptors refer to the file that was added.
            for fd in [first, second] {
                assert!(Arc::ptr_eq(&table.get(fd).unwrap(), &syslog));
            }

            // The descriptor past the last one added is not valid.
            let past_end = FdNumber::from_raw(second.raw() + 1);
            assert_eq!(table.get(past_end).map(|_| ()), error!(EBADF));

            table.release();
        })
        .await;
    }

    #[::fuchsia::test]
    async fn test_fd_table_fork() {
        spawn_kernel_and_run(async |locked, current_task| {
            let original = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            let first = add(locked, &current_task, &original, syslog.clone()).unwrap();
            let second = add(locked, &current_task, &original, syslog).unwrap();
            let unused = FdNumber::from_raw(2);

            let forked = original.fork();

            // The fork refers to the same files under the same descriptors.
            for fd in [first, second] {
                assert_eq!(
                    Arc::as_ptr(&original.get(fd).unwrap()),
                    Arc::as_ptr(&forked.get(fd).unwrap())
                );
            }
            assert!(original.get(unused).is_err());
            assert!(forked.get(unused).is_err());

            // A flag change in the original is not visible in the fork.
            original.set_fd_flags_allowing_opath(first, FdFlags::CLOEXEC).unwrap();
            assert_eq!(FdFlags::CLOEXEC, original.get_fd_flags_allowing_opath(first).unwrap());
            assert_ne!(FdFlags::CLOEXEC, forked.get_fd_flags_allowing_opath(first).unwrap());

            forked.release();
            original.release();
        })
        .await;
    }

    #[::fuchsia::test]
    async fn test_fd_table_exec() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            let cloexec_fd = add(locked, &current_task, &table, syslog.clone()).unwrap();
            let plain_fd = add(locked, &current_task, &table, syslog).unwrap();

            table.set_fd_flags_allowing_opath(cloexec_fd, FdFlags::CLOEXEC).unwrap();

            // Before exec, both descriptors are valid.
            assert!(table.get(cloexec_fd).is_ok());
            assert!(table.get(plain_fd).is_ok());

            table.exec(locked, &current_task);

            // After exec, only the descriptor without CLOEXEC is left.
            assert!(table.get(cloexec_fd).is_err());
            assert!(table.get(plain_fd).is_ok());

            table.release();
        })
        .await;
    }

    #[::fuchsia::test]
    async fn test_fd_table_pack_values() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            // Add two FDs.
            let first = add(locked, &current_task, &table, syslog.clone()).unwrap();
            let second = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(first.raw(), 0);
            assert_eq!(second.raw(), 1);

            // Close FD 0; closing it a second time fails.
            assert!(table.close(first).is_ok());
            assert!(table.close(first).is_err());
            // Now it's gone.
            assert!(table.get(first).is_err());

            // The next FD we insert fills in the hole we created.
            let reused = add(locked, &current_task, &table, syslog).unwrap();
            assert_eq!(reused.raw(), 0);

            table.release();
        })
        .await;
    }
}