starnix_core/vfs/
fd_table.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::task::{CurrentTaskAndLocked, Task, register_delayed_release};
6use crate::vfs::{FdNumber, FileHandle, FileReleaser};
7use bitflags::bitflags;
8use fuchsia_rcu::RcuReadScope;
9use fuchsia_rcu::rcu_arc::RcuArc;
10use fuchsia_rcu_collections::rcu_array::RcuArray;
11use starnix_sync::{LockBefore, Locked, Mutex, MutexGuard, ThreadGroupLimits};
12use starnix_syscalls::SyscallResult;
13use starnix_types::ownership::Releasable;
14use starnix_uapi::errors::Errno;
15use starnix_uapi::open_flags::OpenFlags;
16use starnix_uapi::resource_limits::Resource;
17use starnix_uapi::{FD_CLOEXEC, errno, error};
18use static_assertions::const_assert;
19use std::sync::Arc;
20use std::sync::atomic::{AtomicI32, AtomicUsize, Ordering};
21
bitflags! {
    /// Flags attached to a file descriptor slot in an `FdTable`.
    ///
    /// The flags are kept next to the `FileHandle` in each table entry rather than inside the
    /// file, so two descriptors that refer to the same file can carry different flags.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct FdFlags: u32 {
        /// Whether the file descriptor should be closed when the process execs.
        const CLOEXEC = FD_CLOEXEC;
    }
}
29
30impl std::convert::From<FdFlags> for SyscallResult {
31    fn from(value: FdFlags) -> Self {
32        value.bits().into()
33    }
34}
35
36/// An identifier for an `FdTable`.
37///
38/// Used by flock to drop file locks when a file descriptor is closed.
39#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
40pub struct FdTableId(usize);
41
42impl FdTableId {
43    fn new(id: *const FdTableInner) -> Self {
44        Self(id as usize)
45    }
46
47    pub fn raw(&self) -> usize {
48        self.0
49    }
50}
51
/// We store the CLOEXEC bit and the address of the `FileObject` in a single `usize` so that we can
/// operate on an FdTable entry atomically. This mask is used to select the CLOEXEC bit.
///
/// Only the lowest bit is covered: `value & FLAGS_MASK` yields the flag bit and
/// `value & !FLAGS_MASK` yields the pointer bits.
const FLAGS_MASK: usize = 0x1;
55
/// An encoded entry in an `FdTable`.
///
/// Encodes both the `FileHandle` and the CLOEXEC bit. Can either hold an entry or be empty.
///
/// The derived `Default` produces an empty entry, because a default `AtomicUsize` holds zero.
#[derive(Debug, Default)]
struct EncodedEntry {
    /// Rather than using a separate "flags" field, we encode the table entry into a single usize.
    ///
    /// If `value` is zero, the entry is empty.
    ///
    /// The lowest bit of `value` is the CLOEXEC bit.
    ///
    /// The remaining bits of `value` are a `FileHandle` converted to a raw pointer.
    value: AtomicUsize,
}
70
71// An assert to ensure that the lowest bit of the `FileHandle` is available to store the CLOEXEC
72// bit.
73const_assert!(std::mem::align_of::<*const FileReleaser>() >= 1 << FLAGS_MASK);
74
75impl EncodedEntry {
76    /// Encodes a `FileHandle` and `FdFlags` into a single `usize`.
77    ///
78    /// The returned value holds a reference to the `FileObject` and must be released to avoid a
79    /// memory leak.
80    fn encode(file: FileHandle, flags: FdFlags) -> usize {
81        let ptr = Arc::into_raw(file) as usize;
82        let flags = (flags.bits() as usize) & FLAGS_MASK;
83        ptr | flags
84    }
85
86    /// Releases the `FileHandle` for a previously encoded value.
87    ///
88    /// # Safety
89    ///
90    /// `value` must have been encoded by `Self::encode`.
91    unsafe fn release(id: FdTableId, value: usize) {
92        let ptr = Self::decode_ptr(value);
93        if !ptr.is_null() {
94            // SAFETY: The pointer is valid because it was encoded in `self.value`.
95            let file = unsafe { Arc::from_raw(ptr) };
96            register_delayed_release(FlushedFile(file, id));
97        }
98    }
99
100    /// Decodes the `FdFlags` from an encoded `usize`.
101    fn decode_flags(value: usize) -> FdFlags {
102        FdFlags::from_bits_truncate((value & FLAGS_MASK) as u32)
103    }
104
105    /// Decodes the `FileHandle` from an encoded `usize`.
106    fn decode_ptr(value: usize) -> *const FileReleaser {
107        (value & !FLAGS_MASK) as *const _
108    }
109
110    /// Creates a new `EncodedEntry` from a `FdTableEntry`.
111    fn new(entry: FdTableEntry) -> Self {
112        Self { value: AtomicUsize::new(Self::encode(entry.file, entry.flags)) }
113    }
114
115    /// Whether this entry contains a valid `FileHandle`.
116    fn is_some(&self) -> bool {
117        let value = self.value.load(Ordering::Acquire);
118        value != 0
119    }
120
121    /// Whether this entry is empty.
122    fn is_none(&self) -> bool {
123        !self.is_some()
124    }
125
126    /// Returns the `FdFlags` for this entry, if any.
127    fn flags(&self) -> Option<FdFlags> {
128        let value = self.value.load(Ordering::Acquire);
129        if value == 0 {
130            return None;
131        }
132        Some(Self::decode_flags(value))
133    }
134
135    /// Sets the `FdFlags` for this entry, preserving the `FileHandle`.
136    fn set_flags(&self, flags: FdFlags) {
137        loop {
138            let old_value = self.value.load(Ordering::Relaxed);
139            assert!(old_value != 0);
140            let new_value = old_value & !FLAGS_MASK | (flags.bits() as usize) & FLAGS_MASK;
141            if self
142                .value
143                .compare_exchange_weak(old_value, new_value, Ordering::AcqRel, Ordering::Relaxed)
144                .is_ok()
145            {
146                return;
147            }
148        }
149    }
150
151    /// Returns the `FileHandle` for this entry, if any.
152    fn file(&self) -> Option<FileHandle> {
153        self.to_entry().map(|entry| entry.file)
154    }
155
156    /// Sets the `FileHandle` for this entry, preserving the `FdFlags`.
157    fn set_file(&self, id: FdTableId, file: FileHandle) {
158        let ptr = Arc::into_raw(file) as usize;
159        loop {
160            let old_value = self.value.load(Ordering::Relaxed);
161            assert!(old_value != 0);
162            let flags = old_value & FLAGS_MASK;
163            let new_value = ptr | flags;
164            if self
165                .value
166                .compare_exchange_weak(old_value, new_value, Ordering::AcqRel, Ordering::Relaxed)
167                .is_ok()
168            {
169                // SAFETY: The value was previously encoded by `Self::encode`.
170                unsafe { Self::release(id, old_value) };
171                return;
172            }
173        }
174    }
175
176    /// Returns the `FileHandle` and `FdFlags` for this entry, if any.
177    fn to_entry(&self) -> Option<FdTableEntry> {
178        let value = self.value.load(Ordering::Acquire);
179        if value == 0 {
180            return None;
181        }
182        let flags = Self::decode_flags(value);
183        let ptr = Self::decode_ptr(value);
184        // SAFETY: The pointer is valid because it was encoded in `self.value`.
185        let file = unsafe {
186            Arc::increment_strong_count(ptr);
187            Arc::from_raw(ptr)
188        };
189        Some(FdTableEntry { file, flags })
190    }
191
192    /// Sets the `FileHandle` and `FdFlags` for this entry.
193    fn set_entry(&self, id: FdTableId, entry: FdTableEntry) -> bool {
194        // SAFETY: The value is encoded by `Self::encode`.
195        unsafe { self.set(id, Self::encode(entry.file, entry.flags)) }
196    }
197
198    /// Makes the entry empty.
199    fn clear(&self, id: FdTableId) -> bool {
200        // SAFETY: The value is zero.
201        unsafe { self.set(id, 0) }
202    }
203
204    /// Sets the value of this entry to the given value.
205    ///
206    /// Most clients should call `set_entry` or `clear` instead.
207    ///
208    /// # Safety
209    ///
210    /// The value must be encoded by `Self::encode` or be zero.
211    unsafe fn set(&self, id: FdTableId, value: usize) -> bool {
212        let old_value = self.value.swap(value, Ordering::AcqRel);
213        if old_value != 0 {
214            // SAFETY: The value was previously encoded by `Self::encode`.
215            unsafe { Self::release(id, old_value) };
216            true
217        } else {
218            false
219        }
220    }
221}
222
223impl Clone for EncodedEntry {
224    fn clone(&self) -> Self {
225        if let Some(entry) = self.to_entry() { Self::new(entry) } else { Self::default() }
226    }
227}
228
229impl Drop for EncodedEntry {
230    fn drop(&mut self) {
231        let value = self.value.load(Ordering::Acquire);
232        let ptr = Self::decode_ptr(value);
233        if !ptr.is_null() {
234            // SAFETY: The pointer is valid because it was encoded in `self.value`.
235            let _file = unsafe { Arc::from_raw(ptr) };
236        }
237    }
238}
239
/// An entry in the `FdTable`.
///
/// This is the decoded, owned form of a table slot: a file handle together with the flags of
/// the descriptor that refers to it.
#[derive(Debug, Clone)]
struct FdTableEntry {
    /// The file handle.
    file: FileHandle,

    /// The flags associated with the file handle.
    flags: FdFlags,
}
249
250/// A `FileHandle` that has been closed and is waiting to be flushed.
251struct FlushedFile(FileHandle, FdTableId);
252
253impl Releasable for FlushedFile {
254    type Context<'a> = CurrentTaskAndLocked<'a>;
255    fn release<'a>(self, context: Self::Context<'a>) {
256        let (locked, current_task) = context;
257        let FlushedFile(file, id) = self;
258        file.flush(locked, current_task, id);
259    }
260}
261
262/// A read-only view of an `FdTable`.
263///
264/// When reading an `FdTable`, we use an `FdTableView` to have a coherent view of the table even
265/// though the table can be modified by other threads concurrently.
266///
267/// The actual entries in the slice can still be modified by other threads. However, the view
268/// provided by the `FdTableView` is protected by an RCU read lock.
269struct FdTableView<'a> {
270    /// The entries in the table.
271    slice: &'a [EncodedEntry],
272}
273
274impl<'a> FdTableView<'a> {
275    /// Returns the number of entries in the table.
276    fn len(&self) -> usize {
277        self.slice.len()
278    }
279
280    /// Whether the view contains a given `FdNumber`.
281    fn is_some(&self, fd: FdNumber) -> bool {
282        self.slice.get(fd.raw() as usize).map_or(false, |entry| entry.is_some())
283    }
284
285    /// Whether the view does not contain a given `FdNumber`.
286    fn is_none(&self, fd: FdNumber) -> bool {
287        !self.is_some(fd)
288    }
289
290    /// Returns the `FileHandle` for a given `FdNumber`, if any.
291    fn get_file(&self, fd: FdNumber) -> Option<FileHandle> {
292        self.slice.get(fd.raw() as usize).and_then(|entry| entry.file())
293    }
294
295    /// Returns the `FdTableEntry` for a given `FdNumber`, if any.
296    fn get_entry(&self, fd: FdNumber) -> Option<FdTableEntry> {
297        self.slice.get(fd.raw() as usize).and_then(|entry| entry.to_entry())
298    }
299}
300
301struct FdTableWriteGuard<'a> {
302    store: &'a FdTableInner,
303    _write_guard: MutexGuard<'a, ()>,
304}
305
306impl<'a> FdTableWriteGuard<'a> {
307    /// The lowest available `FdNumber`.
308    fn next_fd(&self) -> FdNumber {
309        self.store.next_fd.get()
310    }
311
312    /// Recalculates the lowest available FD >= minfd based on the contents of the map.
313    fn calculate_lowest_available_fd(&self, view: &FdTableView<'_>, minfd: &FdNumber) -> FdNumber {
314        let mut fd: FdNumber = *minfd;
315        while view.is_some(fd) {
316            fd = FdNumber::from_raw(fd.raw() + 1);
317        }
318        fd
319    }
320
321    // Returns the (possibly memoized) lowest available FD >= minfd in this map.
322    fn get_lowest_available_fd(&self, scope: &RcuReadScope, minfd: FdNumber) -> FdNumber {
323        if minfd > self.store.next_fd.get() {
324            let view = self.store.read(scope);
325            return self.calculate_lowest_available_fd(&view, &minfd);
326        }
327        self.store.next_fd.get()
328    }
329
330    /// Returns the `FileHandle` for a given `FdNumber`, if any.
331    fn get_file(&self, scope: &RcuReadScope, fd: FdNumber) -> Option<FileHandle> {
332        self.store.read(scope).get_file(fd)
333    }
334
335    /// Inserts a new entry into the `FdTable`.
336    ///
337    /// Returns whether the `FdTable` previously contained an entry for the given `FdNumber`.
338    fn insert_entry(
339        &self,
340        scope: &RcuReadScope,
341        fd: FdNumber,
342        rlimit: u64,
343        entry: FdTableEntry,
344    ) -> Result<bool, Errno> {
345        let raw_fd = fd.raw();
346        if raw_fd < 0 {
347            return error!(EBADF);
348        }
349        if raw_fd as u64 >= rlimit {
350            return error!(EMFILE);
351        }
352        let mut view = self.store.read(scope);
353        if raw_fd == self.store.next_fd.get().raw() {
354            self.store
355                .next_fd
356                .set(self.calculate_lowest_available_fd(&view, &FdNumber::from_raw(raw_fd + 1)));
357        }
358        let raw_fd = raw_fd as usize;
359        if view.len() <= raw_fd {
360            // SAFETY: The write guard excludes concurrent writers.
361            unsafe { self.store.entries.ensure_at_least(raw_fd + 1) };
362            view = self.store.read(scope);
363        }
364        let id = self.store.id();
365        Ok(view.slice[raw_fd].set_entry(id, entry))
366    }
367
368    /// Removes an entry from the `FdTable`.
369    ///
370    /// Returns whether the `FdTable` previously contained an entry for the given `FdNumber`.
371    fn remove_entry(&self, scope: &RcuReadScope, fd: &FdNumber) -> bool {
372        let raw_fd = fd.raw() as usize;
373        let view = self.store.read(scope);
374        if raw_fd >= view.len() {
375            return false;
376        }
377        let id = self.store.id();
378        let removed = view.slice[raw_fd].clear(id);
379        if removed && raw_fd < self.store.next_fd.get().raw() as usize {
380            self.store.next_fd.set(*fd);
381        }
382        removed
383    }
384
385    /// Sets the flags for a given `FdNumber`.
386    ///
387    /// Returns `Errno` if the `FdTable` does not contain an entry for the given `FdNumber`.
388    fn set_fd_flags(
389        &self,
390        scope: &RcuReadScope,
391        fd: FdNumber,
392        flags: FdFlags,
393    ) -> Result<(), Errno> {
394        let view = self.store.read(scope);
395        if view.is_none(fd) {
396            return error!(EBADF);
397        }
398        let raw_fd = fd.raw() as usize;
399        view.slice[raw_fd].set_flags(flags);
400        Ok(())
401    }
402
403    /// Retains only the entries for which the given predicate returns `true`.
404    ///
405    /// The predicate is called with the `FdNumber` and a mutable reference to the `FdFlags` for
406    /// each entry in the `FdTable`. If the predicate returns `false`, the entry is removed from
407    /// the `FdTable`. Otherwise, the `FdFlags` are updated to the value modified by the predicate.
408    fn retain<F>(&self, scope: &RcuReadScope, mut predicate: F)
409    where
410        F: FnMut(FdNumber, &mut FdFlags) -> bool,
411    {
412        let id = self.store.id();
413        let view = self.store.read(scope);
414        for (index, encoded_entry) in view.slice.iter().enumerate() {
415            let fd = FdNumber::from_raw(index as i32);
416            if let Some(flags) = encoded_entry.flags() {
417                let mut modified_flags = flags;
418                if !predicate(fd, &mut modified_flags) {
419                    encoded_entry.clear(id);
420                } else if modified_flags != flags {
421                    encoded_entry.set_flags(modified_flags);
422                }
423            }
424        }
425        self.store.next_fd.set(self.calculate_lowest_available_fd(&view, &FdNumber::from_raw(0)));
426    }
427
428    /// Replaces the `FileHandle` for each entry in the `FdTable` with the result of the given
429    /// predicate.
430    ///
431    /// The predicate is called with the `FileHandle` for each entry in the `FdTable`. If the
432    /// predicate returns `Some(file)`, the entry is updated with the new `FileHandle`. Otherwise,
433    /// the entry is left unchanged.
434    fn remap<F>(&self, scope: &RcuReadScope, predicate: F)
435    where
436        F: Fn(&FileHandle) -> Option<FileHandle>,
437    {
438        let id = self.store.id();
439        let view = self.store.read(scope);
440        for encoded_entry in view.slice.iter() {
441            if let Some(file) = encoded_entry.file() {
442                if let Some(replacement_file) = predicate(&file) {
443                    encoded_entry.set_file(id, replacement_file);
444                }
445            }
446        }
447    }
448}
449
450/// An `FdNumber` that can be atomically updated.
451///
452/// Used for the `next_fd` field of `FdTableInner`, which is only modified by the `FdTable` when
453/// holding the `writer_queue` lock.
454#[derive(Debug, Default)]
455struct AtomicFdNumber {
456    /// The raw value of the `FdNumber`.
457    value: AtomicI32,
458}
459
460impl AtomicFdNumber {
461    /// Returns the current value of the `FdNumber`.
462    ///
463    /// Uses `Ordering::Relaxed`.
464    fn get(&self) -> FdNumber {
465        FdNumber::from_raw(self.value.load(Ordering::Relaxed))
466    }
467
468    /// Sets the value of the `FdNumber`.
469    ///
470    /// Uses `Ordering::Relaxed`.
471    fn set(&self, value: FdNumber) {
472        self.value.store(value.raw(), Ordering::Relaxed);
473    }
474}
475
476impl Clone for AtomicFdNumber {
477    fn clone(&self) -> Self {
478        Self { value: AtomicI32::new(self.value.load(Ordering::Relaxed)) }
479    }
480}
481
/// The state of an `FdTable` that is shared between tasks.
///
/// The `writer_queue` is used to serialize concurrent writers to the `FdTable`, and to prevent
/// writers from being blocked by readers.
///
/// Readers do not take `writer_queue`: they obtain a slice of `entries` through an
/// `RcuReadScope`.
#[derive(Debug)]
struct FdTableInner {
    /// The entries of the `FdTable`.
    entries: RcuArray<EncodedEntry>,

    /// The next available `FdNumber`.
    ///
    /// This is the memoized lowest free descriptor; it is kept up to date by the insert, remove
    /// and retain operations of the write guard.
    next_fd: AtomicFdNumber,

    /// A mutex used to serialize concurrent writers to the `FdTable`, and to prevent writers from
    /// being blocked by readers.
    writer_queue: Mutex<()>,
}
498
499impl Default for FdTableInner {
500    fn default() -> Self {
501        FdTableInner {
502            entries: Default::default(),
503            next_fd: AtomicFdNumber::default(),
504            writer_queue: Mutex::new(()),
505        }
506    }
507}
508
509impl Clone for FdTableInner {
510    fn clone(&self) -> Self {
511        let _guard = self.writer_queue.lock();
512        Self {
513            entries: self.entries.clone(),
514            next_fd: self.next_fd.clone(),
515            writer_queue: Mutex::new(()),
516        }
517    }
518}
519
520impl Drop for FdTableInner {
521    fn drop(&mut self) {
522        let id = self.id();
523        let scope = RcuReadScope::new();
524        let view = self.read(&scope);
525        for entry in view.slice.iter() {
526            entry.clear(id);
527        }
528    }
529}
530
531impl FdTableInner {
532    /// Returns the `FdTableId` of the `FdTableInner`.
533    fn id(&self) -> FdTableId {
534        FdTableId::new(self as *const Self)
535    }
536
537    /// Returns an `Arc<FdTableInner>` that is a snapshot of the state of the `FdTableInner`.
538    fn unshare(&self) -> Arc<Self> {
539        Arc::new(self.clone())
540    }
541
542    /// Returns a `FdTableView` that provides read-only access to the state of the `FdTableInner`.
543    fn read<'a>(&self, scope: &'a RcuReadScope) -> FdTableView<'a> {
544        let slice = self.entries.as_slice(scope);
545        FdTableView { slice }
546    }
547
548    /// Returns a `FdTableWriteGuard` that provides exclusive access to the state of the
549    /// `FdTableInner`.
550    fn write(&self) -> FdTableWriteGuard<'_> {
551        FdTableWriteGuard { store: self, _write_guard: self.writer_queue.lock() }
552    }
553}
554
/// An `FdTable` is a table of file descriptors.
///
/// The table state lives behind an `RcuArc`, which `unshare` and `release` replace with a new
/// state.
// NOTE(review): cloning an `FdTable` presumably shares the same underlying state (see `fork` for
// an independent copy) — confirm against `RcuArc::clone`.
#[derive(Debug, Default)]
pub struct FdTable {
    /// The state of the `FdTable` that is shared between tasks.
    inner: RcuArc<FdTableInner>,
}
561
/// The target `FdNumber` for a duplicated file descriptor.
pub enum TargetFdNumber {
    /// The duplicated `FdNumber` will be the smallest available `FdNumber`.
    Default,

    /// The duplicated `FdNumber` should be this specific `FdNumber`.
    ///
    /// Any entry already stored at this `FdNumber` is removed first.
    Specific(FdNumber),

    /// The duplicated `FdNumber` should be the smallest available `FdNumber` that is greater than
    /// or equal to this `FdNumber`.
    Minimum(FdNumber),
}
573
574impl FdTable {
575    /// Returns the `FdTableId` of the `FdTable`.
576    pub fn id(&self) -> FdTableId {
577        self.inner.read().id()
578    }
579
580    /// Returns new unshared `FdTable` that is a snapshot of the state of the `FdTable`.
581    pub fn fork(&self) -> FdTable {
582        let unshared = self.inner.read().unshare();
583        FdTable { inner: RcuArc::new(unshared) }
584    }
585
586    /// Ensures that this `FdTable` is not shared by any other `FdTable` instances.
587    pub fn unshare(&self) {
588        let unshared = self.inner.read().unshare();
589        self.inner.update(unshared);
590    }
591
592    /// Releases the `FdTable`, closing any files opened exclusively by this table.
593    pub fn release(&self) {
594        self.inner.update(Default::default());
595    }
596
597    /// Trims close-on-exec file descriptors from the table.
598    pub fn exec(&self) {
599        self.retain(|_fd, flags| !flags.contains(FdFlags::CLOEXEC));
600    }
601
602    /// Inserts a file descriptor into the table.
603    pub fn insert<L>(
604        &self,
605        locked: &mut Locked<L>,
606        task: &Task,
607        fd: FdNumber,
608        file: FileHandle,
609    ) -> Result<(), Errno>
610    where
611        L: LockBefore<ThreadGroupLimits>,
612    {
613        self.insert_with_flags(locked, task, fd, file, FdFlags::empty())
614    }
615
616    /// Inserts a file descriptor into the table with the specified flags.
617    pub fn insert_with_flags<L>(
618        &self,
619        locked: &mut Locked<L>,
620        task: &Task,
621        fd: FdNumber,
622        file: FileHandle,
623        flags: FdFlags,
624    ) -> Result<(), Errno>
625    where
626        L: LockBefore<ThreadGroupLimits>,
627    {
628        let rlimit = task.thread_group().get_rlimit(locked, Resource::NOFILE);
629        let inner = self.inner.read();
630        let guard = inner.write();
631        guard.insert_entry(&inner.scope, fd, rlimit, FdTableEntry { file, flags })?;
632        Ok(())
633    }
634
635    /// Adds a file descriptor to the table.
636    ///
637    /// The file descriptor will be assigned the next available number.
638    ///
639    /// Returns the assigned file descriptor number.
640    ///
641    /// This function is the most common way to add a file descriptor to the table.
642    pub fn add_with_flags<L>(
643        &self,
644        locked: &mut Locked<L>,
645        task: &Task,
646        file: FileHandle,
647        flags: FdFlags,
648    ) -> Result<FdNumber, Errno>
649    where
650        L: LockBefore<ThreadGroupLimits>,
651    {
652        let rlimit = task.thread_group().get_rlimit(locked, Resource::NOFILE);
653        let inner = self.inner.read();
654        let guard = inner.write();
655        let fd = guard.next_fd();
656        guard.insert_entry(&inner.scope, fd, rlimit, FdTableEntry { file, flags })?;
657        Ok(fd)
658    }
659
660    /// Duplicates a file descriptor.
661    ///
662    /// If `target` is `TargetFdNumber::Minimum`, a new `FdNumber` is allocated. Returns the new
663    /// `FdNumber`.
664    pub fn duplicate<L>(
665        &self,
666        locked: &mut Locked<L>,
667        task: &Task,
668        oldfd: FdNumber,
669        target: TargetFdNumber,
670        flags: FdFlags,
671    ) -> Result<FdNumber, Errno>
672    where
673        L: LockBefore<ThreadGroupLimits>,
674    {
675        let rlimit = task.thread_group().get_rlimit(locked, Resource::NOFILE);
676        let inner = self.inner.read();
677        let guard = inner.write();
678        let file = guard.get_file(&inner.scope, oldfd).ok_or_else(|| errno!(EBADF))?;
679
680        let fd = match target {
681            TargetFdNumber::Specific(fd) => {
682                // We need to check the rlimit before we remove the entry from state
683                // because we cannot error out after removing the entry.
684                if fd.raw() as u64 >= rlimit {
685                    // ltp_dup201 shows that we're supposed to return EBADF in this
686                    // situation, instead of EMFILE, which is what we normally return
687                    // when we're past the rlimit.
688                    return error!(EBADF);
689                }
690                guard.remove_entry(&inner.scope, &fd);
691                fd
692            }
693            TargetFdNumber::Minimum(fd) => guard.get_lowest_available_fd(&inner.scope, fd),
694            TargetFdNumber::Default => {
695                guard.get_lowest_available_fd(&inner.scope, FdNumber::from_raw(0))
696            }
697        };
698        let existing_entry =
699            guard.insert_entry(&inner.scope, fd, rlimit, FdTableEntry { file, flags })?;
700        assert!(!existing_entry);
701        Ok(fd)
702    }
703
704    /// Returns the file handle associated with the given file descriptor.
705    ///
706    /// Returns the file handle even if the file was opened with `O_PATH`.
707    ///
708    /// This operation is uncommon. Most clients should use `get` instead, which fails if the file
709    /// was opened with `O_PATH`.
710    pub fn get_allowing_opath(&self, fd: FdNumber) -> Result<FileHandle, Errno> {
711        self.get_allowing_opath_with_flags(fd).map(|(file, _flags)| file)
712    }
713
714    /// Returns the file handle and flags associated with the given file descriptor.
715    ///
716    /// Returns the file handle even if the file was opened with `O_PATH`.
717    ///
718    /// This operation is uncommon. Most clients should use `get` instead, which fails if the file
719    /// was opened with `O_PATH`.
720    pub fn get_allowing_opath_with_flags(
721        &self,
722        fd: FdNumber,
723    ) -> Result<(FileHandle, FdFlags), Errno> {
724        let inner = self.inner.read();
725        let view = inner.read(&inner.scope);
726        view.get_entry(fd).map(|entry| (entry.file, entry.flags)).ok_or_else(|| errno!(EBADF))
727    }
728
729    /// Returns the file handle associated with the given file descriptor.
730    ///
731    /// This operation fails if the file was opened with `O_PATH`.
732    pub fn get(&self, fd: FdNumber) -> Result<FileHandle, Errno> {
733        let file = self.get_allowing_opath(fd)?;
734        if file.flags().contains(OpenFlags::PATH) {
735            return error!(EBADF);
736        }
737        Ok(file)
738    }
739
740    /// Closes the file descriptor associated with the given file descriptor.
741    ///
742    /// This operation fails if the file descriptor is not valid.
743    pub fn close(&self, fd: FdNumber) -> Result<(), Errno> {
744        let inner = self.inner.read();
745        let guard = inner.write();
746        if guard.remove_entry(&inner.scope, &fd) { Ok(()) } else { error!(EBADF) }
747    }
748
749    /// Returns the flags associated with the given file descriptor.
750    ///
751    /// Returns the flags even if the file was opened with `O_PATH`.
752    pub fn get_fd_flags_allowing_opath(&self, fd: FdNumber) -> Result<FdFlags, Errno> {
753        self.get_allowing_opath_with_flags(fd).map(|(_file, flags)| flags)
754    }
755
756    /// Sets the flags associated with the given file descriptor.
757    ///
758    /// This operation fails if the file descriptor was opened with `O_PATH` or is not valid.
759    pub fn set_fd_flags(&self, fd: FdNumber, flags: FdFlags) -> Result<(), Errno> {
760        let inner = self.inner.read();
761        let guard = inner.write();
762        let file = guard.get_file(&inner.scope, fd).ok_or_else(|| errno!(EBADF))?;
763        if file.flags().contains(OpenFlags::PATH) {
764            return error!(EBADF);
765        }
766        guard.set_fd_flags(&inner.scope, fd, flags)
767    }
768
769    /// Sets the flags associated with the given file descriptor.
770    ///
771    /// This operation fails if the file descriptor is not valid.
772    pub fn set_fd_flags_allowing_opath(&self, fd: FdNumber, flags: FdFlags) -> Result<(), Errno> {
773        let inner = self.inner.read();
774        let guard = inner.write();
775        guard.set_fd_flags(&inner.scope, fd, flags)
776    }
777
778    /// Retains only the FDs matching the given `predicate`.
779    ///
780    /// The predicate is called with the `FdNumber` and a mutable reference to the `FdFlags` for
781    /// each entry in the `FdTable`. If the predicate returns `false`, the entry is removed from
782    /// the `FdTable`. Otherwise, the `FdFlags` are updated to the value modified by the predicate.
783    pub fn retain<F>(&self, predicate: F)
784    where
785        F: Fn(FdNumber, &mut FdFlags) -> bool,
786    {
787        let inner = self.inner.read();
788        let guard = inner.write();
789        guard.retain(&inner.scope, predicate);
790    }
791
792    /// Returns a vector of all current file descriptors in the table.
793    pub fn get_all_fds(&self) -> Vec<FdNumber> {
794        let inner = self.inner.read();
795        let view = inner.read(&inner.scope);
796        view.slice
797            .iter()
798            .enumerate()
799            .filter_map(|(index, encoded_entry)| {
800                if encoded_entry.is_none() { None } else { Some(FdNumber::from_raw(index as i32)) }
801            })
802            .collect()
803    }
804
805    /// Executes `predicate(file) => maybe_replacement` on every non-empty table entry.
806    ///
807    /// Replaces `file` with `replacement_file` in the table when
808    /// `maybe_replacement == Some(replacement_file)`.
809    pub fn remap<F: Fn(&FileHandle) -> Option<FileHandle>>(&self, predicate: F) {
810        let inner = self.inner.read();
811        let guard = inner.write();
812        guard.remap(&inner.scope, predicate);
813    }
814}
815
816impl Clone for FdTable {
817    fn clone(&self) -> Self {
818        FdTable { inner: self.inner.clone() }
819    }
820}
821
#[cfg(test)]
mod test {
    use super::*;
    use crate::fs::fuchsia::SyslogFile;
    use crate::task::*;
    use crate::testing::*;
    use starnix_sync::Unlocked;

    /// Adds `file` to `files` under the next available descriptor with no flags set.
    fn add(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        files: &FdTable,
        file: FileHandle,
    ) -> Result<FdNumber, Errno> {
        let no_flags = FdFlags::empty();
        files.add_with_flags(locked, current_task, file, no_flags)
    }

    /// Descriptors are handed out in ascending order and resolve to the file they were given.
    #[::fuchsia::test]
    async fn test_fd_table_install() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            let first = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(first.raw(), 0);
            let second = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(second.raw(), 1);

            for fd in [first, second] {
                assert!(Arc::ptr_eq(&table.get(fd).unwrap(), &syslog));
            }
            let unused = FdNumber::from_raw(second.raw() + 1);
            assert_eq!(table.get(unused).map(|_| ()), error!(EBADF));

            table.release();
        })
        .await;
    }

    /// A forked table refers to the same files but keeps its own descriptor flags.
    #[::fuchsia::test]
    async fn test_fd_table_fork() {
        spawn_kernel_and_run(async |locked, current_task| {
            let original = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            let first = add(locked, &current_task, &original, syslog.clone()).unwrap();
            let second = add(locked, &current_task, &original, syslog).unwrap();
            let unused = FdNumber::from_raw(2);

            let copy = original.fork();

            for fd in [first, second] {
                assert!(Arc::ptr_eq(&original.get(fd).unwrap(), &copy.get(fd).unwrap()));
            }
            assert!(original.get(unused).is_err());
            assert!(copy.get(unused).is_err());

            // Changing flags on the original must not leak into the fork.
            original.set_fd_flags_allowing_opath(first, FdFlags::CLOEXEC).unwrap();
            assert_eq!(FdFlags::CLOEXEC, original.get_fd_flags_allowing_opath(first).unwrap());
            assert_ne!(FdFlags::CLOEXEC, copy.get_fd_flags_allowing_opath(first).unwrap());

            copy.release();
            original.release();
        })
        .await;
    }

    /// `exec` drops exactly the descriptors marked close-on-exec.
    #[::fuchsia::test]
    async fn test_fd_table_exec() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            let cloexec_fd = add(locked, &current_task, &table, syslog.clone()).unwrap();
            let plain_fd = add(locked, &current_task, &table, syslog).unwrap();

            table.set_fd_flags_allowing_opath(cloexec_fd, FdFlags::CLOEXEC).unwrap();

            assert!(table.get(cloexec_fd).is_ok());
            assert!(table.get(plain_fd).is_ok());

            table.exec();

            assert!(table.get(cloexec_fd).is_err());
            assert!(table.get(plain_fd).is_ok());

            table.release();
        })
        .await;
    }

    /// A closed descriptor number is reused by the next insertion.
    #[::fuchsia::test]
    async fn test_fd_table_pack_values() {
        spawn_kernel_and_run(async |locked, current_task| {
            let table = FdTable::default();
            let syslog = SyslogFile::new_file(locked, &current_task);

            // Add two FDs.
            let first = add(locked, &current_task, &table, syslog.clone()).unwrap();
            let second = add(locked, &current_task, &table, syslog.clone()).unwrap();
            assert_eq!(first.raw(), 0);
            assert_eq!(second.raw(), 1);

            // Closing FD 0 succeeds once; a second close finds nothing to close.
            assert!(table.close(first).is_ok());
            assert!(table.close(first).is_err());
            // Now it's gone.
            assert!(table.get(first).is_err());

            // The next FD we insert fills in the hole we created.
            let reused = add(locked, &current_task, &table, syslog).unwrap();
            assert_eq!(reused.raw(), 0);

            table.release();
        })
        .await;
    }
}