1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#![allow(missing_docs)]

mod tcp;
pub use self::tcp::*;

mod udp;
pub use self::udp::*;

use fuchsia_zircon::{self as zx, AsHandleRef};
use futures::io::{self, AsyncRead, AsyncWrite};
use futures::ready;
use futures::task::{AtomicWaker, Context};

use std::fmt;
use std::io::{Read, Write};
use std::mem;
use std::os::unix::io::{AsRawFd, RawFd};
use std::pin::Pin;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::task::Poll;

use crate::runtime::{EHandle, PacketReceiver, ReceiverRegistration};

// Readiness bits stored in `EventedFdPacketReceiver::signals` and passed to
// fdio as the event mask when a wait is scheduled. Each one is the matching
// libc epoll flag widened to `usize` so it fits the `AtomicUsize` bitmask.

/// Bit corresponding to `EPOLLIN`.
const READABLE: usize = libc::EPOLLIN as usize;
/// Bit corresponding to `EPOLLOUT`.
const WRITABLE: usize = libc::EPOLLOUT as usize;
/// Bit corresponding to `EPOLLERR`.
const ERROR: usize = libc::EPOLLERR as usize;
/// Bit corresponding to `EPOLLHUP`.
const HUP: usize = libc::EPOLLHUP as usize;

// Packet receiver backing an `EventedFd`: converts incoming port packets into
// readiness bits and wakes the tasks waiting on them.
//
// Unsafe to use. `receive_packet` must not be called after
// `fdio` is invalidated.
pub(crate) struct EventedFdPacketReceiver {
    // fdio object handed to `fdio_unsafe_wait_end` in `receive_packet`.
    // This struct never releases it.
    fdio: *const syscall::fdio_t,
    // Bitmask of READABLE / WRITABLE / ERROR / HUP. Bits are set by
    // `receive_packet` and cleared by `EventedFd` when it schedules a new wait.
    signals: AtomicUsize,
    // Woken when READABLE newly becomes set or an ERROR/HUP event arrives.
    read_task: AtomicWaker,
    // Woken when WRITABLE newly becomes set or an ERROR/HUP event arrives.
    write_task: AtomicWaker,
}

// These impls are needed because the raw `fdio` pointer field suppresses the
// automatic `Send`/`Sync` implementations.
//
// SAFETY: It is safe to send because the `EventedFdPacketReceiver` must be
// deregistered (and therefore `receive_packet` never called again)
// before `fdio_unsafe_release` is called.
// NOTE(review): this also assumes the fdio functions may be called from any
// thread — confirm against the fdio documentation.
unsafe impl Send for EventedFdPacketReceiver {}
unsafe impl Sync for EventedFdPacketReceiver {}

impl PacketReceiver for EventedFdPacketReceiver {
    fn receive_packet(&self, packet: zx::Packet) {
        let observed_signals = if let zx::PacketContents::SignalOne(p) = packet.contents() {
            p.observed()
        } else {
            return;
        };

        let mut events: u32 = 0;
        unsafe {
            syscall::fdio_unsafe_wait_end(self.fdio, observed_signals.bits(), &mut events);
        }
        let events = events as usize;

        let old = self.signals.fetch_or(events, Ordering::SeqCst);
        let became_readable = ((events & READABLE) != 0) && ((old & READABLE) == 0);
        let became_writable = ((events & WRITABLE) != 0) && ((old & WRITABLE) == 0);
        let err_occurred = (events & (ERROR | HUP)) != 0;

        if became_readable || err_occurred {
            self.read_task.wake();
        }
        if became_writable || err_occurred {
            self.write_task.wake();
        }
    }
}

/// A type which can be used for receiving IO events for a file descriptor.
///
/// Wraps an IO object `T` together with the fdio object for its file
/// descriptor and the packet-receiver registration that tracks readiness.
pub struct EventedFd<T> {
    // The wrapped IO object; exposed through `as_ref` / `as_mut`.
    inner: T,
    // Must be valid, acquired from `fdio_unsafe_fd_to_io`.
    // Released with `fdio_unsafe_release` in `Drop`.
    fdio: *const syscall::fdio_t,
    // Must be dropped before `fdio_unsafe_release` is called.
    // Held in `ManuallyDrop` so the `Drop` impl can drop it explicitly first.
    signal_receiver: mem::ManuallyDrop<ReceiverRegistration<EventedFdPacketReceiver>>,
}

// The raw `fdio` pointer field suppresses the automatic `Send`/`Sync` impls,
// so they are restored here, conditional on `T`.
// NOTE(review): soundness assumes the fdio object may be used and released
// from any thread — confirm against the fdio documentation.
unsafe impl<T> Send for EventedFd<T> where T: Send {}
unsafe impl<T> Sync for EventedFd<T> where T: Sync {}

// `EventedFd` is `Unpin` regardless of `T`; the `AsyncRead`/`AsyncWrite` impls
// take `&mut *self` out of a `Pin<&mut Self>`, which requires it.
impl<T> Unpin for EventedFd<T> {}

impl<T> Drop for EventedFd<T> {
    /// Tears down the packet-receiver registration, then releases the fdio
    /// object. The order matters: the receiver holds a copy of the `fdio`
    /// pointer and must not run after the release.
    fn drop(&mut self) {
        // First drop the registration so `receive_packet` may not be called again.
        // `signal_receiver` is never touched after this point.
        unsafe { mem::ManuallyDrop::drop(&mut self.signal_receiver) };

        // Only now hand the fdio object back.
        unsafe { syscall::fdio_unsafe_release(self.fdio) };

        // `inner` is dropped automatically after this function returns.
    }
}

impl<T: fmt::Debug> fmt::Debug for EventedFd<T> {
    /// Formats the wrapper as `EventedFd { inner: .. }`; the fdio pointer and
    /// the receiver registration are intentionally left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // FIXME(https://github.com/rust-lang/rust/issues/67364): This could be
        // better written with `.finish_non_exhaustive()` once that feature is
        // stabilized.
        let mut builder = f.debug_struct("EventedFd");
        builder.field("inner", &self.inner);
        builder.finish()
    }
}

impl<T> EventedFd<T>
where
    T: AsRawFd,
{
    /// Creates a new EventedFd.
    ///
    /// # Safety
    ///
    /// The raw file descriptor returned from `inner.as_raw_fd()` must not be
    /// closed until the returned `EventedFd` is dropped.
    pub unsafe fn new(inner: T) -> io::Result<Self> {
        let fdio = syscall::fdio_unsafe_fd_to_io(inner.as_raw_fd());
        let signal_receiver =
            EHandle::local().register_receiver(Arc::new(EventedFdPacketReceiver {
                fdio,
                // Optimistically assume that the fd is readable and writable.
                // Reads and writes will be attempted before queueing a packet.
                // This makes fds slightly faster to read/write the first time
                // they're accessed after being created, provided they start off as
                // readable or writable. In return, there will be an extra wasted
                // syscall per read/write if the fd is not readable or writable.
                signals: AtomicUsize::new(READABLE | WRITABLE),
                read_task: AtomicWaker::new(),
                write_task: AtomicWaker::new(),
            }));

        let evented_fd =
            EventedFd { inner, fdio, signal_receiver: mem::ManuallyDrop::new(signal_receiver) };

        // Make sure a packet is delivered if an error or closure occurs.
        evented_fd.schedule_packet(ERROR | HUP);

        // Need to schedule packets to maintain the invariant that
        // if !READABLE or !WRITABLE a packet has been scheduled.
        evented_fd.schedule_packet(READABLE);
        evented_fd.schedule_packet(WRITABLE);

        Ok(evented_fd)
    }
    /// Tests to see if this resource is ready to be read from.
    /// If it is not, it arranges for the current task to receive a notification
    /// when a "readable" signal arrives.
    pub fn poll_readable(&self, cx: &mut Context<'_>) -> Poll<Result<(), zx::Status>> {
        let receiver = self.signal_receiver.receiver();
        if (receiver.signals.load(Ordering::SeqCst) & (READABLE | ERROR | HUP)) != 0 {
            Poll::Ready(Ok(()))
        } else {
            self.need_read(cx);
            Poll::Pending
        }
    }

    /// Tests to see if this resource is ready to be written to.
    /// If it is not, it arranges for the current task to receive a notification
    /// when a "writable" signal arrives.
    pub fn poll_writable(&self, cx: &mut Context<'_>) -> Poll<Result<(), zx::Status>> {
        let receiver = self.signal_receiver.receiver();
        if (receiver.signals.load(Ordering::SeqCst) & (WRITABLE | ERROR | HUP)) != 0 {
            Poll::Ready(Ok(()))
        } else {
            self.need_write(cx);
            Poll::Pending
        }
    }

    // Returns a reference to the underlying IO object.
    pub fn as_ref(&self) -> &T {
        &self.inner
    }

    // Returns a mutable reference to the underlying IO object.
    pub fn as_mut(&mut self) -> &mut T {
        &mut self.inner
    }

    /// Arranges for the current task to receive a notification when a "readable"
    /// signal arrives.
    pub fn need_read(&self, cx: &mut Context<'_>) {
        let receiver = self.signal_receiver.receiver();
        receiver.read_task.register(cx.waker());
        let old = receiver.signals.fetch_and(!READABLE, Ordering::SeqCst);
        // We only need to schedule a new packet if one isn't already scheduled.
        // If READABLE was already false, a packet was already scheduled.
        if (old & READABLE) != 0 {
            self.schedule_packet(READABLE);
        }
    }

    /// Arranges for the current task to receive a notification when a "writable"
    /// signal arrives.
    pub fn need_write(&self, cx: &mut Context<'_>) {
        let receiver = self.signal_receiver.receiver();
        receiver.write_task.register(cx.waker());
        let old = receiver.signals.fetch_and(!WRITABLE, Ordering::SeqCst);
        // We only need to schedule a new packet if one isn't already scheduled.
        // If WRITABLE was already false, a packet was already scheduled.
        if (old & WRITABLE) != 0 {
            self.schedule_packet(WRITABLE);
        }
    }

    fn schedule_packet(&self, signals: usize) {
        unsafe {
            let (mut raw_handle, mut raw_signals) = (0, 0);
            syscall::fdio_unsafe_wait_begin(
                self.fdio,
                signals as u32,
                &mut raw_handle,
                &mut raw_signals,
            );

            let handle = zx::Handle::from_raw(raw_handle);
            let signals = zx::Signals::from_bits_truncate(raw_signals);

            let res = handle.wait_async_handle(
                self.signal_receiver.port(),
                self.signal_receiver.key(),
                signals,
                zx::WaitAsyncOpts::empty(),
            );

            // The handle is borrowed, so we cannot drop it.
            mem::forget(handle);
            res.expect("Error scheduling EventedFd notification");
        }
    }

    /// Clears all incoming signals.
    pub fn clear(&self) {
        self.signal_receiver.receiver().signals.store(0, Ordering::SeqCst);
    }
}

impl<T: AsRawFd> AsRawFd for EventedFd<T> {
    /// Forwards to the wrapped IO object's raw file descriptor.
    fn as_raw_fd(&self) -> RawFd {
        let wrapped: &T = &self.inner;
        wrapped.as_raw_fd()
    }
}

impl<T: AsRawFd + Read> AsyncRead for EventedFd<T> {
    /// Reads from the wrapped object once the fd is believed readable.
    ///
    /// A `WouldBlock` result from the read re-arms the readable
    /// notification and reports `Pending`; every other result is returned
    /// to the caller as-is.
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut [u8],
    ) -> Poll<Result<usize, io::Error>> {
        let this: &mut Self = &mut *self;
        ready!(this.poll_readable(cx))?;
        match this.inner.read(buf) {
            Err(ref err) if err.kind() == io::ErrorKind::WouldBlock => {
                // The optimistic readiness bit was stale; wait for a real signal.
                this.need_read(cx);
                Poll::Pending
            }
            outcome => Poll::Ready(outcome),
        }
    }

    // TODO: override poll_vectored_read and call readv on the underlying handle
}

impl<T: AsRawFd + Write> AsyncWrite for EventedFd<T> {
    fn poll_write(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<Result<usize, io::Error>> {
        ready!(EventedFd::poll_writable(&*self, cx))?;
        let res = (&mut *self).as_mut().write(buf);
        if let Err(e) = &res {
            if e.kind() == io::ErrorKind::WouldBlock {
                self.need_write(cx);
                return Poll::Pending;
            }
        }
        Poll::Ready(res.map_err(Into::into))
    }

    fn poll_flush(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
        Poll::Ready(Ok(()))
    }

    fn poll_close(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
        Poll::Ready(Ok(()))
    }

    // TODO: override poll_vectored_write and call writev on the underlying handle
}

impl<'a, T> AsyncRead for &'a EventedFd<T>
where
    T: AsRawFd,
    for<'b> &'b T: Read,
{
    /// Reads through a shared reference, for IO objects where `&T: Read`.
    ///
    /// A `WouldBlock` result from the read re-arms the readable
    /// notification and reports `Pending`; every other result is returned
    /// to the caller as-is.
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut [u8],
    ) -> Poll<Result<usize, io::Error>> {
        let fd: &EventedFd<T> = *self;
        ready!(fd.poll_readable(cx))?;
        let mut source: &T = &fd.inner;
        match source.read(buf) {
            Err(ref err) if err.kind() == io::ErrorKind::WouldBlock => {
                // The optimistic readiness bit was stale; wait for a real signal.
                fd.need_read(cx);
                Poll::Pending
            }
            outcome => Poll::Ready(outcome),
        }
    }
}

impl<'a, T> AsyncWrite for &'a EventedFd<T>
where
    T: AsRawFd,
    for<'b> &'b T: Write,
{
    fn poll_write(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<Result<usize, io::Error>> {
        ready!(EventedFd::poll_writable(&*self, cx))?;
        let res = (&*self).as_ref().write(buf);
        if let Err(e) = &res {
            if e.kind() == io::ErrorKind::WouldBlock {
                self.need_write(cx);
                return Poll::Pending;
            }
        }
        Poll::Ready(res.map_err(Into::into))
    }

    fn poll_flush(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
        Poll::Ready(Ok(()))
    }

    fn poll_close(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
        Poll::Ready(Ok(()))
    }
}

// Raw FFI declarations for the `fdio_unsafe_*` functions of the `fdio`
// library, as used by `EventedFd` and `EventedFdPacketReceiver`.
mod syscall {
    #![allow(non_camel_case_types, improper_ctypes)]
    pub use fuchsia_zircon::sys::{zx_handle_t, zx_signals_t};
    use std::os::unix::io::RawFd;

    // This is the "improper" c type: a stand-in for the C `fdio_t`, which this
    // file only ever handles through `*const` pointers.
    pub type fdio_t = ();

    #[link(name = "fdio")]
    extern "C" {
        // Maps a file descriptor to its fdio object. `EventedFd` pairs each
        // call with one `fdio_unsafe_release` in its `Drop` impl.
        // NOTE(review): presumably returns null for an unknown fd — confirm
        // against the fdio headers.
        pub fn fdio_unsafe_fd_to_io(fd: RawFd) -> *const fdio_t;
        // Gives back an fdio object obtained from `fdio_unsafe_fd_to_io`.
        pub fn fdio_unsafe_release(io: *const fdio_t);

        // Translates an event mask into the handle and zircon signals to wait
        // on; both are written through the out-parameters.
        pub fn fdio_unsafe_wait_begin(
            io: *const fdio_t,
            events: u32,
            handle_out: &mut zx_handle_t,
            signals_out: &mut zx_signals_t,
        );

        // Translates observed zircon signals back into an event mask, written
        // through `events_out`.
        pub fn fdio_unsafe_wait_end(io: *const fdio_t, signals: zx_signals_t, events_out: &mut u32);
    }
}