virtio_device/queue.rs

// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Virtqueue management wrappers.
//!
//! This is a slightly opinionated wrapper that presents the underlying [`Device`](ring::Device) and
//! [`Driver`](ring::Driver) rings as a single 'virtqueue' where descriptor chains can be retrieved,
//! walked or iterated, and then returned.
//!
//! The primary opinionated decision taken by this wrapper is that a descriptor chain is treated as
//! a formal object that automatically returns itself to the queue when dropped. Returning a chain
//! may in turn require notifying the guest driver, which is done via the [`DriverNotify`] trait.
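//!
//! # Example
//!
//! A minimal polling sketch of how a device might drive a [`Queue`]. The crate paths and the
//! device specific I/O are illustrative assumptions; a real device obtains the queue memory from
//! its transport and translates each [`DriverRange`] before accessing it.
//!
//! ```no_run
//! # use virtio_device::queue::{DriverNotify, Queue};
//! fn poll_once<N: DriverNotify>(queue: &Queue<'_, N>) {
//!     // Process every descriptor chain the driver has currently made available.
//!     while let Some(chain) = queue.next_chain() {
//!         for desc in chain.iter() {
//!             // Each item is a Result<Desc, DescError>; translate the DriverRange and perform
//!             // the device specific I/O here.
//!             let _ = desc;
//!         }
//!         // Dropping the chain returns it to the used ring with zero bytes written. A device
//!         // that filled writable descriptors would call `chain.return_written(bytes)` instead.
//!     }
//! }
//! ```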

use crate::mem::{DeviceRange, DriverRange};
use crate::ring;
use fuchsia_sync::Mutex;
use std::sync::atomic;
use thiserror::Error;

/// Informs the device that the driver needs a notification.
///
/// When returning descriptor chains to the queue, the virtio specification may require that the
/// driver be notified. As descriptor chains can be returned out-of-band during a `drop`, with no
/// opportunity to report the need for a notification via a return code, this trait exists instead.
/// Further, since the queue has no understanding of the transport protocol, or how the guest
/// should be notified in general, it must appeal to the higher level device code using the
/// [`Queue`].
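///
/// A minimal sketch of an implementation that simply latches a flag for the device's transport
/// code to consume later (the type and crate path here are illustrative, not part of this module):
///
/// ```no_run
/// # use std::sync::atomic::{AtomicBool, Ordering};
/// # use virtio_device::queue::DriverNotify;
/// #[derive(Default)]
/// struct FlagNotify(AtomicBool);
///
/// impl DriverNotify for FlagNotify {
///     fn notify(&self) {
///         // Record that a notification is pending; actually interrupting the guest can happen
///         // later, e.g. when the device finishes its current batch of work.
///         self.0.store(true, Ordering::Release);
///     }
/// }
/// ```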
pub trait DriverNotify {
    /// Driver requires a notification.
    ///
    /// Indicates the device must notify the driver for correct continuation of the virtio protocol.
    /// The notification does not need to happen synchronously during this call; it can be stored
    /// and performed at some later point, but the driver may not make progress until it is
    /// notified.
    fn notify(&self);
}

/// Mutable state of a virtqueue.
///
/// Includes both the reference to the [`Device`](ring::Device), which is the memory shared with the
/// guest that we actually need to manipulate, and additional state needed for us to correctly
/// implement the virtio protocol. Captured here in a separate struct so that it can be wrapped in a
/// [`Mutex`] in the [`Queue`].
struct State<'a> {
    device: ring::Device<'a>,
    // Next index in avail that we expect to become available.
    next: u16,
    // Next index in used that we will publish at.
    next_used: u16,
}

impl<'a> State<'a> {
    /// Return a descriptor chain to the driver.
    ///
    /// The provided [`ring::Used`] entry identifies the chain along with the number of bytes that
    /// were written by the device to the beginning of the buffer.
    ///
    /// The chain is returned by writing the entry to the device owned used ring. Returns the index
    /// that it was published at. This index is intended to be used when determining whether the
    /// driver needs a notification.
    fn return_chain(&mut self, used: ring::Used) -> u16 {
        let submit = self.next_used;
        self.device.insert_used(used, submit);
        self.next_used = submit.wrapping_add(1);
        self.device.publish_used(self.next_used);
        // Return the index that we just published.
        submit
    }
}

/// Describes the memory ranges for a queue.
///
/// Collects the three different memory ranges that combined make up the [`Driver`](ring::Driver)
/// and [`Device`](ring::Device) portions of a queue. This exists as a way to conveniently name the
/// members for passing to [`Queue::new`].
#[derive(Debug, Clone)]
pub struct QueueMemory<'a> {
    pub desc: DeviceRange<'a>,
    pub avail: DeviceRange<'a>,
    pub used: DeviceRange<'a>,
}

/// Representation of a virtqueue.
///
/// Aside from construction of the queue, the only provided method is to retrieve the
/// [`next_chain`](Queue::next_chain) if one has been published by the driver. The [`DescChain`],
/// if one is returned, implements a custom `drop` to return the chain to the queue, and hence to
/// the driver.
// Currently the event_idx feature is hard set to false. Support exists for handling notification
// suppression requests from the driver, however no interface is yet exposed here for the device to
// tell the driver any event requirements.
pub struct Queue<'a, N> {
    driver: ring::Driver<'a>,
    state: Mutex<State<'a>>,
    notify: N,
    // Whether or not the EVENT_IDX feature was negotiated. This is stored here as we need it to
    // correctly determine when we should signal a notify to the driver.
    feature_event_idx: bool,
}

impl<'a, N> Queue<'a, N> {
    /// Constructs a new [`Queue`] from memory descriptions.
    ///
    /// Takes a [`QueueMemory`], which is just a list of memory regions, from which to create a
    /// queue. This internally creates a [`Driver`](ring::Driver) and [`Device`](ring::Device)
    /// from the provided regions.
    ///
    /// The rings are assumed to not have yet been used and so the used and avail indices will
    /// start at 0. There is presently no way to construct a [`Queue`] around rings that have
    /// already been in use.
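    ///
    /// A construction sketch, assuming the transport has already produced a [`QueueMemory`] and
    /// some notify object (the crate path is illustrative):
    ///
    /// ```no_run
    /// # use virtio_device::queue::{Queue, QueueMemory};
    /// fn build_queue<N>(mem: QueueMemory<'_>, notify: N) -> Queue<'_, N> {
    ///     // `new` returns None if the provided rings are malformed, e.g. differently sized.
    ///     Queue::new(mem, notify).expect("driver provided invalid queue memory")
    /// }
    /// ```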
    pub fn new(mem: QueueMemory<'a>, notify: N) -> Option<Self> {
        Self::new_from_rings(
            ring::Driver::new(mem.desc, mem.avail)?,
            ring::Device::new(mem.used)?,
            notify,
        )
    }

    /// Construct a new [`Queue`] from provided rings.
    ///
    /// Consumes a [`Driver`](ring::Driver) and [`Device`](ring::Device) to create a `Queue`.
    /// It is expected that [`new`](#new) will typically be more convenient, as it automates the
    /// ring construction.
    ///
    /// Has the same initial ring state assumptions as [`new`](#new).
    pub fn new_from_rings(
        driver: ring::Driver<'a>,
        device: ring::Device<'a>,
        notify: N,
    ) -> Option<Self> {
        if driver.queue_size() != device.queue_size() {
            return None;
        }
        Some(Queue {
            driver,
            state: Mutex::new(State { device, next: 0, next_used: 0 }),
            notify,
            feature_event_idx: false,
        })
    }

    fn take_avail(&self) -> Option<u16> {
        let mut state = self.state.lock();
        let ret = self.driver.get_avail(state.next);
        if ret.is_some() {
            state.next = state.next.wrapping_add(1);
        }
        ret
    }
}

impl<'a, N: DriverNotify> Queue<'a, N> {
    /// Return any available [`DescChain`].
    ///
    /// Polls the available ring for any queued descriptor chain, and if found returns a
    /// [`DescChain`] abstraction around it.
    ///
    /// It is the responsibility of the device to know, presumably via the transport level queue
    /// notifications, when a descriptor chain might be available and to call this polling function.
    ///
    /// The [`DescChain`] is automatically returned to the driver, via the used ring, when it is
    /// dropped.
    pub fn next_chain<'b>(&'b self) -> Option<DescChain<'a, 'b, N>> {
        self.take_avail().map(|first_desc| DescChain { queue: self, first_desc })
    }

    fn return_chain(&self, used: ring::Used) {
        let submitted = self.state.lock().return_chain(used);
        // Must ensure the read of flags or used_event occurs *after* we have returned the chain
        // and published the index. We also need to ensure that, in the event we do send an
        // interrupt, any state and idx updates have been written. In this case acquire/release
        // is not sufficient since the 'acquire' will prevent future loads re-ordering earlier, and
        // the release will prevent past writes from re-ordering later, but we need a past write and
        // a future load to not be re-ordered. Therefore we require sequentially consistent
        // semantics.
        atomic::fence(atomic::Ordering::SeqCst);
        if self.driver.needs_notification(self.feature_event_idx, submitted) {
            self.notify.notify();
        }
    }
}

/// Descriptor type.
///
/// A descriptor is either an indirect descriptor (see virtio spec 2.7.5.3) or a regular, aka
/// direct, descriptor. A direct descriptor wraps the [`ring::DescAccess`] access type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DescType {
    Direct(ring::DescAccess),
    Indirect,
}

/// Reference to descriptor data.
///
/// Provides a higher level representation of a descriptor's payload, compared to what
/// [`ring::Desc::data`] reports. The conversion of a [`ring::Desc`] into a `Desc` necessitates some
/// error checking and can fail with a [`DescError::BadRange`].
///
/// The payload is provided as a [`DriverRange`], as the [`DescChain`] and its
/// [iterator](DescChainIter) have no way to translate a [`DriverRange`]; this responsibility is
/// offloaded to the caller.
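///
/// For example, a device might dispatch on the descriptor type like so (crate paths are
/// illustrative):
///
/// ```no_run
/// # use virtio_device::queue::{Desc, DescType};
/// # use virtio_device::ring::DescAccess;
/// fn describe(desc: &Desc) -> &'static str {
///     match desc {
///         Desc(DescType::Direct(DescAccess::DeviceRead), _) => "device readable",
///         Desc(DescType::Direct(DescAccess::DeviceWrite), _) => "device writable",
///         Desc(DescType::Indirect, _) => "indirect descriptor table",
///     }
/// }
/// ```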
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Desc(pub DescType, pub DriverRange);

impl TryFrom<ring::Desc> for Desc {
    type Error = DescError;

    fn try_from(desc: ring::Desc) -> Result<Self, Self::Error> {
        let range = desc.data();
        TryInto::<DriverRange>::try_into(range)
            .map_err(|()| DescError::BadRange(range.0, range.1))
            .map(|range| {
                if desc.is_indirect() {
                    Desc(DescType::Indirect, range)
                } else {
                    Desc(DescType::Direct(desc.access_type()), range)
                }
            })
    }
}

/// Errors that occur when walking descriptor chains via [`DescChainIter`].
#[derive(Error, Debug, Clone, PartialEq, Eq)]
pub enum DescError {
    #[error("Descriptor {0} is not in the range of the ring")]
    InvalidIndex(u16),
    #[error("Descriptor data range addr: {0} len: {1} is not a valid driver range")]
    BadRange(u64, u32),
}

/// Iterates over a [`DescChain`].
///
/// The iterator provides a [`Desc`] representing each [virtio descriptor](ring::Desc) in the chain.
/// Walking this chain may generate errors due to a faulty or malicious guest providing corrupt
/// descriptors.
///
/// Only the most minimal validation is done to yield a valid [`Desc`], with no virtio protocol
/// validation being performed. In particular, although the virtio specification says that all
/// readable descriptors must appear before writable ones, this is not enforced or checked for by
/// this iterator.
///
/// A lifetime is associated with the underlying [`Queue`], but not the [`DescChain`] this is
/// iterating. This makes it possible, albeit not advised, to hold an iterator after having returned
/// a chain to the guest. Doing so will almost certainly result in violating the virtio protocol and
/// will confuse the guest, but there are no safety concerns. Restricting the iterator to the
/// lifetime of the chain would make it cumbersome to use, and you should almost always use the
/// abstractions provided by the [`chain`](crate::chain) module instead of these iterators directly.
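///
/// A sketch of consuming the iterator while handling the possible errors (crate paths are
/// illustrative):
///
/// ```no_run
/// # use virtio_device::queue::{Desc, DescChainIter, DescError, DescType, DriverNotify};
/// # use virtio_device::ring::DescAccess;
/// // Count the device writable descriptors in a chain, stopping at the first bad descriptor.
/// fn count_writable<N: DriverNotify>(
///     iter: DescChainIter<'_, '_, N>,
/// ) -> Result<usize, DescError> {
///     let mut count = 0;
///     for desc in iter {
///         if matches!(desc?, Desc(DescType::Direct(DescAccess::DeviceWrite), _)) {
///             count += 1;
///         }
///     }
///     Ok(count)
/// }
/// ```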
pub struct DescChainIter<'a, 'b, N: DriverNotify> {
    queue: &'b Queue<'a, N>,
    desc: Option<u16>,
}

impl<'a, 'b, N: DriverNotify> DescChainIter<'a, 'b, N> {
    /// Cause the iterator to complete.
    ///
    /// Places the iterator in a state where it will always produce None.
    pub fn complete(&mut self) {
        self.desc = None;
    }
}

impl<'a, 'b, N: DriverNotify> Iterator for DescChainIter<'a, 'b, N> {
    type Item = Result<Desc, DescError>;
    fn next(&mut self) -> Option<Self::Item> {
        self.desc.map(|ret| {
            match self.queue.driver.get_desc(ret.into()).ok_or(DescError::InvalidIndex(ret)) {
                Ok(desc) => {
                    // If we were able to lookup the descriptor then we can always retrieve the
                    // next one, even if this one reports a bad range.
                    self.desc = desc.next();
                    desc.try_into()
                }
                Err(e) => {
                    // Not able to find the next descriptor, so we must terminate the iteration
                    // after reporting this error.
                    self.desc = None;
                    Err(e)
                }
            }
        })
    }
}

impl<'a, 'b, N: DriverNotify> Clone for DescChainIter<'a, 'b, N> {
    fn clone(&self) -> DescChainIter<'a, 'b, N> {
        DescChainIter { queue: self.queue, desc: self.desc }
    }
}

/// Represents a chain of descriptors in the available ring.
///
/// The `DescChain` is a thin representation over a virtio descriptor chain. It can either be walked
/// using its [iterator](#iter), yielding the readable and writable portions, or it can be returned
/// to the ring.
///
/// Although returning happens automatically when dropped, if data was written into the descriptors
/// the chain needs to be explicitly returned with [`return_written`](#return_written) to propagate
/// the portion that was written.
pub struct DescChain<'a, 'b, N: DriverNotify> {
    queue: &'b Queue<'a, N>,
    first_desc: u16,
}

impl<'a, 'b, N: DriverNotify> DescChain<'a, 'b, N> {
    /// Iterate over the descriptor chain.
    ///
    /// See [`DescChainIter`].
    pub fn iter(&self) -> DescChainIter<'a, 'b, N> {
        DescChainIter { queue: self.queue, desc: Some(self.first_desc) }
    }

    /// Explicitly return a written-to chain.
    ///
    /// Returns the chain to the used ring, as if the chain was dropped, but also forwards how much
    /// of the chain was written to. No validation or manipulation is performed on the written
    /// amount and it is faithfully passed through. In particular you can claim to have written more
    /// bytes than were made available for writing by the chain.
    pub fn return_written(self, written: u32) {
        self.queue.return_chain(ring::Used::new(self.first_desc, written));
        // Don't call drop so that we avoid returning the chain a second time.
        std::mem::forget(self)
    }
}

impl<'a, 'b, N: DriverNotify> Drop for DescChain<'a, 'b, N> {
    fn drop(&mut self) {
        // By default return the chain with a write of 0, since as far as we know nothing was
        // written.
        self.queue.return_chain(ring::Used::new(self.first_desc, 0));
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::fake_queue::{Chain, ChainBuilder, IdentityDriverMem, TestQueue};
    use crate::ring::DescAccess;
    use crate::util::NotificationCounter;

    #[test]
    fn test_create() {
        let driver_mem = IdentityDriverMem::new();
        // Should fail for a non power-of-two ring size.
        let mem = driver_mem.alloc_queue_memory(3).unwrap();
        let notify = NotificationCounter::new();
        assert!(Queue::new(mem, notify.clone()).is_none());
        // Should also fail for rings of different sizes.
        let mem = driver_mem.alloc_queue_memory(4).unwrap();
        let mem2 = driver_mem.alloc_queue_memory(8).unwrap();
        assert!(Queue::new(
            QueueMemory { desc: mem.desc, avail: mem.avail, used: mem2.used },
            notify.clone(),
        )
        .is_none());
        // Correctly sized rings should work.
        let mem = driver_mem.alloc_queue_memory(4).unwrap();
        assert!(Queue::new(mem, notify.clone()).is_some());
    }

    #[test]
    fn test_notify_and_return() {
        let driver_mem = IdentityDriverMem::new();
        let mut state = TestQueue::new(32, &driver_mem);
        // Should be nothing notified or queued yet.
        assert_eq!(state.notify.get(), 0);
        assert!(state.queue.next_chain().is_none());
        // Publish a chain.
        assert!(state.fake_queue.publish(Chain::with_lengths(&[32], &[], &driver_mem)).is_some());
        let chain = state.queue.next_chain().unwrap();
        assert_eq!(state.notify.get(), 0);
        // If we drop the chain it should get returned and trigger a notification.
        std::mem::drop(chain);
        assert_eq!(state.notify.get(), 1);
        // And there should be something on the driver side.
        assert!(state.fake_queue.next_used().is_some());
        // Should also be able to explicitly return a written amount and see it in the driver.
        assert!(state.fake_queue.publish(Chain::with_lengths(&[], &[32], &driver_mem)).is_some());
        let chain = state.queue.next_chain().unwrap();
        chain.return_written(16);
        let used = state.fake_queue.next_used().unwrap();
        assert_eq!(used.written(), 16);
    }

    #[test]
    fn test_good_chain_iter() {
        let driver_mem = IdentityDriverMem::new();
        let mut state = TestQueue::new(32, &driver_mem);
        // Build and insert a variety of chains.
        let chains: [&[(DescAccess, u64, u32)]; 4] = [
            &[(DescAccess::DeviceRead, 100, 42)],
            &[(DescAccess::DeviceWrite, 200, 64)],
            &[(DescAccess::DeviceRead, 1000, 20), (DescAccess::DeviceRead, 300, 40)],
            &[
                (DescAccess::DeviceRead, 4000, 40),
                (DescAccess::DeviceWrite, 400, 64),
                (DescAccess::DeviceWrite, 8000, 80),
            ],
        ];
        for chain in chains {
            assert!(state.fake_queue.publish(Chain::with_exact_data(chain)).is_some());
        }
        // Now read them all out and walk the iterators to ensure a match.
        for chain in chains {
            assert!(state
                .queue
                .next_chain()
                .unwrap()
                .iter()
                .map(|desc| match desc {
                    Ok(Desc(DescType::Direct(access), range)) =>
                        (access, range.0.start as u64, range.0.len() as u32),
                    Ok(Desc(DescType::Indirect, _)) => (DescAccess::DeviceRead, 0, 0),
                    Err(_) => (DescAccess::DeviceRead, 0, 0),
                })
                .eq(chain.iter().cloned()));
        }
    }

    #[test]
    fn test_bad_range_iter() {
        let driver_mem = IdentityDriverMem::new();
        let mut state = TestQueue::new(32, &driver_mem);
        // Build a chain with some invalid ranges; we should still be able to iterate it.
        assert!(state
            .fake_queue
            .publish(Chain::with_exact_data(&[
                (DescAccess::DeviceRead, 100, 42),
                (DescAccess::DeviceRead, u64::MAX - 10, 20),
                (DescAccess::DeviceRead, u64::MAX - 20, 5)
            ]))
            .is_some());
        let chain = state.queue.next_chain().unwrap();
        let mut iter = chain.iter();
        assert_eq!(
            iter.next().unwrap(),
            Ok(Desc(DescType::Direct(DescAccess::DeviceRead), (100, 42).try_into().unwrap()))
        );
        assert_eq!(iter.next().unwrap(), Err(DescError::BadRange(u64::MAX - 10, 20)));
        assert_eq!(
            iter.next().unwrap(),
            Ok(Desc(
                DescType::Direct(DescAccess::DeviceRead),
                (u64::MAX - 20, 5).try_into().unwrap()
            ))
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_bad_index_iter() {
        let driver_mem = IdentityDriverMem::new();
        let mut state = TestQueue::new(32, &driver_mem);
        // Build a chain with an invalid descriptor index in the middle.
        let chain = ChainBuilder::new()
            .readable_reference(100, 42)
            .amend_next(33)
            .readable_zeroed(30, &driver_mem)
            .build();
        assert!(state.fake_queue.publish(chain).is_some());
        let chain = state.queue.next_chain().unwrap();
        let mut iter = chain.iter();
        assert_eq!(
            iter.next().unwrap(),
            Ok(Desc(DescType::Direct(DescAccess::DeviceRead), (100, 42).try_into().unwrap()))
        );
        assert_eq!(iter.next().unwrap(), Err(DescError::InvalidIndex(33)));
        assert_eq!(iter.next(), None);
    }
}
471}