// virtio_device/queue.rs
1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Virtqueue management wrappers.
6//!
7//! This is a slightly opinionated wrapper that presents the underlying [`Device`](ring::Device) and
8//! [`Driver`](ring::Driver) rings as a single 'virtqueue' where descriptor chains can be retrieved,
9//! walked or iterated, and then returned.
10//!
11//! The primary opinionated decision taken by this wrapper is that a descriptor chain is considered
//! a formal object that automatically returns itself to the queue when dropped. This return is
13//! then required to have a mechanism to potentially signal the guest driver, via the
14//! [`DriverNotify`] trait.
15
16use crate::mem::{DeviceRange, DriverRange};
17use crate::ring;
18use fuchsia_sync::Mutex;
19use std::sync::atomic;
20use thiserror::Error;
21
/// Callback used by [`Queue`] to request a guest driver notification.
///
/// The virtio specification sometimes requires the driver to be told when descriptor chains are
/// returned to the used ring. Because chains may be returned out-of-band from a `drop`
/// implementation, where no return value can carry that requirement, the queue reports it through
/// this trait instead. The queue itself has no knowledge of the transport protocol or of how the
/// guest should actually be signaled, so delivering the notification is delegated to the higher
/// level device code that owns the [`Queue`].
pub trait DriverNotify {
    /// Requests that the driver be notified.
    ///
    /// Called when the device must notify the driver for the virtio protocol to continue
    /// correctly. The notification need not happen synchronously inside this call — it may be
    /// recorded and delivered later — but the driver may be unable to make progress until it is
    /// delivered.
    fn notify(&self);
}
39
/// Mutable state of a virtqueue.
///
/// Includes both the reference to the [`Device`](ring::Device), which is the memory shared with the
/// guest that we actually need to manipulate, as well as additional state needed for us to
/// correctly implement the virtio protocol. Captured here in a separate struct so that it can be
/// wrapped in a [`Mutex`] in the [`Queue`].
struct State<'a> {
    // Device-owned portion of the ring (the used ring) through which chains are returned.
    device: ring::Device<'a>,
    // Next index in avail that we expect to become available.
    next: u16,
    // Next index in used that we will publish at.
    next_used: u16,
}
53
impl<'a> State<'a> {
    /// Return a descriptor chain.
    ///
    /// `used` identifies the chain being returned (the index of its first descriptor) together
    /// with the number of bytes that were written by the device to the beginning of the buffer.
    ///
    /// Returns to the driver, by writing it to the device owned ring, and returns the index that
    /// it was published at. This index is intended to be used when determining whether the driver
    /// needs a notification.
    fn return_chain(&mut self, used: ring::Used) -> u16 {
        let submit = self.next_used;
        self.device.insert_used(used, submit);
        // Advance past the slot we just filled, then publish the new index so the driver can
        // observe the entry.
        self.next_used = submit.wrapping_add(1);
        self.device.publish_used(self.next_used);
        // Return the index that we just published.
        submit
    }
}
72
/// Describes the memory ranges for a queue.
///
/// Collects the three different memory ranges that combined make up the [`Driver`](ring::Driver)
/// and [`Device`](ring::Device) portions of a queue. This exists as a way to conveniently name the
/// members for passing to [`Queue::new`]
#[derive(Debug, Clone)]
pub struct QueueMemory<'a> {
    /// Memory backing the descriptor table.
    pub desc: DeviceRange<'a>,
    /// Memory backing the driver-owned available ring.
    pub avail: DeviceRange<'a>,
    /// Memory backing the device-owned used ring.
    pub used: DeviceRange<'a>,
}
84
/// Representation of a virtqueue.
///
/// Aside from construction of the queue the only provided method is to retrieve the [`next_chain`]
/// (Queue::next_chain) if one has been published by the driver. The [`DescChain`], if one is
/// returned, implements a custom `drop` to return the chain to the queue, and hence to the driver.
// Currently event_idx feature is hard set to false. Support exists though for the device
// requirements of handling suppression from the driver, however no interface is yet exposed here
// for the device to tell the driver any event requirements.
pub struct Queue<'a, N> {
    // Driver-owned rings (descriptor table + avail ring). Read-only from our side, so it lives
    // outside the mutex.
    driver: ring::Driver<'a>,
    // Mutable queue state (used ring plus our ring cursors), guarded for concurrent access.
    state: Mutex<State<'a>>,
    // Callback used to tell the device code that the driver needs a notification.
    notify: N,
    // Whether or not the EVENT_IDX feature was negotiated. This is stored here as we need it to
    // correctly determine when we should signal a notify to the driver.
    feature_event_idx: bool,
}
101
102impl<'a, N> Queue<'a, N> {
103 /// Constructs a new [`Queue`] from memory descriptions.
104 ///
105 /// Takes a [`QueueMemory`], which is just a list of memory regions, for which to create a
106 /// queue out of. This internally creates a [`Driver`](ring::Driver) and [`Device`]
107 /// (ring::Device) from the provided regions.
108 ///
109 /// The rings are assumed to not have yet been used and so the used and avail indices will
110 /// start at 0. There is presently no way to construct a [`Queue`] around rings that have
111 /// already been in use.
112 pub fn new(mem: QueueMemory<'a>, notify: N) -> Option<Self> {
113 Self::new_from_rings(
114 ring::Driver::new(mem.desc, mem.avail)?,
115 ring::Device::new(mem.used)?,
116 notify,
117 )
118 }
119
120 /// Construct a new [`Queue`] from provided rings.
121 ///
122 /// Consumes a [`Driver`](ring::Driver) and [`Device`](ring::Device) to create a `Queue`.
123 /// It is expected that [`new`](#new) will typically be more useful to automate the ring
124 /// construction.
125 ///
126 /// Has the same initial ring state assumptions as [`new`](#new).
127 pub fn new_from_rings(
128 driver: ring::Driver<'a>,
129 device: ring::Device<'a>,
130 notify: N,
131 ) -> Option<Self> {
132 if driver.queue_size() != device.queue_size() {
133 return None;
134 }
135 Some(Queue {
136 driver,
137 state: Mutex::new(State { device, next: 0, next_used: 0 }),
138 notify,
139 feature_event_idx: false,
140 })
141 }
142
143 fn take_avail(&self) -> Option<u16> {
144 let mut state = self.state.lock();
145 let ret = self.driver.get_avail(state.next);
146 if ret.is_some() {
147 state.next = state.next.wrapping_add(1);
148 }
149 ret
150 }
151}
152
153impl<'a, N: DriverNotify> Queue<'a, N> {
154 /// Return any available [`DescChain`].
155 ///
156 /// Polls the available ring for any queued descriptor chain, and if found returns a
157 /// [`DescChain`] abstraction around it.
158 ///
159 /// It is the responsibility of the device to know, presumably via the transport level queue
160 /// notifications, when a descriptor chain might be available and to call this polling function.
161 ///
162 /// The [`DescChain`] is automatically returned the driver, via the used ring, when it is
163 /// dropped.
164 pub fn next_chain<'b>(&'b self) -> Option<DescChain<'a, 'b, N>> {
165 if let Some(desc_index) = self.take_avail() {
166 Some(DescChain { queue: self, first_desc: desc_index })
167 } else {
168 None
169 }
170 }
171 fn return_chain(&self, used: ring::Used) {
172 let submitted = self.state.lock().return_chain(used);
173 // Must ensure the read of flags or used_event occurs *after* we have returned the chain
174 // and published the index. We also need to ensure that in the event we do send an
175 // interrupt that any state and idx updates have been written. In this case acquire/release
176 // is not sufficient since the 'acquire' will prevent future loads re-ordering earlier, and
177 // the release will prevent past writes from re-ordering later, but we need a past write and
178 // a future load to not be re-ordered. Therefore we require sequentially consistent
179 // semantics.
180 atomic::fence(atomic::Ordering::SeqCst);
181 if self.driver.needs_notification(self.feature_event_idx, submitted) {
182 self.notify.notify();
183 }
184 }
185}
186
/// Descriptor type
///
/// May be an indirect descriptor type ( see virtio spec 2.7.5.3 ) or a regular aka
/// direct descriptor type.
/// Regular descriptor wraps up [`ring::DescAccess`]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DescType {
    /// Regular descriptor referring directly to a buffer, with its device access mode.
    Direct(ring::DescAccess),
    /// Descriptor referring to a table of further descriptors (virtio spec 2.7.5.3).
    Indirect,
}
/// Reference to descriptor data.
///
/// Provides a higher level representation of a descriptors payload, compared to what
/// [`ring::Desc::data`] reports. The conversion of a [`ring::Desc`] into a `Desc` necessitates some
/// error checking and can fail with a [`DescError::BadRange`].
///
/// Is provided as a [`DriverRange`] as the [`DescChain`] and its [iterator](DescChainIter) have no
/// way to translate a [`DriverRange`] and this responsibility is offloaded to the caller.
// Tuple of (descriptor type, data range). Both fields are public as this is a plain data carrier.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Desc(pub DescType, pub DriverRange);
207
208impl TryFrom<ring::Desc> for Desc {
209 type Error = DescError;
210
211 fn try_from(desc: ring::Desc) -> Result<Self, Self::Error> {
212 let range = desc.data();
213 TryInto::<DriverRange>::try_into(range)
214 .map_err(|()| DescError::BadRange(range.0, range.1))
215 .map(|range| {
216 if desc.is_indirect() {
217 Desc(DescType::Indirect, range)
218 } else {
219 Desc(DescType::Direct(desc.access_type()), range)
220 }
221 })
222 }
223}
224
/// Errors that occur when walking descriptor chains via [`DescChainIter`].
#[derive(Error, Debug, Clone, PartialEq, Eq)]
pub enum DescError {
    /// A descriptor index did not resolve to a descriptor in the ring.
    #[error("Descriptor {0} is not in the range of the ring")]
    InvalidIndex(u16),
    /// A descriptor's (address, length) pair could not be converted into a valid [`DriverRange`].
    #[error("Descriptor data range addr: {0} len: {1} is not a valid driver range")]
    BadRange(u64, u32),
}
233
/// Iterates over a [`DescChain`].
///
/// The iterator provides a [`Desc`] representing each [virtio descriptor](ring::Desc) in the chain.
/// Walking this chain may generate errors due to a faulty or malicious guest providing corrupt
/// descriptors.
///
/// Only the most minimal validation is done to yield valid [`Desc`], with no virtio protocol
/// validation being performed. In particular, although the virtio specification says that all
/// readable descriptors must appear before writable ones, this is not enforced or checked for by
/// this iterator.
///
/// A lifetime is associated with the underlying [`Queue`], but not the [`DescChain`] this is
/// iterating. This makes it possible, albeit not advised, to hold an iterator after having returned
/// a chain to the guest. Doing so will almost certainly result in violating the virtio protocol and
/// will confuse the guest, but there are no safety concerns. Restricting the iterator to the
/// lifetime of the chain makes them cumbersome and you should almost always be using the
/// abstractions provided by the [`chain`](crate::chain) module instead of these iterators directly.
pub struct DescChainIter<'a, 'b, N: DriverNotify> {
    // Queue whose descriptor table backs the chain being walked.
    queue: &'b Queue<'a, N>,
    // Index of the next descriptor to yield; `None` once iteration has completed or failed.
    desc: Option<u16>,
}
255
256impl<'a, 'b, N: DriverNotify> DescChainIter<'a, 'b, N> {
257 /// Cause the iterator to complete.
258 ///
259 /// Places the iterator in a state where it will always produce None.
260 pub fn complete(&mut self) {
261 self.desc = None;
262 }
263}
264
265impl<'a, 'b, N: DriverNotify> Iterator for DescChainIter<'a, 'b, N> {
266 type Item = Result<Desc, DescError>;
267 fn next(&mut self) -> Option<Self::Item> {
268 self.desc.map(|ret| {
269 match self.queue.driver.get_desc(ret.into()).ok_or(DescError::InvalidIndex(ret)) {
270 Ok(desc) => {
271 // If we were able to lookup the descriptor then we can always retrieve the
272 // next one, even if this one reports a bad range.
273 self.desc = desc.next();
274 desc.try_into()
275 }
276 Err(e) => {
277 // Not able to find the next descriptor, so we must terminate the iteration
278 // after reporting this error.
279 self.desc = None;
280 Err(e)
281 }
282 }
283 })
284 }
285}
286
287impl<'a, 'b, N: DriverNotify> Clone for DescChainIter<'a, 'b, N> {
288 fn clone(&self) -> DescChainIter<'a, 'b, N> {
289 DescChainIter { queue: self.queue, desc: self.desc }
290 }
291}
292
/// Represents a chain of descriptors in the available ring.
///
/// The `DescChain` is a thin representation over a virtio descriptor chain. It can either be walked
/// using its [iterator](#iter), yielding the readable and writable portions, or it can be returned
/// to the ring.
///
/// Although returning happens automatically when dropped, if data was written into the descriptors
/// the chain needs to be explicitly returned with [`return_written`](#return_written) to propagate
/// the portion that was written.
pub struct DescChain<'a, 'b, N: DriverNotify> {
    // Queue this chain was taken from, and to which it is returned on drop.
    queue: &'b Queue<'a, N>,
    // Index of the first descriptor in the chain, as published in the avail ring.
    first_desc: u16,
}
306
307impl<'a, 'b, N: DriverNotify> DescChain<'a, 'b, N> {
308 /// Iterate over the descriptor chain
309 ///
310 /// See [`DescChainIter`].
311 pub fn iter(&self) -> DescChainIter<'a, 'b, N> {
312 DescChainIter { queue: self.queue, desc: Some(self.first_desc) }
313 }
314
315 /// Explicitly return a written to chain.
316 ///
317 /// Returns the chain to the used ring, as if the chain was dropped, but also forwards how much
318 /// of the chain was written to. No validation or manipulation is performed on written amount
319 /// and it is faithfully passed through. In particular you can claim to have written more bytes
320 /// than were made available for writing by the chain.
321 pub fn return_written(self, written: u32) {
322 self.queue.return_chain(ring::Used::new(self.first_desc, written));
323 // Don't call drop so that we avoid returning the chain a second time.
324 std::mem::forget(self)
325 }
326}
327impl<'a, 'b, N: DriverNotify> Drop for DescChain<'a, 'b, N> {
328 fn drop(&mut self) {
329 // By default return the chain with a write of 0, since as far as we know nothing was
330 // written.
331 self.queue.return_chain(ring::Used::new(self.first_desc, 0));
332 }
333}
334
#[cfg(test)]
mod tests {
    use super::*;
    use crate::fake_queue::{Chain, ChainBuilder, IdentityDriverMem, TestQueue};
    use crate::ring::DescAccess;
    use crate::util::NotificationCounter;

    // Queue construction must validate the provided ring memory.
    #[test]
    fn test_create() {
        let driver_mem = IdentityDriverMem::new();
        // Should fail for non pow-2 ring
        let mem = driver_mem.alloc_queue_memory(3).unwrap();
        let notify = NotificationCounter::new();
        assert!(Queue::new(mem, notify.clone()).is_none());
        // Also fail for not same sized rings.
        let mem = driver_mem.alloc_queue_memory(4).unwrap();
        let mem2 = driver_mem.alloc_queue_memory(8).unwrap();
        assert!(Queue::new(
            QueueMemory { desc: mem.desc, avail: mem.avail, used: mem2.used },
            notify.clone(),
        )
        .is_none());
        // Correctly sized rings should work
        let mem = driver_mem.alloc_queue_memory(4).unwrap();
        assert!(Queue::new(mem, notify.clone()).is_some());
    }

    // Returning a chain, implicitly via drop or explicitly via return_written, must publish a
    // used entry and trigger a driver notification.
    #[test]
    fn test_notify_and_return() {
        let driver_mem = IdentityDriverMem::new();
        let mut state = TestQueue::new(32, &driver_mem);
        // Should be nothing notified or queued yet.
        assert_eq!(state.notify.get(), 0);
        assert!(state.queue.next_chain().is_none());
        // Publish a chain
        assert!(state.fake_queue.publish(Chain::with_lengths(&[32], &[], &driver_mem)).is_some());
        let chain = state.queue.next_chain().unwrap();
        assert_eq!(state.notify.get(), 0);
        // If we drop the chain it should get returned and trigger a notification.
        std::mem::drop(chain);
        assert_eq!(state.notify.get(), 1);
        // And there should be something on the driver side.
        assert!(state.fake_queue.next_used().is_some());
        // Should also be able to explicitly return a written amount and see it in the driver.
        assert!(state.fake_queue.publish(Chain::with_lengths(&[], &[32], &driver_mem)).is_some());
        let chain = state.queue.next_chain().unwrap();
        chain.return_written(16);
        let used = state.fake_queue.next_used().unwrap();
        assert_eq!(used.written(), 16);
    }

    // Walking well-formed chains should reproduce exactly the descriptors that were published,
    // in order, across a variety of chain shapes.
    #[test]
    fn test_good_chain_iter() {
        let driver_mem = IdentityDriverMem::new();
        let mut state = TestQueue::new(32, &driver_mem);
        // Build and insert a variety of chains.
        let chains: [&[(DescAccess, u64, u32)]; 4] = [
            &[(DescAccess::DeviceRead, 100, 42)],
            &[(DescAccess::DeviceWrite, 200, 64)],
            &[(DescAccess::DeviceRead, 1000, 20), (DescAccess::DeviceRead, 300, 40)],
            &[
                (DescAccess::DeviceRead, 4000, 40),
                (DescAccess::DeviceWrite, 400, 64),
                (DescAccess::DeviceWrite, 8000, 80),
            ],
        ];
        for chain in chains {
            assert!(state.fake_queue.publish(Chain::with_exact_data(chain)).is_some());
        }
        // Now read them all out and walk the iterators to ensure a match.
        for chain in chains {
            assert!(state
                .queue
                .next_chain()
                .unwrap()
                .iter()
                .map(|desc| match desc {
                    Ok(Desc(DescType::Direct(access), range)) =>
                        (access, range.0.start as u64, range.0.len() as u32),
                    Ok(Desc(DescType::Indirect, _)) => (DescAccess::DeviceRead, 0, 0),
                    Err(_) => (DescAccess::DeviceRead, 0, 0),
                })
                .eq(chain.iter().cloned()));
        }
    }

    // A descriptor with an invalid data range yields BadRange for that element, but iteration
    // can continue over the remaining descriptors in the chain.
    #[test]
    fn test_bad_range_iter() {
        let driver_mem = IdentityDriverMem::new();
        let mut state = TestQueue::new(32, &driver_mem);
        // Build a chain with some invalid ranges, we should still be able to iterate it.
        assert!(state
            .fake_queue
            .publish(Chain::with_exact_data(&[
                (DescAccess::DeviceRead, 100, 42),
                (DescAccess::DeviceRead, u64::MAX - 10, 20),
                (DescAccess::DeviceRead, u64::MAX - 20, 5)
            ]))
            .is_some());
        let chain = state.queue.next_chain().unwrap();
        let mut iter = chain.iter();
        assert_eq!(
            iter.next().unwrap(),
            Ok(Desc(DescType::Direct(DescAccess::DeviceRead), (100, 42).try_into().unwrap()))
        );
        assert_eq!(iter.next().unwrap(), Err(DescError::BadRange(u64::MAX - 10, 20)));
        assert_eq!(
            iter.next().unwrap(),
            Ok(Desc(
                DescType::Direct(DescAccess::DeviceRead),
                (u64::MAX - 20, 5).try_into().unwrap()
            ))
        );
        assert_eq!(iter.next(), None);
    }

    // An invalid next-descriptor index yields InvalidIndex and terminates iteration, since the
    // rest of the chain cannot be located.
    #[test]
    fn test_bad_index_iter() {
        let driver_mem = IdentityDriverMem::new();
        let mut state = TestQueue::new(32, &driver_mem);
        // Build a chain with an invalid descriptor index in the middle.
        let chain = ChainBuilder::new()
            .readable_reference(100, 42)
            .amend_next(33)
            .readable_zeroed(30, &driver_mem)
            .build();
        assert!(state.fake_queue.publish(chain).is_some());
        let chain = state.queue.next_chain().unwrap();
        let mut iter = chain.iter();
        assert_eq!(
            iter.next().unwrap(),
            Ok(Desc(DescType::Direct(DescAccess::DeviceRead), (100, 42).try_into().unwrap()))
        );
        assert_eq!(iter.next().unwrap(), Err(DescError::InvalidIndex(33)));
        assert_eq!(iter.next(), None);
    }
}
471}