vmm/devices/virtio/queue.rs

1// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the THIRD-PARTY file.
7
8use std::num::Wrapping;
9use std::sync::atomic::{Ordering, fence};
10
11use crate::logger::error;
12use crate::utils::u64_to_usize;
13use crate::vstate::memory::{Bitmap, ByteValued, GuestAddress, GuestMemory};
14
15pub const VIRTQ_DESC_F_NEXT: u16 = 0x1;
16pub const VIRTQ_DESC_F_WRITE: u16 = 0x2;
17
18/// Max size of virtio queues offered by firecracker's virtio devices.
19pub(super) const FIRECRACKER_MAX_QUEUE_SIZE: u16 = 256;
20
21// GuestMemoryMmap::read_obj_from_addr() will be used to fetch the descriptor,
22// which has an explicit constraint that the entire descriptor doesn't
23// cross the page boundary. Otherwise the descriptor may be split into
24// two mmap regions which causes failure of GuestMemoryMmap::read_obj_from_addr().
25//
26// The Virtio Spec 1.0 defines the alignment of a VirtIO descriptor as 16 bytes,
27// which fulfills the explicit constraint of GuestMemoryMmap::read_obj_from_addr().
28
29#[derive(Debug, thiserror::Error, displaydoc::Display)]
30pub enum QueueError {
31    /// Descriptor index out of bounds: {0}.
32    DescIndexOutOfBounds(u16),
33    /// Failed to write value into the virtio queue used ring: {0}
34    MemoryError(#[from] vm_memory::GuestMemoryError),
35    /// Pointer is not aligned properly: {0:#x} not {1}-byte aligned.
36    PointerNotAligned(usize, usize),
37    /// Attempt to use virtio queue that is not marked ready
38    NotReady,
39    /// Virtio queue with invalid size: {0}
40    InvalidSize(u16),
41}
42
43/// Error type indicating the guest configured a virtio queue such that the avail_idx field would
44/// indicate there are more descriptors to process than the queue actually has space for.
45///
46/// Should this error bubble up to the event loop, we exit Firecracker, since this could be a
47/// potential malicious driver scenario. This way we also eliminate the risk of repeatedly
48/// logging and potentially clogging the microVM through the log system.
49#[derive(Debug, thiserror::Error, PartialEq, Eq)]
50#[error(
51    "The number of available virtio descriptors {reported_len} is greater than queue size: \
52     {queue_size}!"
53)]
54pub struct InvalidAvailIdx {
55    queue_size: u16,
56    reported_len: u16,
57}
58
59/// A virtio descriptor, laid out with C representation (repr(C)).
60/// Taken from Virtio spec:
61/// https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-430008
62/// 2.6.5 The Virtqueue Descriptor Table
63#[repr(C)]
64#[derive(Debug, Default, Clone, Copy)]
65pub struct Descriptor {
66    pub addr: u64,
67    pub len: u32,
68    pub flags: u16,
69    pub next: u16,
70}
71
72// SAFETY: `Descriptor` is a POD and contains no padding.
73unsafe impl ByteValued for Descriptor {}
74
75/// A virtio used element in the used ring.
76/// Taken from Virtio spec:
77/// https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-430008
78/// 2.6.8 The Virtqueue Used Ring
79#[repr(C)]
80#[derive(Debug, Default, Clone, Copy)]
81pub struct UsedElement {
82    pub id: u32,
83    pub len: u32,
84}
85
86// SAFETY: `UsedElement` is a POD and contains no padding.
87unsafe impl ByteValued for UsedElement {}
88
89/// A virtio descriptor chain.
90#[derive(Debug, Copy, Clone)]
91pub struct DescriptorChain {
92    desc_table_ptr: *const Descriptor,
93
94    queue_size: u16,
95    ttl: u16, // used to prevent infinite chain cycles
96
97    /// Index into the descriptor table
98    pub index: u16,
99
100    /// Guest physical address of device specific data
101    pub addr: GuestAddress,
102
103    /// Length of device specific data
104    pub len: u32,
105
106    /// Includes next, write, and indirect bits
107    pub flags: u16,
108
109    /// Index into the descriptor table of the next descriptor if flags has
110    /// the next bit set
111    pub next: u16,
112}
113
114impl DescriptorChain {
115    /// Creates a new `DescriptorChain` from the given descriptor table pointer, queue size,
116    /// and descriptor index.
117    /// Note that `desc_table_ptr` and `queue_size` are assumed to be validated by the caller.
118    fn checked_new(desc_table_ptr: *const Descriptor, queue_size: u16, index: u16) -> Option<Self> {
119        if queue_size <= index {
120            return None;
121        }
122
123        // SAFETY:
124        // index is in 0..queue_size bounds
125        let desc = unsafe { desc_table_ptr.add(usize::from(index)).read_volatile() };
126        let chain = DescriptorChain {
127            desc_table_ptr,
128            queue_size,
129            ttl: queue_size,
130            index,
131            addr: GuestAddress(desc.addr),
132            len: desc.len,
133            flags: desc.flags,
134            next: desc.next,
135        };
136
137        if chain.is_valid() { Some(chain) } else { None }
138    }
139
140    fn is_valid(&self) -> bool {
141        !self.has_next() || self.next < self.queue_size
142    }
143
144    /// Returns `true` if this descriptor has another descriptor linked after it in the chain.
145    pub fn has_next(&self) -> bool {
146        self.flags & VIRTQ_DESC_F_NEXT != 0 && self.ttl > 1
147    }
148
149    /// Whether the driver designated this as a write-only descriptor.
150    ///
151    /// If this is false, this descriptor is read-only.
152    /// Write-only means the emulated device can write to it and the driver can read from it.
153    pub fn is_write_only(&self) -> bool {
154        self.flags & VIRTQ_DESC_F_WRITE != 0
155    }
156
157    /// Gets the next descriptor in this descriptor chain, if there is one.
158    ///
159    /// Note that this is distinct from the head of the next _available_ descriptor chain,
160    /// which is what `Queue::pop` returns.
161    pub fn next_descriptor(&self) -> Option<Self> {
162        if self.has_next() {
163            DescriptorChain::checked_new(self.desc_table_ptr, self.queue_size, self.next).map(
164                |mut c| {
165                    c.ttl = self.ttl - 1;
166                    c
167                },
168            )
169        } else {
170            None
171        }
172    }
173}
174
175#[derive(Debug)]
176pub struct DescriptorIterator(Option<DescriptorChain>);
177
178impl IntoIterator for DescriptorChain {
179    type Item = DescriptorChain;
180    type IntoIter = DescriptorIterator;
181
182    fn into_iter(self) -> Self::IntoIter {
183        DescriptorIterator(Some(self))
184    }
185}
186
187impl Iterator for DescriptorIterator {
188    type Item = DescriptorChain;
189
190    fn next(&mut self) -> Option<Self::Item> {
191        self.0.take().inspect(|desc| {
192            self.0 = desc.next_descriptor();
193        })
194    }
195}
196
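// Illustrative sketch (not part of the original sources): once a chain head has been obtained
// from `Queue::pop`, a device implementation would typically walk it via the `IntoIterator`
// impl above, e.g.
//
//     for desc in head.into_iter() {
//         if desc.is_write_only() {
//             // device-writable buffer: write response bytes to [desc.addr, desc.addr + desc.len)
//         } else {
//             // device-readable buffer: read request bytes from [desc.addr, desc.addr + desc.len)
//         }
//     }
//
// where `head` is a `DescriptorChain` and the actual guest memory accesses are assumed to go
// through the usual `GuestMemory` APIs.
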
197#[derive(Clone, Debug, PartialEq, Eq)]
198/// A virtio queue's parameters.
199pub struct Queue {
200    /// The maximal size in elements offered by the device
201    pub max_size: u16,
202
203    /// The queue size in elements the driver selected
204    pub size: u16,
205
206    /// Indicates if the queue is finished with configuration
207    pub ready: bool,
208
209    /// Guest physical address of the descriptor table
210    pub desc_table_address: GuestAddress,
211
212    /// Guest physical address of the available ring
213    pub avail_ring_address: GuestAddress,
214
215    /// Guest physical address of the used ring
216    pub used_ring_address: GuestAddress,
217
218    /// Host virtual address pointer to the descriptor table
219    /// in the guest memory.
220    /// Getting access to the underlying
221    /// data structure should only occur after the
222    /// struct is initialized with `initialize`.
223    /// Representation of the in-memory struct layout:
224    /// struct DescriptorTable = [Descriptor; <queue_size>]
225    pub desc_table_ptr: *const Descriptor,
226
227    /// Host virtual address pointer to the available ring
228    /// in the guest memory.
229    /// Getting access to the underlying
230    /// data structure should only occur after the
231    /// struct is initialized with `initialize`.
232    ///
233    /// Representation of the in-memory struct layout:
234    /// struct AvailRing {
235    ///     flags: u16,
236    ///     idx: u16,
237    ///     ring: [u16; <queue size>],
238    ///     used_event: u16,
239    /// }
240    ///
241    /// Because all types in the AvailRing are u16,
242    /// we store pointer as *mut u16 for simplicity.
243    pub avail_ring_ptr: *mut u16,
244
245    /// Host virtual address pointer to the used ring
246    /// in the guest memory.
247    /// Getting access to the underlying
248    /// data structure should only occur after the
249    /// struct is initialized with `initialize`.
250    ///
251    /// Representation of the in-memory struct layout:
252    /// struct UsedRing {
253    ///     flags: u16,
254    ///     idx: u16,
255    ///     ring: [UsedElement; <queue size>],
256    ///     avail_event: u16,
257    /// }
258    /// Because the types in the UsedRing differ (u16 and u32),
259    /// we store the pointer as *mut u8.
260    pub used_ring_ptr: *mut u8,
261
262    pub next_avail: Wrapping<u16>,
263    pub next_used: Wrapping<u16>,
264
265    /// VIRTIO_F_RING_EVENT_IDX negotiated (notification suppression enabled)
266    pub uses_notif_suppression: bool,
267    /// The number of added used buffers since last guest kick
268    pub num_added: Wrapping<u16>,
269}
270
271/// SAFETY: Queue is Send, because we use volatile memory accesses when
272/// working with pointers. These pointers are not copied or stored anywhere
273/// else. We assume the guest will not give different queues the same guest
274/// memory addresses.
275unsafe impl Send for Queue {}
276
277#[allow(clippy::len_without_is_empty)]
278impl Queue {
279    /// Constructs an empty virtio queue with the given `max_size`.
280    pub fn new(max_size: u16) -> Queue {
281        Queue {
282            max_size,
283            size: max_size,
284            ready: false,
285            desc_table_address: GuestAddress(0),
286            avail_ring_address: GuestAddress(0),
287            used_ring_address: GuestAddress(0),
288
289            desc_table_ptr: std::ptr::null(),
290            avail_ring_ptr: std::ptr::null_mut(),
291            used_ring_ptr: std::ptr::null_mut(),
292
293            next_avail: Wrapping(0),
294            next_used: Wrapping(0),
295            uses_notif_suppression: false,
296            num_added: Wrapping(0),
297        }
298    }
299
300    fn desc_table_size(&self) -> usize {
301        std::mem::size_of::<Descriptor>() * usize::from(self.size)
302    }
303
304    fn avail_ring_size(&self) -> usize {
305        std::mem::size_of::<u16>()
306            + std::mem::size_of::<u16>()
307            + std::mem::size_of::<u16>() * usize::from(self.size)
308            + std::mem::size_of::<u16>()
309    }
310
311    fn used_ring_size(&self) -> usize {
312        std::mem::size_of::<u16>()
313            + std::mem::size_of::<u16>()
314            + std::mem::size_of::<UsedElement>() * usize::from(self.size)
315            + std::mem::size_of::<u16>()
316    }
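
    // Worked example (illustrative): for the maximum queue size of 256 offered by Firecracker,
    // the sizes computed above are
    //   desc_table_size() = 16 * 256          = 4096 bytes
    //   avail_ring_size() = 2 + 2 + 2*256 + 2 =  518 bytes (flags, idx, ring, used_event)
    //   used_ring_size()  = 2 + 2 + 8*256 + 2 = 2054 bytes (flags, idx, ring, avail_event)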
317
318    fn get_aligned_slice_ptr<T, M: GuestMemory>(
319        &self,
320        mem: &M,
321        addr: GuestAddress,
322        len: usize,
323        alignment: usize,
324    ) -> Result<*mut T, QueueError> {
325        // Guest memory base address is page aligned, so as long as the alignment divides the
326        // page size, it suffices to check that the GPA is properly aligned (i.e. we don't need
327        // to recheck the HVA).
328        if addr.0 & (alignment as u64 - 1) != 0 {
329            return Err(QueueError::PointerNotAligned(
330                u64_to_usize(addr.0),
331                alignment,
332            ));
333        }
334
335        let slice = mem.get_slice(addr, len).map_err(QueueError::MemoryError)?;
336        slice.bitmap().mark_dirty(0, len);
337        Ok(slice.ptr_guard_mut().as_ptr().cast())
338    }
339
340    /// Set up pointers to the queue objects in the guest memory
341    /// and mark memory dirty for those objects
342    pub fn initialize<M: GuestMemory>(&mut self, mem: &M) -> Result<(), QueueError> {
343        if !self.ready {
344            return Err(QueueError::NotReady);
345        }
346
347        if self.size > self.max_size || self.size == 0 || (self.size & (self.size - 1)) != 0 {
348            return Err(QueueError::InvalidSize(self.size));
349        }
350
351        // All the below pointers are verified to be aligned properly; otherwise some methods (e.g.
352        // `read_volatile()`) will panic. Such misalignment is possible when restoring from a
353        // broken/fuzzed snapshot.
354        //
355        // Specification of those pointers' alignments
356        // https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#x1-350007
357        // > ================ ==========
358        // > Virtqueue Part    Alignment
359        // > ================ ==========
360        // > Descriptor Table 16
361        // > Available Ring   2
362        // > Used Ring        4
363        // > ================ ==========
364        self.desc_table_ptr =
365            self.get_aligned_slice_ptr(mem, self.desc_table_address, self.desc_table_size(), 16)?;
366        self.avail_ring_ptr =
367            self.get_aligned_slice_ptr(mem, self.avail_ring_address, self.avail_ring_size(), 2)?;
368        self.used_ring_ptr =
369            self.get_aligned_slice_ptr(mem, self.used_ring_address, self.used_ring_size(), 4)?;
370
371        Ok(())
372    }
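
    // For example (illustrative): restoring a snapshot in which `avail_ring_address` is 0x1001
    // makes `initialize` fail with `QueueError::PointerNotAligned(0x1001, 2)`, since the
    // available ring must be 2-byte aligned (this case is exercised in
    // `tests::test_queue_validation`).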
373
374    /// Get AvailRing.idx
375    #[inline(always)]
376    pub fn avail_ring_idx_get(&self) -> u16 {
377        // SAFETY: `idx` is 1 u16 away from the start
378        unsafe { self.avail_ring_ptr.add(1).read_volatile() }
379    }
380
381    /// Get element from AvailRing.ring at index
382    /// # Safety
383    /// The `index` parameter should be in 0..queue_size bounds
384    #[inline(always)]
385    unsafe fn avail_ring_ring_get(&self, index: usize) -> u16 {
386        // SAFETY: `ring` is 2 u16 away from the start
387        unsafe { self.avail_ring_ptr.add(2).add(index).read_volatile() }
388    }
389
390    /// Get AvailRing.used_event
391    #[inline(always)]
392    pub fn avail_ring_used_event_get(&self) -> u16 {
393        // SAFETY: `used_event` is 2 + self.size u16s away from the start
394        unsafe {
395            self.avail_ring_ptr
396                .add(2_usize.unchecked_add(usize::from(self.size)))
397                .read_volatile()
398        }
399    }
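
    // Layout recap (illustrative): viewing the available ring as an array of u16 elements,
    // `flags` is element 0, `idx` is element 1, `ring[i]` is element 2 + i, and `used_event`
    // is element 2 + queue_size, which is exactly what the `add(..)` offsets in the three
    // accessors above encode.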
400
401    /// Set UsedRing.idx
402    #[inline(always)]
403    pub fn used_ring_idx_set(&mut self, val: u16) {
404        // SAFETY: `idx` is 1 u16 away from the start
405        unsafe {
406            self.used_ring_ptr
407                .add(std::mem::size_of::<u16>())
408                .cast::<u16>()
409                .write_volatile(val)
410        }
411    }
412
413    /// Set element of UsedRing.ring at index
414    /// # Safety
415    /// The `index` parameter should be in 0..queue_size bounds
416    #[inline(always)]
417    unsafe fn used_ring_ring_set(&mut self, index: usize, val: UsedElement) {
418        // SAFETY: `ring` is 2 u16 away from the start
419        unsafe {
420            self.used_ring_ptr
421                .add(std::mem::size_of::<u16>().unchecked_mul(2))
422                .cast::<UsedElement>()
423                .add(index)
424                .write_volatile(val)
425        }
426    }
427
428    #[cfg(any(test, kani))]
429    #[inline(always)]
430    pub fn used_ring_avail_event_get(&mut self) -> u16 {
431        // SAFETY: `avail_event` is 2 * u16 and self.size * UsedElement away from the start
432        unsafe {
433            self.used_ring_ptr
434                .add(
435                    std::mem::size_of::<u16>().unchecked_mul(2)
436                        + std::mem::size_of::<UsedElement>().unchecked_mul(usize::from(self.size)),
437                )
438                .cast::<u16>()
439                .read_volatile()
440        }
441    }
442
443    /// Set UsedRing.avail_event
444    #[inline(always)]
445    pub fn used_ring_avail_event_set(&mut self, val: u16) {
446        // SAFETY: `avail_event` is 2 * u16 and self.size * UsedElement away from the start
447        unsafe {
448            self.used_ring_ptr
449                .add(
450                    std::mem::size_of::<u16>().unchecked_mul(2)
451                        + std::mem::size_of::<UsedElement>().unchecked_mul(usize::from(self.size)),
452                )
453                .cast::<u16>()
454                .write_volatile(val)
455        }
456    }
457
458    /// Returns the number of yet-to-be-popped descriptor chains in the avail ring.
459    pub fn len(&self) -> u16 {
460        (Wrapping(self.avail_ring_idx_get()) - self.next_avail).0
461    }
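
    // Wrapping example (illustrative): both indices are free-running u16 counters, so with
    // avail_idx = 2 and next_avail = 65534 the subtraction above wraps around and yields a
    // length of 4, i.e. four descriptor chain heads are waiting to be popped.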
462
463    /// Returns the number of yet-to-be-popped descriptor chains in the avail ring, erroring out
464    /// if this number is larger than the queue size, since that indicates a broken or malicious
465    /// guest driver.
466    fn len_sanitized(&self) -> Result<u16, InvalidAvailIdx> {
467        let len = self.len();
468        if len > self.size {
469            return Err(InvalidAvailIdx {
470                queue_size: self.size,
471                reported_len: len,
472            });
473        }
474        Ok(len)
475    }
479
480    /// Checks if the driver has made any descriptor chains available in the avail ring.
481    pub fn is_empty(&self) -> bool {
482        self.len() == 0
483    }
484
485    /// Pop the first available descriptor chain from the avail ring.
486    ///
487    /// If this function returns an error at runtime, then the guest has requested Firecracker
488    /// to process more virtio descriptors than there can possibly be given the queue's size.
489    /// This can be a malicious guest driver scenario, and hence a DoS attempt. If encountered
490    /// at runtime, the correct handling is to panic!
491    ///
492    /// This function however is also called on paths that can (and should) just report
493    /// the error to the user (e.g. loading a corrupt snapshot file), and hence cannot panic on its
494    /// own.
495    pub fn pop(&mut self) -> Result<Option<DescriptorChain>, InvalidAvailIdx> {
496        let len = self.len_sanitized()?;
497        // The number of descriptor chain heads to process should always
498        // be smaller or equal to the queue size, as the driver should
499        // never ask the VMM to process an available ring entry more than
500        // once. Checking and reporting such incorrect driver behavior
501        // can prevent potential hanging and Denial-of-Service from
502        // happening on the VMM side.
503        if len == 0 {
504            return Ok(None);
505        }
506
507        Ok(self.pop_unchecked())
508    }
509
510    /// Try to pop the first available descriptor chain from the avail ring.
511    /// If no descriptor is available, enable notifications.
512    ///
513    /// If this function returns an error at runtime, then the guest has requested Firecracker
514    /// to process more virtio descriptors than there can possibly be given the queue's size.
515    /// This can be a malicious guest driver scenario, and hence a DoS attempt. If encountered
516    /// at runtime, the correct handling is to panic!
517    ///
518    /// This function however is also called on paths that can (and should) just report
519    /// the error to the user (e.g. loading a corrupt snapshot file), and hence cannot panic on its
520    /// own.
521    pub fn pop_or_enable_notification(
522        &mut self,
523    ) -> Result<Option<DescriptorChain>, InvalidAvailIdx> {
524        if !self.uses_notif_suppression {
525            return self.pop();
526        }
527
528        if self.try_enable_notification()? {
529            return Ok(None);
530        }
531
532        Ok(self.pop_unchecked())
533    }
534
535    /// Pop the first available descriptor chain from the avail ring.
536    ///
537    /// # Important
538    /// This is an internal method that ASSUMES THAT THERE ARE AVAILABLE DESCRIPTORS. Otherwise it
539    /// will retrieve a descriptor that contains garbage data (obsolete/empty).
540    fn pop_unchecked(&mut self) -> Option<DescriptorChain> {
541        // This fence ensures all subsequent reads see the updated driver writes.
542        fence(Ordering::Acquire);
543
544        // We'll need to find the first available descriptor that we haven't yet popped.
545        // In a naive notation, that would be:
546        // `descriptor_table[avail_ring[next_avail]]`.
547        //
548        // We use `self.next_avail` to store the position, in `ring`, of the next available
549        // descriptor index, with a twist: we always only increment `self.next_avail`, so the
550        // actual position will be `self.next_avail % self.size`.
551        let idx = self.next_avail.0 % self.size;
552        // SAFETY:
553        // index is bound by the queue size
554        let desc_index = unsafe { self.avail_ring_ring_get(usize::from(idx)) };
555
556        DescriptorChain::checked_new(self.desc_table_ptr, self.size, desc_index).inspect(|_| {
557            self.next_avail += Wrapping(1);
558        })
559    }
560
561    /// Undo the effects of the last `self.pop()` call.
562    /// The caller can use this, if it was unable to consume the last popped descriptor chain.
563    pub fn undo_pop(&mut self) {
564        self.next_avail -= Wrapping(1);
565    }
566
567    /// Write a used element into the used ring.
568    /// - `ring_index_offset` is an offset added to the current `self.next_used` to obtain the
569    ///   actual index into the used ring.
570    pub fn write_used_element(
571        &mut self,
572        ring_index_offset: u16,
573        desc_index: u16,
574        len: u32,
575    ) -> Result<(), QueueError> {
576        if self.size <= desc_index {
577            error!(
578                "attempted to add out of bounds descriptor to used ring: {}",
579                desc_index
580            );
581            return Err(QueueError::DescIndexOutOfBounds(desc_index));
582        }
583
584        let next_used = (self.next_used + Wrapping(ring_index_offset)).0 % self.size;
585        let used_element = UsedElement {
586            id: u32::from(desc_index),
587            len,
588        };
589        // SAFETY:
590        // index is bound by the queue size
591        unsafe {
592            self.used_ring_ring_set(usize::from(next_used), used_element);
593        }
594        Ok(())
595    }
596
597    /// Advance queue and used ring by `n` elements.
598    pub fn advance_next_used(&mut self, n: u16) {
599        self.num_added += Wrapping(n);
600        self.next_used += Wrapping(n);
601    }
602
603    /// Set the used ring index to the current `next_used` value.
604    /// Should be called once after a batch of `add_used` calls.
605    pub fn advance_used_ring_idx(&mut self) {
606        // This fence ensures all descriptor writes are visible before the index update is.
607        fence(Ordering::Release);
608        self.used_ring_idx_set(self.next_used.0);
609    }
610
611    /// Puts an available descriptor head into the used ring for use by the guest.
612    pub fn add_used(&mut self, desc_index: u16, len: u32) -> Result<(), QueueError> {
613        self.write_used_element(0, desc_index, len)?;
614        self.advance_next_used(1);
615        Ok(())
616    }
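
    // Usage sketch (illustrative, not from the original sources): devices that complete several
    // buffers in one pass can batch the used ring update instead of calling `add_used` once per
    // buffer:
    //
    //     for (i, (desc_index, len)) in completed.iter().enumerate() {
    //         queue.write_used_element(i as u16, *desc_index, *len)?;
    //     }
    //     queue.advance_next_used(completed.len() as u16);
    //     queue.advance_used_ring_idx(); // publish the new used idx to the guest
    //
    // where `completed: Vec<(u16, u32)>` is a hypothetical list of (descriptor index, bytes
    // written) pairs.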
617
618    /// Try to enable notification events from the guest driver. Returns true if notifications were
619    /// successfully enabled. Otherwise it means that one or more descriptors can still be consumed
620    /// from the available ring and we can't guarantee that there will be a notification. In this
621    /// case the caller might want to consume the mentioned descriptors and call this method again.
622    fn try_enable_notification(&mut self) -> Result<bool, InvalidAvailIdx> {
623        // If the device doesn't use notification suppression, we'll continue to get notifications
624        // no matter what.
625        if !self.uses_notif_suppression {
626            return Ok(true);
627        }
628
629        let len = self.len_sanitized()?;
630        if len != 0 {
631            // The number of descriptor chain heads to process should always
632            // be smaller or equal to the queue size.
633            return Ok(false);
634        }
635
636        // Set the next expected avail_idx as avail_event.
637        self.used_ring_avail_event_set(self.next_avail.0);
638
639        // Make sure all subsequent reads are performed after we set avail_event.
640        fence(Ordering::SeqCst);
641
642        // If the actual avail_idx is different than next_avail one or more descriptors can still
643        // be consumed from the available ring.
644        Ok(self.next_avail.0 == self.avail_ring_idx_get())
645    }
646
647    /// Enable notification suppression.
648    pub fn enable_notif_suppression(&mut self) {
649        self.uses_notif_suppression = true;
650    }
651
652    /// Check if we need to kick the guest.
653    ///
654    /// Please note this method has side effects: once it returns `true`, it considers the
655    /// driver will actually be notified, and won't return `true` again until the driver
656    /// updates `used_event` and/or the notification conditions hold once more.
657    ///
658    /// This is similar to the `vring_need_event()` method implemented by the Linux kernel.
659    pub fn prepare_kick(&mut self) -> bool {
660        // If the device doesn't use notification suppression, always return true
661        if !self.uses_notif_suppression {
662            return true;
663        }
664
665        // We need to expose used array entries before checking the used_event.
666        fence(Ordering::SeqCst);
667
668        let new = self.next_used;
669        let old = self.next_used - self.num_added;
670        let used_event = Wrapping(self.avail_ring_used_event_get());
671
672        self.num_added = Wrapping(0);
673
674        new - used_event - Wrapping(1) < new - old
675    }
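
    // Worked example (illustrative): with next_used = 5 and num_added = 3, the used ring slots
    // just written correspond to indices 2, 3 and 4, so `new - used_event - 1 < new - old`
    // (here `5 - used_event - 1 < 3` in wrapping arithmetic) holds exactly for used_event in
    // {2, 3, 4}, and only then do we report that the guest needs a kick.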
676
677    /// Resets the Virtio Queue
678    pub(crate) fn reset(&mut self) {
679        self.ready = false;
680        self.size = self.max_size;
681        self.desc_table_address = GuestAddress(0);
682        self.avail_ring_address = GuestAddress(0);
683        self.used_ring_address = GuestAddress(0);
684        self.next_avail = Wrapping(0);
685        self.next_used = Wrapping(0);
686        self.num_added = Wrapping(0);
687        self.uses_notif_suppression = false;
688    }
689}
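
// Typical device-side use of the queue API above (an illustrative sketch, not taken from the
// original sources; `process` and `notify_guest` are hypothetical device callbacks):
//
//     queue.initialize(&mem)?;
//     loop {
//         while let Some(head) = queue.pop_or_enable_notification()? {
//             let written = process(&mem, &head);   // handle the descriptor chain
//             queue.add_used(head.index, written)?; // mark the chain head as used
//         }
//         queue.advance_used_ring_idx();            // publish the batched used entries
//         if queue.prepare_kick() {
//             notify_guest();                       // e.g. inject the queue interrupt
//         }
//         // ... wait for the next guest notification ...
//     }
//
// An `InvalidAvailIdx` error returned by `pop_or_enable_notification` is treated as fatal by
// the callers, as described in the documentation of that error type.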
690
691#[cfg(kani)]
692#[allow(dead_code)]
693mod verification {
694    use std::mem::ManuallyDrop;
695    use std::num::Wrapping;
696
697    use vm_memory::{GuestMemoryRegion, MemoryRegionAddress};
698
699    use super::*;
700    use crate::vstate::memory::{Bytes, FileOffset, GuestAddress, GuestMemory, MmapRegion};
701
702    /// A made-for-kani version of `vm_memory::GuestMemoryMmap`. Unlike the real
703    /// `GuestMemoryMmap`, which manages a list of regions and then does a binary
704    /// search to determine which region a specific read or write request goes to,
705    /// this only uses a single region. Eliminating this binary search significantly
706    /// speeds up all queue proofs, because it eliminates the only loop contained herein,
707    /// meaning we can use `kani::unwind(0)` instead of `kani::unwind(2)`. Functionally,
708    /// it works identically to `GuestMemoryMmap` with only a single contained region.
709    pub struct ProofGuestMemory {
710        the_region: vm_memory::GuestRegionMmap,
711    }
712
713    impl GuestMemory for ProofGuestMemory {
714        type R = vm_memory::GuestRegionMmap;
715
716        fn num_regions(&self) -> usize {
717            1
718        }
719
720        fn find_region(&self, addr: GuestAddress) -> Option<&Self::R> {
721            self.the_region
722                .to_region_addr(addr)
723                .map(|_| &self.the_region)
724        }
725
726        fn iter(&self) -> impl Iterator<Item = &Self::R> {
727            std::iter::once(&self.the_region)
728        }
729
730        fn try_access<F>(
731            &self,
732            count: usize,
733            addr: GuestAddress,
734            mut f: F,
735        ) -> vm_memory::guest_memory::Result<usize>
736        where
737            F: FnMut(
738                usize,
739                usize,
740                MemoryRegionAddress,
741                &Self::R,
742            ) -> vm_memory::guest_memory::Result<usize>,
743        {
744            // We only have a single region, meaning a lot of the complications of the default
745            // try_access implementation for dealing with reads/writes across multiple
746            // regions do not apply.
747            let region_addr = self
748                .the_region
749                .to_region_addr(addr)
750                .ok_or(vm_memory::guest_memory::Error::InvalidGuestAddress(addr))?;
751            self.the_region
752                .checked_offset(region_addr, count)
753                .ok_or(vm_memory::guest_memory::Error::InvalidGuestAddress(addr))?;
754            f(0, count, region_addr, &self.the_region)
755        }
756    }
757
758    pub struct ProofContext(pub Queue, pub ProofGuestMemory);
759
760    pub struct MmapRegionStub {
761        addr: *mut u8,
762        size: usize,
763        bitmap: (),
764        file_offset: Option<FileOffset>,
765        prot: i32,
766        flags: i32,
767        owned: bool,
768        hugetlbfs: Option<bool>,
769    }
770
771    /// We start the first guest memory region at an offset so that harnesses using
772    /// Queue::any() will be exposed to queue segments both before and after valid guest memory.
773    const GUEST_MEMORY_BASE: u64 = 512;
774
775    // We size our guest memory to fit a properly aligned queue, plus some wiggle bytes
776    // to make sure we do not only test queues where all segments are laid out consecutively (at
777    // least for those proofs that use a completely arbitrary queue structure).
778    // We need to give at least 16 bytes of buffer space for the descriptor table to be
779    // able to change its address, as it is 16-byte aligned.
780    const GUEST_MEMORY_SIZE: usize = (QUEUE_END - QUEUE_BASE_ADDRESS) as usize + 30;
781
782    fn guest_memory(memory: *mut u8) -> ProofGuestMemory {
783        // Ideally, we'd want to do
784        // let region = unsafe {MmapRegionBuilder::new(GUEST_MEMORY_SIZE)
785        //    .with_raw_mmap_pointer(bytes.as_mut_ptr())
786        //    .build()
787        //    .unwrap()};
788        // However, .build() calls to .build_raw(), which contains a call to libc::sysconf.
789        // Since kani 0.34.0, stubbing out foreign functions is supported, but due to the rust
790        // standard library using a special version of the libc crate, it runs into some problems
791        // [1] Even if we work around those problems, we run into performance problems [2].
792        // Therefore, for now we stick to this ugly transmute hack (which only works because
793        // the kani compiler will never re-order fields, so we can treat repr(Rust) as repr(C)).
794        //
795        // [1]: https://github.com/model-checking/kani/issues/2673
796        // [2]: https://github.com/model-checking/kani/issues/2538
797        let region_stub = MmapRegionStub {
798            addr: memory,
799            size: GUEST_MEMORY_SIZE,
800            bitmap: Default::default(),
801            file_offset: None,
802            prot: 0,
803            flags: libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
804            owned: false,
805            hugetlbfs: None,
806        };
807
808        let region: MmapRegion<()> = unsafe { std::mem::transmute(region_stub) };
809
810        let guest_region =
811            vm_memory::GuestRegionMmap::new(region, GuestAddress(GUEST_MEMORY_BASE)).unwrap();
812
813        // Use a single memory region, just as firecracker does for guests of size < 2GB
814        // For larger guests, firecracker uses two regions (due to the MMIO gap being
815        // at the top of 32-bit address space)
816        ProofGuestMemory {
817            the_region: guest_region,
818        }
819    }
820
821    // can't implement kani::Arbitrary for the relevant types due to orphan rules
822    fn setup_kani_guest_memory() -> ProofGuestMemory {
823        // Non-deterministic Vec that will be used as the guest memory. We use `exact_vec` for now
824        // as `any_vec` will likely result in worse performance. We do not loose much from
825        // `exact_vec`, as our proofs do not make any assumptions about "filling" guest
826        // memory: Since everything is placed at non-deterministic addresses with
827        // non-deterministic lengths, we still cover all scenarios that would be covered by
828        // smaller guest memory closely. We leak the memory allocated here, so that it
829        // doesnt get deallocated at the end of this function. We do not explicitly
830        // de-allocate, but since this is a kani proof, that does not matter.
831        guest_memory(
832            ManuallyDrop::new(kani::vec::exact_vec::<u8, GUEST_MEMORY_SIZE>()).as_mut_ptr(),
833        )
834    }
835
836    // Constants describing the in-memory layout of a queue of size FIRECRACKER_MAX_SIZE starting
837    // at the beginning of guest memory. These are based on Section 2.6 of the VirtIO 1.1
838    // specification.
839    const QUEUE_BASE_ADDRESS: u64 = GUEST_MEMORY_BASE;
840
841    /// descriptor table has 16 bytes per entry, avail ring starts right after
842    const AVAIL_RING_BASE_ADDRESS: u64 =
843        QUEUE_BASE_ADDRESS + FIRECRACKER_MAX_QUEUE_SIZE as u64 * 16;
844
845    /// Used ring starts after avail ring (which has size 6 + 2 * FIRECRACKER_MAX_QUEUE_SIZE),
846    /// and needs 2 bytes of padding
847    const USED_RING_BASE_ADDRESS: u64 =
848        AVAIL_RING_BASE_ADDRESS + 6 + 2 * FIRECRACKER_MAX_QUEUE_SIZE as u64 + 2;
849
850    /// The address of the first byte after the queue (which starts at QUEUE_BASE_ADDRESS).
851    /// Note that the used ring structure has size 6 + 8 * FIRECRACKER_MAX_QUEUE_SIZE
852    const QUEUE_END: u64 = USED_RING_BASE_ADDRESS + 6 + 8 * FIRECRACKER_MAX_QUEUE_SIZE as u64;
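
    // Plugging in FIRECRACKER_MAX_QUEUE_SIZE = 256 and GUEST_MEMORY_BASE = 512, the constants
    // above work out to (illustrative arithmetic):
    //   QUEUE_BASE_ADDRESS      =  512
    //   AVAIL_RING_BASE_ADDRESS =  512 + 256 * 16        = 4608
    //   USED_RING_BASE_ADDRESS  = 4608 + 6 + 2 * 256 + 2 = 5128
    //   QUEUE_END               = 5128 + 6 + 8 * 256     = 7182
    // and therefore GUEST_MEMORY_SIZE = (7182 - 512) + 30 = 6700 bytes.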
853
854    fn less_arbitrary_queue() -> Queue {
855        let mut queue = Queue::new(FIRECRACKER_MAX_QUEUE_SIZE);
856
857        queue.size = FIRECRACKER_MAX_QUEUE_SIZE;
858        queue.ready = true;
859        queue.desc_table_address = GuestAddress(QUEUE_BASE_ADDRESS);
860        queue.avail_ring_address = GuestAddress(AVAIL_RING_BASE_ADDRESS);
861        queue.used_ring_address = GuestAddress(USED_RING_BASE_ADDRESS);
862        queue.next_avail = Wrapping(kani::any());
863        queue.next_used = Wrapping(kani::any());
864        queue.uses_notif_suppression = kani::any();
865        queue.num_added = Wrapping(kani::any());
866
867        queue
868    }
869
870    impl ProofContext {
871        /// Creates a [`ProofContext`] where the queue layout is not arbitrary and instead
872        /// fixed to a known valid one
873        pub fn bounded_queue() -> Self {
874            let mem = setup_kani_guest_memory();
875            let mut queue = less_arbitrary_queue();
876            queue.initialize(&mem).unwrap();
877
878            ProofContext(queue, mem)
879        }
880    }
881
882    impl kani::Arbitrary for ProofContext {
883        fn any() -> Self {
884            let mem = setup_kani_guest_memory();
885            let mut queue: Queue = kani::any();
886
887            kani::assume(queue.initialize(&mem).is_ok());
888
889            ProofContext(queue, mem)
890        }
891    }
892
893    impl kani::Arbitrary for Queue {
894        fn any() -> Queue {
895            // firecracker statically sets the maximal queue size to 256
896            let mut queue = Queue::new(FIRECRACKER_MAX_QUEUE_SIZE);
897
898            queue.size = kani::any();
899            queue.ready = true;
900            queue.desc_table_address = GuestAddress(kani::any());
901            queue.avail_ring_address = GuestAddress(kani::any());
902            queue.used_ring_address = GuestAddress(kani::any());
903            queue.next_avail = Wrapping(kani::any());
904            queue.next_used = Wrapping(kani::any());
905            queue.uses_notif_suppression = kani::any();
906            queue.num_added = Wrapping(kani::any());
907
908            queue
909        }
910    }
911
912    impl kani::Arbitrary for Descriptor {
913        fn any() -> Descriptor {
914            Descriptor {
915                addr: kani::any(),
916                len: kani::any(),
917                flags: kani::any(),
918                next: kani::any(),
919            }
920        }
921    }
922
923    #[kani::proof]
924    #[kani::unwind(0)] // There are no loops anywhere, but kani really enjoys getting stuck in std::ptr::drop_in_place.
925    // This is a compiler intrinsic that has a "dummy" implementation in stdlib that just
926    // recursively calls itself. Kani will generally unwind this recursion infinitely
927    fn verify_spec_2_6_7_2() {
928        // Section 2.6.7.2 deals with device-to-driver notification suppression.
929        // It describes a mechanism by which the driver can tell the device that it does not
930        // want notifications (IRQs) about the device finishing processing individual buffers
931        // (descriptor chain heads) from the avail ring until a specific number of descriptors
932        // has been processed. This is done by the driver
933        // defining a "used_event" index, which tells the device "please do not notify me until
934        // used.ring[used_event] has been written to by you".
935        let ProofContext(mut queue, _) = kani::any();
936
937        let num_added_old = queue.num_added.0;
938        let needs_notification = queue.prepare_kick();
939
940        // uses_notif_suppression equivalent to VIRTIO_F_EVENT_IDX negotiated
941        if !queue.uses_notif_suppression {
942            // The specification here says
943            // After the device writes a descriptor index into the used ring:
944            // – If flags is 1, the device SHOULD NOT send a notification.
945            // – If flags is 0, the device MUST send a notification
946            // flags is the first field in the avail_ring_address, which we completely ignore. We
947            // always send a notification, and as there only is a SHOULD NOT, that is okay
948            assert!(needs_notification);
949        } else {
950            // next_used - 1 is where the previous descriptor was placed
951            if Wrapping(queue.avail_ring_used_event_get()) == queue.next_used - Wrapping(1)
952                && num_added_old > 0
953            {
954                // If the idx field in the used ring (which determined where that descriptor index
955                // was placed) was equal to used_event, the device MUST send a
956                // notification.
957                assert!(needs_notification);
958
959                kani::cover!();
960            }
961
962            // The other case is handled by a "SHOULD NOT send a notification" in the spec.
963            // So we do not care
964        }
965    }
966
967    #[kani::proof]
968    #[kani::unwind(0)]
969    fn verify_prepare_kick() {
970        // Firecracker's virtio queue implementation is not completely spec-conformant:
971        // According to the spec, we have to check whether to notify the driver after every call
972        // to add_used. We don't do that. Instead, we call add_used a bunch of times (with the
973        // number of added descriptors being counted in Queue.num_added), and then use
974        // "prepare_kick" to check if any of those descriptors should have triggered a
975        // notification.
976        let ProofContext(mut queue, _) = kani::any();
977
978        queue.enable_notif_suppression();
979        assert!(queue.uses_notif_suppression);
980
981        // With firecracker's batching of used IRQs, we need to check if addition of the last
982        // queue.num_added buffers is what caused us to cross the used_event index (e.g. if the
983        // index used_event was written to since the last call to prepare_kick). We have to
984        // take various ring-wrapping behavior into consideration here. This is the case if
985        // used_event in [next_used - num_added, next_used - 1]. However, intervals
986        // in modular arithmetic are a finicky thing, as we do not have a notion of order
987        // (consider for example u16::MAX + 1 = 0. Clearly, x + 1 > x, but that would imply 0 >
988        // u16::MAX) This gives us some interesting corner cases: What if our "interval" is
989        // "[u16::MAX - 1, 1]"? For these "wrapped" intervals, we can instead consider
990        // [next_used - num_added - 1, u16::MAX] ∪ [0, next_used - 1]. Since queue size is at most
991        // [next_used - num_added, u16::MAX] ∪ [0, next_used - 1]. Since queue size is at most
992
993        let used_event = Wrapping(queue.avail_ring_used_event_get());
994        let interval_start = queue.next_used - queue.num_added;
995        let interval_end = queue.next_used - Wrapping(1);
996        let needs_notification = if queue.num_added.0 == 0 {
997            false
998        } else if interval_start > interval_end {
999            used_event <= interval_end || used_event >= interval_start
1000        } else {
1001            used_event >= interval_start && used_event <= interval_end
1002        };
1003
1004        assert_eq!(queue.prepare_kick(), needs_notification);
1005    }
1006
1007    #[kani::proof]
1008    #[kani::unwind(0)]
1009    fn verify_add_used() {
1010        let ProofContext(mut queue, _) = kani::any();
1011
1012        // The spec here says (2.6.8.2):
1013        //
1014        // The device MUST set len prior to updating the used idx.
1015        // The device MUST write at least len bytes to descriptor, beginning at the first
1016        // device-writable buffer, prior to updating the used idx.
1017        // The device MAY write more than len bytes to descriptor.
1018        //
1019        // We can't really verify any of these. We can verify that guest memory is updated correctly
1020        // though
1021
1022        // Position in the used ring at which the index of the descriptor the device
1023        // wrote to will be placed.
1024        let used_idx = queue.next_used;
1025
1026        let used_desc_table_index = kani::any();
1027        if queue.add_used(used_desc_table_index, kani::any()).is_ok() {
1028            assert_eq!(queue.next_used, used_idx + Wrapping(1));
1029        } else {
1030            assert_eq!(queue.next_used, used_idx);
1031
1032            // Ideally, here we would want to actually read the relevant values from memory and
1033            // assert they are unchanged. However, kani will run out of memory if we try to do so,
1034            // so we instead verify the following "proxy property": If an error happened, then
1035            // it happened at the very beginning of add_used, meaning no memory accesses were
1036            // done. This is relying on implementation details of add_used, namely that
1037            // the check for out-of-bounds descriptor index happens at the very beginning of the
1038            // function.
1039            assert!(used_desc_table_index >= queue.size);
1040        }
1041    }
1042
1043    #[kani::proof]
1044    #[kani::unwind(0)]
1045    fn verify_is_empty() {
1046        let ProofContext(queue, _) = kani::any();
1047
1048        assert_eq!(queue.len() == 0, queue.is_empty());
1049    }
1050
1051    #[kani::proof]
1052    #[kani::unwind(0)]
1053    #[kani::solver(cadical)]
1054    fn verify_initialize() {
1055        let ProofContext(mut queue, mem) = kani::any();
1056
1057        if queue.initialize(&mem).is_ok() {
1058            // Section 2.6: Alignment of descriptor table, available ring and used ring; size of
1059            // queue
1060            fn alignment_of(val: u64) -> u64 {
1061                if val == 0 { u64::MAX } else { val & (!val + 1) }
1062            }
1063
1064            assert!(alignment_of(queue.desc_table_address.0) >= 16);
1065            assert!(alignment_of(queue.avail_ring_address.0) >= 2);
1066            assert!(alignment_of(queue.used_ring_address.0) >= 4);
1067
1068            // length of queue must be power-of-two, and at most 2^15
1069            assert_eq!(queue.size.count_ones(), 1);
1070            assert!(queue.size <= 1u16 << 15);
1071        }
1072    }
1073
1074    #[kani::proof]
1075    #[kani::unwind(0)]
1076    fn verify_avail_ring_idx_get() {
1077        let ProofContext(queue, _) = kani::any();
1078        _ = queue.avail_ring_idx_get();
1079    }
1080
1081    #[kani::proof]
1082    #[kani::unwind(0)]
1083    fn verify_avail_ring_ring_get() {
1084        let ProofContext(queue, _) = kani::any();
1085        let x: usize = kani::any_where(|x| *x < usize::from(queue.size));
1086        unsafe { _ = queue.avail_ring_ring_get(x) };
1087    }
1088
1089    #[kani::proof]
1090    #[kani::unwind(0)]
1091    fn verify_avail_ring_used_event_get() {
1092        let ProofContext(queue, _) = kani::any();
1093        _ = queue.avail_ring_used_event_get();
1094    }
1095
1096    #[kani::proof]
1097    #[kani::unwind(0)]
1098    fn verify_used_ring_idx_set() {
1099        let ProofContext(mut queue, _) = kani::any();
1100        queue.used_ring_idx_set(kani::any());
1101    }
1102
1103    #[kani::proof]
1104    #[kani::unwind(0)]
1105    fn verify_used_ring_ring_set() {
1106        let ProofContext(mut queue, _) = kani::any();
1107        let x: usize = kani::any_where(|x| *x < usize::from(queue.size));
1108        let used_element = UsedElement {
1109            id: kani::any(),
1110            len: kani::any(),
1111        };
1112        unsafe { queue.used_ring_ring_set(x, used_element) };
1113    }
1114
1115    #[kani::proof]
1116    #[kani::unwind(0)]
1117    fn verify_used_ring_avail_event() {
1118        let ProofContext(mut queue, _) = kani::any();
1119        let x = kani::any();
1120        queue.used_ring_avail_event_set(x);
1121        assert_eq!(x, queue.used_ring_avail_event_get());
1122    }
1123
1124    #[kani::proof]
1125    #[kani::unwind(0)]
1126    #[kani::solver(cadical)]
1127    fn verify_pop() {
1128        let ProofContext(mut queue, _) = kani::any();
1129
1130        // This is the check in pop()/len_sanitized() which we use to abort Firecracker in a DoS
1131        // scenario. This condition being false means that the guest is asking us to process every
1132        // element in the queue multiple times. It cannot be checked by initialize, as that function
1133        // is called when the queue is being initialized, i.e. empty. We compute it using
1134        // local variables here to make things easier on kani: one less roundtrip through vm-memory.
1135        let queue_len = queue.len();
1136        kani::assume(queue_len <= queue.size);
1137
1138        let next_avail = queue.next_avail;
1139
1140        if let Some(_) = queue.pop().unwrap() {
1141            // Can't get anything out of an empty queue, assert queue_len != 0
1142            assert_ne!(queue_len, 0);
1143            assert_eq!(queue.next_avail, next_avail + Wrapping(1));
1144        }
1145    }
1146
1147    #[kani::proof]
1148    #[kani::unwind(0)]
1149    #[kani::solver(cadical)]
1150    fn verify_undo_pop() {
1151        let ProofContext(mut queue, _) = kani::any();
1152
1153        // See verify_pop for explanation
1154        kani::assume(queue.len() <= queue.size);
1155
1156        let queue_clone = queue.clone();
1157        if let Some(_) = queue.pop().unwrap() {
1158            queue.undo_pop();
1159            assert_eq!(queue, queue_clone);
1160
1161            // TODO: can we somehow check that guest memory wasn't touched?
1162        }
1163    }
1164
1165    #[kani::proof]
1166    #[kani::unwind(0)]
1167    fn verify_try_enable_notification() {
1168        let ProofContext(mut queue, _) = ProofContext::bounded_queue();
1169
1170        kani::assume(queue.len() <= queue.size);
1171
1172        if queue.try_enable_notification().unwrap() && queue.uses_notif_suppression {
1173            // We only require new notifications if the queue is empty (i.e. we've processed
1174            // everything we've been notified about), or if suppression is disabled.
1175            assert!(queue.is_empty());
1176
1177            assert_eq!(Wrapping(queue.avail_ring_idx_get()), queue.next_avail)
1178        }
1179    }
1180
1181    #[kani::proof]
1182    #[kani::unwind(0)]
1183    #[kani::solver(cadical)]
1184    fn verify_checked_new() {
1185        let ProofContext(queue, mem) = kani::any();
1186
1187        let index = kani::any();
1188        let maybe_chain = DescriptorChain::checked_new(queue.desc_table_ptr, queue.size, index);
1189
1190        if index >= queue.size {
1191            assert!(maybe_chain.is_none())
1192        } else {
1193            // If the index was in-bounds for the descriptor table, we at least should be
1194            // able to compute the address of the descriptor table entry without going out
1195            // of bounds anywhere, and also read from that address.
1196            let desc_head = mem
1197                .checked_offset(queue.desc_table_address, (index as usize) * 16)
1198                .unwrap();
1199            mem.checked_offset(desc_head, 16).unwrap();
1200            let desc = mem.read_obj::<Descriptor>(desc_head).unwrap();
1201
1202            match maybe_chain {
1203                None => {
1204                    // This assert is the negation of the "is_valid" check in checked_new
1205                    assert!(desc.flags & VIRTQ_DESC_F_NEXT == 1 && desc.next >= queue.size)
1206                }
1207                Some(head) => {
1208                    assert!(head.is_valid())
1209                }
1210            }
1211        }
1212    }
1213}
1214
1215#[cfg(test)]
1216mod tests {
1217    use vm_memory::{Address, Bytes};
1218
1219    pub use super::*;
1220    use crate::devices::virtio::queue::QueueError::DescIndexOutOfBounds;
1221    use crate::devices::virtio::test_utils::{VirtQueue, default_mem};
1222    use crate::test_utils::{multi_region_mem, single_region_mem};
1223    use crate::vstate::memory::GuestAddress;
1224
1225    #[test]
1226    fn test_checked_new_descriptor_chain() {
1227        let m = &multi_region_mem(&[(GuestAddress(0), 0x10000), (GuestAddress(0x20000), 0x2000)]);
1228        let vq = VirtQueue::new(GuestAddress(0), m, 16);
1229        let mut q = vq.create_queue();
1230        q.initialize(m).unwrap();
1231
1232        assert!(vq.end().0 < 0x1000);
1233
1234        // index >= queue_size
1235        assert!(DescriptorChain::checked_new(q.desc_table_ptr, 16, 16).is_none());
1236
1237        // Let's create an invalid chain.
1238        {
1239            // The first desc has a normal len, and the next_descriptor flag is set.
1240            vq.dtable[0].addr.set(0x1000);
1241            vq.dtable[0].len.set(0x1000);
1242            vq.dtable[0].flags.set(VIRTQ_DESC_F_NEXT);
1243            // .. but the index of the next descriptor is too large
1244            vq.dtable[0].next.set(16);
1245
1246            assert!(DescriptorChain::checked_new(q.desc_table_ptr, 16, 0).is_none());
1247        }
1248
1249        // Finally, let's test an ok chain.
1250        {
1251            vq.dtable[0].next.set(1);
1252            vq.dtable[1].set(0x2000, 0x1000, 0, 0);
1253
1254            let c = DescriptorChain::checked_new(q.desc_table_ptr, 16, 0).unwrap();
1255
1256            assert_eq!(c.desc_table_ptr, q.desc_table_ptr);
1257            assert_eq!(c.queue_size, 16);
1258            assert_eq!(c.ttl, c.queue_size);
1259            assert_eq!(c.index, 0);
1260            assert_eq!(c.addr, GuestAddress(0x1000));
1261            assert_eq!(c.len, 0x1000);
1262            assert_eq!(c.flags, VIRTQ_DESC_F_NEXT);
1263            assert_eq!(c.next, 1);
1264
1265            assert!(c.next_descriptor().unwrap().next_descriptor().is_none());
1266        }
1267    }
1268
1269    #[test]
1270    fn test_queue_validation() {
1271        let m = &default_mem();
1272        let vq = VirtQueue::new(GuestAddress(0), m, 16);
1273
1274        let mut q = vq.create_queue();
1275
1276        // q is currently valid
1277        q.initialize(m).unwrap();
1278
1279        // shouldn't be valid when not marked as ready
1280        q.ready = false;
1281        assert!(matches!(q.initialize(m).unwrap_err(), QueueError::NotReady));
1282        q.ready = true;
1283
1284        // or when size > max_size
1285        q.size = q.max_size << 1;
1286        assert!(matches!(
1287            q.initialize(m).unwrap_err(),
1288            QueueError::InvalidSize(_)
1289        ));
1290        q.size = q.max_size;
1291
1292        // or when size is 0
1293        q.size = 0;
1294        assert!(matches!(
1295            q.initialize(m).unwrap_err(),
1296            QueueError::InvalidSize(_)
1297        ));
1298        q.size = q.max_size;
1299
1300        // or when size is not a power of 2
1301        q.size = 11;
1302        assert!(matches!(
1303            q.initialize(m).unwrap_err(),
1304            QueueError::InvalidSize(_)
1305        ));
1306        q.size = q.max_size;
1307
1308        // reset dirtied values
1309        q.max_size = 16;
1310        q.next_avail = Wrapping(0);
1311        m.write_obj::<u16>(0, q.avail_ring_address.unchecked_add(2))
1312            .unwrap();
1313
1314        // or if the various addresses are off
1315
1316        q.desc_table_address = GuestAddress(0xffff_ff00);
1317        assert!(matches!(
1318            q.initialize(m).unwrap_err(),
1319            QueueError::MemoryError(_)
1320        ));
1321        q.desc_table_address = GuestAddress(0x1001);
1322        assert!(matches!(
1323            q.initialize(m).unwrap_err(),
1324            QueueError::PointerNotAligned(_, _)
1325        ));
1326        q.desc_table_address = vq.dtable_start();
1327
1328        q.avail_ring_address = GuestAddress(0xffff_ff00);
1329        assert!(matches!(
1330            q.initialize(m).unwrap_err(),
1331            QueueError::MemoryError(_)
1332        ));
1333        q.avail_ring_address = GuestAddress(0x1001);
1334        assert!(matches!(
1335            q.initialize(m).unwrap_err(),
1336            QueueError::PointerNotAligned(_, _)
1337        ));
1338        q.avail_ring_address = vq.avail_start();
1339
1340        q.used_ring_address = GuestAddress(0xffff_ff00);
1341        assert!(matches!(
1342            q.initialize(m).unwrap_err(),
1343            QueueError::MemoryError(_)
1344        ));
1345        q.used_ring_address = GuestAddress(0x1001);
1346        assert!(matches!(
1347            q.initialize(m).unwrap_err(),
1348            QueueError::PointerNotAligned(_, _)
1349        ));
1350        q.used_ring_address = vq.used_start();
1351    }
1352
1353    #[test]
1354    fn test_queue_processing() {
1355        let m = &default_mem();
1356        let vq = VirtQueue::new(GuestAddress(0), m, 16);
1357        let mut q = vq.create_queue();
1358
1359        q.ready = true;
1360
1361        // Let's create two simple descriptor chains.
1362
1363        for j in 0..5 {
1364            vq.dtable[j as usize].set(0x1000 * u64::from(j + 1), 0x1000, VIRTQ_DESC_F_NEXT, j + 1);
1365        }
1366
1367        // the chains are (0, 1) and (2, 3, 4)
1368        vq.dtable[1].flags.set(0);
1369        vq.dtable[4].flags.set(0);
1370        vq.avail.ring[0].set(0);
1371        vq.avail.ring[1].set(2);
1372        vq.avail.idx.set(2);
1373
1374        // We've just set up two chains.
1375        assert_eq!(q.len(), 2);
1376
1377        // The first chain should hold exactly two descriptors.
1378        let d = q.pop().unwrap().unwrap().next_descriptor().unwrap();
1379        assert!(!d.has_next());
1380        assert!(d.next_descriptor().is_none());
1381
1382        // We popped one chain, so there should be only one left.
1383        assert_eq!(q.len(), 1);
1384
1385        // The next chain holds three descriptors.
1386        let d = q
1387            .pop()
1388            .unwrap()
1389            .unwrap()
1390            .next_descriptor()
1391            .unwrap()
1392            .next_descriptor()
1393            .unwrap();
1394        assert!(!d.has_next());
1395        assert!(d.next_descriptor().is_none());
1396
1397        // We've popped both chains, so the queue should be empty.
1398        assert!(q.is_empty());
1399        assert!(q.pop().unwrap().is_none());
1400
1401        // Undoing the last pop should let us walk the last chain again.
1402        q.undo_pop();
1403        assert_eq!(q.len(), 1);
1404
1405        // Walk the last chain again (three descriptors).
1406        let d = q
1407            .pop()
1408            .unwrap()
1409            .unwrap()
1410            .next_descriptor()
1411            .unwrap()
1412            .next_descriptor()
1413            .unwrap();
1414        assert!(!d.has_next());
1415        assert!(d.next_descriptor().is_none());
1416
1417        // Undoing the last pop should let us walk the last chain again.
1418        q.undo_pop();
1419        assert_eq!(q.len(), 1);
1420
1421        // Walk the last chain again (three descriptors) using pop_or_enable_notification().
1422        let d = q
1423            .pop_or_enable_notification()
1424            .unwrap()
1425            .unwrap()
1426            .next_descriptor()
1427            .unwrap()
1428            .next_descriptor()
1429            .unwrap();
1430        assert!(!d.has_next());
1431        assert!(d.next_descriptor().is_none());
1432
1433        // There are no more descriptors, but notification suppression is disabled.
1434        // Calling pop_or_enable_notification() should simply return None and leave avail_event at 0.
1435        assert_eq!(q.used_ring_avail_event_get(), 0);
1436        assert!(q.pop_or_enable_notification().unwrap().is_none());
1437        assert_eq!(q.used_ring_avail_event_get(), 0);
1438
1439        // There are no more descriptors and notification suppression is enabled. Calling
1440        // pop_or_enable_notification() should enable notifications.
1441        q.enable_notif_suppression();
1442        assert!(q.pop_or_enable_notification().unwrap().is_none());
1443        assert_eq!(q.used_ring_avail_event_get(), 2);
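        // avail_event ends up as 2 because the device publishes its next_avail index
        // (both chains were consumed), asking to be notified only for entries added
        // after that point.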
1444    }
1445
1446    #[test]
1447    fn test_invalid_avail_idx_no_notification() {
1448        // This test ensures constructing a descriptor chain succeeds with valid
1449        // available ring indexes, and that an available index larger than the queue
1450        // size is detected and the queue treated as empty.
1451        // No notification suppression enabled.
1452        let m = &single_region_mem(0x6000);
1453
1454        // We set up a queue of size 4.
1455        let vq = VirtQueue::new(GuestAddress(0), m, 4);
1456        let mut q = vq.create_queue();
1457
1458        for j in 0..4 {
1459            vq.dtable[j as usize].set(0x1000 * u64::from(j + 1), 0x1000, VIRTQ_DESC_F_NEXT, j + 1);
1460        }
1461
1462        // Create 2 descriptor chains.
1463        // the chains are (0, 1) and (2, 3)
1464        vq.dtable[1].flags.set(0);
1465        vq.dtable[3].flags.set(0);
1466        vq.avail.ring[0].set(0);
1467        vq.avail.ring[1].set(2);
1468        vq.avail.idx.set(2);
1469
1470        // We've just set up two chains.
1471        assert_eq!(q.len(), 2);
1472
1473        // We process the first descriptor chain (two descriptors).
1474        let d = q.pop().unwrap().unwrap().next_descriptor();
1475        assert!(matches!(d, Some(x) if !x.has_next()));
1476        // We confuse the device and set the available index to 6 (the queue size is only 4).
1477        vq.avail.idx.set(6);
1478
1479        // We've actually just popped one chain, so the reported length is 6 - 1 = 5.
1480        assert_eq!(q.len(), 5);
1481
1482        // Since the apparent length set by the driver is more than the queue size,
1483        // the queue resyncs and treats this as no pending descriptors.
1484        assert!(q.pop().unwrap().is_none());
1485        assert_eq!(q.len(), 0);
1486    }
1487
1488    #[test]
1489    fn test_invalid_avail_idx_with_notification() {
1490        // This test ensures constructing a descriptor chain succeeds with valid
1491        // available ring indexes, and that an available index larger than the queue
1492        // size is detected and the queue treated as empty.
1493        // Notification suppression is enabled.
1494        let m = &single_region_mem(0x6000);
1495
1496        // We set up a queue of size 4.
1497        let vq = VirtQueue::new(GuestAddress(0), m, 4);
1498        let mut q = vq.create_queue();
1499
1500        q.uses_notif_suppression = true;
1501
1502        // Create 1 descriptor chain of 4 descriptors.
1503        for j in 0..4 {
1504            vq.dtable[j as usize].set(0x1000 * u64::from(j + 1), 0x1000, VIRTQ_DESC_F_NEXT, j + 1);
1505        }
1506        // We need to clear the VIRTQ_DESC_F_NEXT for the last descriptor.
1507        vq.dtable[3].flags.set(0);
1508        vq.avail.ring[0].set(0);
1509
1510        // The driver sets the available index to a suspicious value.
1511        vq.avail.idx.set(6);
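        // With next_avail still at 0, this makes the apparent length 6, which exceeds
        // the queue size of 4.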
1512
1513        assert!(q.pop_or_enable_notification().unwrap().is_none());
1514        assert_eq!(q.len(), 0);
1515    }
1516
1517    #[test]
1518    fn test_add_used() {
1519        let m = &default_mem();
1520        let vq = VirtQueue::new(GuestAddress(0), m, 16);
1521
1522        let mut q = vq.create_queue();
1523        assert_eq!(vq.used.idx.get(), 0);
1524
1525        // Valid queue address configuration
1526        {
1527            // index too large
1528            match q.add_used(16, 0x1000) {
1529                Err(DescIndexOutOfBounds(16)) => (),
1530                _ => unreachable!(),
1531            }
1532
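            // add_used(id, len) stores a used element for descriptor index `id` in the
            // used ring; the driver only sees it once advance_used_ring_idx() publishes
            // the updated used.idx.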
1533            // should be ok
1534            q.add_used(1, 0x1000).unwrap();
1535            q.advance_used_ring_idx();
1536            assert_eq!(vq.used.idx.get(), 1);
1537            let x = vq.used.ring[0].get();
1538            assert_eq!(x.id, 1);
1539            assert_eq!(x.len, 0x1000);
1540        }
1541    }
1542
1543    #[test]
1544    fn test_used_event() {
1545        let m = &default_mem();
1546        let vq = VirtQueue::new(GuestAddress(0), m, 16);
1547
1548        let q = vq.create_queue();
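        // used_event is the u16 the driver writes after the avail ring entries; with
        // VIRTIO_F_EVENT_IDX it tells the device when the driver wants the next
        // used-buffer notification.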
1549        assert_eq!(q.avail_ring_used_event_get(), 0);
1550
1551        vq.avail.event.set(10);
1552        assert_eq!(q.avail_ring_used_event_get(), 10);
1553
1554        vq.avail.event.set(u16::MAX);
1555        assert_eq!(q.avail_ring_used_event_get(), u16::MAX);
1556    }
1557
1558    #[test]
1559    fn test_set_used_ring_avail_event() {
1560        let m = &default_mem();
1561        let vq = VirtQueue::new(GuestAddress(0), m, 16);
1562
1563        let mut q = vq.create_queue();
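        // avail_event is the device-side counterpart of used_event: the device writes it
        // after the used ring entries to indicate when it wants the next available-buffer
        // notification.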
1564        assert_eq!(vq.used.event.get(), 0);
1565
1566        q.used_ring_avail_event_set(10);
1567        assert_eq!(vq.used.event.get(), 10);
1568
1569        q.used_ring_avail_event_set(u16::MAX);
1570        assert_eq!(vq.used.event.get(), u16::MAX);
1571    }
1572
1573    #[test]
1574    fn test_needs_kick() {
1575        let m = &default_mem();
1576        let vq = VirtQueue::new(GuestAddress(0), m, 16);
1577        let mut q = vq.create_queue();
1578
1579        {
1580            // If the device doesn't have notification suppression support,
1581            // `prepare_kick()` should always return true.
1582            q.uses_notif_suppression = false;
1583            for used_idx in 0..10 {
1584                for used_event in 0..10 {
1585                    for num_added in 0..10 {
1586                        q.next_used = Wrapping(used_idx);
1587                        vq.avail.event.set(used_event);
1588                        q.num_added = Wrapping(num_added);
1589                        assert!(q.prepare_kick());
1590                    }
1591                }
1592            }
1593        }
1594
1595        q.enable_notif_suppression();
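        // With suppression enabled, prepare_kick() follows the VIRTIO event index rule:
        // a kick is needed iff the driver's used_event falls among the entries just
        // added, i.e. (wrapping) next_used - used_event - 1 < num_added, where the old
        // used idx is next_used - num_added.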
1596        {
1597            // old used idx < used_event < next_used
1598            q.next_used = Wrapping(10);
1599            vq.avail.event.set(6);
1600            q.num_added = Wrapping(5);
1601            assert!(q.prepare_kick());
1602        }
1603
1604        {
1605            // old used idx = used_event < next_used
1606            q.next_used = Wrapping(10);
1607            vq.avail.event.set(6);
1608            q.num_added = Wrapping(4);
1609            assert!(q.prepare_kick());
1610        }
1611
1612        {
1613            // used_event < old used idx < next_used
1614            q.next_used = Wrapping(10);
1615            vq.avail.event.set(6);
1616            q.num_added = Wrapping(3);
1617            assert!(!q.prepare_kick());
1618        }
1619    }
1620
1621    #[test]
1622    fn test_try_enable_notification() {
1623        let m = &default_mem();
1624        let vq = VirtQueue::new(GuestAddress(0), m, 16);
1625        let mut q = vq.create_queue();
1626
1627        q.ready = true;
1628
1629        // We create a simple descriptor chain
1630        vq.dtable[0].set(0x1000_u64, 0x1000, 0, 0);
1631        vq.avail.ring[0].set(0);
1632        vq.avail.idx.set(1);
1633
1634        assert_eq!(q.len(), 1);
1635
1636        // Notification suppression is disabled. try_enable_notification() should return true without touching avail_event.
1637        assert!(q.try_enable_notification().unwrap());
1638        assert_eq!(q.used_ring_avail_event_get(), 0);
1639
1640        // Enable notification suppression and check again. There is 1 available descriptor chain,
1641        // so try_enable_notification() should return false and leave avail_event untouched.
1642        q.enable_notif_suppression();
1643        assert!(!q.try_enable_notification().unwrap());
1644        assert_eq!(q.used_ring_avail_event_get(), 0);
1645
1646        // Consume the descriptor. avail_event should be modified
1647        assert!(q.pop().unwrap().is_some());
1648        assert!(q.try_enable_notification().unwrap());
1649        assert_eq!(q.used_ring_avail_event_get(), 1);
1650    }
1651
1652    #[test]
1653    fn test_initialize_with_aligned_pointer() {
1654        let mut q = Queue::new(FIRECRACKER_MAX_QUEUE_SIZE);
1655
1656        q.ready = true;
1657        q.size = q.max_size;
1658
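        // These are the minimum alignments VIRTIO 1.0 requires for split virtqueues:
        // 16 bytes for the descriptor table, 2 for the available ring, 4 for the used
        // ring.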
1659        // Descriptor table must be 16-byte aligned.
1660        q.desc_table_address = GuestAddress(16);
1661        // Available ring must be 2-byte aligned.
1662        q.avail_ring_address = GuestAddress(2);
1663        // Used ring must be 4-byte aligned.
1664        q.used_ring_address = GuestAddress(4);
1665
1666        let mem = single_region_mem(0x10000);
1667        q.initialize(&mem).unwrap();
1668    }
1669
1670    #[test]
1671    fn test_initialize_with_misaligned_pointer() {
1672        let mut q = Queue::new(FIRECRACKER_MAX_QUEUE_SIZE);
1673        q.ready = true;
1674        q.size = q.max_size;
1675        let mem = single_region_mem(0x1000);
1676
1677        // Descriptor table must be 16-byte aligned.
1678        q.desc_table_address = GuestAddress(0xb);
1679        match q.initialize(&mem) {
1680            Ok(_) => panic!("Unexpected success"),
1681            Err(QueueError::PointerNotAligned(addr, alignment)) => {
1682                assert_eq!(addr % 16, 0xb);
1683                assert_eq!(alignment, 16);
1684            }
1685            Err(e) => panic!("Unexpected error {e:#?}"),
1686        }
1687        q.desc_table_address = GuestAddress(0x0);
1688
1689        // Available ring must be 2-byte aligned.
1690        q.avail_ring_address = GuestAddress(0x1);
1691        match q.initialize(&mem) {
1692            Ok(_) => panic!("Unexpected success"),
1693            Err(QueueError::PointerNotAligned(addr, alignment)) => {
1694                assert_eq!(addr % 2, 0x1);
1695                assert_eq!(alignment, 2);
1696            }
1697            Err(e) => panic!("Unexpected error {e:#?}"),
1698        }
1699        q.avail_ring_address = GuestAddress(0x0);
1700
1701        // Used ring must be 4-byte aligned.
1702        q.used_ring_address = GuestAddress(0x3);
1703        match q.initialize(&mem) {
1704            Ok(_) => panic!("unexpected success"),
1705            Err(QueueError::PointerNotAligned(addr, alignment)) => {
1706                assert_eq!(addr % 4, 0x3);
1707                assert_eq!(alignment, 4);
1708            }
1709            Err(e) => panic!("Unexpected error {e:#?}"),
1710        }
1711    }
1712
1713    #[test]
1714    fn test_queue_error_display() {
1715        let err = QueueError::MemoryError(vm_memory::GuestMemoryError::InvalidGuestAddress(
1716            GuestAddress(0),
1717        ));
1718        let _ = format!("{}{:?}", err, err);
1719
1720        let err = DescIndexOutOfBounds(1);
1721        let _ = format!("{}{:?}", err, err);
1722    }
1723}