vmm/devices/virtio/
iovec.rs

1// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::io::ErrorKind;
5
6use libc::{c_void, iovec, size_t};
7use serde::{Deserialize, Serialize};
8use vm_memory::bitmap::Bitmap;
9use vm_memory::{
10    GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
11};
12
13use super::iov_deque::{IovDeque, IovDequeError};
14use super::queue::FIRECRACKER_MAX_QUEUE_SIZE;
15use crate::devices::virtio::queue::DescriptorChain;
16use crate::vstate::memory::GuestMemoryMmap;
17
18#[derive(Debug, thiserror::Error, displaydoc::Display)]
19pub enum IoVecError {
20    /// Tried to create an `IoVec` from a write-only descriptor chain
21    WriteOnlyDescriptor,
22    /// Tried to create an 'IoVecMut` from a read-only descriptor chain
23    ReadOnlyDescriptor,
24    /// Tried to create an `IoVec` or `IoVecMut` from a descriptor chain that was too large
25    OverflowedDescriptor,
26    /// Tried to push to full IovDeque.
27    IovDequeOverflow,
28    /// Guest memory error: {0}
29    GuestMemory(#[from] GuestMemoryError),
30    /// Error with underlying `IovDeque`: {0}
31    IovDeque(#[from] IovDequeError),
32}
33
/// This is essentially a wrapper of a `Vec<libc::iovec>` which can be passed to `libc::writev`.
///
/// It describes a buffer passed to us by the guest that is scattered across multiple
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
/// of data from that buffer.
#[derive(Debug, Default)]
pub struct IoVecBuffer {
    // Container of the memory regions included in this IO vector.
    vecs: Vec<iovec>,
    // Total length in bytes of the buffer described by `vecs`. Kept in sync by
    // `load_descriptor_chain()`/`clear()`; overflow is checked when loading a chain,
    // so a `u32` is sufficient.
    len: u32,
}

// SAFETY: `IoVecBuffer` doesn't allow for interior mutability and no shared ownership is possible
// as it doesn't implement clone
unsafe impl Send for IoVecBuffer {}
50
impl IoVecBuffer {
    /// Create an `IoVecBuffer` from a `DescriptorChain`
    ///
    /// Walks the chain starting at `head`, recording one `iovec` per descriptor and
    /// accumulating the total length. Any previously loaded chain is discarded first.
    ///
    /// # Safety
    ///
    /// The descriptor chain cannot be referencing the same memory location as another chain
    pub unsafe fn load_descriptor_chain(
        &mut self,
        mem: &GuestMemoryMmap,
        head: DescriptorChain,
    ) -> Result<(), IoVecError> {
        self.clear();

        let mut next_descriptor = Some(head);
        while let Some(desc) = next_descriptor {
            // `IoVecBuffer` is a device-readable buffer: a write-only descriptor in
            // the chain is a guest protocol violation.
            if desc.is_write_only() {
                return Err(IoVecError::WriteOnlyDescriptor);
            }

            // We use get_slice instead of `get_host_address` here in order to have the whole
            // range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory
            // region in the GuestMemoryMmap.
            let iov_base = mem
                .get_slice(desc.addr, desc.len as usize)?
                .ptr_guard_mut()
                .as_ptr()
                .cast::<c_void>();
            self.vecs.push(iovec {
                iov_base,
                iov_len: desc.len as size_t,
            });
            // Guard against a guest-supplied chain describing more than `u32::MAX` bytes.
            self.len = self
                .len
                .checked_add(desc.len)
                .ok_or(IoVecError::OverflowedDescriptor)?;

            next_descriptor = desc.next_descriptor();
        }

        Ok(())
    }

    /// Create an `IoVecBuffer` from a `DescriptorChain`
    ///
    /// # Safety
    ///
    /// The descriptor chain cannot be referencing the same memory location as another chain
    pub unsafe fn from_descriptor_chain(
        mem: &GuestMemoryMmap,
        head: DescriptorChain,
    ) -> Result<Self, IoVecError> {
        let mut new_buffer = Self::default();
        // SAFETY: descriptor chain cannot be referencing the same memory location as another chain
        unsafe {
            new_buffer.load_descriptor_chain(mem, head)?;
        }
        Ok(new_buffer)
    }

    /// Get the total length of the memory regions covered by this `IoVecBuffer`
    pub(crate) fn len(&self) -> u32 {
        self.len
    }

    /// Returns a pointer to the memory keeping the `iovec` structs
    ///
    /// Suitable for passing directly to `libc::writev`, together with
    /// [`Self::iovec_count`].
    pub fn as_iovec_ptr(&self) -> *const iovec {
        self.vecs.as_ptr()
    }

    /// Returns the length of the `iovec` array.
    pub fn iovec_count(&self) -> usize {
        self.vecs.len()
    }

    /// Clears the `iovec` array
    pub fn clear(&mut self) {
        self.vecs.clear();
        self.len = 0u32;
    }

    /// Reads a number of bytes from the `IoVecBuffer` starting at a given offset.
    ///
    /// This will try to fill `buf` reading bytes from the `IoVecBuffer` starting from
    /// the given offset.
    ///
    /// # Returns
    ///
    /// `Ok(())` if `buf` was filled by reading from this [`IoVecBuffer`],
    /// `Err(VolatileMemoryError::PartialBuffer)` if only part of `buf` could be filled, and
    /// `Err(VolatileMemoryError::OutOfBounds)` if `offset >= self.len()`.
    pub fn read_exact_volatile_at(
        &self,
        mut buf: &mut [u8],
        offset: usize,
    ) -> Result<(), VolatileMemoryError> {
        if offset < self.len() as usize {
            let expected = buf.len();
            let bytes_read = self.read_volatile_at(&mut buf, offset, expected)?;

            if bytes_read != expected {
                return Err(VolatileMemoryError::PartialBuffer {
                    expected,
                    completed: bytes_read,
                });
            }

            Ok(())
        } else {
            // If `offset` is past size, there's nothing to read.
            Err(VolatileMemoryError::OutOfBounds { addr: offset })
        }
    }

    /// Reads up to `len` bytes from the `IoVecBuffer` starting at the given offset.
    ///
    /// This will try to write to the given [`WriteVolatile`].
    ///
    /// Returns the number of bytes actually transferred into `dst`; a short count can
    /// occur if `dst` accepts fewer bytes than requested or the buffer ends first.
    pub fn read_volatile_at<W: WriteVolatile>(
        &self,
        dst: &mut W,
        mut offset: usize,
        mut len: usize,
    ) -> Result<usize, VolatileMemoryError> {
        let mut total_bytes_read = 0;

        for iov in &self.vecs {
            if len == 0 {
                break;
            }

            // Skip whole iovecs until we reach the one containing `offset`.
            if offset >= iov.iov_len {
                offset -= iov.iov_len;
                continue;
            }

            let mut slice =
                // SAFETY: the constructor IoVecBufferMut::from_descriptor_chain ensures that
                // all iovecs contained point towards valid ranges of guest memory
                unsafe { VolatileSlice::new(iov.iov_base.cast(), iov.iov_len).offset(offset)? };
            // `offset` only applies inside the first iovec we read from.
            offset = 0;

            if slice.len() > len {
                slice = slice.subslice(0, len)?;
            }

            // Retry the write if it was interrupted (EINTR); break out of the retry
            // loop with the first non-interrupted result.
            match loop {
                match dst.write_volatile(&slice) {
                    Err(VolatileMemoryError::IOError(err))
                        if err.kind() == ErrorKind::Interrupted => {}
                    result => break result,
                }
            } {
                Ok(bytes_read) => {
                    total_bytes_read += bytes_read;

                    // A short write means `dst` can't take more data; stop here.
                    if bytes_read < slice.len() {
                        break;
                    }
                    len -= bytes_read;
                }
                // exit successfully if we previously managed to write some bytes
                Err(_) if total_bytes_read > 0 => break,
                Err(err) => return Err(err),
            }
        }

        Ok(total_bytes_read)
    }
}
219
/// Bookkeeping for a descriptor chain that was parsed into an `IoVecBufferMut`.
///
/// Returned by `IoVecBufferMut::append_descriptor_chain` and later used to drop
/// the chain's iovecs from the buffer (`drop_chain_front`/`drop_chain_back`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParsedDescriptorChain {
    // Index of the head descriptor of the chain.
    pub head_index: u16,
    // Total length, in bytes, of the memory described by this chain.
    pub length: u32,
    // Number of `iovec` entries this chain contributed to the buffer.
    pub nr_iovecs: u16,
}
226
/// This is essentially a wrapper of a `Vec<libc::iovec>` which can be passed to `libc::readv`.
///
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
/// memory regions. Additionally, this wrapper provides methods that allow writing arbitrary ranges
/// of data to that buffer.
/// `L` const generic value must be a multiple of 256 as required by the `IovDeque` requirements.
#[derive(Debug)]
pub struct IoVecBufferMut<const L: u16 = FIRECRACKER_MAX_QUEUE_SIZE> {
    // container of the memory regions included in this IO vector
    pub vecs: IovDeque<L>,
    // Total length of the IoVecBufferMut
    // We use `u32` here because we use this type in devices which
    // should not give us huge buffers. In any case this
    // value will not overflow as we explicitly check for this case.
    pub len: u32,
}

// SAFETY: `IoVecBufferMut` doesn't allow for interior mutability and no shared ownership is
// possible as it doesn't implement clone
unsafe impl<const L: u16> Send for IoVecBufferMut<L> {}
247
impl<const L: u16> IoVecBufferMut<L> {
    /// Append a `DescriptorChain` in this `IoVecBufferMut`
    ///
    /// On any error, every iovec already pushed for this chain is popped back off
    /// the deque, leaving the buffer exactly as it was before the call.
    ///
    /// # Safety
    ///
    /// The descriptor chain cannot be referencing the same memory location as another chain
    pub unsafe fn append_descriptor_chain(
        &mut self,
        mem: &GuestMemoryMmap,
        head: DescriptorChain,
    ) -> Result<ParsedDescriptorChain, IoVecError> {
        let head_index = head.index;
        let mut next_descriptor = Some(head);
        let mut length = 0u32;
        let mut nr_iovecs = 0u16;
        while let Some(desc) = next_descriptor {
            // This buffer is device-writable: a read-only descriptor is a guest
            // protocol violation. Undo what we appended so far.
            if !desc.is_write_only() {
                self.vecs.pop_back(nr_iovecs);
                return Err(IoVecError::ReadOnlyDescriptor);
            }

            // We use get_slice instead of `get_host_address` here in order to have the whole
            // range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory
            // region in the GuestMemoryMmap.
            let slice = mem
                .get_slice(desc.addr, desc.len as usize)
                .inspect_err(|_| {
                    // Roll back this chain's iovecs before propagating the error.
                    self.vecs.pop_back(nr_iovecs);
                })?;
            // We need to mark the area of guest memory that will be mutated through this
            // IoVecBufferMut as dirty ahead of time, as we loose access to all
            // vm-memory related information after converting down to iovecs.
            slice.bitmap().mark_dirty(0, desc.len as usize);
            let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();

            if self.vecs.is_full() {
                self.vecs.pop_back(nr_iovecs);
                return Err(IoVecError::IovDequeOverflow);
            }

            self.vecs.push_back(iovec {
                iov_base,
                iov_len: desc.len as size_t,
            });

            // Increment before the overflow check below so that on error we also pop
            // the iovec we just pushed.
            nr_iovecs += 1;
            length = length
                .checked_add(desc.len)
                .ok_or(IoVecError::OverflowedDescriptor)
                .inspect_err(|_| {
                    self.vecs.pop_back(nr_iovecs);
                })?;

            next_descriptor = desc.next_descriptor();
        }

        // The chain itself fits in a `u32`, but adding it to the buffer's existing
        // length may still overflow; roll back in that case too.
        self.len = self.len.checked_add(length).ok_or_else(|| {
            self.vecs.pop_back(nr_iovecs);
            IoVecError::OverflowedDescriptor
        })?;

        Ok(ParsedDescriptorChain {
            head_index,
            length,
            nr_iovecs,
        })
    }

    /// Create an empty `IoVecBufferMut`.
    pub fn new() -> Result<Self, IovDequeError> {
        let vecs = IovDeque::new()?;
        Ok(Self { vecs, len: 0 })
    }

    /// Create an `IoVecBufferMut` from a `DescriptorChain`
    ///
    /// This will clear any previous `iovec` objects in the buffer and load the new
    /// [`DescriptorChain`].
    ///
    /// # Safety
    ///
    /// The descriptor chain cannot be referencing the same memory location as another chain
    pub unsafe fn load_descriptor_chain(
        &mut self,
        mem: &GuestMemoryMmap,
        head: DescriptorChain,
    ) -> Result<(), IoVecError> {
        self.clear();
        // SAFETY: descriptor chain cannot be referencing the same memory location as another chain
        let _ = unsafe { self.append_descriptor_chain(mem, head)? };
        Ok(())
    }

    /// Drop descriptor chain from the `IoVecBufferMut` front
    ///
    /// This will drop memory described by the `IoVecBufferMut` from the beginning.
    pub fn drop_chain_front(&mut self, parse_descriptor: &ParsedDescriptorChain) {
        self.vecs.pop_front(parse_descriptor.nr_iovecs);
        self.len -= parse_descriptor.length;
    }

    /// Drop descriptor chain from the `IoVecBufferMut` back
    ///
    /// This will drop memory described by the `IoVecBufferMut` from the end.
    pub fn drop_chain_back(&mut self, parse_descriptor: &ParsedDescriptorChain) {
        self.vecs.pop_back(parse_descriptor.nr_iovecs);
        self.len -= parse_descriptor.length;
    }

    /// Create an `IoVecBuffer` from a `DescriptorChain`
    ///
    /// # Safety
    ///
    /// The descriptor chain cannot be referencing the same memory location as another chain
    pub unsafe fn from_descriptor_chain(
        mem: &GuestMemoryMmap,
        head: DescriptorChain,
    ) -> Result<Self, IoVecError> {
        let mut new_buffer = Self::new()?;
        // SAFETY: descriptor chain cannot be referencing the same memory location as another chain
        unsafe {
            new_buffer.load_descriptor_chain(mem, head)?;
        }
        Ok(new_buffer)
    }

    /// Get the total length of the memory regions covered by this `IoVecBuffer`
    #[inline(always)]
    pub fn len(&self) -> u32 {
        self.len
    }

    /// Returns true if buffer is empty.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns a pointer to the memory keeping the `iovec` structs
    pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] {
        self.vecs.as_mut_slice()
    }

    /// Clears the `iovec` array
    pub fn clear(&mut self) {
        self.vecs.clear();
        self.len = 0;
    }

    /// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset.
    ///
    /// This will try to fill `IoVecBufferMut` writing bytes from the `buf` starting from
    /// the given offset. It will write as many bytes from `buf` as they fit inside the
    /// `IoVecBufferMut` starting from `offset`.
    ///
    /// # Returns
    ///
    /// `Ok(())` if the entire contents of `buf` could be written to this [`IoVecBufferMut`],
    /// `Err(VolatileMemoryError::PartialBuffer)` if only part of `buf` could be transferred, and
    /// `Err(VolatileMemoryError::OutOfBounds)` if `offset >= self.len()`.
    pub fn write_all_volatile_at(
        &mut self,
        mut buf: &[u8],
        offset: usize,
    ) -> Result<(), VolatileMemoryError> {
        if offset < self.len() as usize {
            let expected = buf.len();
            let bytes_written = self.write_volatile_at(&mut buf, offset, expected)?;

            if bytes_written != expected {
                return Err(VolatileMemoryError::PartialBuffer {
                    expected,
                    completed: bytes_written,
                });
            }

            Ok(())
        } else {
            // We cannot write past the end of the `IoVecBufferMut`.
            Err(VolatileMemoryError::OutOfBounds { addr: offset })
        }
    }

    /// Writes up to `len` bytes into the `IoVecBuffer` starting at the given offset.
    ///
    /// This will try to write to the given [`WriteVolatile`].
    ///
    /// Returns the number of bytes actually transferred from `src`; a short count can
    /// occur if `src` produces fewer bytes than requested or the buffer ends first.
    pub fn write_volatile_at<W: ReadVolatile>(
        &mut self,
        src: &mut W,
        mut offset: usize,
        mut len: usize,
    ) -> Result<usize, VolatileMemoryError> {
        let mut total_bytes_read = 0;

        for iov in self.vecs.as_slice() {
            if len == 0 {
                break;
            }

            // Skip whole iovecs until we reach the one containing `offset`.
            if offset >= iov.iov_len {
                offset -= iov.iov_len;
                continue;
            }

            let mut slice =
                // SAFETY: the constructor IoVecBufferMut::from_descriptor_chain ensures that
                // all iovecs contained point towards valid ranges of guest memory
                unsafe { VolatileSlice::new(iov.iov_base.cast(), iov.iov_len).offset(offset)? };
            // `offset` only applies inside the first iovec we write into.
            offset = 0;

            if slice.len() > len {
                slice = slice.subslice(0, len)?;
            }

            // Retry the read if it was interrupted (EINTR); break out of the retry
            // loop with the first non-interrupted result.
            match loop {
                match src.read_volatile(&mut slice) {
                    Err(VolatileMemoryError::IOError(err))
                        if err.kind() == ErrorKind::Interrupted => {}
                    result => break result,
                }
            } {
                Ok(bytes_read) => {
                    total_bytes_read += bytes_read;

                    // A short read means `src` has no more data; stop here.
                    if bytes_read < slice.len() {
                        break;
                    }
                    len -= bytes_read;
                }
                // exit successfully if we previously managed to read some bytes
                Err(_) if total_bytes_read > 0 => break,
                Err(err) => return Err(err),
            }
        }

        Ok(total_bytes_read)
    }
}
486
#[cfg(test)]
#[allow(clippy::cast_possible_truncation)]
mod tests {
    use libc::{c_void, iovec};
    use vm_memory::VolatileMemoryError;

    use super::IoVecBuffer;
    // Redefine `IoVecBufferMut` with specific length. Otherwise
    // Rust will not know what to do.
    type IoVecBufferMutDefault = super::IoVecBufferMut<FIRECRACKER_MAX_QUEUE_SIZE>;

    use crate::devices::virtio::iov_deque::IovDeque;
    use crate::devices::virtio::queue::{
        FIRECRACKER_MAX_QUEUE_SIZE, Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE,
    };
    use crate::devices::virtio::test_utils::VirtQueue;
    use crate::test_utils::multi_region_mem;
    use crate::vstate::memory::{Bytes, GuestAddress, GuestMemoryMmap};

    // Test-only helper: build an `IoVecBuffer` over a single host slice.
    impl<'a> From<&'a [u8]> for IoVecBuffer {
        fn from(buf: &'a [u8]) -> Self {
            Self {
                vecs: vec![iovec {
                    iov_base: buf.as_ptr() as *mut c_void,
                    iov_len: buf.len(),
                }],
                len: buf.len().try_into().unwrap(),
            }
        }
    }

    // Test-only helper: build an `IoVecBuffer` over multiple host slices.
    impl<'a> From<Vec<&'a [u8]>> for IoVecBuffer {
        fn from(buffer: Vec<&'a [u8]>) -> Self {
            let mut len = 0_u32;
            let vecs = buffer
                .into_iter()
                .map(|slice| {
                    len += TryInto::<u32>::try_into(slice.len()).unwrap();
                    iovec {
                        iov_base: slice.as_ptr() as *mut c_void,
                        iov_len: slice.len(),
                    }
                })
                .collect();

            Self { vecs, len }
        }
    }

    // Test-only helper: build an `IoVecBufferMut` over a single mutable host slice.
    impl<const L: u16> From<&mut [u8]> for super::IoVecBufferMut<L> {
        fn from(buf: &mut [u8]) -> Self {
            let mut vecs = IovDeque::new().unwrap();
            vecs.push_back(iovec {
                iov_base: buf.as_mut_ptr().cast::<c_void>(),
                iov_len: buf.len(),
            });

            Self {
                vecs,
                len: buf.len() as u32,
            }
        }
    }

    // Test-only helper: build an `IoVecBufferMut` over multiple mutable host slices.
    impl<const L: u16> From<Vec<&mut [u8]>> for super::IoVecBufferMut<L> {
        fn from(buffer: Vec<&mut [u8]>) -> Self {
            let mut len = 0;
            let mut vecs = IovDeque::new().unwrap();
            for slice in buffer {
                len += slice.len() as u32;

                vecs.push_back(iovec {
                    iov_base: slice.as_ptr() as *mut c_void,
                    iov_len: slice.len(),
                });
            }

            Self { vecs, len }
        }
    }

    // Guest memory with three disjoint 64KiB regions, so descriptor chains can
    // span region boundaries.
    fn default_mem() -> GuestMemoryMmap {
        multi_region_mem(&[
            (GuestAddress(0), 0x10000),
            (GuestAddress(0x20000), 0x10000),
            (GuestAddress(0x40000), 0x10000),
        ])
    }

    // Builds a queue holding a single 4-descriptor chain (64 bytes each) at
    // guest address 0x20000, read-only or write-only depending on the flag.
    fn chain(m: &GuestMemoryMmap, is_write_only: bool) -> (Queue, VirtQueue<'_>) {
        let vq = VirtQueue::new(GuestAddress(0), m, 16);

        let mut q = vq.create_queue();
        q.ready = true;

        let flags = if is_write_only {
            VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE
        } else {
            VIRTQ_DESC_F_NEXT
        };

        for j in 0..4 {
            vq.dtable[j as usize].set(0x20000 + 64 * u64::from(j), 64, flags, j + 1);
        }

        // one chain: (0, 1, 2, 3)
        vq.dtable[3].flags.set(flags & !VIRTQ_DESC_F_NEXT);
        vq.avail.ring[0].set(0);
        vq.avail.idx.set(1);

        (q, vq)
    }

    // Read-only chain whose backing memory is pre-filled with bytes 0..=255.
    fn read_only_chain(mem: &GuestMemoryMmap) -> (Queue, VirtQueue<'_>) {
        let v: Vec<u8> = (0..=255).collect();
        mem.write_slice(&v, GuestAddress(0x20000)).unwrap();

        chain(mem, false)
    }

    // Write-only chain whose backing memory is zeroed.
    fn write_only_chain(mem: &GuestMemoryMmap) -> (Queue, VirtQueue<'_>) {
        let v = vec![0; 256];
        mem.write_slice(&v, GuestAddress(0x20000)).unwrap();

        chain(mem, true)
    }

    #[test]
    fn test_access_mode() {
        let mem = default_mem();
        let (mut q, _) = read_only_chain(&mem);
        let head = q.pop().unwrap().unwrap();
        // SAFETY: This descriptor chain is only loaded into one buffer
        unsafe { IoVecBuffer::from_descriptor_chain(&mem, head).unwrap() };

        let (mut q, _) = write_only_chain(&mem);
        let head = q.pop().unwrap().unwrap();
        // SAFETY: This descriptor chain is only loaded into one buffer
        unsafe { IoVecBuffer::from_descriptor_chain(&mem, head).unwrap_err() };

        let (mut q, _) = read_only_chain(&mem);
        let head = q.pop().unwrap().unwrap();
        // SAFETY: This descriptor chain is only loaded into one buffer
        unsafe { IoVecBufferMutDefault::from_descriptor_chain(&mem, head).unwrap_err() };

        let (mut q, _) = write_only_chain(&mem);
        let head = q.pop().unwrap().unwrap();
        // SAFETY: This descriptor chain is only loaded into one buffer
        unsafe { IoVecBufferMutDefault::from_descriptor_chain(&mem, head).unwrap() };
    }

    #[test]
    fn test_iovec_length() {
        let mem = default_mem();
        let (mut q, _) = read_only_chain(&mem);
        let head = q.pop().unwrap().unwrap();

        // SAFETY: This descriptor chain is only loaded once in this test
        let iovec = unsafe { IoVecBuffer::from_descriptor_chain(&mem, head).unwrap() };
        assert_eq!(iovec.len(), 4 * 64);
    }

    #[test]
    fn test_iovec_mut_length() {
        let mem = default_mem();
        let (mut q, _) = write_only_chain(&mem);
        let head = q.pop().unwrap().unwrap();

        // SAFETY: This descriptor chain is only loaded once in this test
        let mut iovec =
            unsafe { IoVecBufferMutDefault::from_descriptor_chain(&mem, head).unwrap() };
        assert_eq!(iovec.len(), 4 * 64);

        // We are creating a new queue where we can get descriptors from. Probably, this is not
        // something that we will ever want to do, as `IoVecBufferMut`s are typically
        // (conceptually) associated with a single `Queue`. We just do this here to be able to test
        // the appending logic.
        let (mut q, _) = write_only_chain(&mem);
        let head = q.pop().unwrap().unwrap();
        // SAFETY: it is actually unsafe, but we just want to check the length of the
        // `IoVecBufferMut` after appending.
        let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() };
        assert_eq!(iovec.len(), 8 * 64);
    }

    #[test]
    fn test_iovec_read_at() {
        let mem = default_mem();
        let (mut q, _) = read_only_chain(&mem);
        let head = q.pop().unwrap().unwrap();

        // SAFETY: This descriptor chain is only loaded once in this test
        let iovec = unsafe { IoVecBuffer::from_descriptor_chain(&mem, head).unwrap() };

        // Reading past the end is truncated to the 256 available bytes.
        let mut buf = vec![0u8; 257];
        assert_eq!(
            iovec
                .read_volatile_at(&mut buf.as_mut_slice(), 0, 257)
                .unwrap(),
            256
        );
        assert_eq!(buf[0..256], (0..=255).collect::<Vec<_>>());
        assert_eq!(buf[256], 0);

        let mut buf = vec![0; 5];
        iovec.read_exact_volatile_at(&mut buf[..4], 0).unwrap();
        assert_eq!(buf, vec![0u8, 1, 2, 3, 0]);

        iovec.read_exact_volatile_at(&mut buf, 0).unwrap();
        assert_eq!(buf, vec![0u8, 1, 2, 3, 4]);

        iovec.read_exact_volatile_at(&mut buf, 1).unwrap();
        assert_eq!(buf, vec![1u8, 2, 3, 4, 5]);

        // Offset 60 crosses the boundary between the first two descriptors.
        iovec.read_exact_volatile_at(&mut buf, 60).unwrap();
        assert_eq!(buf, vec![60u8, 61, 62, 63, 64]);

        assert_eq!(
            iovec
                .read_volatile_at(&mut buf.as_mut_slice(), 252, 5)
                .unwrap(),
            4
        );
        assert_eq!(buf[0..4], vec![252u8, 253, 254, 255]);

        assert!(matches!(
            iovec.read_exact_volatile_at(&mut buf, 252),
            Err(VolatileMemoryError::PartialBuffer {
                expected: 5,
                completed: 4
            })
        ));
        assert!(matches!(
            iovec.read_exact_volatile_at(&mut buf, 256),
            Err(VolatileMemoryError::OutOfBounds { addr: 256 })
        ));
    }

    #[test]
    fn test_iovec_mut_write_at() {
        let mem = default_mem();
        let (mut q, vq) = write_only_chain(&mem);

        // This is a descriptor chain with 4 elements 64 bytes long each.
        let head = q.pop().unwrap().unwrap();

        // SAFETY: This descriptor chain is only loaded into one buffer
        let mut iovec =
            unsafe { IoVecBufferMutDefault::from_descriptor_chain(&mem, head).unwrap() };
        let buf = vec![0u8, 1, 2, 3, 4];

        // One test vector for each part of the chain
        let mut test_vec1 = vec![0u8; 64];
        let mut test_vec2 = vec![0u8; 64];
        let test_vec3 = vec![0u8; 64];
        let mut test_vec4 = vec![0u8; 64];

        // Control test: Initially all three regions should be zero
        iovec.write_all_volatile_at(&test_vec1, 0).unwrap();
        iovec.write_all_volatile_at(&test_vec2, 64).unwrap();
        iovec.write_all_volatile_at(&test_vec3, 128).unwrap();
        iovec.write_all_volatile_at(&test_vec4, 192).unwrap();
        vq.dtable[0].check_data(&test_vec1);
        vq.dtable[1].check_data(&test_vec2);
        vq.dtable[2].check_data(&test_vec3);
        vq.dtable[3].check_data(&test_vec4);

        // Let's initialize test_vec1 with our buffer.
        test_vec1[..buf.len()].copy_from_slice(&buf);
        // And write just a part of it
        iovec.write_all_volatile_at(&buf[..3], 0).unwrap();
        // Not all 5 bytes from buf should be written in memory,
        // just 3 of them.
        vq.dtable[0].check_data(&[0u8, 1, 2, 0, 0]);
        vq.dtable[1].check_data(&test_vec2);
        vq.dtable[2].check_data(&test_vec3);
        vq.dtable[3].check_data(&test_vec4);
        // But if we write the whole `buf` in memory then all
        // of it should be observable.
        iovec.write_all_volatile_at(&buf, 0).unwrap();
        vq.dtable[0].check_data(&test_vec1);
        vq.dtable[1].check_data(&test_vec2);
        vq.dtable[2].check_data(&test_vec3);
        vq.dtable[3].check_data(&test_vec4);

        // We are now writing with an offset of 1. So, initialize
        // the corresponding part of `test_vec1`
        test_vec1[1..buf.len() + 1].copy_from_slice(&buf);
        iovec.write_all_volatile_at(&buf, 1).unwrap();
        vq.dtable[0].check_data(&test_vec1);
        vq.dtable[1].check_data(&test_vec2);
        vq.dtable[2].check_data(&test_vec3);
        vq.dtable[3].check_data(&test_vec4);

        // Perform a write that traverses two of the underlying
        // regions. Writing at offset 60 should write 4 bytes on the
        // first region and one byte on the second
        test_vec1[60..64].copy_from_slice(&buf[0..4]);
        test_vec2[0] = 4;
        iovec.write_all_volatile_at(&buf, 60).unwrap();
        vq.dtable[0].check_data(&test_vec1);
        vq.dtable[1].check_data(&test_vec2);
        vq.dtable[2].check_data(&test_vec3);
        vq.dtable[3].check_data(&test_vec4);

        test_vec4[63] = 3;
        test_vec4[62] = 2;
        test_vec4[61] = 1;
        // Now perform a write that does not fit in the buffer. Try writing
        // 5 bytes at offset 252 (only 4 bytes left).
        test_vec4[60..64].copy_from_slice(&buf[0..4]);
        assert_eq!(
            iovec.write_volatile_at(&mut &*buf, 252, buf.len()).unwrap(),
            4
        );
        vq.dtable[0].check_data(&test_vec1);
        vq.dtable[1].check_data(&test_vec2);
        vq.dtable[2].check_data(&test_vec3);
        vq.dtable[3].check_data(&test_vec4);

        // Trying to add past the end of the buffer should not write anything
        assert!(matches!(
            iovec.write_all_volatile_at(&buf, 256),
            Err(VolatileMemoryError::OutOfBounds { addr: 256 })
        ));
        vq.dtable[0].check_data(&test_vec1);
        vq.dtable[1].check_data(&test_vec2);
        vq.dtable[2].check_data(&test_vec3);
        vq.dtable[3].check_data(&test_vec4);
    }
}
818
/// Kani proof harnesses for `IoVecBuffer` / `IoVecBufferMut`.
///
/// These proofs exercise `read_volatile_at` / `write_volatile_at` against
/// completely arbitrary (possibly overlapping) descriptor layouts and verify
/// that the number of bytes transferred matches the documented contract.
#[cfg(kani)]
#[allow(dead_code)] // Avoid warning when using stubs
mod verification {
    use std::mem::ManuallyDrop;

    use libc::{c_void, iovec};
    use vm_memory::VolatileSlice;
    use vm_memory::bitmap::BitmapSlice;

    use super::IoVecBuffer;
    use crate::arch::GUEST_PAGE_SIZE;
    use crate::devices::virtio::iov_deque::IovDeque;
    // Redefine `IoVecBufferMut` and `IovDeque` with specific length. Otherwise
    // Rust will not know what to do.
    type IoVecBufferMutDefault = super::IoVecBufferMut<FIRECRACKER_MAX_QUEUE_SIZE>;
    type IovDequeDefault = IovDeque<FIRECRACKER_MAX_QUEUE_SIZE>;

    use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;

    // Maximum memory size to use for our buffers. For the time being 1KB.
    const GUEST_MEMORY_SIZE: usize = 1 << 10;

    // Maximum number of descriptors in a chain to use in our proofs. The value is selected upon
    // experimenting with the execution time. Typically, in our virtio devices we use queues of up
    // to 256 entries which is the theoretical maximum length of a `DescriptorChain`, but in reality
    // our code does not make any assumption about the length of the chain, apart from it being
    // >= 1.
    const MAX_DESC_LENGTH: usize = 4;

    mod stubs {
        use super::*;

        /// This is a stub for the `IovDeque::push_back` method.
        ///
        /// `IovDeque` relies on a special allocation of two pages of virtual memory, where both of
        /// these point to the same underlying physical page. This way, the contents of the first
        /// page of virtual memory are automatically mirrored in the second virtual page. We do
        /// that in order to always have the elements that are currently in the ring buffer in
        /// consecutive (virtual) memory.
        ///
        /// To build this particular memory layout we create a new `memfd` object, allocate memory
        /// with `mmap` and call `mmap` again to make sure both pages point to the page allocated
        /// via the `memfd` object. These ffi calls make kani complain, so here we mock the
        /// `IovDeque` object memory with a normal memory allocation of two pages worth of data.
        ///
        /// This stub helps imitate the effect of mirroring without all the elaborate memory
        /// allocation trick.
        pub fn push_back<const L: u16>(deque: &mut IovDeque<L>, iov: iovec) {
            // This should NEVER happen, since our ring buffer is as big as the maximum queue size.
            // We also check for the sanity of the VirtIO queues, in queue.rs, which means that if
            // we ever try to add something in a full ring buffer, there is an internal
            // bug in the device emulation logic. Panic here because the device is
            // hopelessly broken.
            assert!(
                !deque.is_full(),
                "The number of `iovec` objects is bigger than the available space"
            );

            // The write position inside the (logical) ring buffer of `L` slots.
            let offset = (deque.start + deque.len) as usize;
            // Index of the same slot in the "other" page: `start < L` and `len <= L`
            // imply `offset < 2 * L`, so exactly one of `offset`/`mirror` lands in
            // each of the two L-slot halves. Writing both emulates the shared
            // physical page backing both virtual pages in the real `IovDeque`.
            let mirror = if offset >= L as usize {
                offset - L as usize
            } else {
                offset + L as usize
            };

            // SAFETY: self.iov is a valid pointer and `self.start + self.len` is within range (we
            // asserted before that the buffer is not full).
            unsafe { deque.iov.add(offset).write_volatile(iov) };
            unsafe { deque.iov.add(mirror).write_volatile(iov) };
            deque.len += 1;
        }
    }

    /// Build `nr_descs` arbitrary `iovec`s fully contained in `[mem, mem + size)`,
    /// returning them together with their total length in bytes.
    fn create_iovecs(mem: *mut u8, size: usize, nr_descs: usize) -> (Vec<iovec>, u32) {
        let mut vecs: Vec<iovec> = Vec::with_capacity(nr_descs);
        let mut len = 0u32;
        for _ in 0..nr_descs {
            // The `IoVecBuffer` constructors ensure that the memory region described by every
            // `Descriptor` in the chain is valid, i.e. it is memory within the guest's memory
            // map. The assumption, here, that the last address is within the memory object's
            // bound substitutes these checks that `IoVecBuffer::new()` performs.
            let addr: usize = kani::any();
            let iov_len: usize =
                kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
            let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void;

            vecs.push(iovec { iov_base, iov_len });
            len += u32::try_from(iov_len).unwrap();
        }

        (vecs, len)
    }

    impl IoVecBuffer {
        /// Create an `IoVecBuffer` backed by arbitrary guest memory contents,
        /// described by `nr_descs` arbitrary (possibly overlapping) iovecs.
        fn any_of_length(nr_descs: usize) -> Self {
            // We only read from `IoVecBuffer`, so create here a guest memory object, with arbitrary
            // contents and size up to GUEST_MEMORY_SIZE.
            // `ManuallyDrop` leaks the backing allocation on purpose: the raw
            // pointers in `vecs` must stay valid for the proof's duration.
            let mut mem = ManuallyDrop::new(kani::vec::exact_vec::<u8, GUEST_MEMORY_SIZE>());
            let (vecs, len) = create_iovecs(mem.as_mut_ptr(), mem.len(), nr_descs);
            Self { vecs, len }
        }
    }

    /// Create an empty `IovDeque` backed by a plain two-page allocation instead of
    /// the memfd/mmap mirroring trick (see `stubs::push_back`). `start` is
    /// arbitrary so proofs cover every initial ring position.
    fn create_iov_deque() -> IovDequeDefault {
        // SAFETY: safe because the layout has non-zero size
        let mem = unsafe {
            std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
                2 * GUEST_PAGE_SIZE,
                GUEST_PAGE_SIZE,
            ))
        };
        IovDequeDefault {
            iov: mem.cast(),
            start: kani::any_where(|&start| start < FIRECRACKER_MAX_QUEUE_SIZE),
            len: 0,
            capacity: FIRECRACKER_MAX_QUEUE_SIZE,
        }
    }

    /// Like `create_iovecs`, but pushes the arbitrary iovecs into an `IovDeque`
    /// (via the stubbed `push_back`) rather than collecting them into a `Vec`.
    fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDequeDefault, u32) {
        let mut vecs = create_iov_deque();
        let mut len = 0u32;
        for _ in 0..nr_descs {
            // The `IoVecBufferMut` constructors ensure that the memory region described by every
            // `Descriptor` in the chain is valid, i.e. it is memory within the guest's memory
            // map. The assumption, here, that the last address is within the memory object's
            // bound substitutes these checks that `IoVecBufferMut::new()` performs.
            let addr: usize = kani::any();
            let iov_len: usize =
                kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
            let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void;

            vecs.push_back(iovec { iov_base, iov_len });
            len += u32::try_from(iov_len).unwrap();
        }

        (vecs, len)
    }

    impl IoVecBufferMutDefault {
        /// Create an `IoVecBufferMut` over zero-initialized memory, described by
        /// `nr_descs` arbitrary (possibly overlapping) iovecs.
        fn any_of_length(nr_descs: usize) -> Self {
            // We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
            // object initialized to zeroes, trying to be nice to Kani.
            // NOTE: this allocation is never freed; acceptable for a proof harness.
            let mem = unsafe {
                std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align_unchecked(
                    GUEST_MEMORY_SIZE,
                    16,
                ))
            };

            let (vecs, len) = create_iovecs_mut(mem, GUEST_MEMORY_SIZE, nr_descs);
            Self {
                vecs,
                len: len.try_into().unwrap(),
            }
        }
    }

    // A mock for the Read-/WriteVolatile implementation for u8 slices that does
    // not go through rust-vmm's machinery (which would cause kani get stuck during post processing)
    struct KaniBuffer<'a>(&'a mut [u8]);

    impl vm_memory::ReadVolatile for KaniBuffer<'_> {
        // Copy up to `buf.len()` bytes from the front of the wrapped slice into
        // `buf`, then shrink the slice by the amount consumed. Infallible.
        fn read_volatile<B: BitmapSlice>(
            &mut self,
            buf: &mut VolatileSlice<B>,
        ) -> Result<usize, vm_memory::VolatileMemoryError> {
            let count = buf.len().min(self.0.len());
            // SAFETY: `count` is bounded by both slice lengths, and the two
            // buffers are distinct objects, so the regions cannot overlap.
            unsafe {
                std::ptr::copy_nonoverlapping(self.0.as_ptr(), buf.ptr_guard_mut().as_ptr(), count);
            }
            // Advance past the consumed prefix; `take` sidesteps the borrow of `self.0`.
            self.0 = std::mem::take(&mut self.0).split_at_mut(count).1;
            Ok(count)
        }
    }

    impl vm_memory::WriteVolatile for KaniBuffer<'_> {
        // Copy up to `buf.len()` bytes from `buf` into the front of the wrapped
        // slice, then shrink the slice by the amount written. Infallible.
        fn write_volatile<B: BitmapSlice>(
            &mut self,
            buf: &VolatileSlice<B>,
        ) -> Result<usize, vm_memory::VolatileMemoryError> {
            let count = buf.len().min(self.0.len());
            // SAFETY: `count` is bounded by both slice lengths, and the two
            // buffers are distinct objects, so the regions cannot overlap.
            unsafe {
                std::ptr::copy_nonoverlapping(
                    buf.ptr_guard_mut().as_ptr(),
                    self.0.as_mut_ptr(),
                    count,
                );
            }
            // Advance past the written prefix; `take` sidesteps the borrow of `self.0`.
            self.0 = std::mem::take(&mut self.0).split_at_mut(count).1;
            Ok(count)
        }
    }

    /// Proof: `IoVecBuffer::read_volatile_at` always succeeds with an infallible
    /// writer and returns exactly the number of readable bytes.
    #[kani::proof]
    #[kani::unwind(5)]
    #[kani::solver(cadical)]
    fn verify_read_from_iovec() {
        for nr_descs in 0..MAX_DESC_LENGTH {
            let iov = IoVecBuffer::any_of_length(nr_descs);

            let mut buf = vec![0; GUEST_MEMORY_SIZE];
            let offset: u32 = kani::any();

            // We can't really check the contents that the operation here writes into `buf`, because
            // our `IoVecBuffer` being completely arbitrary can contain overlapping memory regions,
            // so checking the data copied is not exactly trivial.
            //
            // What we can verify is the bytes that we read out from guest memory:
            //    - `buf.len()`, if `offset + buf.len() < iov.len()`;
            //    - `iov.len() - offset`, otherwise.
            // Furthermore, we know our Read-/WriteVolatile implementation above is infallible, so
            // provided that the logic inside read_volatile_at is correct, we should always get
            // Ok(...)
            assert_eq!(
                iov.read_volatile_at(
                    &mut KaniBuffer(&mut buf),
                    offset as usize,
                    GUEST_MEMORY_SIZE
                )
                .unwrap(),
                buf.len().min(iov.len().saturating_sub(offset) as usize)
            );
        }
    }

    /// Proof: `IoVecBufferMut::write_volatile_at` always succeeds with an
    /// infallible reader and returns exactly the number of writable bytes.
    /// `IovDeque::push_back` is stubbed out to avoid the memfd/mmap FFI calls.
    #[kani::proof]
    #[kani::unwind(5)]
    #[kani::solver(cadical)]
    #[kani::stub(IovDeque::push_back, stubs::push_back)]
    fn verify_write_to_iovec() {
        for nr_descs in 0..MAX_DESC_LENGTH {
            let mut iov_mut = IoVecBufferMutDefault::any_of_length(nr_descs);

            let mut buf = kani::vec::any_vec::<u8, GUEST_MEMORY_SIZE>();
            let offset: u32 = kani::any();

            // We can't really check the contents that the operation here writes into
            // `IoVecBufferMut`, because our `IoVecBufferMut` being completely arbitrary
            // can contain overlapping memory regions, so checking the data copied is
            // not exactly trivial.
            //
            // What we can verify is the bytes that we write into guest memory:
            //    - `buf.len()`, if `offset + buf.len() < iov.len()`;
            //    - `iov.len() - offset`, otherwise.
            // Furthermore, we know our Read-/WriteVolatile implementation above is infallible, so
            // provided that the logic inside write_volatile_at is correct, we should always get
            // Ok(...)
            assert_eq!(
                iov_mut
                    .write_volatile_at(
                        &mut KaniBuffer(&mut buf),
                        offset as usize,
                        GUEST_MEMORY_SIZE
                    )
                    .unwrap(),
                buf.len().min(iov_mut.len().saturating_sub(offset) as usize)
            );
            // Leak the deque's fake backing memory: the real `IovDeque::drop`
            // presumably unmaps an mmap'd region, which would be unsound for
            // this plain allocation — TODO confirm against iov_deque.rs.
            std::mem::forget(iov_mut.vecs);
        }
    }
}