vmm/vstate/
vm.rs

// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

use std::collections::HashMap;
use std::fs::OpenOptions;
use std::io::Write;
use std::os::fd::AsRawFd;
use std::path::Path;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex, MutexGuard};

#[cfg(target_arch = "x86_64")]
use kvm_bindings::KVM_IRQCHIP_IOAPIC;
use kvm_bindings::{
    KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MSI_VALID_DEVID, KVMIO, KvmIrqRouting,
    kvm_irq_routing_entry, kvm_userspace_memory_region,
};
use kvm_ioctls::VmFd;
use log::debug;
use serde::{Deserialize, Serialize};
use vmm_sys_util::errno;
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::ioctl_io_nr;

pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
use crate::arch::{GSI_MSI_END, host_page_size};
use crate::logger::info;
use crate::pci::{DeviceRelocation, DeviceRelocationError, PciDevice};
use crate::persist::CreateSnapshotError;
use crate::vmm_config::snapshot::SnapshotType;
use crate::vstate::bus::Bus;
use crate::vstate::interrupts::{InterruptError, MsixVector, MsixVectorConfig, MsixVectorGroup};
use crate::vstate::memory::{
    GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestMemoryState,
    GuestRegionMmap, GuestRegionMmapExt, MemoryError,
};
use crate::vstate::resources::ResourceAllocator;
use crate::vstate::vcpu::VcpuError;
use crate::{DirtyBitmap, Vcpu, mem_size_mib};

ioctl_io_nr!(KVM_RESET_DIRTY_RINGS, KVMIO, 0xc7);

/// A struct representing an interrupt routing entry used by a device of the microVM
#[derive(Debug, Serialize, Deserialize)]
pub struct RoutingEntry {
    entry: kvm_irq_routing_entry,
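    /// Whether the interrupt is currently masked. Masked entries are skipped when the routing
    /// table is committed to KVM in `set_gsi_routes`.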
    masked: bool,
}

/// Architecture independent parts of a VM.
#[derive(Debug)]
pub struct VmCommon {
    /// The KVM file descriptor used to access this Vm.
    pub fd: VmFd,
    max_memslots: u32,
    /// The guest memory of this Vm.
    pub guest_memory: GuestMemoryMmap,
    next_kvm_slot: AtomicU32,
    /// Interrupts used by Vm's devices
    pub interrupts: Mutex<HashMap<u32, RoutingEntry>>,
    /// Allocator for VM resources
    pub resource_allocator: Mutex<ResourceAllocator>,
    /// MMIO bus
    pub mmio_bus: Arc<Bus>,
}

/// Errors associated with the wrappers over KVM ioctls.
/// Needs `rustfmt::skip` to make multiline comments work
#[rustfmt::skip]
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VmError {
    /// Cannot set the memory regions: {0}
    SetUserMemoryRegion(kvm_ioctls::Error),
    /// Failed to create VM: {0}
    CreateVm(kvm_ioctls::Error),
    /// Failed to get KVM's dirty log: {0}
    GetDirtyLog(kvm_ioctls::Error),
    /// {0}
    Arch(#[from] ArchVmError),
    /// Error during eventfd operations: {0}
    EventFd(std::io::Error),
    /// Failed to create vcpu: {0}
    CreateVcpu(VcpuError),
    /// The number of configured slots is bigger than the maximum reported by KVM: {0}
    NotEnoughMemorySlots(u32),
    /// Failed to add a memory region: {0}
    InsertRegion(#[from] vm_memory::GuestRegionCollectionError),
    /// Error calling mincore: {0}
    Mincore(vmm_sys_util::errno::Error),
    /// Failed to reset dirty rings: {0}
    ResetDirtyRings(vmm_sys_util::errno::Error),
    /// ResourceAllocator error: {0}
    ResourceAllocator(#[from] vm_allocator::Error),
    /// Memory error: {0}
    MemoryError(#[from] MemoryError),
}

/// Contains Vm functions that are usable across CPU architectures
impl Vm {
    /// Create a KVM VM
    pub fn create_common(kvm: &crate::vstate::kvm::Kvm) -> Result<VmCommon, VmError> {
        // It is known that KVM_CREATE_VM occasionally fails with EINTR on heavily loaded machines
        // with many VMs.
        //
        // KVM_CREATE_VM returning EINTR is intentional behavior: the KVM_CREATE_VM path includes
        // mm_take_all_locks(), which is CPU intensive, and all CPU-intensive syscalls should check
        // for pending signals and return EINTR immediately to allow userland to remain interactive.
        // https://lists.nongnu.org/archive/html/qemu-devel/2014-01/msg01740.html
        //
        // However, it has been empirically confirmed that KVM_CREATE_VM can return EINTR even when
        // there is no pending signal.
        // https://lore.kernel.org/qemu-devel/8735e0s1zw.wl-maz@kernel.org/
        //
        // To mitigate it, QEMU does an infinite retry on EINTR that greatly improves reliability:
        // - https://github.com/qemu/qemu/commit/94ccff133820552a859c0fb95e33a539e0b90a75
        // - https://github.com/qemu/qemu/commit/bbde13cd14ad4eec18529ce0bf5876058464e124
        //
        // Similarly, we retry up to 5 times. Although Firecracker clients are also able to retry,
        // they have to start Firecracker from scratch. Doing the retries inside Firecracker makes
        // recovery faster and improves reliability.
        const MAX_ATTEMPTS: u32 = 5;
        let mut attempt = 1;
        let fd = loop {
            match kvm.fd.create_vm() {
                Ok(fd) => break fd,
                Err(e) if e.errno() == libc::EINTR && attempt < MAX_ATTEMPTS => {
                    info!("Attempt #{attempt} of KVM_CREATE_VM returned EINTR");
                    // Exponential backoff (1us, 2us, 4us, and 8us => 15us in total)
                    std::thread::sleep(std::time::Duration::from_micros(2u64.pow(attempt - 1)));
                }
                Err(e) => return Err(VmError::CreateVm(e)),
            }

            attempt += 1;
        };

        Ok(VmCommon {
            fd,
            max_memslots: kvm.max_nr_memslots(),
            guest_memory: GuestMemoryMmap::default(),
            next_kvm_slot: AtomicU32::new(0),
            interrupts: Mutex::new(HashMap::with_capacity(GSI_MSI_END as usize + 1)),
            resource_allocator: Mutex::new(ResourceAllocator::new()),
            mmio_bus: Arc::new(Bus::new()),
        })
    }

    /// Creates the specified number of [`Vcpu`]s.
    ///
    /// The returned [`EventFd`] is written to whenever any of the vcpus exit.
    pub fn create_vcpus(&mut self, vcpu_count: u8) -> Result<(Vec<Vcpu>, EventFd), VmError> {
        self.arch_pre_create_vcpus(vcpu_count)?;

        let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(VmError::EventFd)?;

        let mut vcpus = Vec::with_capacity(vcpu_count as usize);
        for cpu_idx in 0..vcpu_count {
            let exit_evt = exit_evt.try_clone().map_err(VmError::EventFd)?;
            let vcpu = Vcpu::new(cpu_idx, self, exit_evt).map_err(VmError::CreateVcpu)?;
            vcpus.push(vcpu);
        }

        self.arch_post_create_vcpus(vcpu_count)?;

        Ok((vcpus, exit_evt))
    }

    /// Reserves the next `slot_cnt` contiguous kvm slot ids and returns the first one
    pub fn next_kvm_slot(&self, slot_cnt: u32) -> Option<u32> {
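        // `fetch_add` returns the counter value from before the addition, i.e. the first slot id
        // of the newly reserved range.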
        let next = self
            .common
            .next_kvm_slot
            .fetch_add(slot_cnt, Ordering::Relaxed);
        if self.common.max_memslots <= next {
            None
        } else {
            Some(next)
        }
    }

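    /// Thin wrapper over the `KVM_SET_USER_MEMORY_REGION` ioctl, which maps the given host
    /// memory range into this VM's guest physical address space.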
    pub(crate) fn set_user_memory_region(
        &self,
        region: kvm_userspace_memory_region,
    ) -> Result<(), VmError> {
        // SAFETY: Safe because the fd is a valid KVM file descriptor.
        unsafe {
            self.fd()
                .set_user_memory_region(region)
                .map_err(VmError::SetUserMemoryRegion)
        }
    }

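    /// Inserts `region` into this [`Vm`]'s guest memory map and registers its KVM slots:
    /// plugged slots are passed to KVM as user memory regions, while unplugged slots are
    /// protected so that stray accesses to them are caught.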
    fn register_memory_region(&mut self, region: Arc<GuestRegionMmapExt>) -> Result<(), VmError> {
        let new_guest_memory = self
            .common
            .guest_memory
            .insert_region(Arc::clone(&region))?;

        region
            .slots()
            .try_for_each(|(ref slot, plugged)| match plugged {
                // if the slot is plugged, add it to kvm user memory regions
                true => self.set_user_memory_region(slot.into()),
                // if the slot is not plugged, protect accesses to it
                false => slot.protect(true).map_err(VmError::MemoryError),
            })?;

        self.common.guest_memory = new_guest_memory;

        Ok(())
    }

    /// Register a list of new memory regions to this [`Vm`].
    pub fn register_dram_memory_regions(
        &mut self,
        regions: Vec<GuestRegionMmap>,
    ) -> Result<(), VmError> {
        for region in regions {
            let next_slot = self
                .next_kvm_slot(1)
                .ok_or(VmError::NotEnoughMemorySlots(self.common.max_memslots))?;

            let arcd_region =
                Arc::new(GuestRegionMmapExt::dram_from_mmap_region(region, next_slot));

            self.register_memory_region(arcd_region)?
        }

        Ok(())
    }

    /// Register a new hotpluggable region to this [`Vm`].
    pub fn register_hotpluggable_memory_region(
        &mut self,
        region: GuestRegionMmap,
        slot_size: usize,
    ) -> Result<(), VmError> {
        // caller should ensure the slot size divides the region length.
        assert!(region.len().is_multiple_of(slot_size as u64));
        let slot_cnt = (region.len() / (slot_size as u64))
            .try_into()
            .map_err(|_| VmError::NotEnoughMemorySlots(self.common.max_memslots))?;
        let slot_from = self
            .next_kvm_slot(slot_cnt)
            .ok_or(VmError::NotEnoughMemorySlots(self.common.max_memslots))?;
        let arcd_region = Arc::new(GuestRegionMmapExt::hotpluggable_from_mmap_region(
            region, slot_from, slot_size,
        ));

        self.register_memory_region(arcd_region)
    }

    /// Restore a list of memory regions to this [`Vm`] from a snapshot `state`.
    ///
    /// Note: regions and state.regions need to be in the same order.
    pub fn restore_memory_regions(
        &mut self,
        regions: Vec<GuestRegionMmap>,
        state: &GuestMemoryState,
    ) -> Result<(), VmError> {
        for (region, state) in regions.into_iter().zip(state.regions.iter()) {
            let slot_cnt = state
                .plugged
                .len()
                .try_into()
                .map_err(|_| VmError::NotEnoughMemorySlots(self.common.max_memslots))?;

            let next_slot = self
                .next_kvm_slot(slot_cnt)
                .ok_or(VmError::NotEnoughMemorySlots(self.common.max_memslots))?;

            let arcd_region = Arc::new(GuestRegionMmapExt::from_state(region, state, next_slot)?);

            self.register_memory_region(arcd_region)?
        }

        Ok(())
    }

    /// Gets a reference to the kvm file descriptor owned by this VM.
    pub fn fd(&self) -> &VmFd {
        &self.common.fd
    }

    /// Gets a reference to this [`Vm`]'s [`GuestMemoryMmap`] object
    pub fn guest_memory(&self) -> &GuestMemoryMmap {
        &self.common.guest_memory
    }

    /// Acquires the lock and returns a guard to this [`Vm`]'s [`ResourceAllocator`] object
    pub fn resource_allocator(&self) -> MutexGuard<'_, ResourceAllocator> {
        self.common
            .resource_allocator
            .lock()
            .expect("Poisoned lock")
    }

    /// Resets the KVM dirty bitmap for each of the guest's memory regions.
    pub fn reset_dirty_bitmap(&self) {
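        // Fetching a slot's dirty log with KVM_GET_DIRTY_LOG also clears it, so reading (and
        // discarding) the log of every plugged slot is enough to reset dirty tracking.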
        self.guest_memory()
            .iter()
            .flat_map(|region| region.plugged_slots())
            .for_each(|mem_slot| {
                let _ = self.fd().get_dirty_log(mem_slot.slot, mem_slot.slice.len());
            });
    }

    /// Resets the KVM dirty ring tracking for all vcpus.
    pub fn reset_dirty_rings(&self) -> Result<(), VmError> {
        // SAFETY: Safe because the fd is a valid KVM VM file descriptor.
        let ret = unsafe { libc::ioctl(self.fd().as_raw_fd(), KVM_RESET_DIRTY_RINGS()) };
        if ret == 0 {
            Ok(())
        } else {
            Err(VmError::ResetDirtyRings(errno::Error::last()))
        }
    }

    /// Retrieves the KVM dirty bitmap for each of the guest's memory regions.
    pub fn get_dirty_bitmap(&self) -> Result<DirtyBitmap, VmError> {
        self.guest_memory()
            .iter()
            .flat_map(|region| region.plugged_slots())
            .map(|mem_slot| {
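                // If dirty page tracking is enabled for this slot, use KVM's dirty log; otherwise
                // fall back to the mincore(2)-based overapproximation.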
                let bitmap = match mem_slot.slice.bitmap() {
                    Some(_) => self
                        .fd()
                        .get_dirty_log(mem_slot.slot, mem_slot.slice.len())
                        .map_err(VmError::GetDirtyLog)?,
                    None => mincore_bitmap(
                        mem_slot.slice.ptr_guard_mut().as_ptr(),
                        mem_slot.slice.len(),
                    )?,
                };
                Ok((mem_slot.slot, bitmap))
            })
            .collect()
    }

    /// Takes a snapshot of this [`Vm`]'s guest memory and saves it to `mem_file_path`.
    ///
    /// If `snapshot_type` is [`SnapshotType::Diff`], and `mem_file_path` exists and is a snapshot
    /// file of matching size, then the diff snapshot will be directly merged into the existing
    /// snapshot. Otherwise, existing files are simply overwritten.
    pub(crate) fn snapshot_memory_to_file(
        &self,
        mem_file_path: &Path,
        snapshot_type: SnapshotType,
    ) -> Result<(), CreateSnapshotError> {
        use self::CreateSnapshotError::*;

        // Need to check this here, as we create the file in the line below
        let file_existed = mem_file_path.exists();

        let mut file = OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(false)
            .open(mem_file_path)
            .map_err(|err| MemoryBackingFile("open", err))?;

        // Determine what size our total memory area is.
        let mem_size_mib = mem_size_mib(self.guest_memory());
        let expected_size = mem_size_mib * 1024 * 1024;

        if file_existed {
            let file_size = file
                .metadata()
                .map_err(|e| MemoryBackingFile("get_metadata", e))?
                .len();

            // We only truncate the file if its size does not match the expected memory size.
            // - For full snapshots, the entire file's contents will be overwritten anyway. We have
            //   to avoid truncating here to deal with the edge case where it represents the
            //   snapshot file from which this very microVM was loaded (as modifying the memory file
            //   would be reflected in the mmap of the file, meaning a truncate operation would zero
            //   out guest memory, and thus corrupt the VM).
            // - For diff snapshots, we want to merge the diff layer directly into the file.
            if file_size != expected_size {
                file.set_len(0)
                    .map_err(|err| MemoryBackingFile("truncate", err))?;
            }
        }

        // Set the length of the file to the full size of the memory area.
        file.set_len(expected_size)
            .map_err(|e| MemoryBackingFile("set_length", e))?;

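        // A diff snapshot writes only the pages reported as dirty, while a full snapshot dumps
        // all of guest memory and then resets dirty tracking, so a later diff snapshot only
        // captures pages touched after this point.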
        match snapshot_type {
            SnapshotType::Diff => {
                let dirty_bitmap = self.get_dirty_bitmap()?;
                self.guest_memory().dump_dirty(&mut file, &dirty_bitmap)?;
            }
            SnapshotType::Full => {
                self.guest_memory().dump(&mut file)?;
                self.reset_dirty_bitmap();
                self.guest_memory().reset_dirty();
            }
        };

        file.flush()
            .map_err(|err| MemoryBackingFile("flush", err))?;
        file.sync_all()
            .map_err(|err| MemoryBackingFile("sync_all", err))
    }

    /// Register a device IRQ
    pub fn register_irq(&self, fd: &EventFd, gsi: u32) -> Result<(), errno::Error> {
        self.common.fd.register_irqfd(fd, gsi)?;

        let mut entry = kvm_irq_routing_entry {
            gsi,
            type_: KVM_IRQ_ROUTING_IRQCHIP,
            ..Default::default()
        };
        #[cfg(target_arch = "x86_64")]
        {
            entry.u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC;
        }
        #[cfg(target_arch = "aarch64")]
        {
            entry.u.irqchip.irqchip = 0;
        }
        entry.u.irqchip.pin = gsi;

        self.common
            .interrupts
            .lock()
            .expect("Poisoned lock")
            .insert(
                gsi,
                RoutingEntry {
                    entry,
                    masked: false,
                },
            );
        Ok(())
    }

    /// Register an MSI device interrupt
    pub fn register_msi(
        &self,
        route: &MsixVector,
        masked: bool,
        config: MsixVectorConfig,
    ) -> Result<(), errno::Error> {
        let mut entry = kvm_irq_routing_entry {
            gsi: route.gsi,
            type_: KVM_IRQ_ROUTING_MSI,
            ..Default::default()
        };
        entry.u.msi.address_lo = config.low_addr;
        entry.u.msi.address_hi = config.high_addr;
        entry.u.msi.data = config.data;

        if self.common.fd.check_extension(kvm_ioctls::Cap::MsiDevid) {
            // According to KVM documentation:
            // https://docs.kernel.org/virt/kvm/api.html#kvm-set-gsi-routing
            //
            // if the capability is set, we need to set the flag and provide a valid unique device
            // ID. "For PCI, this is usually a BDF identifier in the lower 16 bits".
            //
            // The layout of `config.devid` is:
            //
            // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
            // |      segment    |     bus    |   device   |  function  |
            //
            // For the time being, we are using a single PCI segment and a single bus per segment
            // so just passing config.devid should be fine.
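            // As a purely illustrative example, a hypothetical device at segment 0, bus 0,
            // device 5, function 3 would get devid = (0 << 16) | (0 << 8) | (5 << 3) | 3 = 0x2b.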
            entry.flags = KVM_MSI_VALID_DEVID;
            entry.u.msi.__bindgen_anon_1.devid = config.devid;
        }

        self.common
            .interrupts
            .lock()
            .expect("Poisoned lock")
            .insert(route.gsi, RoutingEntry { entry, masked });

        Ok(())
    }

    /// Create a group of MSI-X interrupts
    pub fn create_msix_group(vm: Arc<Vm>, count: u16) -> Result<MsixVectorGroup, InterruptError> {
        debug!("Creating new MSI group with {count} vectors");
        let mut vectors = Vec::with_capacity(count as usize);
        for gsi in vm
            .resource_allocator()
            .allocate_gsi_msi(count as u32)?
            .iter()
        {
            vectors.push(MsixVector::new(*gsi, false)?);
        }

        Ok(MsixVectorGroup { vm, vectors })
    }

    /// Set GSI routes to KVM
    pub fn set_gsi_routes(&self) -> Result<(), InterruptError> {
        let entries = self.common.interrupts.lock().expect("Poisoned lock");
        let mut routes = KvmIrqRouting::new(0)?;

        for entry in entries.values() {
            if entry.masked {
                continue;
            }
            routes.push(entry.entry)?;
        }

        self.common.fd.set_gsi_routing(&routes)?;
        Ok(())
    }
}

/// Use `mincore(2)` to overapproximate the dirty bitmap for the given memslot. To be used
/// if a diff snapshot is requested, but dirty page tracking wasn't enabled.
fn mincore_bitmap(addr: *mut u8, len: usize) -> Result<Vec<u64>, VmError> {
    // TODO: Once Host 5.10 goes out of support, we can make this more robust and work on
    // swap-enabled systems, by doing mlock2(MLOCK_ONFAULT)/munlock() in this function (to
    // force swapped-out pages to get paged in, so that mincore will consider them incore).
    // However, on AMD (m6a/m7a) 5.10, doing so introduces a 100%/30ms regression to snapshot
    // creation, even if swap is disabled, so currently it cannot be done.

    // Mincore always works at PAGE_SIZE granularity, even if the VMA we are dealing with
    // is a hugetlbfs VMA (e.g. to report a single hugepage as "present", mincore will
    // give us 512 4k markers with the lowest bit set).
    let page_size = host_page_size();
    let mut mincore_bitmap = vec![0u8; len / page_size];
    let mut bitmap = vec![0u64; (len / page_size).div_ceil(64)];

    // SAFETY: The safety invariants of GuestRegionMmap ensure that region.as_ptr() is a valid
    // userspace mapping of size region.len() bytes. The bitmap has exactly one byte for each
    // page in this userspace mapping. Note that mincore does not operate on bitmaps like
    // KVM_MEM_LOG_DIRTY_PAGES, but rather it uses 8 bits per page (e.g. 1 byte), setting the
    // least significant bit to 1 if the page corresponding to a byte is in core (available in
    // the page cache and resolvable via just a minor page fault).
    let r = unsafe { libc::mincore(addr.cast(), len, mincore_bitmap.as_mut_ptr()) };

    if r != 0 {
        return Err(VmError::Mincore(vmm_sys_util::errno::Error::last()));
    }

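    // Fold mincore's one-byte-per-page output into a bitmap with one bit per page, packed into
    // 64-bit words (the same layout as the bitmaps returned by KVM's dirty log).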
    for (page_idx, b) in mincore_bitmap.iter().enumerate() {
        bitmap[page_idx / 64] |= (*b as u64 & 0x1) << (page_idx as u64 % 64);
    }

    Ok(bitmap)
}

impl DeviceRelocation for Vm {
    fn move_bar(
        &self,
        _old_base: u64,
        _new_base: u64,
        _len: u64,
        _pci_dev: &mut dyn PciDevice,
    ) -> Result<(), DeviceRelocationError> {
        Err(DeviceRelocationError::NotSupported)
    }
}

#[cfg(test)]
pub(crate) mod tests {
    use std::sync::atomic::Ordering;

    use vm_memory::GuestAddress;
    use vm_memory::mmap::MmapRegionBuilder;

    use super::*;
    use crate::snapshot::Persist;
    #[cfg(target_arch = "x86_64")]
    use crate::snapshot::Snapshot;
    use crate::test_utils::single_region_mem_raw;
    use crate::utils::mib_to_bytes;
    use crate::vstate::kvm::Kvm;
    use crate::vstate::memory::GuestRegionMmap;

    // Auxiliary function being used throughout the tests.
    pub(crate) fn setup_vm() -> (Kvm, Vm) {
        let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
        let vm = Vm::new(&kvm).expect("Cannot create new vm");
        (kvm, vm)
    }

    // Auxiliary function being used throughout the tests.
    pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm) {
        let (kvm, mut vm) = setup_vm();
        let gm = single_region_mem_raw(mem_size);
        vm.register_dram_memory_regions(gm).unwrap();
        (kvm, vm)
    }

    #[test]
    fn test_new() {
        // Testing with a valid /dev/kvm descriptor.
        let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
        Vm::new(&kvm).unwrap();
    }

    #[test]
    fn test_register_memory_regions() {
        let (_, mut vm) = setup_vm();

        // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
        // will result in error.
        let gm = single_region_mem_raw(0x10);
        let res = vm.register_dram_memory_regions(gm);
        assert_eq!(
            res.unwrap_err().to_string(),
            "Cannot set the memory regions: Invalid argument (os error 22)"
        );

        let gm = single_region_mem_raw(0x1000);
        let res = vm.register_dram_memory_regions(gm);
        res.unwrap();
    }

    #[test]
    fn test_too_many_regions() {
        let (kvm, mut vm) = setup_vm();
        let max_nr_regions = kvm.max_nr_memslots();

        // SAFETY: valid mmap parameters
        let ptr = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                0x1000,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
                -1,
                0,
            )
        };

        assert_ne!(ptr, libc::MAP_FAILED);

        for i in 0..=max_nr_regions {
            // SAFETY: we assert above that the ptr is valid, and the size matches what we passed to
            // mmap
            let region = unsafe {
                MmapRegionBuilder::new(0x1000)
                    .with_raw_mmap_pointer(ptr.cast())
                    .build()
                    .unwrap()
            };

            let region = GuestRegionMmap::new(region, GuestAddress(i as u64 * 0x1000)).unwrap();

            let res = vm.register_dram_memory_regions(vec![region]);

            if max_nr_regions <= i {
                assert!(
                    matches!(res, Err(VmError::NotEnoughMemorySlots(v)) if v == max_nr_regions),
                    "{:?} at iteration {}",
                    res,
                    i
                );
            } else {
                res.unwrap_or_else(|_| {
                    panic!(
                        "to be able to insert more regions in iteration {i} - max_nr_memslots: \
                         {max_nr_regions} - num_regions: {}",
                        vm.guest_memory().num_regions()
                    )
                });
            }
        }
    }

    #[test]
    fn test_create_vcpus() {
        let vcpu_count = 2;
        let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));

        let (vcpu_vec, _) = vm.create_vcpus(vcpu_count).unwrap();

        assert_eq!(vcpu_vec.len(), vcpu_count as usize);
    }

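    // Helper that sets up the architecture-specific in-kernel interrupt controller, which the
    // MSI-X tests below rely on.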
    fn enable_irqchip(vm: &mut Vm) {
        #[cfg(target_arch = "x86_64")]
        vm.setup_irqchip().unwrap();
        #[cfg(target_arch = "aarch64")]
        vm.setup_irqchip(1).unwrap();
    }

    fn create_msix_group(vm: &Arc<Vm>) -> MsixVectorGroup {
        Vm::create_msix_group(vm.clone(), 4).unwrap()
    }

    #[test]
    fn test_msi_vector_group_new() {
        let (_, vm) = setup_vm_with_memory(mib_to_bytes(128));
        let vm = Arc::new(vm);
        let msix_group = create_msix_group(&vm);
        assert_eq!(msix_group.num_vectors(), 4);
    }

    #[test]
    fn test_msi_vector_group_enable_disable() {
        let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
        enable_irqchip(&mut vm);
        let vm = Arc::new(vm);
        let msix_group = create_msix_group(&vm);

        // Initially all vectors are disabled
        for route in &msix_group.vectors {
            assert!(!route.enabled.load(Ordering::Acquire))
        }

        // Enable works
        msix_group.enable().unwrap();
        for route in &msix_group.vectors {
            assert!(route.enabled.load(Ordering::Acquire));
        }
        // Enabling an enabled group doesn't error out
        msix_group.enable().unwrap();

        // Disable works
        msix_group.disable().unwrap();
        for route in &msix_group.vectors {
            assert!(!route.enabled.load(Ordering::Acquire))
        }
        // Disabling a disabled group doesn't error out
        msix_group.disable().unwrap();
    }

    #[test]
    fn test_msi_vector_group_trigger() {
        let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
        enable_irqchip(&mut vm);

        let vm = Arc::new(vm);
        let msix_group = create_msix_group(&vm);

        // We can now trigger all vectors
        for i in 0..4 {
            msix_group.trigger(i).unwrap()
        }

        // We can't trigger an invalid vector
        msix_group.trigger(4).unwrap_err();
    }

    #[test]
    fn test_msi_vector_group_notifier() {
        let (_, vm) = setup_vm_with_memory(mib_to_bytes(128));
        let vm = Arc::new(vm);
        let msix_group = create_msix_group(&vm);

        for i in 0..4 {
            assert!(msix_group.notifier(i).is_some());
        }

        assert!(msix_group.notifier(4).is_none());
    }

    #[test]
    fn test_msi_vector_group_update_invalid_vector() {
        let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
        enable_irqchip(&mut vm);
        let vm = Arc::new(vm);
        let msix_group = create_msix_group(&vm);
        let config = MsixVectorConfig {
            high_addr: 0x42,
            low_addr: 0x12,
            data: 0x12,
            devid: 0xafa,
        };
        msix_group.update(0, config, true, true).unwrap();
        msix_group.update(4, config, true, true).unwrap_err();
    }

    #[test]
    fn test_msi_vector_group_update() {
        let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
        enable_irqchip(&mut vm);
        let vm = Arc::new(vm);
        assert!(vm.common.interrupts.lock().unwrap().is_empty());
        let msix_group = create_msix_group(&vm);

        // Set some configuration for the vectors. Initially all are masked
        let mut config = MsixVectorConfig {
            high_addr: 0x42,
            low_addr: 0x13,
            data: 0x12,
            devid: 0xafa,
        };
        for i in 0..4 {
            config.data = 0x12 * i;
            msix_group.update(i as usize, config, true, false).unwrap();
        }

        // All vectors should be disabled
        for vector in &msix_group.vectors {
            assert!(!vector.enabled.load(Ordering::Acquire));
        }

        for i in 0..4 {
            let gsi = crate::arch::GSI_MSI_START + i;
            let interrupts = vm.common.interrupts.lock().unwrap();
            let kvm_route = interrupts.get(&gsi).unwrap();
            assert!(kvm_route.masked);
            assert_eq!(kvm_route.entry.gsi, gsi);
            assert_eq!(kvm_route.entry.type_, KVM_IRQ_ROUTING_MSI);
            // SAFETY: because we know we set up MSI routes.
            unsafe {
                assert_eq!(kvm_route.entry.u.msi.address_hi, 0x42);
                assert_eq!(kvm_route.entry.u.msi.address_lo, 0x13);
                assert_eq!(kvm_route.entry.u.msi.data, 0x12 * i);
            }
        }

        // Simply enabling the vectors should not update the registered IRQ routes
        msix_group.enable().unwrap();
        for i in 0..4 {
            let gsi = crate::arch::GSI_MSI_START + i;
            let interrupts = vm.common.interrupts.lock().unwrap();
            let kvm_route = interrupts.get(&gsi).unwrap();
            assert!(kvm_route.masked);
            assert_eq!(kvm_route.entry.gsi, gsi);
            assert_eq!(kvm_route.entry.type_, KVM_IRQ_ROUTING_MSI);
            // SAFETY: because we know we set up MSI routes.
            unsafe {
                assert_eq!(kvm_route.entry.u.msi.address_hi, 0x42);
                assert_eq!(kvm_route.entry.u.msi.address_lo, 0x13);
                assert_eq!(kvm_route.entry.u.msi.data, 0x12 * i);
            }
        }

        // Updating the config of a vector should enable its route (and only its route)
        config.data = 0;
        msix_group.update(0, config, false, true).unwrap();
        for i in 0..4 {
            let gsi = crate::arch::GSI_MSI_START + i;
            let interrupts = vm.common.interrupts.lock().unwrap();
            let kvm_route = interrupts.get(&gsi).unwrap();
            assert_eq!(kvm_route.masked, i != 0);
            assert_eq!(kvm_route.entry.gsi, gsi);
            assert_eq!(kvm_route.entry.type_, KVM_IRQ_ROUTING_MSI);
            // SAFETY: because we know we set up MSI routes.
            unsafe {
                assert_eq!(kvm_route.entry.u.msi.address_hi, 0x42);
                assert_eq!(kvm_route.entry.u.msi.address_lo, 0x13);
                assert_eq!(kvm_route.entry.u.msi.data, 0x12 * i);
            }
        }
    }

    #[test]
    fn test_msi_vector_group_persistence() {
        let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
        enable_irqchip(&mut vm);
        let vm = Arc::new(vm);
        let msix_group = create_msix_group(&vm);

        msix_group.enable().unwrap();
        let state = msix_group.save();
        let restored_group = MsixVectorGroup::restore(vm, &state).unwrap();

        assert_eq!(msix_group.num_vectors(), restored_group.num_vectors());
        // Even if an MSI group is enabled, we don't save it as such. During restoration, the PCI
        // transport will make sure the correct config is set for the vectors and enable them
        // accordingly.
        for (id, vector) in msix_group.vectors.iter().enumerate() {
            let new_vector = &restored_group.vectors[id];
            assert_eq!(vector.gsi, new_vector.gsi);
            assert!(!new_vector.enabled.load(Ordering::Acquire));
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_restore_state_resource_allocator() {
        use vm_allocator::AllocPolicy;

        let mut snapshot_data = vec![0u8; 10000];
        let (_, mut vm) = setup_vm_with_memory(0x1000);
        vm.setup_irqchip().unwrap();

        // Allocate a GSI and some memory and make sure they are still allocated after restore
        let (gsi, range) = {
            let mut resource_allocator = vm.resource_allocator();

            let gsi = resource_allocator.allocate_gsi_msi(1).unwrap()[0];
            let range = resource_allocator
                .allocate_32bit_mmio_memory(1024, 1024, AllocPolicy::FirstMatch)
                .unwrap();
            (gsi, range)
        };

        let state = vm.save_state().unwrap();
        Snapshot::new(state)
            .save(&mut snapshot_data.as_mut_slice())
            .unwrap();

        let restored_state: VmState = Snapshot::load_without_crc_check(snapshot_data.as_slice())
            .unwrap()
            .data;
        vm.restore_state(&restored_state).unwrap();

        let mut resource_allocator = vm.resource_allocator();
        let gsi_new = resource_allocator.allocate_gsi_msi(1).unwrap()[0];
        assert_eq!(gsi + 1, gsi_new);

        resource_allocator
            .allocate_32bit_mmio_memory(1024, 1024, AllocPolicy::ExactMatch(range))
            .unwrap_err();
        let range_new = resource_allocator
            .allocate_32bit_mmio_memory(1024, 1024, AllocPolicy::FirstMatch)
            .unwrap();
        assert_eq!(range + 1024, range_new);
    }
}
919}