vmm/
persist.rs

1// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Defines state structures for saving/restoring a Firecracker microVM.
5
6use std::fmt::Debug;
7use std::fs::{File, OpenOptions};
8use std::io::{self, Write};
9use std::mem::forget;
10use std::os::unix::io::AsRawFd;
11use std::os::unix::net::UnixStream;
12use std::path::Path;
13use std::sync::{Arc, Mutex};
14
15use semver::Version;
16use serde::{Deserialize, Serialize};
17use userfaultfd::{FeatureFlags, Uffd, UffdBuilder};
18use vmm_sys_util::sock_ctrl_msg::ScmSocket;
19
20#[cfg(target_arch = "aarch64")]
21use crate::arch::aarch64::vcpu::get_manufacturer_id_from_host;
22use crate::builder::{self, BuildMicrovmFromSnapshotError};
23use crate::cpu_config::templates::StaticCpuTemplate;
24#[cfg(target_arch = "x86_64")]
25use crate::cpu_config::x86_64::cpuid::CpuidTrait;
26#[cfg(target_arch = "x86_64")]
27use crate::cpu_config::x86_64::cpuid::common::get_vendor_id_from_host;
28use crate::device_manager::{DevicePersistError, DevicesState};
29use crate::logger::{info, warn};
30use crate::resources::VmResources;
31use crate::seccomp::BpfThreadMap;
32use crate::snapshot::Snapshot;
33use crate::utils::u64_to_usize;
34use crate::vmm_config::boot_source::BootSourceConfig;
35use crate::vmm_config::instance_info::InstanceInfo;
36use crate::vmm_config::machine_config::{HugePageConfig, MachineConfigError, MachineConfigUpdate};
37use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, MemBackendType};
38use crate::vstate::kvm::KvmState;
39use crate::vstate::memory::{
40    self, GuestMemoryState, GuestRegionMmap, GuestRegionType, MemoryError,
41};
42use crate::vstate::vcpu::{VcpuSendEventError, VcpuState};
43use crate::vstate::vm::{VmError, VmState};
44use crate::{EventManager, Vmm, vstate};
45
/// Holds information related to the VM that is not part of VmState.
///
/// It is built from [`VmResources`] (see the `From<&VmResources>` impl) and is read back by
/// [`restore_from_snapshot`] to re-apply the machine configuration of the snapshotted microVM.
#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)]
pub struct VmInfo {
    /// Guest memory size, in MiB.
    pub mem_size_mib: u64,
    /// Whether SMT was enabled in the machine configuration.
    pub smt: bool,
    /// Static CPU template that was configured for the microVM.
    pub cpu_template: StaticCpuTemplate,
    /// Boot source information.
    pub boot_source: BootSourceConfig,
    /// Huge page configuration.
    pub huge_pages: HugePageConfig,
    /// Whether nested virtualization was enabled in the machine configuration.
    pub enable_nested_virt: bool,
}
62
63impl From<&VmResources> for VmInfo {
64    fn from(value: &VmResources) -> Self {
65        Self {
66            mem_size_mib: value.machine_config.mem_size_mib as u64,
67            smt: value.machine_config.smt,
68            cpu_template: StaticCpuTemplate::from(&value.machine_config.cpu_template),
69            boot_source: value.boot_source.config.clone(),
70            huge_pages: value.machine_config.huge_pages,
71            enable_nested_virt: value.machine_config.enable_nested_virt,
72        }
73    }
74}
75
/// Contains the necessary state for saving/restoring a microVM.
///
/// This is the payload that gets serialized into the snapshot state file and is
/// deserialized again when a snapshot is loaded.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct MicrovmState {
    /// Miscellaneous VM info.
    pub vm_info: VmInfo,
    /// KVM state.
    pub kvm_state: KvmState,
    /// VM state (includes the guest memory description used when restoring).
    pub vm_state: VmState,
    /// Vcpu states, one entry per vCPU.
    pub vcpu_states: Vec<VcpuState>,
    /// Device states.
    pub device_states: DevicesState,
}
90
/// This describes the mapping between a Firecracker host virtual address and the
/// offset in the buffer or file backend for a guest memory region. It is used
/// to tell an external process/thread where to populate the guest memory data
/// for this range.
///
/// E.g. Guest memory contents for a region of `size` bytes can be found in the
/// backend at `offset` bytes from the beginning, and should be copied/populated
/// into `base_host_virt_addr`.
///
/// A list of these mappings is serialized to JSON and sent to the UFFD handler
/// process together with the userfaultfd file descriptor.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct GuestRegionUffdMapping {
    /// Base host virtual address where the guest memory contents for this
    /// region should be copied/populated.
    pub base_host_virt_addr: u64,
    /// Region size, in bytes.
    pub size: usize,
    /// Offset in the backend file/buffer where the region contents are.
    pub offset: u64,
    /// The configured page size for this memory region.
    pub page_size: usize,
    /// The configured page size **in bytes** for this memory region. The name is
    /// wrong but cannot be changed due to being API, so this field is deprecated,
    /// to be removed in 2.0. It carries the same value as `page_size`.
    #[deprecated]
    pub page_size_kib: usize,
}
116
/// Errors related to saving and restoring Microvm state.
// NOTE: `displaydoc::Display` turns the doc comment of each variant into its `Display`
// message, so the variant docs below are user-visible error text, not just documentation.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum MicrovmStateError {
    /// Operation not allowed: {0}
    NotAllowed(String),
    /// Cannot restore devices: {0}
    RestoreDevices(#[from] DevicePersistError),
    /// Cannot save Vcpu state: {0}
    SaveVcpuState(vstate::vcpu::VcpuError),
    /// Cannot save Vm state: {0}
    SaveVmState(vstate::vm::ArchVmError),
    /// Cannot signal Vcpu: {0}
    SignalVcpu(VcpuSendEventError),
    /// Vcpu is in unexpected state.
    UnexpectedVcpuResponse,
}
133
/// Errors associated with creating a snapshot.
// NOTE: `displaydoc::Display` turns the doc comment of each variant into its `Display`
// message, so the variant docs below are user-visible error text, not just documentation.
// For the `*BackingFile` variants, `{0}` is the name of the failed file operation
// (e.g. "open", "flush", "sync_all") and `{1}` is the underlying I/O error.
#[rustfmt::skip]
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum CreateSnapshotError {
    /// Cannot get dirty bitmap: {0}
    DirtyBitmap(#[from] VmError),
    /// Cannot write memory file: {0}
    Memory(#[from] MemoryError),
    /// Cannot perform {0} on the memory backing file: {1}
    MemoryBackingFile(&'static str, io::Error),
    /// Cannot save the microVM state: {0}
    MicrovmState(MicrovmStateError),
    /// Cannot serialize the microVM state: {0}
    SerializeMicrovmState(#[from] crate::snapshot::SnapshotError),
    /// Cannot perform {0} on the snapshot backing file: {1}
    SnapshotBackingFile(&'static str, io::Error),
}
151
/// Snapshot version, expressed as a semantic version (`major.minor.patch`).
// NOTE(review): this constant is not referenced elsewhere in this file; presumably it is
// consumed by the snapshot (de)serialization code for format compatibility checks — confirm.
pub const SNAPSHOT_VERSION: Version = Version::new(8, 0, 0);
154
155/// Creates a Microvm snapshot.
156pub fn create_snapshot(
157    vmm: &mut Vmm,
158    vm_info: &VmInfo,
159    params: &CreateSnapshotParams,
160) -> Result<(), CreateSnapshotError> {
161    let microvm_state = vmm
162        .save_state(vm_info)
163        .map_err(CreateSnapshotError::MicrovmState)?;
164
165    snapshot_state_to_file(&microvm_state, &params.snapshot_path)?;
166
167    vmm.vm
168        .snapshot_memory_to_file(&params.mem_file_path, params.snapshot_type)?;
169
170    // We need to mark queues as dirty again for all activated devices. The reason we
171    // do it here is that we don't mark pages as dirty during runtime
172    // for queue objects.
173    vmm.device_manager
174        .mark_virtio_queue_memory_dirty(vmm.vm.guest_memory());
175
176    Ok(())
177}
178
179fn snapshot_state_to_file(
180    microvm_state: &MicrovmState,
181    snapshot_path: &Path,
182) -> Result<(), CreateSnapshotError> {
183    use self::CreateSnapshotError::*;
184    let mut snapshot_file = OpenOptions::new()
185        .create(true)
186        .write(true)
187        .truncate(true)
188        .open(snapshot_path)
189        .map_err(|err| SnapshotBackingFile("open", err))?;
190
191    let snapshot = Snapshot::new(microvm_state);
192    snapshot.save(&mut snapshot_file)?;
193    snapshot_file
194        .flush()
195        .map_err(|err| SnapshotBackingFile("flush", err))?;
196    snapshot_file
197        .sync_all()
198        .map_err(|err| SnapshotBackingFile("sync_all", err))
199}
200
201/// Validates that snapshot CPU vendor matches the host CPU vendor.
202///
203/// # Errors
204///
205/// When:
206/// - Failed to read host vendor.
207/// - Failed to read snapshot vendor.
208#[cfg(target_arch = "x86_64")]
209pub fn validate_cpu_vendor(microvm_state: &MicrovmState) {
210    let host_vendor_id = get_vendor_id_from_host();
211    let snapshot_vendor_id = microvm_state.vcpu_states[0].cpuid.vendor_id();
212    match (host_vendor_id, snapshot_vendor_id) {
213        (Ok(host_id), Some(snapshot_id)) => {
214            info!("Host CPU vendor ID: {host_id:?}");
215            info!("Snapshot CPU vendor ID: {snapshot_id:?}");
216            if host_id != snapshot_id {
217                warn!("Host CPU vendor ID differs from the snapshotted one",);
218            }
219        }
220        (Ok(host_id), None) => {
221            info!("Host CPU vendor ID: {host_id:?}");
222            warn!("Snapshot CPU vendor ID: couldn't get from the snapshot");
223        }
224        (Err(_), Some(snapshot_id)) => {
225            warn!("Host CPU vendor ID: couldn't get from the host");
226            info!("Snapshot CPU vendor ID: {snapshot_id:?}");
227        }
228        (Err(_), None) => {
229            warn!("Host CPU vendor ID: couldn't get from the host");
230            warn!("Snapshot CPU vendor ID: couldn't get from the snapshot");
231        }
232    }
233}
234
/// Compares the CPU manufacturer ID recorded in the snapshot with the one of the host.
///
/// This check is advisory only: the function returns nothing and never fails. Both
/// manufacturer IDs are logged, and a warning is emitted when they differ or when either of
/// them cannot be determined.
///
/// The snapshot manufacturer ID is taken from the first vCPU state. A snapshot that contains
/// no vCPU state at all is reported as "couldn't get from the snapshot" instead of panicking.
#[cfg(target_arch = "aarch64")]
pub fn validate_cpu_manufacturer_id(microvm_state: &MicrovmState) {
    let host_cpu_id = get_manufacturer_id_from_host();
    // `first()` rather than `[0]`: the state comes from a file and this function is public,
    // so an empty vCPU list must not bring the process down.
    let snapshot_cpu_id = microvm_state
        .vcpu_states
        .first()
        .and_then(|vcpu_state| vcpu_state.regs.manifacturer_id());
    match (host_cpu_id, snapshot_cpu_id) {
        (Some(host_id), Some(snapshot_id)) => {
            info!("Host CPU manufacturer ID: {host_id:?}");
            info!("Snapshot CPU manufacturer ID: {snapshot_id:?}");
            if host_id != snapshot_id {
                warn!("Host CPU manufacturer ID differs from the snapshotted one",);
            }
        }
        (Some(host_id), None) => {
            info!("Host CPU manufacturer ID: {host_id:?}");
            warn!("Snapshot CPU manufacturer ID: couldn't get from the snapshot");
        }
        (None, Some(snapshot_id)) => {
            warn!("Host CPU manufacturer ID: couldn't get from the host");
            info!("Snapshot CPU manufacturer ID: {snapshot_id:?}");
        }
        (None, None) => {
            warn!("Host CPU manufacturer ID: couldn't get from the host");
            warn!("Snapshot CPU manufacturer ID: couldn't get from the snapshot");
        }
    }
}
/// Error type for [`snapshot_state_sanity_check`].
// NOTE: `displaydoc::Display` turns the doc comment of each variant into its `Display`
// message, so the variant docs below are user-visible error text, not just documentation.
// NOTE(review): `DramMemoryTooManySlots` is returned whenever a DRAM region does not have
// exactly one slot, which includes the case of zero slots.
#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)]
pub enum SnapShotStateSanityCheckError {
    /// No memory region defined.
    NoMemory,
    /// No DRAM memory region defined.
    NoDramMemory,
    /// DRAM memory has more than a single slot.
    DramMemoryTooManySlots,
    /// DRAM memory is unplugged.
    DramMemoryUnplugged,
}
282
283/// Performs sanity checks against the state file and returns specific errors.
284pub fn snapshot_state_sanity_check(
285    microvm_state: &MicrovmState,
286) -> Result<(), SnapShotStateSanityCheckError> {
287    // Check that the snapshot contains at least 1 mem region, that at least one is Dram,
288    // and that Dram region contains a single plugged slot.
289    // Upper bound check will be done when creating guest memory by comparing against
290    // KVM max supported value kvm_context.max_memslots().
291    let regions = &microvm_state.vm_state.memory.regions;
292
293    if regions.is_empty() {
294        return Err(SnapShotStateSanityCheckError::NoMemory);
295    }
296
297    if !regions
298        .iter()
299        .any(|r| r.region_type == GuestRegionType::Dram)
300    {
301        return Err(SnapShotStateSanityCheckError::NoDramMemory);
302    }
303
304    for dram_region in regions
305        .iter()
306        .filter(|r| r.region_type == GuestRegionType::Dram)
307    {
308        if dram_region.plugged.len() != 1 {
309            return Err(SnapShotStateSanityCheckError::DramMemoryTooManySlots);
310        }
311
312        if !dram_region.plugged[0] {
313            return Err(SnapShotStateSanityCheckError::DramMemoryUnplugged);
314        }
315    }
316
317    #[cfg(target_arch = "x86_64")]
318    validate_cpu_vendor(microvm_state);
319    #[cfg(target_arch = "aarch64")]
320    validate_cpu_manufacturer_id(microvm_state);
321
322    Ok(())
323}
324
/// Error type for [`restore_from_snapshot`].
// NOTE: `displaydoc::Display` turns the doc comment of each variant into its `Display`
// message, so the variant docs below are user-visible error text, not just documentation.
// Every variant is `#[from]`, so the wrapped error types convert with `?` / `.into()`.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum RestoreFromSnapshotError {
    /// Failed to get snapshot state from file: {0}
    File(#[from] SnapshotStateFromFileError),
    /// Invalid snapshot state: {0}
    Invalid(#[from] SnapShotStateSanityCheckError),
    /// Failed to load guest memory: {0}
    GuestMemory(#[from] RestoreFromSnapshotGuestMemoryError),
    /// Failed to build microVM from snapshot: {0}
    Build(#[from] BuildMicrovmFromSnapshotError),
}
/// Sub-Error type for [`restore_from_snapshot`] to contain either [`GuestMemoryFromFileError`] or
/// [`GuestMemoryFromUffdError`] within [`RestoreFromSnapshotError`].
// NOTE: `displaydoc::Display` turns the doc comment of each variant into its `Display`
// message, so the variant docs below are user-visible error text, not just documentation.
// Which variant is produced depends on the memory backend type selected when loading.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum RestoreFromSnapshotGuestMemoryError {
    /// Error creating guest memory from file: {0}
    File(#[from] GuestMemoryFromFileError),
    /// Error creating guest memory from uffd: {0}
    Uffd(#[from] GuestMemoryFromUffdError),
}
346
347/// Loads a Microvm snapshot producing a 'paused' Microvm.
348pub fn restore_from_snapshot(
349    instance_info: &InstanceInfo,
350    event_manager: &mut EventManager,
351    seccomp_filters: &BpfThreadMap,
352    params: &LoadSnapshotParams,
353    vm_resources: &mut VmResources,
354) -> Result<Arc<Mutex<Vmm>>, RestoreFromSnapshotError> {
355    let mut microvm_state = snapshot_state_from_file(&params.snapshot_path)?;
356    for entry in &params.network_overrides {
357        microvm_state
358            .device_states
359            .mmio_state
360            .net_devices
361            .iter_mut()
362            .map(|device| &mut device.device_state)
363            .chain(
364                microvm_state
365                    .device_states
366                    .pci_state
367                    .net_devices
368                    .iter_mut()
369                    .map(|device| &mut device.device_state),
370            )
371            .find(|x| x.id == entry.iface_id)
372            .map(|device_state| device_state.tap_if_name.clone_from(&entry.host_dev_name))
373            .ok_or(SnapshotStateFromFileError::UnknownNetworkDevice)?;
374    }
375    let track_dirty_pages = params.track_dirty_pages;
376
377    let vcpu_count = microvm_state
378        .vcpu_states
379        .len()
380        .try_into()
381        .map_err(|_| MachineConfigError::InvalidVcpuCount)
382        .map_err(BuildMicrovmFromSnapshotError::VmUpdateConfig)?;
383
384    vm_resources
385        .update_machine_config(&MachineConfigUpdate {
386            vcpu_count: Some(vcpu_count),
387            mem_size_mib: Some(u64_to_usize(microvm_state.vm_info.mem_size_mib)),
388            smt: Some(microvm_state.vm_info.smt),
389            cpu_template: Some(microvm_state.vm_info.cpu_template),
390            track_dirty_pages: Some(track_dirty_pages),
391            huge_pages: Some(microvm_state.vm_info.huge_pages),
392            enable_nested_virt: Some(microvm_state.vm_info.enable_nested_virt),
393            #[cfg(feature = "gdb")]
394            gdb_socket_path: None,
395        })
396        .map_err(BuildMicrovmFromSnapshotError::VmUpdateConfig)?;
397
398    // Some sanity checks before building the microvm.
399    snapshot_state_sanity_check(&microvm_state)?;
400
401    let mem_backend_path = &params.mem_backend.backend_path;
402    let mem_state = &microvm_state.vm_state.memory;
403
404    let (guest_memory, uffd) = match params.mem_backend.backend_type {
405        MemBackendType::File => {
406            if vm_resources.machine_config.huge_pages.is_hugetlbfs() {
407                return Err(RestoreFromSnapshotGuestMemoryError::File(
408                    GuestMemoryFromFileError::HugetlbfsSnapshot,
409                )
410                .into());
411            }
412            (
413                guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages)
414                    .map_err(RestoreFromSnapshotGuestMemoryError::File)?,
415                None,
416            )
417        }
418        MemBackendType::Uffd => guest_memory_from_uffd(
419            mem_backend_path,
420            mem_state,
421            track_dirty_pages,
422            vm_resources.machine_config.huge_pages,
423        )
424        .map_err(RestoreFromSnapshotGuestMemoryError::Uffd)?,
425    };
426    builder::build_microvm_from_snapshot(
427        instance_info,
428        event_manager,
429        microvm_state,
430        guest_memory,
431        uffd,
432        seccomp_filters,
433        vm_resources,
434    )
435    .map_err(RestoreFromSnapshotError::Build)
436}
437
/// Error type for [`snapshot_state_from_file`]
// NOTE: `displaydoc::Display` turns the doc comment of each variant into its `Display`
// message, so the variant docs below are user-visible error text, not just documentation.
// `UnknownNetworkDevice` is not produced while reading the file; it is returned by
// `restore_from_snapshot` when a network override names a device missing from the state.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum SnapshotStateFromFileError {
    /// Failed to open snapshot file: {0}
    Open(#[from] std::io::Error),
    /// Failed to load snapshot state from file: {0}
    Load(#[from] crate::snapshot::SnapshotError),
    /// Unknown Network Device.
    UnknownNetworkDevice,
}
448
449fn snapshot_state_from_file(
450    snapshot_path: &Path,
451) -> Result<MicrovmState, SnapshotStateFromFileError> {
452    let mut snapshot_reader = File::open(snapshot_path)?;
453    let snapshot = Snapshot::load(&mut snapshot_reader)?;
454
455    Ok(snapshot.data)
456}
457
/// Error type for [`guest_memory_from_file`].
// NOTE: `displaydoc::Display` turns the doc comment of each variant into its `Display`
// message, so the variant docs below are user-visible error text, not just documentation.
// `HugetlbfsSnapshot` is raised by `restore_from_snapshot` before `guest_memory_from_file`
// is even called, when the file backend is requested for a hugetlbfs configuration.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum GuestMemoryFromFileError {
    /// Failed to load guest memory: {0}
    File(#[from] std::io::Error),
    /// Failed to restore guest memory: {0}
    Restore(#[from] MemoryError),
    /// Cannot restore hugetlbfs backed snapshot by mapping the memory file. Please use uffd.
    HugetlbfsSnapshot,
}
468
469fn guest_memory_from_file(
470    mem_file_path: &Path,
471    mem_state: &GuestMemoryState,
472    track_dirty_pages: bool,
473) -> Result<Vec<GuestRegionMmap>, GuestMemoryFromFileError> {
474    let mem_file = File::open(mem_file_path)?;
475    let guest_mem = memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages)?;
476    Ok(guest_mem)
477}
478
479/// Error type for [`guest_memory_from_uffd`]
480#[derive(Debug, thiserror::Error, displaydoc::Display)]
481pub enum GuestMemoryFromUffdError {
482    /// Failed to restore guest memory: {0}
483    Restore(#[from] MemoryError),
484    /// Failed to UFFD object: {0}
485    Create(userfaultfd::Error),
486    /// Failed to register memory address range with the userfaultfd object: {0}
487    Register(userfaultfd::Error),
488    /// Failed to connect to UDS Unix stream: {0}
489    Connect(#[from] std::io::Error),
490    /// Failed to sends file descriptor: {0}
491    Send(#[from] vmm_sys_util::errno::Error),
492}
493
494fn guest_memory_from_uffd(
495    mem_uds_path: &Path,
496    mem_state: &GuestMemoryState,
497    track_dirty_pages: bool,
498    huge_pages: HugePageConfig,
499) -> Result<(Vec<GuestRegionMmap>, Option<Uffd>), GuestMemoryFromUffdError> {
500    let (guest_memory, backend_mappings) =
501        create_guest_memory(mem_state, track_dirty_pages, huge_pages)?;
502
503    let mut uffd_builder = UffdBuilder::new();
504
505    // We only make use of this if balloon devices are present, but we can enable it unconditionally
506    // because the only place the kernel checks this is in a hook from madvise, e.g. it doesn't
507    // actively change the behavior of UFFD, only passively. Without balloon devices
508    // we never call madvise anyway, so no need to put this into a conditional.
509    uffd_builder.require_features(FeatureFlags::EVENT_REMOVE);
510
511    let uffd = uffd_builder
512        .close_on_exec(true)
513        .non_blocking(true)
514        .user_mode_only(false)
515        .create()
516        .map_err(GuestMemoryFromUffdError::Create)?;
517
518    for mem_region in guest_memory.iter() {
519        uffd.register(mem_region.as_ptr().cast(), mem_region.size() as _)
520            .map_err(GuestMemoryFromUffdError::Register)?;
521    }
522
523    send_uffd_handshake(mem_uds_path, &backend_mappings, &uffd)?;
524
525    Ok((guest_memory, Some(uffd)))
526}
527
528fn create_guest_memory(
529    mem_state: &GuestMemoryState,
530    track_dirty_pages: bool,
531    huge_pages: HugePageConfig,
532) -> Result<(Vec<GuestRegionMmap>, Vec<GuestRegionUffdMapping>), GuestMemoryFromUffdError> {
533    let guest_memory = memory::anonymous(mem_state.regions(), track_dirty_pages, huge_pages)?;
534    let mut backend_mappings = Vec::with_capacity(guest_memory.len());
535    let mut offset = 0;
536    for mem_region in guest_memory.iter() {
537        #[allow(deprecated)]
538        backend_mappings.push(GuestRegionUffdMapping {
539            base_host_virt_addr: mem_region.as_ptr() as u64,
540            size: mem_region.size(),
541            offset,
542            page_size: huge_pages.page_size(),
543            page_size_kib: huge_pages.page_size(),
544        });
545        offset += mem_region.size() as u64;
546    }
547
548    Ok((guest_memory, backend_mappings))
549}
550
551fn send_uffd_handshake(
552    mem_uds_path: &Path,
553    backend_mappings: &[GuestRegionUffdMapping],
554    uffd: &impl AsRawFd,
555) -> Result<(), GuestMemoryFromUffdError> {
556    // This is safe to unwrap() because we control the contents of the vector
557    // (i.e GuestRegionUffdMapping entries).
558    let backend_mappings = serde_json::to_string(backend_mappings).unwrap();
559
560    let socket = UnixStream::connect(mem_uds_path)?;
561    socket.send_with_fd(
562        backend_mappings.as_bytes(),
563        // In the happy case we can close the fd since the other process has it open and is
564        // using it to serve us pages.
565        //
566        // The problem is that if other process crashes/exits, firecracker guest memory
567        // will simply revert to anon-mem behavior which would lead to silent errors and
568        // undefined behavior.
569        //
570        // To tackle this scenario, the page fault handler can notify Firecracker of any
571        // crashes/exits. There is no need for Firecracker to explicitly send its process ID.
572        // The external process can obtain Firecracker's PID by calling `getsockopt` with
573        // `libc::SO_PEERCRED` option like so:
574        //
575        // let mut val = libc::ucred { pid: 0, gid: 0, uid: 0 };
576        // let mut ucred_size: u32 = mem::size_of::<libc::ucred>() as u32;
577        // libc::getsockopt(
578        //      socket.as_raw_fd(),
579        //      libc::SOL_SOCKET,
580        //      libc::SO_PEERCRED,
581        //      &mut val as *mut _ as *mut _,
582        //      &mut ucred_size as *mut libc::socklen_t,
583        // );
584        //
585        // Per this linux man page: https://man7.org/linux/man-pages/man7/unix.7.html,
586        // `SO_PEERCRED` returns the credentials (PID, UID and GID) of the peer process
587        // connected to this socket. The returned credentials are those that were in effect
588        // at the time of the `connect` call.
589        //
590        // Moreover, Firecracker holds a copy of the UFFD fd as well, so that even if the
591        // page fault handler process does not tear down Firecracker when necessary, the
592        // uffd will still be alive but with no one to serve faults, leading to guest freeze.
593        uffd.as_raw_fd(),
594    )?;
595
596    // We prevent Rust from closing the socket file descriptor to avoid a potential race condition
597    // between the mappings message and the connection shutdown. If the latter arrives at the UFFD
598    // handler first, the handler never sees the mappings.
599    forget(socket);
600
601    Ok(())
602}
603
604#[cfg(test)]
605mod tests {
606    use std::os::unix::net::UnixListener;
607
608    use vmm_sys_util::tempfile::TempFile;
609
610    use super::*;
611    use crate::Vmm;
612    #[cfg(target_arch = "x86_64")]
613    use crate::builder::tests::insert_vmclock_device;
614    #[cfg(target_arch = "x86_64")]
615    use crate::builder::tests::insert_vmgenid_device;
616    use crate::builder::tests::{
617        CustomBlockConfig, default_kernel_cmdline, default_vmm, insert_balloon_device,
618        insert_block_devices, insert_net_device, insert_vsock_device,
619    };
620    #[cfg(target_arch = "aarch64")]
621    use crate::construct_kvm_mpidrs;
622    use crate::devices::virtio::block::CacheType;
623    use crate::snapshot::Persist;
624    use crate::vmm_config::balloon::BalloonDeviceConfig;
625    use crate::vmm_config::net::NetworkInterfaceConfig;
626    use crate::vmm_config::vsock::tests::default_config;
627    use crate::vstate::memory::{GuestMemoryRegionState, GuestRegionType};
628
    /// Builds a default test [`Vmm`] and attaches one balloon, one block, one net and one
    /// vsock device to it; on x86_64 a vmgenid and a vmclock device are inserted as well.
    fn default_vmm_with_devices() -> Vmm {
        let mut event_manager = EventManager::new().expect("Cannot create EventManager");
        let mut vmm = default_vmm();
        let mut cmdline = default_kernel_cmdline();

        // Add a balloon device.
        let balloon_config = BalloonDeviceConfig {
            amount_mib: 0,
            deflate_on_oom: false,
            stats_polling_interval_s: 0,
            free_page_hinting: false,
            free_page_reporting: false,
        };
        insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_config);

        // Add a block device.
        let drive_id = String::from("root");
        let block_configs = vec![CustomBlockConfig::new(
            drive_id,
            true,
            None,
            true,
            CacheType::Unsafe,
        )];
        insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs);

        // Add net device.
        let network_interface = NetworkInterfaceConfig {
            iface_id: String::from("netif"),
            host_dev_name: String::from("hostname"),
            guest_mac: None,
            rx_rate_limiter: None,
            tx_rate_limiter: None,
        };
        insert_net_device(
            &mut vmm,
            &mut cmdline,
            &mut event_manager,
            network_interface,
        );

        // Add vsock device.
        // The temp file is created and removed right away, leaving only its path behind.
        // NOTE(review): presumably so the vsock device can create its own socket at that
        // unused path — confirm against `default_config`.
        let mut tmp_sock_file = TempFile::new().unwrap();
        tmp_sock_file.remove().unwrap();
        let vsock_config = default_config(&tmp_sock_file);

        insert_vsock_device(&mut vmm, &mut cmdline, &mut event_manager, vsock_config);

        #[cfg(target_arch = "x86_64")]
        insert_vmgenid_device(&mut vmm);
        #[cfg(target_arch = "x86_64")]
        insert_vmclock_device(&mut vmm);

        vmm
    }
684
    /// Saves a [`MicrovmState`] built from a VMM with devices into an in-memory buffer,
    /// loads it back and checks that the VM info and the MMIO device states round-trip.
    #[test]
    fn test_microvm_state_snapshot() {
        let vmm = default_vmm_with_devices();
        let states = vmm.device_manager.save();

        // Only checking that all devices are saved, actual device state
        // is tested by that device's tests.
        assert_eq!(states.mmio_state.block_devices.len(), 1);
        assert_eq!(states.mmio_state.net_devices.len(), 1);
        assert!(states.mmio_state.vsock_device.is_some());
        assert!(states.mmio_state.balloon_device.is_some());

        let vcpu_states = vec![VcpuState::default()];
        // On aarch64, saving the VM state takes the MPIDRs derived from the vCPU states.
        #[cfg(target_arch = "aarch64")]
        let mpidrs = construct_kvm_mpidrs(&vcpu_states);
        let microvm_state = MicrovmState {
            device_states: states,
            vcpu_states,
            kvm_state: Default::default(),
            vm_info: VmInfo {
                mem_size_mib: 1u64,
                ..Default::default()
            },
            #[cfg(target_arch = "aarch64")]
            vm_state: vmm.vm.save_state(&mpidrs).unwrap(),
            #[cfg(target_arch = "x86_64")]
            vm_state: vmm.vm.save_state().unwrap(),
        };

        // Serialize into a fixed-size, zero-initialized in-memory buffer.
        // NOTE(review): the serialized state presumably does not fill the whole buffer,
        // which would explain why the load below skips the CRC check — confirm.
        let mut buf = vec![0; 10000];
        Snapshot::new(&microvm_state)
            .save(&mut buf.as_mut_slice())
            .unwrap();

        let restored_microvm_state: MicrovmState = Snapshot::load_without_crc_check(buf.as_slice())
            .unwrap()
            .data;

        assert_eq!(restored_microvm_state.vm_info, microvm_state.vm_info);
        assert_eq!(
            restored_microvm_state.device_states.mmio_state,
            microvm_state.device_states.mmio_state
        )
    }
729
730    #[test]
731    fn test_create_guest_memory() {
732        let mem_state = GuestMemoryState {
733            regions: vec![GuestMemoryRegionState {
734                base_address: 0,
735                size: 0x20000,
736                region_type: GuestRegionType::Dram,
737                plugged: vec![true],
738            }],
739        };
740
741        let (_, uffd_regions) =
742            create_guest_memory(&mem_state, false, HugePageConfig::None).unwrap();
743
744        assert_eq!(uffd_regions.len(), 1);
745        assert_eq!(uffd_regions[0].size, 0x20000);
746        assert_eq!(uffd_regions[0].offset, 0);
747        assert_eq!(uffd_regions[0].page_size, HugePageConfig::None.page_size());
748    }
749
750    #[test]
751    fn test_send_uffd_handshake() {
752        #[allow(deprecated)]
753        let uffd_regions = vec![
754            GuestRegionUffdMapping {
755                base_host_virt_addr: 0,
756                size: 0x100000,
757                offset: 0,
758                page_size: HugePageConfig::None.page_size(),
759                page_size_kib: HugePageConfig::None.page_size(),
760            },
761            GuestRegionUffdMapping {
762                base_host_virt_addr: 0x100000,
763                size: 0x200000,
764                offset: 0,
765                page_size: HugePageConfig::Hugetlbfs2M.page_size(),
766                page_size_kib: HugePageConfig::Hugetlbfs2M.page_size(),
767            },
768        ];
769
770        let uds_path = TempFile::new().unwrap();
771        let uds_path = uds_path.as_path();
772        std::fs::remove_file(uds_path).unwrap();
773
774        let listener = UnixListener::bind(uds_path).expect("Cannot bind to socket path");
775
776        send_uffd_handshake(uds_path, &uffd_regions, &std::io::stdin()).unwrap();
777
778        let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
779
780        let mut message_buf = vec![0u8; 1024];
781        let (bytes_read, _) = stream
782            .recv_with_fd(&mut message_buf[..])
783            .expect("Cannot recv_with_fd");
784        message_buf.resize(bytes_read, 0);
785
786        let deserialized: Vec<GuestRegionUffdMapping> =
787            serde_json::from_slice(&message_buf).unwrap();
788
789        assert_eq!(uffd_regions, deserialized);
790    }
791}