vmm/
lib.rs

1// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the THIRD-PARTY file.
7
8//! Virtual Machine Monitor that leverages the Linux Kernel-based Virtual Machine (KVM),
9//! and other virtualization features to run a single lightweight micro-virtual
10//! machine (microVM).
11#![allow(missing_docs)]
12#![warn(clippy::undocumented_unsafe_blocks)]
13#![allow(clippy::blanket_clippy_restriction_lints)]
14
15/// Implements platform specific functionality.
16/// Supported platforms: x86_64 and aarch64.
17pub mod arch;
18
19/// High-level interface over Linux io_uring.
20///
21/// Aims to provide an easy-to-use interface, while making some Firecracker-specific simplifying
22/// assumptions. The crate does not currently aim at supporting all io_uring features and use
23/// cases. For example, it only works with pre-registered fds and read/write/fsync requests.
24///
25/// Requires at least kernel version 5.10.51.
26/// For more information on io_uring, refer to the man pages.
27/// [This pdf](https://kernel.dk/io_uring.pdf) is also very useful, though outdated at times.
28pub mod io_uring;
29
30/// # Rate Limiter
31///
32/// Provides a rate limiter written in Rust useful for IO operations that need to
33/// be throttled.
34///
35/// ## Behavior
36///
37/// The rate limiter starts off as 'unblocked' with two token buckets configured
38/// with the values passed in the `RateLimiter::new()` constructor.
39/// All subsequent accounting is done independently for each token bucket based
40/// on the `TokenType` used. If any of the buckets runs out of budget, the limiter
41/// goes in the 'blocked' state. At this point an internal timer is set up which
42/// will later 'wake up' the user in order to retry sending data. The 'wake up'
43/// notification will be dispatched as an event on the FD provided by the `AsRawFD`
44/// trait implementation.
45///
46/// The contract is that the user shall also call the `event_handler()` method on
47/// receipt of such an event.
48///
49/// The token buckets are replenished when a called `consume()` doesn't find enough
50/// tokens in the bucket. The amount of tokens replenished is automatically calculated
51/// to respect the `complete_refill_time` configuration parameter provided by the user.
52/// The token buckets will never replenish above their respective `size`.
53///
54/// Each token bucket can start off with a `one_time_burst` initial extra capacity
55/// on top of their `size`. This initial extra credit does not replenish and
56/// can be used for an initial burst of data.
57///
58/// The granularity for 'wake up' events when the rate limiter is blocked is
59/// currently hardcoded to `100 milliseconds`.
60///
61/// ## Limitations
62///
63/// This rate limiter implementation relies on the *Linux kernel's timerfd* so its
64/// usage is limited to Linux systems.
65///
66/// Another particularity of this implementation is that it is not self-driving.
67/// It is meant to be used in an external event loop and thus implements the `AsRawFd`
68/// trait and provides an *event-handler* as part of its API. This *event-handler*
69/// needs to be called by the user on every event on the rate limiter's `AsRawFd` FD.
70pub mod rate_limiter;
71
72/// Module for handling ACPI tables.
73/// Currently, we only use ACPI on x86 microVMs.
74#[cfg(target_arch = "x86_64")]
75pub mod acpi;
/// Handles setup and initialization of a `Vmm` object.
77pub mod builder;
78/// Types for guest configuration.
79pub mod cpu_config;
80pub mod device_manager;
81/// Emulates virtual and hardware devices.
82#[allow(missing_docs)]
83pub mod devices;
84/// minimalist HTTP/TCP/IPv4 stack named DUMBO
85pub mod dumbo;
86/// Support for GDB debugging the guest
87#[cfg(feature = "gdb")]
88pub mod gdb;
89/// Logger
90pub mod logger;
91/// microVM Metadata Service MMDS
92pub mod mmds;
93/// PCI specific emulation code.
94pub mod pci;
95/// Save/restore utilities.
96pub mod persist;
97/// Resource store for configured microVM resources.
98pub mod resources;
99/// microVM RPC API adapters.
100pub mod rpc_interface;
101/// Seccomp filter utilities.
102pub mod seccomp;
103/// Signal handling utilities.
104pub mod signal_handler;
105/// Serialization and deserialization facilities
106pub mod snapshot;
107/// Utility functions for integration and benchmark testing
108pub mod test_utils;
/// Utility functions and structs
110pub mod utils;
111/// Wrappers over structures used to configure the VMM.
112pub mod vmm_config;
113/// Module with virtual state structs.
114pub mod vstate;
115
116/// Module with initrd.
117pub mod initrd;
118
119use std::collections::HashMap;
120use std::io;
121use std::os::unix::io::AsRawFd;
122use std::sync::mpsc::RecvTimeoutError;
123use std::sync::{Arc, Barrier, Mutex};
124use std::time::Duration;
125
126use device_manager::DeviceManager;
127use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEventSubscriber};
128use seccomp::BpfProgram;
129use snapshot::Persist;
130use userfaultfd::Uffd;
131use vmm_sys_util::epoll::EventSet;
132use vmm_sys_util::eventfd::EventFd;
133use vmm_sys_util::terminal::Terminal;
134use vstate::kvm::Kvm;
135use vstate::vcpu::{self, StartThreadedError, VcpuSendEventError};
136
137use crate::cpu_config::templates::CpuConfiguration;
138use crate::devices::virtio::balloon::device::{HintingStatus, StartHintingCmd};
139use crate::devices::virtio::balloon::{
140    BALLOON_DEV_ID, Balloon, BalloonConfig, BalloonError, BalloonStats,
141};
142use crate::devices::virtio::block::BlockError;
143use crate::devices::virtio::block::device::Block;
144use crate::devices::virtio::mem::{VIRTIO_MEM_DEV_ID, VirtioMem, VirtioMemError, VirtioMemStatus};
145use crate::devices::virtio::net::Net;
146use crate::logger::{METRICS, MetricsError, error, info, warn};
147use crate::persist::{MicrovmState, MicrovmStateError, VmInfo};
148use crate::rate_limiter::BucketUpdate;
149use crate::vmm_config::instance_info::{InstanceInfo, VmState};
150use crate::vstate::memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
151use crate::vstate::vcpu::VcpuState;
152pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse};
153pub use crate::vstate::vm::Vm;
154
/// Shorthand type for the EventManager flavour used by Firecracker.
///
/// Subscribers are held as `Arc<Mutex<dyn MutEventSubscriber>>`, i.e. as shared,
/// mutex-protected trait objects.
pub type EventManager = BaseEventManager<Arc<Mutex<dyn MutEventSubscriber>>>;
157
// The signal-related variants are spelled like the signals themselves (e.g. `SIGBUS`),
// which is the most natural naming but trips the clippy lint `upper_case_acronyms`;
// the lint is therefore silenced for this enum only.
/// Exit codes that the Vmm can terminate with.
///
/// Every variant has an explicit numeric value; the variants are listed in ascending
/// order of that value.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FcExitCode {
    /// Success exit code.
    Ok = 0,
    /// Generic error exit code.
    GenericError = 1,
    /// Generic exit code error; not possible to occur if the program logic is sound.
    UnexpectedError = 2,
    /// Firecracker was shut down after intercepting a restricted system call.
    BadSyscall = 148,
    /// Firecracker was shut down after intercepting `SIGBUS`.
    SIGBUS = 149,
    /// Firecracker was shut down after intercepting `SIGSEGV`.
    SIGSEGV = 150,
    /// Firecracker was shut down after intercepting `SIGXFSZ`.
    SIGXFSZ = 151,
    /// Bad configuration for microvm's resources, when using a single json.
    BadConfiguration = 152,
    /// Command line arguments parsing error.
    ArgParsing = 153,
    /// Firecracker was shut down after intercepting `SIGXCPU`.
    SIGXCPU = 154,
    /// Firecracker was shut down after intercepting `SIGPIPE`.
    SIGPIPE = 155,
    /// Firecracker was shut down after intercepting `SIGHUP`.
    SIGHUP = 156,
    /// Firecracker was shut down after intercepting `SIGILL`.
    SIGILL = 157,
}
191
/// Upper bound for a single `recv_timeout` wait on a vcpu response during
/// Pause/Resume/Save/Restore.
///
/// The limit is deliberately generous: it should never be hit during normal usage and
/// exists to surface a potential vcpu deadlock.
pub const RECV_TIMEOUT_SEC: Duration = Duration::new(30, 0);
196
/// Default limit, in bytes, on the size of HTTP requests accepted by the API and MMDS
/// servers (50 KiB).
pub const HTTP_MAX_PAYLOAD_SIZE: usize = 50 * 1024;
199
/// Errors associated with the VMM internal logic. These errors cannot be generated by direct user
/// input, but can result from bad configuration of the host (for example if Firecracker doesn't
/// have permissions to open the KVM fd).
//
// NOTE: `displaydoc::Display` is derived below, so the `///` line on each variant doubles
// as the text of its `Display` message (`{0}` stands for the wrapped value). Reword those
// lines only when the user-visible error text is meant to change; use plain `//` comments
// for maintainer notes.
//
// Variants marked `#[from]` can be produced from their payload with `?`. Payload types
// that are shared by several variants (`MmioError`, `VcpuError`, `io::Error`) carry no
// `#[from]` and are wrapped explicitly at the call site.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VmmError {
    #[cfg(target_arch = "aarch64")]
    /// Invalid command line error.
    Cmdline,
    /// Device manager error: {0}
    DeviceManager(#[from] device_manager::DeviceManagerCreateError),
    /// MMIO Device manager error: {0}
    MmioDeviceManager(device_manager::mmio::MmioError),
    /// Error getting the KVM dirty bitmap. {0}
    DirtyBitmap(kvm_ioctls::Error),
    /// I8042 error: {0}
    I8042Error(devices::legacy::I8042DeviceError),
    #[cfg(target_arch = "x86_64")]
    /// Cannot add devices to the legacy I/O Bus. {0}
    LegacyIOBus(device_manager::legacy::LegacyDeviceError),
    /// Metrics error: {0}
    Metrics(MetricsError),
    /// Cannot add a device to the MMIO Bus. {0}
    RegisterMMIODevice(device_manager::mmio::MmioError),
    /// Cannot install seccomp filters: {0}
    SeccompFilters(seccomp::InstallationError),
    /// Error writing to the serial console: {0}
    Serial(io::Error),
    /// Error creating timer fd: {0}
    TimerFd(io::Error),
    /// Error creating the vcpu: {0}
    VcpuCreate(vstate::vcpu::VcpuError),
    /// Cannot send event to vCPU. {0}
    VcpuEvent(vstate::vcpu::VcpuError),
    /// Cannot create a vCPU handle. {0}
    VcpuHandle(vstate::vcpu::VcpuError),
    /// Failed to start vCPUs
    VcpuStart(StartVcpusError),
    /// Failed to pause the vCPUs.
    VcpuPause,
    /// Failed to exit the vCPUs.
    VcpuExit,
    /// Failed to resume the vCPUs.
    VcpuResume,
    // Returned by `Vmm::pause_vm` and `Vmm::resume_vm` both when an event cannot be sent
    // and when a vCPU does not answer with the expected response in time.
    /// Failed to message the vCPUs.
    VcpuMessage,
    /// Cannot spawn Vcpu thread: {0}
    VcpuSpawn(io::Error),
    /// Vm error: {0}
    Vm(#[from] vstate::vm::VmError),
    /// Kvm error: {0}
    Kvm(#[from] vstate::kvm::KvmError),
    // Produced when a virtio device lookup through the device manager fails.
    /// Failed perform action on device: {0}
    FindDeviceError(#[from] device_manager::FindDeviceError),
    /// Block: {0}
    Block(#[from] BlockError),
    /// Balloon: {0}
    Balloon(#[from] BalloonError),
    /// Failed to create memory hotplug device: {0}
    VirtioMem(#[from] VirtioMemError),
}
260
/// Shorthand type for KVM dirty page bitmap.
///
/// Maps a `u32` key to a bitmap stored as a vector of `u64` words.
// NOTE(review): the key is presumably the KVM memory slot the bitmap belongs to —
// confirm against the code that fills this map.
pub type DirtyBitmap = HashMap<u32, Vec<u64>>;
263
264/// Returns the size of guest memory, in MiB.
265pub(crate) fn mem_size_mib(guest_memory: &GuestMemoryMmap) -> u64 {
266    guest_memory.iter().map(|region| region.len()).sum::<u64>() >> 20
267}
268
// Error type for [`Vmm::emulate_serial_init`].
//
// The line above is deliberately a plain `//` comment: `displaydoc::Display` is derived,
// so the `///` line below is the `Display` message (`{0}` is the wrapped `io::Error`).
// NOTE(review): the `impl Vmm` block in this file has no `emulate_serial_init` method —
// confirm this type is still referenced elsewhere.
/// Emulate serial init error: {0}
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub struct EmulateSerialInitError(#[from] std::io::Error);
273
/// Error type for [`Vmm::start_vcpus`].
//
// `displaydoc::Display` is derived: each variant's `///` line is its `Display` message.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum StartVcpusError {
    // In `Vmm::start_vcpus` this is the conversion target of the `?` applied to the stdin
    // raw-mode and non-blocking setup calls.
    /// VMM observer init error: {0}
    VmmObserverInit(#[from] vmm_sys_util::errno::Error),
    // In `Vmm::start_vcpus` this is the conversion target of the `?` applied to
    // `Vcpu::start_threaded`.
    /// Vcpu handle error: {0}
    VcpuHandle(#[from] StartThreadedError),
}
282
/// Error type for [`Vmm::dump_cpu_config()`]
//
// `displaydoc::Display` is derived: each variant's `///` line is its `Display` message.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum DumpCpuConfigError {
    /// Failed to send event to vcpu thread: {0}
    SendEvent(#[from] VcpuSendEventError),
    // Also used by `Vmm::dump_cpu_config` when waiting for a vcpu response fails
    // (`recv_timeout` returned an error), not only for a response of the wrong kind.
    /// Got unexpected response from vcpu thread.
    UnexpectedResponse,
    // Carries the error reported by the vcpu through `VcpuResponse::Error`.
    /// Failed to dump CPU config: {0}
    DumpCpuConfig(#[from] vcpu::VcpuError),
    // Carries the reason reported by the vcpu through `VcpuResponse::NotAllowed`.
    /// Operation not allowed: {0}
    NotAllowed(String),
}
295
/// Contains the state and associated methods required for the Firecracker VMM.
#[derive(Debug)]
pub struct Vmm {
    /// The [`InstanceInfo`] state of this [`Vmm`].
    pub instance_info: InstanceInfo,
    // `None` until `Vmm::stop()` stores the exit code to propagate; reset to `None` by
    // `Vmm::clear_shutdown_exit_code()`.
    shutdown_exit_code: Option<FcExitCode>,

    // Guest VM core resources.
    // KVM handle, exposed read-only through `Vmm::kvm()`.
    kvm: Kvm,
    /// VM object
    pub vm: Arc<Vm>,
    // Save UFFD in order to keep it open in the Firecracker process, as well.
    #[allow(unused)]
    uffd: Option<Uffd>,
    /// Handles to the vcpu threads with vcpu_fds inside them.
    ///
    /// Filled by `Vmm::start_vcpus()` and emptied by `Vmm::stop()`.
    pub vcpus_handles: Vec<VcpuHandle>,
    // Used by Vcpus and devices to initiate teardown; Vmm should never write here.
    // The Vmm only listens on it: it is registered for `EventSet::IN` in
    // `MutEventSubscriber::init` and read in `MutEventSubscriber::process`.
    vcpus_exit_evt: EventFd,
    /// Device manager
    pub device_manager: DeviceManager,
}
317
318impl Vmm {
319    /// Gets Vmm version.
320    pub fn version(&self) -> String {
321        self.instance_info.vmm_version.clone()
322    }
323
324    /// Gets Vmm instance info.
325    pub fn instance_info(&self) -> InstanceInfo {
326        self.instance_info.clone()
327    }
328
329    /// Provides access to the underlying KVM handle.
330    pub fn kvm(&self) -> &Kvm {
331        &self.kvm
332    }
333
334    /// Provides the Vmm shutdown exit code if there is one.
335    pub fn shutdown_exit_code(&self) -> Option<FcExitCode> {
336        self.shutdown_exit_code
337    }
338
339    /// Clears any shutdown exit code and returns the VM to a paused state.
340    pub fn clear_shutdown_exit_code(&mut self) {
341        self.shutdown_exit_code = None;
342        self.instance_info.state = VmState::Paused;
343    }
344
345    /// Starts the microVM vcpus.
346    ///
347    /// # Errors
348    ///
349    /// When:
350    /// - [`vmm::VmmEventsObserver::on_vmm_boot`] errors.
351    /// - [`vmm::vstate::vcpu::Vcpu::start_threaded`] errors.
352    pub fn start_vcpus(
353        &mut self,
354        mut vcpus: Vec<Vcpu>,
355        vcpu_seccomp_filter: Arc<BpfProgram>,
356    ) -> Result<(), StartVcpusError> {
357        let vcpu_count = vcpus.len();
358        let barrier = Arc::new(Barrier::new(vcpu_count + 1));
359
360        let stdin = std::io::stdin().lock();
361        // Set raw mode for stdin.
362        stdin.set_raw_mode().inspect_err(|&err| {
363            warn!("Cannot set raw mode for the terminal. {:?}", err);
364        })?;
365
366        // Set non blocking stdin.
367        stdin.set_non_block(true).inspect_err(|&err| {
368            warn!("Cannot set non block for the terminal. {:?}", err);
369        })?;
370
371        self.vcpus_handles.reserve(vcpu_count);
372
373        for mut vcpu in vcpus.drain(..) {
374            vcpu.set_mmio_bus(self.vm.common.mmio_bus.clone());
375            #[cfg(target_arch = "x86_64")]
376            vcpu.kvm_vcpu.set_pio_bus(self.vm.pio_bus.clone());
377
378            self.vcpus_handles.push(vcpu.start_threaded(
379                &self.vm,
380                vcpu_seccomp_filter.clone(),
381                barrier.clone(),
382            )?);
383        }
384        self.instance_info.state = VmState::Paused;
385        // Wait for vCPUs to initialize their TLS before moving forward.
386        barrier.wait();
387
388        Ok(())
389    }
390
391    /// Sends a resume command to the vCPUs.
392    pub fn resume_vm(&mut self) -> Result<(), VmmError> {
393        self.device_manager.kick_virtio_devices();
394
395        // Send the events.
396        self.vcpus_handles
397            .iter_mut()
398            .try_for_each(|handle| handle.send_event(VcpuEvent::Resume))
399            .map_err(|_| VmmError::VcpuMessage)?;
400
401        // Check the responses.
402        if self
403            .vcpus_handles
404            .iter()
405            .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
406            .any(|response| !matches!(response, Ok(VcpuResponse::Resumed)))
407        {
408            return Err(VmmError::VcpuMessage);
409        }
410
411        self.instance_info.state = VmState::Running;
412        Ok(())
413    }
414
415    /// Sends a pause command to the vCPUs.
416    pub fn pause_vm(&mut self) -> Result<(), VmmError> {
417        // Send the events.
418        self.vcpus_handles
419            .iter_mut()
420            .try_for_each(|handle| handle.send_event(VcpuEvent::Pause))
421            .map_err(|_| VmmError::VcpuMessage)?;
422
423        // Check the responses.
424        if self
425            .vcpus_handles
426            .iter()
427            .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
428            .any(|response| !matches!(response, Ok(VcpuResponse::Paused)))
429        {
430            return Err(VmmError::VcpuMessage);
431        }
432
433        self.instance_info.state = VmState::Paused;
434        Ok(())
435    }
436
437    /// Injects CTRL+ALT+DEL keystroke combo in the i8042 device.
438    #[cfg(target_arch = "x86_64")]
439    pub fn send_ctrl_alt_del(&mut self) -> Result<(), VmmError> {
440        self.device_manager
441            .legacy_devices
442            .i8042
443            .lock()
444            .expect("i8042 lock was poisoned")
445            .trigger_ctrl_alt_del()
446            .map_err(VmmError::I8042Error)
447    }
448
449    /// Saves the state of a paused Microvm.
450    pub fn save_state(&mut self, vm_info: &VmInfo) -> Result<MicrovmState, MicrovmStateError> {
451        use self::MicrovmStateError::SaveVmState;
452        let vcpu_states = self.save_vcpu_states()?;
453        let kvm_state = self.kvm.save_state();
454        let vm_state = {
455            #[cfg(target_arch = "x86_64")]
456            {
457                self.vm.save_state().map_err(SaveVmState)?
458            }
459            #[cfg(target_arch = "aarch64")]
460            {
461                let mpidrs = construct_kvm_mpidrs(&vcpu_states);
462
463                self.vm.save_state(&mpidrs).map_err(SaveVmState)?
464            }
465        };
466        let device_states = self.device_manager.save();
467
468        Ok(MicrovmState {
469            vm_info: vm_info.clone(),
470            kvm_state,
471            vm_state,
472            vcpu_states,
473            device_states,
474        })
475    }
476
477    fn save_vcpu_states(&mut self) -> Result<Vec<VcpuState>, MicrovmStateError> {
478        for handle in self.vcpus_handles.iter_mut() {
479            handle
480                .send_event(VcpuEvent::SaveState)
481                .map_err(MicrovmStateError::SignalVcpu)?;
482        }
483
484        let vcpu_responses = self
485            .vcpus_handles
486            .iter()
487            // `Iterator::collect` can transform a `Vec<Result>` into a `Result<Vec>`.
488            .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
489            .collect::<Result<Vec<VcpuResponse>, RecvTimeoutError>>()
490            .map_err(|_| MicrovmStateError::UnexpectedVcpuResponse)?;
491
492        let vcpu_states = vcpu_responses
493            .into_iter()
494            .map(|response| match response {
495                VcpuResponse::SavedState(state) => Ok(*state),
496                VcpuResponse::Error(err) => Err(MicrovmStateError::SaveVcpuState(err)),
497                VcpuResponse::NotAllowed(reason) => Err(MicrovmStateError::NotAllowed(reason)),
498                _ => Err(MicrovmStateError::UnexpectedVcpuResponse),
499            })
500            .collect::<Result<Vec<VcpuState>, MicrovmStateError>>()?;
501
502        Ok(vcpu_states)
503    }
504
505    /// Dumps CPU configuration.
506    pub fn dump_cpu_config(&mut self) -> Result<Vec<CpuConfiguration>, DumpCpuConfigError> {
507        for handle in self.vcpus_handles.iter_mut() {
508            handle
509                .send_event(VcpuEvent::DumpCpuConfig)
510                .map_err(DumpCpuConfigError::SendEvent)?;
511        }
512
513        let vcpu_responses = self
514            .vcpus_handles
515            .iter()
516            .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
517            .collect::<Result<Vec<VcpuResponse>, RecvTimeoutError>>()
518            .map_err(|_| DumpCpuConfigError::UnexpectedResponse)?;
519
520        let cpu_configs = vcpu_responses
521            .into_iter()
522            .map(|response| match response {
523                VcpuResponse::DumpedCpuConfig(cpu_config) => Ok(*cpu_config),
524                VcpuResponse::Error(err) => Err(DumpCpuConfigError::DumpCpuConfig(err)),
525                VcpuResponse::NotAllowed(reason) => Err(DumpCpuConfigError::NotAllowed(reason)),
526                _ => Err(DumpCpuConfigError::UnexpectedResponse),
527            })
528            .collect::<Result<Vec<CpuConfiguration>, DumpCpuConfigError>>()?;
529
530        Ok(cpu_configs)
531    }
532
533    /// Updates the path of the host file backing the emulated block device with id `drive_id`.
534    /// We update the disk image on the device and its virtio configuration.
535    pub fn update_block_device_path(
536        &mut self,
537        drive_id: &str,
538        path_on_host: String,
539    ) -> Result<(), VmmError> {
540        self.device_manager
541            .with_virtio_device(drive_id, |block: &mut Block| {
542                block.update_disk_image(path_on_host)
543            })??;
544        Ok(())
545    }
546
547    /// Updates the rate limiter parameters for block device with `drive_id` id.
548    pub fn update_block_rate_limiter(
549        &mut self,
550        drive_id: &str,
551        rl_bytes: BucketUpdate,
552        rl_ops: BucketUpdate,
553    ) -> Result<(), VmmError> {
554        self.device_manager
555            .with_virtio_device(drive_id, |block: &mut Block| {
556                block.update_rate_limiter(rl_bytes, rl_ops)
557            })??;
558        Ok(())
559    }
560
561    /// Updates the rate limiter parameters for block device with `drive_id` id.
562    pub fn update_vhost_user_block_config(&mut self, drive_id: &str) -> Result<(), VmmError> {
563        self.device_manager
564            .with_virtio_device(drive_id, |block: &mut Block| block.update_config())??;
565        Ok(())
566    }
567
568    /// Updates the rate limiter parameters for net device with `net_id` id.
569    pub fn update_net_rate_limiters(
570        &mut self,
571        net_id: &str,
572        rx_bytes: BucketUpdate,
573        rx_ops: BucketUpdate,
574        tx_bytes: BucketUpdate,
575        tx_ops: BucketUpdate,
576    ) -> Result<(), VmmError> {
577        self.device_manager
578            .with_virtio_device(net_id, |net: &mut Net| {
579                net.patch_rate_limiters(rx_bytes, rx_ops, tx_bytes, tx_ops)
580            })?;
581        Ok(())
582    }
583
584    /// Returns a reference to the balloon device if present.
585    pub fn balloon_config(&self) -> Result<BalloonConfig, VmmError> {
586        let config = self
587            .device_manager
588            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.config())?;
589        Ok(config)
590    }
591
592    /// Returns the latest balloon statistics if they are enabled.
593    pub fn latest_balloon_stats(&self) -> Result<BalloonStats, VmmError> {
594        let stats = self
595            .device_manager
596            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.latest_stats())??;
597        Ok(stats)
598    }
599
600    /// Updates configuration for the balloon device target size.
601    pub fn update_balloon_config(&mut self, amount_mib: u32) -> Result<(), VmmError> {
602        self.device_manager
603            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| {
604                dev.update_size(amount_mib)
605            })??;
606        Ok(())
607    }
608
609    /// Updates configuration for the balloon device as described in `balloon_stats_update`.
610    pub fn update_balloon_stats_config(
611        &mut self,
612        stats_polling_interval_s: u16,
613    ) -> Result<(), VmmError> {
614        self.device_manager
615            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| {
616                dev.update_stats_polling_interval(stats_polling_interval_s)
617            })??;
618        Ok(())
619    }
620
621    /// Returns the current state of the memory hotplug device.
622    pub fn memory_hotplug_status(&self) -> Result<VirtioMemStatus, VmmError> {
623        self.device_manager
624            .with_virtio_device(VIRTIO_MEM_DEV_ID, |dev: &mut VirtioMem| dev.status())
625            .map_err(VmmError::FindDeviceError)
626    }
627
628    /// Returns the current state of the memory hotplug device.
629    pub fn update_memory_hotplug_size(&self, requested_size_mib: usize) -> Result<(), VmmError> {
630        self.device_manager
631            .with_virtio_device(VIRTIO_MEM_DEV_ID, |dev: &mut VirtioMem| {
632                dev.update_requested_size(requested_size_mib)
633            })
634            .map_err(VmmError::FindDeviceError)??;
635        Ok(())
636    }
637
638    /// Starts the balloon free page hinting run
639    pub fn start_balloon_hinting(&mut self, cmd: StartHintingCmd) -> Result<(), VmmError> {
640        self.device_manager
641            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.start_hinting(cmd))??;
642        Ok(())
643    }
644
645    /// Retrieves the status of the balloon hinting run
646    pub fn get_balloon_hinting_status(&mut self) -> Result<HintingStatus, VmmError> {
647        let status = self
648            .device_manager
649            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.get_hinting_status())??;
650        Ok(status)
651    }
652
653    /// Stops the balloon free page hinting run
654    pub fn stop_balloon_hinting(&mut self) -> Result<(), VmmError> {
655        self.device_manager
656            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.stop_hinting())??;
657        Ok(())
658    }
659
660    /// Signals Vmm to stop and exit.
661    pub fn stop(&mut self, exit_code: FcExitCode) {
662        // To avoid cycles, all teardown paths take the following route:
663        //   +------------------------+----------------------------+------------------------+
664        //   |        Vmm             |           Action           |           Vcpu         |
665        //   +------------------------+----------------------------+------------------------+
666        // 1 |                        |                            | vcpu.exit(exit_code)   |
667        // 2 |                        |                            | vcpu.exit_evt.write(1) |
668        // 3 |                        | <--- EventFd::exit_evt --- |                        |
669        // 4 | vmm.stop()             |                            |                        |
670        // 5 |                        | --- VcpuEvent::Finish ---> |                        |
671        // 6 |                        |                            | StateMachine::finish() |
672        // 7 | VcpuHandle::join()     |                            |                        |
673        // 8 | vmm.shutdown_exit_code becomes Some(exit_code) breaking the main event loop  |
674        //   +------------------------+----------------------------+------------------------+
675        // Vcpu initiated teardown starts from `fn Vcpu::exit()` (step 1).
676        // Vmm initiated teardown starts from `pub fn Vmm::stop()` (step 4).
677        // Once `vmm.shutdown_exit_code` becomes `Some(exit_code)`, it is the upper layer's
678        // responsibility to break main event loop and propagate the exit code value.
679        info!("Vmm is stopping.");
680
681        // We send a "Finish" event.  If a VCPU has already exited, this is the only
682        // message it will accept... but running and paused will take it as well.
683        // It breaks out of the state machine loop so that the thread can be joined.
684        for (idx, handle) in self.vcpus_handles.iter_mut().enumerate() {
685            if let Err(err) = handle.send_event(VcpuEvent::Finish) {
686                error!("Failed to send VcpuEvent::Finish to vCPU {}: {}", idx, err);
687            }
688        }
689        // The actual thread::join() that runs to release the thread's resource is done in
690        // the VcpuHandle's Drop trait.  We can trigger that to happen now by clearing the
691        // list of handles. Do it here instead of Vmm::Drop to avoid dependency cycles.
692        // (Vmm's Drop will also check if this list is empty).
693        self.vcpus_handles.clear();
694
695        // Break the main event loop, propagating the Vmm exit-code.
696        self.shutdown_exit_code = Some(exit_code);
697    }
698
699    /// Gets a reference to kvm-ioctls Vm
700    #[cfg(feature = "gdb")]
701    pub fn vm(&self) -> &Vm {
702        &self.vm
703    }
704}
705
/// Converts the MPIDR_EL1 value of every vcpu into the layout KVM expects.
///
/// The kernel expects to find the four affinity levels of the MPIDR in the first 32 bits of the
/// VGIC register attribute:
/// https://elixir.free-electrons.com/linux/v4.14.203/source/virt/kvm/arm/vgic/vgic-kvm-device.c#L445.
///
/// The format of the MPIDR_EL1 register is:
/// | 39 .... 32 | 31 .... 24 | 23 .... 16 | 15 .... 8 | 7 .... 0 |
/// |    Aff3    |    Other   |    Aff2    |    Aff1   |   Aff0   |
///
/// The KVM mpidr format is:
/// | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
/// |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
/// As specified in the linux kernel: Documentation/virt/kvm/devices/arm-vgic-v3.rst
///
/// The returned vector has one entry per element of `vcpu_states`, in the same order.
#[cfg(target_arch = "aarch64")]
fn construct_kvm_mpidrs(vcpu_states: &[VcpuState]) -> Vec<u64> {
    // Aff3 lives in bits 39..32 of MPIDR_EL1.
    const AFF3_MASK: u64 = 0xFF_0000_0000;
    // Aff2, Aff1 and Aff0 occupy bits 23..0.
    const AFF2_TO_AFF0_MASK: u64 = 0xFF_FFFF;

    let mut kvm_mpidrs = Vec::with_capacity(vcpu_states.len());
    for state in vcpu_states {
        // Drop the "Other" byte by moving Aff3 right next to Aff2.
        let aff3 = (state.mpidr & AFF3_MASK) >> 8;
        let aff2_to_aff0 = state.mpidr & AFF2_TO_AFF0_MASK;
        // Place the packed affinity levels in the upper 32 bits.
        kvm_mpidrs.push((aff3 | aff2_to_aff0) << 32);
    }
    kvm_mpidrs
}
730
731impl Drop for Vmm {
732    fn drop(&mut self) {
733        // There are two cases when `drop()` is called:
734        // 1) before the Vmm has been mutexed and subscribed to the event manager, or
735        // 2) after the Vmm has been registered as a subscriber to the event manager.
736        //
737        // The first scenario is bound to happen if an error is raised during
738        // Vmm creation (for example, during snapshot load), before the Vmm has
739        // been subscribed to the event manager. If that happens, the `drop()`
740        // function is called right before propagating the error. In order to
741        // be able to gracefully exit Firecracker with the correct fault
742        // message, we need to prepare the Vmm contents for the tear down
743        // (join the vcpu threads). Explicitly calling `stop()` allows the
744        // Vmm to be successfully dropped and firecracker to propagate the
745        // error.
746        //
747        // In the second case, before dropping the Vmm object, the event
748        // manager calls `stop()`, which sends a `Finish` event to the vcpus
749        // and joins the vcpu threads. The Vmm is dropped after everything is
750        // ready to be teared down. The line below is a no-op, because the Vmm
751        // has already been stopped by the event manager at this point.
752        self.stop(self.shutdown_exit_code.unwrap_or(FcExitCode::Ok));
753
754        if let Err(err) = std::io::stdin().lock().set_canon_mode() {
755            warn!("Cannot set canonical mode for the terminal. {:?}", err);
756        }
757
758        // Write the metrics before exiting.
759        if let Err(err) = METRICS.write() {
760            error!("Failed to write metrics while stopping: {}", err);
761        }
762
763        if !self.vcpus_handles.is_empty() {
764            error!("Failed to tear down Vmm: the vcpu threads have not finished execution.");
765        }
766    }
767}
768
769impl MutEventSubscriber for Vmm {
770    /// Handle a read event (EPOLLIN).
771    fn process(&mut self, event: Events, _: &mut EventOps) {
772        let source = event.fd();
773        let event_set = event.event_set();
774
775        if source == self.vcpus_exit_evt.as_raw_fd() && event_set == EventSet::IN {
776            // Exit event handling should never do anything more than call 'self.stop()'.
777            let _ = self.vcpus_exit_evt.read();
778
779            let exit_code = 'exit_code: {
780                // Query each vcpu for their exit_code.
781                for handle in &self.vcpus_handles {
782                    // Drain all vcpu responses that are pending from this vcpu until we find an
783                    // exit status.
784                    for response in handle.response_receiver().try_iter() {
785                        if let VcpuResponse::Exited(status) = response {
786                            // It could be that some vcpus exited successfully while others
787                            // errored out. Thus make sure that error exits from one vcpu always
788                            // takes precedence over "ok" exits
789                            if status != FcExitCode::Ok {
790                                break 'exit_code status;
791                            }
792                        }
793                    }
794                }
795
796                // No CPUs exited with error status code, report "Ok"
797                FcExitCode::Ok
798            };
799            self.stop(exit_code);
800        } else {
801            error!("Spurious EventManager event for handler: Vmm");
802        }
803    }
804
805    fn init(&mut self, ops: &mut EventOps) {
806        if let Err(err) = ops.add(Events::new(&self.vcpus_exit_evt, EventSet::IN)) {
807            error!("Failed to register vmm exit event: {}", err);
808        }
809    }
810}