vmm/lib.rs
1// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the THIRD-PARTY file.
7
8//! Virtual Machine Monitor that leverages the Linux Kernel-based Virtual Machine (KVM),
9//! and other virtualization features to run a single lightweight micro-virtual
10//! machine (microVM).
11#![allow(missing_docs)]
12#![warn(clippy::undocumented_unsafe_blocks)]
13#![allow(clippy::blanket_clippy_restriction_lints)]
14
15/// Implements platform specific functionality.
16/// Supported platforms: x86_64 and aarch64.
17pub mod arch;
18
19/// High-level interface over Linux io_uring.
20///
21/// Aims to provide an easy-to-use interface, while making some Firecracker-specific simplifying
22/// assumptions. The crate does not currently aim at supporting all io_uring features and use
23/// cases. For example, it only works with pre-registered fds and read/write/fsync requests.
24///
25/// Requires at least kernel version 5.10.51.
26/// For more information on io_uring, refer to the man pages.
27/// [This pdf](https://kernel.dk/io_uring.pdf) is also very useful, though outdated at times.
28pub mod io_uring;
29
30/// # Rate Limiter
31///
32/// Provides a rate limiter written in Rust useful for IO operations that need to
33/// be throttled.
34///
35/// ## Behavior
36///
37/// The rate limiter starts off as 'unblocked' with two token buckets configured
38/// with the values passed in the `RateLimiter::new()` constructor.
39/// All subsequent accounting is done independently for each token bucket based
40/// on the `TokenType` used. If any of the buckets runs out of budget, the limiter
41/// goes in the 'blocked' state. At this point an internal timer is set up which
42/// will later 'wake up' the user in order to retry sending data. The 'wake up'
43/// notification will be dispatched as an event on the FD provided by the `AsRawFD`
44/// trait implementation.
45///
46/// The contract is that the user shall also call the `event_handler()` method on
47/// receipt of such an event.
48///
49/// The token buckets are replenished when a called `consume()` doesn't find enough
50/// tokens in the bucket. The amount of tokens replenished is automatically calculated
51/// to respect the `complete_refill_time` configuration parameter provided by the user.
52/// The token buckets will never replenish above their respective `size`.
53///
54/// Each token bucket can start off with a `one_time_burst` initial extra capacity
55/// on top of their `size`. This initial extra credit does not replenish and
56/// can be used for an initial burst of data.
57///
58/// The granularity for 'wake up' events when the rate limiter is blocked is
59/// currently hardcoded to `100 milliseconds`.
60///
61/// ## Limitations
62///
63/// This rate limiter implementation relies on the *Linux kernel's timerfd* so its
64/// usage is limited to Linux systems.
65///
66/// Another particularity of this implementation is that it is not self-driving.
67/// It is meant to be used in an external event loop and thus implements the `AsRawFd`
68/// trait and provides an *event-handler* as part of its API. This *event-handler*
69/// needs to be called by the user on every event on the rate limiter's `AsRawFd` FD.
70pub mod rate_limiter;
71
72/// Module for handling ACPI tables.
73/// Currently, we only use ACPI on x86 microVMs.
74#[cfg(target_arch = "x86_64")]
75pub mod acpi;
/// Handles setup and initialization of a `Vmm` object.
77pub mod builder;
78/// Types for guest configuration.
79pub mod cpu_config;
80pub mod device_manager;
81/// Emulates virtual and hardware devices.
82#[allow(missing_docs)]
83pub mod devices;
/// Minimalist HTTP/TCP/IPv4 stack named DUMBO.
85pub mod dumbo;
86/// Support for GDB debugging the guest
87#[cfg(feature = "gdb")]
88pub mod gdb;
89/// Logger
90pub mod logger;
91/// microVM Metadata Service MMDS
92pub mod mmds;
93/// PCI specific emulation code.
94pub mod pci;
95/// Save/restore utilities.
96pub mod persist;
97/// Resource store for configured microVM resources.
98pub mod resources;
99/// microVM RPC API adapters.
100pub mod rpc_interface;
101/// Seccomp filter utilities.
102pub mod seccomp;
103/// Signal handling utilities.
104pub mod signal_handler;
105/// Serialization and deserialization facilities
106pub mod snapshot;
107/// Utility functions for integration and benchmark testing
108pub mod test_utils;
/// Utility functions and structs.
110pub mod utils;
111/// Wrappers over structures used to configure the VMM.
112pub mod vmm_config;
113/// Module with virtual state structs.
114pub mod vstate;
115
116/// Module with initrd.
117pub mod initrd;
118
119use std::collections::HashMap;
120use std::io;
121use std::os::unix::io::AsRawFd;
122use std::sync::mpsc::RecvTimeoutError;
123use std::sync::{Arc, Barrier, Mutex};
124use std::time::Duration;
125
126use device_manager::DeviceManager;
127use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEventSubscriber};
128use seccomp::BpfProgram;
129use snapshot::Persist;
130use userfaultfd::Uffd;
131use vmm_sys_util::epoll::EventSet;
132use vmm_sys_util::eventfd::EventFd;
133use vmm_sys_util::terminal::Terminal;
134use vstate::kvm::Kvm;
135use vstate::vcpu::{self, StartThreadedError, VcpuSendEventError};
136
137use crate::cpu_config::templates::CpuConfiguration;
138use crate::devices::virtio::balloon::device::{HintingStatus, StartHintingCmd};
139use crate::devices::virtio::balloon::{
140 BALLOON_DEV_ID, Balloon, BalloonConfig, BalloonError, BalloonStats,
141};
142use crate::devices::virtio::block::BlockError;
143use crate::devices::virtio::block::device::Block;
144use crate::devices::virtio::mem::{VIRTIO_MEM_DEV_ID, VirtioMem, VirtioMemError, VirtioMemStatus};
145use crate::devices::virtio::net::Net;
146use crate::logger::{METRICS, MetricsError, error, info, warn};
147use crate::persist::{MicrovmState, MicrovmStateError, VmInfo};
148use crate::rate_limiter::BucketUpdate;
149use crate::vmm_config::instance_info::{InstanceInfo, VmState};
150use crate::vstate::memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
151use crate::vstate::vcpu::VcpuState;
152pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse};
153pub use crate::vstate::vm::Vm;
154
/// Shorthand type for the EventManager flavour used by Firecracker.
///
/// This is the base event manager instantiated with `Arc<Mutex<dyn MutEventSubscriber>>` as its
/// type parameter.
pub type EventManager = BaseEventManager<Arc<Mutex<dyn MutEventSubscriber>>>;
157
/// Vmm exit-code type.
///
/// Each variant carries an explicit numeric value; the variants are listed in ascending order
/// of that value.
// The signal-derived names (e.g. `SIGBUS`) are the most fitting ones here, but they trip the
// clippy lint `upper_case_acronyms`, so that lint is switched off for this enum.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FcExitCode {
    /// Success exit code.
    Ok = 0,
    /// Generic error exit code.
    GenericError = 1,
    /// Generic exit code error; not possible to occur if the program logic is sound.
    UnexpectedError = 2,
    /// Firecracker was shut down after intercepting a restricted system call.
    BadSyscall = 148,
    /// Firecracker was shut down after intercepting `SIGBUS`.
    SIGBUS = 149,
    /// Firecracker was shut down after intercepting `SIGSEGV`.
    SIGSEGV = 150,
    /// Firecracker was shut down after intercepting `SIGXFSZ`.
    SIGXFSZ = 151,
    /// Bad configuration for microvm's resources, when using a single json.
    BadConfiguration = 152,
    /// Command line arguments parsing error.
    ArgParsing = 153,
    /// Firecracker was shut down after intercepting `SIGXCPU`.
    SIGXCPU = 154,
    /// Firecracker was shut down after intercepting `SIGPIPE`.
    SIGPIPE = 155,
    /// Firecracker was shut down after intercepting `SIGHUP`.
    SIGHUP = 156,
    /// Firecracker was shut down after intercepting `SIGILL`.
    SIGILL = 157,
}
191
/// Timeout used in recv_timeout, when waiting for a vcpu response on
/// Pause/Resume/Save/Restore. A high enough limit that should not be reached during normal usage,
/// used to detect a potential vcpu deadlock.
///
/// Despite the `_SEC` suffix this is a [`Duration`] of 30 seconds, not a bare number, so it can
/// be handed to `recv_timeout` as is.
pub const RECV_TIMEOUT_SEC: Duration = Duration::from_secs(30);
196
/// Default upper bound, in bytes, on the size of HTTP requests accepted by the API and MMDS
/// servers (50 KiB).
pub const HTTP_MAX_PAYLOAD_SIZE: usize = 50 * 1024;
199
/// Errors associated with the VMM internal logic. These errors cannot be generated by direct user
/// input, but can result from bad configuration of the host (for example if Firecracker doesn't
/// have permissions to open the KVM fd).
// NOTE: `displaydoc::Display` builds the `Display` implementation from the `///` comment on each
// variant, with `{0}` standing for the wrapped value. Those comments are therefore user-visible
// error messages and should be edited with the same care as string literals.
// Variants marked `#[from]` additionally get a `From` conversion, which is what lets `?`
// propagate the wrapped error types directly from functions returning `VmmError`.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VmmError {
    #[cfg(target_arch = "aarch64")]
    /// Invalid command line error.
    Cmdline,
    /// Device manager error: {0}
    DeviceManager(#[from] device_manager::DeviceManagerCreateError),
    /// MMIO Device manager error: {0}
    MmioDeviceManager(device_manager::mmio::MmioError),
    /// Error getting the KVM dirty bitmap. {0}
    DirtyBitmap(kvm_ioctls::Error),
    /// I8042 error: {0}
    I8042Error(devices::legacy::I8042DeviceError),
    #[cfg(target_arch = "x86_64")]
    /// Cannot add devices to the legacy I/O Bus. {0}
    LegacyIOBus(device_manager::legacy::LegacyDeviceError),
    /// Metrics error: {0}
    Metrics(MetricsError),
    /// Cannot add a device to the MMIO Bus. {0}
    RegisterMMIODevice(device_manager::mmio::MmioError),
    /// Cannot install seccomp filters: {0}
    SeccompFilters(seccomp::InstallationError),
    /// Error writing to the serial console: {0}
    Serial(io::Error),
    /// Error creating timer fd: {0}
    TimerFd(io::Error),
    /// Error creating the vcpu: {0}
    VcpuCreate(vstate::vcpu::VcpuError),
    /// Cannot send event to vCPU. {0}
    VcpuEvent(vstate::vcpu::VcpuError),
    /// Cannot create a vCPU handle. {0}
    VcpuHandle(vstate::vcpu::VcpuError),
    /// Failed to start vCPUs
    VcpuStart(StartVcpusError),
    /// Failed to pause the vCPUs.
    VcpuPause,
    /// Failed to exit the vCPUs.
    VcpuExit,
    /// Failed to resume the vCPUs.
    VcpuResume,
    /// Failed to message the vCPUs.
    VcpuMessage,
    /// Cannot spawn Vcpu thread: {0}
    VcpuSpawn(io::Error),
    /// Vm error: {0}
    Vm(#[from] vstate::vm::VmError),
    /// Kvm error: {0}
    Kvm(#[from] vstate::kvm::KvmError),
    /// Failed perform action on device: {0}
    FindDeviceError(#[from] device_manager::FindDeviceError),
    /// Block: {0}
    Block(#[from] BlockError),
    /// Balloon: {0}
    Balloon(#[from] BalloonError),
    /// Failed to create memory hotplug device: {0}
    VirtioMem(#[from] VirtioMemError),
}
260
/// Shorthand type for KVM dirty page bitmap.
///
/// Maps a `u32` key to a bitmap stored as a vector of `u64` words.
// NOTE(review): the key is presumably a KVM memory slot index and each bit presumably stands for
// one guest page; confirm against the code that fills this map.
pub type DirtyBitmap = HashMap<u32, Vec<u64>>;
263
264/// Returns the size of guest memory, in MiB.
265pub(crate) fn mem_size_mib(guest_memory: &GuestMemoryMmap) -> u64 {
266 guest_memory.iter().map(|region| region.len()).sum::<u64>() >> 20
267}
268
// Error type for [`Vmm::emulate_serial_init`].
// NOTE(review): no `emulate_serial_init` method is visible on `Vmm` in this file; confirm the
// reference above is still current.
// The line above is a plain `//` comment, presumably on purpose: with `displaydoc::Display` the
// `///` comment below is what provides this struct's `Display` message.
/// Emulate serial init error: {0}
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub struct EmulateSerialInitError(#[from] std::io::Error);
273
/// Error type for [`Vmm::start_vcpus`].
// Both variants are `#[from]` conversions, so `start_vcpus` can use `?` on either source error.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum StartVcpusError {
    // In `Vmm::start_vcpus` the `?` sites other than `start_threaded` are the stdin
    // `set_raw_mode` / `set_non_block` calls.
    // NOTE(review): presumably those terminal failures are what ends up in this variant, despite
    // its name; confirm against the `Terminal` trait's error type.
    /// VMM observer init error: {0}
    VmmObserverInit(#[from] vmm_sys_util::errno::Error),
    /// Vcpu handle error: {0}
    VcpuHandle(#[from] StartThreadedError),
}
282
/// Error type for [`Vmm::dump_cpu_config()`]
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum DumpCpuConfigError {
    // Produced when `send_event(VcpuEvent::DumpCpuConfig)` fails for a vCPU handle.
    /// Failed to send event to vcpu thread: {0}
    SendEvent(#[from] VcpuSendEventError),
    // Produced when receiving a response fails (`recv_timeout` error) or when the response is
    // not one of the variants `dump_cpu_config` handles explicitly.
    /// Got unexpected response from vcpu thread.
    UnexpectedResponse,
    // Produced when a vCPU answers with `VcpuResponse::Error`.
    /// Failed to dump CPU config: {0}
    DumpCpuConfig(#[from] vcpu::VcpuError),
    // Produced when a vCPU answers with `VcpuResponse::NotAllowed`; carries its reason string.
    /// Operation not allowed: {0}
    NotAllowed(String),
}
295
/// Contains the state and associated methods required for the Firecracker VMM.
#[derive(Debug)]
pub struct Vmm {
    /// The [`InstanceInfo`] state of this [`Vmm`].
    pub instance_info: InstanceInfo,
    // Set to `Some(exit_code)` by `Vmm::stop` and reset to `None` by
    // `Vmm::clear_shutdown_exit_code`; exposed read-only through `Vmm::shutdown_exit_code`.
    shutdown_exit_code: Option<FcExitCode>,

    // Guest VM core resources.
    kvm: Kvm,
    /// VM object
    pub vm: Arc<Vm>,
    // Save UFFD in order to keep it open in the Firecracker process, as well.
    #[allow(unused)]
    uffd: Option<Uffd>,
    /// Handles to the vcpu threads with vcpu_fds inside them.
    ///
    /// Filled by [`Vmm::start_vcpus`] and emptied by [`Vmm::stop`].
    pub vcpus_handles: Vec<VcpuHandle>,
    // Used by Vcpus and devices to initiate teardown; Vmm should never write here.
    // The Vmm registers this fd for `EventSet::IN` in `MutEventSubscriber::init` and reads it in
    // `MutEventSubscriber::process` before calling `stop()`.
    vcpus_exit_evt: EventFd,
    /// Device manager; the device-related methods on [`Vmm`] reach devices through it.
    pub device_manager: DeviceManager,
}
317
318impl Vmm {
    /// Gets Vmm version.
    ///
    /// Returns an owned copy of `instance_info.vmm_version`.
    pub fn version(&self) -> String {
        self.instance_info.vmm_version.clone()
    }
323
    /// Gets Vmm instance info.
    ///
    /// Returns a clone of the stored [`InstanceInfo`].
    pub fn instance_info(&self) -> InstanceInfo {
        self.instance_info.clone()
    }
328
    /// Provides access to the underlying KVM handle.
    ///
    /// The returned reference borrows the [`Kvm`] object owned by this Vmm.
    pub fn kvm(&self) -> &Kvm {
        &self.kvm
    }
333
    /// Provides the Vmm shutdown exit code if there is one.
    ///
    /// [`Vmm::stop`] stores `Some(exit_code)`; [`Vmm::clear_shutdown_exit_code`] resets the
    /// value to `None`.
    pub fn shutdown_exit_code(&self) -> Option<FcExitCode> {
        self.shutdown_exit_code
    }
338
    /// Clears any shutdown exit code and returns the VM to a paused state.
    ///
    /// The instance state is set to [`VmState::Paused`] unconditionally; no event is sent to
    /// the vCPUs by this method.
    pub fn clear_shutdown_exit_code(&mut self) {
        self.shutdown_exit_code = None;
        self.instance_info.state = VmState::Paused;
    }
344
345 /// Starts the microVM vcpus.
346 ///
347 /// # Errors
348 ///
349 /// When:
350 /// - [`vmm::VmmEventsObserver::on_vmm_boot`] errors.
351 /// - [`vmm::vstate::vcpu::Vcpu::start_threaded`] errors.
352 pub fn start_vcpus(
353 &mut self,
354 mut vcpus: Vec<Vcpu>,
355 vcpu_seccomp_filter: Arc<BpfProgram>,
356 ) -> Result<(), StartVcpusError> {
357 let vcpu_count = vcpus.len();
358 let barrier = Arc::new(Barrier::new(vcpu_count + 1));
359
360 let stdin = std::io::stdin().lock();
361 // Set raw mode for stdin.
362 stdin.set_raw_mode().inspect_err(|&err| {
363 warn!("Cannot set raw mode for the terminal. {:?}", err);
364 })?;
365
366 // Set non blocking stdin.
367 stdin.set_non_block(true).inspect_err(|&err| {
368 warn!("Cannot set non block for the terminal. {:?}", err);
369 })?;
370
371 self.vcpus_handles.reserve(vcpu_count);
372
373 for mut vcpu in vcpus.drain(..) {
374 vcpu.set_mmio_bus(self.vm.common.mmio_bus.clone());
375 #[cfg(target_arch = "x86_64")]
376 vcpu.kvm_vcpu.set_pio_bus(self.vm.pio_bus.clone());
377
378 self.vcpus_handles.push(vcpu.start_threaded(
379 &self.vm,
380 vcpu_seccomp_filter.clone(),
381 barrier.clone(),
382 )?);
383 }
384 self.instance_info.state = VmState::Paused;
385 // Wait for vCPUs to initialize their TLS before moving forward.
386 barrier.wait();
387
388 Ok(())
389 }
390
391 /// Sends a resume command to the vCPUs.
392 pub fn resume_vm(&mut self) -> Result<(), VmmError> {
393 self.device_manager.kick_virtio_devices();
394
395 // Send the events.
396 self.vcpus_handles
397 .iter_mut()
398 .try_for_each(|handle| handle.send_event(VcpuEvent::Resume))
399 .map_err(|_| VmmError::VcpuMessage)?;
400
401 // Check the responses.
402 if self
403 .vcpus_handles
404 .iter()
405 .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
406 .any(|response| !matches!(response, Ok(VcpuResponse::Resumed)))
407 {
408 return Err(VmmError::VcpuMessage);
409 }
410
411 self.instance_info.state = VmState::Running;
412 Ok(())
413 }
414
415 /// Sends a pause command to the vCPUs.
416 pub fn pause_vm(&mut self) -> Result<(), VmmError> {
417 // Send the events.
418 self.vcpus_handles
419 .iter_mut()
420 .try_for_each(|handle| handle.send_event(VcpuEvent::Pause))
421 .map_err(|_| VmmError::VcpuMessage)?;
422
423 // Check the responses.
424 if self
425 .vcpus_handles
426 .iter()
427 .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
428 .any(|response| !matches!(response, Ok(VcpuResponse::Paused)))
429 {
430 return Err(VmmError::VcpuMessage);
431 }
432
433 self.instance_info.state = VmState::Paused;
434 Ok(())
435 }
436
437 /// Injects CTRL+ALT+DEL keystroke combo in the i8042 device.
438 #[cfg(target_arch = "x86_64")]
439 pub fn send_ctrl_alt_del(&mut self) -> Result<(), VmmError> {
440 self.device_manager
441 .legacy_devices
442 .i8042
443 .lock()
444 .expect("i8042 lock was poisoned")
445 .trigger_ctrl_alt_del()
446 .map_err(VmmError::I8042Error)
447 }
448
449 /// Saves the state of a paused Microvm.
450 pub fn save_state(&mut self, vm_info: &VmInfo) -> Result<MicrovmState, MicrovmStateError> {
451 use self::MicrovmStateError::SaveVmState;
452 let vcpu_states = self.save_vcpu_states()?;
453 let kvm_state = self.kvm.save_state();
454 let vm_state = {
455 #[cfg(target_arch = "x86_64")]
456 {
457 self.vm.save_state().map_err(SaveVmState)?
458 }
459 #[cfg(target_arch = "aarch64")]
460 {
461 let mpidrs = construct_kvm_mpidrs(&vcpu_states);
462
463 self.vm.save_state(&mpidrs).map_err(SaveVmState)?
464 }
465 };
466 let device_states = self.device_manager.save();
467
468 Ok(MicrovmState {
469 vm_info: vm_info.clone(),
470 kvm_state,
471 vm_state,
472 vcpu_states,
473 device_states,
474 })
475 }
476
477 fn save_vcpu_states(&mut self) -> Result<Vec<VcpuState>, MicrovmStateError> {
478 for handle in self.vcpus_handles.iter_mut() {
479 handle
480 .send_event(VcpuEvent::SaveState)
481 .map_err(MicrovmStateError::SignalVcpu)?;
482 }
483
484 let vcpu_responses = self
485 .vcpus_handles
486 .iter()
487 // `Iterator::collect` can transform a `Vec<Result>` into a `Result<Vec>`.
488 .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
489 .collect::<Result<Vec<VcpuResponse>, RecvTimeoutError>>()
490 .map_err(|_| MicrovmStateError::UnexpectedVcpuResponse)?;
491
492 let vcpu_states = vcpu_responses
493 .into_iter()
494 .map(|response| match response {
495 VcpuResponse::SavedState(state) => Ok(*state),
496 VcpuResponse::Error(err) => Err(MicrovmStateError::SaveVcpuState(err)),
497 VcpuResponse::NotAllowed(reason) => Err(MicrovmStateError::NotAllowed(reason)),
498 _ => Err(MicrovmStateError::UnexpectedVcpuResponse),
499 })
500 .collect::<Result<Vec<VcpuState>, MicrovmStateError>>()?;
501
502 Ok(vcpu_states)
503 }
504
505 /// Dumps CPU configuration.
506 pub fn dump_cpu_config(&mut self) -> Result<Vec<CpuConfiguration>, DumpCpuConfigError> {
507 for handle in self.vcpus_handles.iter_mut() {
508 handle
509 .send_event(VcpuEvent::DumpCpuConfig)
510 .map_err(DumpCpuConfigError::SendEvent)?;
511 }
512
513 let vcpu_responses = self
514 .vcpus_handles
515 .iter()
516 .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
517 .collect::<Result<Vec<VcpuResponse>, RecvTimeoutError>>()
518 .map_err(|_| DumpCpuConfigError::UnexpectedResponse)?;
519
520 let cpu_configs = vcpu_responses
521 .into_iter()
522 .map(|response| match response {
523 VcpuResponse::DumpedCpuConfig(cpu_config) => Ok(*cpu_config),
524 VcpuResponse::Error(err) => Err(DumpCpuConfigError::DumpCpuConfig(err)),
525 VcpuResponse::NotAllowed(reason) => Err(DumpCpuConfigError::NotAllowed(reason)),
526 _ => Err(DumpCpuConfigError::UnexpectedResponse),
527 })
528 .collect::<Result<Vec<CpuConfiguration>, DumpCpuConfigError>>()?;
529
530 Ok(cpu_configs)
531 }
532
533 /// Updates the path of the host file backing the emulated block device with id `drive_id`.
534 /// We update the disk image on the device and its virtio configuration.
535 pub fn update_block_device_path(
536 &mut self,
537 drive_id: &str,
538 path_on_host: String,
539 ) -> Result<(), VmmError> {
540 self.device_manager
541 .with_virtio_device(drive_id, |block: &mut Block| {
542 block.update_disk_image(path_on_host)
543 })??;
544 Ok(())
545 }
546
547 /// Updates the rate limiter parameters for block device with `drive_id` id.
548 pub fn update_block_rate_limiter(
549 &mut self,
550 drive_id: &str,
551 rl_bytes: BucketUpdate,
552 rl_ops: BucketUpdate,
553 ) -> Result<(), VmmError> {
554 self.device_manager
555 .with_virtio_device(drive_id, |block: &mut Block| {
556 block.update_rate_limiter(rl_bytes, rl_ops)
557 })??;
558 Ok(())
559 }
560
561 /// Updates the rate limiter parameters for block device with `drive_id` id.
562 pub fn update_vhost_user_block_config(&mut self, drive_id: &str) -> Result<(), VmmError> {
563 self.device_manager
564 .with_virtio_device(drive_id, |block: &mut Block| block.update_config())??;
565 Ok(())
566 }
567
568 /// Updates the rate limiter parameters for net device with `net_id` id.
569 pub fn update_net_rate_limiters(
570 &mut self,
571 net_id: &str,
572 rx_bytes: BucketUpdate,
573 rx_ops: BucketUpdate,
574 tx_bytes: BucketUpdate,
575 tx_ops: BucketUpdate,
576 ) -> Result<(), VmmError> {
577 self.device_manager
578 .with_virtio_device(net_id, |net: &mut Net| {
579 net.patch_rate_limiters(rx_bytes, rx_ops, tx_bytes, tx_ops)
580 })?;
581 Ok(())
582 }
583
584 /// Returns a reference to the balloon device if present.
585 pub fn balloon_config(&self) -> Result<BalloonConfig, VmmError> {
586 let config = self
587 .device_manager
588 .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.config())?;
589 Ok(config)
590 }
591
592 /// Returns the latest balloon statistics if they are enabled.
593 pub fn latest_balloon_stats(&self) -> Result<BalloonStats, VmmError> {
594 let stats = self
595 .device_manager
596 .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.latest_stats())??;
597 Ok(stats)
598 }
599
600 /// Updates configuration for the balloon device target size.
601 pub fn update_balloon_config(&mut self, amount_mib: u32) -> Result<(), VmmError> {
602 self.device_manager
603 .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| {
604 dev.update_size(amount_mib)
605 })??;
606 Ok(())
607 }
608
609 /// Updates configuration for the balloon device as described in `balloon_stats_update`.
610 pub fn update_balloon_stats_config(
611 &mut self,
612 stats_polling_interval_s: u16,
613 ) -> Result<(), VmmError> {
614 self.device_manager
615 .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| {
616 dev.update_stats_polling_interval(stats_polling_interval_s)
617 })??;
618 Ok(())
619 }
620
621 /// Returns the current state of the memory hotplug device.
622 pub fn memory_hotplug_status(&self) -> Result<VirtioMemStatus, VmmError> {
623 self.device_manager
624 .with_virtio_device(VIRTIO_MEM_DEV_ID, |dev: &mut VirtioMem| dev.status())
625 .map_err(VmmError::FindDeviceError)
626 }
627
628 /// Returns the current state of the memory hotplug device.
629 pub fn update_memory_hotplug_size(&self, requested_size_mib: usize) -> Result<(), VmmError> {
630 self.device_manager
631 .with_virtio_device(VIRTIO_MEM_DEV_ID, |dev: &mut VirtioMem| {
632 dev.update_requested_size(requested_size_mib)
633 })
634 .map_err(VmmError::FindDeviceError)??;
635 Ok(())
636 }
637
638 /// Starts the balloon free page hinting run
639 pub fn start_balloon_hinting(&mut self, cmd: StartHintingCmd) -> Result<(), VmmError> {
640 self.device_manager
641 .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.start_hinting(cmd))??;
642 Ok(())
643 }
644
645 /// Retrieves the status of the balloon hinting run
646 pub fn get_balloon_hinting_status(&mut self) -> Result<HintingStatus, VmmError> {
647 let status = self
648 .device_manager
649 .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.get_hinting_status())??;
650 Ok(status)
651 }
652
653 /// Stops the balloon free page hinting run
654 pub fn stop_balloon_hinting(&mut self) -> Result<(), VmmError> {
655 self.device_manager
656 .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.stop_hinting())??;
657 Ok(())
658 }
659
660 /// Signals Vmm to stop and exit.
661 pub fn stop(&mut self, exit_code: FcExitCode) {
662 // To avoid cycles, all teardown paths take the following route:
663 // +------------------------+----------------------------+------------------------+
664 // | Vmm | Action | Vcpu |
665 // +------------------------+----------------------------+------------------------+
666 // 1 | | | vcpu.exit(exit_code) |
667 // 2 | | | vcpu.exit_evt.write(1) |
668 // 3 | | <--- EventFd::exit_evt --- | |
669 // 4 | vmm.stop() | | |
670 // 5 | | --- VcpuEvent::Finish ---> | |
671 // 6 | | | StateMachine::finish() |
672 // 7 | VcpuHandle::join() | | |
673 // 8 | vmm.shutdown_exit_code becomes Some(exit_code) breaking the main event loop |
674 // +------------------------+----------------------------+------------------------+
675 // Vcpu initiated teardown starts from `fn Vcpu::exit()` (step 1).
676 // Vmm initiated teardown starts from `pub fn Vmm::stop()` (step 4).
677 // Once `vmm.shutdown_exit_code` becomes `Some(exit_code)`, it is the upper layer's
678 // responsibility to break main event loop and propagate the exit code value.
679 info!("Vmm is stopping.");
680
681 // We send a "Finish" event. If a VCPU has already exited, this is the only
682 // message it will accept... but running and paused will take it as well.
683 // It breaks out of the state machine loop so that the thread can be joined.
684 for (idx, handle) in self.vcpus_handles.iter_mut().enumerate() {
685 if let Err(err) = handle.send_event(VcpuEvent::Finish) {
686 error!("Failed to send VcpuEvent::Finish to vCPU {}: {}", idx, err);
687 }
688 }
689 // The actual thread::join() that runs to release the thread's resource is done in
690 // the VcpuHandle's Drop trait. We can trigger that to happen now by clearing the
691 // list of handles. Do it here instead of Vmm::Drop to avoid dependency cycles.
692 // (Vmm's Drop will also check if this list is empty).
693 self.vcpus_handles.clear();
694
695 // Break the main event loop, propagating the Vmm exit-code.
696 self.shutdown_exit_code = Some(exit_code);
697 }
698
    /// Gets a shared reference to the [`Vm`] object held by this Vmm.
    ///
    /// Only compiled when the `gdb` feature is enabled.
    #[cfg(feature = "gdb")]
    pub fn vm(&self) -> &Vm {
        &self.vm
    }
704}
705
/// Converts the MPIDR_EL1 value of every vCPU into the layout KVM expects.
///
/// The kernel expects to find the four affinity levels of the MPIDR in the first 32 bits of the
/// VGIC register attribute:
/// <https://elixir.free-electrons.com/linux/v4.14.203/source/virt/kvm/arm/vgic/vgic-kvm-device.c#L445>.
///
/// The format of the MPIDR_EL1 register is:
///
/// ```text
/// | 39 .... 32 | 31 .... 24 | 23 .... 16 | 15 .... 8 | 7 .... 0 |
/// |    Aff3    |   Other    |    Aff2    |   Aff1    |   Aff0   |
/// ```
///
/// The KVM mpidr format is:
///
/// ```text
/// | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
/// |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
/// ```
///
/// As specified in the linux kernel: Documentation/virt/kvm/devices/arm-vgic-v3.rst
///
/// The returned vector has one entry per element of `vcpu_states`, in the same order.
#[cfg(target_arch = "aarch64")]
fn construct_kvm_mpidrs(vcpu_states: &[VcpuState]) -> Vec<u64> {
    // Aff3 sits in bits 39..32 of MPIDR_EL1.
    const AFF3_MASK: u64 = 0xFF_0000_0000;
    // Aff2, Aff1 and Aff0 sit in bits 23..0 of MPIDR_EL1.
    const AFF2_AFF1_AFF0_MASK: u64 = 0xFF_FFFF;

    let mut kvm_mpidrs = Vec::with_capacity(vcpu_states.len());
    for state in vcpu_states {
        // Move Aff3 down next to Aff2, skipping the 8 "Other" bits.
        let aff3 = (state.mpidr & AFF3_MASK) >> 8;
        let aff2_aff1_aff0 = state.mpidr & AFF2_AFF1_AFF0_MASK;
        // The packed affinity levels go into the upper 32 bits.
        kvm_mpidrs.push((aff3 | aff2_aff1_aff0) << 32);
    }
    kvm_mpidrs
}
730
731impl Drop for Vmm {
732 fn drop(&mut self) {
733 // There are two cases when `drop()` is called:
734 // 1) before the Vmm has been mutexed and subscribed to the event manager, or
735 // 2) after the Vmm has been registered as a subscriber to the event manager.
736 //
737 // The first scenario is bound to happen if an error is raised during
738 // Vmm creation (for example, during snapshot load), before the Vmm has
739 // been subscribed to the event manager. If that happens, the `drop()`
740 // function is called right before propagating the error. In order to
741 // be able to gracefully exit Firecracker with the correct fault
742 // message, we need to prepare the Vmm contents for the tear down
743 // (join the vcpu threads). Explicitly calling `stop()` allows the
744 // Vmm to be successfully dropped and firecracker to propagate the
745 // error.
746 //
747 // In the second case, before dropping the Vmm object, the event
748 // manager calls `stop()`, which sends a `Finish` event to the vcpus
749 // and joins the vcpu threads. The Vmm is dropped after everything is
750 // ready to be teared down. The line below is a no-op, because the Vmm
751 // has already been stopped by the event manager at this point.
752 self.stop(self.shutdown_exit_code.unwrap_or(FcExitCode::Ok));
753
754 if let Err(err) = std::io::stdin().lock().set_canon_mode() {
755 warn!("Cannot set canonical mode for the terminal. {:?}", err);
756 }
757
758 // Write the metrics before exiting.
759 if let Err(err) = METRICS.write() {
760 error!("Failed to write metrics while stopping: {}", err);
761 }
762
763 if !self.vcpus_handles.is_empty() {
764 error!("Failed to tear down Vmm: the vcpu threads have not finished execution.");
765 }
766 }
767}
768
769impl MutEventSubscriber for Vmm {
770 /// Handle a read event (EPOLLIN).
771 fn process(&mut self, event: Events, _: &mut EventOps) {
772 let source = event.fd();
773 let event_set = event.event_set();
774
775 if source == self.vcpus_exit_evt.as_raw_fd() && event_set == EventSet::IN {
776 // Exit event handling should never do anything more than call 'self.stop()'.
777 let _ = self.vcpus_exit_evt.read();
778
779 let exit_code = 'exit_code: {
780 // Query each vcpu for their exit_code.
781 for handle in &self.vcpus_handles {
782 // Drain all vcpu responses that are pending from this vcpu until we find an
783 // exit status.
784 for response in handle.response_receiver().try_iter() {
785 if let VcpuResponse::Exited(status) = response {
786 // It could be that some vcpus exited successfully while others
787 // errored out. Thus make sure that error exits from one vcpu always
788 // takes precedence over "ok" exits
789 if status != FcExitCode::Ok {
790 break 'exit_code status;
791 }
792 }
793 }
794 }
795
796 // No CPUs exited with error status code, report "Ok"
797 FcExitCode::Ok
798 };
799 self.stop(exit_code);
800 } else {
801 error!("Spurious EventManager event for handler: Vmm");
802 }
803 }
804
805 fn init(&mut self, ops: &mut EventOps) {
806 if let Err(err) = ops.add(Events::new(&self.vcpus_exit_evt, EventSet::IN)) {
807 error!("Failed to register vmm exit event: {}", err);
808 }
809 }
810}