vmm/vstate/
vcpu.rs

1// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the THIRD-PARTY file.
7
8use std::os::fd::AsRawFd;
9use std::sync::atomic::{Ordering, fence};
10use std::sync::mpsc::{Receiver, Sender, TryRecvError, channel};
11use std::sync::{Arc, Barrier};
12use std::{fmt, io, thread};
13
14use kvm_bindings::{KVM_EXIT_DIRTY_RING_FULL, KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
15use kvm_ioctls::{VcpuExit, VcpuFd};
16use libc::{c_int, c_void, siginfo_t};
17use log::{error, info, warn};
18use vmm_sys_util::errno;
19use vmm_sys_util::eventfd::EventFd;
20
21use crate::FcExitCode;
22pub use crate::arch::{KvmVcpu, KvmVcpuConfigureError, KvmVcpuError, Peripherals, VcpuState};
23use crate::cpu_config::templates::{CpuConfiguration, GuestConfigError};
24#[cfg(feature = "gdb")]
25use crate::gdb::target::{GdbTargetError, get_raw_tid};
26use crate::logger::{IncMetric, METRICS};
27use crate::seccomp::{BpfProgram, BpfProgramRef};
28use crate::utils::signal::{Killable, register_signal_handler, sigrtmin};
29use crate::utils::sm::StateMachine;
30use crate::vstate::bus::Bus;
31use crate::vstate::vm::Vm;
32
/// Offset added to `SIGRTMIN` to obtain the real-time signal used to kick Vcpus.
///
/// With the current value of 0 the kick signal is `SIGRTMIN` itself.
pub const VCPU_RTSIG_OFFSET: i32 = 0;
35
// NOTE: `displaydoc::Display` is derived for this enum, so the doc comment on each variant is
// also its `Display` message. Treat the variant doc comments as runtime strings when editing.
/// Errors associated with the wrappers over KVM ioctls.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VcpuError {
    /// Error creating vcpu config: {0}
    VcpuConfig(GuestConfigError),
    /// Received error signaling kvm exit: {0}
    FaultyKvmExit(String),
    /// Failed to signal vcpu: {0}
    SignalVcpu(vmm_sys_util::errno::Error),
    /// Unexpected kvm exit received: {0}
    UnhandledKvmExit(String),
    /// Failed to run action on vcpu: {0}
    VcpuResponse(KvmVcpuError),
    /// Cannot spawn a new vCPU thread: {0}
    VcpuSpawn(io::Error),
    /// Vcpu not present in TLS
    VcpuTlsNotPresent,
    /// Error with gdb request sent
    #[cfg(feature = "gdb")]
    GdbRequest(GdbTargetError),
}
57
/// Encapsulates configuration parameters for the guest vCPUs.
#[derive(Debug)]
pub struct VcpuConfig {
    /// Number of guest vCPUs.
    pub vcpu_count: u8,
    /// Enable simultaneous multithreading in the CPUID configuration.
    pub smt: bool,
    /// CPU configuration to apply to the vCPUs.
    pub cpu_config: CpuConfiguration,
}
68
// NOTE: `displaydoc::Display` is derived for this enum, so the doc comment on each variant is
// also its `Display` message. Treat the variant doc comments as runtime strings when editing.
/// Error type for [`Vcpu::start_threaded`].
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum StartThreadedError {
    /// Failed to spawn vCPU thread: {0}
    Spawn(std::io::Error),
    /// Failed to clone kvm Vcpu fd: {0}
    CopyFd(CopyKvmFdError),
}
77
// NOTE: `displaydoc::Display` is derived for this enum, so the doc comment on each variant is
// also its `Display` message. Treat the variant doc comments as runtime strings when editing.
/// Error type for [`Vcpu::copy_kvm_vcpu_fd`].
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum CopyKvmFdError {
    /// Error with libc dup of kvm Vcpu fd
    DupError(#[from] std::io::Error),
    /// Error creating the Vcpu from the duplicated Vcpu fd
    CreateVcpuError(#[from] kvm_ioctls::Error),
}
86
/// A wrapper around creating and using a vcpu.
///
/// A `Vcpu` owns the vcpu side of two channels: it receives [`VcpuEvent`]s and answers with
/// [`VcpuResponse`]s. The opposite ends are stored here only until [`Vcpu::start_threaded`]
/// hands them over to the [`VcpuHandle`].
#[derive(Debug)]
pub struct Vcpu {
    /// Access to kvm-arch specific functionality.
    pub kvm_vcpu: KvmVcpu,

    /// File descriptor for vcpu to trigger exit event on vmm.
    exit_evt: EventFd,
    /// Debugger emitter for gdb events; `None` until [`Vcpu::attach_debug_info`] is called.
    #[cfg(feature = "gdb")]
    gdb_event: Option<Sender<usize>>,
    /// The receiving end of events channel owned by the vcpu side.
    pub event_receiver: Receiver<VcpuEvent>,
    /// The transmitting end of the events channel which will be given to the handler.
    /// Becomes `None` once [`Vcpu::start_threaded`] has taken it.
    pub event_sender: Option<Sender<VcpuEvent>>,
    /// The receiving end of the responses channel which will be given to the handler.
    /// Becomes `None` once [`Vcpu::start_threaded`] has taken it.
    pub response_receiver: Option<Receiver<VcpuResponse>>,
    /// The transmitting end of the responses channel owned by the vcpu side.
    response_sender: Sender<VcpuResponse>,
}
107
108impl Vcpu {
109    /// Registers a signal handler which kicks the vcpu running on the current thread, if there is
110    /// one.
111    fn register_kick_signal_handler(&mut self) {
112        extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) {
113            // We write to the immediate_exit from other thread, so make sure the read in the
114            // KVM_RUN sees the up to date value
115            fence(Ordering::Acquire);
116        }
117        register_signal_handler(sigrtmin() + VCPU_RTSIG_OFFSET, handle_signal)
118            .expect("Failed to register vcpu signal handler");
119    }
120
121    /// Constructs a new VCPU for `vm`.
122    ///
123    /// # Arguments
124    ///
125    /// * `index` - Represents the 0-based CPU index between [0, max vcpus).
126    /// * `vm` - The vm to which this vcpu will get attached.
127    /// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits.
128    pub fn new(index: u8, vm: &Vm, exit_evt: EventFd) -> Result<Self, VcpuError> {
129        let (event_sender, event_receiver) = channel();
130        let (response_sender, response_receiver) = channel();
131        let kvm_vcpu = KvmVcpu::new(index, vm).unwrap();
132
133        Ok(Vcpu {
134            exit_evt,
135            event_receiver,
136            event_sender: Some(event_sender),
137            response_receiver: Some(response_receiver),
138            response_sender,
139            #[cfg(feature = "gdb")]
140            gdb_event: None,
141            kvm_vcpu,
142        })
143    }
144
145    /// Sets a MMIO bus for this vcpu.
146    pub fn set_mmio_bus(&mut self, mmio_bus: Arc<Bus>) {
147        self.kvm_vcpu.peripherals.mmio_bus = Some(mmio_bus);
148    }
149
150    /// Attaches the fields required for debugging
151    #[cfg(feature = "gdb")]
152    pub fn attach_debug_info(&mut self, gdb_event: Sender<usize>) {
153        self.gdb_event = Some(gdb_event);
154    }
155
156    /// Obtains a copy of the VcpuFd
157    pub fn copy_kvm_vcpu_fd(&self, vm: &Vm) -> Result<VcpuFd, CopyKvmFdError> {
158        // SAFETY: We own this fd so it is considered safe to clone
159        let r = unsafe { libc::dup(self.kvm_vcpu.fd.as_raw_fd()) };
160        if r < 0 {
161            return Err(std::io::Error::last_os_error().into());
162        }
163        // SAFETY: We assert this is a valid fd by checking the result from the dup
164        unsafe { Ok(vm.fd().create_vcpu_from_rawfd(r)?) }
165    }
166
167    /// Moves the vcpu to its own thread and constructs a VcpuHandle.
168    /// The handle can be used to control the remote vcpu.
169    pub fn start_threaded(
170        mut self,
171        vm: &Vm,
172        seccomp_filter: Arc<BpfProgram>,
173        barrier: Arc<Barrier>,
174    ) -> Result<VcpuHandle, StartThreadedError> {
175        let event_sender = self.event_sender.take().expect("vCPU already started");
176        let response_receiver = self.response_receiver.take().unwrap();
177        let vcpu_fd = self
178            .copy_kvm_vcpu_fd(vm)
179            .map_err(StartThreadedError::CopyFd)?;
180        let vcpu_thread = thread::Builder::new()
181            .name(format!("fc_vcpu {}", self.kvm_vcpu.index))
182            .spawn(move || {
183                let filter = &*seccomp_filter;
184                self.register_kick_signal_handler();
185                // Synchronization to make sure thread local data is initialized.
186                barrier.wait();
187                self.run(filter);
188            })
189            .map_err(StartThreadedError::Spawn)?;
190
191        Ok(VcpuHandle::new(
192            event_sender,
193            response_receiver,
194            vcpu_fd,
195            vcpu_thread,
196        ))
197    }
198
199    /// Main loop of the vCPU thread.
200    ///
201    /// Runs the vCPU in KVM context in a loop. Handles KVM_EXITs then goes back in.
202    /// Note that the state of the VCPU and associated VM must be setup first for this to do
203    /// anything useful.
204    pub fn run(&mut self, seccomp_filter: BpfProgramRef) {
205        // Load seccomp filters for this vCPU thread.
206        // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters
207        // altogether is the desired behaviour.
208        if let Err(err) = crate::seccomp::apply_filter(seccomp_filter) {
209            panic!(
210                "Failed to set the requested seccomp filters on vCPU {}: Error: {}",
211                self.kvm_vcpu.index, err
212            );
213        }
214
215        // Start running the machine state in the `Paused` state.
216        StateMachine::run(self, Self::paused);
217    }
218
219    // This is the main loop of the `Running` state.
220    fn running(&mut self) -> StateMachine<Self> {
221        // This loop is here just for optimizing the emulation path.
222        // No point in ticking the state machine if there are no external events.
223        loop {
224            match self.run_emulation() {
225                // Emulation ran successfully, continue.
226                Ok(VcpuEmulation::Handled) => (),
227                // Emulation was interrupted, check external events.
228                Ok(VcpuEmulation::Interrupted) => break,
229                Ok(VcpuEmulation::DirtyRingFull) => break,
230                // If the guest was rebooted or halted:
231                // - vCPU0 will always exit out of `KVM_RUN` with KVM_EXIT_SHUTDOWN or KVM_EXIT_HLT.
232                // - the other vCPUs won't ever exit out of `KVM_RUN`, but they won't consume CPU.
233                // So we pause vCPU0 and send a signal to the emulation thread to stop the VMM.
234                Ok(VcpuEmulation::Stopped) => return self.exit(FcExitCode::Ok),
235                // If the emulation requests a pause lets do this
236                #[cfg(feature = "gdb")]
237                Ok(VcpuEmulation::Paused) => {
238                    #[cfg(target_arch = "x86_64")]
239                    self.kvm_vcpu.kvmclock_ctrl();
240                    return StateMachine::next(Self::paused);
241                }
242                Ok(VcpuEmulation::DebugEvent(_)) => break,
243                // Emulation errors lead to vCPU exit.
244                Err(_) => return self.exit(FcExitCode::GenericError),
245            }
246        }
247
248        // By default don't change state.
249        let mut state = StateMachine::next(Self::running);
250
251        // Break this emulation loop on any transition request/external event.
252        match self.event_receiver.try_recv() {
253            // Running ---- Pause ----> Paused
254            Ok(VcpuEvent::Pause) => {
255                // Nothing special to do.
256                self.response_sender
257                    .send(VcpuResponse::Paused)
258                    .expect("vcpu channel unexpectedly closed");
259
260                #[cfg(target_arch = "x86_64")]
261                self.kvm_vcpu.kvmclock_ctrl();
262
263                // Move to 'paused' state.
264                state = StateMachine::next(Self::paused);
265            }
266            Ok(VcpuEvent::Resume) => {
267                self.response_sender
268                    .send(VcpuResponse::Resumed)
269                    .expect("vcpu channel unexpectedly closed");
270            }
271            // SaveState cannot be performed on a running Vcpu.
272            Ok(VcpuEvent::SaveState) => {
273                self.response_sender
274                    .send(VcpuResponse::NotAllowed(String::from(
275                        "save/restore unavailable while running",
276                    )))
277                    .expect("vcpu channel unexpectedly closed");
278            }
279            // DumpCpuConfig cannot be performed on a running Vcpu.
280            Ok(VcpuEvent::DumpCpuConfig) => {
281                self.response_sender
282                    .send(VcpuResponse::NotAllowed(String::from(
283                        "cpu config dump is unavailable while running",
284                    )))
285                    .expect("vcpu channel unexpectedly closed");
286            }
287            Ok(VcpuEvent::Finish) => return StateMachine::finish(),
288            // Unhandled exit of the other end.
289            Err(TryRecvError::Disconnected) => {
290                // Move to 'exited' state.
291                state = self.exit(FcExitCode::GenericError);
292            }
293            // All other events or lack thereof have no effect on current 'running' state.
294            Err(TryRecvError::Empty) => (),
295        }
296
297        state
298    }
299
300    // This is the main loop of the `Paused` state.
301    fn paused(&mut self) -> StateMachine<Self> {
302        match self.event_receiver.recv() {
303            // Paused ---- Resume ----> Running
304            Ok(VcpuEvent::Resume) => {
305                if self.kvm_vcpu.fd.get_kvm_run().immediate_exit == 1u8 {
306                    warn!(
307                        "Received a VcpuEvent::Resume message with immediate_exit enabled. \
308                         immediate_exit was disabled before proceeding"
309                    );
310                    self.kvm_vcpu.fd.set_kvm_immediate_exit(0);
311                }
312                self.response_sender
313                    .send(VcpuResponse::Resumed)
314                    .expect("vcpu channel unexpectedly closed");
315                // Move to 'running' state.
316                StateMachine::next(Self::running)
317            }
318            Ok(VcpuEvent::Pause) => {
319                self.response_sender
320                    .send(VcpuResponse::Paused)
321                    .expect("vcpu channel unexpectedly closed");
322                StateMachine::next(Self::paused)
323            }
324            Ok(VcpuEvent::SaveState) => {
325                // Save vcpu state.
326                self.kvm_vcpu
327                    .save_state()
328                    .map(|vcpu_state| {
329                        self.response_sender
330                            .send(VcpuResponse::SavedState(Box::new(vcpu_state)))
331                            .expect("vcpu channel unexpectedly closed");
332                    })
333                    .unwrap_or_else(|err| {
334                        self.response_sender
335                            .send(VcpuResponse::Error(VcpuError::VcpuResponse(err)))
336                            .expect("vcpu channel unexpectedly closed");
337                    });
338
339                StateMachine::next(Self::paused)
340            }
341            Ok(VcpuEvent::DumpCpuConfig) => {
342                self.kvm_vcpu
343                    .dump_cpu_config()
344                    .map(|cpu_config| {
345                        self.response_sender
346                            .send(VcpuResponse::DumpedCpuConfig(Box::new(cpu_config)))
347                            .expect("vcpu channel unexpectedly closed");
348                    })
349                    .unwrap_or_else(|err| {
350                        self.response_sender
351                            .send(VcpuResponse::Error(VcpuError::VcpuResponse(err)))
352                            .expect("vcpu channel unexpectedly closed");
353                    });
354
355                StateMachine::next(Self::paused)
356            }
357            Ok(VcpuEvent::Finish) => StateMachine::finish(),
358            // Unhandled exit of the other end.
359            Err(_) => {
360                // Move to 'exited' state.
361                self.exit(FcExitCode::GenericError)
362            }
363        }
364    }
365
366    // Transition to the exited state and finish on command.
367    fn exit(&mut self, exit_code: FcExitCode) -> StateMachine<Self> {
368        // To avoid cycles, all teardown paths take the following route:
369        //   +------------------------+----------------------------+------------------------+
370        //   |        Vmm             |           Action           |           Vcpu         |
371        //   +------------------------+----------------------------+------------------------+
372        // 1 |                        |                            | vcpu.exit(exit_code)   |
373        // 2 |                        |                            | vcpu.exit_evt.write(1) |
374        // 3 |                        | <--- EventFd::exit_evt --- |                        |
375        // 4 | vmm.stop()             |                            |                        |
376        // 5 |                        | --- VcpuEvent::Finish ---> |                        |
377        // 6 |                        |                            | StateMachine::finish() |
378        // 7 | VcpuHandle::join()     |                            |                        |
379        // 8 | vmm.shutdown_exit_code becomes Some(exit_code) breaking the main event loop  |
380        //   +------------------------+----------------------------+------------------------+
381        // Vcpu initiated teardown starts from `fn Vcpu::exit()` (step 1).
382        // Vmm initiated teardown starts from `pub fn Vmm::stop()` (step 4).
383        // Once `vmm.shutdown_exit_code` becomes `Some(exit_code)`, it is the upper layer's
384        // responsibility to break main event loop and propagate the exit code value.
385        // Signal Vmm of Vcpu exit.
386        if let Err(err) = self.exit_evt.write(1) {
387            METRICS.vcpu.failures.inc();
388            error!("Failed signaling vcpu exit event: {}", err);
389        }
390        // From this state we only accept going to finished.
391        loop {
392            self.response_sender
393                .send(VcpuResponse::Exited(exit_code))
394                .expect("vcpu channel unexpectedly closed");
395            // Wait for and only accept 'VcpuEvent::Finish'.
396            if let Ok(VcpuEvent::Finish) = self.event_receiver.recv() {
397                break;
398            }
399        }
400        StateMachine::finish()
401    }
402
    /// Runs the vCPU in KVM context and handles the kvm exit reason.
    ///
    /// Returns error or enum specifying whether emulation was handled or interrupted.
    ///
    /// Debug exits are handled here: with the `gdb` feature the attached gdb channel (if any) is
    /// notified and [`VcpuEmulation::Paused`] is returned; without it the exit is reported as
    /// [`VcpuEmulation::DebugEvent`]. Every other result is delegated to `handle_kvm_exit`.
    pub fn run_emulation(&mut self) -> Result<VcpuEmulation, VcpuError> {
        // A kick may have set `immediate_exit` before we got here. Skip the run, clear the flag
        // and report the interruption so the caller checks for external events.
        if self.kvm_vcpu.fd.get_kvm_run().immediate_exit == 1u8 {
            warn!("Requested a vCPU run with immediate_exit enabled. The operation was skipped");
            self.kvm_vcpu.fd.set_kvm_immediate_exit(0);
            return Ok(VcpuEmulation::Interrupted);
        }

        match self.kvm_vcpu.fd.run() {
            Err(ref err) if err.errno() == libc::EINTR => {
                // Clear the flag so that the next run is not skipped.
                self.kvm_vcpu.fd.set_kvm_immediate_exit(0);
                // Notify that this KVM_RUN was interrupted.
                Ok(VcpuEmulation::Interrupted)
            }
            Ok(VcpuExit::Debug(debug)) => {
                // With gdb support: send the raw thread id for this vcpu index to the debugger
                // (when one is attached) and request a pause.
                #[cfg(feature = "gdb")]
                {
                    if let Some(gdb_event) = &self.gdb_event {
                        gdb_event
                            .send(get_raw_tid(self.kvm_vcpu.index.into()))
                            .expect("Unable to notify gdb event");
                    }
                    return Ok(VcpuEmulation::Paused);
                }
                // Without gdb support: copy the debug exit fields into a `DebugExitInfo`.
                // NOTE(review): `debug` is unused when the `gdb` feature is enabled — confirm
                // this does not trip an unused-variable lint in that configuration.
                #[cfg(not(feature = "gdb"))]
                {
                    return Ok(VcpuEmulation::DebugEvent(DebugExitInfo {
                        exception: debug.exception,
                        pc: debug.pc,
                        dr6: debug.dr6,
                        dr7: debug.dr7,
                    }));
                }
            }
            emulation_result => handle_kvm_exit(&mut self.kvm_vcpu.peripherals, emulation_result),
        }
    }
442}
443
444/// Handle the return value of a call to [`VcpuFd::run`] and update our emulation accordingly
445fn handle_kvm_exit(
446    peripherals: &mut Peripherals,
447    emulation_result: Result<VcpuExit, errno::Error>,
448) -> Result<VcpuEmulation, VcpuError> {
449    match emulation_result {
450        Ok(run) => match run {
451            VcpuExit::MmioRead(addr, data) => {
452                if let Some(mmio_bus) = &peripherals.mmio_bus {
453                    let _metric = METRICS.vcpu.exit_mmio_read_agg.record_latency_metrics();
454                    if let Err(err) = mmio_bus.read(addr, data) {
455                        warn!("Invalid MMIO read @ {addr:#x}:{:#x}: {err}", data.len());
456                    }
457                    METRICS.vcpu.exit_mmio_read.inc();
458                }
459                Ok(VcpuEmulation::Handled)
460            }
461            VcpuExit::MmioWrite(addr, data) => {
462                if let Some(mmio_bus) = &peripherals.mmio_bus {
463                    let _metric = METRICS.vcpu.exit_mmio_write_agg.record_latency_metrics();
464                    if let Err(err) = mmio_bus.write(addr, data) {
465                        warn!("Invalid MMIO read @ {addr:#x}:{:#x}: {err}", data.len());
466                    }
467                    METRICS.vcpu.exit_mmio_write.inc();
468                }
469                Ok(VcpuEmulation::Handled)
470            }
471            VcpuExit::Hlt => {
472                info!("Received KVM_EXIT_HLT signal");
473                Ok(VcpuEmulation::Stopped)
474            }
475            VcpuExit::Shutdown => {
476                info!("Received KVM_EXIT_SHUTDOWN signal");
477                Ok(VcpuEmulation::Stopped)
478            }
479            // Documentation specifies that below kvm exits are considered
480            // errors.
481            VcpuExit::FailEntry(hardware_entry_failure_reason, cpu) => {
482                // Hardware entry failure.
483                METRICS.vcpu.failures.inc();
484                error!(
485                    "Received KVM_EXIT_FAIL_ENTRY signal: {} on cpu {}",
486                    hardware_entry_failure_reason, cpu
487                );
488                Err(VcpuError::FaultyKvmExit(format!(
489                    "{:?}",
490                    VcpuExit::FailEntry(hardware_entry_failure_reason, cpu)
491                )))
492            }
493            VcpuExit::InternalError => {
494                // Failure from the Linux KVM subsystem rather than from the hardware.
495                METRICS.vcpu.failures.inc();
496                error!("Received KVM_EXIT_INTERNAL_ERROR signal");
497                Err(VcpuError::FaultyKvmExit(format!(
498                    "{:?}",
499                    VcpuExit::InternalError
500                )))
501            }
502            VcpuExit::SystemEvent(event_type, event_flags) => match event_type {
503                KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => {
504                    info!(
505                        "Received KVM_SYSTEM_EVENT: type: {}, event: {:?}",
506                        event_type, event_flags
507                    );
508                    Ok(VcpuEmulation::Stopped)
509                }
510                _ => {
511                    METRICS.vcpu.failures.inc();
512                    error!(
513                        "Received KVM_SYSTEM_EVENT signal type: {}, flag: {:?}",
514                        event_type, event_flags
515                    );
516                    Err(VcpuError::FaultyKvmExit(format!(
517                        "{:?}",
518                        VcpuExit::SystemEvent(event_type, event_flags)
519                    )))
520                }
521            },
522            VcpuExit::Unsupported(exit_reason) if exit_reason == KVM_EXIT_DIRTY_RING_FULL => {
523                Ok(VcpuEmulation::DirtyRingFull)
524            }
525            arch_specific_reason => {
526                // run specific architecture emulation.
527                peripherals.run_arch_emulation(arch_specific_reason)
528            }
529        },
530        // The unwrap on raw_os_error can only fail if we have a logic
531        // error in our code in which case it is better to panic.
532        Err(ref err) => match err.errno() {
533            libc::EAGAIN => Ok(VcpuEmulation::Handled),
534            libc::ENOSYS => {
535                METRICS.vcpu.failures.inc();
536                error!("Received ENOSYS error because KVM failed to emulate an instruction.");
537                Err(VcpuError::FaultyKvmExit(
538                    "Received ENOSYS error because KVM failed to emulate an instruction."
539                        .to_string(),
540                ))
541            }
542            libc::EFAULT => Err(VcpuError::FaultyKvmExit(format!("{}", err))),
543            _ => {
544                METRICS.vcpu.failures.inc();
545                error!("Failure during vcpu run: {}", err);
546                Err(VcpuError::FaultyKvmExit(format!("{}", err)))
547            }
548        },
549    }
550}
551
/// List of events that the Vcpu can receive.
///
/// How an event is handled depends on whether the vcpu is currently running or paused.
#[derive(Debug, Clone)]
pub enum VcpuEvent {
    /// The vCPU thread will end when receiving this message.
    Finish,
    /// Pause the Vcpu. Answered with [`VcpuResponse::Paused`] in both states.
    Pause,
    /// Event to resume the Vcpu. Answered with [`VcpuResponse::Resumed`] in both states.
    Resume,
    /// Event to save the state of a paused Vcpu.
    /// A running Vcpu answers with [`VcpuResponse::NotAllowed`].
    SaveState,
    /// Event to dump CPU configuration of a paused Vcpu.
    /// A running Vcpu answers with [`VcpuResponse::NotAllowed`].
    DumpCpuConfig,
}
566
/// List of responses that the Vcpu reports.
///
/// `Debug` is implemented by hand for this type and omits the boxed payloads.
pub enum VcpuResponse {
    /// Requested action encountered an error.
    Error(VcpuError),
    /// Vcpu is stopped; carries the exit code the vcpu exited with.
    Exited(FcExitCode),
    /// Requested action not allowed; carries the reason.
    NotAllowed(String),
    /// Vcpu is paused.
    Paused,
    /// Vcpu is resumed.
    Resumed,
    /// Vcpu state is saved.
    SavedState(Box<VcpuState>),
    /// Vcpu is in the state where CPU config is dumped.
    DumpedCpuConfig(Box<CpuConfiguration>),
}
584
585impl fmt::Debug for VcpuResponse {
586    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
587        use crate::VcpuResponse::*;
588        match self {
589            Paused => write!(f, "VcpuResponse::Paused"),
590            Resumed => write!(f, "VcpuResponse::Resumed"),
591            Exited(code) => write!(f, "VcpuResponse::Exited({:?})", code),
592            SavedState(_) => write!(f, "VcpuResponse::SavedState"),
593            Error(err) => write!(f, "VcpuResponse::Error({:?})", err),
594            NotAllowed(reason) => write!(f, "VcpuResponse::NotAllowed({})", reason),
595            DumpedCpuConfig(_) => write!(f, "VcpuResponse::DumpedCpuConfig"),
596        }
597    }
598}
599
/// Wrapper over Vcpu that hides the underlying interactions with the Vcpu thread.
///
/// Dropping the handle joins the vcpu thread.
#[derive(Debug)]
pub struct VcpuHandle {
    /// Channel end used to send [`VcpuEvent`]s to the vcpu thread.
    event_sender: Sender<VcpuEvent>,
    /// Channel end from which the vcpu's [`VcpuResponse`]s are read.
    response_receiver: Receiver<VcpuResponse>,
    /// VcpuFd through which `immediate_exit` is set when kicking the vcpu.
    /// [`Vcpu::start_threaded`] passes a duplicate of the vcpu's own fd here.
    pub vcpu_fd: VcpuFd,
    // Rust JoinHandles have to be wrapped in Option if you ever plan on 'join()'ing them.
    // We want to be able to join these threads in tests.
    vcpu_thread: Option<thread::JoinHandle<()>>,
}
611
/// Error type for [`VcpuHandle::send_event`].
///
/// Wraps the errno-style error describing why the vCPU could not be signaled.
#[derive(Debug, derive_more::From, thiserror::Error)]
#[error("Failed to signal vCPU: {0}")]
pub struct VcpuSendEventError(pub vmm_sys_util::errno::Error);
616
617impl VcpuHandle {
    /// Creates a new [`VcpuHandle`].
    ///
    /// # Arguments
    /// + `event_sender`: [`Sender`] to communicate [`VcpuEvent`] to control the vcpu.
    /// + `response_receiver`: [`Receiver`] from which the vcpu's responses can be read.
    /// + `vcpu_fd`: [`VcpuFd`] through which [`VcpuHandle::send_event`] sets `immediate_exit`.
    /// + `vcpu_thread`: A [`thread::JoinHandle`] for the vcpu thread.
    pub fn new(
        event_sender: Sender<VcpuEvent>,
        response_receiver: Receiver<VcpuResponse>,
        vcpu_fd: VcpuFd,
        vcpu_thread: thread::JoinHandle<()>,
    ) -> Self {
        Self {
            event_sender,
            response_receiver,
            vcpu_fd,
            vcpu_thread: Some(vcpu_thread),
        }
    }
637    /// Sends event to vCPU.
638    ///
639    /// # Errors
640    ///
641    /// When [`vmm_sys_util::linux::signal::Killable::kill`] errors.
642    pub fn send_event(&mut self, event: VcpuEvent) -> Result<(), VcpuSendEventError> {
643        // Use expect() to crash if the other thread closed this channel.
644        if self.event_sender.send(event).is_err() {
645            return Err(VcpuSendEventError(errno::Error::new(libc::EPIPE)));
646        }
647        // Kick the vcpu so it picks up the message.
648        // Add a fence to ensure the write is visible to the vpu thread
649        self.vcpu_fd.set_kvm_immediate_exit(1);
650        fence(Ordering::Release);
651        self.vcpu_thread
652            .as_ref()
653            // Safe to unwrap since constructor make this 'Some'.
654            .unwrap()
655            .kill(sigrtmin() + VCPU_RTSIG_OFFSET)?;
656        Ok(())
657    }
658
    /// Returns a reference to the [`Receiver`] from which the vcpu's responses can be read.
    pub fn response_receiver(&self) -> &Receiver<VcpuResponse> {
        &self.response_receiver
    }
663}
664
665// Wait for the Vcpu thread to finish execution
666impl Drop for VcpuHandle {
667    fn drop(&mut self) {
668        // We assume that by the time a VcpuHandle is dropped, other code has run to
669        // get the state machine loop to finish so the thread is ready to join.
670        // The strategy of avoiding more complex messaging protocols during the Drop
671        // helps avoid cycles which were preventing a truly clean shutdown.
672        //
673        // If the code hangs at this point, that means that a Finish event was not
674        // sent by Vmm.
675        self.vcpu_thread.take().unwrap().join().unwrap();
676    }
677}
678
/// Vcpu emulation state.
///
/// Outcome of a single [`Vcpu::run_emulation`] call.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum VcpuEmulation {
    /// Handled: the exit was dealt with and the vcpu can be run again right away.
    Handled,
    /// Dirty ring is full and needs to be harvested.
    DirtyRingFull,
    /// Interrupted: the run was interrupted or skipped, external events should be checked.
    Interrupted,
    /// Stopped: the guest halted, shut down or reset.
    Stopped,
    /// Pause request
    #[cfg(feature = "gdb")]
    Paused,
    /// Debug event (breakpoint/single-step)
    DebugEvent(DebugExitInfo),
}
696
/// Minimal debug information from a KVM debug exit.
///
/// Each field is copied verbatim from the field of the same name in the debug exit payload.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct DebugExitInfo {
    /// `exception` field of the debug exit.
    pub exception: u32,
    /// `pc` field of the debug exit (presumably the guest program counter — confirm).
    pub pc: u64,
    /// `dr6` field of the debug exit (presumably the x86 DR6 debug register — confirm).
    pub dr6: u64,
    /// `dr7` field of the debug exit (presumably the x86 DR7 debug register — confirm).
    pub dr7: u64,
}
705
706#[cfg(test)]
707pub(crate) mod tests {
708    #![allow(clippy::undocumented_unsafe_blocks)]
709
710    #[cfg(target_arch = "x86_64")]
711    use std::collections::BTreeMap;
712    use std::sync::atomic::Ordering;
713    use std::sync::{Arc, Barrier, Mutex};
714
715    use linux_loader::loader::KernelLoader;
716    use vmm_sys_util::errno;
717
718    use super::*;
719    use crate::RECV_TIMEOUT_SEC;
720    use crate::arch::{BootProtocol, EntryPoint};
721    use crate::seccomp::get_empty_filters;
722    use crate::utils::mib_to_bytes;
723    use crate::utils::signal::validate_signal_num;
724    use crate::vstate::bus::BusDevice;
725    use crate::vstate::kvm::Kvm;
726    use crate::vstate::memory::{GuestAddress, GuestMemoryMmap};
727    use crate::vstate::vcpu::VcpuError as EmulationError;
728    use crate::vstate::vm::Vm;
729    use crate::vstate::vm::tests::setup_vm_with_memory;
730
731    struct DummyDevice;
732
733    impl BusDevice for DummyDevice {
734        fn read(&mut self, _base: u64, _offset: u64, _data: &mut [u8]) {}
735
736        fn write(&mut self, _base: u64, _offset: u64, _data: &[u8]) -> Option<Arc<Barrier>> {
737            None
738        }
739    }
740
741    #[test]
742    fn test_handle_kvm_exit() {
743        let (_, _, mut vcpu) = setup_vcpu(0x1000);
744        let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Hlt));
745        assert_eq!(res.unwrap(), VcpuEmulation::Stopped);
746
747        let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Shutdown));
748        assert_eq!(res.unwrap(), VcpuEmulation::Stopped);
749
750        let res = handle_kvm_exit(
751            &mut vcpu.kvm_vcpu.peripherals,
752            Ok(VcpuExit::FailEntry(0, 0)),
753        );
754        assert_eq!(
755            format!("{:?}", res.unwrap_err()),
756            format!(
757                "{:?}",
758                EmulationError::FaultyKvmExit("FailEntry(0, 0)".to_string())
759            )
760        );
761
762        let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::InternalError));
763        assert_eq!(
764            format!("{:?}", res.unwrap_err()),
765            format!(
766                "{:?}",
767                EmulationError::FaultyKvmExit("InternalError".to_string())
768            )
769        );
770
771        let res = handle_kvm_exit(
772            &mut vcpu.kvm_vcpu.peripherals,
773            Ok(VcpuExit::SystemEvent(2, &[])),
774        );
775        assert_eq!(res.unwrap(), VcpuEmulation::Stopped);
776
777        let res = handle_kvm_exit(
778            &mut vcpu.kvm_vcpu.peripherals,
779            Ok(VcpuExit::SystemEvent(1, &[])),
780        );
781        assert_eq!(res.unwrap(), VcpuEmulation::Stopped);
782
783        let res = handle_kvm_exit(
784            &mut vcpu.kvm_vcpu.peripherals,
785            Ok(VcpuExit::SystemEvent(3, &[])),
786        );
787        assert_eq!(
788            format!("{:?}", res.unwrap_err()),
789            format!(
790                "{:?}",
791                EmulationError::FaultyKvmExit("SystemEvent(3, [])".to_string())
792            )
793        );
794
795        // Check what happens with an unhandled exit reason.
796        let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Unknown));
797        assert_eq!(
798            res.unwrap_err().to_string(),
799            "Unexpected kvm exit received: Unknown".to_string()
800        );
801
802        let res = handle_kvm_exit(
803            &mut vcpu.kvm_vcpu.peripherals,
804            Err(errno::Error::new(libc::EAGAIN)),
805        );
806        assert_eq!(res.unwrap(), VcpuEmulation::Handled);
807
808        let res = handle_kvm_exit(
809            &mut vcpu.kvm_vcpu.peripherals,
810            Err(errno::Error::new(libc::ENOSYS)),
811        );
812        assert_eq!(
813            format!("{:?}", res.unwrap_err()),
814            format!(
815                "{:?}",
816                EmulationError::FaultyKvmExit(
817                    "Received ENOSYS error because KVM failed to emulate an instruction."
818                        .to_string()
819                )
820            )
821        );
822
823        let res = handle_kvm_exit(
824            &mut vcpu.kvm_vcpu.peripherals,
825            Err(errno::Error::new(libc::EINVAL)),
826        );
827        assert_eq!(
828            format!("{:?}", res.unwrap_err()),
829            format!(
830                "{:?}",
831                EmulationError::FaultyKvmExit("Invalid argument (os error 22)".to_string())
832            )
833        );
834
835        let bus = Arc::new(Bus::new());
836        let dummy = Arc::new(Mutex::new(DummyDevice));
837        bus.insert(dummy, 0x10, 0x10).unwrap();
838        vcpu.set_mmio_bus(bus);
839        let addr = 0x10;
840
841        let res = handle_kvm_exit(
842            &mut vcpu.kvm_vcpu.peripherals,
843            Ok(VcpuExit::MmioRead(addr, &mut [0, 0, 0, 0])),
844        );
845        assert_eq!(res.unwrap(), VcpuEmulation::Handled);
846
847        let res = handle_kvm_exit(
848            &mut vcpu.kvm_vcpu.peripherals,
849            Ok(VcpuExit::MmioWrite(addr, &[0, 0, 0, 0])),
850        );
851        assert_eq!(res.unwrap(), VcpuEmulation::Handled);
852    }
853
854    impl PartialEq for VcpuResponse {
855        fn eq(&self, other: &Self) -> bool {
856            use crate::VcpuResponse::*;
857            // Guard match with no wildcard to make sure we catch new enum variants.
858            match self {
859                Paused | Resumed | Exited(_) => (),
860                Error(_) | NotAllowed(_) | SavedState(_) | DumpedCpuConfig(_) => (),
861            };
862            match (self, other) {
863                (Paused, Paused) | (Resumed, Resumed) => true,
864                (Exited(code), Exited(other_code)) => code == other_code,
865                (NotAllowed(_), NotAllowed(_))
866                | (SavedState(_), SavedState(_))
867                | (DumpedCpuConfig(_), DumpedCpuConfig(_)) => true,
868                (Error(err), Error(other_err)) => {
869                    format!("{:?}", err) == format!("{:?}", other_err)
870                }
871                _ => false,
872            }
873        }
874    }
875
876    // Auxiliary function being used throughout the tests.
877    #[allow(unused_mut)]
878    pub(crate) fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, Vcpu) {
879        let (kvm, mut vm) = setup_vm_with_memory(mem_size);
880
881        let (mut vcpus, _) = vm.create_vcpus(1).unwrap();
882        let mut vcpu = vcpus.remove(0);
883
884        #[cfg(target_arch = "aarch64")]
885        vcpu.kvm_vcpu.init(&[]).unwrap();
886
887        (kvm, vm, vcpu)
888    }
889
890    fn load_good_kernel(vm_memory: &GuestMemoryMmap) -> GuestAddress {
891        use std::fs::File;
892        use std::path::PathBuf;
893
894        let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
895
896        #[cfg(target_arch = "x86_64")]
897        path.push("src/test_utils/mock_resources/test_elf.bin");
898        #[cfg(target_arch = "aarch64")]
899        path.push("src/test_utils/mock_resources/test_pe.bin");
900
901        let mut kernel_file = File::open(path).expect("Cannot open kernel file");
902
903        #[cfg(target_arch = "x86_64")]
904        let entry_addr = linux_loader::loader::elf::Elf::load(
905            vm_memory,
906            Some(GuestAddress(crate::arch::get_kernel_start())),
907            &mut kernel_file,
908            Some(GuestAddress(crate::arch::get_kernel_start())),
909        )
910        .unwrap();
911        #[cfg(target_arch = "aarch64")]
912        let entry_addr =
913            linux_loader::loader::pe::PE::load(vm_memory, None, &mut kernel_file, None).unwrap();
914        entry_addr.kernel_load
915    }
916
917    fn vcpu_configured_for_boot() -> (Vm, VcpuHandle, EventFd) {
918        // Need enough mem to boot linux.
919        let mem_size = mib_to_bytes(64);
920        let (kvm, vm, mut vcpu) = setup_vcpu(mem_size);
921
922        let vcpu_exit_evt = vcpu.exit_evt.try_clone().unwrap();
923
924        // Needs a kernel since we'll actually run this vcpu.
925        let entry_point = EntryPoint {
926            entry_addr: load_good_kernel(vm.guest_memory()),
927            protocol: BootProtocol::LinuxBoot,
928        };
929
930        #[cfg(target_arch = "x86_64")]
931        {
932            use crate::cpu_config::x86_64::cpuid::Cpuid;
933            vcpu.kvm_vcpu
934                .configure(
935                    vm.guest_memory(),
936                    entry_point,
937                    &VcpuConfig {
938                        vcpu_count: 1,
939                        smt: false,
940                        cpu_config: CpuConfiguration {
941                            cpuid: Cpuid::try_from(kvm.supported_cpuid.clone()).unwrap(),
942                            msrs: BTreeMap::new(),
943                        },
944                    },
945                )
946                .expect("failed to configure vcpu");
947        }
948
949        #[cfg(target_arch = "aarch64")]
950        vcpu.kvm_vcpu
951            .configure(
952                vm.guest_memory(),
953                entry_point,
954                &VcpuConfig {
955                    vcpu_count: 1,
956                    smt: false,
957                    cpu_config: crate::cpu_config::aarch64::CpuConfiguration::default(),
958                },
959                &kvm.optional_capabilities(),
960            )
961            .expect("failed to configure vcpu");
962
963        let mut seccomp_filters = get_empty_filters();
964        let barrier = Arc::new(Barrier::new(2));
965        let vcpu_handle = vcpu
966            .start_threaded(
967                &vm,
968                seccomp_filters.remove("vcpu").unwrap(),
969                barrier.clone(),
970            )
971            .expect("failed to start vcpu");
972        // Wait for vCPUs to initialize their TLS before moving forward.
973        barrier.wait();
974
975        (vm, vcpu_handle, vcpu_exit_evt)
976    }
977
978    #[test]
979    fn test_set_mmio_bus() {
980        let (_, _, mut vcpu) = setup_vcpu(0x1000);
981        assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_none());
982        vcpu.set_mmio_bus(Arc::new(Bus::new()));
983        assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_some());
984    }
985
986    #[test]
987    fn test_vcpu_kick() {
988        let (_, vm, mut vcpu) = setup_vcpu(0x1000);
989
990        let mut kvm_run =
991            kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size())
992                .expect("cannot mmap kvm-run");
993        let vcpu_kvm_run =
994            kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size())
995                .expect("cannot mmap kvm-run");
996        let success = Arc::new(std::sync::atomic::AtomicBool::new(false));
997        let vcpu_success = success.clone();
998        let barrier = Arc::new(Barrier::new(2));
999        let vcpu_barrier = barrier.clone();
1000        // Start Vcpu thread which will be kicked with a signal.
1001        let handle = std::thread::Builder::new()
1002            .name("test_vcpu_kick".to_string())
1003            .spawn(move || {
1004                vcpu.register_kick_signal_handler();
1005                // Notify TLS was populated.
1006                vcpu_barrier.wait();
1007                // Loop for max 1 second to check if the signal handler has run.
1008                for _ in 0..10 {
1009                    if vcpu_kvm_run.as_ref().immediate_exit == 1 {
1010                        // Signal handler has run and set immediate_exit to 1.
1011                        vcpu_success.store(true, Ordering::Release);
1012                        break;
1013                    }
1014                    std::thread::sleep(std::time::Duration::from_millis(100));
1015                }
1016            })
1017            .expect("cannot start thread");
1018        barrier.wait();
1019
1020        // Set immediate_exit and kick the Vcpu using the custom signal.
1021        kvm_run.as_mut_ref().immediate_exit = 1;
1022        handle
1023            .kill(sigrtmin() + VCPU_RTSIG_OFFSET)
1024            .expect("failed to signal thread");
1025        handle.join().expect("failed to join thread");
1026        // Verify that the Vcpu saw its kvm immediate-exit as set.
1027        assert!(success.load(Ordering::Acquire));
1028    }
1029
1030    // Sends an event to a vcpu and expects a particular response.
1031    fn queue_event_expect_response(
1032        handle: &mut VcpuHandle,
1033        event: VcpuEvent,
1034        response: VcpuResponse,
1035    ) {
1036        handle
1037            .send_event(event)
1038            .expect("failed to send event to vcpu");
1039        assert_eq!(
1040            handle
1041                .response_receiver()
1042                .recv_timeout(RECV_TIMEOUT_SEC)
1043                .expect("did not receive event response from vcpu"),
1044            response
1045        );
1046    }
1047
1048    #[test]
1049    fn test_immediate_exit_shortcircuits_execution() {
1050        let (_, _, mut vcpu) = setup_vcpu(0x1000);
1051
1052        vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1);
1053        // Set a dummy value to be returned by the emulate call
1054        let result = vcpu.run_emulation().expect("Failed to run emulation");
1055        assert_eq!(
1056            result,
1057            VcpuEmulation::Interrupted,
1058            "The Immediate Exit short-circuit should have prevented the execution of emulate"
1059        );
1060
1061        let event_sender = vcpu.event_sender.take().expect("vCPU already started");
1062        let _ = event_sender.send(VcpuEvent::Resume);
1063        vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1);
1064        // paused is expected to coerce immediate_exit to 0 when receiving a VcpuEvent::Resume
1065        let _ = vcpu.paused();
1066        assert_eq!(
1067            0,
1068            vcpu.kvm_vcpu.fd.get_kvm_run().immediate_exit,
1069            "Immediate Exit should have been disabled by sending Resume to a paused VM"
1070        )
1071    }
1072
1073    #[test]
1074    fn test_vcpu_pause_resume() {
1075        let (_vm, mut vcpu_handle, vcpu_exit_evt) = vcpu_configured_for_boot();
1076
1077        // Queue a Resume event, expect a response.
1078        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);
1079
1080        // Queue a Pause event, expect a response.
1081        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused);
1082
1083        // Validate vcpu handled the EINTR gracefully and didn't exit.
1084        let err = vcpu_exit_evt.read().unwrap_err();
1085        assert_eq!(err.raw_os_error().unwrap(), libc::EAGAIN);
1086
1087        // Queue another Pause event, expect a response.
1088        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused);
1089
1090        // Queue a Resume event, expect a response.
1091        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);
1092
1093        // Queue another Resume event, expect a response.
1094        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);
1095
1096        // Queue another Pause event, expect a response.
1097        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused);
1098
1099        // Queue a Resume event, expect a response.
1100        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);
1101
1102        vcpu_handle.send_event(VcpuEvent::Finish).unwrap();
1103    }
1104
1105    #[test]
1106    fn test_vcpu_save_state_events() {
1107        let (_vm, mut vcpu_handle, _vcpu_exit_evt) = vcpu_configured_for_boot();
1108
1109        // Queue a Resume event, expect a response.
1110        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);
1111
1112        // Queue a SaveState event, expect a response.
1113        queue_event_expect_response(
1114            &mut vcpu_handle,
1115            VcpuEvent::SaveState,
1116            VcpuResponse::NotAllowed(String::new()),
1117        );
1118
1119        // Queue another Pause event, expect a response.
1120        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused);
1121
1122        // Queue a SaveState event, get the response.
1123        vcpu_handle
1124            .send_event(VcpuEvent::SaveState)
1125            .expect("failed to send event to vcpu");
1126        match vcpu_handle
1127            .response_receiver()
1128            .recv_timeout(RECV_TIMEOUT_SEC)
1129            .expect("did not receive event response from vcpu")
1130        {
1131            VcpuResponse::SavedState(_) => {}
1132            _ => panic!("unexpected response"),
1133        };
1134
1135        vcpu_handle.send_event(VcpuEvent::Finish).unwrap();
1136    }
1137
1138    #[test]
1139    fn test_vcpu_dump_cpu_config() {
1140        let (_vm, mut vcpu_handle, _) = vcpu_configured_for_boot();
1141
1142        // Queue a DumpCpuConfig event, expect a DumpedCpuConfig response.
1143        vcpu_handle
1144            .send_event(VcpuEvent::DumpCpuConfig)
1145            .expect("Failed to send an event to vcpu.");
1146        match vcpu_handle
1147            .response_receiver()
1148            .recv_timeout(RECV_TIMEOUT_SEC)
1149            .expect("Could not receive a response from vcpu.")
1150        {
1151            VcpuResponse::DumpedCpuConfig(_) => (),
1152            VcpuResponse::Error(err) => panic!("Got an error: {err}"),
1153            _ => panic!("Got an unexpected response."),
1154        }
1155
1156        // Queue a Resume event, expect a response.
1157        queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);
1158
1159        // Queue a DumpCpuConfig event, expect a NotAllowed respoonse.
1160        // The DumpCpuConfig event is only allowed while paused.
1161        queue_event_expect_response(
1162            &mut vcpu_handle,
1163            VcpuEvent::DumpCpuConfig,
1164            VcpuResponse::NotAllowed(String::new()),
1165        );
1166
1167        vcpu_handle.send_event(VcpuEvent::Finish).unwrap();
1168    }
1169
1170    #[test]
1171    fn test_vcpu_rtsig_offset() {
1172        validate_signal_num(sigrtmin() + VCPU_RTSIG_OFFSET).unwrap();
1173    }
1174}