vmm/devices/virtio/transport/pci/
device.rs

1// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// Copyright 2018 The Chromium OS Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE-BSD-3-Clause file.
5//
6// Copyright © 2019 Intel Corporation
7//
8// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
9
10use std::cmp;
11use std::collections::HashMap;
12use std::fmt::{Debug, Formatter};
13use std::io::{ErrorKind, Write};
14use std::sync::atomic::{AtomicBool, AtomicU16, AtomicU32, AtomicUsize, Ordering};
15use std::sync::{Arc, Barrier, Mutex};
16
17use kvm_ioctls::{IoEventAddress, NoDatamatch};
18use log::warn;
19use pci::{
20    PciBdf, PciCapabilityId, PciClassCode, PciMassStorageSubclass, PciNetworkControllerSubclass,
21    PciSubclass,
22};
23use serde::{Deserialize, Serialize};
24use thiserror::Error;
25use vm_allocator::{AddressAllocator, AllocPolicy, RangeInclusive};
26use vm_memory::{Address, ByteValued, GuestAddress, Le32};
27use vmm_sys_util::errno;
28use vmm_sys_util::eventfd::EventFd;
29
30use crate::Vm;
31use crate::devices::virtio::device::VirtioDevice;
32use crate::devices::virtio::generated::virtio_ids;
33use crate::devices::virtio::queue::Queue;
34use crate::devices::virtio::transport::pci::common_config::{
35    VirtioPciCommonConfig, VirtioPciCommonConfigState,
36};
37use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
38use crate::logger::{debug, error};
39use crate::pci::configuration::{PciCapability, PciConfiguration, PciConfigurationState};
40use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState};
41use crate::pci::{BarReprogrammingParams, DeviceRelocationError, PciDevice};
42use crate::snapshot::Persist;
43use crate::utils::u64_to_usize;
44use crate::vstate::bus::BusDevice;
45use crate::vstate::interrupts::{InterruptError, MsixVectorGroup};
46use crate::vstate::memory::GuestMemoryMmap;
47use crate::vstate::resources::ResourceAllocator;
48
// Values of the device status byte that this transport compares against
// `common_config.driver_status`.
// NOTE(review): the values look like the virtio device-status bits — confirm against the spec.

/// Status value meaning the driver requested a device (re)init / reset.
const DEVICE_INIT: u8 = 0x00;
/// Status bit; one of the four bits that must be set for the driver to count as ready.
const DEVICE_ACKNOWLEDGE: u8 = 0x01;
/// Status bit; one of the four bits that must be set for the driver to count as ready.
const DEVICE_DRIVER: u8 = 0x02;
/// Status bit; one of the four bits that must be set for the driver to count as ready.
const DEVICE_DRIVER_OK: u8 = 0x04;
/// Status bit; one of the four bits that must be set for the driver to count as ready.
const DEVICE_FEATURES_OK: u8 = 0x08;
/// Status bit that must be clear for the driver to count as ready.
const DEVICE_FAILED: u8 = 0x80;

/// Vector value used to disable MSI for a queue.
pub const VIRTQ_MSI_NO_VECTOR: u16 = 0xffff;

/// BAR index we are using for VirtIO configuration
const VIRTIO_BAR_INDEX: u8 = 0;
61
/// Values stored in the `cfg_type` byte of a virtio PCI capability.
///
/// NOTE(review): the discriminants appear to mirror the `VIRTIO_PCI_CAP_*` identifiers of the
/// virtio specification — confirm against the spec before adding variants.
enum PciCapabilityType {
    /// Capability pointing at the common configuration region of the BAR.
    Common = 1,
    /// Capability pointing at the queue notification region of the BAR.
    Notify = 2,
    /// Capability pointing at the ISR status region of the BAR.
    Isr = 3,
    /// Capability pointing at the device-specific configuration region of the BAR.
    Device = 4,
    /// Capability used by `VirtioPciCfgCap` (access through PCI configuration space).
    Pci = 5,
    /// Presumably the shared-memory capability; not used in the visible code.
    SharedMemory = 8,
}
70
// This offset represents the 2 bytes omitted from the VirtioPciCap structure
// as they are already handled through add_capability(). These 2 bytes are the
// fields cap_vndr (1 byte) and cap_next (1 byte) defined in the virtio spec.
// It is added to the offset returned by add_capability() to locate the first
// byte of the VirtioPciCfgCap structure inside the PCI configuration space.
const VIRTIO_PCI_CAP_OFFSET: usize = 2;
75
/// Generic virtio PCI capability, minus the leading `cap_vndr` / `cap_next` bytes.
///
/// The struct is `repr(C, packed)` because its raw bytes are handed out through
/// `PciCapability::bytes()`, so the in-memory layout is the layout the guest sees.
#[repr(C, packed)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct VirtioPciCap {
    cap_len: u8,      // Generic PCI field: capability length
    cfg_type: u8,     // Identifies the structure.
    pci_bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type.
    padding: [u8; 2], // Pad to full dword.
    offset: Le32,     // Offset within bar.
    length: Le32,     // Length of the structure, in bytes.
}

// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCap {}
90
91impl PciCapability for VirtioPciCap {
92    fn bytes(&self) -> &[u8] {
93        self.as_slice()
94    }
95
96    fn id(&self) -> PciCapabilityId {
97        PciCapabilityId::VendorSpecific
98    }
99}
100
// Number of bytes added to the size of a capability struct when computing its
// `cap_len` field.
// NOTE(review): presumably the `cap_vndr` and `cap_next` bytes that are not part
// of the structs (see VIRTIO_PCI_CAP_OFFSET) — confirm.
const VIRTIO_PCI_CAP_LEN_OFFSET: u8 = 2;
102
103impl VirtioPciCap {
104    pub fn new(cfg_type: PciCapabilityType, offset: u32, length: u32) -> Self {
105        VirtioPciCap {
106            cap_len: u8::try_from(std::mem::size_of::<VirtioPciCap>()).unwrap()
107                + VIRTIO_PCI_CAP_LEN_OFFSET,
108            cfg_type: cfg_type as u8,
109            pci_bar: VIRTIO_BAR_INDEX,
110            id: 0,
111            padding: [0; 2],
112            offset: Le32::from(offset),
113            length: Le32::from(length),
114        }
115    }
116}
117
/// Notification capability: a generic `VirtioPciCap` followed by the notify-offset multiplier.
///
/// Packed for the same reason as `VirtioPciCap`: its raw bytes are exposed via `bytes()`.
#[repr(C, packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciNotifyCap {
    // Generic capability header.
    cap: VirtioPciCap,
    // Multiplier for per-queue notification addresses; constructed from
    // NOTIFY_OFF_MULTIPLIER in add_pci_capabilities().
    notify_off_multiplier: Le32,
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciNotifyCap {}
126
127impl PciCapability for VirtioPciNotifyCap {
128    fn bytes(&self) -> &[u8] {
129        self.as_slice()
130    }
131
132    fn id(&self) -> PciCapabilityId {
133        PciCapabilityId::VendorSpecific
134    }
135}
136
137impl VirtioPciNotifyCap {
138    pub fn new(cfg_type: PciCapabilityType, offset: u32, length: u32, multiplier: Le32) -> Self {
139        VirtioPciNotifyCap {
140            cap: VirtioPciCap {
141                cap_len: u8::try_from(std::mem::size_of::<VirtioPciNotifyCap>()).unwrap()
142                    + VIRTIO_PCI_CAP_LEN_OFFSET,
143                cfg_type: cfg_type as u8,
144                pci_bar: VIRTIO_BAR_INDEX,
145                id: 0,
146                padding: [0; 2],
147                offset: Le32::from(offset),
148                length: Le32::from(length),
149            },
150            notify_off_multiplier: multiplier,
151        }
152    }
153}
154
/// PCI configuration access capability: a generic `VirtioPciCap` followed by a 4-byte data
/// window.
///
/// Reads and writes that land on `pci_cfg_data` are forwarded to the BAR at the offset and
/// length stored in `cap` (see `read_cap_pci_cfg` / `write_cap_pci_cfg`).
#[repr(C, packed)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct VirtioPciCfgCap {
    // Generic capability header; `offset` and `length` are written by the guest.
    cap: VirtioPciCap,
    // Data window used for the forwarded BAR access.
    pci_cfg_data: [u8; 4],
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCfgCap {}
163
164impl PciCapability for VirtioPciCfgCap {
165    fn bytes(&self) -> &[u8] {
166        self.as_slice()
167    }
168
169    fn id(&self) -> PciCapabilityId {
170        PciCapabilityId::VendorSpecific
171    }
172}
173
174impl VirtioPciCfgCap {
175    fn new() -> Self {
176        VirtioPciCfgCap {
177            cap: VirtioPciCap {
178                cap_len: u8::try_from(size_of::<Self>()).unwrap() + VIRTIO_PCI_CAP_LEN_OFFSET,
179                cfg_type: PciCapabilityType::Pci as u8,
180                pci_bar: VIRTIO_BAR_INDEX,
181                id: 0,
182                padding: [0; 2],
183                offset: Le32::from(0),
184                length: Le32::from(0),
185            },
186            ..Default::default()
187        }
188    }
189}
190
/// Location and current contents of the PCI configuration access capability.
#[derive(Debug, Clone, Copy, Default)]
struct VirtioPciCfgCapInfo {
    // Byte offset of the capability inside the PCI configuration space, i.e. the offset
    // returned by add_capability() plus VIRTIO_PCI_CAP_OFFSET.
    offset: usize,
    // Shadow copy of the capability that guest config-space accesses read and modify.
    cap: VirtioPciCfgCap,
}
196
/// PCI subclass used for virtio devices that are neither network nor block devices
/// (see `VirtioPciDevice::pci_configuration`).
#[derive(Debug, Copy, Clone)]
pub enum PciVirtioSubclass {
    /// Subclass register value 0xff.
    NonTransitionalBase = 0xff,
}
201
202impl PciSubclass for PciVirtioSubclass {
203    fn get_register_value(&self) -> u8 {
204        *self as u8
205    }
206}
207
// Allocate one bar for the structs pointed to by the capability structures.
// As per the PCI specification, because the same BAR shares MSI-X and non
// MSI-X structures, it is recommended to use 8KiB alignment for all those
// structures. Every region below therefore starts on a multiple of 0x2000.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x2000;
const ISR_CONFIG_SIZE: u64 = 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x4000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const NOTIFICATION_BAR_OFFSET: u64 = 0x6000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x8000;
// 256KiB are reserved for the MSI-X table. The table can hold up to 2048
// entries of 128 bits (4 DWORDS, i.e. 16 bytes) each, which only requires
// 2048 * 16 B = 32KiB; the reserved window is larger than strictly needed.
const MSIX_TABLE_SIZE: u64 = 0x40000;
const MSIX_PBA_BAR_OFFSET: u64 = 0x48000;
// 2KiB are reserved for the Pending Bit Array. The PBA has one bit per vector
// and supports up to 2048 vectors, which only requires 2048 bits = 256 bytes;
// the reserved window is larger than strictly needed.
const MSIX_PBA_SIZE: u64 = 0x800;
/// The BAR size must be a power of 2.
pub const CAPABILITY_BAR_SIZE: u64 = 0x80000;
// Index of the BAR that holds all the regions above.
const VIRTIO_COMMON_BAR_INDEX: usize = 0;
// NOTE(review): presumably the BAR index reserved for shared memory regions; it is
// not referenced in the visible code.
const VIRTIO_SHM_BAR_INDEX: usize = 2;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
237
/// Serializable snapshot of a `VirtioPciDevice`, produced by `VirtioPciDevice::state()` and
/// consumed by `VirtioPciDevice::new_from_state()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VirtioPciDeviceState {
    /// BDF assigned to the device.
    pub pci_device_bdf: PciBdf,
    /// Whether the virtio device had been activated when the state was captured.
    pub device_activated: bool,
    /// Offset of the PCI configuration access capability in the PCI configuration space.
    pub cap_pci_cfg_offset: usize,
    /// Raw bytes of the PCI configuration access capability (`VirtioPciCfgCap`).
    pub cap_pci_cfg: Vec<u8>,
    /// State of the PCI configuration registers.
    pub pci_configuration_state: PciConfigurationState,
    /// State of the virtio PCI common configuration.
    pub pci_dev_state: VirtioPciCommonConfigState,
    /// State of the MSI-X configuration.
    pub msix_state: MsixConfigState,
    /// Address allocated for the virtio configuration BAR.
    pub bar_address: u64,
}
249
// Errors returned by the `VirtioPciDevice` constructors.
//
// The `///` comments on the variants are consumed by `displaydoc` to generate the `Display`
// messages: they are runtime strings, not plain documentation, so reword them with care.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VirtioPciDeviceError {
    /// Failed creating VirtioPciDevice: {0}
    CreateVirtioPciDevice(#[from] DeviceRelocationError),
    /// Error creating MSI configuration: {0}
    Msi(#[from] InterruptError),
}
257
/// PCI transport wrapping a virtio device: it owns the PCI configuration space, the virtio
/// common configuration and the MSI-X interrupt plumbing of a single virtio device.
pub struct VirtioPciDevice {
    // Identifier of the device; the only field printed by the `Debug` implementation.
    id: String,

    // BDF assigned to the device
    pci_device_bdf: PciBdf,

    // PCI configuration registers.
    configuration: PciConfiguration,

    // virtio PCI common configuration
    common_config: VirtioPciCommonConfig,

    // Virtio device reference and status
    device: Arc<Mutex<dyn VirtioDevice>>,
    device_activated: Arc<AtomicBool>,

    // PCI interrupts. Both constructors in this file always store `Some(..)` here.
    virtio_interrupt: Option<Arc<VirtioInterruptMsix>>,

    // Guest memory
    memory: GuestMemoryMmap,

    // Add a dedicated structure to hold information about the very specific
    // virtio-pci capability VIRTIO_PCI_CAP_PCI_CFG. This is needed to support
    // the legacy/backward compatible mechanism of letting the guest access the
    // other virtio capabilities without mapping the PCI BARs. This can be
    // needed when the guest tries to early access the virtio configuration of
    // a device.
    cap_pci_cfg_info: VirtioPciCfgCapInfo,

    // Allocated address for the BAR
    pub bar_address: u64,
}
291
292impl Debug for VirtioPciDevice {
293    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
294        f.debug_struct("VirtioPciDevice")
295            .field("id", &self.id)
296            .finish()
297    }
298}
299
300impl VirtioPciDevice {
301    fn pci_configuration(
302        virtio_device_type: u32,
303        msix_config: &Arc<Mutex<MsixConfig>>,
304    ) -> PciConfiguration {
305        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + u16::try_from(virtio_device_type).unwrap();
306        let (class, subclass) = match virtio_device_type {
307            virtio_ids::VIRTIO_ID_NET => (
308                PciClassCode::NetworkController,
309                &PciNetworkControllerSubclass::EthernetController as &dyn PciSubclass,
310            ),
311            virtio_ids::VIRTIO_ID_BLOCK => (
312                PciClassCode::MassStorage,
313                &PciMassStorageSubclass::MassStorage as &dyn PciSubclass,
314            ),
315            _ => (
316                PciClassCode::Other,
317                &PciVirtioSubclass::NonTransitionalBase as &dyn PciSubclass,
318            ),
319        };
320
321        PciConfiguration::new_type0(
322            VIRTIO_PCI_VENDOR_ID,
323            pci_device_id,
324            0x1, // For modern virtio-PCI devices
325            class,
326            subclass,
327            VIRTIO_PCI_VENDOR_ID,
328            pci_device_id,
329            Some(msix_config.clone()),
330        )
331    }
332
333    /// Allocate the PCI BAR for the VirtIO device and its associated capabilities.
334    ///
335    /// This must happen only during the creation of a brand new VM. When a VM is restored from a
336    /// known state, the BARs are already created with the right content, therefore we don't need
337    /// to go through this codepath.
338    pub fn allocate_bars(&mut self, mmio64_allocator: &mut AddressAllocator) {
339        let device_clone = self.device.clone();
340        let device = device_clone.lock().unwrap();
341
342        // Allocate the virtio-pci capability BAR.
343        // See http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-740004
344        let virtio_pci_bar_addr = mmio64_allocator
345            .allocate(
346                CAPABILITY_BAR_SIZE,
347                CAPABILITY_BAR_SIZE,
348                AllocPolicy::FirstMatch,
349            )
350            .unwrap()
351            .start();
352
353        self.configuration.add_pci_bar(
354            VIRTIO_COMMON_BAR_INDEX,
355            virtio_pci_bar_addr,
356            CAPABILITY_BAR_SIZE,
357        );
358
359        // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
360        self.add_pci_capabilities();
361        self.bar_address = virtio_pci_bar_addr;
362    }
363
364    /// Constructs a new PCI transport for the given virtio device.
365    pub fn new(
366        id: String,
367        memory: GuestMemoryMmap,
368        device: Arc<Mutex<dyn VirtioDevice>>,
369        msix_vectors: Arc<MsixVectorGroup>,
370        pci_device_bdf: u32,
371    ) -> Result<Self, VirtioPciDeviceError> {
372        let num_queues = device.lock().expect("Poisoned lock").queues().len();
373
374        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
375            msix_vectors.clone(),
376            pci_device_bdf,
377        )));
378        let pci_config = Self::pci_configuration(
379            device.lock().expect("Poisoned lock").device_type(),
380            &msix_config,
381        );
382
383        let virtio_common_config = VirtioPciCommonConfig::new(VirtioPciCommonConfigState {
384            driver_status: 0,
385            config_generation: 0,
386            device_feature_select: 0,
387            driver_feature_select: 0,
388            queue_select: 0,
389            msix_config: VIRTQ_MSI_NO_VECTOR,
390            msix_queues: vec![VIRTQ_MSI_NO_VECTOR; num_queues],
391        });
392        let interrupt = Arc::new(VirtioInterruptMsix::new(
393            msix_config.clone(),
394            virtio_common_config.msix_config.clone(),
395            virtio_common_config.msix_queues.clone(),
396            msix_vectors,
397        ));
398
399        let virtio_pci_device = VirtioPciDevice {
400            id,
401            pci_device_bdf: pci_device_bdf.into(),
402            configuration: pci_config,
403            common_config: virtio_common_config,
404            device,
405            device_activated: Arc::new(AtomicBool::new(false)),
406            virtio_interrupt: Some(interrupt),
407            memory,
408            cap_pci_cfg_info: VirtioPciCfgCapInfo::default(),
409            bar_address: 0,
410        };
411
412        Ok(virtio_pci_device)
413    }
414
415    pub fn new_from_state(
416        id: String,
417        vm: &Arc<Vm>,
418        device: Arc<Mutex<dyn VirtioDevice>>,
419        state: VirtioPciDeviceState,
420    ) -> Result<Self, VirtioPciDeviceError> {
421        let msix_config =
422            MsixConfig::from_state(state.msix_state, vm.clone(), state.pci_device_bdf.into())?;
423        let vectors = msix_config.vectors.clone();
424        let msix_config = Arc::new(Mutex::new(msix_config));
425
426        let pci_config = PciConfiguration::type0_from_state(
427            state.pci_configuration_state,
428            Some(msix_config.clone()),
429        );
430        let virtio_common_config = VirtioPciCommonConfig::new(state.pci_dev_state);
431        let cap_pci_cfg_info = VirtioPciCfgCapInfo {
432            offset: state.cap_pci_cfg_offset,
433            cap: *VirtioPciCfgCap::from_slice(&state.cap_pci_cfg).unwrap(),
434        };
435
436        let interrupt = Arc::new(VirtioInterruptMsix::new(
437            msix_config.clone(),
438            virtio_common_config.msix_config.clone(),
439            virtio_common_config.msix_queues.clone(),
440            vectors,
441        ));
442
443        let virtio_pci_device = VirtioPciDevice {
444            id,
445            pci_device_bdf: state.pci_device_bdf,
446            configuration: pci_config,
447            common_config: virtio_common_config,
448            device,
449            device_activated: Arc::new(AtomicBool::new(state.device_activated)),
450            virtio_interrupt: Some(interrupt),
451            memory: vm.guest_memory().clone(),
452            cap_pci_cfg_info,
453            bar_address: state.bar_address,
454        };
455
456        if state.device_activated {
457            virtio_pci_device
458                .device
459                .lock()
460                .expect("Poisoned lock")
461                .activate(
462                    virtio_pci_device.memory.clone(),
463                    virtio_pci_device.virtio_interrupt.as_ref().unwrap().clone(),
464                );
465        }
466
467        Ok(virtio_pci_device)
468    }
469
470    fn is_driver_ready(&self) -> bool {
471        let ready_bits =
472            (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK);
473        self.common_config.driver_status == ready_bits
474            && self.common_config.driver_status & DEVICE_FAILED == 0
475    }
476
477    /// Determines if the driver has requested the device (re)init / reset itself
478    fn is_driver_init(&self) -> bool {
479        self.common_config.driver_status == DEVICE_INIT
480    }
481
482    pub fn config_bar_addr(&self) -> u64 {
483        self.configuration.get_bar_addr(VIRTIO_BAR_INDEX as usize)
484    }
485
486    fn add_pci_capabilities(&mut self) {
487        // Add pointers to the different configuration structures from the PCI capabilities.
488        let common_cap = VirtioPciCap::new(
489            PciCapabilityType::Common,
490            COMMON_CONFIG_BAR_OFFSET.try_into().unwrap(),
491            COMMON_CONFIG_SIZE.try_into().unwrap(),
492        );
493        self.configuration.add_capability(&common_cap);
494
495        let isr_cap = VirtioPciCap::new(
496            PciCapabilityType::Isr,
497            ISR_CONFIG_BAR_OFFSET.try_into().unwrap(),
498            ISR_CONFIG_SIZE.try_into().unwrap(),
499        );
500        self.configuration.add_capability(&isr_cap);
501
502        // TODO(dgreid) - set based on device's configuration size?
503        let device_cap = VirtioPciCap::new(
504            PciCapabilityType::Device,
505            DEVICE_CONFIG_BAR_OFFSET.try_into().unwrap(),
506            DEVICE_CONFIG_SIZE.try_into().unwrap(),
507        );
508        self.configuration.add_capability(&device_cap);
509
510        let notify_cap = VirtioPciNotifyCap::new(
511            PciCapabilityType::Notify,
512            NOTIFICATION_BAR_OFFSET.try_into().unwrap(),
513            NOTIFICATION_SIZE.try_into().unwrap(),
514            Le32::from(NOTIFY_OFF_MULTIPLIER),
515        );
516        self.configuration.add_capability(&notify_cap);
517
518        let configuration_cap = VirtioPciCfgCap::new();
519        self.cap_pci_cfg_info.offset =
520            self.configuration.add_capability(&configuration_cap) + VIRTIO_PCI_CAP_OFFSET;
521        self.cap_pci_cfg_info.cap = configuration_cap;
522
523        if let Some(interrupt) = &self.virtio_interrupt {
524            let msix_cap = MsixCap::new(
525                VIRTIO_BAR_INDEX,
526                interrupt
527                    .msix_config
528                    .lock()
529                    .expect("Poisoned lock")
530                    .vectors
531                    .num_vectors(),
532                MSIX_TABLE_BAR_OFFSET.try_into().unwrap(),
533                VIRTIO_BAR_INDEX,
534                MSIX_PBA_BAR_OFFSET.try_into().unwrap(),
535            );
536            self.configuration.add_capability(&msix_cap);
537        }
538    }
539
540    fn read_cap_pci_cfg(&mut self, offset: usize, mut data: &mut [u8]) {
541        let cap_slice = self.cap_pci_cfg_info.cap.as_slice();
542        let data_len = data.len();
543        let cap_len = cap_slice.len();
544        if offset + data_len > cap_len {
545            error!("Failed to read cap_pci_cfg from config space");
546            return;
547        }
548
549        if offset < std::mem::size_of::<VirtioPciCap>() {
550            if let Some(end) = offset.checked_add(data_len) {
551                // This write can't fail, offset and end are checked against config_len.
552                data.write_all(&cap_slice[offset..cmp::min(end, cap_len)])
553                    .unwrap();
554            }
555        } else {
556            let bar_offset: u32 = self.cap_pci_cfg_info.cap.cap.offset.into();
557            let len = u32::from(self.cap_pci_cfg_info.cap.cap.length) as usize;
558            // BAR reads expect that the buffer has the exact size of the field that
559            // offset is pointing to. So, do some check that the `length` has a meaningful value
560            // and only use the part of the buffer we actually need.
561            if len <= 4 {
562                self.read_bar(0, bar_offset as u64, &mut data[..len]);
563            }
564        }
565    }
566
567    fn write_cap_pci_cfg(&mut self, offset: usize, data: &[u8]) -> Option<Arc<Barrier>> {
568        let cap_slice = self.cap_pci_cfg_info.cap.as_mut_slice();
569        let data_len = data.len();
570        let cap_len = cap_slice.len();
571        if offset + data_len > cap_len {
572            error!("Failed to write cap_pci_cfg to config space");
573            return None;
574        }
575
576        if offset < std::mem::size_of::<VirtioPciCap>() {
577            let (_, right) = cap_slice.split_at_mut(offset);
578            right[..data_len].copy_from_slice(data);
579            None
580        } else {
581            let bar_offset: u32 = self.cap_pci_cfg_info.cap.cap.offset.into();
582            let len = u32::from(self.cap_pci_cfg_info.cap.cap.length) as usize;
583            // BAR writes expect that the buffer has the exact size of the field that
584            // offset is pointing to. So, do some check that the `length` has a meaningful value
585            // and only use the part of the buffer we actually need.
586            if len <= 4 {
587                let len = len.min(data.len());
588                self.write_bar(0, bar_offset as u64, &data[..len])
589            } else {
590                None
591            }
592        }
593    }
594
595    pub fn virtio_device(&self) -> Arc<Mutex<dyn VirtioDevice>> {
596        self.device.clone()
597    }
598
599    fn needs_activation(&self) -> bool {
600        !self.device_activated.load(Ordering::SeqCst) && self.is_driver_ready()
601    }
602
603    /// Register the IoEvent notification for a VirtIO device
604    pub fn register_notification_ioevent(&self, vm: &Vm) -> Result<(), errno::Error> {
605        let bar_addr = self.config_bar_addr();
606        for (i, queue_evt) in self
607            .device
608            .lock()
609            .expect("Poisoned lock")
610            .queue_events()
611            .iter()
612            .enumerate()
613        {
614            let notify_base = bar_addr + NOTIFICATION_BAR_OFFSET;
615            let io_addr =
616                IoEventAddress::Mmio(notify_base + i as u64 * NOTIFY_OFF_MULTIPLIER as u64);
617            vm.fd().register_ioevent(queue_evt, &io_addr, NoDatamatch)?;
618        }
619        Ok(())
620    }
621
622    pub fn state(&self) -> VirtioPciDeviceState {
623        VirtioPciDeviceState {
624            pci_device_bdf: self.pci_device_bdf,
625            device_activated: self.device_activated.load(Ordering::Acquire),
626            cap_pci_cfg_offset: self.cap_pci_cfg_info.offset,
627            cap_pci_cfg: self.cap_pci_cfg_info.cap.bytes().to_vec(),
628            pci_configuration_state: self.configuration.state(),
629            pci_dev_state: self.common_config.state(),
630            msix_state: self
631                .virtio_interrupt
632                .as_ref()
633                .unwrap()
634                .msix_config
635                .lock()
636                .expect("Poisoned lock")
637                .state(),
638            bar_address: self.bar_address,
639        }
640    }
641}
642
/// MSI-X based implementation of `VirtioInterrupt` for virtio-PCI devices.
pub struct VirtioInterruptMsix {
    // MSI-X configuration (table entries, masking state, pending bits) of the device.
    msix_config: Arc<Mutex<MsixConfig>>,
    // Vector used for configuration change interrupts.
    config_vector: Arc<AtomicU16>,
    // Vector used by each queue, indexed by queue index.
    queues_vectors: Arc<Mutex<Vec<u16>>>,
    // Group of vectors through which interrupts are actually triggered.
    vectors: Arc<MsixVectorGroup>,
}
649
650impl std::fmt::Debug for VirtioInterruptMsix {
651    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
652        f.debug_struct("VirtioInterruptMsix")
653            .field("msix_config", &self.msix_config)
654            .field("config_vector", &self.config_vector)
655            .field("queues_vectors", &self.queues_vectors)
656            .finish()
657    }
658}
659
660impl VirtioInterruptMsix {
661    pub fn new(
662        msix_config: Arc<Mutex<MsixConfig>>,
663        config_vector: Arc<AtomicU16>,
664        queues_vectors: Arc<Mutex<Vec<u16>>>,
665        vectors: Arc<MsixVectorGroup>,
666    ) -> Self {
667        VirtioInterruptMsix {
668            msix_config,
669            config_vector,
670            queues_vectors,
671            vectors,
672        }
673    }
674}
675
676impl VirtioInterrupt for VirtioInterruptMsix {
677    fn trigger(&self, int_type: VirtioInterruptType) -> Result<(), InterruptError> {
678        let vector = match int_type {
679            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
680            VirtioInterruptType::Queue(queue_index) => *self
681                .queues_vectors
682                .lock()
683                .unwrap()
684                .get(queue_index as usize)
685                .ok_or(InterruptError::InvalidVectorIndex(queue_index as usize))?,
686        };
687
688        if vector == VIRTQ_MSI_NO_VECTOR {
689            return Ok(());
690        }
691
692        let config = &mut self.msix_config.lock().unwrap();
693        let entry = &config.table_entries[vector as usize];
694        // In case the vector control register associated with the entry
695        // has its first bit set, this means the vector is masked and the
696        // device should not inject the interrupt.
697        // Instead, the Pending Bit Array table is updated to reflect there
698        // is a pending interrupt for this specific vector.
699        if config.masked || entry.masked() {
700            config.set_pba_bit(vector, false);
701            return Ok(());
702        }
703
704        self.vectors.trigger(vector as usize)
705    }
706
707    fn notifier(&self, int_type: VirtioInterruptType) -> Option<&EventFd> {
708        let vector = match int_type {
709            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
710            VirtioInterruptType::Queue(queue_index) => *self
711                .queues_vectors
712                .lock()
713                .unwrap()
714                .get(queue_index as usize)?,
715        };
716
717        self.vectors.notifier(vector as usize)
718    }
719
720    fn status(&self) -> Arc<AtomicU32> {
721        Arc::new(AtomicU32::new(0))
722    }
723
724    #[cfg(test)]
725    fn has_pending_interrupt(&self, interrupt_type: VirtioInterruptType) -> bool {
726        false
727    }
728
729    #[cfg(test)]
730    fn ack_interrupt(&self, interrupt_type: VirtioInterruptType) {
731        // Do nothing here
732    }
733}
734
735impl PciDevice for VirtioPciDevice {
736    fn write_config_register(
737        &mut self,
738        reg_idx: usize,
739        offset: u64,
740        data: &[u8],
741    ) -> Option<Arc<Barrier>> {
742        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
743        // is accessed. This capability has a special meaning as it allows the
744        // guest to access other capabilities without mapping the PCI BAR.
745        let base = reg_idx * 4;
746        if base + u64_to_usize(offset) >= self.cap_pci_cfg_info.offset
747            && base + u64_to_usize(offset) + data.len()
748                <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
749        {
750            let offset = base + u64_to_usize(offset) - self.cap_pci_cfg_info.offset;
751            self.write_cap_pci_cfg(offset, data)
752        } else {
753            self.configuration
754                .write_config_register(reg_idx, offset, data);
755            None
756        }
757    }
758
759    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
760        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
761        // is accessed. This capability has a special meaning as it allows the
762        // guest to access other capabilities without mapping the PCI BAR.
763        let base = reg_idx * 4;
764        if base >= self.cap_pci_cfg_info.offset
765            && base + 4 <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
766        {
767            let offset = base - self.cap_pci_cfg_info.offset;
768            let mut data = [0u8; 4];
769            let len = u32::from(self.cap_pci_cfg_info.cap.cap.length) as usize;
770            if len <= 4 {
771                self.read_cap_pci_cfg(offset, &mut data[..len]);
772                u32::from_le_bytes(data)
773            } else {
774                0
775            }
776        } else {
777            self.configuration.read_reg(reg_idx)
778        }
779    }
780
781    fn detect_bar_reprogramming(
782        &mut self,
783        reg_idx: usize,
784        data: &[u8],
785    ) -> Option<BarReprogrammingParams> {
786        self.configuration.detect_bar_reprogramming(reg_idx, data)
787    }
788
789    fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), DeviceRelocationError> {
790        // We only update our idea of the bar in order to support free_bars() above.
791        // The majority of the reallocation is done inside DeviceManager.
792        if self.bar_address == old_base {
793            self.bar_address = new_base;
794        }
795
796        Ok(())
797    }
798
799    fn read_bar(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
800        match offset {
801            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => {
802                self.common_config
803                    .read(o - COMMON_CONFIG_BAR_OFFSET, data, self.device.clone())
804            }
805            o if (ISR_CONFIG_BAR_OFFSET..ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE).contains(&o) => {
806                // We don't actually support legacy INT#x interrupts for VirtIO PCI devices
807                warn!("pci: read access to unsupported ISR status field");
808                data.fill(0);
809            }
810            o if (DEVICE_CONFIG_BAR_OFFSET..DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE)
811                .contains(&o) =>
812            {
813                let device = self.device.lock().unwrap();
814                device.read_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
815            }
816            o if (NOTIFICATION_BAR_OFFSET..NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE)
817                .contains(&o) =>
818            {
819                // Handled with ioeventfds.
820                warn!("pci: unexpected read to notification BAR. Offset {o:#x}");
821            }
822            o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => {
823                if let Some(interrupt) = &self.virtio_interrupt {
824                    interrupt
825                        .msix_config
826                        .lock()
827                        .unwrap()
828                        .read_table(o - MSIX_TABLE_BAR_OFFSET, data);
829                }
830            }
831            o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => {
832                if let Some(interrupt) = &self.virtio_interrupt {
833                    interrupt
834                        .msix_config
835                        .lock()
836                        .unwrap()
837                        .read_pba(o - MSIX_PBA_BAR_OFFSET, data);
838                }
839            }
840            _ => (),
841        }
842    }
843
844    fn write_bar(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
845        match offset {
846            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => {
847                self.common_config
848                    .write(o - COMMON_CONFIG_BAR_OFFSET, data, self.device.clone())
849            }
850            o if (ISR_CONFIG_BAR_OFFSET..ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE).contains(&o) => {
851                // We don't actually support legacy INT#x interrupts for VirtIO PCI devices
852                warn!("pci: access to unsupported ISR status field");
853            }
854            o if (DEVICE_CONFIG_BAR_OFFSET..DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE)
855                .contains(&o) =>
856            {
857                let mut device = self.device.lock().unwrap();
858                device.write_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
859            }
860            o if (NOTIFICATION_BAR_OFFSET..NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE)
861                .contains(&o) =>
862            {
863                // Handled with ioeventfds.
864                warn!("pci: unexpected write to notification BAR. Offset {o:#x}");
865            }
866            o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => {
867                if let Some(interrupt) = &self.virtio_interrupt {
868                    interrupt
869                        .msix_config
870                        .lock()
871                        .unwrap()
872                        .write_table(o - MSIX_TABLE_BAR_OFFSET, data);
873                }
874            }
875            o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => {
876                if let Some(interrupt) = &self.virtio_interrupt {
877                    interrupt
878                        .msix_config
879                        .lock()
880                        .unwrap()
881                        .write_pba(o - MSIX_PBA_BAR_OFFSET, data);
882                }
883            }
884            _ => (),
885        };
886
887        // Try and activate the device if the driver status has changed
888        if self.needs_activation() {
889            debug!("Activating device");
890            let interrupt = Arc::clone(self.virtio_interrupt.as_ref().unwrap());
891            match self
892                .virtio_device()
893                .lock()
894                .unwrap()
895                .activate(self.memory.clone(), interrupt.clone())
896            {
897                Ok(()) => self.device_activated.store(true, Ordering::SeqCst),
898                Err(err) => {
899                    error!("Error activating device: {err:?}");
900
901                    // Section 2.1.2 of the specification states that we need to send a device
902                    // configuration change interrupt
903                    let _ = interrupt.trigger(VirtioInterruptType::Config);
904                }
905            }
906        }
907
908        // Device has been reset by the driver
909        if self.device_activated.load(Ordering::SeqCst) && self.is_driver_init() {
910            let mut device = self.device.lock().unwrap();
911            let reset_result = device.reset();
912            match reset_result {
913                Some(_) => {
914                    // Upon reset the device returns its interrupt EventFD
915                    self.virtio_interrupt = None;
916                    self.device_activated.store(false, Ordering::SeqCst);
917
918                    // Reset queue readiness (changes queue_enable), queue sizes
919                    // and selected_queue as per spec for reset
920                    self.virtio_device()
921                        .lock()
922                        .unwrap()
923                        .queues_mut()
924                        .iter_mut()
925                        .for_each(Queue::reset);
926                    self.common_config.queue_select = 0;
927                }
928                None => {
929                    error!("Attempt to reset device when not implemented in underlying device");
930                    // TODO: currently we don't support device resetting, but we still
931                    // follow the spec and set the status field to 0.
932                    self.common_config.driver_status = DEVICE_INIT;
933                }
934            }
935        }
936        None
937    }
938}
939
940impl BusDevice for VirtioPciDevice {
941    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
942        self.read_bar(base, offset, data)
943    }
944
945    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
946        self.write_bar(base, offset, data)
947    }
948}
949
950#[cfg(test)]
951mod tests {
952    use std::sync::{Arc, Mutex};
953
954    use event_manager::MutEventSubscriber;
955    use linux_loader::loader::Cmdline;
956    use pci::{PciCapabilityId, PciClassCode, PciSubclass};
957    use vm_memory::{ByteValued, Le32};
958
959    use super::{PciCapabilityType, VirtioPciDevice};
960    use crate::arch::MEM_64BIT_DEVICES_START;
961    use crate::builder::tests::default_vmm;
962    use crate::devices::virtio::device::VirtioDevice;
963    use crate::devices::virtio::device_status::{ACKNOWLEDGE, DRIVER, DRIVER_OK, FEATURES_OK};
964    use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
965    use crate::devices::virtio::generated::virtio_ids;
966    use crate::devices::virtio::rng::Entropy;
967    use crate::devices::virtio::transport::pci::device::{
968        COMMON_CONFIG_BAR_OFFSET, COMMON_CONFIG_SIZE, DEVICE_CONFIG_BAR_OFFSET, DEVICE_CONFIG_SIZE,
969        ISR_CONFIG_BAR_OFFSET, ISR_CONFIG_SIZE, NOTIFICATION_BAR_OFFSET, NOTIFICATION_SIZE,
970        NOTIFY_OFF_MULTIPLIER, PciVirtioSubclass, VirtioPciCap, VirtioPciCfgCap,
971        VirtioPciNotifyCap,
972    };
973    use crate::pci::PciDevice;
974    use crate::pci::msix::MsixCap;
975    use crate::rate_limiter::RateLimiter;
976    use crate::utils::u64_to_usize;
977    use crate::{Vm, Vmm};
978
979    fn create_vmm_with_virtio_pci_device() -> Vmm {
980        let mut vmm = default_vmm();
981        vmm.device_manager.enable_pci(&vmm.vm);
982        let entropy = Arc::new(Mutex::new(Entropy::new(RateLimiter::default()).unwrap()));
983        vmm.device_manager
984            .attach_virtio_device(
985                &vmm.vm,
986                "rng".to_string(),
987                entropy.clone(),
988                &mut Cmdline::new(1024).unwrap(),
989                false,
990            )
991            .unwrap();
992        vmm
993    }
994
995    fn get_virtio_device(vmm: &Vmm) -> Arc<Mutex<VirtioPciDevice>> {
996        vmm.device_manager
997            .pci_devices
998            .get_virtio_device(virtio_ids::VIRTIO_ID_RNG, "rng")
999            .unwrap()
1000            .clone()
1001    }
1002
1003    #[test]
1004    fn test_pci_device_config() {
1005        let mut vmm = create_vmm_with_virtio_pci_device();
1006        let device = get_virtio_device(&vmm);
1007        let mut locked_virtio_pci_device = device.lock().unwrap();
1008
1009        // For more information for the values we are checking here look into the VirtIO spec here:
1010        // https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-1220007
1011        // and PCI Header type 0 layout here: https://wiki.osdev.org/PCI#Configuration_Space
1012
1013        //              |  16 bits  |  16 bits  |
1014        //              |-----------|-----------|
1015        // regiger 0x0: | Device ID | Vendor ID |
1016        //
1017        // Vendor ID of VirtIO devices is 0x1af4
1018        let reg0 = locked_virtio_pci_device.read_config_register(0);
1019        assert_eq!(reg0 & 0xffff, 0x1af4);
1020        // VirtIO PCI device IDs are in the range [0x1000, 0x107f]. (We are not using transitional
1021        // device IDs).
1022        let devid = reg0 >> 16;
1023        assert!(
1024            (0x1000..=0x107f).contains(&devid),
1025            "Device ID check: {:#x} >= 0x1000 && {:#x} <= 0x107f",
1026            devid,
1027            devid
1028        );
1029
1030        //              |   16 bits  |  16 bits  |
1031        //              |------------|-----------|
1032        // regiger 0x1: |   Status   |  Command  |
1033        // We offer the capabilities list (bit 4 of status register) at offset 0x34
1034        let reg1 = locked_virtio_pci_device.read_config_register(1);
1035        assert_eq!(reg1, 0x0010_0000);
1036
1037        //               |   8 bits   |  8 bits  | 8 bits  |    8 bits   |
1038        // register 0x2: | Class code | Subclass | Prog IF | Revision ID |
1039        //
1040        // Class code: VIRTIO_PCI_VENDOR_ID for all VirtIO devices
1041        // Subclass: PciClassCode::NetworkController for net, PciClassCode::MassStore for block
1042        //           PciClassCode::Other for everything else
1043        // Prog IF: A register defining some programmable interface register. 0 for VirtIO devices
1044        // Revision ID: 0x1 for modern VirtIO devices
1045        let reg2 = locked_virtio_pci_device.read_config_register(2);
1046        assert_eq!(reg2, 0xffff_0001);
1047        let class_code = ((reg2 >> 24) & 0xff) as u8;
1048        assert_eq!(class_code, PciClassCode::Other.get_register_value());
1049        let subclass = ((reg2 >> 16) & 0xff) as u8;
1050        assert_eq!(
1051            subclass,
1052            PciVirtioSubclass::NonTransitionalBase.get_register_value()
1053        );
1054        let prog_if = ((reg2 >> 8) & 0xff) as u8;
1055        assert_eq!(prog_if, 0);
1056        let revision_id = reg2 & 0xff;
1057        assert_eq!(revision_id, 0x1);
1058
1059        //               |   8 bits   |    8 bits   |    8 bits     |      8 bits     |
1060        // register 0x3: |    BIST    | Header Type | Latency timer | Cache line size |
1061        //
1062        // BIST: status and control for self test of PCI devices. Always 0 for VirtIO devices
1063        // HeaderType: 0x0 for general devices
1064        // LatencyTimer: Latency timer in units of PCI bus clocks, 0 for VirtIO
1065        // Cache Line size: 0 for VirtIO devices
1066        let reg3 = locked_virtio_pci_device.read_config_register(3);
1067        assert_eq!(reg3, 0x0);
1068
1069        // register 0xa: Cardbus CIS pointer
1070        //
1071        // We don't emulate CardBus
1072        let reg10 = locked_virtio_pci_device.read_config_register(0xa);
1073        assert_eq!(reg10, 0);
1074
1075        //              |    16 bits   |       16 bits      |
1076        // regiger 0xb: | Subsystem ID | Subsystem vendor ID|
1077        //
1078        // For us Subsystem ID is same as device ID and subsystem vendor ID is same as vendor ID
1079        // (reg 0x0)
1080        let reg11 = locked_virtio_pci_device.read_config_register(0xb);
1081        assert_eq!(reg11, reg0);
1082
1083        // register 0xc: Expansion ROM base address: 0x0 for us
1084        let reg12 = locked_virtio_pci_device.read_config_register(0xc);
1085        assert_eq!(reg12, 0);
1086
1087        //               |  24 bits |        8 bits        |
1088        // register 0xd: | Reserved | Capabilities pointer |
1089        let reg13 = locked_virtio_pci_device.read_config_register(0xd);
1090        assert_eq!(reg13 >> 24, 0);
1091
1092        // register 0xe: Reserved
1093        let reg14 = locked_virtio_pci_device.read_config_register(0xe);
1094        assert_eq!(reg14, 0);
1095
1096        //               |    8 bits   |   8 bits  |    8 bits     |     8 bits     |
1097        // register 0xf: | max latency | min grant | Interrupt pin | Interrupt line |
1098        //
1099        // We don't specify any of those
1100        let reg15 = locked_virtio_pci_device.read_config_register(0xf);
1101        assert_eq!(reg15, 0);
1102    }
1103
1104    #[test]
1105    fn test_reading_bars() {
1106        let mut vmm = create_vmm_with_virtio_pci_device();
1107        let device = get_virtio_device(&vmm);
1108        let mut locked_virtio_pci_device = device.lock().unwrap();
1109
1110        // According to OSdev wiki (https://wiki.osdev.org/PCI#Configuration_Space):
1111        //
1112        // When you want to retrieve the actual base address of a BAR, be sure to mask the lower
1113        // bits. For 16-bit Memory Space BARs, you calculate (BAR[x] & 0xFFF0). For 32-bit Memory
1114        // Space BARs, you calculate (BAR[x] & 0xFFFFFFF0). For 64-bit Memory Space BARs, you
1115        // calculate ((BAR[x] & 0xFFFFFFF0) + ((BAR[x + 1] & 0xFFFFFFFF) << 32)) For I/O Space
1116        // BARs, you calculate (BAR[x] & 0xFFFFFFFC).
1117
1118        // We are allocating a single 64-bit MMIO bar for VirtIO capabilities list. As a result, we
1119        // are using the first two BAR registers from the configuration space.
1120        //
1121        // The BAR address layout is as follows:
1122        //
1123        // |          Bits 31-4           |     Bit 3    | Bits 2-1 |   Bit 0  |
1124        // | 16-Byte Aligned Base Address | Prefetchable |   Type   | Always 0 |
1125        //
1126        // For 64-bit addresses though a second BAR is used to hold the upper 32 bits
1127        // of the address. Prefetchable and type will be help in the lower bits of the
1128        // first bar along with the lower 32-bits of the address which is always 16-bytes
1129        // aligned.
1130        let bar_addr_lo = locked_virtio_pci_device.read_config_register(0x4);
1131        let bar_addr_hi = locked_virtio_pci_device.read_config_register(0x5);
1132        let bar_addr = bar_addr_lo as u64 + ((bar_addr_hi as u64) << 32);
1133
1134        // Bit 0 always 0
1135        assert_eq!(bar_addr & 0x1, 0);
1136        // Type is 0x2 meaning 64-bit BAR
1137        assert_eq!((bar_addr & 0x6) >> 1, 2);
1138        // The actual address of the BAR should be the first available address of our 64-bit MMIO
1139        // region
1140        assert_eq!(bar_addr & 0xffff_ffff_ffff_fff0, MEM_64BIT_DEVICES_START);
1141
1142        // Reading the BAR size is a bit more convoluted. According to OSDev wiki:
1143        //
1144        // To determine the amount of address space needed by a PCI device, you must save the
1145        // original value of the BAR, write a value of all 1's to the register, then read it back.
1146        // The amount of memory can then be determined by masking the information bits, performing
1147        // a bitwise NOT ('~' in C), and incrementing the value by 1.
1148
1149        locked_virtio_pci_device.write_config_register(0x4, 0, &[0xff, 0xff, 0xff, 0xff]);
1150        // Read the lower size bits and mask out the last 4 bits include Prefetchable, Type and
1151        // hardwired-0
1152        let bar_size_lo = locked_virtio_pci_device.read_config_register(0x4) as u64 & 0xfffffff0;
1153        locked_virtio_pci_device.write_config_register(0x5, 0, &[0xff, 0xff, 0xff, 0xff]);
1154        let bar_size_hi = locked_virtio_pci_device.read_config_register(0x5) as u64;
1155        let bar_size = !((bar_size_hi << 32) | bar_size_lo) + 1;
1156
1157        // We create a capabilities BAR region of 0x80000 bytes
1158        assert_eq!(bar_size, 0x80000);
1159    }
1160
1161    fn read_virtio_pci_cap(
1162        device: &mut VirtioPciDevice,
1163        offset: u32,
1164    ) -> (PciCapabilityId, u8, VirtioPciCap) {
1165        let word1 = device.read_config_register((offset >> 2) as usize);
1166        let word2 = device.read_config_register((offset >> 2) as usize + 1);
1167        let word3 = device.read_config_register((offset >> 2) as usize + 2);
1168        let word4 = device.read_config_register((offset >> 2) as usize + 3);
1169
1170        let id = PciCapabilityId::from((word1 & 0xff) as u8);
1171        let next = ((word1 >> 8) & 0xff) as u8;
1172
1173        let cap = VirtioPciCap {
1174            cap_len: ((word1 >> 16) & 0xff) as u8,
1175            cfg_type: ((word1 >> 24) & 0xff) as u8,
1176            pci_bar: (word2 & 0xff) as u8,
1177            id: ((word2 >> 8) & 0xff) as u8,
1178            padding: [0u8; 2],
1179            offset: Le32::from(word3),
1180            length: Le32::from(word4),
1181        };
1182
1183        // We only ever set a single capability of a type. It's ID is 0.
1184        assert_eq!(cap.id, 0);
1185
1186        (id, next, cap)
1187    }
1188
1189    fn read_virtio_notification_cap(
1190        device: &mut VirtioPciDevice,
1191        offset: u32,
1192    ) -> (PciCapabilityId, u8, VirtioPciNotifyCap) {
1193        let (id, next, cap) = read_virtio_pci_cap(device, offset);
1194        let word5 = device.read_config_register((offset >> 2) as usize + 4);
1195
1196        let notification_cap = VirtioPciNotifyCap {
1197            cap,
1198            notify_off_multiplier: Le32::from(word5),
1199        };
1200
1201        (id, next, notification_cap)
1202    }
1203
1204    fn read_virtio_pci_config_cap(
1205        device: &mut VirtioPciDevice,
1206        offset: u32,
1207    ) -> (PciCapabilityId, u8, VirtioPciCfgCap) {
1208        let (id, next, cap) = read_virtio_pci_cap(device, offset);
1209        let word5 = device.read_config_register((offset >> 2) as usize + 4);
1210
1211        let pci_cfg_cap = VirtioPciCfgCap {
1212            cap,
1213            pci_cfg_data: word5.as_slice().try_into().unwrap(),
1214        };
1215
1216        (id, next, pci_cfg_cap)
1217    }
1218
1219    fn read_msix_cap(device: &mut VirtioPciDevice, offset: u32) -> (PciCapabilityId, u8, MsixCap) {
1220        let word1 = device.read_config_register((offset >> 2) as usize);
1221        let table = device.read_config_register((offset >> 2) as usize + 1);
1222        let pba = device.read_config_register((offset >> 2) as usize + 2);
1223
1224        let id = PciCapabilityId::from((word1 & 0xff) as u8);
1225        let next = ((word1 >> 8) & 0xff) as u8;
1226
1227        let cap = MsixCap {
1228            msg_ctl: (word1 & 0xffff) as u16,
1229            table,
1230            pba,
1231        };
1232
1233        (id, next, cap)
1234    }
1235
1236    fn capabilities_start(device: &mut VirtioPciDevice) -> u32 {
1237        device.read_config_register(0xd) & 0xfc
1238    }
1239
1240    #[test]
1241    fn test_capabilities() {
1242        let mut vmm = create_vmm_with_virtio_pci_device();
1243        let device = get_virtio_device(&vmm);
1244        let mut locked_virtio_pci_device = device.lock().unwrap();
1245
1246        // VirtIO devices need to expose a set of mandatory capabilities:
1247        // * Common configuration
1248        // * Notifications
1249        // * ISR status
1250        // * PCI configuration access
1251        //
1252        // and, optionally, a device-specific configuration area for those devices that need it.
1253        //
1254        // We always expose all 5 capabilities, so check that the capabilities are present
1255
1256        // Common config
1257        let common_config_cap_offset = capabilities_start(&mut locked_virtio_pci_device);
1258        let (id, next, cap) =
1259            read_virtio_pci_cap(&mut locked_virtio_pci_device, common_config_cap_offset);
1260        assert_eq!(id, PciCapabilityId::VendorSpecific);
1261        assert_eq!(cap.cap_len as usize, size_of::<VirtioPciCap>() + 2);
1262        assert_eq!(cap.cfg_type, PciCapabilityType::Common as u8);
1263        assert_eq!(cap.pci_bar, 0);
1264        assert_eq!(u32::from(cap.offset) as u64, COMMON_CONFIG_BAR_OFFSET);
1265        assert_eq!(u32::from(cap.length) as u64, COMMON_CONFIG_SIZE);
1266        assert_eq!(next as u32, common_config_cap_offset + cap.cap_len as u32);
1267
1268        // ISR
1269        let isr_cap_offset = next as u32;
1270        let (id, next, cap) = read_virtio_pci_cap(&mut locked_virtio_pci_device, isr_cap_offset);
1271        assert_eq!(id, PciCapabilityId::VendorSpecific);
1272        assert_eq!(cap.cap_len as usize, size_of::<VirtioPciCap>() + 2);
1273        assert_eq!(cap.cfg_type, PciCapabilityType::Isr as u8);
1274        assert_eq!(cap.pci_bar, 0);
1275        assert_eq!(u32::from(cap.offset) as u64, ISR_CONFIG_BAR_OFFSET);
1276        assert_eq!(u32::from(cap.length) as u64, ISR_CONFIG_SIZE);
1277        assert_eq!(next as u32, isr_cap_offset + cap.cap_len as u32);
1278
1279        // Device config
1280        let device_config_cap_offset = next as u32;
1281        let (id, next, cap) =
1282            read_virtio_pci_cap(&mut locked_virtio_pci_device, device_config_cap_offset);
1283        assert_eq!(id, PciCapabilityId::VendorSpecific);
1284        assert_eq!(cap.cap_len as usize, size_of::<VirtioPciCap>() + 2);
1285        assert_eq!(cap.cfg_type, PciCapabilityType::Device as u8);
1286        assert_eq!(cap.pci_bar, 0);
1287        assert_eq!(u32::from(cap.offset) as u64, DEVICE_CONFIG_BAR_OFFSET);
1288        assert_eq!(u32::from(cap.length) as u64, DEVICE_CONFIG_SIZE);
1289        assert_eq!(next as u32, device_config_cap_offset + cap.cap_len as u32);
1290
1291        let notification_cap_offset = next as u32;
1292        let (id, next, cap) =
1293            read_virtio_notification_cap(&mut locked_virtio_pci_device, notification_cap_offset);
1294        assert_eq!(id, PciCapabilityId::VendorSpecific);
1295        assert_eq!(
1296            cap.cap.cap_len as usize,
1297            size_of::<VirtioPciNotifyCap>() + 2
1298        );
1299        assert_eq!(cap.cap.cfg_type, PciCapabilityType::Notify as u8);
1300        assert_eq!(cap.cap.pci_bar, 0);
1301        assert_eq!(u32::from(cap.cap.offset) as u64, NOTIFICATION_BAR_OFFSET);
1302        assert_eq!(u32::from(cap.cap.length) as u64, NOTIFICATION_SIZE);
1303        assert_eq!(
1304            next as u32,
1305            notification_cap_offset + cap.cap.cap_len as u32
1306        );
1307        assert_eq!(u32::from(cap.notify_off_multiplier), NOTIFY_OFF_MULTIPLIER);
1308
1309        let pci_config_cap_offset = next as u32;
1310        let (id, next, cap) =
1311            read_virtio_pci_config_cap(&mut locked_virtio_pci_device, pci_config_cap_offset);
1312        assert_eq!(id, PciCapabilityId::VendorSpecific);
1313        assert_eq!(cap.cap.cap_len as usize, size_of::<VirtioPciCfgCap>() + 2);
1314        assert_eq!(cap.cap.cfg_type, PciCapabilityType::Pci as u8);
1315        assert_eq!(cap.cap.pci_bar, 0);
1316        assert_eq!(u32::from(cap.cap.offset) as u64, 0);
1317        assert_eq!(u32::from(cap.cap.length) as u64, 0);
1318        assert_eq!(
1319            locked_virtio_pci_device.cap_pci_cfg_info.offset,
1320            pci_config_cap_offset as usize + 2
1321        );
1322        assert_eq!(locked_virtio_pci_device.cap_pci_cfg_info.cap, cap);
1323        assert_eq!(next as u32, pci_config_cap_offset + cap.cap.cap_len as u32);
1324
1325        let msix_cap_offset = next as u32;
1326        let (id, next, cap) = read_msix_cap(&mut locked_virtio_pci_device, msix_cap_offset);
1327        assert_eq!(id, PciCapabilityId::MsiX);
1328        assert_eq!(next, 0);
1329    }
1330
1331    fn cap_pci_cfg_read(device: &mut VirtioPciDevice, bar_offset: u32, length: u32) -> u32 {
1332        let pci_config_cap_offset = capabilities_start(device) as usize
1333            + 3 * (size_of::<VirtioPciCap>() + 2)
1334            + (size_of::<VirtioPciNotifyCap>() + 2);
1335
1336        // To program the access through the PCI config capability mechanism, we need to write the
1337        // bar offset and read length in the `VirtioPciCfgCap::cap.offset` and
1338        // `VirtioPciCfgCap::length` fields. These are the third and fourth word respectively
1339        // within the capability. The fifth word of the capability should contain the data
1340        let offset_register = (pci_config_cap_offset + 8) >> 2;
1341        let length_register = (pci_config_cap_offset + 12) >> 2;
1342        let data_register = (pci_config_cap_offset + 16) >> 2;
1343
1344        device.write_config_register(offset_register, 0, bar_offset.as_slice());
1345        device.write_config_register(length_register, 0, length.as_slice());
1346        device.read_config_register(data_register)
1347    }
1348
1349    fn cap_pci_cfg_write(device: &mut VirtioPciDevice, bar_offset: u32, length: u32, data: &[u8]) {
1350        let pci_config_cap_offset = capabilities_start(device) as usize
1351            + 3 * (size_of::<VirtioPciCap>() + 2)
1352            + (size_of::<VirtioPciNotifyCap>() + 2);
1353
1354        // To program the access through the PCI config capability mechanism, we need to write the
1355        // bar offset and read length in the `VirtioPciCfgCap::cap.offset` and
1356        // `VirtioPciCfgCap::length` fields. These are the third and fourth word respectively
1357        // within the capability. The fifth word of the capability should contain the data
1358        let offset_register = (pci_config_cap_offset + 8) >> 2;
1359        let length_register = (pci_config_cap_offset + 12) >> 2;
1360        let data_register = (pci_config_cap_offset + 16) >> 2;
1361
1362        device.write_config_register(offset_register, 0, bar_offset.as_slice());
1363        device.write_config_register(length_register, 0, length.as_slice());
1364        device.write_config_register(data_register, 0, data);
1365    }
1366
1367    #[test]
1368    fn test_pci_configuration_cap() {
1369        let mut vmm = create_vmm_with_virtio_pci_device();
1370        let device = get_virtio_device(&vmm);
1371        let mut locked_virtio_pci_device = device.lock().unwrap();
1372
1373        // Let's read the number of queues of the entropy device
1374        // That information is located at offset 0x12 past the BAR region belonging to the common
1375        // config capability.
1376        let bar_offset = u32::try_from(COMMON_CONFIG_BAR_OFFSET).unwrap() + 0x12;
1377        let len = 2u32;
1378        let num_queues = cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, len);
1379        assert_eq!(num_queues, 1);
1380
1381        // Let's update the driver features and see if that takes effect
1382        let bar_offset = u32::try_from(COMMON_CONFIG_BAR_OFFSET).unwrap() + 0x14;
1383        let len = 1u32;
1384        let device_status = cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, len);
1385        assert_eq!(device_status, 0);
1386        cap_pci_cfg_write(
1387            &mut locked_virtio_pci_device,
1388            bar_offset,
1389            len,
1390            0x42u32.as_slice(),
1391        );
1392        let device_status = cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, len);
1393        assert_eq!(device_status, 0x42);
1394
1395        // reads with out-of-bounds lengths should return 0s
1396        assert_eq!(
1397            cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, 8),
1398            0
1399        );
1400        // writes out-of-bounds lengths should have no effect
1401        cap_pci_cfg_write(
1402            &mut locked_virtio_pci_device,
1403            bar_offset,
1404            8,
1405            0x84u32.as_slice(),
1406        );
1407        assert_eq!(
1408            cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, 1),
1409            0x42
1410        );
1411        // Make sure that we handle properly from/to a BAR where the access length doesn't match
1412        // what we've set in the capability's length
1413        cap_pci_cfg_write(
1414            &mut locked_virtio_pci_device,
1415            bar_offset,
1416            2,
1417            0x42u8.as_slice(),
1418        );
1419    }
1420
1421    fn isr_status_read(device: &mut VirtioPciDevice) -> u32 {
1422        let mut data = 0u32;
1423        device.read_bar(0, ISR_CONFIG_BAR_OFFSET, data.as_mut_slice());
1424        data
1425    }
1426
1427    fn isr_status_write(device: &mut VirtioPciDevice, data: u32) {
1428        device.write_bar(0, ISR_CONFIG_BAR_OFFSET, data.as_slice());
1429    }
1430
1431    #[test]
1432    fn test_isr_capability() {
1433        let mut vmm = create_vmm_with_virtio_pci_device();
1434        let device = get_virtio_device(&vmm);
1435        let mut locked_virtio_pci_device = device.lock().unwrap();
1436
1437        // We don't support legacy interrupts so reads to ISR BAR should always return 0s and
1438        // writes to it should not have any effect
1439        assert_eq!(isr_status_read(&mut locked_virtio_pci_device), 0);
1440        isr_status_write(&mut locked_virtio_pci_device, 0x1312);
1441        assert_eq!(isr_status_read(&mut locked_virtio_pci_device), 0);
1442    }
1443
1444    #[test]
1445    fn test_notification_capability() {
1446        let mut vmm = create_vmm_with_virtio_pci_device();
1447        let device = get_virtio_device(&vmm);
1448        let mut locked_virtio_pci_device = device.lock().unwrap();
1449
1450        let notification_cap_offset = (capabilities_start(&mut locked_virtio_pci_device) as usize
1451            + 3 * (size_of::<VirtioPciCap>() + 2))
1452            .try_into()
1453            .unwrap();
1454
1455        let (_, _, notify_cap) =
1456            read_virtio_notification_cap(&mut locked_virtio_pci_device, notification_cap_offset);
1457
1458        // We do not offer `VIRTIO_F_NOTIFICATION_DATA` so:
1459        // * `cap.offset` MUST by 2-byte aligned
1460        assert_eq!(u32::from(notify_cap.cap.offset) & 0x3, 0);
1461        // * The device MUST either present notify_off_multiplier as an even power of 2, or present
1462        //   notify_off_multiplier as 0.
1463        let multiplier = u32::from(notify_cap.notify_off_multiplier);
1464        assert!(multiplier.is_power_of_two() && multiplier.trailing_zeros() % 2 == 0);
1465        // * For all queues, the value cap.length presented by the device MUST satisfy:
1466        //
1467        //   `cap.length >= queue_notify_off * notify_off_multiplier + 2`
1468        //
1469        // The spec allows for up to 65536 queues, but in reality the device we are using with most
1470        // queues is vsock (3). Let's check here for 16, projecting for future devices and
1471        // use-cases such as multiple queue pairs in network devices
1472        assert!(u32::from(notify_cap.cap.length) >= 15 * multiplier + 2);
1473
1474        // Reads and writes to the notification region of the BAR are handled by IoEvent file
1475        // descriptors. Any such accesses should have no effects.
1476        let data = [0x42u8; u64_to_usize(NOTIFICATION_SIZE)];
1477        locked_virtio_pci_device.write_bar(0, NOTIFICATION_BAR_OFFSET, &data);
1478        let mut buffer = [0x0; u64_to_usize(NOTIFICATION_SIZE)];
1479        locked_virtio_pci_device.read_bar(0, NOTIFICATION_BAR_OFFSET, &mut buffer);
1480        assert_eq!(buffer, [0u8; u64_to_usize(NOTIFICATION_SIZE)]);
1481    }
1482
1483    fn write_driver_status(device: &mut VirtioPciDevice, status: u8) {
1484        device.write_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x14, status.as_slice());
1485    }
1486
1487    fn read_driver_status(device: &mut VirtioPciDevice) -> u8 {
1488        let mut status = 0u8;
1489        device.read_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x14, status.as_mut_slice());
1490        status
1491    }
1492
1493    fn read_device_features(device: &mut VirtioPciDevice) -> u64 {
1494        let mut features_lo = 0u32;
1495        device.write_bar(0, COMMON_CONFIG_BAR_OFFSET, 0u32.as_slice());
1496        device.read_bar(
1497            0,
1498            COMMON_CONFIG_BAR_OFFSET + 0x4,
1499            features_lo.as_mut_slice(),
1500        );
1501        let mut features_hi = 0u32;
1502        device.write_bar(0, COMMON_CONFIG_BAR_OFFSET, 1u32.as_slice());
1503        device.read_bar(
1504            0,
1505            COMMON_CONFIG_BAR_OFFSET + 0x4,
1506            features_hi.as_mut_slice(),
1507        );
1508
1509        features_lo as u64 | ((features_hi as u64) << 32)
1510    }
1511
1512    fn write_driver_features(device: &mut VirtioPciDevice, features: u64) {
1513        device.write_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x8, 0u32.as_slice());
1514        device.write_bar(
1515            0,
1516            COMMON_CONFIG_BAR_OFFSET + 0xc,
1517            ((features & 0xffff_ffff) as u32).as_slice(),
1518        );
1519        device.write_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x8, 1u32.as_slice());
1520        device.write_bar(
1521            0,
1522            COMMON_CONFIG_BAR_OFFSET + 0xc,
1523            (((features >> 32) & 0xffff_ffff) as u32).as_slice(),
1524        );
1525    }
1526
1527    fn setup_queues(device: &mut VirtioPciDevice) {
1528        device.write_bar(
1529            0,
1530            COMMON_CONFIG_BAR_OFFSET + 0x20,
1531            0x8000_0000u64.as_slice(),
1532        );
1533        device.write_bar(
1534            0,
1535            COMMON_CONFIG_BAR_OFFSET + 0x28,
1536            0x8000_1000u64.as_slice(),
1537        );
1538        device.write_bar(
1539            0,
1540            COMMON_CONFIG_BAR_OFFSET + 0x30,
1541            0x8000_2000u64.as_slice(),
1542        );
1543        device.write_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x1c, 1u16.as_slice());
1544    }
1545
1546    #[test]
1547    fn test_device_initialization() {
1548        let mut vmm = create_vmm_with_virtio_pci_device();
1549        let device = get_virtio_device(&vmm);
1550        let mut locked_virtio_pci_device = device.lock().unwrap();
1551
1552        assert!(locked_virtio_pci_device.is_driver_init());
1553        assert!(!locked_virtio_pci_device.is_driver_ready());
1554        assert!(
1555            !locked_virtio_pci_device
1556                .device_activated
1557                .load(std::sync::atomic::Ordering::SeqCst)
1558        );
1559
1560        write_driver_status(
1561            &mut locked_virtio_pci_device,
1562            ACKNOWLEDGE.try_into().unwrap(),
1563        );
1564        write_driver_status(
1565            &mut locked_virtio_pci_device,
1566            (ACKNOWLEDGE | DRIVER).try_into().unwrap(),
1567        );
1568        assert!(!locked_virtio_pci_device.is_driver_init());
1569        assert!(!locked_virtio_pci_device.is_driver_ready());
1570        assert!(
1571            !locked_virtio_pci_device
1572                .device_activated
1573                .load(std::sync::atomic::Ordering::SeqCst)
1574        );
1575
1576        let status = read_driver_status(&mut locked_virtio_pci_device);
1577        assert_eq!(status as u32, ACKNOWLEDGE | DRIVER);
1578
1579        // Entropy device just offers VIRTIO_F_VERSION_1
1580        let offered_features = read_device_features(&mut locked_virtio_pci_device);
1581        assert_eq!(offered_features, 1 << VIRTIO_F_VERSION_1);
1582        // ACK features
1583        write_driver_features(&mut locked_virtio_pci_device, offered_features);
1584        write_driver_status(
1585            &mut locked_virtio_pci_device,
1586            (ACKNOWLEDGE | DRIVER | FEATURES_OK).try_into().unwrap(),
1587        );
1588        let status = read_driver_status(&mut locked_virtio_pci_device);
1589        assert!((status & u8::try_from(FEATURES_OK).unwrap()) != 0);
1590
1591        assert!(!locked_virtio_pci_device.is_driver_init());
1592        assert!(!locked_virtio_pci_device.is_driver_ready());
1593        assert!(
1594            !locked_virtio_pci_device
1595                .device_activated
1596                .load(std::sync::atomic::Ordering::SeqCst)
1597        );
1598
1599        setup_queues(&mut locked_virtio_pci_device);
1600
1601        write_driver_status(
1602            &mut locked_virtio_pci_device,
1603            (ACKNOWLEDGE | DRIVER | FEATURES_OK | DRIVER_OK)
1604                .try_into()
1605                .unwrap(),
1606        );
1607
1608        assert!(!locked_virtio_pci_device.is_driver_init());
1609        assert!(locked_virtio_pci_device.is_driver_ready());
1610        assert!(
1611            locked_virtio_pci_device
1612                .device_activated
1613                .load(std::sync::atomic::Ordering::SeqCst)
1614        );
1615    }
1616}