vmm/devices/virtio/vsock/device.rs

1// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the THIRD-PARTY file.
7
8//! This is the `VirtioDevice` implementation for our vsock device. It handles the virtio-level
9//! device logic: feature negotiation, device configuration, and device activation.
10//!
11//! We aim to conform to the VirtIO v1.1 spec:
12//! https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html
13//!
14//! The vsock device has two input parameters: a CID to identify the device, and a
15//! `VsockBackend` to use for offloading vsock traffic.
16//!
17//! Upon its activation, the vsock device registers handlers for the following events/FDs:
18//! - an RX queue FD;
19//! - a TX queue FD;
20//! - an event queue FD; and
21//! - a backend FD.
22
23use std::fmt::Debug;
24use std::ops::Deref;
25use std::sync::Arc;
26
27use log::{error, info, warn};
28use vmm_sys_util::eventfd::EventFd;
29
30use super::super::super::DeviceError;
31use super::defs::uapi;
32use super::packet::{VSOCK_PKT_HDR_SIZE, VsockPacketRx, VsockPacketTx};
33use super::{VsockBackend, defs};
34use crate::devices::virtio::ActivateError;
35use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice};
36use crate::devices::virtio::generated::virtio_config::{VIRTIO_F_IN_ORDER, VIRTIO_F_VERSION_1};
37use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_VSOCK;
38use crate::devices::virtio::queue::{InvalidAvailIdx, Queue as VirtQueue};
39use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
40use crate::devices::virtio::vsock::VsockError;
41use crate::devices::virtio::vsock::metrics::METRICS;
42use crate::impl_device_type;
43use crate::logger::IncMetric;
44use crate::utils::byte_order;
45use crate::vstate::memory::{Bytes, GuestMemoryMmap};
46
/// Index of the RX (device-to-driver) virtio queue.
pub(crate) const RXQ_INDEX: usize = 0;
/// Index of the TX (driver-to-device) virtio queue.
pub(crate) const TXQ_INDEX: usize = 1;
/// Index of the event virtio queue.
pub(crate) const EVQ_INDEX: usize = 2;

/// Virtio-vsock event ID telling the driver the transport was reset; written
/// into event-queue buffers by `send_transport_reset_event`.
pub(crate) const VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: u32 = 0;

/// The virtio features supported by our vsock device:
/// - VIRTIO_F_VERSION_1: the device conforms to at least version 1.0 of the VirtIO spec.
/// - VIRTIO_F_IN_ORDER: the device returns used buffers in the same order that the driver makes
///   them available.
pub(crate) const AVAIL_FEATURES: u64 =
    (1 << VIRTIO_F_VERSION_1 as u64) | (1 << VIRTIO_F_IN_ORDER as u64);
59
/// Structure representing the vsock device.
#[derive(Debug)]
pub struct Vsock<B> {
    // Guest CID identifying this device; exposed to the driver via `read_config`.
    cid: u64,
    // The device's virtio queues (RX, TX, event — see `RXQ_INDEX`/`TXQ_INDEX`/`EVQ_INDEX`).
    pub(crate) queues: Vec<VirtQueue>,
    // One kick EventFd per queue, created alongside `queues` in `with_queues`.
    pub(crate) queue_events: Vec<EventFd>,
    // The vsock backend that guest traffic is offloaded to.
    pub(crate) backend: B,
    // Feature bits the device advertises to the driver.
    pub(crate) avail_features: u64,
    // Feature bits the driver has acknowledged so far.
    pub(crate) acked_features: u64,
    // This EventFd is the only one initially registered for a vsock device, and is used to convert
    // a VirtioDevice::activate call into an EventHandler read event which allows the other events
    // (queue and backend related) to be registered post virtio device activation. That's
    // mostly something we wanted to happen for the backend events, to prevent (potentially)
    // continuous triggers from happening before the device gets activated.
    pub(crate) activate_evt: EventFd,
    // Activation state; when activated it carries the guest memory and interrupt handles.
    pub(crate) device_state: DeviceState,

    /// Reusable RX packet state, refilled on each `process_rx` iteration.
    pub rx_packet: VsockPacketRx,
    /// Reusable TX packet state, re-parsed on each `process_tx` iteration.
    pub tx_packet: VsockPacketTx,
}
80
81// TODO: Detect / handle queue deadlock:
82// 1. If the driver halts RX queue processing, we'll need to notify `self.backend`, so that it can
83//    unregister any EPOLLIN listeners, since otherwise it will keep spinning, unable to consume its
84//    EPOLLIN events.
85
86impl<B> Vsock<B>
87where
88    B: VsockBackend + Debug,
89{
90    /// Auxiliary function for creating a new virtio-vsock device with the given VM CID, vsock
91    /// backend and empty virtio queues.
92    pub fn with_queues(
93        cid: u64,
94        backend: B,
95        queues: Vec<VirtQueue>,
96    ) -> Result<Vsock<B>, VsockError> {
97        let mut queue_events = Vec::new();
98        for _ in 0..queues.len() {
99            queue_events.push(EventFd::new(libc::EFD_NONBLOCK).map_err(VsockError::EventFd)?);
100        }
101
102        Ok(Vsock {
103            cid,
104            queues,
105            queue_events,
106            backend,
107            avail_features: AVAIL_FEATURES,
108            acked_features: 0,
109            activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(VsockError::EventFd)?,
110            device_state: DeviceState::Inactive,
111            rx_packet: VsockPacketRx::new()?,
112            tx_packet: VsockPacketTx::default(),
113        })
114    }
115
116    /// Create a new virtio-vsock device with the given VM CID and vsock backend.
117    pub fn new(cid: u64, backend: B) -> Result<Vsock<B>, VsockError> {
118        let queues: Vec<VirtQueue> = defs::VSOCK_QUEUE_SIZES
119            .iter()
120            .map(|&max_size| VirtQueue::new(max_size))
121            .collect();
122        Self::with_queues(cid, backend, queues)
123    }
124
125    /// Provides the ID of this vsock device as used in MMIO device identification.
126    pub fn id(&self) -> &str {
127        defs::VSOCK_DEV_ID
128    }
129
130    /// Retrieve the cid associated with this vsock device.
131    pub fn cid(&self) -> u64 {
132        self.cid
133    }
134
135    /// Access the backend behind the device.
136    pub fn backend(&self) -> &B {
137        &self.backend
138    }
139
140    /// Signal the guest driver that we've used some virtio buffers that it had previously made
141    /// available.
142    pub fn signal_used_queue(&self, qidx: usize) -> Result<(), DeviceError> {
143        self.device_state
144            .active_state()
145            .expect("Device is not initialized")
146            .interrupt
147            .trigger(VirtioInterruptType::Queue(qidx.try_into().unwrap_or_else(
148                |_| panic!("vsock: invalid queue index: {qidx}"),
149            )))
150            .map_err(DeviceError::FailedSignalingIrq)
151    }
152
153    /// Signal the guest which queues are ready to be consumed
154    pub fn signal_used_queues(&self, used_queues: &[u16]) -> Result<(), DeviceError> {
155        self.device_state
156            .active_state()
157            .expect("Device is not initialized")
158            .interrupt
159            .trigger_queues(used_queues)
160            .map_err(DeviceError::FailedSignalingIrq)
161    }
162
163    /// Walk the driver-provided RX queue buffers and attempt to fill them up with any data that we
164    /// have pending. Return `true` if descriptors have been added to the used ring, and `false`
165    /// otherwise.
166    pub fn process_rx(&mut self) -> Result<bool, InvalidAvailIdx> {
167        // This is safe since we checked in the event handler that the device is activated.
168        let mem = &self.device_state.active_state().unwrap().mem;
169
170        let queue = &mut self.queues[RXQ_INDEX];
171        let mut have_used = false;
172
173        while let Some(head) = queue.pop()? {
174            let index = head.index;
175            let used_len = match self.rx_packet.parse(mem, head) {
176                Ok(()) => {
177                    if self.backend.recv_pkt(&mut self.rx_packet).is_ok() {
178                        match self.rx_packet.commit_hdr() {
179                            // This addition cannot overflow, because packet length
180                            // is previously validated against `MAX_PKT_BUF_SIZE`
181                            // bound as part of `commit_hdr()`.
182                            Ok(()) => VSOCK_PKT_HDR_SIZE + self.rx_packet.hdr.len(),
183                            Err(err) => {
184                                warn!(
185                                    "vsock: Error writing packet header to guest memory: \
186                                     {:?}.Discarding the package.",
187                                    err
188                                );
189                                0
190                            }
191                        }
192                    } else {
193                        // We are using a consuming iterator over the virtio buffers, so, if we
194                        // can't fill in this buffer, we'll need to undo the
195                        // last iterator step.
196                        queue.undo_pop();
197                        break;
198                    }
199                }
200                Err(err) => {
201                    warn!("vsock: RX queue error: {:?}. Discarding the package.", err);
202                    0
203                }
204            };
205
206            have_used = true;
207            queue.add_used(index, used_len).unwrap_or_else(|err| {
208                error!("Failed to add available descriptor {}: {}", index, err)
209            });
210        }
211        queue.advance_used_ring_idx();
212
213        Ok(have_used)
214    }
215
216    /// Walk the driver-provided TX queue buffers, package them up as vsock packets, and send them
217    /// to the backend for processing. Return `true` if descriptors have been added to the used
218    /// ring, and `false` otherwise.
219    pub fn process_tx(&mut self) -> Result<bool, InvalidAvailIdx> {
220        // This is safe since we checked in the event handler that the device is activated.
221        let mem = &self.device_state.active_state().unwrap().mem;
222
223        let queue = &mut self.queues[TXQ_INDEX];
224        let mut have_used = false;
225
226        while let Some(head) = queue.pop()? {
227            let index = head.index;
228            // let pkt = match VsockPacket::from_tx_virtq_head(mem, head) {
229            match self.tx_packet.parse(mem, head) {
230                Ok(()) => (),
231                Err(err) => {
232                    error!("vsock: error reading TX packet: {:?}", err);
233                    have_used = true;
234                    queue.add_used(index, 0).unwrap_or_else(|err| {
235                        error!("Failed to add available descriptor {}: {}", index, err);
236                    });
237                    continue;
238                }
239            };
240
241            if self.backend.send_pkt(&self.tx_packet).is_err() {
242                queue.undo_pop();
243                break;
244            }
245
246            have_used = true;
247            queue.add_used(index, 0).unwrap_or_else(|err| {
248                error!("Failed to add available descriptor {}: {}", index, err);
249            });
250        }
251        queue.advance_used_ring_idx();
252
253        Ok(have_used)
254    }
255
256    // Send TRANSPORT_RESET_EVENT to driver. According to specs, the driver shuts down established
257    // connections and the guest_cid configuration field is fetched again. Existing listen sockets
258    // remain but their CID is updated to reflect the current guest_cid.
259    pub fn send_transport_reset_event(&mut self) -> Result<(), DeviceError> {
260        // This is safe since we checked in the caller function that the device is activated.
261        let mem = &self.device_state.active_state().unwrap().mem;
262
263        let queue = &mut self.queues[EVQ_INDEX];
264        let head = queue.pop()?.ok_or_else(|| {
265            METRICS.ev_queue_event_fails.inc();
266            DeviceError::VsockError(VsockError::EmptyQueue)
267        })?;
268
269        mem.write_obj::<u32>(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET, head.addr)
270            .unwrap_or_else(|err| error!("Failed to write virtio vsock reset event: {:?}", err));
271
272        queue.add_used(head.index, head.len).unwrap_or_else(|err| {
273            error!("Failed to add used descriptor {}: {}", head.index, err);
274        });
275        queue.advance_used_ring_idx();
276
277        self.signal_used_queue(EVQ_INDEX)?;
278
279        Ok(())
280    }
281}
282
283impl<B> VirtioDevice for Vsock<B>
284where
285    B: VsockBackend + Debug + 'static,
286{
287    impl_device_type!(VIRTIO_ID_VSOCK);
288
289    fn avail_features(&self) -> u64 {
290        self.avail_features
291    }
292
293    fn acked_features(&self) -> u64 {
294        self.acked_features
295    }
296
297    fn set_acked_features(&mut self, acked_features: u64) {
298        self.acked_features = acked_features
299    }
300
301    fn queues(&self) -> &[VirtQueue] {
302        &self.queues
303    }
304
305    fn queues_mut(&mut self) -> &mut [VirtQueue] {
306        &mut self.queues
307    }
308
309    fn queue_events(&self) -> &[EventFd] {
310        &self.queue_events
311    }
312
313    fn interrupt_trigger(&self) -> &dyn VirtioInterrupt {
314        self.device_state
315            .active_state()
316            .expect("Device is not initialized")
317            .interrupt
318            .deref()
319    }
320
321    fn read_config(&self, offset: u64, data: &mut [u8]) {
322        match offset {
323            0 if data.len() == 8 => byte_order::write_le_u64(data, self.cid()),
324            0 if data.len() == 4 => {
325                byte_order::write_le_u32(data, (self.cid() & 0xffff_ffff) as u32)
326            }
327            4 if data.len() == 4 => {
328                byte_order::write_le_u32(data, ((self.cid() >> 32) & 0xffff_ffff) as u32)
329            }
330            _ => {
331                METRICS.cfg_fails.inc();
332                warn!(
333                    "vsock: virtio-vsock received invalid read request of {} bytes at offset {}",
334                    data.len(),
335                    offset
336                )
337            }
338        }
339    }
340
341    fn write_config(&mut self, offset: u64, data: &[u8]) {
342        METRICS.cfg_fails.inc();
343        warn!(
344            "vsock: guest driver attempted to write device config (offset={:#x}, len={:#x})",
345            offset,
346            data.len()
347        );
348    }
349
350    fn activate(
351        &mut self,
352        mem: GuestMemoryMmap,
353        interrupt: Arc<dyn VirtioInterrupt>,
354    ) -> Result<(), ActivateError> {
355        for q in self.queues.iter_mut() {
356            q.initialize(&mem)
357                .map_err(ActivateError::QueueMemoryError)?;
358        }
359
360        if self.queues.len() != defs::VSOCK_NUM_QUEUES {
361            METRICS.activate_fails.inc();
362            return Err(ActivateError::QueueMismatch {
363                expected: defs::VSOCK_NUM_QUEUES,
364                got: self.queues.len(),
365            });
366        }
367
368        if self.activate_evt.write(1).is_err() {
369            METRICS.activate_fails.inc();
370            return Err(ActivateError::EventFd);
371        }
372
373        self.device_state = DeviceState::Activated(ActiveState { mem, interrupt });
374
375        Ok(())
376    }
377
378    fn is_activated(&self) -> bool {
379        self.device_state.is_activated()
380    }
381
382    fn kick(&mut self) {
383        // Vsock has complicated protocol that isn't resilient to any packet loss,
384        // so for Vsock we don't support connection persistence through snapshot.
385        // Any in-flight packets or events are simply lost.
386        // Vsock is restored 'empty'.
387        // The only reason we still `kick` it is to make guest process
388        // `TRANSPORT_RESET_EVENT` event we sent during snapshot creation.
389        if self.is_activated() {
390            info!("kick vsock {}.", self.id());
391            self.signal_used_queue(EVQ_INDEX).unwrap();
392        }
393    }
394}
395
#[cfg(test)]
mod tests {
    use super::*;
    use crate::devices::virtio::vsock::test_utils::TestContext;

    /// Exercises the virtio-level plumbing of the vsock device: feature
    /// negotiation, config-space reads/writes and activation.
    #[test]
    fn test_virtio_device() {
        let mut ctx = TestContext::new();
        let device_features = AVAIL_FEATURES;
        // The driver acks extra bits (bit 0 and bit 32) the device never offered.
        let driver_features: u64 = AVAIL_FEATURES | 1 | (1 << 32);
        let device_pages = [
            (device_features & 0xffff_ffff) as u32,
            (device_features >> 32) as u32,
        ];
        let driver_pages = [
            (driver_features & 0xffff_ffff) as u32,
            (driver_features >> 32) as u32,
        ];
        assert_eq!(ctx.device.device_type(), VIRTIO_ID_VSOCK);
        assert_eq!(ctx.device.avail_features_by_page(0), device_pages[0]);
        assert_eq!(ctx.device.avail_features_by_page(1), device_pages[1]);
        assert_eq!(ctx.device.avail_features_by_page(2), 0);

        // Ack device features, page 0.
        ctx.device.ack_features_by_page(0, driver_pages[0]);
        // Ack device features, page 1.
        ctx.device.ack_features_by_page(1, driver_pages[1]);
        // Ack some bogus page (i.e. 2). This should have no side effect.
        ctx.device.ack_features_by_page(2, 0);
        // Attempt to un-ack the first feature page. This should have no side effect.
        ctx.device.ack_features_by_page(0, !driver_pages[0]);
        // Check that no side effect are present, and that the acked features are exactly the same
        // as the device features.
        assert_eq!(ctx.device.acked_features, device_features & driver_features);

        // Test reading 32-bit chunks.
        let mut data = [0u8; 8];
        ctx.device.read_config(0, &mut data[..4]);
        assert_eq!(
            u64::from(byte_order::read_le_u32(&data[..])),
            ctx.cid & 0xffff_ffff
        );
        ctx.device.read_config(4, &mut data[4..]);
        assert_eq!(
            u64::from(byte_order::read_le_u32(&data[4..])),
            (ctx.cid >> 32) & 0xffff_ffff
        );

        // Test reading 64-bit.
        let mut data = [0u8; 8];
        ctx.device.read_config(0, &mut data);
        assert_eq!(byte_order::read_le_u64(&data), ctx.cid);

        // Check that out-of-bounds reading doesn't mutate the destination buffer.
        let mut data = [0u8, 1, 2, 3, 4, 5, 6, 7];
        ctx.device.read_config(2, &mut data);
        assert_eq!(data, [0u8, 1, 2, 3, 4, 5, 6, 7]);

        // Just covering lines here, since the vsock device has no writable config.
        // A warning is, however, logged, if the guest driver attempts to write any config data.
        ctx.device.write_config(0, &data[..4]);

        // Test a correct activation.
        ctx.device
            .activate(ctx.mem.clone(), ctx.interrupt.clone())
            .unwrap();
    }
}