vmm/devices/virtio/net/
device.rs

1// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the THIRD-PARTY file.
7
8use std::collections::VecDeque;
9use std::mem::{self};
10use std::net::Ipv4Addr;
11use std::num::Wrapping;
12use std::ops::Deref;
13use std::sync::{Arc, Mutex};
14
15use libc::{EAGAIN, iovec};
16use log::{error, info};
17use vmm_sys_util::eventfd::EventFd;
18
19use super::NET_QUEUE_MAX_SIZE;
20use crate::devices::virtio::ActivateError;
21use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice};
22use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
23use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_NET;
24use crate::devices::virtio::generated::virtio_net::{
25    VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
26    VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
27    VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, virtio_net_hdr_v1,
28};
29use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
30use crate::devices::virtio::iovec::{
31    IoVecBuffer, IoVecBufferMut, IoVecError, ParsedDescriptorChain,
32};
33use crate::devices::virtio::net::metrics::{NetDeviceMetrics, NetMetricsPerDevice};
34use crate::devices::virtio::net::tap::Tap;
35use crate::devices::virtio::net::{
36    MAX_BUFFER_SIZE, NET_QUEUE_SIZES, NetError, NetQueue, RX_INDEX, TX_INDEX, generated,
37};
38use crate::devices::virtio::queue::{DescriptorChain, InvalidAvailIdx, Queue};
39use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
40use crate::devices::{DeviceError, report_net_event_fail};
41use crate::dumbo::pdu::arp::ETH_IPV4_FRAME_LEN;
42use crate::dumbo::pdu::ethernet::{EthernetFrame, PAYLOAD_OFFSET};
43use crate::impl_device_type;
44use crate::logger::{IncMetric, METRICS};
45use crate::mmds::data_store::Mmds;
46use crate::mmds::ns::MmdsNetworkStack;
47use crate::rate_limiter::{BucketUpdate, RateLimiter, TokenType};
48use crate::utils::net::mac::MacAddr;
49use crate::utils::u64_to_usize;
50use crate::vstate::memory::{ByteValued, GuestMemoryMmap};
51
52const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN;
53
54pub(crate) const fn vnet_hdr_len() -> usize {
55    mem::size_of::<virtio_net_hdr_v1>()
56}
57
58// This returns the maximum frame header length. This includes the VNET header plus
59// the maximum L2 frame header bytes which includes the ethernet frame header plus
60// the header IPv4 ARP header which is 28 bytes long.
61const fn frame_hdr_len() -> usize {
62    vnet_hdr_len() + FRAME_HEADER_MAX_LEN
63}
64
65// Frames being sent/received through the network device model have a VNET header. This
66// function returns a slice which holds the L2 frame bytes without this header.
67fn frame_bytes_from_buf(buf: &[u8]) -> Result<&[u8], NetError> {
68    if buf.len() < vnet_hdr_len() {
69        Err(NetError::VnetHeaderMissing)
70    } else {
71        Ok(&buf[vnet_hdr_len()..])
72    }
73}
74
75fn frame_bytes_from_buf_mut(buf: &mut [u8]) -> Result<&mut [u8], NetError> {
76    if buf.len() < vnet_hdr_len() {
77        Err(NetError::VnetHeaderMissing)
78    } else {
79        Ok(&mut buf[vnet_hdr_len()..])
80    }
81}
82
83// This initializes to all 0 the VNET hdr part of a buf.
84fn init_vnet_hdr(buf: &mut [u8]) {
85    // The buffer should be larger than vnet_hdr_len.
86    buf[0..vnet_hdr_len()].fill(0);
87}
88
89#[derive(Debug, Default, Clone, Copy)]
90#[repr(C)]
91pub struct ConfigSpace {
92    pub guest_mac: MacAddr,
93}
94
95// SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding.
96unsafe impl ByteValued for ConfigSpace {}
97
98#[derive(Debug, thiserror::Error, displaydoc::Display)]
99enum AddRxBufferError {
100    /// Error while parsing new buffer: {0}
101    Parsing(#[from] IoVecError),
102    /// RX buffer is too small
103    BufferTooSmall,
104}
105
106/// A map of all the memory the guest has provided us with for performing RX
107#[derive(Debug)]
108pub struct RxBuffers {
109    // minimum size of a usable buffer for doing RX
110    pub min_buffer_size: u32,
111    // An [`IoVecBufferMut`] covering all the memory we have available for receiving network
112    // frames.
113    pub iovec: IoVecBufferMut<NET_QUEUE_MAX_SIZE>,
114    // A map of which part of the memory belongs to which `DescriptorChain` object
115    pub parsed_descriptors: VecDeque<ParsedDescriptorChain>,
116    // Buffers that we have used and they are ready to be given back to the guest.
117    pub used_descriptors: u16,
118    pub used_bytes: u32,
119}
120
121impl RxBuffers {
122    /// Create a new [`RxBuffers`] object for storing guest memory for performing RX
123    fn new() -> Result<Self, IoVecError> {
124        Ok(Self {
125            min_buffer_size: 0,
126            iovec: IoVecBufferMut::new()?,
127            parsed_descriptors: VecDeque::with_capacity(NET_QUEUE_MAX_SIZE.into()),
128            used_descriptors: 0,
129            used_bytes: 0,
130        })
131    }
132
133    /// Add a new `DescriptorChain` that we received from the RX queue in the buffer.
134    ///
135    /// SAFETY: The `DescriptorChain` cannot be referencing the same memory location as any other
136    /// `DescriptorChain`. (See also related comment in
137    /// [`IoVecBufferMut::append_descriptor_chain`]).
138    unsafe fn add_buffer(
139        &mut self,
140        mem: &GuestMemoryMmap,
141        head: DescriptorChain,
142    ) -> Result<(), AddRxBufferError> {
143        // SAFETY: descriptor chain cannot be referencing the same memory location as another chain
144        let parsed_dc = unsafe { self.iovec.append_descriptor_chain(mem, head)? };
145        if parsed_dc.length < self.min_buffer_size {
146            self.iovec.drop_chain_back(&parsed_dc);
147            return Err(AddRxBufferError::BufferTooSmall);
148        }
149        self.parsed_descriptors.push_back(parsed_dc);
150        Ok(())
151    }
152
153    /// Returns the total size of available space in the buffer.
154    #[inline(always)]
155    fn capacity(&self) -> u32 {
156        self.iovec.len()
157    }
158
159    /// Mark the first `size` bytes of available memory as used.
160    ///
161    /// # Safety:
162    ///
163    /// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
164    /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
165    ///   in the `RxBuffers`.
166    unsafe fn mark_used(&mut self, mut bytes_written: u32, rx_queue: &mut Queue) {
167        self.used_bytes = bytes_written;
168
169        let mut used_heads: u16 = 0;
170        for parsed_dc in self.parsed_descriptors.iter() {
171            let used_bytes = bytes_written.min(parsed_dc.length);
172            // Safe because we know head_index isn't out of bounds
173            rx_queue
174                .write_used_element(self.used_descriptors, parsed_dc.head_index, used_bytes)
175                .unwrap();
176            bytes_written -= used_bytes;
177            self.used_descriptors += 1;
178            used_heads += 1;
179
180            if bytes_written == 0 {
181                break;
182            }
183        }
184
185        // We need to set num_buffers before dropping chains from `self.iovec`. Otherwise
186        // when we set headers, we will iterate over new, yet unused chains instead of the ones
187        // we need.
188        self.header_set_num_buffers(used_heads);
189        for _ in 0..used_heads {
190            let parsed_dc = self
191                .parsed_descriptors
192                .pop_front()
193                .expect("This should never happen if write to the buffer succeeded.");
194            self.iovec.drop_chain_front(&parsed_dc);
195        }
196    }
197
198    /// Write the number of descriptors used in VirtIO header
199    fn header_set_num_buffers(&mut self, nr_descs: u16) {
200        // We can unwrap here, because we have checked before that the `IoVecBufferMut` holds at
201        // least one buffer with the proper size, depending on the feature negotiation. In any
202        // case, the buffer holds memory of at least `std::mem::size_of::<virtio_net_hdr_v1>()`
203        // bytes.
204        self.iovec
205            .write_all_volatile_at(
206                &nr_descs.to_le_bytes(),
207                std::mem::offset_of!(virtio_net_hdr_v1, num_buffers),
208            )
209            .unwrap()
210    }
211
212    /// This will let the guest know that about all the `DescriptorChain` object that has been
213    /// used to receive a frame from the TAP.
214    fn finish_frame(&mut self, rx_queue: &mut Queue) {
215        rx_queue.advance_next_used(self.used_descriptors);
216        self.used_descriptors = 0;
217        self.used_bytes = 0;
218    }
219
220    /// Return a slice of iovecs for the first slice in the buffer.
221    /// Panics if there are no parsed descriptors.
222    fn single_chain_slice_mut(&mut self) -> &mut [iovec] {
223        let nr_iovecs = self.parsed_descriptors[0].nr_iovecs as usize;
224        &mut self.iovec.as_iovec_mut_slice()[..nr_iovecs]
225    }
226
227    /// Return a slice of iovecs for all descriptor chains in the buffer.
228    fn all_chains_slice_mut(&mut self) -> &mut [iovec] {
229        self.iovec.as_iovec_mut_slice()
230    }
231}
232
233/// VirtIO network device.
234///
235/// It emulates a network device able to exchange L2 frames between the guest
236/// and a host-side tap device.
237#[derive(Debug)]
238pub struct Net {
239    pub(crate) id: String,
240
241    /// The backend for this device: a tap.
242    pub tap: Tap,
243
244    pub(crate) avail_features: u64,
245    pub(crate) acked_features: u64,
246
247    pub(crate) queues: Vec<Queue>,
248    pub(crate) queue_evts: Vec<EventFd>,
249
250    pub(crate) rx_rate_limiter: RateLimiter,
251    pub(crate) tx_rate_limiter: RateLimiter,
252
253    rx_frame_buf: [u8; MAX_BUFFER_SIZE],
254
255    tx_frame_headers: [u8; frame_hdr_len()],
256
257    pub(crate) config_space: ConfigSpace,
258    pub(crate) guest_mac: Option<MacAddr>,
259
260    pub(crate) device_state: DeviceState,
261    pub(crate) activate_evt: EventFd,
262
263    /// The MMDS stack corresponding to this interface.
264    /// Only if MMDS transport has been associated with it.
265    pub mmds_ns: Option<MmdsNetworkStack>,
266    pub(crate) metrics: Arc<NetDeviceMetrics>,
267
268    tx_buffer: IoVecBuffer,
269    pub(crate) rx_buffer: RxBuffers,
270}
271
272impl Net {
273    /// Create a new virtio network device with the given TAP interface.
274    pub fn new_with_tap(
275        id: String,
276        tap: Tap,
277        guest_mac: Option<MacAddr>,
278        rx_rate_limiter: RateLimiter,
279        tx_rate_limiter: RateLimiter,
280    ) -> Result<Self, NetError> {
281        let mut avail_features = (1 << VIRTIO_NET_F_GUEST_CSUM)
282            | (1 << VIRTIO_NET_F_CSUM)
283            | (1 << VIRTIO_NET_F_GUEST_TSO4)
284            | (1 << VIRTIO_NET_F_GUEST_TSO6)
285            | (1 << VIRTIO_NET_F_GUEST_UFO)
286            | (1 << VIRTIO_NET_F_HOST_TSO4)
287            | (1 << VIRTIO_NET_F_HOST_TSO6)
288            | (1 << VIRTIO_NET_F_HOST_UFO)
289            | (1 << VIRTIO_F_VERSION_1)
290            | (1 << VIRTIO_NET_F_MRG_RXBUF)
291            | (1 << VIRTIO_RING_F_EVENT_IDX);
292
293        let mut config_space = ConfigSpace::default();
294        if let Some(mac) = guest_mac {
295            config_space.guest_mac = mac;
296            // Enabling feature for MAC address configuration
297            // If not set, the driver will generates a random MAC address
298            avail_features |= 1 << VIRTIO_NET_F_MAC;
299        }
300
301        let mut queue_evts = Vec::new();
302        let mut queues = Vec::new();
303        for size in NET_QUEUE_SIZES {
304            queue_evts.push(EventFd::new(libc::EFD_NONBLOCK).map_err(NetError::EventFd)?);
305            queues.push(Queue::new(size));
306        }
307
308        Ok(Net {
309            id: id.clone(),
310            tap,
311            avail_features,
312            acked_features: 0u64,
313            queues,
314            queue_evts,
315            rx_rate_limiter,
316            tx_rate_limiter,
317            rx_frame_buf: [0u8; MAX_BUFFER_SIZE],
318            tx_frame_headers: [0u8; frame_hdr_len()],
319            config_space,
320            guest_mac,
321            device_state: DeviceState::Inactive,
322            activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(NetError::EventFd)?,
323            mmds_ns: None,
324            metrics: NetMetricsPerDevice::alloc(id),
325            tx_buffer: Default::default(),
326            rx_buffer: RxBuffers::new()?,
327        })
328    }
329
330    /// Create a new virtio network device given the interface name.
331    pub fn new(
332        id: String,
333        tap_if_name: &str,
334        guest_mac: Option<MacAddr>,
335        rx_rate_limiter: RateLimiter,
336        tx_rate_limiter: RateLimiter,
337    ) -> Result<Self, NetError> {
338        let tap = Tap::open_named(tap_if_name).map_err(NetError::TapOpen)?;
339
340        let vnet_hdr_size = i32::try_from(vnet_hdr_len()).unwrap();
341        tap.set_vnet_hdr_size(vnet_hdr_size)
342            .map_err(NetError::TapSetVnetHdrSize)?;
343
344        Self::new_with_tap(id, tap, guest_mac, rx_rate_limiter, tx_rate_limiter)
345    }
346
347    /// Provides the ID of this net device.
348    pub fn id(&self) -> &String {
349        &self.id
350    }
351
352    /// Provides the MAC of this net device.
353    pub fn guest_mac(&self) -> Option<&MacAddr> {
354        self.guest_mac.as_ref()
355    }
356
357    /// Provides the host IFACE name of this net device.
358    pub fn iface_name(&self) -> String {
359        self.tap.if_name_as_str().to_string()
360    }
361
362    /// Provides the MmdsNetworkStack of this net device.
363    pub fn mmds_ns(&self) -> Option<&MmdsNetworkStack> {
364        self.mmds_ns.as_ref()
365    }
366
367    /// Configures the `MmdsNetworkStack` to allow device to forward MMDS requests.
368    /// If the device already supports MMDS, updates the IPv4 address.
369    pub fn configure_mmds_network_stack(&mut self, ipv4_addr: Ipv4Addr, mmds: Arc<Mutex<Mmds>>) {
370        if let Some(mmds_ns) = self.mmds_ns.as_mut() {
371            mmds_ns.set_ipv4_addr(ipv4_addr);
372        } else {
373            self.mmds_ns = Some(MmdsNetworkStack::new_with_defaults(Some(ipv4_addr), mmds))
374        }
375    }
376
377    /// Disables the `MmdsNetworkStack` to prevent device to forward MMDS requests.
378    pub fn disable_mmds_network_stack(&mut self) {
379        self.mmds_ns = None
380    }
381
382    /// Provides a reference to the configured RX rate limiter.
383    pub fn rx_rate_limiter(&self) -> &RateLimiter {
384        &self.rx_rate_limiter
385    }
386
387    /// Provides a reference to the configured TX rate limiter.
388    pub fn tx_rate_limiter(&self) -> &RateLimiter {
389        &self.tx_rate_limiter
390    }
391
392    /// Trigger queue notification for the guest if we used enough descriptors
393    /// for the notification to be enabled.
394    /// https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-320005
395    /// 2.6.7.1 Driver Requirements: Used Buffer Notification Suppression
396    fn try_signal_queue(&mut self, queue_type: NetQueue) -> Result<(), DeviceError> {
397        let qidx = match queue_type {
398            NetQueue::Rx => RX_INDEX,
399            NetQueue::Tx => TX_INDEX,
400        };
401        self.queues[qidx].advance_used_ring_idx();
402
403        if self.queues[qidx].prepare_kick() {
404            self.interrupt_trigger()
405                .trigger(VirtioInterruptType::Queue(qidx.try_into().unwrap()))
406                .map_err(|err| {
407                    self.metrics.event_fails.inc();
408                    DeviceError::FailedSignalingIrq(err)
409                })?;
410        }
411
412        Ok(())
413    }
414
415    // Helper function to consume one op with `size` bytes from a rate limiter
416    fn rate_limiter_consume_op(rate_limiter: &mut RateLimiter, size: u64) -> bool {
417        if !rate_limiter.consume(1, TokenType::Ops) {
418            return false;
419        }
420
421        if !rate_limiter.consume(size, TokenType::Bytes) {
422            rate_limiter.manual_replenish(1, TokenType::Ops);
423            return false;
424        }
425
426        true
427    }
428
429    // Helper function to replenish one operation with `size` bytes from a rate limiter
430    fn rate_limiter_replenish_op(rate_limiter: &mut RateLimiter, size: u64) {
431        rate_limiter.manual_replenish(1, TokenType::Ops);
432        rate_limiter.manual_replenish(size, TokenType::Bytes);
433    }
434
435    // Attempts to copy a single frame into the guest if there is enough
436    // rate limiting budget.
437    // Returns true on successful frame delivery.
438    pub fn rate_limited_rx_single_frame(&mut self, frame_size: u32) -> bool {
439        let rx_queue = &mut self.queues[RX_INDEX];
440        if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, frame_size as u64) {
441            self.metrics.rx_rate_limiter_throttled.inc();
442            return false;
443        }
444
445        self.rx_buffer.finish_frame(rx_queue);
446        true
447    }
448
449    /// Returns the minimum size of buffer we expect the guest to provide us depending on the
450    /// features we have negotiated with it
451    fn minimum_rx_buffer_size(&self) -> u32 {
452        if !self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
453            if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
454                || self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
455                || self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
456            {
457                MAX_BUFFER_SIZE.try_into().unwrap()
458            } else {
459                1526
460            }
461        } else {
462            vnet_hdr_len().try_into().unwrap()
463        }
464    }
465
466    /// Parse available RX `DescriptorChains` from the queue
467    pub fn parse_rx_descriptors(&mut self) -> Result<(), InvalidAvailIdx> {
468        // This is safe since we checked in the event handler that the device is activated.
469        let mem = &self.device_state.active_state().unwrap().mem;
470        let queue = &mut self.queues[RX_INDEX];
471        while let Some(head) = queue.pop_or_enable_notification()? {
472            let index = head.index;
473            // SAFETY: we are only using this `DescriptorChain` here.
474            if let Err(err) = unsafe { self.rx_buffer.add_buffer(mem, head) } {
475                self.metrics.rx_fails.inc();
476
477                // If guest uses dirty tricks to make us add more descriptors than
478                // we can hold, just stop processing.
479                if matches!(err, AddRxBufferError::Parsing(IoVecError::IovDequeOverflow)) {
480                    error!("net: Could not add an RX descriptor: {err}");
481                    queue.undo_pop();
482                    break;
483                }
484
485                error!("net: Could not parse an RX descriptor: {err}");
486
487                // Add this broken chain to the used_ring. It will be
488                // reported to the quest on the next `rx_buffer.finish_frame` call.
489                // SAFETY:
490                // index is verified on `DescriptorChain` creation.
491                queue
492                    .write_used_element(self.rx_buffer.used_descriptors, index, 0)
493                    .unwrap();
494                self.rx_buffer.used_descriptors += 1;
495            }
496        }
497
498        Ok(())
499    }
500
501    // Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
502    //
503    // Returns whether MMDS consumed the frame.
504    fn write_to_mmds_or_tap(
505        mmds_ns: Option<&mut MmdsNetworkStack>,
506        rate_limiter: &mut RateLimiter,
507        headers: &mut [u8],
508        frame_iovec: &IoVecBuffer,
509        tap: &mut Tap,
510        guest_mac: Option<MacAddr>,
511        net_metrics: &NetDeviceMetrics,
512    ) -> Result<bool, NetError> {
513        // Read the frame headers from the IoVecBuffer
514        let max_header_len = headers.len();
515        let header_len = frame_iovec
516            .read_volatile_at(&mut &mut *headers, 0, max_header_len)
517            .map_err(|err| {
518                error!("Received malformed TX buffer: {:?}", err);
519                net_metrics.tx_malformed_frames.inc();
520                NetError::VnetHeaderMissing
521            })?;
522
523        let headers = frame_bytes_from_buf(&headers[..header_len]).inspect_err(|_| {
524            error!("VNET headers missing in TX frame");
525            net_metrics.tx_malformed_frames.inc();
526        })?;
527
528        if let Some(ns) = mmds_ns
529            && ns.is_mmds_frame(headers)
530        {
531            let mut frame = vec![0u8; frame_iovec.len() as usize - vnet_hdr_len()];
532            // Ok to unwrap here, because we are passing a buffer that has the exact size
533            // of the `IoVecBuffer` minus the VNET headers.
534            frame_iovec
535                .read_exact_volatile_at(&mut frame, vnet_hdr_len())
536                .unwrap();
537            let _ = ns.detour_frame(&frame);
538            METRICS.mmds.rx_accepted.inc();
539
540            // MMDS frames are not accounted by the rate limiter.
541            Self::rate_limiter_replenish_op(rate_limiter, u64::from(frame_iovec.len()));
542
543            // MMDS consumed the frame.
544            return Ok(true);
545        }
546
547        // This frame goes to the TAP.
548
549        // Check for guest MAC spoofing.
550        if let Some(guest_mac) = guest_mac {
551            let _ = EthernetFrame::from_bytes(headers).map(|eth_frame| {
552                if guest_mac != eth_frame.src_mac() {
553                    net_metrics.tx_spoofed_mac_count.inc();
554                }
555            });
556        }
557
558        let _metric = net_metrics.tap_write_agg.record_latency_metrics();
559        match Self::write_tap(tap, frame_iovec) {
560            Ok(_) => {
561                let len = u64::from(frame_iovec.len());
562                net_metrics.tx_bytes_count.add(len);
563                net_metrics.tx_packets_count.inc();
564                net_metrics.tx_count.inc();
565            }
566            Err(err) => {
567                error!("Failed to write to tap: {:?}", err);
568                net_metrics.tap_write_fails.inc();
569            }
570        };
571        Ok(false)
572    }
573
574    // We currently prioritize packets from the MMDS over regular network packets.
575    fn read_from_mmds_or_tap(&mut self) -> Result<Option<u32>, NetError> {
576        // We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
577        // this is the max size of 1 packet.
578        // SAFETY:
579        // * MAX_BUFFER_SIZE is constant and fits into u32
580        #[allow(clippy::cast_possible_truncation)]
581        if self.rx_buffer.capacity() < MAX_BUFFER_SIZE as u32 {
582            self.parse_rx_descriptors()?;
583
584            // If after parsing the RX queue we still don't have enough capacity, stop processing RX
585            // frames.
586            if self.rx_buffer.capacity() < MAX_BUFFER_SIZE as u32 {
587                return Ok(None);
588            }
589        }
590
591        if let Some(ns) = self.mmds_ns.as_mut()
592            && let Some(len) =
593                ns.write_next_frame(frame_bytes_from_buf_mut(&mut self.rx_frame_buf)?)
594        {
595            let len = len.get();
596            METRICS.mmds.tx_frames.inc();
597            METRICS.mmds.tx_bytes.add(len as u64);
598            init_vnet_hdr(&mut self.rx_frame_buf);
599            self.rx_buffer
600                .iovec
601                .write_all_volatile_at(&self.rx_frame_buf[..vnet_hdr_len() + len], 0)?;
602            // SAFETY:
603            // * len will never be bigger that u32::MAX because mmds is bound
604            // by the size of `self.rx_frame_buf` which is MAX_BUFFER_SIZE size.
605            let len: u32 = (vnet_hdr_len() + len).try_into().unwrap();
606
607            // SAFETY:
608            // * We checked that `rx_buffer` includes at least one `DescriptorChain`
609            // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects are
610            //   at least that big.
611            unsafe {
612                self.rx_buffer.mark_used(len, &mut self.queues[RX_INDEX]);
613            }
614            return Ok(Some(len));
615        }
616
617        // SAFETY:
618        // * We ensured that `self.rx_buffer` has at least one DescriptorChain parsed in it.
619        let len = unsafe { self.read_tap().map_err(NetError::IO) }?;
620        // SAFETY:
621        // * len will never be bigger that u32::MAX
622        let len: u32 = len.try_into().unwrap();
623
624        // SAFETY:
625        // * `rx_buffer` has at least one `DescriptorChain`
626        // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
627        //   bytes than its capacity.
628        unsafe {
629            self.rx_buffer.mark_used(len, &mut self.queues[RX_INDEX]);
630        }
631        Ok(Some(len))
632    }
633
634    /// Read as many frames as possible.
635    fn process_rx(&mut self) -> Result<(), DeviceError> {
636        loop {
637            match self.read_from_mmds_or_tap() {
638                Ok(None) => {
639                    self.metrics.no_rx_avail_buffer.inc();
640                    break;
641                }
642                Ok(Some(bytes)) => {
643                    self.metrics.rx_count.inc();
644                    self.metrics.rx_bytes_count.add(bytes as u64);
645                    self.metrics.rx_packets_count.inc();
646                    if !self.rate_limited_rx_single_frame(bytes) {
647                        break;
648                    }
649                }
650                Err(NetError::IO(err)) => {
651                    // The tap device is non-blocking, so any error aside from EAGAIN is
652                    // unexpected.
653                    match err.raw_os_error() {
654                        Some(err) if err == EAGAIN => (),
655                        _ => {
656                            error!("Failed to read tap: {:?}", err);
657                            self.metrics.tap_read_fails.inc();
658                            return Err(DeviceError::FailedReadTap);
659                        }
660                    };
661                    break;
662                }
663                Err(NetError::InvalidAvailIdx(err)) => {
664                    return Err(DeviceError::InvalidAvailIdx(err));
665                }
666                Err(err) => {
667                    error!("Spurious error in network RX: {:?}", err);
668                }
669            }
670        }
671
672        self.try_signal_queue(NetQueue::Rx)
673    }
674
675    fn resume_rx(&mut self) -> Result<(), DeviceError> {
676        // First try to handle any deferred frame
677        if self.rx_buffer.used_bytes != 0 {
678            // If can't finish sending this frame, re-set it as deferred and return; we can't
679            // process any more frames from the TAP.
680            if !self.rate_limited_rx_single_frame(self.rx_buffer.used_bytes) {
681                return Ok(());
682            }
683        }
684
685        self.process_rx()
686    }
687
688    fn process_tx(&mut self) -> Result<(), DeviceError> {
689        // This is safe since we checked in the event handler that the device is activated.
690        let mem = &self.device_state.active_state().unwrap().mem;
691
692        // The MMDS network stack works like a state machine, based on synchronous calls, and
693        // without being added to any event loop. If any frame is accepted by the MMDS, we also
694        // trigger a process_rx() which checks if there are any new frames to be sent, starting
695        // with the MMDS network stack.
696        let mut process_rx_for_mmds = false;
697        let mut used_any = false;
698        let tx_queue = &mut self.queues[TX_INDEX];
699
700        while let Some(head) = tx_queue.pop_or_enable_notification()? {
701            self.metrics
702                .tx_remaining_reqs_count
703                .add(tx_queue.len().into());
704            let head_index = head.index;
705            // Parse IoVecBuffer from descriptor head
706            // SAFETY: This descriptor chain is only loaded once
707            // virtio requests are handled sequentially so no two IoVecBuffers
708            // are live at the same time, meaning this has exclusive ownership over the memory
709            if unsafe { self.tx_buffer.load_descriptor_chain(mem, head).is_err() } {
710                self.metrics.tx_fails.inc();
711                tx_queue.add_used(head_index, 0)?;
712                continue;
713            };
714
715            // We only handle frames that are up to MAX_BUFFER_SIZE
716            if self.tx_buffer.len() as usize > MAX_BUFFER_SIZE {
717                error!("net: received too big frame from driver");
718                self.metrics.tx_malformed_frames.inc();
719                tx_queue.add_used(head_index, 0)?;
720                continue;
721            }
722
723            if !Self::rate_limiter_consume_op(
724                &mut self.tx_rate_limiter,
725                u64::from(self.tx_buffer.len()),
726            ) {
727                tx_queue.undo_pop();
728                self.metrics.tx_rate_limiter_throttled.inc();
729                break;
730            }
731
732            let frame_consumed_by_mmds = Self::write_to_mmds_or_tap(
733                self.mmds_ns.as_mut(),
734                &mut self.tx_rate_limiter,
735                &mut self.tx_frame_headers,
736                &self.tx_buffer,
737                &mut self.tap,
738                self.guest_mac,
739                &self.metrics,
740            )
741            .unwrap_or(false);
742            if frame_consumed_by_mmds && self.rx_buffer.used_bytes == 0 {
743                // MMDS consumed this frame/request, let's also try to process the response.
744                process_rx_for_mmds = true;
745            }
746
747            tx_queue.add_used(head_index, 0)?;
748            used_any = true;
749        }
750
751        if !used_any {
752            self.metrics.no_tx_avail_buffer.inc();
753        }
754
755        // Cleanup tx_buffer to ensure no two buffers point at the same memory
756        self.tx_buffer.clear();
757        self.try_signal_queue(NetQueue::Tx)?;
758
759        // An incoming frame for the MMDS may trigger the transmission of a new message.
760        if process_rx_for_mmds {
761            self.process_rx()
762        } else {
763            Ok(())
764        }
765    }
766
767    /// Builds the offload features we will setup on the TAP device based on the features that the
768    /// guest supports.
769    pub fn build_tap_offload_features(guest_supported_features: u64) -> u32 {
770        let add_if_supported =
771            |tap_features: &mut u32, supported_features: u64, tap_flag: u32, virtio_flag: u32| {
772                if supported_features & (1 << virtio_flag) != 0 {
773                    *tap_features |= tap_flag;
774                }
775            };
776
777        let mut tap_features: u32 = 0;
778
779        add_if_supported(
780            &mut tap_features,
781            guest_supported_features,
782            generated::TUN_F_CSUM,
783            VIRTIO_NET_F_GUEST_CSUM,
784        );
785        add_if_supported(
786            &mut tap_features,
787            guest_supported_features,
788            generated::TUN_F_UFO,
789            VIRTIO_NET_F_GUEST_UFO,
790        );
791        add_if_supported(
792            &mut tap_features,
793            guest_supported_features,
794            generated::TUN_F_TSO4,
795            VIRTIO_NET_F_GUEST_TSO4,
796        );
797        add_if_supported(
798            &mut tap_features,
799            guest_supported_features,
800            generated::TUN_F_TSO6,
801            VIRTIO_NET_F_GUEST_TSO6,
802        );
803
804        tap_features
805    }
806
807    /// Updates the parameters for the rate limiters
808    pub fn patch_rate_limiters(
809        &mut self,
810        rx_bytes: BucketUpdate,
811        rx_ops: BucketUpdate,
812        tx_bytes: BucketUpdate,
813        tx_ops: BucketUpdate,
814    ) {
815        self.rx_rate_limiter.update_buckets(rx_bytes, rx_ops);
816        self.tx_rate_limiter.update_buckets(tx_bytes, tx_ops);
817    }
818
819    /// Reads a frame from the TAP device inside the first descriptor held by `self.rx_buffer`.
820    ///
821    /// # Safety
822    ///
823    /// `self.rx_buffer` needs to have at least one descriptor chain parsed
824    pub unsafe fn read_tap(&mut self) -> std::io::Result<usize> {
825        let slice = if self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
826            self.rx_buffer.all_chains_slice_mut()
827        } else {
828            self.rx_buffer.single_chain_slice_mut()
829        };
830        self.tap.read_iovec(slice)
831    }
832
833    fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result<usize> {
834        tap.write_iovec(buf)
835    }
836
837    /// Process a single RX queue event.
838    ///
839    /// This is called by the event manager responding to the guest adding a new
840    /// buffer in the RX queue.
841    pub fn process_rx_queue_event(&mut self) {
842        self.metrics.rx_queue_event_count.inc();
843
844        if let Err(err) = self.queue_evts[RX_INDEX].read() {
845            // rate limiters present but with _very high_ allowed rate
846            error!("Failed to get rx queue event: {:?}", err);
847            self.metrics.event_fails.inc();
848            return;
849        } else {
850            self.parse_rx_descriptors().unwrap();
851        }
852
853        if self.rx_rate_limiter.is_blocked() {
854            self.metrics.rx_rate_limiter_throttled.inc();
855        } else {
856            // If the limiter is not blocked, resume the receiving of bytes.
857            self.resume_rx()
858                .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
859        }
860    }
861
862    pub fn process_tap_rx_event(&mut self) {
863        // This is safe since we checked in the event handler that the device is activated.
864        self.metrics.rx_tap_event_count.inc();
865
866        // While limiter is blocked, don't process any more incoming.
867        if self.rx_rate_limiter.is_blocked() {
868            self.metrics.rx_rate_limiter_throttled.inc();
869            return;
870        }
871
872        self.resume_rx()
873            .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
874    }
875
876    /// Process a single TX queue event.
877    ///
878    /// This is called by the event manager responding to the guest adding a new
879    /// buffer in the TX queue.
880    pub fn process_tx_queue_event(&mut self) {
881        self.metrics.tx_queue_event_count.inc();
882        if let Err(err) = self.queue_evts[TX_INDEX].read() {
883            error!("Failed to get tx queue event: {:?}", err);
884            self.metrics.event_fails.inc();
885        } else if !self.tx_rate_limiter.is_blocked()
886        // If the limiter is not blocked, continue transmitting bytes.
887        {
888            self.process_tx()
889                .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
890        } else {
891            self.metrics.tx_rate_limiter_throttled.inc();
892        }
893    }
894
895    pub fn process_rx_rate_limiter_event(&mut self) {
896        self.metrics.rx_event_rate_limiter_count.inc();
897        // Upon rate limiter event, call the rate limiter handler
898        // and restart processing the queue.
899
900        match self.rx_rate_limiter.event_handler() {
901            Ok(_) => {
902                // There might be enough budget now to receive the frame.
903                self.resume_rx()
904                    .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
905            }
906            Err(err) => {
907                error!("Failed to get rx rate-limiter event: {:?}", err);
908                self.metrics.event_fails.inc();
909            }
910        }
911    }
912
913    pub fn process_tx_rate_limiter_event(&mut self) {
914        self.metrics.tx_rate_limiter_event_count.inc();
915        // Upon rate limiter event, call the rate limiter handler
916        // and restart processing the queue.
917        match self.tx_rate_limiter.event_handler() {
918            Ok(_) => {
919                // There might be enough budget now to send the frame.
920                self.process_tx()
921                    .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
922            }
923            Err(err) => {
924                error!("Failed to get tx rate-limiter event: {:?}", err);
925                self.metrics.event_fails.inc();
926            }
927        }
928    }
929
930    /// Process device virtio queue(s).
931    pub fn process_virtio_queues(&mut self) -> Result<(), InvalidAvailIdx> {
932        if let Err(DeviceError::InvalidAvailIdx(err)) = self.resume_rx() {
933            return Err(err);
934        }
935        if let Err(DeviceError::InvalidAvailIdx(err)) = self.process_tx() {
936            return Err(err);
937        }
938
939        Ok(())
940    }
941
942    /// Prepare saving state
943    pub fn prepare_save(&mut self) {
944        // We shouldn't be messing with the queue if the device is not activated.
945        // Anyways, if it isn't there's nothing to prepare; we haven't parsed any
946        // descriptors yet from it and we can't have a deferred frame.
947        if !self.is_activated() {
948            return;
949        }
950
951        // Give potential deferred RX frame to guest
952        self.rx_buffer.finish_frame(&mut self.queues[RX_INDEX]);
953        // Reset the parsed available descriptors, so we will re-parse them
954        self.queues[RX_INDEX].next_avail -=
955            Wrapping(u16::try_from(self.rx_buffer.parsed_descriptors.len()).unwrap());
956        self.rx_buffer.parsed_descriptors.clear();
957        self.rx_buffer.iovec.clear();
958        self.rx_buffer.used_bytes = 0;
959        self.rx_buffer.used_descriptors = 0;
960    }
961}
962
963impl VirtioDevice for Net {
964    impl_device_type!(VIRTIO_ID_NET);
965
966    fn avail_features(&self) -> u64 {
967        self.avail_features
968    }
969
970    fn acked_features(&self) -> u64 {
971        self.acked_features
972    }
973
974    fn set_acked_features(&mut self, acked_features: u64) {
975        self.acked_features = acked_features;
976    }
977
978    fn queues(&self) -> &[Queue] {
979        &self.queues
980    }
981
982    fn queues_mut(&mut self) -> &mut [Queue] {
983        &mut self.queues
984    }
985
986    fn queue_events(&self) -> &[EventFd] {
987        &self.queue_evts
988    }
989
990    fn interrupt_trigger(&self) -> &dyn VirtioInterrupt {
991        self.device_state
992            .active_state()
993            .expect("Device is not implemented")
994            .interrupt
995            .deref()
996    }
997
998    fn read_config(&self, offset: u64, data: &mut [u8]) {
999        if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) {
1000            let len = config_space_bytes.len().min(data.len());
1001            data[..len].copy_from_slice(&config_space_bytes[..len]);
1002        } else {
1003            error!("Failed to read config space");
1004            self.metrics.cfg_fails.inc();
1005        }
1006    }
1007
1008    fn write_config(&mut self, offset: u64, data: &[u8]) {
1009        let config_space_bytes = self.config_space.as_mut_slice();
1010        let start = usize::try_from(offset).ok();
1011        let end = start.and_then(|s| s.checked_add(data.len()));
1012        let Some(dst) = start
1013            .zip(end)
1014            .and_then(|(start, end)| config_space_bytes.get_mut(start..end))
1015        else {
1016            error!("Failed to write config space");
1017            self.metrics.cfg_fails.inc();
1018            return;
1019        };
1020
1021        dst.copy_from_slice(data);
1022        self.guest_mac = Some(self.config_space.guest_mac);
1023        self.metrics.mac_address_updates.inc();
1024    }
1025
1026    fn activate(
1027        &mut self,
1028        mem: GuestMemoryMmap,
1029        interrupt: Arc<dyn VirtioInterrupt>,
1030    ) -> Result<(), ActivateError> {
1031        for q in self.queues.iter_mut() {
1032            q.initialize(&mem)
1033                .map_err(ActivateError::QueueMemoryError)?;
1034        }
1035
1036        let event_idx = self.has_feature(u64::from(VIRTIO_RING_F_EVENT_IDX));
1037        if event_idx {
1038            for queue in &mut self.queues {
1039                queue.enable_notif_suppression();
1040            }
1041        }
1042
1043        let supported_flags: u32 = Net::build_tap_offload_features(self.acked_features);
1044        self.tap
1045            .set_offload(supported_flags)
1046            .map_err(super::super::ActivateError::TapSetOffload)?;
1047
1048        self.rx_buffer.min_buffer_size = self.minimum_rx_buffer_size();
1049
1050        if self.activate_evt.write(1).is_err() {
1051            self.metrics.activate_fails.inc();
1052            return Err(ActivateError::EventFd);
1053        }
1054        self.device_state = DeviceState::Activated(ActiveState { mem, interrupt });
1055        Ok(())
1056    }
1057
1058    fn is_activated(&self) -> bool {
1059        self.device_state.is_activated()
1060    }
1061
1062    fn kick(&mut self) {
1063        // If device is activated, kick the net queue(s) to make up for any
1064        // pending or in-flight epoll events we may have not captured in snapshot.
1065        // No need to kick Ratelimiters because they are restored 'unblocked' so
1066        // any inflight `timer_fd` events can be safely discarded.
1067        if self.is_activated() {
1068            info!("kick net {}.", self.id());
1069            self.process_virtio_queues();
1070        }
1071    }
1072}
1073
1074#[cfg(test)]
1075#[macro_use]
1076#[allow(clippy::cast_possible_truncation)]
1077pub mod tests {
1078    use std::net::Ipv4Addr;
1079    use std::os::fd::AsRawFd;
1080    use std::str::FromStr;
1081    use std::time::Duration;
1082    use std::{mem, thread};
1083
1084    use vm_memory::GuestAddress;
1085
1086    use super::*;
1087    use crate::check_metric_after_block;
1088    use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
1089    use crate::devices::virtio::iovec::IoVecBuffer;
1090    use crate::devices::virtio::net::NET_QUEUE_SIZES;
1091    use crate::devices::virtio::net::device::{
1092        frame_bytes_from_buf, frame_bytes_from_buf_mut, frame_hdr_len, init_vnet_hdr, vnet_hdr_len,
1093    };
1094    use crate::devices::virtio::net::test_utils::test::TestHelper;
1095    use crate::devices::virtio::net::test_utils::{
1096        NetEvent, NetQueue, TapTrafficSimulator, default_net, if_index, inject_tap_tx_frame,
1097        set_mac,
1098    };
1099    use crate::devices::virtio::queue::VIRTQ_DESC_F_WRITE;
1100    use crate::devices::virtio::test_utils::VirtQueue;
1101    use crate::dumbo::EthernetFrame;
1102    use crate::dumbo::pdu::arp::{ETH_IPV4_FRAME_LEN, EthIPv4ArpFrame};
1103    use crate::dumbo::pdu::ethernet::ETHERTYPE_ARP;
1104    use crate::logger::IncMetric;
1105    use crate::rate_limiter::{BucketUpdate, RateLimiter, TokenBucket, TokenType};
1106    use crate::test_utils::single_region_mem;
1107    use crate::utils::net::mac::{MAC_ADDR_LEN, MacAddr};
1108    use crate::vstate::memory::{Address, GuestMemory};
1109
1110    impl Net {
1111        pub fn finish_frame(&mut self) {
1112            self.rx_buffer.finish_frame(&mut self.queues[RX_INDEX]);
1113            self.queues[RX_INDEX].advance_used_ring_idx();
1114        }
1115    }
1116
1117    /// Write the number of descriptors used in VirtIO header
1118    fn header_set_num_buffers(frame: &mut [u8], nr_descs: u16) {
1119        let bytes = nr_descs.to_le_bytes();
1120        let offset = std::mem::offset_of!(virtio_net_hdr_v1, num_buffers);
1121        frame[offset] = bytes[0];
1122        frame[offset + 1] = bytes[1];
1123    }
1124
1125    #[test]
1126    fn test_vnet_helpers() {
1127        let mut frame_buf = vec![42u8; vnet_hdr_len() - 1];
1128        assert_eq!(
1129            format!("{:?}", frame_bytes_from_buf_mut(&mut frame_buf)),
1130            "Err(VnetHeaderMissing)"
1131        );
1132
1133        let mut frame_buf: [u8; MAX_BUFFER_SIZE] = [42u8; MAX_BUFFER_SIZE];
1134
1135        let vnet_hdr_len_ = mem::size_of::<virtio_net_hdr_v1>();
1136        assert_eq!(vnet_hdr_len_, vnet_hdr_len());
1137
1138        init_vnet_hdr(&mut frame_buf);
1139        let zero_vnet_hdr = vec![0u8; vnet_hdr_len_];
1140        assert_eq!(zero_vnet_hdr, &frame_buf[..vnet_hdr_len_]);
1141
1142        let payload = vec![42u8; MAX_BUFFER_SIZE - vnet_hdr_len_];
1143        assert_eq!(payload, frame_bytes_from_buf(&frame_buf).unwrap());
1144
1145        {
1146            let payload = frame_bytes_from_buf_mut(&mut frame_buf).unwrap();
1147            payload[0] = 15;
1148        }
1149        assert_eq!(frame_buf[vnet_hdr_len_], 15);
1150    }
1151
1152    #[test]
1153    fn test_virtio_device_type() {
1154        let mut net = default_net();
1155        set_mac(&mut net, MacAddr::from_str("11:22:33:44:55:66").unwrap());
1156        assert_eq!(net.device_type(), VIRTIO_ID_NET);
1157    }
1158
1159    #[test]
1160    // Test that `Net::build_tap_offload_features` creates the TAP offload features that we expect
1161    // it to do, based on the available guest features
1162    fn test_build_tap_offload_features_all() {
1163        let supported_features = (1 << VIRTIO_NET_F_GUEST_CSUM)
1164            | (1 << VIRTIO_NET_F_GUEST_UFO)
1165            | (1 << VIRTIO_NET_F_GUEST_TSO4)
1166            | (1 << VIRTIO_NET_F_GUEST_TSO6);
1167        let expected_tap_features = generated::TUN_F_CSUM
1168            | generated::TUN_F_UFO
1169            | generated::TUN_F_TSO4
1170            | generated::TUN_F_TSO6;
1171        let supported_flags = Net::build_tap_offload_features(supported_features);
1172
1173        assert_eq!(supported_flags, expected_tap_features);
1174    }
1175
1176    #[test]
1177    // Same as before, however, using each supported feature one by one.
1178    fn test_build_tap_offload_features_one_by_one() {
1179        let features = [
1180            (1 << VIRTIO_NET_F_GUEST_CSUM, generated::TUN_F_CSUM),
1181            (1 << VIRTIO_NET_F_GUEST_UFO, generated::TUN_F_UFO),
1182            (1 << VIRTIO_NET_F_GUEST_TSO4, generated::TUN_F_TSO4),
1183        ];
1184        for (virtio_flag, tap_flag) in features {
1185            let supported_flags = Net::build_tap_offload_features(virtio_flag);
1186            assert_eq!(supported_flags, tap_flag);
1187        }
1188    }
1189
1190    #[test]
1191    fn test_virtio_device_read_config() {
1192        let mut net = default_net();
1193        set_mac(&mut net, MacAddr::from_str("11:22:33:44:55:66").unwrap());
1194
1195        // Test `read_config()`. This also validates the MAC was properly configured.
1196        let mac = MacAddr::from_str("11:22:33:44:55:66").unwrap();
1197        let mut config_mac = [0u8; MAC_ADDR_LEN as usize];
1198        net.read_config(0, &mut config_mac);
1199        assert_eq!(&config_mac, mac.get_bytes());
1200
1201        // Invalid read.
1202        config_mac = [0u8; MAC_ADDR_LEN as usize];
1203        net.read_config(u64::from(MAC_ADDR_LEN), &mut config_mac);
1204        assert_eq!(config_mac, [0u8, 0u8, 0u8, 0u8, 0u8, 0u8]);
1205    }
1206
1207    #[test]
1208    fn test_virtio_device_rewrite_config() {
1209        let mut net = default_net();
1210        set_mac(&mut net, MacAddr::from_str("11:22:33:44:55:66").unwrap());
1211
1212        let new_config: [u8; MAC_ADDR_LEN as usize] = [0x66, 0x55, 0x44, 0x33, 0x22, 0x11];
1213        net.write_config(0, &new_config);
1214        let mut new_config_read = [0u8; MAC_ADDR_LEN as usize];
1215        net.read_config(0, &mut new_config_read);
1216        assert_eq!(new_config, new_config_read);
1217
1218        // Check that the guest MAC was updated.
1219        let expected_guest_mac = MacAddr::from_bytes_unchecked(&new_config);
1220        assert_eq!(expected_guest_mac, net.guest_mac.unwrap());
1221        assert_eq!(net.metrics.mac_address_updates.count(), 1);
1222
1223        // Partial write (this is how the kernel sets a new mac address) - byte by byte.
1224        let new_config = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66];
1225        for i in 0..new_config.len() {
1226            net.write_config(i as u64, &new_config[i..=i]);
1227        }
1228        net.read_config(0, &mut new_config_read);
1229        assert_eq!(new_config, new_config_read);
1230
1231        // Invalid write.
1232        net.write_config(5, &new_config);
1233        // Verify old config was untouched.
1234        new_config_read = [0u8; MAC_ADDR_LEN as usize];
1235        net.read_config(0, &mut new_config_read);
1236        assert_eq!(new_config, new_config_read);
1237
1238        // Large offset that may cause an overflow.
1239        net.write_config(u64::MAX, &new_config);
1240        // Verify old config was untouched.
1241        new_config_read = [0u8; MAC_ADDR_LEN as usize];
1242        net.read_config(0, &mut new_config_read);
1243        assert_eq!(new_config, new_config_read);
1244    }
1245
1246    #[test]
1247    fn test_rx_missing_queue_signal() {
1248        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1249        let mut th = TestHelper::get_default(&mem);
1250        th.activate_net();
1251
1252        th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]);
1253        th.net().queue_evts[RX_INDEX].read().unwrap();
1254        check_metric_after_block!(
1255            th.net().metrics.event_fails,
1256            1,
1257            th.simulate_event(NetEvent::RxQueue)
1258        );
1259
1260        // Check that the used queue didn't advance.
1261        assert_eq!(th.rxq.used.idx.get(), 0);
1262    }
1263
1264    fn rx_read_only_descriptor(mut th: TestHelper) {
1265        th.activate_net();
1266
1267        th.add_desc_chain(
1268            NetQueue::Rx,
1269            0,
1270            &[
1271                (0, 100, VIRTQ_DESC_F_WRITE),
1272                (1, 100, 0),
1273                (2, 1000, VIRTQ_DESC_F_WRITE),
1274            ],
1275        );
1276        let mut frame = inject_tap_tx_frame(&th.net(), 1000);
1277        check_metric_after_block!(
1278            th.net().metrics.rx_fails,
1279            1,
1280            th.event_manager.run_with_timeout(100).unwrap()
1281        );
1282        th.rxq.check_used_elem(0, 0, 0);
1283        header_set_num_buffers(frame.as_mut_slice(), 1);
1284        th.check_rx_queue_resume(&frame);
1285    }
1286
1287    #[test]
1288    fn test_rx_read_only_descriptor() {
1289        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1290        let th = TestHelper::get_default(&mem);
1291        rx_read_only_descriptor(th);
1292    }
1293
1294    #[test]
1295    fn test_rx_read_only_descriptor_mrg() {
1296        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1297        let mut th = TestHelper::get_default(&mem);
1298        // VIRTIO_NET_F_MRG_RXBUF is not enabled by default
1299        th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF;
1300        rx_read_only_descriptor(th);
1301    }
1302
1303    fn rx_short_descriptor(mut th: TestHelper) {
1304        th.activate_net();
1305
1306        th.add_desc_chain(NetQueue::Rx, 0, &[(0, 10, VIRTQ_DESC_F_WRITE)]);
1307        let mut frame = th.check_rx_discarded_buffer(1000);
1308        th.rxq.check_used_elem(0, 0, 0);
1309
1310        header_set_num_buffers(frame.as_mut_slice(), 1);
1311        th.check_rx_queue_resume(&frame);
1312    }
1313
1314    #[test]
1315    fn test_rx_short_descriptor() {
1316        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1317        let th = TestHelper::get_default(&mem);
1318        rx_short_descriptor(th);
1319    }
1320
1321    #[test]
1322    fn test_rx_short_descriptor_mrg() {
1323        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1324        let mut th = TestHelper::get_default(&mem);
1325        // VIRTIO_NET_F_MRG_RXBUF is not enabled by default
1326        th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF;
1327        rx_short_descriptor(th);
1328    }
1329
1330    fn rx_invalid_descriptor(mut th: TestHelper) {
1331        th.activate_net();
1332
1333        // The descriptor chain is created so that the last descriptor doesn't fit in the
1334        // guest memory.
1335        let offset = th.mem.last_addr().raw_value() - th.data_addr() - 300;
1336        th.add_desc_chain(
1337            NetQueue::Rx,
1338            offset,
1339            &[
1340                (0, 100, VIRTQ_DESC_F_WRITE),
1341                (1, 50, VIRTQ_DESC_F_WRITE),
1342                (2, 4096, VIRTQ_DESC_F_WRITE),
1343            ],
1344        );
1345        let mut frame = th.check_rx_discarded_buffer(1000);
1346        th.rxq.check_used_elem(0, 0, 0);
1347
1348        header_set_num_buffers(frame.as_mut_slice(), 1);
1349        th.check_rx_queue_resume(&frame);
1350    }
1351
1352    #[test]
1353    fn test_rx_invalid_descriptor() {
1354        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1355        let th = TestHelper::get_default(&mem);
1356        rx_invalid_descriptor(th);
1357    }
1358
1359    #[test]
1360    fn test_rx_invalid_descriptor_mrg() {
1361        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1362        let mut th = TestHelper::get_default(&mem);
1363        // VIRTIO_NET_F_MRG_RXBUF is not enabled by default
1364        th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF;
1365        rx_invalid_descriptor(th);
1366    }
1367
1368    fn rx_retry(mut th: TestHelper) {
1369        th.activate_net();
1370
1371        // Add invalid descriptor chain - read only descriptor.
1372        th.add_desc_chain(
1373            NetQueue::Rx,
1374            0,
1375            &[
1376                (0, 100, VIRTQ_DESC_F_WRITE),
1377                (1, 100, 0),
1378                (2, 1000, VIRTQ_DESC_F_WRITE),
1379            ],
1380        );
1381        // Add invalid descriptor chain - too short.
1382        th.add_desc_chain(NetQueue::Rx, 1200, &[(3, 10, VIRTQ_DESC_F_WRITE)]);
1383        // Add invalid descriptor chain - invalid memory offset.
1384        th.add_desc_chain(
1385            NetQueue::Rx,
1386            th.mem.last_addr().raw_value(),
1387            &[(4, 1000, VIRTQ_DESC_F_WRITE)],
1388        );
1389
1390        // Add valid descriptor chain. TestHelper does not negotiate any feature offloading so the
1391        // buffers need to be at least 1526 bytes long.
1392        th.add_desc_chain(
1393            NetQueue::Rx,
1394            1300,
1395            &[(5, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)],
1396        );
1397
1398        // Inject frame to tap and run epoll.
1399        let mut frame = inject_tap_tx_frame(&th.net(), 1000);
1400        check_metric_after_block!(
1401            th.net().metrics.rx_packets_count,
1402            1,
1403            th.event_manager.run_with_timeout(100).unwrap()
1404        );
1405
1406        // Check that the used queue has advanced.
1407        assert_eq!(th.rxq.used.idx.get(), 4);
1408        assert!(
1409            th.net()
1410                .interrupt_trigger()
1411                .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
1412        );
1413
1414        // Check that the invalid descriptor chains have been discarded
1415        th.rxq.check_used_elem(0, 0, 0);
1416        th.rxq.check_used_elem(1, 3, 0);
1417        th.rxq.check_used_elem(2, 4, 0);
1418        // Check that the frame wasn't deferred.
1419        assert!(th.net().rx_buffer.used_descriptors == 0);
1420        // Check that the frame has been written successfully to the valid Rx descriptor chain.
1421        th.rxq
1422            .check_used_elem(3, 5, frame.len().try_into().unwrap());
1423        header_set_num_buffers(frame.as_mut_slice(), 1);
1424        th.rxq.dtable[5].check_data(&frame);
1425    }
1426
1427    #[test]
1428    fn test_rx_retry() {
1429        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1430        let th = TestHelper::get_default(&mem);
1431        rx_retry(th);
1432    }
1433
1434    #[test]
1435    fn test_rx_retry_mrg() {
1436        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1437        let mut th = TestHelper::get_default(&mem);
1438        // VIRTIO_NET_F_MRG_RXBUF is not enabled by default
1439        th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF;
1440        rx_retry(th);
1441    }
1442
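    // Exercises RX with a single frame scattered over a multi-descriptor chain whose
    // descriptor indices are non-contiguous, verifying that the `next` links are followed.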
1443    fn rx_complex_desc_chain(mut th: TestHelper) {
1444        th.activate_net();
1445
1446        // Create a valid Rx avail descriptor chain with multiple descriptors.
1447        th.add_desc_chain(
1448            NetQueue::Rx,
1449            0,
1450            // Add gaps between the descriptor ids in order to ensure that we follow
1451            // the `next` field.
1452            &[
1453                (3, 100, VIRTQ_DESC_F_WRITE),
1454                (5, 50, VIRTQ_DESC_F_WRITE),
1455                (11, MAX_BUFFER_SIZE as u32 - 100 - 50, VIRTQ_DESC_F_WRITE),
1456            ],
1457        );
1458        // Inject frame to tap and run epoll.
1459        let mut frame = inject_tap_tx_frame(&th.net(), 1000);
1460        check_metric_after_block!(
1461            th.net().metrics.rx_packets_count,
1462            1,
1463            th.event_manager.run_with_timeout(100).unwrap()
1464        );
1465
1466        // Check that the frame wasn't deferred.
1467        assert!(th.net().rx_buffer.used_descriptors == 0);
1468        // Check that the used queue has advanced.
1469        assert_eq!(th.rxq.used.idx.get(), 1);
1470        assert!(
1471            th.net()
1472                .interrupt_trigger()
1473                .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
1474        );
1475        // Check that the frame has been written successfully to the Rx descriptor chain.
1476        header_set_num_buffers(frame.as_mut_slice(), 1);
1477        th.rxq
1478            .check_used_elem(0, 3, frame.len().try_into().unwrap());
1479        th.rxq.dtable[3].check_data(&frame[..100]);
1480        th.rxq.dtable[5].check_data(&frame[100..150]);
1481        th.rxq.dtable[11].check_data(&frame[150..]);
1482    }
1483
1484    #[test]
1485    fn test_rx_complex_desc_chain() {
1486        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1487        let th = TestHelper::get_default(&mem);
1488        rx_complex_desc_chain(th);
1489    }
1490
1491    #[test]
1492    fn test_rx_complex_desc_chain_mrg() {
1493        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1494        let mut th = TestHelper::get_default(&mem);
1495        // VIRTIO_NET_F_MRG_RXBUF is not enabled by default
1496        th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF;
1497        rx_complex_desc_chain(th);
1498    }
1499
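    // Exercises RX with two frames and two descriptor chains: each frame must be written
    // to its own chain, even though either chain is large enough to hold both frames.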
1500    fn rx_multiple_frames(mut th: TestHelper) {
1501        th.activate_net();
1502
1503        // Create 2 valid Rx avail descriptor chains. Each one has enough space to fit both
1504        // of the frames injected below, but only 1 frame should be written to each chain.
1505        th.add_desc_chain(
1506            NetQueue::Rx,
1507            0,
1508            &[
1509                (0, 500, VIRTQ_DESC_F_WRITE),
1510                (1, 500, VIRTQ_DESC_F_WRITE),
1511                (2, MAX_BUFFER_SIZE as u32 - 1000, VIRTQ_DESC_F_WRITE),
1512            ],
1513        );
1514        // Second chain needs at least MAX_BUFFER_SIZE offset
1515        th.add_desc_chain(
1516            NetQueue::Rx,
1517            MAX_BUFFER_SIZE as u64 + 1000,
1518            &[
1519                (3, 500, VIRTQ_DESC_F_WRITE),
1520                (4, 500, VIRTQ_DESC_F_WRITE),
1521                (5, MAX_BUFFER_SIZE as u32 - 1000, VIRTQ_DESC_F_WRITE),
1522            ],
1523        );
1524        // Inject 2 frames to tap and run epoll.
1525        let mut frame_1 = inject_tap_tx_frame(&th.net(), 200);
1526        let mut frame_2 = inject_tap_tx_frame(&th.net(), 300);
1527        check_metric_after_block!(
1528            th.net().metrics.rx_packets_count,
1529            2,
1530            th.event_manager.run_with_timeout(100).unwrap()
1531        );
1532
1533        // Check that the frames weren't deferred.
1534        assert!(th.net().rx_buffer.used_bytes == 0);
1535        // Check that the used queue has advanced.
1536        assert_eq!(th.rxq.used.idx.get(), 2);
1537        assert!(
1538            th.net()
1539                .interrupt_trigger()
1540                .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
1541        );
1542        // Check that the 1st frame was written successfully to the 1st Rx descriptor chain.
1543        header_set_num_buffers(frame_1.as_mut_slice(), 1);
1544        th.rxq
1545            .check_used_elem(0, 0, frame_1.len().try_into().unwrap());
1546        th.rxq.dtable[0].check_data(&frame_1);
1547        th.rxq.dtable[1].check_data(&[0; 500]);
1548        th.rxq.dtable[2].check_data(&[0; MAX_BUFFER_SIZE - 1000]);
1549        // Check that the 2nd frame was written successfully to the 2nd Rx descriptor chain.
1550        header_set_num_buffers(frame_2.as_mut_slice(), 1);
1551        th.rxq
1552            .check_used_elem(1, 3, frame_2.len().try_into().unwrap());
1553        th.rxq.dtable[3].check_data(&frame_2);
1554        th.rxq.dtable[4].check_data(&[0; 500]);
1555        th.rxq.dtable[5].check_data(&[0; MAX_BUFFER_SIZE - 1000]);
1556    }
1557
1558    #[test]
1559    fn test_rx_multiple_frames() {
1560        let mem = single_region_mem(3 * MAX_BUFFER_SIZE);
1561        let th = TestHelper::get_default(&mem);
1562        rx_multiple_frames(th);
1563    }
1564
1565    #[test]
1566    fn test_rx_multiple_frames_mrg() {
1567        let mem = single_region_mem(3 * MAX_BUFFER_SIZE);
1568        let mut th = TestHelper::get_default(&mem);
1569        // VIRTIO_NET_F_MRG_RXBUF is not enabled by default
1570        th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF;
1571        rx_multiple_frames(th);
1572    }
1573
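    // Exercises RX behaviour that only works with VIRTIO_NET_F_MRG_RXBUF: the frame is
    // spread over two chains, so the non-mergeable variant (which discards the small first
    // chain) is expected to panic in `test_rx_mrg_rxbuf_only`.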
1574    fn rx_mrg_rxbuf_only(mut th: TestHelper) {
1575        th.activate_net();
1576
1577        // Create 2 valid Rx avail descriptor chains. The total size should
1578        // be at least 64K to pass the capacity check for rx_buffers.
1579        // The first chain is intentionally small, so the non-VIRTIO_NET_F_MRG_RXBUF
1580        // version will skip it.
1581        th.add_desc_chain(NetQueue::Rx, 0, &[(0, 500, VIRTQ_DESC_F_WRITE)]);
1582        th.add_desc_chain(
1583            NetQueue::Rx,
1584            1000,
1585            &[(1, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)],
1586        );
1587        // Inject frame to tap and run epoll.
1588        let mut frame = inject_tap_tx_frame(&th.net(), 1000);
1589        check_metric_after_block!(
1590            th.net().metrics.rx_packets_count,
1591            1,
1592            th.event_manager.run_with_timeout(100).unwrap()
1593        );
1594
1595        // Check that the frame wasn't deferred.
1596        assert!(th.net().rx_buffer.used_bytes == 0);
1597        // Check that the used queue has advanced.
1598        assert_eq!(th.rxq.used.idx.get(), 2);
1599        assert!(
1600            th.net()
1601                .interrupt_trigger()
1602                .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
1603        );
1604        // 2 chains should be used for the packet.
1605        header_set_num_buffers(frame.as_mut_slice(), 2);
1606
1607        // Here the non-VIRTIO_NET_F_MRG_RXBUF version should panic, because it
1608        // will have discarded the first descriptor.
1609        th.rxq.check_used_elem(0, 0, 500);
1610
1611        th.rxq.check_used_elem(1, 1, 500);
1612        th.rxq.dtable[0].check_data(&frame[0..500]);
1613        th.rxq.dtable[1].check_data(&frame[500..]);
1614    }
1615
1616    #[test]
1617    #[should_panic]
1618    fn test_rx_mrg_rxbuf_only() {
1619        let mem = single_region_mem(3 * MAX_BUFFER_SIZE);
1620        let th = TestHelper::get_default(&mem);
1621        rx_mrg_rxbuf_only(th);
1622    }
1623
1624    #[test]
1625    fn test_rx_mrg_rxbuf_only_mrg() {
1626        let mem = single_region_mem(3 * MAX_BUFFER_SIZE);
1627        let mut th = TestHelper::get_default(&mem);
1628        // VIRTIO_NET_F_MRG_RXBUF is not enabled by default
1629        th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF;
1630        rx_mrg_rxbuf_only(th);
1631    }
1632
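    // The TX queue eventfd is drained before the TxQueue event is simulated, so the
    // handler fails to read it and bumps `event_fails`; the used ring does not advance
    // and nothing reaches the tap.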
1633    #[test]
1634    fn test_tx_missing_queue_signal() {
1635        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1636        let mut th = TestHelper::get_default(&mem);
1637        th.activate_net();
1638        let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
1639
1640        th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]);
1641        th.net().queue_evts[TX_INDEX].read().unwrap();
1642        check_metric_after_block!(
1643            th.net().metrics.event_fails,
1644            1,
1645            th.simulate_event(NetEvent::TxQueue)
1646        );
1647
1648        // Check that the used queue didn't advance.
1649        assert_eq!(th.txq.used.idx.get(), 0);
1650        // Check that the frame wasn't sent to the tap.
1651        assert!(!tap_traffic_simulator.pop_rx_packet(&mut [0; 1000]));
1652    }
1653
1654    #[test]
1655    fn test_tx_writeable_descriptor() {
1656        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1657        let mut th = TestHelper::get_default(&mem);
1658        th.activate_net();
1659        let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
1660
1661        let desc_list = [(0, 100, 0), (1, 100, VIRTQ_DESC_F_WRITE), (2, 500, 0)];
1662        th.add_desc_chain(NetQueue::Tx, 0, &desc_list);
1663        th.write_tx_frame(&desc_list, 700);
1664        th.event_manager.run_with_timeout(100).unwrap();
1665
1666        // Check that the used queue advanced.
1667        assert_eq!(th.txq.used.idx.get(), 1);
1668        assert!(
1669            th.net()
1670                .interrupt_trigger()
1671                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
1672        );
1673        th.txq.check_used_elem(0, 0, 0);
1674        // Check that the frame was skipped.
1675        assert!(!tap_traffic_simulator.pop_rx_packet(&mut []));
1676    }
1677
1678    #[test]
1679    fn test_tx_short_frame() {
1680        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1681        let mut th = TestHelper::get_default(&mem);
1682        th.activate_net();
1683        let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
1684
1685        // Send an invalid frame (too small, VNET header missing).
1686        th.add_desc_chain(NetQueue::Tx, 0, &[(0, 1, 0)]);
1687        check_metric_after_block!(
1688            th.net().metrics.tx_malformed_frames,
1689            1,
1690            th.event_manager.run_with_timeout(100)
1691        );
1692
1693        // Check that the used queue advanced.
1694        assert_eq!(th.txq.used.idx.get(), 1);
1695        assert!(
1696            th.net()
1697                .interrupt_trigger()
1698                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
1699        );
1700        th.txq.check_used_elem(0, 0, 0);
1701        // Check that the frame was skipped.
1702        assert!(!tap_traffic_simulator.pop_rx_packet(&mut []));
1703    }
1704
1705    #[test]
1706    fn test_tx_big_frame() {
1707        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1708        let mut th = TestHelper::get_default(&mem);
1709        th.activate_net();
1710        let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
1711
1712        // Send an invalid frame (too big, maximum buffer is MAX_BUFFER_SIZE).
1713        th.add_desc_chain(
1714            NetQueue::Tx,
1715            0,
1716            &[(0, (MAX_BUFFER_SIZE + 1).try_into().unwrap(), 0)],
1717        );
1718        check_metric_after_block!(
1719            th.net().metrics.tx_malformed_frames,
1720            1,
1721            th.event_manager.run_with_timeout(100)
1722        );
1723
1724        // Check that the used queue advanced.
1725        assert_eq!(th.txq.used.idx.get(), 1);
1726        assert!(
1727            th.net()
1728                .interrupt_trigger()
1729                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
1730        );
1731        th.txq.check_used_elem(0, 0, 0);
1732        // Check that the frame was skipped.
1733        assert!(!tap_traffic_simulator.pop_rx_packet(&mut []));
1734    }
1735
1736    #[test]
1737    fn test_tx_empty_frame() {
1738        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1739        let mut th = TestHelper::get_default(&mem);
1740        th.activate_net();
1741        let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
1742
1743        // Send an invalid frame (empty, VNET header missing).
1744        th.add_desc_chain(NetQueue::Tx, 0, &[(0, 0, 0)]);
1745        check_metric_after_block!(
1746            th.net().metrics.tx_malformed_frames,
1747            1,
1748            th.event_manager.run_with_timeout(100)
1749        );
1750
1751        // Check that the used queue advanced.
1752        assert_eq!(th.txq.used.idx.get(), 1);
1753        assert!(
1754            th.net()
1755                .interrupt_trigger()
1756                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
1757        );
1758        th.txq.check_used_elem(0, 0, 0);
1759        // Check that the frame was skipped.
1760        assert!(!tap_traffic_simulator.pop_rx_packet(&mut []));
1761    }
1762
1763    #[test]
1764    fn test_tx_retry() {
1765        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1766        let mut th = TestHelper::get_default(&mem);
1767        th.activate_net();
1768        let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
1769
1770        // Add invalid descriptor chain - writeable descriptor.
1771        th.add_desc_chain(
1772            NetQueue::Tx,
1773            0,
1774            &[(0, 100, 0), (1, 100, VIRTQ_DESC_F_WRITE), (2, 500, 0)],
1775        );
1776        // Add invalid descriptor chain - invalid memory.
1777        th.add_desc_chain(NetQueue::Tx, th.mem.last_addr().raw_value(), &[(3, 100, 0)]);
1778        // Add invalid descriptor chain - too short.
1779        th.add_desc_chain(NetQueue::Tx, 700, &[(0, 1, 0)]);
1780
1781        // Add valid descriptor chain
1782        let desc_list = [(4, 1000, 0)];
1783        th.add_desc_chain(NetQueue::Tx, 0, &desc_list);
1784        let frame = th.write_tx_frame(&desc_list, 1000);
1785
1786        // One frame is valid, one will not be handled because it includes write-only memory
1787        // so that leaves us with 2 malformed (no vnet header) frames.
1788        check_metric_after_block!(
1789            th.net().metrics.tx_malformed_frames,
1790            2,
1791            th.event_manager.run_with_timeout(100)
1792        );
1793
1794        // Check that the used queue advanced.
1795        assert_eq!(th.txq.used.idx.get(), 4);
1796        assert!(
1797            th.net()
1798                .interrupt_trigger()
1799                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
1800        );
1801        th.txq.check_used_elem(3, 4, 0);
1802        // Check that the valid frame was sent to the tap.
1803        let mut buf = vec![0; 1000];
1804        assert!(tap_traffic_simulator.pop_rx_packet(&mut buf[vnet_hdr_len()..]));
1805        assert_eq!(&buf, &frame);
1806        // Check that no other frame was sent to the tap.
1807        assert!(!tap_traffic_simulator.pop_rx_packet(&mut []));
1808    }
1809
1810    #[test]
1811    fn test_tx_complex_descriptor() {
1812        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1813        let mut th = TestHelper::get_default(&mem);
1814        th.activate_net();
1815        let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
1816
1817        // Add gaps between the descriptor ids in order to ensure that we follow
1818        // the `next` field.
1819        let desc_list = [(3, 100, 0), (5, 50, 0), (11, 850, 0)];
1820        th.add_desc_chain(NetQueue::Tx, 0, &desc_list);
1821        let frame = th.write_tx_frame(&desc_list, 1000);
1822
1823        check_metric_after_block!(
1824            th.net().metrics.tx_packets_count,
1825            1,
1826            th.event_manager.run_with_timeout(100).unwrap()
1827        );
1828
1829        // Check that the used queue advanced.
1830        assert_eq!(th.txq.used.idx.get(), 1);
1831        assert!(
1832            th.net()
1833                .interrupt_trigger()
1834                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
1835        );
1836        th.txq.check_used_elem(0, 3, 0);
1837        // Check that the frame was sent to the tap.
1838        let mut buf = vec![0; 1000];
1839        assert!(tap_traffic_simulator.pop_rx_packet(&mut buf[vnet_hdr_len()..]));
1840        assert_eq!(&buf[..1000], &frame[..1000]);
1841    }
1842
1843    #[test]
1844    fn test_tx_tap_failure() {
1845        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1846        let mut th = TestHelper::get_default(&mem);
1847        th.activate_net();
1848        // force the next write to the tap to return an error by simply closing the fd
1849        // SAFETY: it's a valid fd
1850        unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) };
1851
1852        let desc_list = [(0, 1000, 0)];
1853        th.add_desc_chain(NetQueue::Tx, 0, &desc_list);
1854        let _ = th.write_tx_frame(&desc_list, 1000);
1855
1856        check_metric_after_block!(
1857            th.net().metrics.tap_write_fails,
1858            1,
1859            th.event_manager.run_with_timeout(100).unwrap()
1860        );
1861
1862        // Check that the used queue advanced.
1863        assert_eq!(th.txq.used.idx.get(), 1);
1864        assert!(
1865            th.net()
1866                .interrupt_trigger()
1867                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
1868        );
1869        th.txq.check_used_elem(0, 0, 0);
1870
1871        // dropping th would double close the tap fd, so leak it
1872        std::mem::forget(th);
1873    }
1874
1875    #[test]
1876    fn test_tx_multiple_frame() {
1877        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1878        let mut th = TestHelper::get_default(&mem);
1879        th.activate_net();
1880        let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
1881
1882        // Write the first frame to the Tx queue
1883        let desc_list = [(0, 50, 0), (1, 100, 0), (2, 150, 0)];
1884        th.add_desc_chain(NetQueue::Tx, 0, &desc_list);
1885        let frame_1 = th.write_tx_frame(&desc_list, 300);
1886        // Write the second frame to the Tx queue
1887        let desc_list = [(3, 100, 0), (4, 200, 0), (5, 300, 0)];
1888        th.add_desc_chain(NetQueue::Tx, 500, &desc_list);
1889        let frame_2 = th.write_tx_frame(&desc_list, 600);
1890
1891        check_metric_after_block!(
1892            th.net().metrics.tx_packets_count,
1893            2,
1894            th.event_manager.run_with_timeout(100).unwrap()
1895        );
1896
1897        // Check that the used queue advanced.
1898        assert_eq!(th.txq.used.idx.get(), 2);
1899        assert!(
1900            th.net()
1901                .interrupt_trigger()
1902                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
1903        );
1904        th.txq.check_used_elem(0, 0, 0);
1905        th.txq.check_used_elem(1, 3, 0);
1906        // Check that the first frame was sent to the tap.
1907        let mut buf = vec![0; 300];
1908        assert!(tap_traffic_simulator.pop_rx_packet(&mut buf[vnet_hdr_len()..]));
1909        assert_eq!(&buf[..300], &frame_1[..300]);
1910        // Check that the second frame was sent to the tap.
1911        let mut buf = vec![0; 600];
1912        assert!(tap_traffic_simulator.pop_rx_packet(&mut buf[vnet_hdr_len()..]));
1913        assert_eq!(&buf[..600], &frame_2[..600]);
1914    }
1915
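    // Builds an Ethernet/ARP request frame, preceded by space for the VNET header, into a
    // MAX_BUFFER_SIZE buffer and returns the buffer together with the total frame length.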
1916    fn create_arp_request(
1917        src_mac: MacAddr,
1918        src_ip: Ipv4Addr,
1919        dst_mac: MacAddr,
1920        dst_ip: Ipv4Addr,
1921    ) -> ([u8; MAX_BUFFER_SIZE], usize) {
1922        let mut frame_buf = [b'\0'; MAX_BUFFER_SIZE];
1923
1924        // Create an ethernet frame.
1925        let incomplete_frame = EthernetFrame::write_incomplete(
1926            frame_bytes_from_buf_mut(&mut frame_buf).unwrap(),
1927            dst_mac,
1928            src_mac,
1929            ETHERTYPE_ARP,
1930        )
1931        .ok()
1932        .unwrap();
1933        // Set its length to hold an ARP request.
1934        let mut frame = incomplete_frame.with_payload_len_unchecked(ETH_IPV4_FRAME_LEN);
1935
1936        // Save the total frame length.
1937        let frame_len = vnet_hdr_len() + frame.payload_offset() + ETH_IPV4_FRAME_LEN;
1938
1939        // Create the ARP request.
1940        let arp_request =
1941            EthIPv4ArpFrame::write_request(frame.payload_mut(), src_mac, src_ip, dst_mac, dst_ip);
1942        // Validate success.
1943        arp_request.unwrap();
1944
1945        (frame_buf, frame_len)
1946    }
1947
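    // The ARP request below is addressed to the MMDS IP (169.254.169.254), so
    // `write_to_mmds_or_tap` should divert it to the MMDS network stack instead of the
    // tap, after which a response becomes available through `read_from_mmds_or_tap`.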
1948    #[test]
1949    fn test_mmds_detour_and_injection() {
1950        let mut net = default_net();
1951
1952        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
1953        let rxq = VirtQueue::new(GuestAddress(0), &mem, 16);
1954        net.queues[RX_INDEX] = rxq.create_queue();
1955
1956        // Inject a fake buffer into the device's buffers, otherwise we won't be able to receive
1957        // the MMDS frame. One iovec will be just fine.
1958        let mut fake_buffer = vec![0u8; MAX_BUFFER_SIZE];
1959        let iov_buffer = IoVecBufferMut::from(fake_buffer.as_mut_slice());
1960        net.rx_buffer.iovec = iov_buffer;
1961        net.rx_buffer
1962            .parsed_descriptors
1963            .push_back(ParsedDescriptorChain {
1964                head_index: 1,
1965                length: 1024,
1966                nr_iovecs: 1,
1967            });
1968
1969        let src_mac = MacAddr::from_str("11:11:11:11:11:11").unwrap();
1970        let src_ip = Ipv4Addr::new(10, 1, 2, 3);
1971        let dst_mac = MacAddr::from_str("22:22:22:22:22:22").unwrap();
1972        let dst_ip = Ipv4Addr::new(169, 254, 169, 254);
1973
1974        let (frame_buf, frame_len) = create_arp_request(src_mac, src_ip, dst_mac, dst_ip);
1975        let buffer = IoVecBuffer::from(&frame_buf[..frame_len]);
1976
1977        let mut headers = vec![0; frame_hdr_len()];
1978        buffer.read_exact_volatile_at(&mut headers, 0).unwrap();
1979
1980        // Call the code which sends the packet to the host or MMDS.
1981        // Validate the frame was consumed by MMDS and that the metrics reflect that.
1982        check_metric_after_block!(
1983            &METRICS.mmds.rx_accepted,
1984            1,
1985            assert!(
1986                Net::write_to_mmds_or_tap(
1987                    net.mmds_ns.as_mut(),
1988                    &mut net.tx_rate_limiter,
1989                    &mut headers,
1990                    &buffer,
1991                    &mut net.tap,
1992                    Some(src_mac),
1993                    &net.metrics,
1994                )
1995                .unwrap()
1996            )
1997        );
1998
1999        // Validate that MMDS has a response and we can retrieve it.
2000        check_metric_after_block!(
2001            &METRICS.mmds.tx_frames,
2002            1,
2003            net.read_from_mmds_or_tap().unwrap()
2004        );
2005    }
2006
2007    #[test]
2008    fn test_mac_spoofing_detection() {
2009        let mut net = default_net();
2010
2011        let guest_mac = MacAddr::from_str("11:11:11:11:11:11").unwrap();
2012        let not_guest_mac = MacAddr::from_str("33:33:33:33:33:33").unwrap();
2013        let guest_ip = Ipv4Addr::new(10, 1, 2, 3);
2014        let dst_mac = MacAddr::from_str("22:22:22:22:22:22").unwrap();
2015        let dst_ip = Ipv4Addr::new(10, 1, 1, 1);
2016
2017        let (frame_buf, frame_len) = create_arp_request(guest_mac, guest_ip, dst_mac, dst_ip);
2018        let buffer = IoVecBuffer::from(&frame_buf[..frame_len]);
2019        let mut headers = vec![0; frame_hdr_len()];
2020
2021        // Check that a legit MAC doesn't affect the spoofed MAC metric.
2022        check_metric_after_block!(
2023            net.metrics.tx_spoofed_mac_count,
2024            0,
2025            Net::write_to_mmds_or_tap(
2026                net.mmds_ns.as_mut(),
2027                &mut net.tx_rate_limiter,
2028                &mut headers,
2029                &buffer,
2030                &mut net.tap,
2031                Some(guest_mac),
2032                &net.metrics,
2033            )
2034        );
2035
2036        // Check that a spoofed MAC increases our spoofed MAC metric.
2037        check_metric_after_block!(
2038            net.metrics.tx_spoofed_mac_count,
2039            1,
2040            Net::write_to_mmds_or_tap(
2041                net.mmds_ns.as_mut(),
2042                &mut net.tx_rate_limiter,
2043                &mut headers,
2044                &buffer,
2045                &mut net.tap,
2046                Some(not_guest_mac),
2047                &net.metrics,
2048            )
2049        );
2050    }
2051
2052    #[test]
2053    fn test_process_error_cases() {
2054        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2055        let mut th = TestHelper::get_default(&mem);
2056        th.activate_net();
2057
2058        // RX rate limiter events should error since the limiter is not blocked.
2059        // Validate that the event failed and failure was properly accounted for.
2060        check_metric_after_block!(
2061            th.net().metrics.event_fails,
2062            1,
2063            th.simulate_event(NetEvent::RxRateLimiter)
2064        );
2065
2066        // TX rate limiter events should error since the limiter is not blocked.
2067        // Validate that the event failed and failure was properly accounted for.
2068        check_metric_after_block!(
2069            th.net().metrics.event_fails,
2070            1,
2071            th.simulate_event(NetEvent::TxRateLimiter)
2072        );
2073    }
2074
2075    // Cannot easily test failures for:
2076    //  * queue_evt.read (rx and tx)
2077    //  * interrupt_evt.write
2078    #[test]
2079    fn test_read_tap_fail_event_handler() {
2080        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2081        let mut th = TestHelper::get_default(&mem);
2082        th.activate_net();
2083        // force the next read from the tap to return an error by simply closing the fd
2084        // SAFETY: it's a valid fd
2085        unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) };
2086
2087        // The RX queue is empty and there is a deferred frame.
2088        th.net().rx_buffer.used_descriptors = 1;
2089        th.net().rx_buffer.used_bytes = 100;
2090        check_metric_after_block!(
2091            th.net().metrics.no_rx_avail_buffer,
2092            1,
2093            th.simulate_event(NetEvent::Tap)
2094        );
2095
2096        // We need to reset these to 0, otherwise the device will try to handle a
2097        // deferred frame; that will fail and it will never try to read from
2098        // the tap.
2099        th.net().rx_buffer.used_descriptors = 0;
2100        th.net().rx_buffer.used_bytes = 0;
2101
2102        th.add_desc_chain(
2103            NetQueue::Rx,
2104            0,
2105            &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)],
2106        );
2107        check_metric_after_block!(
2108            th.net().metrics.tap_read_fails,
2109            1,
2110            th.simulate_event(NetEvent::Tap)
2111        );
2112
2113        // dropping th would double close the tap fd, so leak it
2114        std::mem::forget(th);
2115    }
2116
2117    #[test]
2118    fn test_rx_rate_limiter_handling() {
2119        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2120        let mut th = TestHelper::get_default(&mem);
2121        th.activate_net();
2122
2123        th.net().rx_rate_limiter = RateLimiter::new(0, 0, 0, 0, 0, 0).unwrap();
2124        // There is no actual event on the rate limiter's timerfd.
2125        check_metric_after_block!(
2126            th.net().metrics.event_fails,
2127            1,
2128            th.simulate_event(NetEvent::RxRateLimiter)
2129        );
2130    }
2131
2132    #[test]
2133    fn test_tx_rate_limiter_handling() {
2134        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2135        let mut th = TestHelper::get_default(&mem);
2136        th.activate_net();
2137
2138        th.net().tx_rate_limiter = RateLimiter::new(0, 0, 0, 0, 0, 0).unwrap();
2139        th.simulate_event(NetEvent::TxRateLimiter);
2140        // There is no actual event on the rate limiter's timerfd.
2141        check_metric_after_block!(
2142            th.net().metrics.event_fails,
2143            1,
2144            th.simulate_event(NetEvent::TxRateLimiter)
2145        );
2146    }
2147
2148    #[test]
2149    fn test_bandwidth_rate_limiter() {
2150        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2151        let mut th = TestHelper::get_default(&mem);
2152        th.activate_net();
2153
2154        // Test TX bandwidth rate limiting
2155        {
2156            // create bandwidth rate limiter that allows 40960 bytes/s with bucket size 4096 bytes
2157            let mut rl = RateLimiter::new(0x1000, 0, 100, 0, 0, 0).unwrap();
2158            // use up the budget
2159            assert!(rl.consume(0x1000, TokenType::Bytes));
2160
2161            // set this tx rate limiter to be used
2162            th.net().tx_rate_limiter = rl;
2163
2164            // try doing TX
2165            // following TX procedure should fail because of bandwidth rate limiting
2166            {
2167                // trigger the TX handler
2168                th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]);
2169                th.simulate_event(NetEvent::TxQueue);
2170
2171                // assert that limiter is blocked
2172                assert!(th.net().tx_rate_limiter.is_blocked());
2173                assert_eq!(th.net().metrics.tx_rate_limiter_throttled.count(), 1);
2174                // make sure the data is still queued for processing
2175                assert_eq!(th.txq.used.idx.get(), 0);
2176            }
2177
2178            // A second TX queue event should be throttled too
2179            {
2180                th.add_desc_chain(NetQueue::Tx, 0, &[(1, 1024, 0)]);
2181                // trigger the TX queue event handler
2182                th.simulate_event(NetEvent::TxQueue);
2183
2184                assert_eq!(th.net().metrics.tx_rate_limiter_throttled.count(), 2);
2185            }
2186
2187            // wait for 100ms to give the rate-limiter timer a chance to replenish
2188            // wait for an extra 100ms to make sure the timerfd event makes its way from the kernel
2189            thread::sleep(Duration::from_millis(200));
2190
2191            // following TX procedure should succeed because bandwidth should now be available
2192            {
2193                // tx_count increments 1 from write_to_mmds_or_tap()
2194                check_metric_after_block!(
2195                    th.net().metrics.tx_count,
2196                    1,
2197                    th.simulate_event(NetEvent::TxRateLimiter)
2198                );
2199                // This should still be blocked. We managed to send the first frame, but
2200                // there is not enough budget for the second.
2201                assert!(th.net().tx_rate_limiter.is_blocked());
2202                // make sure the data queue advanced
2203                assert_eq!(th.txq.used.idx.get(), 1);
2204            }
2205
2206            thread::sleep(Duration::from_millis(200));
2207
2208            // following TX procedure should succeed to handle the second frame as well
2209            {
2210                // tx_count increments 1 from write_to_mmds_or_tap()
2211                check_metric_after_block!(
2212                    th.net().metrics.tx_count,
2213                    1,
2214                    th.simulate_event(NetEvent::TxRateLimiter)
2215                );
2216                // validate the rate_limiter is no longer blocked
2217                assert!(!th.net().tx_rate_limiter.is_blocked());
2218                // make sure the data queue advanced one more place
2219                assert_eq!(th.txq.used.idx.get(), 2);
2220            }
2221        }
2222
2223        // Test RX bandwidth rate limiting
2224        {
2225            // create bandwidth rate limiter that allows 1000 bytes/s with bucket size 1000 bytes
2226            let mut rl = RateLimiter::new(1000, 0, 1000, 0, 0, 0).unwrap();
2227
2228            // set up RX
2229            assert!(th.net().rx_buffer.used_descriptors == 0);
2230            th.add_desc_chain(
2231                NetQueue::Rx,
2232                0,
2233                &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)],
2234            );
2235
2236            let mut frame = inject_tap_tx_frame(&th.net(), 1000);
2237
2238            // use up the budget (do it after injecting the tx frame, as socket communication is
2239            // slow enough that the ratelimiter could replenish in the meantime).
2240            assert!(rl.consume(1000, TokenType::Bytes));
2241
2242            // set this rx rate limiter to be used
2243            th.net().rx_rate_limiter = rl;
2244
2245            // following RX procedure should fail because of bandwidth rate limiting
2246            {
2247                // trigger the RX handler
2248                th.simulate_event(NetEvent::Tap);
2249
2250                // assert that limiter is blocked
2251                assert!(th.net().rx_rate_limiter.is_blocked());
2252                assert_eq!(th.net().metrics.rx_rate_limiter_throttled.count(), 1);
2253                assert!(th.net().rx_buffer.used_descriptors != 0);
2254                // assert that no operation actually completed (limiter blocked it)
2255                assert!(
2256                    th.net()
2257                        .interrupt_trigger()
2258                        .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
2259                );
2260                // make sure the data is still queued for processing
2261                assert_eq!(th.rxq.used.idx.get(), 0);
2262            }
2263
2264            // An RX queue event should be throttled too
2265            {
2266                // trigger the RX queue event handler
2267                th.simulate_event(NetEvent::RxQueue);
2268
2269                assert_eq!(th.net().metrics.rx_rate_limiter_throttled.count(), 2);
2270            }
2271
2272            // wait for 1000ms to give the rate-limiter timer a chance to replenish
2273            // wait for an extra 1000ms to make sure the timerfd event makes its way from the kernel
2274            thread::sleep(Duration::from_millis(2000));
2275
2276            // following RX procedure should succeed because bandwidth should now be available
2277            {
2278                // no longer throttled
2279                check_metric_after_block!(
2280                    th.net().metrics.rx_rate_limiter_throttled,
2281                    0,
2282                    th.simulate_event(NetEvent::RxRateLimiter)
2283                );
2284                // validate the rate_limiter is no longer blocked
2285                assert!(!th.net().rx_rate_limiter.is_blocked());
2286                // make sure the virtio queue operation completed this time
2287                assert!(
2288                    th.net()
2289                        .interrupt_trigger()
2290                        .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
2291                );
2292                // make sure the data queue advanced
2293                assert_eq!(th.rxq.used.idx.get(), 1);
2294                th.rxq
2295                    .check_used_elem(0, 0, frame.len().try_into().unwrap());
2296                header_set_num_buffers(frame.as_mut_slice(), 1);
2297                th.rxq.dtable[0].check_data(&frame);
2298            }
2299        }
2300    }
2301
2302    #[test]
2303    fn test_ops_rate_limiter() {
2304        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2305        let mut th = TestHelper::get_default(&mem);
2306        th.activate_net();
2307
2308        // Test TX ops rate limiting
2309        {
2310            // create ops rate limiter that allows 10 ops/s with bucket size 1 op
2311            let mut rl = RateLimiter::new(0, 0, 0, 1, 0, 100).unwrap();
2312            // use up the budget
2313            assert!(rl.consume(1, TokenType::Ops));
2314
2315            // set this tx rate limiter to be used
2316            th.net().tx_rate_limiter = rl;
2317
2318            // try doing TX
2319            // following TX procedure should fail because of ops rate limiting
2320            {
2321                // trigger the TX handler
2322                th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]);
2323                check_metric_after_block!(
2324                    th.net().metrics.tx_rate_limiter_throttled,
2325                    1,
2326                    th.simulate_event(NetEvent::TxQueue)
2327                );
2328
2329                // assert that limiter is blocked
2330                assert!(th.net().tx_rate_limiter.is_blocked());
2331                // make sure the data is still queued for processing
2332                assert_eq!(th.txq.used.idx.get(), 0);
2333            }
2334
2335            // wait for 100ms to give the rate-limiter timer a chance to replenish
2336            // wait for an extra 100ms to make sure the timerfd event makes its way from the kernel
2337            thread::sleep(Duration::from_millis(200));
2338
2339            // following TX procedure should succeed because ops should now be available
2340            {
2341                // no longer throttled
2342                check_metric_after_block!(
2343                    th.net().metrics.tx_rate_limiter_throttled,
2344                    0,
2345                    th.simulate_event(NetEvent::TxRateLimiter)
2346                );
2347                // validate the rate_limiter is no longer blocked
2348                assert!(!th.net().tx_rate_limiter.is_blocked());
2349                // make sure the data queue advanced
2350                assert_eq!(th.txq.used.idx.get(), 1);
2351            }
2352        }
2353
2354        // Test RX ops rate limiting
2355        {
2356            // create ops rate limiter that allows 1 op/s with bucket size 1 op
2357            let mut rl = RateLimiter::new(0, 0, 0, 1, 0, 1000).unwrap();
2358
2359            // set up RX
2360            assert!(th.net().rx_buffer.used_descriptors == 0);
2361            th.add_desc_chain(
2362                NetQueue::Rx,
2363                0,
2364                &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)],
2365            );
2366            let mut frame = inject_tap_tx_frame(&th.net(), 1234);
2367
2368            // use up the initial budget
2369            assert!(rl.consume(1, TokenType::Ops));
2370
2371            // set this rx rate limiter to be used
2372            th.net().rx_rate_limiter = rl;
2373
2374            // following RX procedure should fail because of ops rate limiting
2375            {
2376                // trigger the RX handler
2377                check_metric_after_block!(
2378                    th.net().metrics.rx_rate_limiter_throttled,
2379                    1,
2380                    th.simulate_event(NetEvent::Tap)
2381                );
2382
2383                // assert that limiter is blocked
2384                assert!(th.net().rx_rate_limiter.is_blocked());
2385                assert!(th.net().metrics.rx_rate_limiter_throttled.count() >= 1);
2386                assert!(th.net().rx_buffer.used_descriptors != 0);
2387                // assert that no operation actually completed (limiter blocked it)
2388                assert!(
2389                    th.net()
2390                        .interrupt_trigger()
2391                        .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
2392                );
2393                // make sure the data is still queued for processing
2394                assert_eq!(th.rxq.used.idx.get(), 0);
2395
2396                // trigger the RX handler again, this time it should do the limiter fast path exit
2397                th.simulate_event(NetEvent::Tap);
2398                // assert that no operation actually completed, that the limiter blocked it
2399                assert!(
2400                    !th.net()
2401                        .interrupt_trigger()
2402                        .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
2403                );
2404                // make sure the data is still queued for processing
2405                assert_eq!(th.rxq.used.idx.get(), 0);
2406            }
2407
2408            // wait for 1000ms to give the rate-limiter timer a chance to replenish
2409            // wait for an extra 1000ms to make sure the timerfd event makes its way from the kernel
2410            thread::sleep(Duration::from_millis(2000));
2411
2412            // following RX procedure should succeed because ops should now be available
2413            {
2414                th.simulate_event(NetEvent::RxRateLimiter);
2415                // make sure the virtio queue operation completed this time
2416                assert!(
2417                    th.net()
2418                        .interrupt_trigger()
2419                        .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
2420                );
2421                // make sure the data queue advanced
2422                assert_eq!(th.rxq.used.idx.get(), 1);
2423                th.rxq
2424                    .check_used_elem(0, 0, frame.len().try_into().unwrap());
2425                header_set_num_buffers(frame.as_mut_slice(), 1);
2426                th.rxq.dtable[0].check_data(&frame);
2427            }
2428        }
2429    }
2430
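    // `patch_rate_limiters` with `BucketUpdate::Update` should replace the corresponding
    // token buckets, while `BucketUpdate::Disabled` should remove them entirely.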
2431    #[test]
2432    fn test_patch_rate_limiters() {
2433        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2434        let mut th = TestHelper::get_default(&mem);
2435        th.activate_net();
2436
2437        th.net().rx_rate_limiter = RateLimiter::new(10, 0, 10, 2, 0, 2).unwrap();
2438        th.net().tx_rate_limiter = RateLimiter::new(10, 0, 10, 2, 0, 2).unwrap();
2439
2440        let rx_bytes = TokenBucket::new(1000, 1001, 1002).unwrap();
2441        let rx_ops = TokenBucket::new(1003, 1004, 1005).unwrap();
2442        let tx_bytes = TokenBucket::new(1006, 1007, 1008).unwrap();
2443        let tx_ops = TokenBucket::new(1009, 1010, 1011).unwrap();
2444
2445        th.net().patch_rate_limiters(
2446            BucketUpdate::Update(rx_bytes.clone()),
2447            BucketUpdate::Update(rx_ops.clone()),
2448            BucketUpdate::Update(tx_bytes.clone()),
2449            BucketUpdate::Update(tx_ops.clone()),
2450        );
2451        let compare_buckets = |a: &TokenBucket, b: &TokenBucket| {
2452            assert_eq!(a.capacity(), b.capacity());
2453            assert_eq!(a.one_time_burst(), b.one_time_burst());
2454            assert_eq!(a.refill_time_ms(), b.refill_time_ms());
2455        };
2456        compare_buckets(th.net().rx_rate_limiter.bandwidth().unwrap(), &rx_bytes);
2457        compare_buckets(th.net().rx_rate_limiter.ops().unwrap(), &rx_ops);
2458        compare_buckets(th.net().tx_rate_limiter.bandwidth().unwrap(), &tx_bytes);
2459        compare_buckets(th.net().tx_rate_limiter.ops().unwrap(), &tx_ops);
2460
2461        th.net().patch_rate_limiters(
2462            BucketUpdate::Disabled,
2463            BucketUpdate::Disabled,
2464            BucketUpdate::Disabled,
2465            BucketUpdate::Disabled,
2466        );
2467        assert!(th.net().rx_rate_limiter.bandwidth().is_none());
2468        assert!(th.net().rx_rate_limiter.ops().is_none());
2469        assert!(th.net().tx_rate_limiter.bandwidth().is_none());
2470        assert!(th.net().tx_rate_limiter.ops().is_none());
2471    }
2472
2473    #[test]
2474    fn test_virtio_device() {
2475        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2476        let mut th = TestHelper::get_default(&mem);
2477        th.activate_net();
2478        let net = th.net.lock().unwrap();
2479
2480        // Test queues count (TX and RX).
2481        let queues = net.queues();
2482        assert_eq!(queues.len(), NET_QUEUE_SIZES.len());
2483        assert_eq!(queues[RX_INDEX].size, th.rxq.size());
2484        assert_eq!(queues[TX_INDEX].size, th.txq.size());
2485
2486        // Test corresponding queues events.
2487        assert_eq!(net.queue_events().len(), NET_QUEUE_SIZES.len());
2488
2489        // Test interrupts.
2490        assert!(
2491            !net.interrupt_trigger()
2492                .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
2493        );
2494        assert!(
2495            !net.interrupt_trigger()
2496                .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
2497        );
2498    }
2499
2500    #[test]
2501    fn test_queues_notification_suppression() {
2502        let features = 1 << VIRTIO_RING_F_EVENT_IDX;
2503
2504        let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
2505        let mut th = TestHelper::get_default(&mem);
2506        th.net().set_acked_features(features);
2507        th.activate_net();
2508
2509        let net = th.net();
2510        let queues = net.queues();
2511        assert!(queues[RX_INDEX].uses_notif_suppression);
2512        assert!(queues[TX_INDEX].uses_notif_suppression);
2513    }
2514}