vmm/devices/virtio/block/virtio/
metrics.rs

1// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Defines the metrics system for block devices.
5//!
6//! # Metrics format
7//! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write().
8//!
9//! ## JSON example with metrics:
10//! ```json
11//! {
12//!  "block_drv0": {
13//!     "activate_fails": "SharedIncMetric",
14//!     "cfg_fails": "SharedIncMetric",
15//!     "no_avail_buffer": "SharedIncMetric",
16//!     "event_fails": "SharedIncMetric",
17//!     "execute_fails": "SharedIncMetric",
18//!     ...
19//!  }
20//!  "block_drv1": {
21//!     "activate_fails": "SharedIncMetric",
22//!     "cfg_fails": "SharedIncMetric",
23//!     "no_avail_buffer": "SharedIncMetric",
24//!     "event_fails": "SharedIncMetric",
25//!     "execute_fails": "SharedIncMetric",
26//!     ...
27//!  }
28//!  ...
29//!  "block_drive_id": {
30//!     "activate_fails": "SharedIncMetric",
31//!     "cfg_fails": "SharedIncMetric",
32//!     "no_avail_buffer": "SharedIncMetric",
33//!     "event_fails": "SharedIncMetric",
34//!     "execute_fails": "SharedIncMetric",
35//!     ...
36//!  }
37//!  "block": {
38//!     "activate_fails": "SharedIncMetric",
39//!     "cfg_fails": "SharedIncMetric",
40//!     "no_avail_buffer": "SharedIncMetric",
41//!     "event_fails": "SharedIncMetric",
42//!     "execute_fails": "SharedIncMetric",
43//!     ...
44//!  }
45//! }
46//! ```
//! Each `block*` field in the example above is a serializable `BlockDeviceMetrics` structure
//! collecting metrics such as `activate_fails`, `cfg_fails`, etc. for a block device.
//! `block_drv0` represents the metrics for the endpoint "/drives/drv0",
//! `block_drv1` represents the metrics for the endpoint "/drives/drv1", and
//! `block_drive_id` represents the metrics for the endpoint "/drives/{drive_id}".
//! `block` is the aggregate of all the per-device metrics.
53//!
54//! # Limitations
//! Block devices currently do not have any `vmm::logger::metrics::StoreMetrics`, so the
//! aggregate doesn't consider them.
57//!
58//! # Design
59//! The main design goals of this system are:
60//! * To improve block device metrics by logging them at per device granularity.
61//! * Continue to provide aggregate block metrics to maintain backward compatibility.
//! * Move BlockDeviceMetrics out of the logger and decouple it.
63//! * Rely on `serde` to provide the actual serialization for writing the metrics.
64//! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to
65//!   avoid having to initialize everything by hand.
66//!
67//! * Devices could be created in any order i.e. the first device created could either be drv0 or
68//!   drv1 so if we use a vector for BlockDeviceMetrics and call 1st device as block0, then block0
69//!   could sometimes point to drv0 and sometimes to drv1 which doesn't help with analysing the
70//!   metrics. So, use Map instead of Vec to help understand which drive the metrics actually
71//!   belongs to.
72//!
73//! The system implements 1 type of metrics:
74//! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter
75//!   (i.e the number of times an API request failed). These metrics are reset upon flush.
76//!
77//! We add BlockDeviceMetrics entries from block::metrics::METRICS into Block device instead of
78//! Block device having individual separate BlockDeviceMetrics entries because Block device is not
79//! accessible from signal handlers to flush metrics and block::metrics::METRICS is.
80
81use std::collections::BTreeMap;
82use std::sync::{Arc, RwLock};
83
84use serde::ser::SerializeMap;
85use serde::{Serialize, Serializer};
86
87use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric};
88
89/// map of block drive id and metrics
90/// this should be protected by a lock before accessing.
91#[derive(Debug)]
92pub struct BlockMetricsPerDevice {
93    /// used to access per block device metrics
94    pub metrics: BTreeMap<String, Arc<BlockDeviceMetrics>>,
95}
96
97impl BlockMetricsPerDevice {
98    /// Allocate `BlockDeviceMetrics` for block device having
99    /// id `drive_id`. Also, allocate only if it doesn't
100    /// exist to avoid overwriting previously allocated data.
101    /// lock is always initialized so it is safe the unwrap
102    /// the lock without a check.
103    pub fn alloc(drive_id: String) -> Arc<BlockDeviceMetrics> {
104        Arc::clone(
105            METRICS
106                .write()
107                .unwrap()
108                .metrics
109                .entry(drive_id)
110                .or_insert_with(|| Arc::new(BlockDeviceMetrics::default())),
111        )
112    }
113}
114
115/// Pool of block-related metrics per device behind a lock to
116/// keep things thread safe. Since the lock is initialized here
117/// it is safe to unwrap it without any check.
118static METRICS: RwLock<BlockMetricsPerDevice> = RwLock::new(BlockMetricsPerDevice {
119    metrics: BTreeMap::new(),
120});
121
122/// This function facilitates aggregation and serialization of
123/// per block device metrics.
124pub fn flush_metrics<S: Serializer>(serializer: S) -> Result<S::Ok, S::Error> {
125    let block_metrics = METRICS.read().unwrap();
126    let metrics_len = block_metrics.metrics.len();
127    // +1 to accommodate aggregate block metrics
128    let mut seq = serializer.serialize_map(Some(1 + metrics_len))?;
129
130    let mut block_aggregated: BlockDeviceMetrics = BlockDeviceMetrics::default();
131
132    for (name, metrics) in block_metrics.metrics.iter() {
133        let devn = format!("block_{}", name);
134        // serialization will flush the metrics so aggregate before it.
135        let m: &BlockDeviceMetrics = metrics;
136        block_aggregated.aggregate(m);
137        seq.serialize_entry(&devn, m)?;
138    }
139    seq.serialize_entry("block", &block_aggregated)?;
140    seq.end()
141}
142
143/// Block Device associated metrics.
144#[derive(Debug, Default, Serialize)]
145pub struct BlockDeviceMetrics {
146    /// Number of times when activate failed on a block device.
147    pub activate_fails: SharedIncMetric,
148    /// Number of times when interacting with the space config of a block device failed.
149    pub cfg_fails: SharedIncMetric,
150    /// No available buffer for the block queue.
151    pub no_avail_buffer: SharedIncMetric,
152    /// Number of times when handling events on a block device failed.
153    pub event_fails: SharedIncMetric,
154    /// Number of failures in executing a request on a block device.
155    pub execute_fails: SharedIncMetric,
156    /// Number of invalid requests received for this block device.
157    pub invalid_reqs_count: SharedIncMetric,
158    /// Number of flushes operation triggered on this block device.
159    pub flush_count: SharedIncMetric,
160    /// Number of events triggered on the queue of this block device.
161    pub queue_event_count: SharedIncMetric,
162    /// Number of events ratelimiter-related.
163    pub rate_limiter_event_count: SharedIncMetric,
164    /// Number of update operation triggered on this block device.
165    pub update_count: SharedIncMetric,
166    /// Number of failures while doing update on this block device.
167    pub update_fails: SharedIncMetric,
168    /// Number of bytes read by this block device.
169    pub read_bytes: SharedIncMetric,
170    /// Number of bytes written by this block device.
171    pub write_bytes: SharedIncMetric,
172    /// Number of successful read operations.
173    pub read_count: SharedIncMetric,
174    /// Number of successful write operations.
175    pub write_count: SharedIncMetric,
176    /// Duration of all read operations.
177    pub read_agg: LatencyAggregateMetrics,
178    /// Duration of all write operations.
179    pub write_agg: LatencyAggregateMetrics,
180    /// Number of rate limiter throttling events.
181    pub rate_limiter_throttled_events: SharedIncMetric,
182    /// Number of virtio events throttled because of the IO engine.
183    /// This happens when the io_uring submission queue is full.
184    pub io_engine_throttled_events: SharedIncMetric,
185    /// Number of remaining requests in the queue.
186    pub remaining_reqs_count: SharedIncMetric,
187}
188
189impl BlockDeviceMetrics {
190    /// Const default construction.
191    pub fn new() -> Self {
192        Self {
193            read_agg: LatencyAggregateMetrics::new(),
194            write_agg: LatencyAggregateMetrics::new(),
195            ..Default::default()
196        }
197    }
198
199    /// block metrics are SharedIncMetric where the diff of current vs
200    /// old is serialized i.e. serialize_u64(current-old).
201    /// So to have the aggregate serialized in same way we need to
202    /// fetch the diff of current vs old metrics and add it to the
203    /// aggregate.
204    pub fn aggregate(&mut self, other: &Self) {
205        self.activate_fails.add(other.activate_fails.fetch_diff());
206        self.cfg_fails.add(other.cfg_fails.fetch_diff());
207        self.no_avail_buffer.add(other.no_avail_buffer.fetch_diff());
208        self.event_fails.add(other.event_fails.fetch_diff());
209        self.execute_fails.add(other.execute_fails.fetch_diff());
210        self.invalid_reqs_count
211            .add(other.invalid_reqs_count.fetch_diff());
212        self.flush_count.add(other.flush_count.fetch_diff());
213        self.queue_event_count
214            .add(other.queue_event_count.fetch_diff());
215        self.rate_limiter_event_count
216            .add(other.rate_limiter_event_count.fetch_diff());
217        self.update_count.add(other.update_count.fetch_diff());
218        self.update_fails.add(other.update_fails.fetch_diff());
219        self.read_bytes.add(other.read_bytes.fetch_diff());
220        self.write_bytes.add(other.write_bytes.fetch_diff());
221        self.read_count.add(other.read_count.fetch_diff());
222        self.write_count.add(other.write_count.fetch_diff());
223        self.read_agg.sum_us.add(other.read_agg.sum_us.fetch_diff());
224        self.write_agg
225            .sum_us
226            .add(other.write_agg.sum_us.fetch_diff());
227        self.rate_limiter_throttled_events
228            .add(other.rate_limiter_throttled_events.fetch_diff());
229        self.io_engine_throttled_events
230            .add(other.io_engine_throttled_events.fetch_diff());
231        self.remaining_reqs_count
232            .add(other.remaining_reqs_count.fetch_diff());
233    }
234}
235
#[cfg(test)]
pub mod tests {
    use super::*;

    #[test]
    fn test_max_block_dev_metrics() {
        // This test is unrelated to the block structure or IRQs; it only
        // allocates metrics for the maximum number of devices a system
        // can have. On x86_64 IRQs 5-23 are available for block devices,
        // i.e. 19 devices at most. aarch64 allows more, but we stay with
        // 19 to keep the test common to both.
        const MAX_BLOCK_DEVICES: usize = 19;

        // Sanity check that the RwLock guarding block::metrics::METRICS
        // can be taken for both reading and writing.
        drop(METRICS.read().unwrap());
        drop(METRICS.write().unwrap());

        // block::metrics::METRICS is, in short, an RwLock around a map of
        // BlockDeviceMetrics. Normally a Block device keeps a pointer to
        // its own entry so that it can update a metric through
        // self.metrics.*. We mimic that here without a Block device:
        // allocate the maximum number of entries, keep a pointer to each
        // one in the local `metrics` vec, update one metric through
        // `inc()` and two through `add()`, and check that the per-device
        // values changed as expected.
        let mut metrics: Vec<Arc<BlockDeviceMetrics>> = Vec::with_capacity(MAX_BLOCK_DEVICES);
        for i in 0..MAX_BLOCK_DEVICES {
            let handle = BlockMetricsPerDevice::alloc(format!("drv{}", i));
            // Bump a counter by one.
            handle.activate_fails.inc();
            // Bump counters by an arbitrary amount.
            handle.read_bytes.add(10);
            handle.write_bytes.add(5);

            if i == 0 {
                // Unit tests run in parallel, and
                // `test_single_block_dev_metrics` intentionally updates
                // drv0 as well (to check thread safety), so only lower
                // bounds can be asserted for drv0.
                assert!(handle.activate_fails.count() >= 1);
                assert!(handle.read_bytes.count() >= 10);
            } else {
                assert_eq!(handle.activate_fails.count(), 1);
                assert_eq!(handle.read_bytes.count(), 10);
            }
            // No other test touches write_bytes, not even for drv0.
            assert_eq!(handle.write_bytes.count(), 5);

            metrics.push(handle);
        }
    }

    #[test]
    fn test_single_block_dev_metrics() {
        // drv0 is used on purpose: `test_max_block_dev_metrics` uses the
        // same name, which lets us exercise thread safety.
        let drive_id = "drv0";

        // Sanity check that the RwLock guarding block::metrics::METRICS
        // can be taken for both reading and writing.
        drop(METRICS.read().unwrap());
        drop(METRICS.write().unwrap());

        let device = BlockMetricsPerDevice::alloc(String::from(drive_id));

        // Update a metric through `inc()`.
        device.activate_fails.inc();
        let fails = device.activate_fails.count();
        assert!(fails > 0, "{}", fails);

        // Only two tests (this one and test_max_block_dev_metrics) are
        // expected to bump activate_fails for drv0.
        let fails = device.activate_fails.count();
        assert!(fails <= 2, "{}", fails);

        // Update a metric through `add()`.
        device.read_bytes.add(5);
        // Only two tests update read_bytes for drv0: this one adds 5 and
        // test_max_block_dev_metrics adds 10.
        assert!(device.read_bytes.count() >= 5);
        assert!(device.read_bytes.count() <= 15);
    }
}