vmm/devices/virtio/block/virtio/metrics.rs
1// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Defines the metrics system for block devices.
5//!
6//! # Metrics format
7//! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write().
8//!
9//! ## JSON example with metrics:
10//! ```json
11//! {
12//! "block_drv0": {
13//! "activate_fails": "SharedIncMetric",
14//! "cfg_fails": "SharedIncMetric",
15//! "no_avail_buffer": "SharedIncMetric",
16//! "event_fails": "SharedIncMetric",
17//! "execute_fails": "SharedIncMetric",
18//! ...
19//! }
20//! "block_drv1": {
21//! "activate_fails": "SharedIncMetric",
22//! "cfg_fails": "SharedIncMetric",
23//! "no_avail_buffer": "SharedIncMetric",
24//! "event_fails": "SharedIncMetric",
25//! "execute_fails": "SharedIncMetric",
26//! ...
27//! }
28//! ...
29//! "block_drive_id": {
30//! "activate_fails": "SharedIncMetric",
31//! "cfg_fails": "SharedIncMetric",
32//! "no_avail_buffer": "SharedIncMetric",
33//! "event_fails": "SharedIncMetric",
34//! "execute_fails": "SharedIncMetric",
35//! ...
36//! }
37//! "block": {
38//! "activate_fails": "SharedIncMetric",
39//! "cfg_fails": "SharedIncMetric",
40//! "no_avail_buffer": "SharedIncMetric",
41//! "event_fails": "SharedIncMetric",
42//! "execute_fails": "SharedIncMetric",
43//! ...
44//! }
45//! }
46//! ```
47//! Each `block` field in the example above is a serializable `BlockDeviceMetrics` structure
48//! collecting metrics such as `activate_fails`, `cfg_fails`, etc. for the block device.
//! `block_drv0` represents the metrics of the block device at endpoint "/drives/drv0",
//! `block_drv1` represents the metrics of the block device at endpoint "/drives/drv1",
//! `block_drive_id` represents the metrics of the block device at endpoint
//! "/drives/{drive_id}", and `block` is the aggregate of all the per-device metrics.
53//!
54//! # Limitations
//! Block devices currently do not have `vmm::logger::metrics::StoreMetrics`, so the
//! aggregate doesn't consider them.
57//!
58//! # Design
59//! The main design goals of this system are:
60//! * To improve block device metrics by logging them at per device granularity.
61//! * Continue to provide aggregate block metrics to maintain backward compatibility.
//! * Move BlockDeviceMetrics out of the logger and decouple it.
63//! * Rely on `serde` to provide the actual serialization for writing the metrics.
64//! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to
65//! avoid having to initialize everything by hand.
66//!
//! * Devices could be created in any order, i.e. the first device created could be either
//!   drv0 or drv1. If we used a vector for BlockDeviceMetrics and called the 1st device
//!   block0, then block0 could sometimes point to drv0 and sometimes to drv1, which doesn't
//!   help with analysing the metrics. So, use a Map instead of a Vec to help understand
//!   which drive the metrics actually belong to.
72//!
73//! The system implements 1 type of metrics:
//! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a
//!   counter (e.g. the number of times an API request failed). These metrics are reset upon
//!   flush.
76//!
77//! We add BlockDeviceMetrics entries from block::metrics::METRICS into Block device instead of
78//! Block device having individual separate BlockDeviceMetrics entries because Block device is not
79//! accessible from signal handlers to flush metrics and block::metrics::METRICS is.
80
81use std::collections::BTreeMap;
82use std::sync::{Arc, RwLock};
83
84use serde::ser::SerializeMap;
85use serde::{Serialize, Serializer};
86
87use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric};
88
/// Registry mapping a block drive id to the metrics of that drive.
///
/// The type carries no synchronization of its own; the module-level `METRICS`
/// static wraps the shared instance in an `RwLock`, and access should go
/// through that lock.
#[derive(Debug)]
pub struct BlockMetricsPerDevice {
    /// Per-device metrics keyed by drive id. Values are `Arc`s, so a handle
    /// returned by `alloc` and the map entry refer to the same counters.
    pub metrics: BTreeMap<String, Arc<BlockDeviceMetrics>>,
}
96
97impl BlockMetricsPerDevice {
98 /// Allocate `BlockDeviceMetrics` for block device having
99 /// id `drive_id`. Also, allocate only if it doesn't
100 /// exist to avoid overwriting previously allocated data.
101 /// lock is always initialized so it is safe the unwrap
102 /// the lock without a check.
103 pub fn alloc(drive_id: String) -> Arc<BlockDeviceMetrics> {
104 Arc::clone(
105 METRICS
106 .write()
107 .unwrap()
108 .metrics
109 .entry(drive_id)
110 .or_insert_with(|| Arc::new(BlockDeviceMetrics::default())),
111 )
112 }
113}
114
/// Pool of block-related metrics per device, kept behind an `RwLock` so it can
/// be shared across threads. It starts out as an empty map built in a constant
/// initializer; entries are added through `BlockMetricsPerDevice::alloc`.
///
/// Call sites in this module `unwrap()` the lock result; that can only panic
/// if the lock has been poisoned by a thread that panicked while holding it.
static METRICS: RwLock<BlockMetricsPerDevice> = RwLock::new(BlockMetricsPerDevice {
    metrics: BTreeMap::new(),
});
121
122/// This function facilitates aggregation and serialization of
123/// per block device metrics.
124pub fn flush_metrics<S: Serializer>(serializer: S) -> Result<S::Ok, S::Error> {
125 let block_metrics = METRICS.read().unwrap();
126 let metrics_len = block_metrics.metrics.len();
127 // +1 to accommodate aggregate block metrics
128 let mut seq = serializer.serialize_map(Some(1 + metrics_len))?;
129
130 let mut block_aggregated: BlockDeviceMetrics = BlockDeviceMetrics::default();
131
132 for (name, metrics) in block_metrics.metrics.iter() {
133 let devn = format!("block_{}", name);
134 // serialization will flush the metrics so aggregate before it.
135 let m: &BlockDeviceMetrics = metrics;
136 block_aggregated.aggregate(m);
137 seq.serialize_entry(&devn, m)?;
138 }
139 seq.serialize_entry("block", &block_aggregated)?;
140 seq.end()
141}
142
/// Metrics associated with a single block device.
///
/// The same structure is also used for the aggregate over all devices.
/// `aggregate` lists every field by hand, so a field added here must be
/// added there as well.
#[derive(Debug, Default, Serialize)]
pub struct BlockDeviceMetrics {
    /// Number of times activation of the block device failed.
    pub activate_fails: SharedIncMetric,
    /// Number of times interacting with the config space of the block device failed.
    pub cfg_fails: SharedIncMetric,
    /// Number of times no buffer was available for the block queue.
    pub no_avail_buffer: SharedIncMetric,
    /// Number of times handling events on the block device failed.
    pub event_fails: SharedIncMetric,
    /// Number of failures in executing a request on the block device.
    pub execute_fails: SharedIncMetric,
    /// Number of invalid requests received for this block device.
    pub invalid_reqs_count: SharedIncMetric,
    /// Number of flush operations triggered on this block device.
    pub flush_count: SharedIncMetric,
    /// Number of events triggered on the queue of this block device.
    pub queue_event_count: SharedIncMetric,
    /// Number of rate-limiter-related events.
    pub rate_limiter_event_count: SharedIncMetric,
    /// Number of update operations triggered on this block device.
    pub update_count: SharedIncMetric,
    /// Number of failures while doing an update on this block device.
    pub update_fails: SharedIncMetric,
    /// Number of bytes read by this block device.
    pub read_bytes: SharedIncMetric,
    /// Number of bytes written by this block device.
    pub write_bytes: SharedIncMetric,
    /// Number of successful read operations.
    pub read_count: SharedIncMetric,
    /// Number of successful write operations.
    pub write_count: SharedIncMetric,
    /// Duration of all read operations (only `sum_us` is folded by `aggregate`).
    pub read_agg: LatencyAggregateMetrics,
    /// Duration of all write operations (only `sum_us` is folded by `aggregate`).
    pub write_agg: LatencyAggregateMetrics,
    /// Number of rate limiter throttling events.
    pub rate_limiter_throttled_events: SharedIncMetric,
    /// Number of virtio events throttled because of the IO engine.
    /// This happens when the io_uring submission queue is full.
    pub io_engine_throttled_events: SharedIncMetric,
    /// Number of remaining requests in the queue.
    pub remaining_reqs_count: SharedIncMetric,
}
188
189impl BlockDeviceMetrics {
190 /// Const default construction.
191 pub fn new() -> Self {
192 Self {
193 read_agg: LatencyAggregateMetrics::new(),
194 write_agg: LatencyAggregateMetrics::new(),
195 ..Default::default()
196 }
197 }
198
199 /// block metrics are SharedIncMetric where the diff of current vs
200 /// old is serialized i.e. serialize_u64(current-old).
201 /// So to have the aggregate serialized in same way we need to
202 /// fetch the diff of current vs old metrics and add it to the
203 /// aggregate.
204 pub fn aggregate(&mut self, other: &Self) {
205 self.activate_fails.add(other.activate_fails.fetch_diff());
206 self.cfg_fails.add(other.cfg_fails.fetch_diff());
207 self.no_avail_buffer.add(other.no_avail_buffer.fetch_diff());
208 self.event_fails.add(other.event_fails.fetch_diff());
209 self.execute_fails.add(other.execute_fails.fetch_diff());
210 self.invalid_reqs_count
211 .add(other.invalid_reqs_count.fetch_diff());
212 self.flush_count.add(other.flush_count.fetch_diff());
213 self.queue_event_count
214 .add(other.queue_event_count.fetch_diff());
215 self.rate_limiter_event_count
216 .add(other.rate_limiter_event_count.fetch_diff());
217 self.update_count.add(other.update_count.fetch_diff());
218 self.update_fails.add(other.update_fails.fetch_diff());
219 self.read_bytes.add(other.read_bytes.fetch_diff());
220 self.write_bytes.add(other.write_bytes.fetch_diff());
221 self.read_count.add(other.read_count.fetch_diff());
222 self.write_count.add(other.write_count.fetch_diff());
223 self.read_agg.sum_us.add(other.read_agg.sum_us.fetch_diff());
224 self.write_agg
225 .sum_us
226 .add(other.write_agg.sum_us.fetch_diff());
227 self.rate_limiter_throttled_events
228 .add(other.rate_limiter_throttled_events.fetch_diff());
229 self.io_engine_throttled_events
230 .add(other.io_engine_throttled_events.fetch_diff());
231 self.remaining_reqs_count
232 .add(other.remaining_reqs_count.fetch_diff());
233 }
234}
235
#[cfg(test)]
pub mod tests {
    use super::*;

    #[test]
    fn test_max_block_dev_metrics() {
        // Note: this test has nothing to do with block structure or IRQs; it
        // only allocates metrics for the maximum number of devices the system
        // can have. We have IRQs 5-23 for block devices on x86_64, so there
        // are at most 19 block devices. aarch64 allows more, but we stick to
        // 19 to keep the test common.
        const MAX_BLOCK_DEVICES: usize = 19;

        // Make sure the RwLock guarding block::metrics::METRICS can be taken
        // in both modes.
        drop(METRICS.read().unwrap());
        drop(METRICS.write().unwrap());

        // block::metrics::METRICS is an RwLock around a map from drive id to
        // BlockDeviceMetrics. Normally a Block device stores the handle to
        // its own entry so it can update metrics via self.metrics.*. We mimic
        // that without a Block device: allocate the maximum number of entries
        // and keep every handle in the local `handles` vec. For each handle
        // we bump one counter with `inc` and two with `add`, then check the
        // per-device values.
        let mut handles: Vec<Arc<BlockDeviceMetrics>> = Vec::new();
        for idx in 0..MAX_BLOCK_DEVICES {
            let dev = BlockMetricsPerDevice::alloc(format!("drv{}", idx));
            // Update through `inc`.
            dev.activate_fails.inc();
            // Update through `add`.
            dev.read_bytes.add(10);
            dev.write_bytes.add(5);

            if idx == 0 {
                // Unit tests run in parallel, and
                // `test_single_block_dev_metrics` also bumps the drv0
                // counters (intentionally, to exercise thread safety), so
                // only lower bounds can be checked for drv0.
                assert!(dev.activate_fails.count() >= 1);
                assert!(dev.read_bytes.count() >= 10);
            } else {
                assert!(dev.activate_fails.count() == 1);
                assert!(dev.read_bytes.count() == 10);
            }
            assert_eq!(dev.write_bytes.count(), 5);

            handles.push(dev);
        }
    }

    #[test]
    fn test_single_block_dev_metrics() {
        // Use drv0 on purpose: `test_max_block_dev_metrics` uses the same
        // name, which lets the two tests exercise thread safety together.
        let drive_name = "drv0";

        // Make sure the RwLock guarding block::metrics::METRICS can be taken
        // in both modes.
        drop(METRICS.read().unwrap());
        drop(METRICS.write().unwrap());

        let handle = BlockMetricsPerDevice::alloc(String::from(drive_name));

        // Update through `inc`.
        handle.activate_fails.inc();
        assert!(
            handle.activate_fails.count() > 0,
            "{}",
            handle.activate_fails.count()
        );
        // Only two tests (this one and test_max_block_dev_metrics) are
        // expected to bump activate_fails for drv0.
        assert!(
            handle.activate_fails.count() <= 2,
            "{}",
            handle.activate_fails.count()
        );

        // Update through `add`.
        handle.read_bytes.add(5);
        // Only two tests (this one and test_max_block_dev_metrics) are
        // expected to add to read_bytes for drv0: 5 here and 10 there.
        assert!(handle.read_bytes.count() >= 5);
        assert!(handle.read_bytes.count() <= 15);
    }
}
327}