vmm/devices/virtio/pmem/metrics.rs
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the metrics system for pmem devices.
//!
//! # Metrics format
//! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write().
//!
//! ## JSON example with metrics:
//! ```json
//! {
//!  "pmem_drv0": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "event_fails": "SharedIncMetric",
//!     "queue_event_count": "SharedIncMetric"
//!  }
//!  "pmem_drv1": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "event_fails": "SharedIncMetric",
//!     "queue_event_count": "SharedIncMetric"
//!  }
//!  ...
//!  "pmem_drive_id": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "event_fails": "SharedIncMetric",
//!     "queue_event_count": "SharedIncMetric"
//!  }
//!  "pmem": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "event_fails": "SharedIncMetric",
//!     "queue_event_count": "SharedIncMetric"
//!  }
//! }
//! ```
//! Each `pmem_*` field in the example above is a serializable `PmemMetrics` structure
//! collecting metrics such as `activate_fails`, `cfg_fails`, etc. for the pmem device.
//! `pmem_drv0` represents metrics for the endpoint "/pmem/drv0",
//! `pmem_drv1` represents metrics for the endpoint "/pmem/drv1", and
//! `pmem_drive_id` represents metrics for the endpoint "/pmem/{drive_id}",
//! while `pmem` is the aggregate of all the per-device metrics.
//!
//! # Limitations
//! The pmem device currently does not have `vmm::logger::metrics::StoreMetrics`, so the
//! aggregate does not consider them.
//!
//! # Design
//! The main design goals of this system are:
//! * To improve pmem device metrics by logging them at per-device granularity.
//! * To continue providing aggregate pmem metrics to maintain backward compatibility.
//! * To move `PmemMetrics` out of the logger and decouple it.
//! * To rely on `serde` to provide the actual serialization for writing the metrics.
//! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to
//!   avoid having to initialize everything by hand.
//!
//! * Devices could be created in any order, i.e. the first device created could be either drv0 or
//!   drv1, so if we used a vector for `PmemMetrics` and called the first device pmem0, then pmem0
//!   could sometimes point to drv0 and sometimes to drv1, which doesn't help with analysing the
//!   metrics. So we use a map instead of a vector to make clear which drive the metrics actually
//!   belong to.
//!
//! The system implements 1 type of metrics:
//! * Shared Incremental Metrics (`SharedIncMetric`) - dedicated to metrics which need a counter
//!   (i.e. the number of times an API request failed). These metrics are reset upon flush.
//!
//! We add `PmemMetrics` entries from pmem::metrics::METRICS into the pmem device instead of the
//! pmem device having its own separate `PmemMetrics` entries, because the pmem device is not
//! accessible from the signal handlers that flush metrics, while pmem::metrics::METRICS is.
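//!
//! A minimal usage sketch (hypothetical device-side code, not taken from the actual pmem
//! device implementation; the drive id "drv0" is illustrative):
//! ```ignore
//! // The device keeps an `Arc` to its per-device entry allocated in METRICS ...
//! let metrics = PmemMetricsPerDevice::alloc("drv0".to_string());
//! // ... and updating a metric is then a simple atomic increment.
//! metrics.activate_fails.inc();
//! ```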

use std::collections::BTreeMap;
use std::sync::{Arc, RwLock};

use serde::ser::SerializeMap;
use serde::{Serialize, Serializer};

use crate::logger::{IncMetric, SharedIncMetric};

/// Map of pmem drive id and metrics.
/// This should be protected by a lock before accessing.
#[derive(Debug)]
pub struct PmemMetricsPerDevice {
    /// Used to access per pmem device metrics.
    pub metrics: BTreeMap<String, Arc<PmemMetrics>>,
}

impl PmemMetricsPerDevice {
    /// Allocate `PmemMetrics` for the pmem device with
    /// id `drive_id`. Allocate only if it doesn't already
    /// exist, to avoid overwriting previously allocated data.
    /// The lock is always initialized, so it is safe to unwrap
    /// it without a check.
    pub fn alloc(drive_id: String) -> Arc<PmemMetrics> {
        Arc::clone(
            METRICS
                .write()
                .unwrap()
                .metrics
                .entry(drive_id)
                .or_insert_with(|| Arc::new(PmemMetrics::default())),
        )
    }
}

/// Pool of pmem-related metrics per device behind a lock to
/// keep things thread safe. Since the lock is initialized here
/// it is safe to unwrap it without any check.
static METRICS: RwLock<PmemMetricsPerDevice> = RwLock::new(PmemMetricsPerDevice {
    metrics: BTreeMap::new(),
});

/// This function facilitates aggregation and serialization of
/// per pmem device metrics.
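///
/// A sketch of how this could be wired into the crate's top-level metrics via `serde`
/// (the `PmemMetricsSerializeProxy` wrapper name is an illustrative assumption, not
/// necessarily the actual vmm::logger definition):
///
/// ```ignore
/// #[derive(Debug, Default)]
/// pub struct PmemMetricsSerializeProxy;
///
/// impl Serialize for PmemMetricsSerializeProxy {
///     fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
///         flush_metrics(serializer)
///     }
/// }
/// ```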
pub fn flush_metrics<S: Serializer>(serializer: S) -> Result<S::Ok, S::Error> {
    let pmem_metrics = METRICS.read().unwrap();
    let metrics_len = pmem_metrics.metrics.len();
    // +1 to accommodate aggregate pmem metrics
    let mut seq = serializer.serialize_map(Some(1 + metrics_len))?;

    let mut pmem_aggregated: PmemMetrics = PmemMetrics::default();

    for (name, metrics) in pmem_metrics.metrics.iter() {
        let devn = format!("pmem_{}", name);
        // Serialization will flush the metrics, so aggregate before it.
        let m: &PmemMetrics = metrics;
        pmem_aggregated.aggregate(m);
        seq.serialize_entry(&devn, m)?;
    }
    seq.serialize_entry("pmem", &pmem_aggregated)?;
    seq.end()
}

/// Pmem device associated metrics.
#[derive(Debug, Default, Serialize)]
pub struct PmemMetrics {
    /// Number of times activation failed on a pmem device.
    pub activate_fails: SharedIncMetric,
    /// Number of times interacting with the space config of a pmem device failed.
    pub cfg_fails: SharedIncMetric,
    /// Number of times handling events on a pmem device failed.
    pub event_fails: SharedIncMetric,
    /// Number of events triggered on the queue of this pmem device.
    pub queue_event_count: SharedIncMetric,
}

impl PmemMetrics {
    /// Default construction.
    pub fn new() -> Self {
        Self::default()
    }

    /// pmem metrics are `SharedIncMetric`s where the diff of current vs
    /// old is serialized, i.e. serialize_u64(current - old).
    /// So, to have the aggregate serialized the same way, we fetch the
    /// diff of current vs old metrics and add it to the aggregate.
    /// For example, if a device's `activate_fails` was last flushed at 3
    /// and has since reached 5, the diff of 2 is added to the aggregate,
    /// so the aggregate reports the same "since last flush" delta.
    pub fn aggregate(&mut self, other: &Self) {
        self.activate_fails.add(other.activate_fails.fetch_diff());
        self.cfg_fails.add(other.cfg_fails.fetch_diff());
        self.event_fails.add(other.event_fails.fetch_diff());
        self.queue_event_count
            .add(other.queue_event_count.fetch_diff());
    }
}

#[cfg(test)]
pub mod tests {
    use super::*;

    #[test]
    fn test_max_pmem_dev_metrics() {
        // Note: this test has nothing to do with the
        // pmem structure or IRQs; it just allocates
        // metrics for the max number of devices the system can have.
        // We have IRQs 5-23 for pmem devices on x86_64, so there
        // are at most 19 pmem devices. Even though we can have more
        // devices on aarch64, we stick to 19 to keep the test common.
        const MAX_PMEM_DEVICES: usize = 19;

        // This is to make sure that the RwLock for pmem::metrics::METRICS is good.
        drop(METRICS.read().unwrap());
        drop(METRICS.write().unwrap());

        // pmem::metrics::METRICS is, in short, an RwLock around a map of PmemMetrics.
        // Normally, pointers to unique entries of pmem::metrics::METRICS are stored
        // in the Pmem device so that the device can do self.metrics.* to
        // update a metric. We try to do something similar here without
        // using a Pmem device by allocating the max number of
        // PmemMetrics entries in pmem::metrics::METRICS and storing a pointer to
        // each entry in the local `metrics` vec.
        // We then update an IncMetric for each device and validate
        // that the per-device metrics were updated as expected.
        let mut metrics: Vec<Arc<PmemMetrics>> = Vec::new();
        for i in 0..MAX_PMEM_DEVICES {
            let pmem_name: String = format!("pmem{}", i);
            metrics.push(PmemMetricsPerDevice::alloc(pmem_name.clone()));
            // Update the IncMetric.
            metrics[i].activate_fails.inc();

            if i == 0 {
                // Unit tests run in parallel and we have
                // `test_single_pmem_dev_metrics` that also increases
                // the IncMetric count of pmem0 by 1 (intentional, to check
                // thread safety) so we check if the count is >= 1.
                assert!(metrics[i].activate_fails.count() >= 1);
            } else {
                assert_eq!(metrics[i].activate_fails.count(), 1);
            }
        }
    }

    #[test]
    fn test_single_pmem_dev_metrics() {
        let test_metrics = PmemMetricsPerDevice::alloc(String::from("pmem0"));
        // Test to update IncMetrics
        test_metrics.activate_fails.inc();
        assert!(
            test_metrics.activate_fails.count() > 0,
            "{}",
            test_metrics.activate_fails.count()
        );

        // We expect only 2 tests (this and test_max_pmem_dev_metrics)
        // to update activate_fails count for pmem0.
        assert!(
            test_metrics.activate_fails.count() <= 2,
            "{}",
            test_metrics.activate_fails.count()
        );
    }
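
    #[test]
    fn test_pmem_metrics_aggregate() {
        // Minimal check that `aggregate()` folds the "since last flush" delta of a
        // device's metrics into the aggregate counters. Uses only local structs, so it
        // does not interfere with the shared METRICS map used by the other tests.
        let mut aggregate = PmemMetrics::new();
        let device = PmemMetrics::new();
        device.activate_fails.inc();
        device.queue_event_count.inc();
        aggregate.aggregate(&device);
        assert_eq!(aggregate.activate_fails.count(), 1);
        assert_eq!(aggregate.queue_event_count.count(), 1);
    }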
}