vmm/devices/virtio/pmem/
metrics.rs

1// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Defines the metrics system for pmem devices.
5//!
6//! # Metrics format
7//! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write().
8//!
9//! ## JSON example with metrics:
10//! ```json
//! {
//!  "pmem_drv0": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "event_fails": "SharedIncMetric",
//!     "queue_event_count": "SharedIncMetric"
//!  },
//!  "pmem_drv1": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "event_fails": "SharedIncMetric",
//!     "queue_event_count": "SharedIncMetric"
//!  },
//!  ...
//!  "pmem_drive_id": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "event_fails": "SharedIncMetric",
//!     "queue_event_count": "SharedIncMetric"
//!  },
//!  "pmem": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "event_fails": "SharedIncMetric",
//!     "queue_event_count": "SharedIncMetric"
//!  }
//! }
46//! ```
//! Each `pmem` field in the example above is a serializable `PmemMetrics` structure
//! collecting metrics such as `activate_fails`, `cfg_fails`, etc. for the pmem device.
//! `pmem_drv0` represents the metrics for the endpoint "/pmem/drv0",
//! `pmem_drv1` represents the metrics for the endpoint "/pmem/drv1", and
//! `pmem_drive_id` represents the metrics for the endpoint "/pmem/{drive_id}";
//! `pmem` is the aggregate of all the per-device metrics.
53//!
54//! # Limitations
//! pmem devices currently do not have `vmm::logger::metrics::StoreMetrics`, so the aggregate
//! doesn't consider them.
57//!
58//! # Design
59//! The main design goals of this system are:
60//! * To improve pmem device metrics by logging them at per device granularity.
61//! * Continue to provide aggregate pmem metrics to maintain backward compatibility.
//! * Move `PmemMetrics` out of the logger and decouple it.
63//! * Rely on `serde` to provide the actual serialization for writing the metrics.
64//! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to
65//!   avoid having to initialize everything by hand.
66//!
//! * Devices could be created in any order, i.e. the first device created could be either drv0
//!   or drv1. If we used a vector of `PmemMetrics` and called the first device pmem0, then pmem0
//!   could sometimes point to drv0 and sometimes to drv1, which doesn't help when analysing the
//!   metrics. So, use a map instead of a Vec to make clear which drive the metrics actually
//!   belong to.
72//!
73//! The system implements 1 type of metrics:
//! * Shared Incremental Metrics (`SharedIncMetric`) - dedicated for the metrics which need a
//!   counter (e.g. the number of times an API request failed). These metrics are reset upon flush.
76//!
//! We add `PmemMetrics` entries from pmem::metrics::METRICS into the Pmem device instead of
//! the Pmem device having its own separate `PmemMetrics` entries because the Pmem device is not
//! accessible from signal handlers to flush metrics, whereas pmem::metrics::METRICS is.
80
81use std::collections::BTreeMap;
82use std::sync::{Arc, RwLock};
83
84use serde::ser::SerializeMap;
85use serde::{Serialize, Serializer};
86
87use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric};
88
89/// map of pmem drive id and metrics
90/// this should be protected by a lock before accessing.
91#[derive(Debug)]
92pub struct PmemMetricsPerDevice {
93    /// used to access per pmem device metrics
94    pub metrics: BTreeMap<String, Arc<PmemMetrics>>,
95}
96
97impl PmemMetricsPerDevice {
98    /// Allocate `PmemDeviceMetrics` for pmem device having
99    /// id `drive_id`. Also, allocate only if it doesn't
100    /// exist to avoid overwriting previously allocated data.
101    /// lock is always initialized so it is safe the unwrap
102    /// the lock without a check.
103    pub fn alloc(drive_id: String) -> Arc<PmemMetrics> {
104        Arc::clone(
105            METRICS
106                .write()
107                .unwrap()
108                .metrics
109                .entry(drive_id)
110                .or_insert_with(|| Arc::new(PmemMetrics::default())),
111        )
112    }
113}
114
/// Global registry of per-device pmem metrics, kept behind a `RwLock` so that it
/// can be shared between threads.
///
/// The lock is constructed right here in the initializer, so it exists before
/// any access; the code in this file calls `unwrap()` on the lock result directly.
// NOTE(review): `unwrap()` on the lock result still panics if the lock is poisoned.
static METRICS: RwLock<PmemMetricsPerDevice> = RwLock::new(PmemMetricsPerDevice {
    metrics: BTreeMap::new(),
});
121
122/// This function facilitates aggregation and serialization of
123/// per pmem device metrics.
124pub fn flush_metrics<S: Serializer>(serializer: S) -> Result<S::Ok, S::Error> {
125    let pmem_metrics = METRICS.read().unwrap();
126    let metrics_len = pmem_metrics.metrics.len();
127    // +1 to accommodate aggregate pmem metrics
128    let mut seq = serializer.serialize_map(Some(1 + metrics_len))?;
129
130    let mut pmem_aggregated: PmemMetrics = PmemMetrics::default();
131
132    for (name, metrics) in pmem_metrics.metrics.iter() {
133        let devn = format!("pmem_{}", name);
134        // serialization will flush the metrics so aggregate before it.
135        let m: &PmemMetrics = metrics;
136        pmem_aggregated.aggregate(m);
137        seq.serialize_entry(&devn, m)?;
138    }
139    seq.serialize_entry("pmem", &pmem_aggregated)?;
140    seq.end()
141}
142
143/// Pmem Device associated metrics.
144#[derive(Debug, Default, Serialize)]
145pub struct PmemMetrics {
146    /// Number of times when activate failed on a pmem device.
147    pub activate_fails: SharedIncMetric,
148    /// Number of times when interacting with the space config of a pmem device failed.
149    pub cfg_fails: SharedIncMetric,
150    /// Number of times when handling events on a pmem device failed.
151    pub event_fails: SharedIncMetric,
152    /// Number of events triggered on the queue of this pmem device.
153    pub queue_event_count: SharedIncMetric,
154}
155
156impl PmemMetrics {
157    /// Const default construction.
158    pub fn new() -> Self {
159        Self {
160            ..Default::default()
161        }
162    }
163
164    /// pmem metrics are SharedIncMetric where the diff of current vs
165    /// old is serialized i.e. serialize_u64(current-old).
166    /// So to have the aggregate serialized in same way we need to
167    /// fetch the diff of current vs old metrics and add it to the
168    /// aggregate.
169    pub fn aggregate(&mut self, other: &Self) {
170        self.activate_fails.add(other.activate_fails.fetch_diff());
171        self.cfg_fails.add(other.cfg_fails.fetch_diff());
172        self.event_fails.add(other.event_fails.fetch_diff());
173        self.queue_event_count
174            .add(other.queue_event_count.fetch_diff());
175    }
176}
177
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Allocates metrics for the maximum number of pmem devices and checks that
    /// each device gets its own `activate_fails` counter.
    #[test]
    fn test_max_pmem_dev_metrics() {
        // Note: this test has nothing to do with the pmem structure or IRQs; it
        // only allocates metrics for the max number of devices a system can have.
        // We have IRQs 5-23 for pmem devices on x86_64, so there are 19 pmem
        // devices at most. aarch64 supports more devices, but we stick to 19 to
        // keep the test common.
        const MAX_PMEM_DEVICES: usize = 19;

        // Make sure the RwLock guarding pmem::metrics::METRICS can be taken both ways.
        drop(METRICS.read().unwrap());
        drop(METRICS.write().unwrap());

        // pmem::metrics::METRICS is a RwLock around a map from drive id to
        // `PmemMetrics`. Normally a Pmem device stores the handle to its own
        // entry so that it can do self.metrics.* to update a metric. We do
        // something similar here without a Pmem device: allocate the max number
        // of entries, keep the handle of each one in the local `metrics` vec,
        // bump one counter through the handle and validate the per-device value.
        let mut metrics: Vec<Arc<PmemMetrics>> = Vec::with_capacity(MAX_PMEM_DEVICES);
        for i in 0..MAX_PMEM_DEVICES {
            metrics.push(PmemMetricsPerDevice::alloc(format!("pmem{}", i)));
            metrics[i].activate_fails.inc();

            if i == 0 {
                // Unit tests run in parallel and `test_single_pmem_dev_metrics`
                // also increases this counter of pmem0 by 1 (intentional, to
                // check thread safety), so only check that the count is >= 1.
                assert!(metrics[i].activate_fails.count() >= 1);
            } else {
                assert_eq!(metrics[i].activate_fails.count(), 1);
            }
        }
    }

    /// Checks allocation and counter updates for a single device (`pmem0`).
    #[test]
    fn test_single_pmem_dev_metrics() {
        let test_metrics = PmemMetricsPerDevice::alloc(String::from("pmem0"));

        // Allocating the same drive id again must hand back the existing entry
        // instead of overwriting it.
        assert!(Arc::ptr_eq(
            &test_metrics,
            &PmemMetricsPerDevice::alloc(String::from("pmem0"))
        ));

        test_metrics.activate_fails.inc();
        let count = test_metrics.activate_fails.count();
        assert!(count > 0, "{}", count);

        // Only 2 tests (this one and test_max_pmem_dev_metrics) are expected
        // to update the activate_fails count of pmem0.
        assert!(count <= 2, "{}", count);
    }
}