rwkvzip/rwkv7/
profiling.rs

1use std::time::Duration;
2
/// Accumulated timing totals for one transformer block.
///
/// Both counters are plain nanosecond sums. The type is `Copy` and compares
/// field-by-field, so snapshots can be stored and checked with `==` /
/// `assert_eq!`. The `Default` value has both counters at zero.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct LayerTiming {
    /// Accumulated attention time in nanoseconds.
    pub attention_ns: u64,
    /// Accumulated FFN time in nanoseconds.
    pub ffn_ns: u64,
}
11
/// Receiver for per-layer timing events.
///
/// The model reports timings through this trait so it does not have to commit
/// to a particular profiler implementation. Every method has an empty default
/// body, so an implementor only overrides the events it cares about.
pub trait ProfilerSink {
    /// Marks the start of a token. The default implementation does nothing.
    #[inline(always)]
    fn begin_token(&mut self) {}

    /// Reports the attention `_duration` measured for block `_layer`.
    /// The default implementation discards both values.
    #[inline(always)]
    fn record_attention(&mut self, _layer: usize, _duration: Duration) {}

    /// Reports the FFN `_duration` measured for block `_layer`.
    /// The default implementation discards both values.
    #[inline(always)]
    fn record_ffn(&mut self, _layer: usize, _duration: Duration) {}
}
24
/// No-op profiler: a zero-sized sink for callers that do not want timings.
///
/// Intended as the default sink so the un-profiled fast path stays
/// branch-free. Being a unit struct it carries no state, so it is cheap to
/// copy, can be built with `Default`, and prints as `NullProfiler` under `{:?}`.
#[derive(Clone, Copy, Debug, Default)]
pub struct NullProfiler;
27
// The empty body is deliberate: every hook falls back to the trait's no-op default.
impl ProfilerSink for NullProfiler {}
29
30/// Collects wall-clock timings for each transformer block.
31#[derive(Clone, Debug)]
32pub struct LayerProfiler {
33    layers: Vec<LayerTiming>,
34    tokens: u64,
35}
36
37impl LayerProfiler {
38    pub fn new(num_layers: usize) -> Self {
39        Self {
40            layers: vec![LayerTiming::default(); num_layers],
41            tokens: 0,
42        }
43    }
44
45    #[inline]
46    pub fn reset(&mut self) {
47        self.tokens = 0;
48        self.layers.fill(LayerTiming::default());
49    }
50
51    #[inline]
52    pub fn tokens(&self) -> u64 {
53        self.tokens
54    }
55
56    #[inline]
57    pub fn timings(&self) -> &[LayerTiming] {
58        &self.layers
59    }
60
61    fn accumulate(target: &mut u64, duration: Duration) {
62        let nanos = duration.as_nanos().min(u64::MAX as u128) as u64;
63        *target = target.saturating_add(nanos);
64    }
65}
66
67impl ProfilerSink for LayerProfiler {
68    #[inline(always)]
69    fn begin_token(&mut self) {
70        self.tokens = self.tokens.saturating_add(1);
71    }
72
73    #[inline(always)]
74    fn record_attention(&mut self, layer: usize, duration: Duration) {
75        if let Some(entry) = self.layers.get_mut(layer) {
76            Self::accumulate(&mut entry.attention_ns, duration);
77        }
78    }
79
80    #[inline(always)]
81    fn record_ffn(&mut self, layer: usize, duration: Duration) {
82        if let Some(entry) = self.layers.get_mut(layer) {
83            Self::accumulate(&mut entry.ffn_ns, duration);
84        }
85    }
86}