infotheory/backends/rwkvzip/rwkv7/
profiling.rs

1use std::time::Duration;
2
/// Accumulated timing counters for one transformer block (layer).
///
/// Both counters are plain nanosecond totals; a default-constructed value
/// has both set to zero.
#[derive(Debug, Default, Clone, Copy)]
pub struct LayerTiming {
    /// Total nanoseconds attributed to the attention step so far.
    pub attention_ns: u64,
    /// Total nanoseconds attributed to the feed-forward (FFN) step so far.
    pub ffn_ns: u64,
}
11
/// Destination for per-layer timing samples emitted by the model.
///
/// Keeping this as a trait lets the model report timings without being tied
/// to one concrete profiler. Every item has a default, so an implementor that
/// overrides nothing is a disabled sink that discards all samples.
pub trait ProfilerSink {
    /// Tells the caller whether it should pay profiling overhead on the hot
    /// path. Defaults to `false`.
    const ENABLED: bool = false;

    /// Marks the start of a new token forward pass. Does nothing by default.
    #[inline(always)]
    fn begin_token(&mut self) {}

    /// Reports how long the attention kernel took for `layer`.
    ///
    /// The default implementation drops the sample.
    #[inline(always)]
    fn record_attention(&mut self, layer: usize, duration: Duration) {
        let _ = (layer, duration);
    }

    /// Reports how long the feed-forward step took for `layer`.
    ///
    /// The default implementation drops the sample.
    #[inline(always)]
    fn record_ffn(&mut self, layer: usize, duration: Duration) {
        let _ = (layer, duration);
    }
}
30
/// No-op profiler used by default to keep the fast path branch-free.
///
/// This is a zero-sized unit type. It derives the usual standard traits so it
/// can be debug-printed, copied freely, embedded in structs that themselves
/// derive `Debug`/`Clone`, and built through `Default` in generic code.
#[derive(Clone, Copy, Debug, Default)]
pub struct NullProfiler;
33
// Intentionally empty: `NullProfiler` inherits every `ProfilerSink` default,
// i.e. `ENABLED = false` and no-op `begin_token` / `record_*` methods.
impl ProfilerSink for NullProfiler {}
35
36/// Collects wall-clock timings for each transformer block.
37#[derive(Clone, Debug)]
38pub struct LayerProfiler {
39    layers: Vec<LayerTiming>,
40    tokens: u64,
41}
42
43impl LayerProfiler {
44    /// Create a layer profiler with `num_layers` counters.
45    pub fn new(num_layers: usize) -> Self {
46        Self {
47            layers: vec![LayerTiming::default(); num_layers],
48            tokens: 0,
49        }
50    }
51
52    #[inline]
53    /// Reset token counter and all accumulated timings.
54    pub fn reset(&mut self) {
55        self.tokens = 0;
56        self.layers.fill(LayerTiming::default());
57    }
58
59    #[inline]
60    /// Number of tokens observed by this profiler.
61    pub fn tokens(&self) -> u64 {
62        self.tokens
63    }
64
65    #[inline]
66    /// Per-layer timing accumulators.
67    pub fn timings(&self) -> &[LayerTiming] {
68        &self.layers
69    }
70
71    fn accumulate(target: &mut u64, duration: Duration) {
72        let nanos = duration.as_nanos().min(u64::MAX as u128) as u64;
73        *target = target.saturating_add(nanos);
74    }
75}
76
77impl ProfilerSink for LayerProfiler {
78    const ENABLED: bool = true;
79
80    #[inline(always)]
81    fn begin_token(&mut self) {
82        self.tokens = self.tokens.saturating_add(1);
83    }
84
85    #[inline(always)]
86    fn record_attention(&mut self, layer: usize, duration: Duration) {
87        if let Some(entry) = self.layers.get_mut(layer) {
88            Self::accumulate(&mut entry.attention_ns, duration);
89        }
90    }
91
92    #[inline(always)]
93    fn record_ffn(&mut self, layer: usize, duration: Duration) {
94        if let Some(entry) = self.layers.get_mut(layer) {
95            Self::accumulate(&mut entry.ffn_ns, duration);
96        }
97    }
98}