infotheory/coders/
mod.rs

1//! Entropy coders for rwkvzip.
2//!
3//! This module provides both Arithmetic Coding (AC) and rANS coders.
4//!
5//! # Coder Selection
6//!
7//! - **Arithmetic Coding (AC)**: Optimal compression ratio, slightly slower.
8//!   Best for small files or maximum compression.
9//! - **rANS**: Near-optimal compression with better throughput, especially
10//!   with lane-interleaved encoding. Best for larger files.
11
12pub mod ac;
13pub mod rans;
14
/// Selects the entropy coder used by generic rate-coded compression.
///
/// [`CoderType::AC`] is the [`Default`] variant.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CoderType {
    /// Arithmetic coding: optimal compression ratio, slightly slower.
    #[default]
    AC,
    /// rANS coding: near-optimal compression with better throughput.
    RANS,
}

/// Renders the coder as its short label: `"AC"` or `"rANS"`.
impl std::fmt::Display for CoderType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it verbatim (no padding/alignment
        // handling, exactly like a literal-only `write!`).
        let label = match *self {
            CoderType::AC => "AC",
            CoderType::RANS => "rANS",
        };
        f.write_str(label)
    }
}
33
34/// Compute CRC32 checksum for data integrity verification.
35#[inline]
36pub fn crc32(data: &[u8]) -> u32 {
37    let mut hasher = crc32fast::Hasher::new();
38    hasher.update(data);
39    hasher.finalize()
40}
41
42#[inline]
43pub(crate) fn quantize_pdf_to_integer_cdf_with_buffer(
44    pdf: &[f64],
45    total: u32,
46    cdf_out: &mut [u32],
47    _freq_buf: &mut [i64],
48) {
49    let n = pdf.len();
50    assert!(cdf_out.len() > n, "cdf buffer too small");
51
52    if n == 0 {
53        cdf_out[0] = 0;
54        return;
55    }
56    assert!(
57        (n as u32) <= total,
58        "CDF total {total} must be >= symbol count {n} to guarantee positive widths"
59    );
60
61    let scale = total as f64;
62    let mut acc = 0.0f64;
63    let mut prev = 0u32;
64
65    unsafe {
66        *cdf_out.get_unchecked_mut(0) = 0;
67        for i in 0..n {
68            let p = *pdf.get_unchecked(i);
69            if p.is_finite() && p > 0.0 {
70                acc += p;
71            }
72
73            let next = (acc * scale) as u32;
74            if next <= prev || next > total {
75                quantize_pdf_to_integer_cdf_positive_width(pdf, total, cdf_out);
76                return;
77            }
78            *cdf_out.get_unchecked_mut(i + 1) = next;
79            prev = next;
80        }
81        *cdf_out.get_unchecked_mut(n) = total;
82    }
83}
84
85#[inline]
86pub(crate) fn quantize_pdf_to_integer_cdf_dense_positive_with_buffer(
87    pdf: &[f64],
88    total: u32,
89    cdf_out: &mut [u32],
90) {
91    let n = pdf.len();
92    assert!(cdf_out.len() > n, "cdf buffer too small");
93
94    if n == 0 {
95        cdf_out[0] = 0;
96        return;
97    }
98    assert!(
99        (n as u32) <= total,
100        "CDF total {total} must be >= symbol count {n} to guarantee positive widths"
101    );
102
103    debug_assert!(pdf.iter().all(|&p| p.is_finite() && p > 0.0));
104
105    let scale = total as f64;
106    let mut acc = 0.0f64;
107    let mut prev = 0u32;
108
109    unsafe {
110        *cdf_out.get_unchecked_mut(0) = 0;
111        for i in 0..n {
112            acc += *pdf.get_unchecked(i);
113
114            let next = (acc * scale) as u32;
115            if next <= prev || next > total {
116                quantize_pdf_to_integer_cdf_positive_width_dense(pdf, total, cdf_out);
117                return;
118            }
119            *cdf_out.get_unchecked_mut(i + 1) = next;
120            prev = next;
121        }
122        *cdf_out.get_unchecked_mut(n) = total;
123    }
124}
125
/// Slow-path quantizer that guarantees every symbol a width of at least one.
///
/// Each symbol `i` first receives a mandatory boundary of `i + 1`. The
/// remaining `total - pdf.len()` units are handed out as "extra" mass that
/// follows the floor of the scaled running sum, capped so the final boundary
/// never exceeds `total`. The extra amount never decreases, which keeps the
/// table strictly increasing. Non-finite and non-positive probabilities
/// contribute no mass.
///
/// On return `cdf_out[0] == 0` and, for non-empty `pdf`,
/// `cdf_out[pdf.len()] == total`. Entries beyond index `pdf.len()` are left
/// untouched.
///
/// # Panics
///
/// Panics if `cdf_out.len() <= pdf.len()`, or if `pdf` is non-empty and has
/// more symbols than `total`. These checks make the function sound to call on
/// its own instead of relying on every caller to validate first.
#[inline]
fn quantize_pdf_to_integer_cdf_positive_width(pdf: &[f64], total: u32, cdf_out: &mut [u32]) {
    let n = pdf.len();
    assert!(cdf_out.len() > n, "cdf buffer too small");

    cdf_out[0] = 0;
    if n == 0 {
        // Nothing to distribute; matches the empty-input result of the
        // public quantizers.
        return;
    }
    assert!(
        (n as u64) <= u64::from(total),
        "CDF total {total} must be >= symbol count {n} to guarantee positive widths"
    );

    let scale = f64::from(total);
    // Lossless cast and no underflow: `n <= total <= u32::MAX` was just checked.
    let remaining_extra = total - n as u32;
    let mut acc = 0.0f64;
    let mut extra = 0u32;
    // Mandatory boundary for the current symbol, i.e. `i + 1`.
    let mut floor = 0u32;

    for (slot, &p) in cdf_out[1..=n].iter_mut().zip(pdf) {
        floor += 1;
        if p.is_finite() && p > 0.0 {
            acc += p;
        }

        // Mass the running sum asks for beyond the mandatory floor, limited
        // to what can still be handed out. Float-to-int `as` saturates.
        let wanted = ((acc * scale) as u32)
            .saturating_sub(floor)
            .min(remaining_extra);
        extra = extra.max(wanted);
        // `extra <= total - n` and `floor <= n`, so this cannot overflow.
        *slot = extra + floor;
    }
    // Pin the last boundary so the table always spans exactly `total`.
    cdf_out[n] = total;
}
152
/// Slow-path quantizer for dense PDFs that guarantees every symbol a width of
/// at least one.
///
/// Each symbol `i` first receives a mandatory boundary of `i + 1`. The
/// remaining `total - pdf.len()` units are handed out as "extra" mass that
/// follows the floor of the scaled running sum, capped so the final boundary
/// never exceeds `total`. The extra amount never decreases, which keeps the
/// table strictly increasing. Unlike the general variant, every probability
/// is added to the running sum without filtering.
///
/// On return `cdf_out[0] == 0` and, for non-empty `pdf`,
/// `cdf_out[pdf.len()] == total`. Entries beyond index `pdf.len()` are left
/// untouched.
///
/// # Panics
///
/// Panics if `cdf_out.len() <= pdf.len()`, or if `pdf` is non-empty and has
/// more symbols than `total`. These checks make the function sound to call on
/// its own instead of relying on every caller to validate first.
#[inline]
fn quantize_pdf_to_integer_cdf_positive_width_dense(pdf: &[f64], total: u32, cdf_out: &mut [u32]) {
    let n = pdf.len();
    assert!(cdf_out.len() > n, "cdf buffer too small");

    cdf_out[0] = 0;
    if n == 0 {
        // Nothing to distribute; matches the empty-input result of the
        // public quantizers.
        return;
    }
    assert!(
        (n as u64) <= u64::from(total),
        "CDF total {total} must be >= symbol count {n} to guarantee positive widths"
    );

    let scale = f64::from(total);
    // Lossless cast and no underflow: `n <= total <= u32::MAX` was just checked.
    let remaining_extra = total - n as u32;
    let mut acc = 0.0f64;
    let mut extra = 0u32;
    // Mandatory boundary for the current symbol, i.e. `i + 1`.
    let mut floor = 0u32;

    for (slot, &p) in cdf_out[1..=n].iter_mut().zip(pdf) {
        floor += 1;
        acc += p;

        // Mass the running sum asks for beyond the mandatory floor, limited
        // to what can still be handed out. Float-to-int `as` saturates and
        // maps NaN to 0, so a poisoned sum degrades to the minimum width.
        let wanted = ((acc * scale) as u32)
            .saturating_sub(floor)
            .min(remaining_extra);
        extra = extra.max(wanted);
        // `extra <= total - n` and `floor <= n`, so this cannot overflow.
        *slot = extra + floor;
    }
    // Pin the last boundary so the table always spans exactly `total`.
    cdf_out[n] = total;
}
176
177// Re-export main types
178pub use ac::{
179    ArithmeticDecoder, ArithmeticEncoder, CDF_TOTAL, p_min, quantize_pdf_to_cdf,
180    quantize_pdf_to_cdf_inplace, quantize_pdf_to_cdf_with_buffer, softmax_pdf, softmax_pdf_floor,
181    softmax_pdf_floor_inplace, softmax_pdf_inplace,
182};
183
184pub use rans::{
185    ANS_BITS, ANS_HIGH, ANS_LOW, ANS_TOTAL, BLOCK_SIZE, BlockedRansDecoder, BlockedRansEncoder,
186    Cdf, RansDecoder, RansEncoder, cdf_for_symbol, quantize_pdf_to_rans_cdf,
187    quantize_pdf_to_rans_cdf_with_buffer,
188};
189
190// Interleaved multi-lane rANS types
191pub use rans::{RANS_LANES, SimdRansDecoder, SimdRansEncoder};