infotheory/aixi/
vm_nyx.rs

1//! High-performance VM-backed AIXI environment using nyx-lite (Firecracker).
2//!
3//! This module provides a VM environment implementation built on top of nyx-lite,
4//! enabling high-frequency snapshot-based resets for fast experimentation (hardware and
5//! guest behavior dependent).
6//!
7//! ## Architecture
8//!
9//! The environment uses Firecracker's KVM-based microVM with nyx-lite's incremental
10//! snapshot and reset capabilities. Communication with the guest occurs via:
11//!
12//! 1. **Shared Memory**: Zero-copy data transfer between host and guest
13//! 2. **Hypercalls**: Control plane communication (snapshot, done, etc.)
14//! 3. **Serial PTY**: Optional console I/O for simpler protocols
15//!
16//! ## Design Principles
17//!
18//! - **Universal**: Not biased towards any specific use case (fuzzing, etc.)
19//! - **High Performance**: Leverages incremental snapshots and dirty page tracking
20//! - **Configurable**: Pluggable reward policies, action sources, observation modes
21//! - **Information-Theoretic**: Built-in support for entropy-based metrics
22
23use crate::aixi::common::{Action, PerceptVal, RandomGenerator, Reward};
24use crate::aixi::environment::Environment;
25#[cfg(feature = "backend-rwkv")]
26use crate::coders::softmax_pdf_inplace;
27#[cfg(feature = "backend-mamba")]
28use crate::mambazip;
29#[cfg(feature = "backend-mamba")]
30use crate::mambazip::Compressor as MambaCompressor;
31use crate::mixture::OnlineBytePredictor;
32use crate::rosaplus::RosaPlus;
33#[cfg(feature = "backend-rwkv")]
34use crate::rwkvzip::Compressor;
35use crate::zpaq_rate::ZpaqRateModel;
36use crate::{
37    RateBackend, cross_entropy_rate_backend, entropy_rate_backend, marginal_entropy_bytes,
38};
39use serde_json::Value;
40use std::borrow::Cow;
41use std::fs::OpenOptions;
42use std::io::Write;
43use std::path::Path;
44use std::sync::Arc;
45use std::time::{Duration, Instant};
46
47// Re-export nyx-lite types for external use
48pub use nyx_lite::mem::SharedMemoryRegion;
49pub use nyx_lite::snapshot::NyxSnapshot;
50pub use nyx_lite::{ExitReason, NyxVM, SharedMemoryPolicy};
51
52// ============================================================================
53// Encoding Types
54// ============================================================================
55
/// Encoding applied to payloads on the wire protocol.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PayloadEncoding {
    /// Payload is carried as UTF-8/text bytes.
    Utf8,
    /// Payload is carried as hexadecimal text.
    Hex,
}
64
65impl PayloadEncoding {
66    /// Parse a payload encoding label.
67    ///
68    /// Accepted values are `utf8`, `text`, and `hex`.
69    #[allow(clippy::should_implement_trait)]
70    pub fn from_str(s: &str) -> Option<Self> {
71        Self::parse(s)
72    }
73
74    /// Parse a payload encoding label.
75    ///
76    /// Accepted values are `utf8`, `text`, and `hex`.
77    pub fn parse(s: &str) -> Option<Self> {
78        match s {
79            "utf8" | "text" => Some(Self::Utf8),
80            "hex" => Some(Self::Hex),
81            _ => None,
82        }
83    }
84
85    /// Decode a wire payload string into raw bytes using this encoding.
86    pub fn decode(self, s: &str) -> anyhow::Result<Vec<u8>> {
87        match self {
88            Self::Utf8 => Ok(s.as_bytes().to_vec()),
89            Self::Hex => hex_decode(s),
90        }
91    }
92
93    /// Encode raw bytes for transport over the configured wire protocol.
94    pub fn encode(self, bytes: &[u8]) -> String {
95        match self {
96            Self::Utf8 => String::from_utf8_lossy(bytes).to_string(),
97            Self::Hex => hex_encode(bytes),
98        }
99    }
100}
101
102impl std::str::FromStr for PayloadEncoding {
103    type Err = &'static str;
104
105    fn from_str(s: &str) -> Result<Self, Self::Err> {
106        Self::parse(s).ok_or("unknown payload encoding")
107    }
108}
109
110fn hex_decode(s: &str) -> anyhow::Result<Vec<u8>> {
111    let mut out = Vec::with_capacity(s.len() / 2);
112    let mut buf = 0u8;
113    let mut high = true;
114    for c in s.bytes() {
115        let v = match c {
116            b'0'..=b'9' => c - b'0',
117            b'a'..=b'f' => c - b'a' + 10,
118            b'A'..=b'F' => c - b'A' + 10,
119            b' ' | b'\n' | b'\r' | b'\t' => continue,
120            _ => return Err(anyhow::anyhow!("invalid hex byte: {}", c as char)),
121        };
122        if high {
123            buf = v << 4;
124            high = false;
125        } else {
126            buf |= v;
127            out.push(buf);
128            high = true;
129        }
130    }
131    if !high {
132        return Err(anyhow::anyhow!("hex string has odd length"));
133    }
134    Ok(out)
135}
136
/// Resolve `path` against `base` unless it is already absolute.
///
/// Absolute paths are returned unchanged; relative ones are joined onto `base` and
/// converted to a `String` lossily.
fn resolve_relative_path(base: &Path, path: &str) -> String {
    let candidate = Path::new(path);
    if candidate.is_relative() {
        return base.join(candidate).to_string_lossy().into_owned();
    }
    path.to_owned()
}
145
146fn rewrite_firecracker_config_paths(config_path: &str, raw_json: &str) -> anyhow::Result<String> {
147    let base_dir = Path::new(config_path)
148        .parent()
149        .unwrap_or_else(|| Path::new("."));
150    let mut v: Value = serde_json::from_str(raw_json)?;
151
152    if let Some(boot) = v.get_mut("boot-source") {
153        if let Some(path_val) = boot.get_mut("kernel_image_path")
154            && let Some(path_str) = path_val.as_str()
155        {
156            let resolved = resolve_relative_path(base_dir, path_str);
157            *path_val = Value::String(resolved);
158        }
159        if let Some(path_val) = boot.get_mut("initrd_path")
160            && let Some(path_str) = path_val.as_str()
161        {
162            let resolved = resolve_relative_path(base_dir, path_str);
163            *path_val = Value::String(resolved);
164        }
165    }
166
167    if let Some(drives) = v.get_mut("drives").and_then(|d| d.as_array_mut()) {
168        for drive in drives {
169            if let Some(path_val) = drive.get_mut("path_on_host")
170                && let Some(path_str) = path_val.as_str()
171            {
172                let resolved = resolve_relative_path(base_dir, path_str);
173                *path_val = Value::String(resolved);
174            }
175        }
176    }
177
178    Ok(serde_json::to_string(&v)?)
179}
180
/// Render bytes as lower-case hexadecimal text, two digits per byte.
fn hex_encode(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len() * 2);
    text.extend(
        bytes
            .iter()
            .flat_map(|&byte| [hex_digit(byte >> 4), hex_digit(byte & 0x0F)]),
    );
    text
}

/// Map a nibble value to its lower-case hex digit.
///
/// Callers in view only pass values in `0..=15`.
fn hex_digit(v: u8) -> char {
    if v <= 9 {
        char::from(b'0' + v)
    } else {
        char::from(b'a' + (v - 10))
    }
}
196
197// ============================================================================
198// Guest Communication Protocol
199// ============================================================================
200
// Hypercall identifiers (must match guest implementation). Each magic value is the
// little-endian interpretation of an 8-byte ASCII tag. They are exported for use by
// custom guest programs.

/// Hypercall tag `"execdone"`.
#[allow(dead_code)]
pub const HYPERCALL_EXECDONE: u64 = u64::from_le_bytes(*b"execdone");
/// Guest requested host-side snapshot operation (tag `"snapshot"`).
#[allow(dead_code)]
pub const HYPERCALL_SNAPSHOT: u64 = u64::from_le_bytes(*b"snapshot");
/// Guest announced nyx-lite protocol/version handshake (tag `"nyx-lite"`).
#[allow(dead_code)]
pub const HYPERCALL_NYX_LITE: u64 = u64::from_le_bytes(*b"nyx-lite");
/// Guest requested shared memory initialization/refresh (tag `"sharemem"`).
#[allow(dead_code)]
pub const HYPERCALL_SHAREMEM: u64 = u64::from_le_bytes(*b"sharemem");
/// Guest emitted a debug-print hypercall payload (tag `"dbgprint"`).
#[allow(dead_code)]
pub const HYPERCALL_DBGPRINT: u64 = u64::from_le_bytes(*b"dbgprint");

// NOTE(review): the three offsets below are presumably byte offsets into the shared
// communication region (action length, response length, payload start) — confirm
// against the code that reads/writes the region.
const SHARED_ACTION_LEN_OFFSET: u64 = 0;
const SHARED_RESP_LEN_OFFSET: u64 = 8;
const SHARED_PAYLOAD_OFFSET: u64 = 16;
221
222/// Protocol configuration for structured communication.
223#[derive(Clone, Debug)]
224pub struct NyxProtocolConfig {
225    /// Prefix for action messages.
226    pub action_prefix: String,
227    /// Suffix for action messages.
228    pub action_suffix: String,
229    /// Prefix for observation responses.
230    pub obs_prefix: String,
231    /// Prefix for reward responses.
232    pub rew_prefix: String,
233    /// Prefix for done indicator.
234    pub done_prefix: String,
235    /// Prefix for data payloads.
236    pub data_prefix: String,
237    /// Wire encoding for payloads.
238    pub wire_encoding: PayloadEncoding,
239}
240
241impl Default for NyxProtocolConfig {
242    fn default() -> Self {
243        Self {
244            action_prefix: "ACT ".to_string(),
245            action_suffix: "\n".to_string(),
246            obs_prefix: "OBS ".to_string(),
247            rew_prefix: "REW ".to_string(),
248            done_prefix: "DONE ".to_string(),
249            data_prefix: "DATA ".to_string(),
250            wire_encoding: PayloadEncoding::Hex,
251        }
252    }
253}
254
255// ============================================================================
256// Action Configuration
257// ============================================================================
258
/// Description of one action the agent can take.
#[derive(Debug, Clone)]
pub struct NyxActionSpec {
    /// Human-readable label, if one was given.
    pub name: Option<String>,
    /// Raw bytes sent as the action payload.
    pub payload: Vec<u8>,
}
267
/// Kinds of mutation available to the fuzzing action source.
#[derive(Debug, Clone)]
pub enum FuzzMutator {
    /// Flip a single random bit.
    FlipBit,
    /// Flip a whole byte.
    FlipByte,
    /// Insert one random byte at a random position.
    InsertByte,
    /// Remove one random byte.
    DeleteByte,
    /// Splice in bytes taken from an existing seed input.
    SpliceSeed,
    /// Replace the working input with a seed input.
    ResetSeed,
    /// Apply a short sequence of random mutations.
    Havoc,
}
286
287/// Fuzzing configuration for action generation.
288#[derive(Clone, Debug)]
289pub struct NyxFuzzConfig {
290    /// Corpus used for seed/reset/splice operations.
291    pub seeds: Vec<Vec<u8>>,
292    /// Mutator set available for action generation.
293    pub mutators: Vec<FuzzMutator>,
294    /// Minimum generated action length.
295    pub min_len: usize,
296    /// Maximum generated action length.
297    pub max_len: usize,
298    /// Optional dictionary tokens for insertion/splicing.
299    pub dictionary: Vec<Vec<u8>>,
300    /// Deterministic RNG seed for mutation sampling.
301    pub rng_seed: u64,
302}
303
304/// Source of actions for the environment.
305#[derive(Clone, Debug)]
306pub enum NyxActionSource {
307    /// Fixed set of action payloads.
308    Literal(Vec<NyxActionSpec>),
309    /// Mutation-based action generation.
310    Fuzz(NyxFuzzConfig),
311}
312
313// ============================================================================
314// Observation Configuration
315// ============================================================================
316
/// Strategy for deriving observations from guest output.
#[derive(Debug, Clone, Copy)]
pub enum NyxObservationPolicy {
    /// Parse structured OBS/REW/DONE messages emitted by the guest.
    FromGuest,
    /// Derive the observation by hashing the raw output.
    OutputHash,
    /// Treat the raw output bytes as the observation stream.
    RawOutput,
    /// Take the observation from shared memory contents.
    SharedMemory,
}
329
/// How an observation stream is normalized to the configured length.
#[derive(Debug, Clone, Copy)]
pub enum NyxObservationStreamMode {
    /// Short streams are padded and long streams are truncated.
    PadTruncate,
    /// Short streams are padded; long streams are left alone.
    Pad,
    /// Long streams are truncated; short streams are left alone.
    Truncate,
}
340
341// ============================================================================
342// Reward Configuration
343// ============================================================================
344
345/// How rewards are computed.
346#[derive(Clone)]
347pub enum NyxRewardPolicy {
348    /// Parse reward from guest response.
349    FromGuest,
350    /// Pattern matching on output.
351    Pattern {
352        /// Substring/pattern tested against guest output.
353        pattern: String,
354        /// Reward returned when the pattern does not match.
355        base_reward: i64,
356        /// Additional reward added when the pattern matches.
357        bonus_reward: i64,
358    },
359    /// Custom reward function (callback-based).
360    Custom(Arc<dyn Fn(&NyxStepResult) -> Reward + Send + Sync>),
361}
362
/// Optional shaping term that is added on top of the base reward.
#[derive(Debug, Clone)]
pub enum NyxRewardShaping {
    /// Shaping by entropy reduction relative to a baseline.
    EntropyReduction {
        /// Reference bytes used as the baseline data distribution.
        baseline_bytes: Vec<u8>,
        /// Max order passed to the entropy estimators.
        max_order: i64,
        /// Multiplier applied to the shaping term.
        scale: f64,
        /// Optional additive bonus when the guest crashes.
        crash_bonus: Option<i64>,
        /// Optional additive bonus when the guest times out.
        timeout_bonus: Option<i64>,
    },
    /// Shaping by entropy of trace data (online learning).
    TraceEntropy {
        /// Max order passed to trace entropy estimation.
        max_order: i64,
        /// Multiplier applied to the shaping term.
        scale: f64,
        /// When true, the term is normalized by trace length.
        normalize: bool,
    },
}
389
390impl std::fmt::Debug for NyxRewardPolicy {
391    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
392        match self {
393            Self::FromGuest => write!(f, "FromGuest"),
394            Self::Pattern {
395                pattern,
396                base_reward,
397                bonus_reward,
398            } => f
399                .debug_struct("Pattern")
400                .field("pattern", pattern)
401                .field("base_reward", base_reward)
402                .field("bonus_reward", bonus_reward)
403                .finish(),
404            Self::Custom(_) => write!(f, "Custom(<fn>)"),
405        }
406    }
407}
408
409// ============================================================================
410// Action Filtering
411// ============================================================================
412
/// Thresholds for information-theoretic filtering of actions.
#[derive(Debug, Clone)]
pub struct NyxActionFilter {
    /// Lower entropy threshold, if any.
    pub min_entropy: Option<f64>,
    /// Upper entropy threshold, if any.
    pub max_entropy: Option<f64>,
    /// Minimum intrinsic dependence, if any.
    pub min_intrinsic_dependence: Option<f64>,
    /// Minimum novelty (cross-entropy vs prior), if any.
    pub min_novelty: Option<f64>,
    /// Prior corpus used for the novelty computation.
    pub novelty_prior: Option<Vec<u8>>,
    /// Max order used for entropy estimation.
    pub max_order: i64,
    /// Reward assigned when an action is rejected.
    pub reject_reward: Option<i64>,
}
431
432// ============================================================================
433// Trace Configuration
434// ============================================================================
435
/// Settings for collecting and analysing trace data.
#[derive(Debug, Clone)]
pub struct NyxTraceConfig {
    /// Name of the shared memory region holding trace data, if any.
    pub shared_region_name: Option<String>,
    /// Upper bound on bytes collected per step.
    pub max_bytes: usize,
    /// Whether the trace model is reset at episode boundaries.
    pub reset_on_episode: bool,
}
446
447// ============================================================================
448// Main Configuration
449// ============================================================================
450
451/// Complete configuration for the nyx-lite VM environment.
452#[derive(Clone)]
453pub struct NyxVmConfig {
454    /// Path to Firecracker JSON config.
455    pub firecracker_config: String,
456    /// Instance ID for the VM.
457    pub instance_id: String,
458
459    // Shared memory configuration
460    /// Name of the shared memory region for communication.
461    pub shared_region_name: String,
462    /// Size of the shared memory region.
463    pub shared_region_size: usize,
464    /// Shared memory policy (snapshot vs preserve).
465    pub shared_memory_policy: SharedMemoryPolicy,
466
467    // Timing configuration
468    /// Timeout for each step.
469    pub step_timeout: Duration,
470    /// Timeout for initial boot.
471    pub boot_timeout: Duration,
472
473    // Episode configuration
474    /// Number of steps per episode.
475    pub episode_steps: usize,
476    /// Cost subtracted from reward each step.
477    pub step_cost: i64,
478
479    // Observation configuration
480    /// Observation derivation policy.
481    pub observation_policy: NyxObservationPolicy,
482    /// Bits per observation symbol.
483    pub observation_bits: usize,
484    /// Number of observation symbols per action.
485    pub observation_stream_len: usize,
486    /// Stream normalization mode.
487    pub observation_stream_mode: NyxObservationStreamMode,
488    /// Padding byte for short streams.
489    pub observation_pad_byte: u8,
490
491    // Reward configuration
492    /// Bits for reward encoding.
493    pub reward_bits: usize,
494    /// Reward computation policy.
495    pub reward_policy: NyxRewardPolicy,
496    /// Optional reward shaping (additive; non-canonical).
497    pub reward_shaping: Option<NyxRewardShaping>,
498
499    // Action configuration
500    /// Source of actions.
501    pub action_source: NyxActionSource,
502    /// Optional action filter.
503    pub action_filter: Option<NyxActionFilter>,
504
505    // Protocol configuration
506    /// Wire protocol for structured communication.
507    pub protocol: NyxProtocolConfig,
508
509    // Statistics backend
510    /// Backend for entropy estimation.
511    pub stats_backend: RateBackend,
512
513    // Trace configuration
514    /// Optional trace collection.
515    pub trace: Option<NyxTraceConfig>,
516
517    // Debug mode
518    /// Enable verbose VM/protocol diagnostics.
519    pub debug_mode: bool,
520
521    // Crash logging
522    /// Path to log crashes/interesting behaviors (JSONL format).
523    pub crash_log: Option<String>,
524}
525
526impl Default for NyxVmConfig {
527    fn default() -> Self {
528        Self {
529            firecracker_config: String::new(),
530            instance_id: "aixi-nyx".to_string(),
531            shared_region_name: "shared".to_string(),
532            shared_region_size: 4096,
533            shared_memory_policy: SharedMemoryPolicy::Snapshot,
534            step_timeout: Duration::from_millis(100),
535            boot_timeout: Duration::from_secs(30),
536            episode_steps: 100,
537            step_cost: 0,
538            observation_policy: NyxObservationPolicy::SharedMemory,
539            observation_bits: 8,
540            observation_stream_len: 64,
541            observation_stream_mode: NyxObservationStreamMode::PadTruncate,
542            observation_pad_byte: 0,
543            reward_bits: 8,
544            reward_policy: NyxRewardPolicy::FromGuest,
545            reward_shaping: None,
546            action_source: NyxActionSource::Literal(vec![]),
547            action_filter: None,
548            protocol: NyxProtocolConfig::default(),
549            stats_backend: RateBackend::default(),
550            trace: None,
551            debug_mode: false,
552            crash_log: None,
553        }
554    }
555}
556
557// ============================================================================
558// Step Result
559// ============================================================================
560
561/// Result of a single environment step.
562#[derive(Clone, Debug)]
563pub struct NyxStepResult {
564    /// Exit reason from the VM.
565    pub exit_reason: NyxExitKind,
566    /// Raw output data from guest.
567    pub output: Vec<u8>,
568    /// Parsed observation (if any).
569    pub parsed_obs: Option<u64>,
570    /// Parsed reward (if any).
571    pub parsed_rew: Option<i64>,
572    /// Done flag.
573    pub done: bool,
574    /// Trace data (if collected).
575    pub trace_data: Vec<u8>,
576    /// Shared memory contents snapshot.
577    pub shared_memory: Vec<u8>,
578}
579
/// Coarse categories of VM exit reasons.
#[derive(Debug, Clone)]
pub enum NyxExitKind {
    /// The guest terminated normally with an application-defined code.
    ExecDone(u64),
    /// The step timed out before a terminal signal/response arrived.
    Timeout,
    /// The VM reported a shutdown event.
    Shutdown,
    /// A raw hypercall event with its integer arguments.
    Hypercall {
        /// Hypercall identifier/magic value.
        code: u64,
        /// First hypercall argument.
        arg1: u64,
        /// Second hypercall argument.
        arg2: u64,
        /// Third hypercall argument.
        arg3: u64,
        /// Fourth hypercall argument.
        arg4: u64,
    },
    /// A debug string emitted by the guest/host bridge.
    DebugPrint(String),
    /// A breakpoint/trap-like stop event.
    Breakpoint,
    /// Any other exit event, represented as text.
    Other(String),
}
609
610impl From<ExitReason> for NyxExitKind {
611    fn from(reason: ExitReason) -> Self {
612        match reason {
613            ExitReason::ExecDone(code) => Self::ExecDone(code),
614            ExitReason::Timeout => Self::Timeout,
615            ExitReason::Shutdown => Self::Shutdown,
616            ExitReason::Hypercall(r8, r9, r10, r11, r12) => Self::Hypercall {
617                code: r8,
618                arg1: r9,
619                arg2: r10,
620                arg3: r11,
621                arg4: r12,
622            },
623            ExitReason::DebugPrint(s) => Self::DebugPrint(s),
624            ExitReason::Breakpoint => Self::Breakpoint,
625            ExitReason::RequestSnapshot => Self::Other("RequestSnapshot".to_string()),
626            ExitReason::SharedMem(name, _, _) => Self::Other(format!("SharedMem({})", name)),
627            ExitReason::SingleStep => Self::Other("SingleStep".to_string()),
628            ExitReason::Interrupted => Self::Other("Interrupted".to_string()),
629            ExitReason::HWBreakpoint(n) => Self::Other(format!("HWBreakpoint({})", n)),
630            ExitReason::BadMemoryAccess(_) => Self::Other("BadMemoryAccess".to_string()),
631        }
632    }
633}
634
635// ============================================================================
636// Trace Model
637// ============================================================================
638
639/// Predictive model for trace-based reward computation.
640enum TraceModel {
641    Rosa {
642        model: RosaPlus,
643        max_order: i64,
644    },
645    Ctw {
646        tree: crate::ctw::ContextTree,
647    },
648    FacCtw {
649        tree: crate::ctw::FacContextTree,
650        bits_per_symbol: usize,
651    },
652    #[cfg(feature = "backend-mamba")]
653    Mamba {
654        compressor: MambaCompressor,
655        primed: bool,
656    },
657    Rwkv7 {
658        compressor: Compressor,
659        primed: bool,
660    },
661    Zpaq {
662        model: ZpaqRateModel,
663    },
664    Mixture {
665        backend: RateBackend,
666        model: crate::mixture::RateBackendPredictor,
667    },
668}
669
670impl TraceModel {
671    fn predictor_backed(backend: RateBackend) -> Self {
672        let mut model =
673            crate::mixture::RateBackendPredictor::from_backend(backend.clone(), -1, 2f64.powi(-24));
674        model
675            .begin_stream(None)
676            .unwrap_or_else(|e| panic!("predictor-backed stream init failed: {e}"));
677        TraceModel::Mixture { backend, model }
678    }
679
680    fn new(backend: &RateBackend, max_order: i64) -> Self {
681        match backend {
682            RateBackend::RosaPlus => {
683                let mut model = RosaPlus::new(max_order, false, 0, 42);
684                model.build_lm_full_bytes_no_finalize_endpos();
685                TraceModel::Rosa { model, max_order }
686            }
687            #[cfg(feature = "backend-mamba")]
688            RateBackend::Mamba { model } => {
689                let compressor = MambaCompressor::new_from_model(model.clone());
690                TraceModel::Mamba {
691                    compressor,
692                    primed: false,
693                }
694            }
695            #[cfg(feature = "backend-mamba")]
696            RateBackend::MambaMethod { method } => {
697                let compressor = MambaCompressor::new_from_method(method)
698                    .unwrap_or_else(|e| panic!("invalid mamba method for vm trace model: {e}"));
699                TraceModel::Mamba {
700                    compressor,
701                    primed: false,
702                }
703            }
704            RateBackend::Rwkv7 { model } => {
705                let compressor = Compressor::new_from_model(model.clone());
706                TraceModel::Rwkv7 {
707                    compressor,
708                    primed: false,
709                }
710            }
711            RateBackend::Rwkv7Method { method } => {
712                let compressor = Compressor::new_from_method(method)
713                    .unwrap_or_else(|e| panic!("invalid rwkv7 method for vm trace model: {e}"));
714                TraceModel::Rwkv7 {
715                    compressor,
716                    primed: false,
717                }
718            }
719            RateBackend::Zpaq { method } => TraceModel::Zpaq {
720                model: ZpaqRateModel::new(method.clone(), 2f64.powi(-24)),
721            },
722            RateBackend::Mixture { .. }
723            | RateBackend::Particle { .. }
724            | RateBackend::Match { .. }
725            | RateBackend::SparseMatch { .. }
726            | RateBackend::Ppmd { .. }
727            | RateBackend::Calibrated { .. } => TraceModel::predictor_backed(backend.clone()),
728            RateBackend::Ctw { depth } => TraceModel::Ctw {
729                tree: crate::ctw::ContextTree::new(*depth),
730            },
731            RateBackend::FacCtw {
732                base_depth,
733                num_percept_bits: _,
734                encoding_bits,
735            } => {
736                let bits_per_symbol = (*encoding_bits).clamp(1, 8);
737                TraceModel::FacCtw {
738                    tree: crate::ctw::FacContextTree::new(*base_depth, bits_per_symbol),
739                    bits_per_symbol,
740                }
741            }
742        }
743    }
744
745    fn reset(&mut self) {
746        match self {
747            TraceModel::Rosa { model, max_order } => {
748                let mut fresh = RosaPlus::new(*max_order, false, 0, 42);
749                fresh.build_lm_full_bytes_no_finalize_endpos();
750                *model = fresh;
751            }
752            TraceModel::Ctw { tree } => tree.clear(),
753            TraceModel::FacCtw { tree, .. } => tree.clear(),
754            #[cfg(feature = "backend-mamba")]
755            TraceModel::Mamba { compressor, primed } => {
756                compressor.state.reset();
757                *primed = false;
758            }
759            TraceModel::Rwkv7 { compressor, primed } => {
760                compressor.state.reset();
761                *primed = false;
762            }
763            TraceModel::Zpaq { model } => {
764                model.reset();
765            }
766            TraceModel::Mixture { backend, model } => {
767                *model = crate::mixture::RateBackendPredictor::from_backend(
768                    backend.clone(),
769                    -1,
770                    2f64.powi(-24),
771                );
772                model
773                    .begin_stream(None)
774                    .unwrap_or_else(|e| panic!("mixture stream init failed: {e}"));
775            }
776        }
777    }
778
779    /// Update the model with new data and return the surprise (bits).
780    fn update_and_score(&mut self, data: &[u8]) -> f64 {
781        if data.is_empty() {
782            return 0.0;
783        }
784        match self {
785            TraceModel::Rosa { model, .. } => {
786                let mut bits = 0.0;
787                for &b in data {
788                    let p = model.prob_for_last(b as u32).max(1e-12);
789                    bits -= p.log2();
790                    model.train_byte(b);
791                }
792                bits
793            }
794            TraceModel::Ctw { tree } => {
795                let log_before = tree.get_log_block_probability();
796                for &b in data {
797                    for i in (0..8).rev() {
798                        tree.update(((b >> i) & 1) == 1);
799                    }
800                }
801                let log_after = tree.get_log_block_probability();
802                let log_delta = log_after - log_before;
803                -log_delta / std::f64::consts::LN_2
804            }
805            TraceModel::FacCtw {
806                tree,
807                bits_per_symbol,
808            } => {
809                let log_before = tree.get_log_block_probability();
810                for &b in data {
811                    for i in 0..*bits_per_symbol {
812                        tree.update(((b >> i) & 1) == 1, i);
813                    }
814                }
815                let log_after = tree.get_log_block_probability();
816                let log_delta = log_after - log_before;
817                -log_delta / std::f64::consts::LN_2
818            }
819            #[cfg(feature = "backend-mamba")]
820            TraceModel::Mamba { compressor, primed } => {
821                if !*primed {
822                    let bias = compressor.online_bias_snapshot();
823                    let logits =
824                        compressor
825                            .model
826                            .forward(&mut compressor.scratch, 0, &mut compressor.state);
827                    mambazip::Compressor::logits_to_pdf(
828                        logits,
829                        bias.as_deref(),
830                        &mut compressor.pdf_buffer,
831                    );
832                    *primed = true;
833                }
834                let mut bits = 0.0;
835                for &b in data {
836                    let p = compressor.pdf_buffer[b as usize].max(1e-12);
837                    bits -= p.log2();
838                    let bias = compressor.online_bias_snapshot();
839                    let logits = compressor.model.forward(
840                        &mut compressor.scratch,
841                        b as u32,
842                        &mut compressor.state,
843                    );
844                    mambazip::Compressor::logits_to_pdf(
845                        logits,
846                        bias.as_deref(),
847                        &mut compressor.pdf_buffer,
848                    );
849                }
850                bits
851            }
852            TraceModel::Rwkv7 { compressor, primed } => {
853                if !*primed {
854                    let vocab_size = compressor.vocab_size();
855                    let logits =
856                        compressor
857                            .model
858                            .forward(&mut compressor.scratch, 0, &mut compressor.state);
859                    softmax_pdf_inplace(logits, vocab_size, &mut compressor.pdf_buffer);
860                    *primed = true;
861                }
862                let mut bits = 0.0;
863                let vocab_size = compressor.vocab_size();
864                for &b in data {
865                    let p = compressor.pdf_buffer[b as usize].max(1e-12);
866                    bits -= p.log2();
867                    let logits = compressor.model.forward(
868                        &mut compressor.scratch,
869                        b as u32,
870                        &mut compressor.state,
871                    );
872                    softmax_pdf_inplace(logits, vocab_size, &mut compressor.pdf_buffer);
873                }
874                bits
875            }
876            TraceModel::Zpaq { model } => model.update_and_score(data),
877            TraceModel::Mixture { model, .. } => {
878                let mut bits = 0.0;
879                for &b in data {
880                    let logp = model.log_prob(b);
881                    bits -= logp / std::f64::consts::LN_2;
882                    model.update(b);
883                }
884                bits
885            }
886        }
887    }
888}
889
890// ============================================================================
891// Fuzz State
892// ============================================================================
893
/// Mutable state carried between steps when actions come from
/// `NyxActionSource::Fuzz`.
struct FuzzState {
    /// Most recently generated fuzz input. Starts as the first configured seed
    /// and is the buffer that the next mutation is applied to.
    current: Vec<u8>,
    /// Random generator handed to the mutators.
    rng: RandomGenerator,
}
898
899// ============================================================================
900// NyxVmEnvironment
901// ============================================================================
902
/// High-performance VM environment using nyx-lite.
///
/// Owns the VM instance together with the per-episode and per-step bookkeeping
/// needed to drive it. Construction boots the VM and captures a base snapshot
/// that `reset` re-applies.
pub struct NyxVmEnvironment {
    /// Configuration the environment was built from.
    config: NyxVmConfig,
    /// The nyx-lite VM instance.
    vm: NyxVM,
    /// Base snapshot for episode resets; `None` until initialization has
    /// captured it.
    base_snapshot: Option<Arc<NyxSnapshot>>,
    /// Guest virtual address of the shared memory region, as reported by the
    /// guest when it registered the region.
    shared_vaddr: Option<u64>,
    /// CR3 value that was active when the shared memory region was registered.
    shared_cr3: Option<u64>,
    /// Trace model for entropy-based rewards; only populated for the
    /// `TraceEntropy` reward shaping mode.
    trace_model: Option<TraceModel>,
    /// Baseline entropy for entropy reduction rewards; only populated for the
    /// `EntropyReduction` reward shaping mode.
    baseline_entropy: Option<f64>,
    /// Effective reward shaping policy (additive), copied from the config.
    reward_shaping: Option<NyxRewardShaping>,
    /// Fuzzing state; only populated when the action source is `Fuzz`.
    fuzz_state: Option<FuzzState>,

    // Current step state
    /// Current observation.
    obs: PerceptVal,
    /// Current reward.
    rew: Reward,
    /// Current observation stream.
    obs_stream: Vec<PerceptVal>,
    /// Step within current episode.
    step_in_episode: usize,
    /// Whether the environment needs reset.
    needs_reset: bool,
    /// Whether the VM has been initialized (booted and base snapshot taken).
    initialized: bool,
}
938
939impl NyxVmEnvironment {
940    /// Creates a new NyxVmEnvironment with the given configuration.
941    pub fn new(config: NyxVmConfig) -> anyhow::Result<Self> {
942        // Validate configuration
943        if config.firecracker_config.is_empty() {
944            return Err(anyhow::anyhow!("firecracker_config path must be set"));
945        }
946        if config.episode_steps == 0 {
947            return Err(anyhow::anyhow!("episode_steps must be > 0"));
948        }
949        if matches!(config.observation_policy, NyxObservationPolicy::RawOutput)
950            && config.observation_stream_len == 0
951        {
952            return Err(anyhow::anyhow!(
953                "observation_stream_len must be > 0 for RawOutput policy"
954            ));
955        }
956
957        // Load Firecracker config and resolve relative paths
958        let fc_config_raw = std::fs::read_to_string(&config.firecracker_config)
959            .map_err(|e| anyhow::anyhow!("Failed to read firecracker config: {}", e))?;
960        let fc_config =
961            rewrite_firecracker_config_paths(&config.firecracker_config, &fc_config_raw)
962                .map_err(|e| anyhow::anyhow!("Failed to parse firecracker config: {}", e))?;
963
964        // Create the VM
965        let vm = NyxVM::new(config.instance_id.clone(), &fc_config);
966
967        // Initialize reward shaping
968        let reward_shaping = config.reward_shaping.clone();
969
970        if matches!(reward_shaping, Some(NyxRewardShaping::TraceEntropy { .. }))
971            && config.trace.is_none()
972        {
973            return Err(anyhow::anyhow!(
974                "vm_trace must be configured for vm_reward_shaping.mode=trace-entropy"
975            ));
976        }
977
978        // Initialize trace model if needed
979        let trace_model = match &reward_shaping {
980            Some(NyxRewardShaping::TraceEntropy { max_order, .. }) => {
981                Some(TraceModel::new(&config.stats_backend, *max_order))
982            }
983            _ => None,
984        };
985
986        // Compute baseline entropy if needed
987        let baseline_entropy = match &reward_shaping {
988            Some(NyxRewardShaping::EntropyReduction {
989                baseline_bytes,
990                max_order,
991                ..
992            }) => {
993                let h = if *max_order == 0 {
994                    marginal_entropy_bytes(baseline_bytes)
995                } else {
996                    entropy_rate_backend(baseline_bytes, *max_order, &config.stats_backend)
997                };
998                Some(h)
999            }
1000            _ => None,
1001        };
1002
1003        // Initialize fuzz state if needed
1004        let fuzz_state = match &config.action_source {
1005            NyxActionSource::Fuzz(fuzz) => {
1006                if fuzz.seeds.is_empty() {
1007                    return Err(anyhow::anyhow!("Fuzz mode requires at least one seed"));
1008                }
1009                if fuzz.mutators.is_empty() {
1010                    return Err(anyhow::anyhow!("Fuzz mode requires at least one mutator"));
1011                }
1012                let seed = fuzz.seeds[0].clone();
1013                Some(FuzzState {
1014                    current: seed,
1015                    rng: RandomGenerator::new().fork_with(fuzz.rng_seed),
1016                })
1017            }
1018            NyxActionSource::Literal(actions) => {
1019                if actions.is_empty() {
1020                    return Err(anyhow::anyhow!("Literal mode requires at least one action"));
1021                }
1022                None
1023            }
1024        };
1025
1026        let mut env = Self {
1027            config,
1028            vm,
1029            base_snapshot: None,
1030            shared_vaddr: None,
1031            shared_cr3: None,
1032            trace_model,
1033            baseline_entropy,
1034            reward_shaping,
1035            fuzz_state,
1036            obs: 0,
1037            rew: 0,
1038            obs_stream: Vec::new(),
1039            step_in_episode: 0,
1040            needs_reset: true,
1041            initialized: false,
1042        };
1043
1044        // Boot and initialize
1045        env.initialize()?;
1046
1047        Ok(env)
1048    }
1049
    /// Initializes the VM by booting to the snapshot point.
    ///
    /// Runs the guest in two phases: first until it registers the shared memory
    /// region named `config.shared_region_name`, then until it requests a
    /// snapshot, at which point the base snapshot is taken. Does nothing if the
    /// environment is already initialized.
    ///
    /// # Errors
    ///
    /// Fails if `config.boot_timeout` elapses in either phase, or if the VM
    /// shuts down before the snapshot request.
    fn initialize(&mut self) -> anyhow::Result<()> {
        if self.initialized {
            return Ok(());
        }

        if self.config.debug_mode {
            eprintln!("[NyxVm] Booting VM...");
        }

        // Run until we get the shared memory registration
        // `start` is shared by both phases, so `boot_timeout` bounds the whole boot.
        let start = Instant::now();
        loop {
            if start.elapsed() > self.config.boot_timeout {
                return Err(anyhow::anyhow!("Boot timeout waiting for shared memory"));
            }

            // Run in one-second slices so the timeout above is re-checked regularly.
            let exit = self.vm.run(Duration::from_secs(1));
            match exit {
                ExitReason::SharedMem(name, vaddr, size) => {
                    if self.config.debug_mode {
                        eprintln!(
                            "[NyxVm] Shared memory registered: {} @ {:#x} ({} bytes)",
                            name, vaddr, size
                        );
                    }
                    // Trailing NUL bytes are ignored when comparing names; regions
                    // with any other name are skipped and the wait continues.
                    if name.trim_end_matches('\0') == self.config.shared_region_name {
                        self.shared_vaddr = Some(vaddr);
                        self.shared_cr3 = Some(self.vm.sregs().cr3);
                        // Register the shared region with the configured policy
                        // NOTE(review): the registration result is discarded here —
                        // confirm that a failed registration is safe to ignore.
                        let _ = self.vm.register_shared_region_current(
                            vaddr,
                            size,
                            self.config.shared_memory_policy,
                        );
                        break;
                    }
                }
                ExitReason::DebugPrint(msg) => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] Guest: {}", msg);
                    }
                }
                ExitReason::Shutdown => {
                    return Err(anyhow::anyhow!("VM shut down during boot"));
                }
                _ => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] Boot exit: {:?}", exit);
                    }
                    // Continue waiting
                }
            }
        }

        // Continue running until snapshot request
        loop {
            if start.elapsed() > self.config.boot_timeout {
                return Err(anyhow::anyhow!("Boot timeout waiting for snapshot request"));
            }

            let exit = self.vm.run(Duration::from_secs(1));
            match exit {
                ExitReason::RequestSnapshot => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] Taking base snapshot...");
                    }
                    // This snapshot is what `reset` re-applies for each episode.
                    self.base_snapshot = Some(self.vm.take_base_snapshot());
                    break;
                }
                ExitReason::DebugPrint(msg) => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] Guest: {}", msg);
                    }
                }
                ExitReason::Shutdown => {
                    return Err(anyhow::anyhow!("VM shut down before snapshot"));
                }
                _ => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] Snapshot wait exit: {:?}", exit);
                    }
                    // Continue waiting
                }
            }
        }

        if self.config.debug_mode {
            eprintln!("[NyxVm] Initialization complete");
        }

        // The VM now sits exactly at the snapshot point, so no reset is pending.
        self.initialized = true;
        self.needs_reset = false;
        Ok(())
    }
1145
1146    /// Resets to the base snapshot.
1147    pub fn reset(&mut self) -> anyhow::Result<()> {
1148        let snapshot = self
1149            .base_snapshot
1150            .as_ref()
1151            .ok_or_else(|| anyhow::anyhow!("No base snapshot available"))?
1152            .clone();
1153
1154        self.vm.apply_snapshot(&snapshot);
1155
1156        // Reset trace model if configured
1157        if let Some(trace_cfg) = &self.config.trace
1158            && trace_cfg.reset_on_episode
1159            && let Some(model) = &mut self.trace_model
1160        {
1161            model.reset();
1162        }
1163
1164        self.step_in_episode = 0;
1165        self.needs_reset = false;
1166
1167        Ok(())
1168    }
1169
1170    /// Writes action data to shared memory.
1171    fn write_action_to_shared_memory(&mut self, payload: &[u8]) -> anyhow::Result<()> {
1172        let vaddr = self
1173            .shared_vaddr
1174            .ok_or_else(|| anyhow::anyhow!("Shared memory not initialized"))?;
1175        let cr3 = self
1176            .shared_cr3
1177            .ok_or_else(|| anyhow::anyhow!("Shared memory CR3 not initialized"))?;
1178        let process = self.vm.process_memory(cr3);
1179
1180        // Ensure guest has cleared the previous message length to avoid races.
1181        let wait_start = Instant::now();
1182        loop {
1183            let cur_len = process
1184                .read_u64(vaddr + SHARED_ACTION_LEN_OFFSET)
1185                .unwrap_or(0);
1186            if cur_len == 0 {
1187                break;
1188            }
1189            if wait_start.elapsed() > self.config.step_timeout {
1190                return Err(anyhow::anyhow!("shared buffer busy (len={cur_len})"));
1191            }
1192            std::thread::yield_now();
1193        }
1194
1195        // Write length as first 8 bytes (u64 LE)
1196        let len = payload.len() as u64;
1197        process
1198            .write_u64(vaddr + SHARED_ACTION_LEN_OFFSET, len)
1199            .map_err(|e| anyhow::anyhow!("write len failed: {e}"))?;
1200        let _ = process.write_u64(vaddr + SHARED_RESP_LEN_OFFSET, 0);
1201
1202        // Write payload starting at offset 8
1203        let max_len = self
1204            .config
1205            .shared_region_size
1206            .saturating_sub(SHARED_PAYLOAD_OFFSET as usize);
1207        let write_len = payload.len().min(max_len);
1208        if write_len > 0 {
1209            let _ = process
1210                .write_bytes(vaddr + SHARED_PAYLOAD_OFFSET, &payload[..write_len])
1211                .map_err(|e| anyhow::anyhow!("write payload failed: {e}"))?;
1212        }
1213
1214        if self.config.debug_mode {
1215            let verify = process
1216                .read_u64(vaddr + SHARED_ACTION_LEN_OFFSET)
1217                .unwrap_or(0) as usize;
1218            eprintln!(
1219                "[NyxVm] Wrote action len={}, verified len={}",
1220                write_len, verify
1221            );
1222        }
1223
1224        Ok(())
1225    }
1226
1227    /// Reads response from shared memory.
1228    fn read_shared_memory(&self) -> Vec<u8> {
1229        let Some(vaddr) = self.shared_vaddr else {
1230            return Vec::new();
1231        };
1232        let Some(cr3) = self.shared_cr3 else {
1233            return Vec::new();
1234        };
1235        let process = self.vm.process_memory(cr3);
1236
1237        // Read length from first 8 bytes
1238        let len = process
1239            .read_u64(vaddr + SHARED_RESP_LEN_OFFSET)
1240            .unwrap_or(0) as usize;
1241        let max_len = self
1242            .config
1243            .shared_region_size
1244            .saturating_sub(SHARED_PAYLOAD_OFFSET as usize);
1245        let read_len = len.min(max_len);
1246
1247        if read_len == 0 {
1248            return Vec::new();
1249        }
1250
1251        let mut buf = vec![0u8; read_len];
1252        let _ = process.read_bytes(vaddr + SHARED_PAYLOAD_OFFSET, &mut buf);
1253        buf
1254    }
1255
1256    fn clear_shared_length(&self) {
1257        let (Some(vaddr), Some(cr3)) = (self.shared_vaddr, self.shared_cr3) else {
1258            return;
1259        };
1260        let process = self.vm.process_memory(cr3);
1261        let _ = process.write_u64(vaddr + SHARED_ACTION_LEN_OFFSET, 0);
1262        let _ = process.write_u64(vaddr + SHARED_RESP_LEN_OFFSET, 0);
1263    }
1264
    /// Runs a single step, returning detailed results.
    ///
    /// Writes `payload` to shared memory, runs the VM until an exit that ends
    /// the step (or until `config.step_timeout` is used up), then gathers the
    /// shared memory contents and trace data that the configured policies need.
    ///
    /// # Errors
    ///
    /// Fails only if the action cannot be written to shared memory.
    pub fn run_step(&mut self, payload: &[u8]) -> anyhow::Result<NyxStepResult> {
        // Write action to shared memory
        self.write_action_to_shared_memory(payload)?;

        // Run the VM until we get a meaningful exit
        let start = Instant::now();
        let mut output = Vec::new();
        let mut trace_data = Vec::new();
        let mut parsed_obs = None;
        let mut parsed_rew = None;
        let mut done = false;
        // Assigned exactly once, on whichever path leaves the loop below.
        let exit_kind;
        // Guest debug output is only accumulated when some policy consumes it.
        let collect_output =
            matches!(
                self.config.observation_policy,
                NyxObservationPolicy::OutputHash | NyxObservationPolicy::RawOutput
            ) || matches!(self.config.reward_policy, NyxRewardPolicy::Pattern { .. })
                || matches!(
                    self.reward_shaping,
                    Some(NyxRewardShaping::EntropyReduction { .. })
                );

        loop {
            // Time left in this step's budget; zero once the budget is spent.
            let remaining = self
                .config
                .step_timeout
                .checked_sub(start.elapsed())
                .unwrap_or(Duration::ZERO);

            if remaining.is_zero() {
                exit_kind = NyxExitKind::Timeout;
                break;
            }

            let exit = self.vm.run(remaining);
            match exit {
                ExitReason::ExecDone(code) => {
                    exit_kind = NyxExitKind::ExecDone(code);
                    done = true;
                    break;
                }
                ExitReason::Timeout => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] Step timeout");
                    }
                    // A timeout ends the step but does not mark it as done.
                    exit_kind = NyxExitKind::Timeout;
                    break;
                }
                ExitReason::Shutdown => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] VM shutdown during step");
                    }
                    exit_kind = NyxExitKind::Shutdown;
                    done = true;
                    break;
                }
                ExitReason::DebugPrint(msg) => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] Guest: {}", msg);
                    }
                    // Accumulate debug output
                    if collect_output {
                        output.extend_from_slice(msg.as_bytes());
                    }
                    // Continue running
                }
                ExitReason::Hypercall(r8, r9, r10, r11, r12) => {
                    exit_kind = NyxExitKind::Hypercall {
                        code: r8,
                        arg1: r9,
                        arg2: r10,
                        arg3: r11,
                        arg4: r12,
                    };
                    // Attempt to parse structured response
                    // The second value (r9) is taken as the observation and the
                    // third (r10) as the reward; both helpers currently always
                    // return `Some`.
                    if let Some(obs) = Self::try_parse_u64(r9) {
                        parsed_obs = Some(obs);
                    }
                    if let Some(rew) = Self::try_parse_i64(r10) {
                        parsed_rew = Some(rew);
                    }
                    break;
                }
                ExitReason::Breakpoint => {
                    if self.config.debug_mode {
                        eprintln!("[NyxVm] Breakpoint exit during step");
                    }
                    exit_kind = NyxExitKind::Breakpoint;
                    break;
                }
                _ => {
                    // Continue for other exits
                }
            }
        }

        // Read shared memory contents (only if needed)
        // Needed by the shared-memory observation policy, pattern rewards,
        // entropy-reduction shaping, and whenever a trace config is present.
        let need_shared_memory = matches!(
            self.config.observation_policy,
            NyxObservationPolicy::SharedMemory
        ) || matches!(
            self.config.reward_policy,
            NyxRewardPolicy::Pattern { .. }
        ) || matches!(
            self.reward_shaping,
            Some(NyxRewardShaping::EntropyReduction { .. })
        ) || self.config.trace.is_some();
        let shared_memory = if need_shared_memory {
            self.read_shared_memory()
        } else {
            Vec::new()
        };

        // Clear shared length to avoid host/guest races on the next step.
        self.clear_shared_length();

        // Collect trace data if configured
        if let Some(trace_cfg) = &self.config.trace
            && trace_cfg.shared_region_name.is_some()
        {
            // Read from trace shared memory region (implementation-specific)
            // For now, use main shared memory as fallback
            // The copy is capped at `max_bytes`.
            trace_data = shared_memory.clone();
            if trace_data.len() > trace_cfg.max_bytes {
                trace_data.truncate(trace_cfg.max_bytes);
            }
        }

        Ok(NyxStepResult {
            exit_reason: exit_kind,
            output,
            parsed_obs,
            parsed_rew,
            done,
            trace_data,
            shared_memory,
        })
    }
1404
1405    fn try_parse_u64(val: u64) -> Option<u64> {
1406        // Hypercall args are already u64
1407        Some(val)
1408    }
1409
1410    fn try_parse_i64(val: u64) -> Option<i64> {
1411        Some(val as i64)
1412    }
1413
1414    /// Gets the action payload for the given action index.
1415    fn get_action_payload(&mut self, action: Action) -> anyhow::Result<Cow<'_, [u8]>> {
1416        match &self.config.action_source {
1417            NyxActionSource::Literal(actions) => {
1418                let idx = action as usize;
1419                if idx >= actions.len() {
1420                    return Err(anyhow::anyhow!("Action index out of range"));
1421                }
1422                Ok(Cow::Borrowed(actions[idx].payload.as_slice()))
1423            }
1424            NyxActionSource::Fuzz(fuzz) => {
1425                let state = self
1426                    .fuzz_state
1427                    .as_mut()
1428                    .ok_or_else(|| anyhow::anyhow!("Fuzz state missing"))?;
1429                let idx = action as usize % fuzz.mutators.len();
1430                let mut input = state.current.clone();
1431                let mutator = &fuzz.mutators[idx];
1432                apply_mutator(mutator, &mut input, fuzz, &mut state.rng);
1433                if input.len() < fuzz.min_len {
1434                    input.resize(fuzz.min_len, 0);
1435                }
1436                if input.len() > fuzz.max_len {
1437                    input.truncate(fuzz.max_len);
1438                }
1439                state.current = input.clone();
1440                Ok(Cow::Owned(input))
1441            }
1442        }
1443    }
1444
1445    /// Applies action filtering, returning reject reward if filtered.
1446    fn filter_action(&self, payload: &[u8]) -> Option<i64> {
1447        let filter = self.config.action_filter.as_ref()?;
1448        if payload.is_empty() {
1449            return filter.reject_reward;
1450        }
1451
1452        let (entropy, intrinsic, novelty) = self.compute_filter_metrics(payload, filter);
1453
1454        if let Some(min_entropy) = filter.min_entropy
1455            && entropy < min_entropy
1456        {
1457            return filter.reject_reward;
1458        }
1459        if let Some(max_entropy) = filter.max_entropy
1460            && entropy > max_entropy
1461        {
1462            return filter.reject_reward;
1463        }
1464        if let Some(min_intrinsic) = filter.min_intrinsic_dependence
1465            && intrinsic < min_intrinsic
1466        {
1467            return filter.reject_reward;
1468        }
1469        if let Some(min_novelty) = filter.min_novelty
1470            && filter.novelty_prior.is_some()
1471            && novelty < min_novelty
1472        {
1473            return filter.reject_reward;
1474        }
1475        None
1476    }
1477
1478    fn wrap_action_payload(&self, payload: &[u8]) -> Vec<u8> {
1479        let p = &self.config.protocol;
1480        let mut wrapped = p.action_prefix.clone().into_bytes();
1481        wrapped.extend_from_slice(p.wire_encoding.encode(payload).as_bytes());
1482        wrapped.extend_from_slice(p.action_suffix.as_bytes());
1483        wrapped
1484    }
1485
1486    fn compute_filter_metrics(&self, payload: &[u8], filter: &NyxActionFilter) -> (f64, f64, f64) {
1487        let h_marg = marginal_entropy_bytes(payload);
1488        let h_rate = if filter.max_order == 0 {
1489            h_marg
1490        } else {
1491            entropy_rate_backend(payload, filter.max_order, &self.config.stats_backend)
1492        };
1493
1494        let intrinsic = if h_marg < 1e-9 {
1495            0.0
1496        } else {
1497            ((h_marg - h_rate) / h_marg).clamp(0.0, 1.0)
1498        };
1499
1500        let novelty = if let Some(ref prior) = filter.novelty_prior {
1501            cross_entropy_rate_backend(payload, prior, filter.max_order, &self.config.stats_backend)
1502        } else {
1503            0.0
1504        };
1505
1506        (h_rate, intrinsic, novelty)
1507    }
1508
1509    /// Computes reward from step result.
1510    fn compute_reward(&mut self, result: &NyxStepResult) -> Reward {
1511        let base_reward = match &self.config.reward_policy {
1512            NyxRewardPolicy::FromGuest => result.parsed_rew.unwrap_or(0),
1513            NyxRewardPolicy::Pattern {
1514                pattern,
1515                base_reward,
1516                bonus_reward,
1517            } => {
1518                let text = String::from_utf8_lossy(&result.output);
1519                let shared_text = String::from_utf8_lossy(&result.shared_memory);
1520                if text.contains(pattern) || shared_text.contains(pattern) {
1521                    base_reward + bonus_reward
1522                } else {
1523                    *base_reward
1524                }
1525            }
1526            NyxRewardPolicy::Custom(f) => f(result),
1527        };
1528
1529        let shaping_reward = if let Some(shaping) = self.reward_shaping.clone() {
1530            self.compute_reward_shaping(&shaping, result)
1531        } else {
1532            0
1533        };
1534
1535        let mut reward = base_reward.saturating_add(shaping_reward);
1536
1537        reward = reward.saturating_sub(self.config.step_cost);
1538        let min_reward = self.min_reward();
1539        let max_reward = self.max_reward();
1540        reward.clamp(min_reward, max_reward)
1541    }
1542
1543    fn compute_reward_shaping(
1544        &mut self,
1545        shaping: &NyxRewardShaping,
1546        result: &NyxStepResult,
1547    ) -> Reward {
1548        match shaping {
1549            NyxRewardShaping::EntropyReduction {
1550                max_order,
1551                scale,
1552                crash_bonus,
1553                timeout_bonus,
1554                ..
1555            } => {
1556                let mut base_reward = {
1557                    let data = if result.shared_memory.is_empty() {
1558                        &result.output
1559                    } else {
1560                        &result.shared_memory
1561                    };
1562                    let h_obs = if *max_order == 0 {
1563                        marginal_entropy_bytes(data)
1564                    } else {
1565                        entropy_rate_backend(data, *max_order, &self.config.stats_backend)
1566                    };
1567                    let h_base = self.baseline_entropy.unwrap_or(0.0);
1568                    let er = (h_base - h_obs) * scale;
1569                    er.round() as i64
1570                };
1571
1572                // Add bonuses for interesting behaviors (bugs/crashes)
1573                match &result.exit_reason {
1574                    NyxExitKind::Shutdown | NyxExitKind::Breakpoint => {
1575                        if let Some(bonus) = crash_bonus {
1576                            base_reward = base_reward.saturating_add(*bonus);
1577                        }
1578                    }
1579                    NyxExitKind::Timeout => {
1580                        if let Some(bonus) = timeout_bonus {
1581                            base_reward = base_reward.saturating_add(*bonus);
1582                        }
1583                    }
1584                    _ => {}
1585                }
1586
1587                base_reward
1588            }
1589            NyxRewardShaping::TraceEntropy {
1590                scale, normalize, ..
1591            } => {
1592                let data = &result.trace_data;
1593                let bits = match self.trace_model.as_mut() {
1594                    Some(model) => model.update_and_score(data),
1595                    None => 0.0,
1596                };
1597                let bits = if *normalize && !data.is_empty() {
1598                    bits / data.len() as f64
1599                } else {
1600                    bits
1601                };
1602                (bits * scale).round() as i64
1603            }
1604        }
1605    }
1606
1607    fn mask_observation(&self, value: u64) -> u64 {
1608        let bits = self.config.observation_bits;
1609        if bits >= 64 {
1610            value
1611        } else if bits == 0 {
1612            0
1613        } else {
1614            value & ((1u64 << bits) - 1)
1615        }
1616    }
1617
1618    fn build_observation_stream(&self, result: &NyxStepResult) -> Vec<PerceptVal> {
1619        let mut observations = match self.config.observation_policy {
1620            NyxObservationPolicy::FromGuest => {
1621                if let Some(obs) = result.parsed_obs {
1622                    vec![self.mask_observation(obs)]
1623                } else {
1624                    vec![self.hash_observation(&result.shared_memory)]
1625                }
1626            }
1627            NyxObservationPolicy::OutputHash => {
1628                vec![self.hash_observation(&result.output)]
1629            }
1630            NyxObservationPolicy::RawOutput => {
1631                result.output.iter().map(|b| *b as PerceptVal).collect()
1632            }
1633            NyxObservationPolicy::SharedMemory => result
1634                .shared_memory
1635                .iter()
1636                .map(|b| *b as PerceptVal)
1637                .collect(),
1638        };
1639
1640        if observations.is_empty() {
1641            observations.push(0);
1642        }
1643
1644        self.normalize_observation_stream(&mut observations);
1645        observations
1646    }
1647
1648    fn hash_observation(&self, data: &[u8]) -> PerceptVal {
1649        let h = robust_hash_bytes(data);
1650        self.mask_observation(h)
1651    }
1652
1653    fn normalize_observation_stream(&self, observations: &mut Vec<PerceptVal>) {
1654        let mask = if self.config.observation_bits >= 64 {
1655            u64::MAX
1656        } else if self.config.observation_bits == 0 {
1657            0
1658        } else {
1659            (1u64 << self.config.observation_bits) - 1
1660        };
1661
1662        for obs in observations.iter_mut() {
1663            *obs &= mask;
1664        }
1665
1666        let target = self.config.observation_stream_len;
1667        if target == 0 {
1668            return;
1669        }
1670
1671        if observations.len() > target {
1672            match self.config.observation_stream_mode {
1673                NyxObservationStreamMode::Truncate | NyxObservationStreamMode::PadTruncate => {
1674                    observations.truncate(target);
1675                }
1676                NyxObservationStreamMode::Pad => {}
1677            }
1678        } else if observations.len() < target {
1679            match self.config.observation_stream_mode {
1680                NyxObservationStreamMode::Pad | NyxObservationStreamMode::PadTruncate => {
1681                    let pad = self.config.observation_pad_byte as PerceptVal;
1682                    observations.resize(target, pad);
1683                }
1684                NyxObservationStreamMode::Truncate => {}
1685            }
1686        }
1687    }
1688
1689    fn action_count(&self) -> usize {
1690        match &self.config.action_source {
1691            NyxActionSource::Literal(actions) => actions.len(),
1692            NyxActionSource::Fuzz(fuzz) => fuzz.mutators.len(),
1693        }
1694    }
1695
    /// Direct access to the underlying NyxVM for advanced use cases.
    ///
    /// Returns a shared borrow of the VM owned by this environment; use
    /// `vm_mut` when the VM has to be modified.
    pub fn vm(&self) -> &NyxVM {
        &self.vm
    }
1700
    /// Mutable access to the underlying NyxVM.
    ///
    /// NOTE(review): changes made through this handle bypass the environment's
    /// own bookkeeping (step counter, pending-reset flag); callers are
    /// presumably expected to keep the two consistent themselves.
    pub fn vm_mut(&mut self) -> &mut NyxVM {
        &mut self.vm
    }
1705
    /// Takes a new snapshot at the current state.
    ///
    /// Delegates to `NyxVM::take_snapshot` and returns the shared snapshot
    /// handle, which can later be handed to `apply_snapshot`.
    pub fn take_snapshot(&mut self) -> Arc<NyxSnapshot> {
        self.vm.take_snapshot()
    }
1710
    /// Applies a specific snapshot.
    ///
    /// Delegates to `NyxVM::apply_snapshot`. Only the VM is touched here; the
    /// environment's cached observation, reward and step counter are left
    /// unchanged.
    pub fn apply_snapshot(&mut self, snapshot: &Arc<NyxSnapshot>) {
        self.vm.apply_snapshot(snapshot);
    }
1715
1716    /// Resets trace model.
1717    pub fn reset_trace_model(&mut self) {
1718        if let Some(model) = &mut self.trace_model {
1719            model.reset();
1720        }
1721    }
1722
1723    /// Logs crashes and interesting behaviors to file.
1724    fn log_crash(&self, action_payload: &[u8], result: &NyxStepResult, reward: i64) {
1725        let Some(log_path) = &self.config.crash_log else {
1726            return;
1727        };
1728
1729        // Only log interesting exits
1730        let is_interesting = matches!(
1731            result.exit_reason,
1732            NyxExitKind::Shutdown | NyxExitKind::Breakpoint | NyxExitKind::Timeout
1733        );
1734
1735        if !is_interesting {
1736            return;
1737        }
1738
1739        let log_entry = serde_json::json!({
1740            "timestamp": std::time::SystemTime::now()
1741                .duration_since(std::time::UNIX_EPOCH)
1742                .unwrap_or_default()
1743                .as_secs(),
1744            "exit_reason": format!("{:?}", result.exit_reason),
1745            "action_payload": hex_encode(action_payload),
1746            "action_payload_str": String::from_utf8_lossy(action_payload),
1747            "output": String::from_utf8_lossy(&result.output),
1748            "shared_memory": hex_encode(&result.shared_memory),
1749            "reward": reward,
1750            "parsed_obs": result.parsed_obs,
1751            "parsed_rew": result.parsed_rew,
1752        });
1753
1754        // Append to JSONL file
1755        if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(log_path)
1756            && let Ok(json_str) = serde_json::to_string(&log_entry)
1757        {
1758            let _ = writeln!(file, "{}", json_str);
1759        }
1760    }
1761}
1762
1763// ============================================================================
1764// Environment Trait Implementation
1765// ============================================================================
1766
1767impl Environment for NyxVmEnvironment {
1768    fn perform_action(&mut self, action: Action) {
1769        if self.needs_reset
1770            && let Err(e) = self.reset()
1771            && self.config.debug_mode
1772        {
1773            eprintln!("[NyxVm] Reset failed: {}", e);
1774        }
1775
1776        let payload = match self.get_action_payload(action) {
1777            Ok(payload) => payload.into_owned(),
1778            Err(e) => {
1779                if self.config.debug_mode {
1780                    eprintln!("[NyxVm] Invalid action: {}", e);
1781                }
1782                self.obs = 0;
1783                self.rew = self.min_reward();
1784                self.obs_stream.clear();
1785                self.obs_stream.push(0);
1786                self.step_in_episode = (self.step_in_episode + 1) % self.config.episode_steps;
1787                if self.step_in_episode == 0 {
1788                    self.needs_reset = true;
1789                }
1790                return;
1791            }
1792        };
1793
1794        // Check action filter
1795        if let Some(reject_reward) = self.filter_action(&payload) {
1796            self.obs = 0;
1797            self.rew = reject_reward.clamp(self.min_reward(), self.max_reward());
1798            self.obs_stream.clear();
1799            self.obs_stream.push(0);
1800            self.step_in_episode = (self.step_in_episode + 1) % self.config.episode_steps;
1801            if self.step_in_episode == 0 {
1802                self.needs_reset = true;
1803            }
1804            return;
1805        }
1806
1807        // Run the step
1808        let wrapped_payload = self.wrap_action_payload(&payload);
1809        let result = match self.run_step(&wrapped_payload) {
1810            Ok(result) => result,
1811            Err(e) => {
1812                if self.config.debug_mode {
1813                    eprintln!("[NyxVm] Step failed: {}", e);
1814                }
1815                self.obs = 0;
1816                self.rew = self.min_reward();
1817                self.obs_stream.clear();
1818                self.obs_stream.push(0);
1819                self.step_in_episode = (self.step_in_episode + 1) % self.config.episode_steps;
1820                if self.step_in_episode == 0 {
1821                    self.needs_reset = true;
1822                }
1823                return;
1824            }
1825        };
1826
1827        // Process results
1828        self.obs_stream = self.build_observation_stream(&result);
1829        self.obs = self.obs_stream.first().copied().unwrap_or(0);
1830        self.rew = self.compute_reward(&result);
1831
1832        // Log crashes and interesting behaviors
1833        self.log_crash(&payload, &result, self.rew);
1834
1835        if self.config.debug_mode {
1836            eprintln!(
1837                "[NyxVm] Action={} Obs={} Rew={} Done={:?} Exit={:?}",
1838                action, self.obs, self.rew, result.done, result.exit_reason
1839            );
1840        }
1841
1842        self.step_in_episode = (self.step_in_episode + 1) % self.config.episode_steps;
1843        if self.step_in_episode == 0 || result.done {
1844            self.needs_reset = true;
1845        }
1846    }
1847
1848    fn get_observation(&self) -> PerceptVal {
1849        self.obs
1850    }
1851
1852    fn drain_observations(&mut self) -> Vec<PerceptVal> {
1853        if self.obs_stream.is_empty() {
1854            vec![self.obs]
1855        } else {
1856            std::mem::take(&mut self.obs_stream)
1857        }
1858    }
1859
1860    fn get_reward(&self) -> Reward {
1861        self.rew
1862    }
1863
1864    fn is_finished(&self) -> bool {
1865        false
1866    }
1867
1868    fn get_observation_bits(&self) -> usize {
1869        self.config.observation_bits
1870    }
1871
1872    fn get_reward_bits(&self) -> usize {
1873        self.config.reward_bits
1874    }
1875
1876    fn get_action_bits(&self) -> usize {
1877        let n = self.action_count();
1878        if n <= 1 {
1879            return 1;
1880        }
1881        (n as f64).log2().ceil() as usize
1882    }
1883
1884    fn get_num_actions(&self) -> usize {
1885        self.action_count()
1886    }
1887
1888    fn max_reward(&self) -> Reward {
1889        let bits = self.config.reward_bits;
1890        if bits >= 64 {
1891            i64::MAX
1892        } else if bits == 0 {
1893            0
1894        } else {
1895            (1i64 << (bits - 1)) - 1
1896        }
1897    }
1898
1899    fn min_reward(&self) -> Reward {
1900        let bits = self.config.reward_bits;
1901        if bits >= 64 {
1902            i64::MIN
1903        } else if bits == 0 {
1904            0
1905        } else {
1906            -(1i64 << (bits - 1))
1907        }
1908    }
1909}
1910
1911// ============================================================================
1912// Helper Functions
1913// ============================================================================
1914
/// Folds `data` into a 64-bit digest.
///
/// Starting from zero, the accumulator is rotated left by seven bits and
/// XOR-ed with each byte in turn. The result is deterministic; an empty slice
/// hashes to 0, and leading zero bytes do not affect the digest.
fn robust_hash_bytes(data: &[u8]) -> u64 {
    data.iter()
        .fold(0u64, |acc, &byte| acc.rotate_left(7) ^ u64::from(byte))
}
1922
1923fn apply_mutator(
1924    mutator: &FuzzMutator,
1925    input: &mut Vec<u8>,
1926    fuzz: &NyxFuzzConfig,
1927    rng: &mut RandomGenerator,
1928) {
1929    match mutator {
1930        FuzzMutator::FlipBit => {
1931            if input.is_empty() {
1932                input.push(0);
1933            }
1934            let idx = rng.gen_range(input.len());
1935            let bit = rng.gen_range(8);
1936            input[idx] ^= 1u8 << bit;
1937        }
1938        FuzzMutator::FlipByte => {
1939            if input.is_empty() {
1940                input.push(0);
1941            }
1942            let idx = rng.gen_range(input.len());
1943            input[idx] ^= rng.next_u64() as u8;
1944        }
1945        FuzzMutator::InsertByte => {
1946            let idx = if input.is_empty() {
1947                0
1948            } else {
1949                rng.gen_range(input.len() + 1)
1950            };
1951            let byte = if !fuzz.dictionary.is_empty() {
1952                let d = rng.gen_range(fuzz.dictionary.len());
1953                let entry = &fuzz.dictionary[d];
1954                if entry.is_empty() {
1955                    0
1956                } else {
1957                    entry[rng.gen_range(entry.len())]
1958                }
1959            } else {
1960                rng.next_u64() as u8
1961            };
1962            input.insert(idx, byte);
1963        }
1964        FuzzMutator::DeleteByte => {
1965            if input.len() > 1 {
1966                let idx = rng.gen_range(input.len());
1967                input.remove(idx);
1968            }
1969        }
1970        FuzzMutator::SpliceSeed => {
1971            if fuzz.seeds.is_empty() {
1972                return;
1973            }
1974            let seed = &fuzz.seeds[rng.gen_range(fuzz.seeds.len())];
1975            if input.is_empty() {
1976                input.extend_from_slice(seed);
1977            } else if !seed.is_empty() {
1978                let cut = rng.gen_range(input.len());
1979                let seed_cut = rng.gen_range(seed.len());
1980                let mut out = Vec::new();
1981                out.extend_from_slice(&input[..cut]);
1982                out.extend_from_slice(&seed[seed_cut..]);
1983                *input = out;
1984            }
1985        }
1986        FuzzMutator::ResetSeed => {
1987            if fuzz.seeds.is_empty() {
1988                return;
1989            }
1990            *input = fuzz.seeds[rng.gen_range(fuzz.seeds.len())].clone();
1991        }
1992        FuzzMutator::Havoc => {
1993            let flips = 1 + rng.gen_range(8);
1994            for _ in 0..flips {
1995                if input.is_empty() {
1996                    input.push(0);
1997                }
1998                let idx = rng.gen_range(input.len());
1999                input[idx] ^= rng.next_u64() as u8;
2000            }
2001        }
2002    }
2003}
2004
2005// ============================================================================
2006// Tests
2007// ============================================================================
2008
#[cfg(test)]
mod tests {
    use super::*;

    /// Hex encoding produces lowercase hex and round-trips through decoding.
    #[test]
    fn test_hex_encoding() {
        let payload = b"hello";
        let hex = hex_encode(payload);
        assert_eq!(hex, "68656c6c6f");
        assert_eq!(hex_decode(&hex).unwrap(), payload);
    }

    /// Equal inputs hash equally; these two different inputs hash differently.
    #[test]
    fn test_robust_hash() {
        let baseline = robust_hash_bytes(b"test data");

        assert_eq!(baseline, robust_hash_bytes(b"test data"));
        assert_ne!(baseline, robust_hash_bytes(b"different"));
    }

    /// Both payload encodings encode and decode a short ASCII payload.
    #[test]
    fn test_payload_encoding() {
        let raw = b"test";

        assert_eq!(PayloadEncoding::Utf8.encode(raw), "test");
        assert_eq!(PayloadEncoding::Hex.encode(raw), "74657374");

        assert_eq!(PayloadEncoding::Utf8.decode("test").unwrap(), raw);
        assert_eq!(PayloadEncoding::Hex.decode("74657374").unwrap(), raw);
    }

    /// Every predictor-backed rate backend can drive a `TraceModel`, both
    /// freshly constructed and after a reset.
    #[test]
    fn trace_model_supports_predictor_backed_backends() {
        let ctw_base = || RateBackend::Ctw { depth: 8 };

        let backends = vec![
            RateBackend::Match {
                hash_bits: 20,
                min_len: 4,
                max_len: 255,
                base_mix: 0.02,
                confidence_scale: 1.0,
            },
            RateBackend::SparseMatch {
                hash_bits: 19,
                min_len: 3,
                max_len: 64,
                gap_min: 1,
                gap_max: 2,
                base_mix: 0.05,
                confidence_scale: 1.0,
            },
            RateBackend::Ppmd {
                order: 8,
                memory_mb: 8,
            },
            RateBackend::Calibrated {
                spec: Arc::new(crate::CalibratedSpec {
                    base: ctw_base(),
                    context: crate::CalibrationContextKind::Text,
                    bins: 33,
                    learning_rate: 0.02,
                    bias_clip: 4.0,
                }),
            },
            RateBackend::Particle {
                spec: Arc::new(crate::ParticleSpec {
                    num_particles: 4,
                    num_cells: 4,
                    cell_dim: 8,
                    ..crate::ParticleSpec::default()
                }),
            },
            RateBackend::Mixture {
                spec: Arc::new(crate::MixtureSpec::new(
                    crate::MixtureKind::Bayes,
                    vec![crate::MixtureExpertSpec {
                        name: Some("ctw".to_string()),
                        log_prior: 0.0,
                        max_order: -1,
                        backend: ctw_base(),
                    }],
                )),
            },
        ];

        for backend in &backends {
            let mut model = TraceModel::new(backend, 4);

            // Score on a fresh model.
            let bits = model.update_and_score(b"trace payload");
            assert!(bits.is_finite() && bits >= 0.0, "bits={bits}");

            // Score again after wiping the model state.
            model.reset();
            let bits_after_reset = model.update_and_score(b"trace payload");
            assert!(
                bits_after_reset.is_finite() && bits_after_reset >= 0.0,
                "bits_after_reset={bits_after_reset}"
            );
        }
    }
}