1use crate::aixi::common::{Action, PerceptVal, RandomGenerator, Reward};
24use crate::aixi::environment::Environment;
25#[cfg(feature = "backend-rwkv")]
26use crate::coders::softmax_pdf_inplace;
27#[cfg(feature = "backend-mamba")]
28use crate::mambazip;
29#[cfg(feature = "backend-mamba")]
30use crate::mambazip::Compressor as MambaCompressor;
31use crate::mixture::OnlineBytePredictor;
32use crate::rosaplus::RosaPlus;
33#[cfg(feature = "backend-rwkv")]
34use crate::rwkvzip::Compressor;
35use crate::zpaq_rate::ZpaqRateModel;
36use crate::{
37 RateBackend, cross_entropy_rate_backend, entropy_rate_backend, marginal_entropy_bytes,
38};
39use serde_json::Value;
40use std::borrow::Cow;
41use std::fs::OpenOptions;
42use std::io::Write;
43use std::path::Path;
44use std::sync::Arc;
45use std::time::{Duration, Instant};
46
47pub use nyx_lite::mem::SharedMemoryRegion;
49pub use nyx_lite::snapshot::NyxSnapshot;
50pub use nyx_lite::{ExitReason, NyxVM, SharedMemoryPolicy};
51
/// Text encoding used to carry a binary payload inside a string.
///
/// See [`PayloadEncoding::decode`] and [`PayloadEncoding::encode`] for the
/// conversions in each direction.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PayloadEncoding {
    /// The string's own UTF-8 bytes are the payload.
    Utf8,
    /// The payload is written as hexadecimal digits, two per byte.
    Hex,
}
64
65impl PayloadEncoding {
66 #[allow(clippy::should_implement_trait)]
70 pub fn from_str(s: &str) -> Option<Self> {
71 Self::parse(s)
72 }
73
74 pub fn parse(s: &str) -> Option<Self> {
78 match s {
79 "utf8" | "text" => Some(Self::Utf8),
80 "hex" => Some(Self::Hex),
81 _ => None,
82 }
83 }
84
85 pub fn decode(self, s: &str) -> anyhow::Result<Vec<u8>> {
87 match self {
88 Self::Utf8 => Ok(s.as_bytes().to_vec()),
89 Self::Hex => hex_decode(s),
90 }
91 }
92
93 pub fn encode(self, bytes: &[u8]) -> String {
95 match self {
96 Self::Utf8 => String::from_utf8_lossy(bytes).to_string(),
97 Self::Hex => hex_encode(bytes),
98 }
99 }
100}
101
102impl std::str::FromStr for PayloadEncoding {
103 type Err = &'static str;
104
105 fn from_str(s: &str) -> Result<Self, Self::Err> {
106 Self::parse(s).ok_or("unknown payload encoding")
107 }
108}
109
110fn hex_decode(s: &str) -> anyhow::Result<Vec<u8>> {
111 let mut out = Vec::with_capacity(s.len() / 2);
112 let mut buf = 0u8;
113 let mut high = true;
114 for c in s.bytes() {
115 let v = match c {
116 b'0'..=b'9' => c - b'0',
117 b'a'..=b'f' => c - b'a' + 10,
118 b'A'..=b'F' => c - b'A' + 10,
119 b' ' | b'\n' | b'\r' | b'\t' => continue,
120 _ => return Err(anyhow::anyhow!("invalid hex byte: {}", c as char)),
121 };
122 if high {
123 buf = v << 4;
124 high = false;
125 } else {
126 buf |= v;
127 out.push(buf);
128 high = true;
129 }
130 }
131 if !high {
132 return Err(anyhow::anyhow!("hex string has odd length"));
133 }
134 Ok(out)
135}
136
/// Resolves `path` against `base` unless it is already absolute.
///
/// Absolute paths are returned unchanged; relative ones are joined onto
/// `base` and converted back to a string (lossily, if the result is not
/// valid UTF-8).
fn resolve_relative_path(base: &Path, path: &str) -> String {
    let candidate = Path::new(path);
    if candidate.is_relative() {
        return base.join(candidate).to_string_lossy().into_owned();
    }
    path.to_owned()
}
145
146fn rewrite_firecracker_config_paths(config_path: &str, raw_json: &str) -> anyhow::Result<String> {
147 let base_dir = Path::new(config_path)
148 .parent()
149 .unwrap_or_else(|| Path::new("."));
150 let mut v: Value = serde_json::from_str(raw_json)?;
151
152 if let Some(boot) = v.get_mut("boot-source") {
153 if let Some(path_val) = boot.get_mut("kernel_image_path")
154 && let Some(path_str) = path_val.as_str()
155 {
156 let resolved = resolve_relative_path(base_dir, path_str);
157 *path_val = Value::String(resolved);
158 }
159 if let Some(path_val) = boot.get_mut("initrd_path")
160 && let Some(path_str) = path_val.as_str()
161 {
162 let resolved = resolve_relative_path(base_dir, path_str);
163 *path_val = Value::String(resolved);
164 }
165 }
166
167 if let Some(drives) = v.get_mut("drives").and_then(|d| d.as_array_mut()) {
168 for drive in drives {
169 if let Some(path_val) = drive.get_mut("path_on_host")
170 && let Some(path_str) = path_val.as_str()
171 {
172 let resolved = resolve_relative_path(base_dir, path_str);
173 *path_val = Value::String(resolved);
174 }
175 }
176 }
177
178 Ok(serde_json::to_string(&v)?)
179}
180
/// Encodes `bytes` as lower-case hexadecimal, two digits per byte.
fn hex_encode(bytes: &[u8]) -> String {
    bytes
        .iter()
        .flat_map(|&byte| [hex_digit(byte >> 4), hex_digit(byte & 0x0F)])
        .collect()
}

/// Converts a nibble to its lower-case hex digit.
///
/// Callers are expected to pass a value in `0..=15`; larger values are not
/// rejected and simply continue past `'f'`.
fn hex_digit(v: u8) -> char {
    if v <= 9 {
        char::from(b'0' + v)
    } else {
        char::from(b'a' + (v - 10))
    }
}
196
// Hypercall identifiers. Each value is the 8-byte ASCII tag shown below,
// read as a little-endian `u64`.

/// Hypercall tag `"execdone"`.
#[allow(dead_code)]
pub const HYPERCALL_EXECDONE: u64 = u64::from_le_bytes(*b"execdone");
/// Hypercall tag `"snapshot"`.
#[allow(dead_code)]
pub const HYPERCALL_SNAPSHOT: u64 = u64::from_le_bytes(*b"snapshot");
/// Hypercall tag `"nyx-lite"`.
#[allow(dead_code)]
pub const HYPERCALL_NYX_LITE: u64 = u64::from_le_bytes(*b"nyx-lite");
/// Hypercall tag `"sharemem"`.
#[allow(dead_code)]
pub const HYPERCALL_SHAREMEM: u64 = u64::from_le_bytes(*b"sharemem");
/// Hypercall tag `"dbgprint"`.
#[allow(dead_code)]
pub const HYPERCALL_DBGPRINT: u64 = u64::from_le_bytes(*b"dbgprint");

// Layout of the shared region, as byte offsets from its base address.

/// Offset of the `u64` holding the length of the action written by the host.
const SHARED_ACTION_LEN_OFFSET: u64 = 0;
/// Offset of the `u64` holding the length of the response read by the host.
const SHARED_RESP_LEN_OFFSET: u64 = 8;
/// Offset at which payload bytes (action or response) start.
const SHARED_PAYLOAD_OFFSET: u64 = 16;
221
222#[derive(Clone, Debug)]
224pub struct NyxProtocolConfig {
225 pub action_prefix: String,
227 pub action_suffix: String,
229 pub obs_prefix: String,
231 pub rew_prefix: String,
233 pub done_prefix: String,
235 pub data_prefix: String,
237 pub wire_encoding: PayloadEncoding,
239}
240
241impl Default for NyxProtocolConfig {
242 fn default() -> Self {
243 Self {
244 action_prefix: "ACT ".to_string(),
245 action_suffix: "\n".to_string(),
246 obs_prefix: "OBS ".to_string(),
247 rew_prefix: "REW ".to_string(),
248 done_prefix: "DONE ".to_string(),
249 data_prefix: "DATA ".to_string(),
250 wire_encoding: PayloadEncoding::Hex,
251 }
252 }
253}
254
/// One fixed action available in literal mode.
///
/// The agent's action value is used as an index into the list of specs and
/// the selected `payload` is what gets sent to the guest.
#[derive(Clone, Debug)]
pub struct NyxActionSpec {
    /// Optional human-readable label; not consulted in the visible code.
    pub name: Option<String>,
    /// Raw bytes delivered to the guest for this action.
    pub payload: Vec<u8>,
}
267
/// Mutation operators available in fuzz mode.
///
/// In fuzz mode the agent's action selects one of the configured mutators
/// (`action % mutators.len()`), which is then applied to the current input
/// by `apply_mutator`. The exact effect of each variant is defined there,
/// not in this part of the file.
#[derive(Clone, Debug)]
pub enum FuzzMutator {
    FlipBit,
    FlipByte,
    InsertByte,
    DeleteByte,
    SpliceSeed,
    ResetSeed,
    Havoc,
}
286
287#[derive(Clone, Debug)]
289pub struct NyxFuzzConfig {
290 pub seeds: Vec<Vec<u8>>,
292 pub mutators: Vec<FuzzMutator>,
294 pub min_len: usize,
296 pub max_len: usize,
298 pub dictionary: Vec<Vec<u8>>,
300 pub rng_seed: u64,
302}
303
304#[derive(Clone, Debug)]
306pub enum NyxActionSource {
307 Literal(Vec<NyxActionSpec>),
309 Fuzz(NyxFuzzConfig),
311}
312
/// How the observation for a step is obtained.
///
/// `run_step` uses this to decide what to collect: guest debug output is
/// gathered for `OutputHash` and `RawOutput`, and the shared-memory response
/// is read for `SharedMemory`. How the collected data becomes a percept is
/// decided outside the visible part of the file.
#[derive(Clone, Copy, Debug)]
pub enum NyxObservationPolicy {
    /// Use the observation value reported by the guest.
    FromGuest,
    /// Derive the observation from the guest's collected output.
    OutputHash,
    /// Use the guest's collected output directly; requires
    /// `observation_stream_len > 0`.
    RawOutput,
    /// Derive the observation from the shared-memory response.
    SharedMemory,
}
329
/// How an observation byte stream is fitted to `observation_stream_len`.
///
/// NOTE(review): the code that applies these modes is outside the visible
/// part of the file; the variant names suggest padding and/or truncation —
/// confirm against the consumer.
#[derive(Clone, Copy, Debug)]
pub enum NyxObservationStreamMode {
    PadTruncate,
    Pad,
    Truncate,
}
340
341#[derive(Clone)]
347pub enum NyxRewardPolicy {
348 FromGuest,
350 Pattern {
352 pattern: String,
354 base_reward: i64,
356 bonus_reward: i64,
358 },
359 Custom(Arc<dyn Fn(&NyxStepResult) -> Reward + Send + Sync>),
361}
362
/// Optional shaping term added to the base reward.
///
/// The shaping computation itself lives outside the visible part of the
/// file; field notes below only state what the visible code establishes.
#[derive(Clone, Debug)]
pub enum NyxRewardShaping {
    /// Shaping relative to the entropy of a baseline byte string.
    EntropyReduction {
        /// Bytes whose entropy is measured once at construction time.
        baseline_bytes: Vec<u8>,
        /// Context order for the entropy estimate; `0` selects the
        /// marginal (order-0) byte entropy instead of the rate backend.
        max_order: i64,
        /// Multiplier for the shaping term (applied elsewhere).
        scale: f64,
        /// Optional bonus associated with crashes (applied elsewhere).
        crash_bonus: Option<i64>,
        /// Optional bonus associated with timeouts (applied elsewhere).
        timeout_bonus: Option<i64>,
    },
    /// Shaping based on an online model of the trace data; requires a
    /// trace configuration to be present.
    TraceEntropy {
        /// Order handed to the trace model at construction time.
        max_order: i64,
        /// Multiplier for the shaping term (applied elsewhere).
        scale: f64,
        /// Whether the score is normalized (applied elsewhere).
        normalize: bool,
    },
}
389
390impl std::fmt::Debug for NyxRewardPolicy {
391 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
392 match self {
393 Self::FromGuest => write!(f, "FromGuest"),
394 Self::Pattern {
395 pattern,
396 base_reward,
397 bonus_reward,
398 } => f
399 .debug_struct("Pattern")
400 .field("pattern", pattern)
401 .field("base_reward", base_reward)
402 .field("bonus_reward", bonus_reward)
403 .finish(),
404 Self::Custom(_) => write!(f, "Custom(<fn>)"),
405 }
406 }
407}
408
/// Pre-execution filter on action payloads.
///
/// A payload is rejected when it is empty or when any configured threshold
/// is violated; the filter then yields `reject_reward`.
#[derive(Clone, Debug)]
pub struct NyxActionFilter {
    /// Reject when the payload's entropy estimate is below this value.
    pub min_entropy: Option<f64>,
    /// Reject when the payload's entropy estimate is above this value.
    pub max_entropy: Option<f64>,
    /// Reject when `(H_marginal - H_rate) / H_marginal` (clamped to
    /// `[0, 1]`) is below this value.
    pub min_intrinsic_dependence: Option<f64>,
    /// Reject when the cross-entropy against `novelty_prior` is below this
    /// value; only checked when `novelty_prior` is set.
    pub min_novelty: Option<f64>,
    /// Reference bytes for the novelty (cross-entropy) estimate.
    pub novelty_prior: Option<Vec<u8>>,
    /// Context order for the estimates; `0` uses the marginal byte entropy
    /// as the entropy estimate.
    pub max_order: i64,
    /// Value returned by the filter for a rejected payload.
    pub reject_reward: Option<i64>,
}
431
/// Settings for collecting per-step trace data.
#[derive(Clone, Debug)]
pub struct NyxTraceConfig {
    /// When set, each step's trace data is taken from the shared-memory
    /// response. The name itself is not inspected in the visible code.
    pub shared_region_name: Option<String>,
    /// Upper bound on the number of trace bytes kept per step.
    pub max_bytes: usize,
    /// Whether the trace model is reset whenever the environment is reset.
    pub reset_on_episode: bool,
}
446
447#[derive(Clone)]
453pub struct NyxVmConfig {
454 pub firecracker_config: String,
456 pub instance_id: String,
458
459 pub shared_region_name: String,
462 pub shared_region_size: usize,
464 pub shared_memory_policy: SharedMemoryPolicy,
466
467 pub step_timeout: Duration,
470 pub boot_timeout: Duration,
472
473 pub episode_steps: usize,
476 pub step_cost: i64,
478
479 pub observation_policy: NyxObservationPolicy,
482 pub observation_bits: usize,
484 pub observation_stream_len: usize,
486 pub observation_stream_mode: NyxObservationStreamMode,
488 pub observation_pad_byte: u8,
490
491 pub reward_bits: usize,
494 pub reward_policy: NyxRewardPolicy,
496 pub reward_shaping: Option<NyxRewardShaping>,
498
499 pub action_source: NyxActionSource,
502 pub action_filter: Option<NyxActionFilter>,
504
505 pub protocol: NyxProtocolConfig,
508
509 pub stats_backend: RateBackend,
512
513 pub trace: Option<NyxTraceConfig>,
516
517 pub debug_mode: bool,
520
521 pub crash_log: Option<String>,
524}
525
526impl Default for NyxVmConfig {
527 fn default() -> Self {
528 Self {
529 firecracker_config: String::new(),
530 instance_id: "aixi-nyx".to_string(),
531 shared_region_name: "shared".to_string(),
532 shared_region_size: 4096,
533 shared_memory_policy: SharedMemoryPolicy::Snapshot,
534 step_timeout: Duration::from_millis(100),
535 boot_timeout: Duration::from_secs(30),
536 episode_steps: 100,
537 step_cost: 0,
538 observation_policy: NyxObservationPolicy::SharedMemory,
539 observation_bits: 8,
540 observation_stream_len: 64,
541 observation_stream_mode: NyxObservationStreamMode::PadTruncate,
542 observation_pad_byte: 0,
543 reward_bits: 8,
544 reward_policy: NyxRewardPolicy::FromGuest,
545 reward_shaping: None,
546 action_source: NyxActionSource::Literal(vec![]),
547 action_filter: None,
548 protocol: NyxProtocolConfig::default(),
549 stats_backend: RateBackend::default(),
550 trace: None,
551 debug_mode: false,
552 crash_log: None,
553 }
554 }
555}
556
557#[derive(Clone, Debug)]
563pub struct NyxStepResult {
564 pub exit_reason: NyxExitKind,
566 pub output: Vec<u8>,
568 pub parsed_obs: Option<u64>,
570 pub parsed_rew: Option<i64>,
572 pub done: bool,
574 pub trace_data: Vec<u8>,
576 pub shared_memory: Vec<u8>,
578}
579
/// Crate-local, cloneable summary of a VM exit.
///
/// Built from `ExitReason`; exit reasons without a dedicated variant are
/// folded into [`NyxExitKind::Other`] with a descriptive label.
#[derive(Clone, Debug)]
pub enum NyxExitKind {
    /// The guest signalled completion with the given code.
    ExecDone(u64),
    /// The time budget ran out.
    Timeout,
    /// The VM shut down.
    Shutdown,
    /// A hypercall with its five register values.
    Hypercall {
        /// First register value (`r8`).
        code: u64,
        /// Second register value (`r9`).
        arg1: u64,
        /// Third register value (`r10`).
        arg2: u64,
        /// Fourth register value (`r11`).
        arg3: u64,
        /// Fifth register value (`r12`).
        arg4: u64,
    },
    /// A debug message printed by the guest.
    DebugPrint(String),
    /// A breakpoint was hit.
    Breakpoint,
    /// Any other exit, described textually.
    Other(String),
}
609
610impl From<ExitReason> for NyxExitKind {
611 fn from(reason: ExitReason) -> Self {
612 match reason {
613 ExitReason::ExecDone(code) => Self::ExecDone(code),
614 ExitReason::Timeout => Self::Timeout,
615 ExitReason::Shutdown => Self::Shutdown,
616 ExitReason::Hypercall(r8, r9, r10, r11, r12) => Self::Hypercall {
617 code: r8,
618 arg1: r9,
619 arg2: r10,
620 arg3: r11,
621 arg4: r12,
622 },
623 ExitReason::DebugPrint(s) => Self::DebugPrint(s),
624 ExitReason::Breakpoint => Self::Breakpoint,
625 ExitReason::RequestSnapshot => Self::Other("RequestSnapshot".to_string()),
626 ExitReason::SharedMem(name, _, _) => Self::Other(format!("SharedMem({})", name)),
627 ExitReason::SingleStep => Self::Other("SingleStep".to_string()),
628 ExitReason::Interrupted => Self::Other("Interrupted".to_string()),
629 ExitReason::HWBreakpoint(n) => Self::Other(format!("HWBreakpoint({})", n)),
630 ExitReason::BadMemoryAccess(_) => Self::Other("BadMemoryAccess".to_string()),
631 }
632 }
633}
634
635enum TraceModel {
641 Rosa {
642 model: RosaPlus,
643 max_order: i64,
644 },
645 Ctw {
646 tree: crate::ctw::ContextTree,
647 },
648 FacCtw {
649 tree: crate::ctw::FacContextTree,
650 bits_per_symbol: usize,
651 },
652 #[cfg(feature = "backend-mamba")]
653 Mamba {
654 compressor: MambaCompressor,
655 primed: bool,
656 },
657 Rwkv7 {
658 compressor: Compressor,
659 primed: bool,
660 },
661 Zpaq {
662 model: ZpaqRateModel,
663 },
664 Mixture {
665 backend: RateBackend,
666 model: crate::mixture::RateBackendPredictor,
667 },
668}
669
670impl TraceModel {
671 fn predictor_backed(backend: RateBackend) -> Self {
672 let mut model =
673 crate::mixture::RateBackendPredictor::from_backend(backend.clone(), -1, 2f64.powi(-24));
674 model
675 .begin_stream(None)
676 .unwrap_or_else(|e| panic!("predictor-backed stream init failed: {e}"));
677 TraceModel::Mixture { backend, model }
678 }
679
680 fn new(backend: &RateBackend, max_order: i64) -> Self {
681 match backend {
682 RateBackend::RosaPlus => {
683 let mut model = RosaPlus::new(max_order, false, 0, 42);
684 model.build_lm_full_bytes_no_finalize_endpos();
685 TraceModel::Rosa { model, max_order }
686 }
687 #[cfg(feature = "backend-mamba")]
688 RateBackend::Mamba { model } => {
689 let compressor = MambaCompressor::new_from_model(model.clone());
690 TraceModel::Mamba {
691 compressor,
692 primed: false,
693 }
694 }
695 #[cfg(feature = "backend-mamba")]
696 RateBackend::MambaMethod { method } => {
697 let compressor = MambaCompressor::new_from_method(method)
698 .unwrap_or_else(|e| panic!("invalid mamba method for vm trace model: {e}"));
699 TraceModel::Mamba {
700 compressor,
701 primed: false,
702 }
703 }
704 RateBackend::Rwkv7 { model } => {
705 let compressor = Compressor::new_from_model(model.clone());
706 TraceModel::Rwkv7 {
707 compressor,
708 primed: false,
709 }
710 }
711 RateBackend::Rwkv7Method { method } => {
712 let compressor = Compressor::new_from_method(method)
713 .unwrap_or_else(|e| panic!("invalid rwkv7 method for vm trace model: {e}"));
714 TraceModel::Rwkv7 {
715 compressor,
716 primed: false,
717 }
718 }
719 RateBackend::Zpaq { method } => TraceModel::Zpaq {
720 model: ZpaqRateModel::new(method.clone(), 2f64.powi(-24)),
721 },
722 RateBackend::Mixture { .. }
723 | RateBackend::Particle { .. }
724 | RateBackend::Match { .. }
725 | RateBackend::SparseMatch { .. }
726 | RateBackend::Ppmd { .. }
727 | RateBackend::Calibrated { .. } => TraceModel::predictor_backed(backend.clone()),
728 RateBackend::Ctw { depth } => TraceModel::Ctw {
729 tree: crate::ctw::ContextTree::new(*depth),
730 },
731 RateBackend::FacCtw {
732 base_depth,
733 num_percept_bits: _,
734 encoding_bits,
735 } => {
736 let bits_per_symbol = (*encoding_bits).clamp(1, 8);
737 TraceModel::FacCtw {
738 tree: crate::ctw::FacContextTree::new(*base_depth, bits_per_symbol),
739 bits_per_symbol,
740 }
741 }
742 }
743 }
744
745 fn reset(&mut self) {
746 match self {
747 TraceModel::Rosa { model, max_order } => {
748 let mut fresh = RosaPlus::new(*max_order, false, 0, 42);
749 fresh.build_lm_full_bytes_no_finalize_endpos();
750 *model = fresh;
751 }
752 TraceModel::Ctw { tree } => tree.clear(),
753 TraceModel::FacCtw { tree, .. } => tree.clear(),
754 #[cfg(feature = "backend-mamba")]
755 TraceModel::Mamba { compressor, primed } => {
756 compressor.state.reset();
757 *primed = false;
758 }
759 TraceModel::Rwkv7 { compressor, primed } => {
760 compressor.state.reset();
761 *primed = false;
762 }
763 TraceModel::Zpaq { model } => {
764 model.reset();
765 }
766 TraceModel::Mixture { backend, model } => {
767 *model = crate::mixture::RateBackendPredictor::from_backend(
768 backend.clone(),
769 -1,
770 2f64.powi(-24),
771 );
772 model
773 .begin_stream(None)
774 .unwrap_or_else(|e| panic!("mixture stream init failed: {e}"));
775 }
776 }
777 }
778
779 fn update_and_score(&mut self, data: &[u8]) -> f64 {
781 if data.is_empty() {
782 return 0.0;
783 }
784 match self {
785 TraceModel::Rosa { model, .. } => {
786 let mut bits = 0.0;
787 for &b in data {
788 let p = model.prob_for_last(b as u32).max(1e-12);
789 bits -= p.log2();
790 model.train_byte(b);
791 }
792 bits
793 }
794 TraceModel::Ctw { tree } => {
795 let log_before = tree.get_log_block_probability();
796 for &b in data {
797 for i in (0..8).rev() {
798 tree.update(((b >> i) & 1) == 1);
799 }
800 }
801 let log_after = tree.get_log_block_probability();
802 let log_delta = log_after - log_before;
803 -log_delta / std::f64::consts::LN_2
804 }
805 TraceModel::FacCtw {
806 tree,
807 bits_per_symbol,
808 } => {
809 let log_before = tree.get_log_block_probability();
810 for &b in data {
811 for i in 0..*bits_per_symbol {
812 tree.update(((b >> i) & 1) == 1, i);
813 }
814 }
815 let log_after = tree.get_log_block_probability();
816 let log_delta = log_after - log_before;
817 -log_delta / std::f64::consts::LN_2
818 }
819 #[cfg(feature = "backend-mamba")]
820 TraceModel::Mamba { compressor, primed } => {
821 if !*primed {
822 let bias = compressor.online_bias_snapshot();
823 let logits =
824 compressor
825 .model
826 .forward(&mut compressor.scratch, 0, &mut compressor.state);
827 mambazip::Compressor::logits_to_pdf(
828 logits,
829 bias.as_deref(),
830 &mut compressor.pdf_buffer,
831 );
832 *primed = true;
833 }
834 let mut bits = 0.0;
835 for &b in data {
836 let p = compressor.pdf_buffer[b as usize].max(1e-12);
837 bits -= p.log2();
838 let bias = compressor.online_bias_snapshot();
839 let logits = compressor.model.forward(
840 &mut compressor.scratch,
841 b as u32,
842 &mut compressor.state,
843 );
844 mambazip::Compressor::logits_to_pdf(
845 logits,
846 bias.as_deref(),
847 &mut compressor.pdf_buffer,
848 );
849 }
850 bits
851 }
852 TraceModel::Rwkv7 { compressor, primed } => {
853 if !*primed {
854 let vocab_size = compressor.vocab_size();
855 let logits =
856 compressor
857 .model
858 .forward(&mut compressor.scratch, 0, &mut compressor.state);
859 softmax_pdf_inplace(logits, vocab_size, &mut compressor.pdf_buffer);
860 *primed = true;
861 }
862 let mut bits = 0.0;
863 let vocab_size = compressor.vocab_size();
864 for &b in data {
865 let p = compressor.pdf_buffer[b as usize].max(1e-12);
866 bits -= p.log2();
867 let logits = compressor.model.forward(
868 &mut compressor.scratch,
869 b as u32,
870 &mut compressor.state,
871 );
872 softmax_pdf_inplace(logits, vocab_size, &mut compressor.pdf_buffer);
873 }
874 bits
875 }
876 TraceModel::Zpaq { model } => model.update_and_score(data),
877 TraceModel::Mixture { model, .. } => {
878 let mut bits = 0.0;
879 for &b in data {
880 let logp = model.log_prob(b);
881 bits -= logp / std::f64::consts::LN_2;
882 model.update(b);
883 }
884 bits
885 }
886 }
887 }
888}
889
890struct FuzzState {
895 current: Vec<u8>,
896 rng: RandomGenerator,
897}
898
899pub struct NyxVmEnvironment {
905 config: NyxVmConfig,
907 vm: NyxVM,
909 base_snapshot: Option<Arc<NyxSnapshot>>,
911 shared_vaddr: Option<u64>,
913 shared_cr3: Option<u64>,
915 trace_model: Option<TraceModel>,
917 baseline_entropy: Option<f64>,
919 reward_shaping: Option<NyxRewardShaping>,
921 fuzz_state: Option<FuzzState>,
923
924 obs: PerceptVal,
927 rew: Reward,
929 obs_stream: Vec<PerceptVal>,
931 step_in_episode: usize,
933 needs_reset: bool,
935 initialized: bool,
937}
938
939impl NyxVmEnvironment {
940 pub fn new(config: NyxVmConfig) -> anyhow::Result<Self> {
942 if config.firecracker_config.is_empty() {
944 return Err(anyhow::anyhow!("firecracker_config path must be set"));
945 }
946 if config.episode_steps == 0 {
947 return Err(anyhow::anyhow!("episode_steps must be > 0"));
948 }
949 if matches!(config.observation_policy, NyxObservationPolicy::RawOutput)
950 && config.observation_stream_len == 0
951 {
952 return Err(anyhow::anyhow!(
953 "observation_stream_len must be > 0 for RawOutput policy"
954 ));
955 }
956
957 let fc_config_raw = std::fs::read_to_string(&config.firecracker_config)
959 .map_err(|e| anyhow::anyhow!("Failed to read firecracker config: {}", e))?;
960 let fc_config =
961 rewrite_firecracker_config_paths(&config.firecracker_config, &fc_config_raw)
962 .map_err(|e| anyhow::anyhow!("Failed to parse firecracker config: {}", e))?;
963
964 let vm = NyxVM::new(config.instance_id.clone(), &fc_config);
966
967 let reward_shaping = config.reward_shaping.clone();
969
970 if matches!(reward_shaping, Some(NyxRewardShaping::TraceEntropy { .. }))
971 && config.trace.is_none()
972 {
973 return Err(anyhow::anyhow!(
974 "vm_trace must be configured for vm_reward_shaping.mode=trace-entropy"
975 ));
976 }
977
978 let trace_model = match &reward_shaping {
980 Some(NyxRewardShaping::TraceEntropy { max_order, .. }) => {
981 Some(TraceModel::new(&config.stats_backend, *max_order))
982 }
983 _ => None,
984 };
985
986 let baseline_entropy = match &reward_shaping {
988 Some(NyxRewardShaping::EntropyReduction {
989 baseline_bytes,
990 max_order,
991 ..
992 }) => {
993 let h = if *max_order == 0 {
994 marginal_entropy_bytes(baseline_bytes)
995 } else {
996 entropy_rate_backend(baseline_bytes, *max_order, &config.stats_backend)
997 };
998 Some(h)
999 }
1000 _ => None,
1001 };
1002
1003 let fuzz_state = match &config.action_source {
1005 NyxActionSource::Fuzz(fuzz) => {
1006 if fuzz.seeds.is_empty() {
1007 return Err(anyhow::anyhow!("Fuzz mode requires at least one seed"));
1008 }
1009 if fuzz.mutators.is_empty() {
1010 return Err(anyhow::anyhow!("Fuzz mode requires at least one mutator"));
1011 }
1012 let seed = fuzz.seeds[0].clone();
1013 Some(FuzzState {
1014 current: seed,
1015 rng: RandomGenerator::new().fork_with(fuzz.rng_seed),
1016 })
1017 }
1018 NyxActionSource::Literal(actions) => {
1019 if actions.is_empty() {
1020 return Err(anyhow::anyhow!("Literal mode requires at least one action"));
1021 }
1022 None
1023 }
1024 };
1025
1026 let mut env = Self {
1027 config,
1028 vm,
1029 base_snapshot: None,
1030 shared_vaddr: None,
1031 shared_cr3: None,
1032 trace_model,
1033 baseline_entropy,
1034 reward_shaping,
1035 fuzz_state,
1036 obs: 0,
1037 rew: 0,
1038 obs_stream: Vec::new(),
1039 step_in_episode: 0,
1040 needs_reset: true,
1041 initialized: false,
1042 };
1043
1044 env.initialize()?;
1046
1047 Ok(env)
1048 }
1049
1050 fn initialize(&mut self) -> anyhow::Result<()> {
1052 if self.initialized {
1053 return Ok(());
1054 }
1055
1056 if self.config.debug_mode {
1057 eprintln!("[NyxVm] Booting VM...");
1058 }
1059
1060 let start = Instant::now();
1062 loop {
1063 if start.elapsed() > self.config.boot_timeout {
1064 return Err(anyhow::anyhow!("Boot timeout waiting for shared memory"));
1065 }
1066
1067 let exit = self.vm.run(Duration::from_secs(1));
1068 match exit {
1069 ExitReason::SharedMem(name, vaddr, size) => {
1070 if self.config.debug_mode {
1071 eprintln!(
1072 "[NyxVm] Shared memory registered: {} @ {:#x} ({} bytes)",
1073 name, vaddr, size
1074 );
1075 }
1076 if name.trim_end_matches('\0') == self.config.shared_region_name {
1077 self.shared_vaddr = Some(vaddr);
1078 self.shared_cr3 = Some(self.vm.sregs().cr3);
1079 let _ = self.vm.register_shared_region_current(
1081 vaddr,
1082 size,
1083 self.config.shared_memory_policy,
1084 );
1085 break;
1086 }
1087 }
1088 ExitReason::DebugPrint(msg) => {
1089 if self.config.debug_mode {
1090 eprintln!("[NyxVm] Guest: {}", msg);
1091 }
1092 }
1093 ExitReason::Shutdown => {
1094 return Err(anyhow::anyhow!("VM shut down during boot"));
1095 }
1096 _ => {
1097 if self.config.debug_mode {
1098 eprintln!("[NyxVm] Boot exit: {:?}", exit);
1099 }
1100 }
1102 }
1103 }
1104
1105 loop {
1107 if start.elapsed() > self.config.boot_timeout {
1108 return Err(anyhow::anyhow!("Boot timeout waiting for snapshot request"));
1109 }
1110
1111 let exit = self.vm.run(Duration::from_secs(1));
1112 match exit {
1113 ExitReason::RequestSnapshot => {
1114 if self.config.debug_mode {
1115 eprintln!("[NyxVm] Taking base snapshot...");
1116 }
1117 self.base_snapshot = Some(self.vm.take_base_snapshot());
1118 break;
1119 }
1120 ExitReason::DebugPrint(msg) => {
1121 if self.config.debug_mode {
1122 eprintln!("[NyxVm] Guest: {}", msg);
1123 }
1124 }
1125 ExitReason::Shutdown => {
1126 return Err(anyhow::anyhow!("VM shut down before snapshot"));
1127 }
1128 _ => {
1129 if self.config.debug_mode {
1130 eprintln!("[NyxVm] Snapshot wait exit: {:?}", exit);
1131 }
1132 }
1134 }
1135 }
1136
1137 if self.config.debug_mode {
1138 eprintln!("[NyxVm] Initialization complete");
1139 }
1140
1141 self.initialized = true;
1142 self.needs_reset = false;
1143 Ok(())
1144 }
1145
1146 pub fn reset(&mut self) -> anyhow::Result<()> {
1148 let snapshot = self
1149 .base_snapshot
1150 .as_ref()
1151 .ok_or_else(|| anyhow::anyhow!("No base snapshot available"))?
1152 .clone();
1153
1154 self.vm.apply_snapshot(&snapshot);
1155
1156 if let Some(trace_cfg) = &self.config.trace
1158 && trace_cfg.reset_on_episode
1159 && let Some(model) = &mut self.trace_model
1160 {
1161 model.reset();
1162 }
1163
1164 self.step_in_episode = 0;
1165 self.needs_reset = false;
1166
1167 Ok(())
1168 }
1169
1170 fn write_action_to_shared_memory(&mut self, payload: &[u8]) -> anyhow::Result<()> {
1172 let vaddr = self
1173 .shared_vaddr
1174 .ok_or_else(|| anyhow::anyhow!("Shared memory not initialized"))?;
1175 let cr3 = self
1176 .shared_cr3
1177 .ok_or_else(|| anyhow::anyhow!("Shared memory CR3 not initialized"))?;
1178 let process = self.vm.process_memory(cr3);
1179
1180 let wait_start = Instant::now();
1182 loop {
1183 let cur_len = process
1184 .read_u64(vaddr + SHARED_ACTION_LEN_OFFSET)
1185 .unwrap_or(0);
1186 if cur_len == 0 {
1187 break;
1188 }
1189 if wait_start.elapsed() > self.config.step_timeout {
1190 return Err(anyhow::anyhow!("shared buffer busy (len={cur_len})"));
1191 }
1192 std::thread::yield_now();
1193 }
1194
1195 let len = payload.len() as u64;
1197 process
1198 .write_u64(vaddr + SHARED_ACTION_LEN_OFFSET, len)
1199 .map_err(|e| anyhow::anyhow!("write len failed: {e}"))?;
1200 let _ = process.write_u64(vaddr + SHARED_RESP_LEN_OFFSET, 0);
1201
1202 let max_len = self
1204 .config
1205 .shared_region_size
1206 .saturating_sub(SHARED_PAYLOAD_OFFSET as usize);
1207 let write_len = payload.len().min(max_len);
1208 if write_len > 0 {
1209 let _ = process
1210 .write_bytes(vaddr + SHARED_PAYLOAD_OFFSET, &payload[..write_len])
1211 .map_err(|e| anyhow::anyhow!("write payload failed: {e}"))?;
1212 }
1213
1214 if self.config.debug_mode {
1215 let verify = process
1216 .read_u64(vaddr + SHARED_ACTION_LEN_OFFSET)
1217 .unwrap_or(0) as usize;
1218 eprintln!(
1219 "[NyxVm] Wrote action len={}, verified len={}",
1220 write_len, verify
1221 );
1222 }
1223
1224 Ok(())
1225 }
1226
1227 fn read_shared_memory(&self) -> Vec<u8> {
1229 let Some(vaddr) = self.shared_vaddr else {
1230 return Vec::new();
1231 };
1232 let Some(cr3) = self.shared_cr3 else {
1233 return Vec::new();
1234 };
1235 let process = self.vm.process_memory(cr3);
1236
1237 let len = process
1239 .read_u64(vaddr + SHARED_RESP_LEN_OFFSET)
1240 .unwrap_or(0) as usize;
1241 let max_len = self
1242 .config
1243 .shared_region_size
1244 .saturating_sub(SHARED_PAYLOAD_OFFSET as usize);
1245 let read_len = len.min(max_len);
1246
1247 if read_len == 0 {
1248 return Vec::new();
1249 }
1250
1251 let mut buf = vec![0u8; read_len];
1252 let _ = process.read_bytes(vaddr + SHARED_PAYLOAD_OFFSET, &mut buf);
1253 buf
1254 }
1255
1256 fn clear_shared_length(&self) {
1257 let (Some(vaddr), Some(cr3)) = (self.shared_vaddr, self.shared_cr3) else {
1258 return;
1259 };
1260 let process = self.vm.process_memory(cr3);
1261 let _ = process.write_u64(vaddr + SHARED_ACTION_LEN_OFFSET, 0);
1262 let _ = process.write_u64(vaddr + SHARED_RESP_LEN_OFFSET, 0);
1263 }
1264
1265 pub fn run_step(&mut self, payload: &[u8]) -> anyhow::Result<NyxStepResult> {
1267 self.write_action_to_shared_memory(payload)?;
1269
1270 let start = Instant::now();
1272 let mut output = Vec::new();
1273 let mut trace_data = Vec::new();
1274 let mut parsed_obs = None;
1275 let mut parsed_rew = None;
1276 let mut done = false;
1277 let exit_kind;
1278 let collect_output =
1279 matches!(
1280 self.config.observation_policy,
1281 NyxObservationPolicy::OutputHash | NyxObservationPolicy::RawOutput
1282 ) || matches!(self.config.reward_policy, NyxRewardPolicy::Pattern { .. })
1283 || matches!(
1284 self.reward_shaping,
1285 Some(NyxRewardShaping::EntropyReduction { .. })
1286 );
1287
1288 loop {
1289 let remaining = self
1290 .config
1291 .step_timeout
1292 .checked_sub(start.elapsed())
1293 .unwrap_or(Duration::ZERO);
1294
1295 if remaining.is_zero() {
1296 exit_kind = NyxExitKind::Timeout;
1297 break;
1298 }
1299
1300 let exit = self.vm.run(remaining);
1301 match exit {
1302 ExitReason::ExecDone(code) => {
1303 exit_kind = NyxExitKind::ExecDone(code);
1304 done = true;
1305 break;
1306 }
1307 ExitReason::Timeout => {
1308 if self.config.debug_mode {
1309 eprintln!("[NyxVm] Step timeout");
1310 }
1311 exit_kind = NyxExitKind::Timeout;
1312 break;
1313 }
1314 ExitReason::Shutdown => {
1315 if self.config.debug_mode {
1316 eprintln!("[NyxVm] VM shutdown during step");
1317 }
1318 exit_kind = NyxExitKind::Shutdown;
1319 done = true;
1320 break;
1321 }
1322 ExitReason::DebugPrint(msg) => {
1323 if self.config.debug_mode {
1324 eprintln!("[NyxVm] Guest: {}", msg);
1325 }
1326 if collect_output {
1328 output.extend_from_slice(msg.as_bytes());
1329 }
1330 }
1332 ExitReason::Hypercall(r8, r9, r10, r11, r12) => {
1333 exit_kind = NyxExitKind::Hypercall {
1334 code: r8,
1335 arg1: r9,
1336 arg2: r10,
1337 arg3: r11,
1338 arg4: r12,
1339 };
1340 if let Some(obs) = Self::try_parse_u64(r9) {
1342 parsed_obs = Some(obs);
1343 }
1344 if let Some(rew) = Self::try_parse_i64(r10) {
1345 parsed_rew = Some(rew);
1346 }
1347 break;
1348 }
1349 ExitReason::Breakpoint => {
1350 if self.config.debug_mode {
1351 eprintln!("[NyxVm] Breakpoint exit during step");
1352 }
1353 exit_kind = NyxExitKind::Breakpoint;
1354 break;
1355 }
1356 _ => {
1357 }
1359 }
1360 }
1361
1362 let need_shared_memory = matches!(
1364 self.config.observation_policy,
1365 NyxObservationPolicy::SharedMemory
1366 ) || matches!(
1367 self.config.reward_policy,
1368 NyxRewardPolicy::Pattern { .. }
1369 ) || matches!(
1370 self.reward_shaping,
1371 Some(NyxRewardShaping::EntropyReduction { .. })
1372 ) || self.config.trace.is_some();
1373 let shared_memory = if need_shared_memory {
1374 self.read_shared_memory()
1375 } else {
1376 Vec::new()
1377 };
1378
1379 self.clear_shared_length();
1381
1382 if let Some(trace_cfg) = &self.config.trace
1384 && trace_cfg.shared_region_name.is_some()
1385 {
1386 trace_data = shared_memory.clone();
1389 if trace_data.len() > trace_cfg.max_bytes {
1390 trace_data.truncate(trace_cfg.max_bytes);
1391 }
1392 }
1393
1394 Ok(NyxStepResult {
1395 exit_reason: exit_kind,
1396 output,
1397 parsed_obs,
1398 parsed_rew,
1399 done,
1400 trace_data,
1401 shared_memory,
1402 })
1403 }
1404
/// Interprets a raw register value as an observation.
///
/// Currently every value is accepted, so the result is always `Some`.
fn try_parse_u64(val: u64) -> Option<u64> {
    let observation = val;
    Some(observation)
}
1409
/// Interprets a raw register value as a signed reward.
///
/// The 64 bits are reinterpreted as two's complement, so values with the
/// top bit set come out negative. The result is always `Some`.
fn try_parse_i64(val: u64) -> Option<i64> {
    let reward = i64::from_ne_bytes(val.to_ne_bytes());
    Some(reward)
}
1413
1414 fn get_action_payload(&mut self, action: Action) -> anyhow::Result<Cow<'_, [u8]>> {
1416 match &self.config.action_source {
1417 NyxActionSource::Literal(actions) => {
1418 let idx = action as usize;
1419 if idx >= actions.len() {
1420 return Err(anyhow::anyhow!("Action index out of range"));
1421 }
1422 Ok(Cow::Borrowed(actions[idx].payload.as_slice()))
1423 }
1424 NyxActionSource::Fuzz(fuzz) => {
1425 let state = self
1426 .fuzz_state
1427 .as_mut()
1428 .ok_or_else(|| anyhow::anyhow!("Fuzz state missing"))?;
1429 let idx = action as usize % fuzz.mutators.len();
1430 let mut input = state.current.clone();
1431 let mutator = &fuzz.mutators[idx];
1432 apply_mutator(mutator, &mut input, fuzz, &mut state.rng);
1433 if input.len() < fuzz.min_len {
1434 input.resize(fuzz.min_len, 0);
1435 }
1436 if input.len() > fuzz.max_len {
1437 input.truncate(fuzz.max_len);
1438 }
1439 state.current = input.clone();
1440 Ok(Cow::Owned(input))
1441 }
1442 }
1443 }
1444
1445 fn filter_action(&self, payload: &[u8]) -> Option<i64> {
1447 let filter = self.config.action_filter.as_ref()?;
1448 if payload.is_empty() {
1449 return filter.reject_reward;
1450 }
1451
1452 let (entropy, intrinsic, novelty) = self.compute_filter_metrics(payload, filter);
1453
1454 if let Some(min_entropy) = filter.min_entropy
1455 && entropy < min_entropy
1456 {
1457 return filter.reject_reward;
1458 }
1459 if let Some(max_entropy) = filter.max_entropy
1460 && entropy > max_entropy
1461 {
1462 return filter.reject_reward;
1463 }
1464 if let Some(min_intrinsic) = filter.min_intrinsic_dependence
1465 && intrinsic < min_intrinsic
1466 {
1467 return filter.reject_reward;
1468 }
1469 if let Some(min_novelty) = filter.min_novelty
1470 && filter.novelty_prior.is_some()
1471 && novelty < min_novelty
1472 {
1473 return filter.reject_reward;
1474 }
1475 None
1476 }
1477
1478 fn wrap_action_payload(&self, payload: &[u8]) -> Vec<u8> {
1479 let p = &self.config.protocol;
1480 let mut wrapped = p.action_prefix.clone().into_bytes();
1481 wrapped.extend_from_slice(p.wire_encoding.encode(payload).as_bytes());
1482 wrapped.extend_from_slice(p.action_suffix.as_bytes());
1483 wrapped
1484 }
1485
1486 fn compute_filter_metrics(&self, payload: &[u8], filter: &NyxActionFilter) -> (f64, f64, f64) {
1487 let h_marg = marginal_entropy_bytes(payload);
1488 let h_rate = if filter.max_order == 0 {
1489 h_marg
1490 } else {
1491 entropy_rate_backend(payload, filter.max_order, &self.config.stats_backend)
1492 };
1493
1494 let intrinsic = if h_marg < 1e-9 {
1495 0.0
1496 } else {
1497 ((h_marg - h_rate) / h_marg).clamp(0.0, 1.0)
1498 };
1499
1500 let novelty = if let Some(ref prior) = filter.novelty_prior {
1501 cross_entropy_rate_backend(payload, prior, filter.max_order, &self.config.stats_backend)
1502 } else {
1503 0.0
1504 };
1505
1506 (h_rate, intrinsic, novelty)
1507 }
1508
1509 fn compute_reward(&mut self, result: &NyxStepResult) -> Reward {
1511 let base_reward = match &self.config.reward_policy {
1512 NyxRewardPolicy::FromGuest => result.parsed_rew.unwrap_or(0),
1513 NyxRewardPolicy::Pattern {
1514 pattern,
1515 base_reward,
1516 bonus_reward,
1517 } => {
1518 let text = String::from_utf8_lossy(&result.output);
1519 let shared_text = String::from_utf8_lossy(&result.shared_memory);
1520 if text.contains(pattern) || shared_text.contains(pattern) {
1521 base_reward + bonus_reward
1522 } else {
1523 *base_reward
1524 }
1525 }
1526 NyxRewardPolicy::Custom(f) => f(result),
1527 };
1528
1529 let shaping_reward = if let Some(shaping) = self.reward_shaping.clone() {
1530 self.compute_reward_shaping(&shaping, result)
1531 } else {
1532 0
1533 };
1534
1535 let mut reward = base_reward.saturating_add(shaping_reward);
1536
1537 reward = reward.saturating_sub(self.config.step_cost);
1538 let min_reward = self.min_reward();
1539 let max_reward = self.max_reward();
1540 reward.clamp(min_reward, max_reward)
1541 }
1542
1543 fn compute_reward_shaping(
1544 &mut self,
1545 shaping: &NyxRewardShaping,
1546 result: &NyxStepResult,
1547 ) -> Reward {
1548 match shaping {
1549 NyxRewardShaping::EntropyReduction {
1550 max_order,
1551 scale,
1552 crash_bonus,
1553 timeout_bonus,
1554 ..
1555 } => {
1556 let mut base_reward = {
1557 let data = if result.shared_memory.is_empty() {
1558 &result.output
1559 } else {
1560 &result.shared_memory
1561 };
1562 let h_obs = if *max_order == 0 {
1563 marginal_entropy_bytes(data)
1564 } else {
1565 entropy_rate_backend(data, *max_order, &self.config.stats_backend)
1566 };
1567 let h_base = self.baseline_entropy.unwrap_or(0.0);
1568 let er = (h_base - h_obs) * scale;
1569 er.round() as i64
1570 };
1571
1572 match &result.exit_reason {
1574 NyxExitKind::Shutdown | NyxExitKind::Breakpoint => {
1575 if let Some(bonus) = crash_bonus {
1576 base_reward = base_reward.saturating_add(*bonus);
1577 }
1578 }
1579 NyxExitKind::Timeout => {
1580 if let Some(bonus) = timeout_bonus {
1581 base_reward = base_reward.saturating_add(*bonus);
1582 }
1583 }
1584 _ => {}
1585 }
1586
1587 base_reward
1588 }
1589 NyxRewardShaping::TraceEntropy {
1590 scale, normalize, ..
1591 } => {
1592 let data = &result.trace_data;
1593 let bits = match self.trace_model.as_mut() {
1594 Some(model) => model.update_and_score(data),
1595 None => 0.0,
1596 };
1597 let bits = if *normalize && !data.is_empty() {
1598 bits / data.len() as f64
1599 } else {
1600 bits
1601 };
1602 (bits * scale).round() as i64
1603 }
1604 }
1605 }
1606
1607 fn mask_observation(&self, value: u64) -> u64 {
1608 let bits = self.config.observation_bits;
1609 if bits >= 64 {
1610 value
1611 } else if bits == 0 {
1612 0
1613 } else {
1614 value & ((1u64 << bits) - 1)
1615 }
1616 }
1617
1618 fn build_observation_stream(&self, result: &NyxStepResult) -> Vec<PerceptVal> {
1619 let mut observations = match self.config.observation_policy {
1620 NyxObservationPolicy::FromGuest => {
1621 if let Some(obs) = result.parsed_obs {
1622 vec![self.mask_observation(obs)]
1623 } else {
1624 vec![self.hash_observation(&result.shared_memory)]
1625 }
1626 }
1627 NyxObservationPolicy::OutputHash => {
1628 vec![self.hash_observation(&result.output)]
1629 }
1630 NyxObservationPolicy::RawOutput => {
1631 result.output.iter().map(|b| *b as PerceptVal).collect()
1632 }
1633 NyxObservationPolicy::SharedMemory => result
1634 .shared_memory
1635 .iter()
1636 .map(|b| *b as PerceptVal)
1637 .collect(),
1638 };
1639
1640 if observations.is_empty() {
1641 observations.push(0);
1642 }
1643
1644 self.normalize_observation_stream(&mut observations);
1645 observations
1646 }
1647
1648 fn hash_observation(&self, data: &[u8]) -> PerceptVal {
1649 let h = robust_hash_bytes(data);
1650 self.mask_observation(h)
1651 }
1652
1653 fn normalize_observation_stream(&self, observations: &mut Vec<PerceptVal>) {
1654 let mask = if self.config.observation_bits >= 64 {
1655 u64::MAX
1656 } else if self.config.observation_bits == 0 {
1657 0
1658 } else {
1659 (1u64 << self.config.observation_bits) - 1
1660 };
1661
1662 for obs in observations.iter_mut() {
1663 *obs &= mask;
1664 }
1665
1666 let target = self.config.observation_stream_len;
1667 if target == 0 {
1668 return;
1669 }
1670
1671 if observations.len() > target {
1672 match self.config.observation_stream_mode {
1673 NyxObservationStreamMode::Truncate | NyxObservationStreamMode::PadTruncate => {
1674 observations.truncate(target);
1675 }
1676 NyxObservationStreamMode::Pad => {}
1677 }
1678 } else if observations.len() < target {
1679 match self.config.observation_stream_mode {
1680 NyxObservationStreamMode::Pad | NyxObservationStreamMode::PadTruncate => {
1681 let pad = self.config.observation_pad_byte as PerceptVal;
1682 observations.resize(target, pad);
1683 }
1684 NyxObservationStreamMode::Truncate => {}
1685 }
1686 }
1687 }
1688
1689 fn action_count(&self) -> usize {
1690 match &self.config.action_source {
1691 NyxActionSource::Literal(actions) => actions.len(),
1692 NyxActionSource::Fuzz(fuzz) => fuzz.mutators.len(),
1693 }
1694 }
1695
1696 pub fn vm(&self) -> &NyxVM {
1698 &self.vm
1699 }
1700
1701 pub fn vm_mut(&mut self) -> &mut NyxVM {
1703 &mut self.vm
1704 }
1705
1706 pub fn take_snapshot(&mut self) -> Arc<NyxSnapshot> {
1708 self.vm.take_snapshot()
1709 }
1710
1711 pub fn apply_snapshot(&mut self, snapshot: &Arc<NyxSnapshot>) {
1713 self.vm.apply_snapshot(snapshot);
1714 }
1715
1716 pub fn reset_trace_model(&mut self) {
1718 if let Some(model) = &mut self.trace_model {
1719 model.reset();
1720 }
1721 }
1722
1723 fn log_crash(&self, action_payload: &[u8], result: &NyxStepResult, reward: i64) {
1725 let Some(log_path) = &self.config.crash_log else {
1726 return;
1727 };
1728
1729 let is_interesting = matches!(
1731 result.exit_reason,
1732 NyxExitKind::Shutdown | NyxExitKind::Breakpoint | NyxExitKind::Timeout
1733 );
1734
1735 if !is_interesting {
1736 return;
1737 }
1738
1739 let log_entry = serde_json::json!({
1740 "timestamp": std::time::SystemTime::now()
1741 .duration_since(std::time::UNIX_EPOCH)
1742 .unwrap_or_default()
1743 .as_secs(),
1744 "exit_reason": format!("{:?}", result.exit_reason),
1745 "action_payload": hex_encode(action_payload),
1746 "action_payload_str": String::from_utf8_lossy(action_payload),
1747 "output": String::from_utf8_lossy(&result.output),
1748 "shared_memory": hex_encode(&result.shared_memory),
1749 "reward": reward,
1750 "parsed_obs": result.parsed_obs,
1751 "parsed_rew": result.parsed_rew,
1752 });
1753
1754 if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(log_path)
1756 && let Ok(json_str) = serde_json::to_string(&log_entry)
1757 {
1758 let _ = writeln!(file, "{}", json_str);
1759 }
1760 }
1761}
1762
1763impl Environment for NyxVmEnvironment {
1768 fn perform_action(&mut self, action: Action) {
1769 if self.needs_reset
1770 && let Err(e) = self.reset()
1771 && self.config.debug_mode
1772 {
1773 eprintln!("[NyxVm] Reset failed: {}", e);
1774 }
1775
1776 let payload = match self.get_action_payload(action) {
1777 Ok(payload) => payload.into_owned(),
1778 Err(e) => {
1779 if self.config.debug_mode {
1780 eprintln!("[NyxVm] Invalid action: {}", e);
1781 }
1782 self.obs = 0;
1783 self.rew = self.min_reward();
1784 self.obs_stream.clear();
1785 self.obs_stream.push(0);
1786 self.step_in_episode = (self.step_in_episode + 1) % self.config.episode_steps;
1787 if self.step_in_episode == 0 {
1788 self.needs_reset = true;
1789 }
1790 return;
1791 }
1792 };
1793
1794 if let Some(reject_reward) = self.filter_action(&payload) {
1796 self.obs = 0;
1797 self.rew = reject_reward.clamp(self.min_reward(), self.max_reward());
1798 self.obs_stream.clear();
1799 self.obs_stream.push(0);
1800 self.step_in_episode = (self.step_in_episode + 1) % self.config.episode_steps;
1801 if self.step_in_episode == 0 {
1802 self.needs_reset = true;
1803 }
1804 return;
1805 }
1806
1807 let wrapped_payload = self.wrap_action_payload(&payload);
1809 let result = match self.run_step(&wrapped_payload) {
1810 Ok(result) => result,
1811 Err(e) => {
1812 if self.config.debug_mode {
1813 eprintln!("[NyxVm] Step failed: {}", e);
1814 }
1815 self.obs = 0;
1816 self.rew = self.min_reward();
1817 self.obs_stream.clear();
1818 self.obs_stream.push(0);
1819 self.step_in_episode = (self.step_in_episode + 1) % self.config.episode_steps;
1820 if self.step_in_episode == 0 {
1821 self.needs_reset = true;
1822 }
1823 return;
1824 }
1825 };
1826
1827 self.obs_stream = self.build_observation_stream(&result);
1829 self.obs = self.obs_stream.first().copied().unwrap_or(0);
1830 self.rew = self.compute_reward(&result);
1831
1832 self.log_crash(&payload, &result, self.rew);
1834
1835 if self.config.debug_mode {
1836 eprintln!(
1837 "[NyxVm] Action={} Obs={} Rew={} Done={:?} Exit={:?}",
1838 action, self.obs, self.rew, result.done, result.exit_reason
1839 );
1840 }
1841
1842 self.step_in_episode = (self.step_in_episode + 1) % self.config.episode_steps;
1843 if self.step_in_episode == 0 || result.done {
1844 self.needs_reset = true;
1845 }
1846 }
1847
1848 fn get_observation(&self) -> PerceptVal {
1849 self.obs
1850 }
1851
1852 fn drain_observations(&mut self) -> Vec<PerceptVal> {
1853 if self.obs_stream.is_empty() {
1854 vec![self.obs]
1855 } else {
1856 std::mem::take(&mut self.obs_stream)
1857 }
1858 }
1859
1860 fn get_reward(&self) -> Reward {
1861 self.rew
1862 }
1863
1864 fn is_finished(&self) -> bool {
1865 false
1866 }
1867
1868 fn get_observation_bits(&self) -> usize {
1869 self.config.observation_bits
1870 }
1871
1872 fn get_reward_bits(&self) -> usize {
1873 self.config.reward_bits
1874 }
1875
1876 fn get_action_bits(&self) -> usize {
1877 let n = self.action_count();
1878 if n <= 1 {
1879 return 1;
1880 }
1881 (n as f64).log2().ceil() as usize
1882 }
1883
1884 fn get_num_actions(&self) -> usize {
1885 self.action_count()
1886 }
1887
1888 fn max_reward(&self) -> Reward {
1889 let bits = self.config.reward_bits;
1890 if bits >= 64 {
1891 i64::MAX
1892 } else if bits == 0 {
1893 0
1894 } else {
1895 (1i64 << (bits - 1)) - 1
1896 }
1897 }
1898
1899 fn min_reward(&self) -> Reward {
1900 let bits = self.config.reward_bits;
1901 if bits >= 64 {
1902 i64::MIN
1903 } else if bits == 0 {
1904 0
1905 } else {
1906 -(1i64 << (bits - 1))
1907 }
1908 }
1909}
1910
/// Folds a byte slice into a 64-bit hash.
///
/// Starting from zero, each byte rotates the accumulator left by seven bits
/// and is then XORed in. Empty input hashes to `0`.
fn robust_hash_bytes(data: &[u8]) -> u64 {
    data.iter()
        .fold(0u64, |acc, &byte| acc.rotate_left(7) ^ u64::from(byte))
}
1922
1923fn apply_mutator(
1924 mutator: &FuzzMutator,
1925 input: &mut Vec<u8>,
1926 fuzz: &NyxFuzzConfig,
1927 rng: &mut RandomGenerator,
1928) {
1929 match mutator {
1930 FuzzMutator::FlipBit => {
1931 if input.is_empty() {
1932 input.push(0);
1933 }
1934 let idx = rng.gen_range(input.len());
1935 let bit = rng.gen_range(8);
1936 input[idx] ^= 1u8 << bit;
1937 }
1938 FuzzMutator::FlipByte => {
1939 if input.is_empty() {
1940 input.push(0);
1941 }
1942 let idx = rng.gen_range(input.len());
1943 input[idx] ^= rng.next_u64() as u8;
1944 }
1945 FuzzMutator::InsertByte => {
1946 let idx = if input.is_empty() {
1947 0
1948 } else {
1949 rng.gen_range(input.len() + 1)
1950 };
1951 let byte = if !fuzz.dictionary.is_empty() {
1952 let d = rng.gen_range(fuzz.dictionary.len());
1953 let entry = &fuzz.dictionary[d];
1954 if entry.is_empty() {
1955 0
1956 } else {
1957 entry[rng.gen_range(entry.len())]
1958 }
1959 } else {
1960 rng.next_u64() as u8
1961 };
1962 input.insert(idx, byte);
1963 }
1964 FuzzMutator::DeleteByte => {
1965 if input.len() > 1 {
1966 let idx = rng.gen_range(input.len());
1967 input.remove(idx);
1968 }
1969 }
1970 FuzzMutator::SpliceSeed => {
1971 if fuzz.seeds.is_empty() {
1972 return;
1973 }
1974 let seed = &fuzz.seeds[rng.gen_range(fuzz.seeds.len())];
1975 if input.is_empty() {
1976 input.extend_from_slice(seed);
1977 } else if !seed.is_empty() {
1978 let cut = rng.gen_range(input.len());
1979 let seed_cut = rng.gen_range(seed.len());
1980 let mut out = Vec::new();
1981 out.extend_from_slice(&input[..cut]);
1982 out.extend_from_slice(&seed[seed_cut..]);
1983 *input = out;
1984 }
1985 }
1986 FuzzMutator::ResetSeed => {
1987 if fuzz.seeds.is_empty() {
1988 return;
1989 }
1990 *input = fuzz.seeds[rng.gen_range(fuzz.seeds.len())].clone();
1991 }
1992 FuzzMutator::Havoc => {
1993 let flips = 1 + rng.gen_range(8);
1994 for _ in 0..flips {
1995 if input.is_empty() {
1996 input.push(0);
1997 }
1998 let idx = rng.gen_range(input.len());
1999 input[idx] ^= rng.next_u64() as u8;
2000 }
2001 }
2002 }
2003}
2004
#[cfg(test)]
mod tests {
    use super::*;

    /// Hex encoding of a short ASCII string round-trips through `hex_decode`.
    #[test]
    fn test_hex_encoding() {
        let raw = b"hello";
        let hex = hex_encode(raw);
        assert_eq!(hex, "68656c6c6f");

        let round_trip = hex_decode(&hex).unwrap();
        assert_eq!(round_trip, raw);
    }

    /// Equal inputs hash equally; two different sample inputs hash differently.
    #[test]
    fn test_robust_hash() {
        let first = b"test data";
        let same = b"test data";
        let other = b"different";

        let reference = robust_hash_bytes(first);
        assert_eq!(reference, robust_hash_bytes(same));
        assert_ne!(reference, robust_hash_bytes(other));
    }

    /// Both payload encodings encode and decode a small sample correctly.
    #[test]
    fn test_payload_encoding() {
        let sample = b"test";

        let utf8 = PayloadEncoding::Utf8;
        assert_eq!(utf8.encode(sample), "test");
        assert_eq!(utf8.decode("test").unwrap(), sample);

        let hex = PayloadEncoding::Hex;
        assert_eq!(hex.encode(sample), "74657374");
        assert_eq!(hex.decode("74657374").unwrap(), sample);
    }

    /// Every predictor-backed backend yields a finite, non-negative score,
    /// both on first use and after a reset.
    #[test]
    fn trace_model_supports_predictor_backed_backends() {
        let match_backend = RateBackend::Match {
            hash_bits: 20,
            min_len: 4,
            max_len: 255,
            base_mix: 0.02,
            confidence_scale: 1.0,
        };
        let sparse_match_backend = RateBackend::SparseMatch {
            hash_bits: 19,
            min_len: 3,
            max_len: 64,
            gap_min: 1,
            gap_max: 2,
            base_mix: 0.05,
            confidence_scale: 1.0,
        };
        let ppmd_backend = RateBackend::Ppmd {
            order: 8,
            memory_mb: 8,
        };
        let calibrated_backend = RateBackend::Calibrated {
            spec: Arc::new(crate::CalibratedSpec {
                base: RateBackend::Ctw { depth: 8 },
                context: crate::CalibrationContextKind::Text,
                bins: 33,
                learning_rate: 0.02,
                bias_clip: 4.0,
            }),
        };
        let particle_backend = RateBackend::Particle {
            spec: Arc::new(crate::ParticleSpec {
                num_particles: 4,
                num_cells: 4,
                cell_dim: 8,
                ..crate::ParticleSpec::default()
            }),
        };
        let mixture_backend = RateBackend::Mixture {
            spec: Arc::new(crate::MixtureSpec::new(
                crate::MixtureKind::Bayes,
                vec![crate::MixtureExpertSpec {
                    name: Some("ctw".to_string()),
                    log_prior: 0.0,
                    max_order: -1,
                    backend: RateBackend::Ctw { depth: 8 },
                }],
            )),
        };

        let backends = vec![
            match_backend,
            sparse_match_backend,
            ppmd_backend,
            calibrated_backend,
            particle_backend,
            mixture_backend,
        ];

        for backend in &backends {
            let mut model = TraceModel::new(backend, 4);

            let bits = model.update_and_score(b"trace payload");
            assert!(bits.is_finite() && bits >= 0.0, "bits={bits}");

            model.reset();

            let bits_after_reset = model.update_and_score(b"trace payload");
            assert!(
                bits_after_reset.is_finite() && bits_after_reset >= 0.0,
                "bits_after_reset={bits_after_reset}"
            );
        }
    }
}