pub trait AgentSimulator: Send + Sync {
    // Required methods
    fn get_num_actions(&self) -> usize;
    fn get_num_observation_bits(&self) -> usize;
    fn get_num_reward_bits(&self) -> usize;
    fn horizon(&self) -> usize;
    fn max_reward(&self) -> Reward;
    fn min_reward(&self) -> Reward;
    fn model_update_action(&mut self, action: Action);
    fn gen_percept_and_update(&mut self, bits: usize) -> u64;
    fn model_revert(&mut self, steps: usize);
    fn gen_range(&mut self, end: usize) -> usize;
    fn gen_f64(&mut self) -> f64;
    fn boxed_clone_with_seed(&self, seed: u64) -> Box<dyn AgentSimulator>;

    // Provided methods
    fn observation_stream_len(&self) -> usize { ... }
    fn observation_key_mode(&self) -> ObservationKeyMode { ... }
    fn observation_repr_from_stream(
        &self,
        observations: &[PerceptVal],
    ) -> Vec<PerceptVal> { ... }
    fn reward_offset(&self) -> i64 { ... }
    fn get_explore_exploit_ratio(&self) -> f64 { ... }
    fn discount_gamma(&self) -> f64 { ... }
    fn boxed_clone(&self) -> Box<dyn AgentSimulator> { ... }
    fn norm_reward(&self, reward: f64) -> f64 { ... }
    fn gen_percepts_and_update(&mut self) -> (Vec<PerceptVal>, Reward) { ... }
}
Interface for an agent that can be simulated during MCTS.
This trait allows the MCTS algorithm to interact with an agent
(like Agent in agent.rs) to perform “imagined” actions and
receive “imagined” percepts during planning.
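A minimal sketch of how an MCTS rollout might drive this trait. The function below is illustrative only; it assumes Action, Reward, and PerceptVal are plain integer/float aliases from the crate, and that one model_revert step undoes one action/percept cycle.

// Illustrative rollout: simulate `depth` uniformly random actions on the
// model, accumulate the discounted reward, then revert so the caller's
// model state is untouched.
fn random_rollout(sim: &mut dyn AgentSimulator, depth: usize) -> f64 {
    let gamma = sim.discount_gamma();
    let num_actions = sim.get_num_actions();
    let mut total = 0.0;
    let mut discount = 1.0;
    for _ in 0..depth {
        // Push an "imagined" action into the model...
        let action = sim.gen_range(num_actions) as Action; // assumes Action is an integer alias
        sim.model_update_action(action);
        // ...and sample the "imagined" percept and reward it induces.
        let (_obs_key, reward) = sim.gen_percepts_and_update();
        total += discount * reward as f64; // assumes Reward converts with `as`
        discount *= gamma;
    }
    // Roll the model back; assumes one revert step per action/percept cycle.
    sim.model_revert(depth);
    // Map the cumulative discounted reward into [0, 1] for backpropagation.
    sim.norm_reward(total)
}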
Required Methods
fn get_num_actions(&self) -> usize
Returns the number of possible actions the agent can perform.
fn get_num_observation_bits(&self) -> usize
Returns the bit-width used to encode observations.
fn get_num_reward_bits(&self) -> usize
Returns the bit-width used to encode rewards.
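For orientation, one plausible way these bit-widths combine with the stream length; this accounting is an assumption (in particular, that get_num_observation_bits is the width of a single observation symbol), and the crate may count bits differently.

// Illustrative only: total bits drawn from the model for one percept.
fn percept_bits(sim: &dyn AgentSimulator) -> usize {
    sim.observation_stream_len() * sim.get_num_observation_bits() + sim.get_num_reward_bits()
}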
fn max_reward(&self) -> Reward
Returns the maximum possible reward value.
fn min_reward(&self) -> Reward
Returns the minimum possible reward value.
fn model_update_action(&mut self, action: Action)
Updates the internal model state with a simulated action.
fn gen_percept_and_update(&mut self, bits: usize) -> u64
Generates a simulated percept of the requested bit-width and updates the model state.
fn model_revert(&mut self, steps: usize)
Reverts the model state by the given number of simulation steps.
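A small sketch of the update/revert pairing; the one-step-per-cycle assumption below is illustrative and should be checked against the implementing agent.

// Illustrative: try one action, sample the imagined outcome, then roll the
// model back so the caller observes no state change.
fn peek_reward(sim: &mut dyn AgentSimulator, action: Action) -> Reward {
    sim.model_update_action(action);
    let (_key, reward) = sim.gen_percepts_and_update();
    sim.model_revert(1); // assumes one revert step covers an action/percept cycle
    reward
}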
fn boxed_clone_with_seed(&self, seed: u64) -> Box<dyn AgentSimulator>
Creates a boxed clone of this simulator, re-seeding any RNG state.
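A hedged sketch of how re-seeded clones could back a parallel search; the worker setup and the xor-based seeding are illustrative, not something the trait prescribes. It reuses the random_rollout sketch from the trait description above.

use std::thread;

// Illustrative: one independent rollout per worker, each on its own
// re-seeded clone so workers share neither RNG state nor model state.
fn parallel_rollouts(sim: &dyn AgentSimulator, workers: u64, depth: usize) -> Vec<f64> {
    thread::scope(|scope| {
        let handles: Vec<_> = (0..workers)
            .map(|i| {
                let mut local = sim.boxed_clone_with_seed(0xC0FFEE ^ i); // hypothetical seed scheme
                scope.spawn(move || random_rollout(local.as_mut(), depth))
            })
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    })
}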
Provided Methods
fn observation_stream_len(&self) -> usize
Returns the number of observation symbols per action.
fn observation_key_mode(&self) -> ObservationKeyMode
Returns the observation key mode for search-tree branching.
fn observation_repr_from_stream(&self, observations: &[PerceptVal]) -> Vec<PerceptVal>
Returns the observation representation used for tree branching.
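As an example of why an override might exist, a hypothetical coarsening that keeps only the high bits of each symbol (assuming PerceptVal is an unsigned-integer alias), so that similar observation streams share a chance node in the search tree.

// Hypothetical coarsening of a percept stream into a branching key; the
// shift width 4 is arbitrary and purely illustrative.
fn coarse_observation_key(observations: &[PerceptVal]) -> Vec<PerceptVal> {
    observations.iter().map(|&o| o >> 4).collect()
}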
fn reward_offset(&self) -> i64
Returns the reward offset used to ensure encoded rewards are non-negative.
Paper-compatible encoding uses unsigned reward bits and shifts rewards by an offset.
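A sketch of the shift-by-offset encoding described above; the exact bit layout used by the crate may differ.

// Illustrative: shift a signed reward by the offset so it becomes a
// non-negative value that fits in get_num_reward_bits() unsigned bits.
fn encode_reward(sim: &dyn AgentSimulator, reward: i64) -> u64 {
    let shifted = reward + sim.reward_offset();
    debug_assert!(shifted >= 0 && (shifted as u64) < (1u64 << sim.get_num_reward_bits()));
    shifted as u64
}

// Inverse of the above: recover the signed reward from its encoded form.
fn decode_reward(sim: &dyn AgentSimulator, encoded: u64) -> i64 {
    encoded as i64 - sim.reward_offset()
}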
fn get_explore_exploit_ratio(&self) -> f64
Returns the exploration-exploitation constant (often denoted as C).
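For context, a standard UCB1/UCT action-selection score in which this constant typically appears; the visit-count bookkeeping is invented for illustration.

// Illustrative UCT score: exploit the mean normalized value, explore
// under-visited children, weighted by C = get_explore_exploit_ratio().
fn uct_score(sim: &dyn AgentSimulator, mean_value: f64, parent_visits: f64, child_visits: f64) -> f64 {
    if child_visits == 0.0 {
        return f64::INFINITY; // unvisited children are tried first
    }
    mean_value + sim.get_explore_exploit_ratio() * (parent_visits.ln() / child_visits).sqrt()
}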
fn discount_gamma(&self) -> f64
Returns the discount factor for future rewards.
fn boxed_clone(&self) -> Box<dyn AgentSimulator>
Creates a boxed clone of this simulator for parallel search.
fn norm_reward(&self, reward: f64) -> f64
Normalizes a reward value to [0, 1] based on the agent’s range and horizon.
For discounted rewards, the cumulative range is sum_{t=0}^{h-1} gamma^t * (max - min).
Similarly, the minimum cumulative reward is sum_{t=0}^{h-1} gamma^t * min.
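One way the quoted ranges combine into a normalization; the trait's default may differ in details such as clamping. The sketch assumes Reward converts to f64 with as.

// Illustrative normalization of a cumulative discounted reward to [0, 1].
fn norm_cumulative_reward(sim: &dyn AgentSimulator, reward: f64) -> f64 {
    let gamma = sim.discount_gamma();
    let h = sim.horizon() as i32;
    // geom = sum_{t=0}^{h-1} gamma^t, with the undiscounted limit handled explicitly.
    let geom = if (gamma - 1.0).abs() < f64::EPSILON {
        h as f64
    } else {
        (1.0 - gamma.powi(h)) / (1.0 - gamma)
    };
    let min_total = geom * sim.min_reward() as f64;
    let range = geom * (sim.max_reward() as f64 - sim.min_reward() as f64);
    ((reward - min_total) / range).clamp(0.0, 1.0)
}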
fn gen_percepts_and_update(&mut self) -> (Vec<PerceptVal>, Reward)
Helper that generates a percept stream, updates the model, and returns a search key and reward.
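One plausible composition of this helper from the required methods, shown only to make the data flow concrete; the crate's default may draw bits in a different order or granularity.

// Illustrative: draw each observation symbol, then the reward bits, map the
// stream to a branching key, and undo the reward offset.
fn gen_percepts_and_update_sketch(sim: &mut dyn AgentSimulator) -> (Vec<PerceptVal>, Reward) {
    let obs_bits = sim.get_num_observation_bits();
    let stream: Vec<PerceptVal> = (0..sim.observation_stream_len())
        .map(|_| sim.gen_percept_and_update(obs_bits) as PerceptVal) // assumes PerceptVal is a u64-like alias
        .collect();
    let reward_bits = sim.get_num_reward_bits();
    let raw = sim.gen_percept_and_update(reward_bits);
    let reward = (raw as i64 - sim.reward_offset()) as Reward; // assumes Reward is numeric
    (sim.observation_repr_from_stream(&stream), reward)
}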