pub trait AgentSimulator: Send + Sync {
    // Required methods
    fn get_num_actions(&self) -> usize;
    fn get_num_observation_bits(&self) -> usize;
    fn get_num_reward_bits(&self) -> usize;
    fn horizon(&self) -> usize;
    fn max_reward(&self) -> Reward;
    fn min_reward(&self) -> Reward;
    fn model_update_action(&mut self, action: Action);
    fn gen_percept_and_update(&mut self, bits: usize) -> u64;
    fn model_revert(&mut self, steps: usize);
    fn gen_range(&mut self, end: usize) -> usize;
    fn gen_f64(&mut self) -> f64;
    fn boxed_clone_with_seed(&self, seed: u64) -> Box<dyn AgentSimulator>;

    // Provided methods
    fn observation_stream_len(&self) -> usize { ... }
    fn observation_key_mode(&self) -> ObservationKeyMode { ... }
    fn observation_repr_from_stream(
        &self,
        observations: &[PerceptVal],
    ) -> Vec<PerceptVal> { ... }
    fn reward_offset(&self) -> i64 { ... }
    fn get_explore_exploit_ratio(&self) -> f64 { ... }
    fn discount_gamma(&self) -> f64 { ... }
    fn boxed_clone(&self) -> Box<dyn AgentSimulator> { ... }
    fn norm_reward(&self, reward: f64) -> f64 { ... }
    fn gen_percepts_and_update(&mut self) -> (Vec<PerceptVal>, Reward) { ... }
}
Interface for an agent that can be simulated during MCTS.
This trait allows the MCTS algorithm to interact with an agent
(like Agent in agent.rs) to perform “imagined” actions and
receive “imagined” percepts during planning.
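A minimal sketch of how an MCTS rollout might drive this trait. The function below is illustrative only; it assumes Action, Reward, and PerceptVal are plain integer/float aliases from the crate, and that one model_revert step undoes one action/percept cycle.

// Illustrative rollout: simulate `depth` uniformly random actions on the
// model, accumulate the discounted reward, then revert so the caller's
// model state is untouched.
fn random_rollout(sim: &mut dyn AgentSimulator, depth: usize) -> f64 {
    let gamma = sim.discount_gamma();
    let num_actions = sim.get_num_actions();
    let mut total = 0.0;
    let mut discount = 1.0;
    for _ in 0..depth {
        // Push an "imagined" action into the model...
        let action = sim.gen_range(num_actions) as Action; // assumes Action is an integer alias
        sim.model_update_action(action);
        // ...and sample the "imagined" percept and reward it induces.
        let (_obs_key, reward) = sim.gen_percepts_and_update();
        total += discount * reward as f64; // assumes Reward converts with `as`
        discount *= gamma;
    }
    // Roll the model back; assumes one revert step per action/percept cycle.
    sim.model_revert(depth);
    // Map the cumulative discounted reward into [0, 1] for backpropagation.
    sim.norm_reward(total)
}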
Required Methods
fn get_num_actions(&self) -> usize
Returns the number of possible actions the agent can perform.
fn get_num_observation_bits(&self) -> usize
Returns the bit-width used to encode observations.
fn get_num_reward_bits(&self) -> usize
Returns the bit-width used to encode rewards.
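For orientation, one plausible way these bit-widths combine with the stream length; this accounting is an assumption (in particular, that get_num_observation_bits is the width of a single observation symbol), and the crate may count bits differently.

// Illustrative only: total bits drawn from the model for one percept.
fn percept_bits(sim: &dyn AgentSimulator) -> usize {
    sim.observation_stream_len() * sim.get_num_observation_bits() + sim.get_num_reward_bits()
}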
fn max_reward(&self) -> Reward
Returns the maximum possible reward value.
fn min_reward(&self) -> Reward
Returns the minimum possible reward value.
fn model_update_action(&mut self, action: Action)
Updates the internal model state with a simulated action.
fn gen_percept_and_update(&mut self, bits: usize) -> u64
Generates a simulated percept of the requested bit-width and updates the model state.
fn model_revert(&mut self, steps: usize)
Reverts the model state by the given number of simulation steps.
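A small sketch of the update/revert pairing; the one-step-per-cycle assumption below is illustrative and should be checked against the implementing agent.

// Illustrative: try one action, sample the imagined outcome, then roll the
// model back so the caller observes no state change.
fn peek_reward(sim: &mut dyn AgentSimulator, action: Action) -> Reward {
    sim.model_update_action(action);
    let (_key, reward) = sim.gen_percepts_and_update();
    sim.model_revert(1); // assumes one revert step covers an action/percept cycle
    reward
}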
fn boxed_clone_with_seed(&self, seed: u64) -> Box<dyn AgentSimulator>
Creates a boxed clone of this simulator, re-seeding any RNG state.
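A hedged sketch of how re-seeded clones could back a parallel search; the worker setup and the xor-based seeding are illustrative, not something the trait prescribes. It reuses the random_rollout sketch from the trait description above.

use std::thread;

// Illustrative: one independent rollout per worker, each on its own
// re-seeded clone so workers share neither RNG state nor model state.
fn parallel_rollouts(sim: &dyn AgentSimulator, workers: u64, depth: usize) -> Vec<f64> {
    thread::scope(|scope| {
        let handles: Vec<_> = (0..workers)
            .map(|i| {
                let mut local = sim.boxed_clone_with_seed(0xC0FFEE ^ i); // hypothetical seed scheme
                scope.spawn(move || random_rollout(local.as_mut(), depth))
            })
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    })
}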
Provided Methods
fn observation_stream_len(&self) -> usize
Returns the number of observation symbols per action.
fn observation_key_mode(&self) -> ObservationKeyMode
Returns the observation key mode for search-tree branching.
fn observation_repr_from_stream(&self, observations: &[PerceptVal]) -> Vec<PerceptVal>
Returns the observation representation used for tree branching.
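As an example of why an override might exist, a hypothetical coarsening that keeps only the high bits of each symbol (assuming PerceptVal is an unsigned-integer alias), so that similar observation streams share a chance node in the search tree.

// Hypothetical coarsening of a percept stream into a branching key; the
// shift width 4 is arbitrary and purely illustrative.
fn coarse_observation_key(observations: &[PerceptVal]) -> Vec<PerceptVal> {
    observations.iter().map(|&o| o >> 4).collect()
}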
fn reward_offset(&self) -> i64
Returns the reward offset used to ensure encoded rewards are non-negative.
Paper-compatible encoding uses unsigned reward bits and shifts rewards by an offset.
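A sketch of the shift-by-offset encoding described above; the exact bit layout used by the crate may differ.

// Illustrative: shift a signed reward by the offset so it becomes a
// non-negative value that fits in get_num_reward_bits() unsigned bits.
fn encode_reward(sim: &dyn AgentSimulator, reward: i64) -> u64 {
    let shifted = reward + sim.reward_offset();
    debug_assert!(shifted >= 0 && (shifted as u64) < (1u64 << sim.get_num_reward_bits()));
    shifted as u64
}

// Inverse of the above: recover the signed reward from its encoded form.
fn decode_reward(sim: &dyn AgentSimulator, encoded: u64) -> i64 {
    encoded as i64 - sim.reward_offset()
}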
fn get_explore_exploit_ratio(&self) -> f64
Returns the exploration-exploitation constant (often denoted as C).
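For context, a standard UCB1/UCT action-selection score in which this constant typically appears; the visit-count bookkeeping is invented for illustration.

// Illustrative UCT score: exploit the mean normalized value, explore
// under-visited children, weighted by C = get_explore_exploit_ratio().
fn uct_score(sim: &dyn AgentSimulator, mean_value: f64, parent_visits: f64, child_visits: f64) -> f64 {
    if child_visits == 0.0 {
        return f64::INFINITY; // unvisited children are tried first
    }
    mean_value + sim.get_explore_exploit_ratio() * (parent_visits.ln() / child_visits).sqrt()
}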
fn discount_gamma(&self) -> f64
Returns the discount factor for future rewards.
fn boxed_clone(&self) -> Box<dyn AgentSimulator>
Creates a boxed clone of this simulator for parallel search.
fn norm_reward(&self, reward: f64) -> f64
Normalizes a reward value to [0, 1] based on the agent’s range and horizon.
For discounted rewards, the cumulative range is sum_{t=0}^{h-1} gamma^t * (max - min).
Similarly, the minimum cumulative reward is sum_{t=0}^{h-1} gamma^t * min.
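One way the quoted ranges combine into a normalization; the trait's default may differ in details such as clamping. The sketch assumes Reward converts to f64 with as.

// Illustrative normalization of a cumulative discounted reward to [0, 1].
fn norm_cumulative_reward(sim: &dyn AgentSimulator, reward: f64) -> f64 {
    let gamma = sim.discount_gamma();
    let h = sim.horizon() as i32;
    // geom = sum_{t=0}^{h-1} gamma^t, with the undiscounted limit handled explicitly.
    let geom = if (gamma - 1.0).abs() < f64::EPSILON {
        h as f64
    } else {
        (1.0 - gamma.powi(h)) / (1.0 - gamma)
    };
    let min_total = geom * sim.min_reward() as f64;
    let range = geom * (sim.max_reward() as f64 - sim.min_reward() as f64);
    ((reward - min_total) / range).clamp(0.0, 1.0)
}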
fn gen_percepts_and_update(&mut self) -> (Vec<PerceptVal>, Reward)
Helper that generates a percept stream, updates the model, and returns a search key and reward.
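One plausible composition of this helper from the required methods, shown only to make the data flow concrete; the crate's default may draw bits in a different order or granularity.

// Illustrative: draw each observation symbol, then the reward bits, map the
// stream to a branching key, and undo the reward offset.
fn gen_percepts_and_update_sketch(sim: &mut dyn AgentSimulator) -> (Vec<PerceptVal>, Reward) {
    let obs_bits = sim.get_num_observation_bits();
    let stream: Vec<PerceptVal> = (0..sim.observation_stream_len())
        .map(|_| sim.gen_percept_and_update(obs_bits) as PerceptVal) // assumes PerceptVal is a u64-like alias
        .collect();
    let reward_bits = sim.get_num_reward_bits();
    let raw = sim.gen_percept_and_update(reward_bits);
    let reward = (raw as i64 - sim.reward_offset()) as Reward; // assumes Reward is numeric
    (sim.observation_repr_from_stream(&stream), reward)
}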