From 6aa9002e92567770c538a82aab580158a170707b Mon Sep 17 00:00:00 2001 From: David Kruger Date: Mon, 30 Jun 2025 21:41:49 -0700 Subject: [PATCH] GameState::reward_for_player is no longer a trait function We need GameState::rewards_for_players implemented; the library currently doesn't need the reward for a single player on its own. --- benches/e2e.rs | 26 +++++++++++++------------- examples/auto_tic_tac_toe.rs | 26 +++++++++++++------------- examples/tic_tac_toe.rs | 26 +++++++++++++------------- src/state.rs | 11 ++++------- 4 files changed, 43 insertions(+), 46 deletions(-) diff --git a/benches/e2e.rs b/benches/e2e.rs index b8f27a8..11397c0 100644 --- a/benches/e2e.rs +++ b/benches/e2e.rs @@ -125,6 +125,19 @@ impl TicTacToe { None } + + fn reward_for_player(&self, player: &Player) -> RewardVal { + if let Some(winner) = self.get_winner() { + if winner == *player { + return 1.0; // Win + } else { + return 0.0; // Loss + } + } + + // Draw + 0.5 + } } impl GameState for TicTacToe { @@ -161,19 +174,6 @@ impl GameState for TicTacToe { self.get_winner().is_some() || self.moves_played == 9 } - fn reward_for_player(&self, player: &Self::Player) -> RewardVal { - if let Some(winner) = self.get_winner() { - if winner == *player { - return 1.0; // Win - } else { - return 0.0; // Loss - } - } - - // Draw - 0.5 - } - fn rewards_for_players(&self) -> HashMap { HashMap::from_iter(vec![ (Player::X, self.reward_for_player(&Player::X)), diff --git a/examples/auto_tic_tac_toe.rs b/examples/auto_tic_tac_toe.rs index 1183ea5..819a5f6 100644 --- a/examples/auto_tic_tac_toe.rs +++ b/examples/auto_tic_tac_toe.rs @@ -148,6 +148,19 @@ impl TicTacToe { None } + + fn reward_for_player(&self, player: &Player) -> RewardVal { + if let Some(winner) = self.get_winner() { + if winner == *player { + return 1.0; // Win + } else { + return 0.0; // Loss + } + } + + // Draw + 0.5 + } } impl GameState for TicTacToe { @@ -184,19 +197,6 @@ impl GameState for TicTacToe { self.get_winner().is_some() || 
self.moves_played == 9 } - fn reward_for_player(&self, player: &Self::Player) -> RewardVal { - if let Some(winner) = self.get_winner() { - if winner == *player { - return 1.0; // Win - } else { - return 0.0; // Loss - } - } - - // Draw - 0.5 - } - fn rewards_for_players(&self) -> HashMap { HashMap::from_iter(vec![ (Player::X, self.reward_for_player(&Player::X)), diff --git a/examples/tic_tac_toe.rs b/examples/tic_tac_toe.rs index c0bc3ec..09053b9 100644 --- a/examples/tic_tac_toe.rs +++ b/examples/tic_tac_toe.rs @@ -191,6 +191,19 @@ impl TicTacToe { None } + + fn reward_for_player(&self, player: &Player) -> RewardVal { + if let Some(winner) = self.get_winner() { + if winner == *player { + return 1.0; // Win + } else { + return 0.0; // Loss + } + } + + // Draw + 0.5 + } } impl GameState for TicTacToe { @@ -227,19 +240,6 @@ impl GameState for TicTacToe { self.get_winner().is_some() || self.moves_played == 9 } - fn reward_for_player(&self, player: &Self::Player) -> RewardVal { - if let Some(winner) = self.get_winner() { - if winner == *player { - return 1.0; // Win - } else { - return 0.0; // Loss - } - } - - // Draw - 0.5 - } - fn rewards_for_players(&self) -> HashMap { HashMap::from_iter(vec![ (Player::X, self.reward_for_player(&Player::X)), diff --git a/src/state.rs b/src/state.rs index 2048962..250b4f4 100644 --- a/src/state.rs +++ b/src/state.rs @@ -40,11 +40,11 @@ pub trait GameState: Clone + Debug { /// instead should modify a copy of the state and return that. fn state_after_action(&self, action: &Self::Action) -> Self; - /// Returns the reward from the perspective of the given player for the game state + /// Returns the rewards for all players from their perspective for the game state /// - /// This evaluates the current state from the perspective of the given player, and - /// returns the reward indicating how good of a result the given state is for the - /// player. 
+ /// This evaluates the current state from the perspective of each player, and + /// returns a HashMap mapping each player to the result of this evaluation, which + /// we call the reward. /// /// This is used in the MCTS backpropagation and simulation phases to evaluate /// the value of a given node in the search tree. @@ -55,9 +55,6 @@ pub trait GameState: Clone + Debug { /// - 0.0 => a loss for the player /// /// Other values can be used for relative wins or losses - fn reward_for_player(&self, player: &Self::Player) -> RewardVal; - - /// Returns the rewards for all players at the current state fn rewards_for_players(&self) -> HashMap; /// Returns the player whose turn it is for the game state