Compare commits

...

3 Commits

6aa9002e92  GameState::reward_for_player is no longer a trait function
            We need GameState::rewards_for_players implemented, the library
            currently doesn't need to specify for a singular player.
            2025-06-30 21:41:49 -07:00

76051cd76b  Removing the id field from the Action trait
            This would in theory be useful for a transposition table, but we do not
            currently support that. As such I don't want to burden the
            implementation with that field until it is deemed necessary.
            2025-06-30 21:41:27 -07:00

a7102a0e44  Fixed missing non terminal error
            2025-06-30 20:08:18 -07:00
6 changed files with 57 additions and 68 deletions

View File

@@ -16,7 +16,7 @@ rand = "~0.9"
 thiserror = "~2.0"
 [dev-dependencies]
-divan = "0.1.21"
+divan = "~0.1"
 [[bench]]
 name = "e2e"

View File

@@ -61,11 +61,7 @@ struct Move {
     index: usize,
 }
-impl Action for Move {
-    fn id(&self) -> usize {
-        self.index
-    }
-}
+impl Action for Move {}
 /// Tic-Tac-Toe game state
 #[derive(Debug, Clone)]
@@ -129,6 +125,19 @@ impl TicTacToe {
         None
     }
+    fn reward_for_player(&self, player: &Player) -> RewardVal {
+        if let Some(winner) = self.get_winner() {
+            if winner == *player {
+                return 1.0; // Win
+            } else {
+                return 0.0; // Loss
+            }
+        }
+        // Draw
+        0.5
+    }
 }
 impl GameState for TicTacToe {
@@ -165,19 +174,6 @@ impl GameState for TicTacToe {
         self.get_winner().is_some() || self.moves_played == 9
     }
-    fn reward_for_player(&self, player: &Self::Player) -> RewardVal {
-        if let Some(winner) = self.get_winner() {
-            if winner == *player {
-                return 1.0; // Win
-            } else {
-                return 0.0; // Loss
-            }
-        }
-        // Draw
-        0.5
-    }
     fn rewards_for_players(&self) -> HashMap<Self::Player, RewardVal> {
         HashMap::from_iter(vec![
             (Player::X, self.reward_for_player(&Player::X)),

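All three Tic-Tac-Toe examples get the same reshuffle: reward_for_player moves out of the GameState impl and becomes an inherent helper on the game type, and the trait's rewards_for_players builds its map from that helper. A standalone, simplified sketch of the resulting pattern (the types below are stand-ins, not the example's full code):

    use std::collections::HashMap;

    type RewardVal = f64; // assumed to be a float alias, as in the library

    #[derive(Clone, Debug, PartialEq, Eq, Hash)]
    enum Player {
        X,
        O,
    }

    // Stand-in for the example's game state; only the reward logic is shown.
    struct Game {
        winner: Option<Player>,
    }

    impl Game {
        // Inherent helper: the per-player reward, no longer part of the trait.
        fn reward_for_player(&self, player: &Player) -> RewardVal {
            match &self.winner {
                Some(w) if w == player => 1.0, // win
                Some(_) => 0.0,                // loss
                None => 0.5,                   // draw
            }
        }

        // The trait-facing entry point: one map with every player's reward.
        fn rewards_for_players(&self) -> HashMap<Player, RewardVal> {
            HashMap::from_iter([
                (Player::X, self.reward_for_player(&Player::X)),
                (Player::O, self.reward_for_player(&Player::O)),
            ])
        }
    }

    fn main() {
        let game = Game { winner: Some(Player::X) };
        println!("{:?}", game.rewards_for_players()); // e.g. {X: 1.0, O: 0.0}
    }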
View File

@@ -84,11 +84,7 @@ struct Move {
     index: usize,
 }
-impl Action for Move {
-    fn id(&self) -> usize {
-        self.index
-    }
-}
+impl Action for Move {}
 /// Tic-Tac-Toe game state
 #[derive(Clone)]
@@ -152,6 +148,19 @@ impl TicTacToe {
         None
     }
+    fn reward_for_player(&self, player: &Player) -> RewardVal {
+        if let Some(winner) = self.get_winner() {
+            if winner == *player {
+                return 1.0; // Win
+            } else {
+                return 0.0; // Loss
+            }
+        }
+        // Draw
+        0.5
+    }
 }
 impl GameState for TicTacToe {
@@ -188,19 +197,6 @@ impl GameState for TicTacToe {
         self.get_winner().is_some() || self.moves_played == 9
     }
-    fn reward_for_player(&self, player: &Self::Player) -> RewardVal {
-        if let Some(winner) = self.get_winner() {
-            if winner == *player {
-                return 1.0; // Win
-            } else {
-                return 0.0; // Loss
-            }
-        }
-        // Draw
-        0.5
-    }
     fn rewards_for_players(&self) -> HashMap<Self::Player, RewardVal> {
         HashMap::from_iter(vec![
             (Player::X, self.reward_for_player(&Player::X)),

View File

@@ -119,11 +119,7 @@ struct Move {
     index: usize,
 }
-impl Action for Move {
-    fn id(&self) -> usize {
-        self.index
-    }
-}
+impl Action for Move {}
 /// Tic-Tac-Toe game state
 #[derive(Clone)]
@@ -195,6 +191,19 @@ impl TicTacToe {
         None
     }
+    fn reward_for_player(&self, player: &Player) -> RewardVal {
+        if let Some(winner) = self.get_winner() {
+            if winner == *player {
+                return 1.0; // Win
+            } else {
+                return 0.0; // Loss
+            }
+        }
+        // Draw
+        0.5
+    }
 }
 impl GameState for TicTacToe {
@@ -231,19 +240,6 @@ impl GameState for TicTacToe {
         self.get_winner().is_some() || self.moves_played == 9
     }
-    fn reward_for_player(&self, player: &Self::Player) -> RewardVal {
-        if let Some(winner) = self.get_winner() {
-            if winner == *player {
-                return 1.0; // Win
-            } else {
-                return 0.0; // Loss
-            }
-        }
-        // Draw
-        0.5
-    }
     fn rewards_for_players(&self) -> HashMap<Self::Player, RewardVal> {
         HashMap::from_iter(vec![
             (Player::X, self.reward_for_player(&Player::X)),

View File

@@ -75,9 +75,16 @@ impl<'conf, S: GameState + std::fmt::Debug> MCTS<'conf, S> {
         if !selected_node.state.is_terminal() {
             self.expand(selected_id);
             let children: &Vec<usize> = &self.arena.get_node(selected_id).children;
-            let random_child: usize = *children.choose(&mut rand::rng()).unwrap();
+            match children.choose(&mut rand::rng()) {
+                Some(&random_child) => {
+                    selected_id = random_child;
+                }
+                None => {
+                    // We ran out of nodes
+                    return Err(MCTSError::NonTerminalGame);
+                }
+            }
         }
         let rewards = self.simulate(selected_id);
         self.backprop(selected_id, &rewards);

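The fix replaces the unwrap on choose with an explicit match, so a state that claims to be non-terminal but produces no children comes back as an error instead of a panic. A self-contained sketch of the same pattern against rand 0.9 (the error enum below is a hypothetical stand-in for the library's MCTSError):

    use rand::prelude::*; // brings the slice `choose` extension trait into scope

    // Hypothetical stand-in for the library's MCTSError.
    #[derive(Debug)]
    enum SelectError {
        NonTerminalGame,
    }

    // Pick a random child id, turning "no children" into an error instead of a panic.
    fn pick_random_child(children: &[usize]) -> Result<usize, SelectError> {
        match children.choose(&mut rand::rng()) {
            Some(&child) => Ok(child),
            None => Err(SelectError::NonTerminalGame),
        }
    }

    fn main() {
        assert!(pick_random_child(&[]).is_err());
        assert!(pick_random_child(&[4, 8, 15]).is_ok());
    }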
View File

@@ -40,11 +40,11 @@ pub trait GameState: Clone + Debug {
     /// instead should modify a copy of the state and return that.
     fn state_after_action(&self, action: &Self::Action) -> Self;
-    /// Returns the reward from the perspective of the given player for the game state
+    /// Returns the rewards for all players from their perspective for the game state
     ///
-    /// This evaluates the current state from the perspective of the given player, and
-    /// returns the reward indicating how good of a result the given state is for the
-    /// player.
+    /// This evaluates the current state from the perspective of each player, and
+    /// returns a HashMap mapping each player to the result of this evaluation, which
+    /// we call the reward.
     ///
     /// This is used in the MCTS backpropagation and simulation phases to evaluate
     /// the value of a given node in the search tree.
@@ -55,9 +55,6 @@ pub trait GameState: Clone + Debug {
     /// - 0.0 => a loss for the player
     ///
    /// Other values can be used for relative wins or losses
-    fn reward_for_player(&self, player: &Self::Player) -> RewardVal;
-    /// Returns the rewards for all players at the current state
     fn rewards_for_players(&self) -> HashMap<Self::Player, RewardVal>;
     /// Returns the player whose turn it is for the game state
@@ -72,10 +69,7 @@
 ///
 /// An action is dependent upon the specific game being defined, and includes
 /// things like moves, attacks, and other decisions.
-pub trait Action: Clone + Debug {
-    /// Returns a uniqie identifier for this action
-    fn id(&self) -> usize;
-}
+pub trait Action: Clone + Debug {}
 /// Trait used for players participating in a game
 pub trait Player: Clone + Debug + PartialEq + Eq + Hash {}
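With id gone, Action is a pure marker trait: an implementor only needs Clone + Debug and an empty impl block. If a transposition table is added later, one possibility (purely hypothetical, not part of this change) is an opt-in extension trait, so that only games that want an identifier provide one:

    use std::fmt::Debug;

    // The slimmed-down contract from this change: nothing beyond Clone + Debug.
    pub trait Action: Clone + Debug {}

    #[derive(Clone, Debug)]
    pub struct Move {
        pub index: usize,
    }

    impl Action for Move {}

    // Hypothetical, not in the library: an opt-in trait that a future transposition
    // table could require, instead of burdening every Action with `id`.
    pub trait IdentifiableAction: Action {
        fn id(&self) -> usize;
    }

    impl IdentifiableAction for Move {
        fn id(&self) -> usize {
            self.index
        }
    }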