We propagate the reward for both sides

The AI now properly chooses the optimal path for the active player
David Kruger 2025-06-27 16:07:33 -07:00
parent 0f9d4f0c4e
commit 6a33818238


@@ -69,20 +69,16 @@ fn standard_backprop<S: GameState>(
     let mut current_id: usize = node_id;
     loop {
         let node = arena.get_node_mut(current_id);
-        let player = node.state.get_current_player().clone();
-        match rewards.get(&player) {
-            Some(reward) => {
-                node.increment_visits();
-                node.record_player_reward(player, *reward);
-                if let Some(parent_id) = node.parent {
-                    current_id = parent_id;
-                } else {
-                    break;
-                }
-            }
-            None => (),
+        node.increment_visits();
+        for (player, reward) in rewards.iter() {
+            node.record_player_reward(player.clone(), *reward);
+        }
+        if let Some(parent_id) = node.parent {
+            current_id = parent_id;
+        } else {
+            break;
         }
     }
 }
 
 fn weighted_backprop<S: GameState>(
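Why this matters: the old loop looked up only the reward for the node's current player, and when rewards.get(&player) returned None the match arm did nothing, so the node was neither credited nor advanced past. The new code increments the visit count once and then credits every player's reward at every node on the path back to the root, which is what lets selection evaluate each node from the active player's perspective. Below is a minimal, self-contained sketch of the new scheme; Player, Node, and backprop are hypothetical stand-ins, since the diff does not show the crate's actual GameState, node, or arena types.

use std::collections::HashMap;

// Hypothetical stand-ins for the crate's arena and node types (assumption:
// nodes live in an arena indexed by usize, as the diff suggests).
#[derive(Clone, PartialEq, Eq, Hash)]
enum Player {
    One,
    Two,
}

struct Node {
    parent: Option<usize>,
    visits: u64,
    // Accumulated reward per player, mirroring record_player_reward.
    rewards: HashMap<Player, f64>,
}

impl Node {
    fn increment_visits(&mut self) {
        self.visits += 1;
    }

    fn record_player_reward(&mut self, player: Player, reward: f64) {
        *self.rewards.entry(player).or_insert(0.0) += reward;
    }
}

// Walk from a leaf to the root, crediting every player's reward at every
// node, so selection can later read a node's value from either player's view.
fn backprop(nodes: &mut [Node], leaf_id: usize, rewards: &HashMap<Player, f64>) {
    let mut current_id = leaf_id;
    loop {
        let node = &mut nodes[current_id];
        node.increment_visits();
        for (player, reward) in rewards.iter() {
            node.record_player_reward(player.clone(), *reward);
        }
        match node.parent {
            Some(parent_id) => current_id = parent_id,
            None => break,
        }
    }
}

fn main() {
    let mut nodes = vec![
        Node { parent: None, visits: 0, rewards: HashMap::new() },    // root
        Node { parent: Some(0), visits: 0, rewards: HashMap::new() }, // leaf
    ];
    // A zero-sum result: a win for One is a loss for Two.
    let result = HashMap::from([(Player::One, 1.0), (Player::Two, -1.0)]);
    backprop(&mut nodes, 1, &result);
    assert_eq!(nodes[0].visits, 1);
    assert_eq!(nodes[0].rewards[&Player::Two], -1.0);
}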
@@ -94,21 +90,16 @@ fn weighted_backprop<S: GameState>(
     let mut current_id: usize = node_id;
     loop {
         let node = arena.get_node_mut(current_id);
-        let player = node.state.get_current_player().clone();
         let weight = weight_for_depth(depth_factor, node.depth);
-        match rewards.get(&player) {
-            Some(reward) => {
-                node.increment_visits();
-                node.record_player_reward(player, (*reward) * weight);
-                if let Some(parent_id) = node.parent {
-                    current_id = parent_id;
-                } else {
-                    break;
-                }
-            }
-            None => (),
+        for (player, reward) in rewards.iter() {
+            node.record_player_reward(player.clone(), (*reward) * weight);
+        }
+        if let Some(parent_id) = node.parent {
+            current_id = parent_id;
+        } else {
+            break;
         }
     }
 }
 
 /// Calculate the weight based on the current depth
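The weighted variant applies the same per-player propagation, but scales each reward by a depth-dependent weight before recording it. The body of weight_for_depth falls outside this hunk; the sketch below uses an exponential-decay form purely as an illustration of how a depth_factor could discount rewards recorded deeper in the tree, and is not the crate's actual formula.

// Hypothetical stand-in for weight_for_depth; the real body is not shown
// in this diff, so this exponential decay is only an assumed illustration.
fn weight_for_depth(depth_factor: f64, depth: usize) -> f64 {
    depth_factor.powi(depth as i32)
}

fn main() {
    // With depth_factor = 0.9, rewards near the root count for more than
    // rewards recorded deep in the tree: 0.9^0 = 1.0, 0.9^3 = 0.729.
    for depth in 0..4 {
        println!("depth {depth}: weight {:.3}", weight_for_depth(0.9, depth));
    }
}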