Now possible to prohibit a Collection of Actions from being included in the List of returned actions (or as a single returned value). All unit tests now pass with the exception of MonteCarloUCT. TODO: playerToMove and previousPlayerPassed should be made part of the GameState. This would remove the superfluous Player parameter from many methods and make it possible to check for the "I'm ahead and my opponent is offering to end the game" killer move.
134 lines
5.1 KiB
Java
package net.woodyfolsom.msproj.policy;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.Collection;
|
|
import java.util.List;
|
|
import java.util.Set;
|
|
|
|
import net.woodyfolsom.msproj.Action;
|
|
import net.woodyfolsom.msproj.GameConfig;
|
|
import net.woodyfolsom.msproj.GameState;
|
|
import net.woodyfolsom.msproj.GoGame;
|
|
import net.woodyfolsom.msproj.Player;
|
|
import net.woodyfolsom.msproj.StateEvaluator;
|
|
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
|
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
|
|
|
|
public class MonteCarloUCT extends MonteCarlo {
|
|
public static final double TUNING_CONSTANT = 0.50;
|
|
|
|
public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) {
|
|
super(movePolicy, searchTimeLimit);
|
|
}
|
|
|
|
@Override
|
|
public List<GameTreeNode<MonteCarloProperties>> descend(GameTreeNode<MonteCarloProperties> node) {
|
|
double bestScore = Double.NEGATIVE_INFINITY;
|
|
GameTreeNode<MonteCarloProperties> bestNode = node;
|
|
|
|
//TODO: WHAT TO DO if the optimum leaf node is actually a terminal node?
|
|
//from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout
|
|
double nodeVisits = node.getProperties().getVisits();
|
|
|
|
for (Action action : node.getActions()) {
|
|
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
|
|
|
|
MonteCarloProperties properties = childNode.getProperties();
|
|
double childScore = (double) properties.getWins() / properties.getVisits() + TUNING_CONSTANT * Math.log(nodeVisits) / childNode.getProperties().getVisits();
|
|
|
|
if (childScore >= bestScore) {
|
|
bestScore = childScore;
|
|
bestNode = childNode;
|
|
}
|
|
}
|
|
|
|
if (bestNode == node) {
|
|
List<GameTreeNode<MonteCarloProperties>> bestNodeList = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
|
bestNodeList.add(bestNode);
|
|
return bestNodeList;
|
|
} else {
|
|
return descend(bestNode);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
|
|
Action bestAction = Action.NONE;
|
|
double bestScore = Double.NEGATIVE_INFINITY;
|
|
|
|
for (Action action : node.getActions()) {
|
|
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
|
|
|
|
MonteCarloProperties properties = childNode.getProperties();
|
|
double childScore = (double) properties.getWins() / properties.getVisits();
|
|
|
|
if (childScore >= bestScore) {
|
|
bestScore = childScore;
|
|
bestAction = action;
|
|
}
|
|
}
|
|
|
|
return bestAction;
|
|
}
|
|
|
|
@Override
|
|
public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player) {
|
|
GameState nextGameState = new GameState(node.getGameState());
|
|
Policy randomMovePolicy = new RandomMovePolicy();
|
|
Set<Action> exploredActions = node.getActions();
|
|
Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player);
|
|
if (Action.NONE == action) {
|
|
throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions);
|
|
}
|
|
nextGameState.playStone(player, action);
|
|
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
|
newChildren.add(new GameTreeNode<MonteCarloProperties>(nextGameState,new MonteCarloProperties()));
|
|
return newChildren;
|
|
}
|
|
|
|
@Override
|
|
/**
|
|
* Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
|
|
* since without (super)ko detection, there is no way to guarantee a rollout will terminate.
|
|
* Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
|
|
*/
|
|
public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player) {
|
|
Policy randomMovePolicy = new RandomMovePolicy();
|
|
|
|
Action action;
|
|
int rolloutDepth = 0;
|
|
GameState finalGameState = new GameState(node.getGameState());
|
|
Player currentPlayer = player;
|
|
do {
|
|
rolloutDepth++;
|
|
action = randomMovePolicy.getAction(gameConfig, node.getGameState(), player);
|
|
if (action != Action.NONE) {
|
|
finalGameState.playStone(currentPlayer, action);
|
|
currentPlayer = GoGame.getColorToPlay(currentPlayer, true);
|
|
}
|
|
} while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
|
|
|
|
if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
|
|
GameTreeNode<MonteCarloProperties> currentNode = node;
|
|
while (currentNode != null) {
|
|
MonteCarloProperties nodeProperties = node.getProperties();
|
|
nodeProperties.setWins(nodeProperties.getWins() + reward);
|
|
nodeProperties.setVisits(nodeProperties.getVisits() + 1);
|
|
currentNode = currentNode.getParent();
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public Action getAction(GameConfig gameConfig, GameState gameState,
|
|
Collection<Action> prohibitedActions, Player player) {
|
|
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
|
|
}
|
|
} |