package net.woodyfolsom.msproj.policy; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Set; import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.GoGame; import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.StateEvaluator; import net.woodyfolsom.msproj.tree.GameTreeNode; import net.woodyfolsom.msproj.tree.MonteCarloProperties; public class MonteCarloUCT extends MonteCarlo { public static final double TUNING_CONSTANT = 0.50; public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) { super(movePolicy, searchTimeLimit); } @Override public List> descend(GameTreeNode node) { double bestScore = Double.NEGATIVE_INFINITY; GameTreeNode bestNode = node; //TODO: WHAT TO DO if the optimum leaf node is actually a terminal node? //from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout double nodeVisits = node.getProperties().getVisits(); for (Action action : node.getActions()) { GameTreeNode childNode = node.getChild(action); MonteCarloProperties properties = childNode.getProperties(); double childScore = (double) properties.getWins() / properties.getVisits() + TUNING_CONSTANT * Math.log(nodeVisits) / childNode.getProperties().getVisits(); if (childScore >= bestScore) { bestScore = childScore; bestNode = childNode; } } if (bestNode == node) { List> bestNodeList = new ArrayList>(); bestNodeList.add(bestNode); return bestNodeList; } else { return descend(bestNode); } } @Override public Action getBestAction(GameTreeNode node) { Action bestAction = Action.NONE; double bestScore = Double.NEGATIVE_INFINITY; for (Action action : node.getActions()) { GameTreeNode childNode = node.getChild(action); MonteCarloProperties properties = childNode.getProperties(); double childScore = (double) properties.getWins() / properties.getVisits(); if (childScore >= bestScore) { bestScore = childScore; bestAction = action; } } return bestAction; } @Override public List> grow(GameConfig gameConfig, GameTreeNode node, Player player) { GameState nextGameState = new GameState(node.getGameState()); Policy randomMovePolicy = new RandomMovePolicy(); Set exploredActions = node.getActions(); Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player); if (Action.NONE == action) { throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions); } nextGameState.playStone(player, action); List> newChildren = new ArrayList>(); newChildren.add(new GameTreeNode(nextGameState,new MonteCarloProperties())); return newChildren; } @Override /** * Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter, * since without (super)ko detection, there is no way to guarantee a rollout will terminate. * Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts. */ public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode node, Player player) { Policy randomMovePolicy = new RandomMovePolicy(); Action action; int rolloutDepth = 0; GameState finalGameState = new GameState(node.getGameState()); Player currentPlayer = player; do { rolloutDepth++; action = randomMovePolicy.getAction(gameConfig, node.getGameState(), player); if (action != Action.NONE) { finalGameState.playStone(currentPlayer, action); currentPlayer = GoGame.getColorToPlay(currentPlayer, true); } } while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT); if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) { return 1; } else { return 0; } } @Override public void update(GameTreeNode node, int reward) { GameTreeNode currentNode = node; while (currentNode != null) { MonteCarloProperties nodeProperties = node.getProperties(); nodeProperties.setWins(nodeProperties.getWins() + reward); nodeProperties.setVisits(nodeProperties.getVisits() + 1); currentNode = currentNode.getParent(); } } @Override public Action getAction(GameConfig gameConfig, GameState gameState, Collection prohibitedActions, Player player) { throw new UnsupportedOperationException("Prohibited actions not supported by this class."); } }