Files
cs6601p1/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java
cs6601 d3c03f2c51 Updated RandomMoveGenerator to support MonteCarloUCT.
Now possible to prohibit a Collection of Actions from being included in the List of returned actions (or as a single returned value).
All unit tests now pass with the exception of MonteCarloUCT.

TODO: playerToMove and previousPlayerPassed should be made part of the GameState.
This would remove the superfluous Player parameter from many methods and make it possible
to check for the "I'm ahead and my opponent is offering to end the game" killer move.
2012-08-31 09:17:43 -04:00

134 lines
5.1 KiB
Java

package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.GoGame;
import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.StateEvaluator;
import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
/**
 * Monte-Carlo tree search using the UCT (Upper Confidence bounds applied to Trees)
 * selection rule of Kocsis &amp; Szepesvari. Selection descends the tree by the UCB1
 * score {@code wins/visits + C * sqrt(ln(parentVisits) / visits)}; expansion, rollout
 * and backpropagation are implemented as overrides of the {@code MonteCarlo} template.
 */
public class MonteCarloUCT extends MonteCarlo {
    /** Exploration constant C in the UCB1 term {@code C * sqrt(ln(N) / n)}. */
    public static final double TUNING_CONSTANT = 0.50;

    public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) {
        super(movePolicy, searchTimeLimit);
    }

    /**
     * Selection phase: repeatedly picks the child maximizing the UCB1 score until a
     * node with no better child is reached, and returns that node in a singleton list.
     *
     * <p>Fixes two defects in the previous version: the exploration term lacked the
     * {@code Math.sqrt} required by the UCB1 formula, and an unvisited child produced
     * {@code 0.0/0 == NaN}, which the {@code >=} comparison silently rejected so
     * unvisited children could never be selected.
     */
    @Override
    public List<GameTreeNode<MonteCarloProperties>> descend(GameTreeNode<MonteCarloProperties> node) {
        double bestScore = Double.NEGATIVE_INFINITY;
        GameTreeNode<MonteCarloProperties> bestNode = node;
        //TODO: WHAT TO DO if the optimum leaf node is actually a terminal node?
        //from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout
        double nodeVisits = node.getProperties().getVisits();
        for (Action action : node.getActions()) {
            GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
            MonteCarloProperties properties = childNode.getProperties();
            double childVisits = properties.getVisits();
            double childScore;
            if (childVisits == 0) {
                // An unvisited child is maximally urgent; previously this case
                // evaluated to NaN and the child could never be chosen.
                childScore = Double.POSITIVE_INFINITY;
            } else {
                // UCB1: exploitation (win rate) + exploration (C * sqrt(ln(N)/n)).
                childScore = (double) properties.getWins() / childVisits
                        + TUNING_CONSTANT * Math.sqrt(Math.log(nodeVisits) / childVisits);
            }
            if (childScore >= bestScore) {
                bestScore = childScore;
                bestNode = childNode;
            }
        }
        if (bestNode == node) {
            // No child improved on the current node: this is the selected leaf.
            List<GameTreeNode<MonteCarloProperties>> bestNodeList =
                    new ArrayList<GameTreeNode<MonteCarloProperties>>();
            bestNodeList.add(bestNode);
            return bestNodeList;
        } else {
            return descend(bestNode);
        }
    }

    /**
     * Final move choice: returns the explored action with the highest raw win rate
     * (no exploration term), or {@code Action.NONE} if the node has no visited children.
     */
    @Override
    public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
        Action bestAction = Action.NONE;
        double bestScore = Double.NEGATIVE_INFINITY;
        for (Action action : node.getActions()) {
            GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
            MonteCarloProperties properties = childNode.getProperties();
            if (properties.getVisits() == 0) {
                // Unvisited child: win rate is 0/0 (NaN). The old code's NaN comparison
                // skipped these implicitly; skip them explicitly instead.
                continue;
            }
            double childScore = (double) properties.getWins() / properties.getVisits();
            if (childScore >= bestScore) {
                bestScore = childScore;
                bestAction = action;
            }
        }
        return bestAction;
    }

    /**
     * Expansion phase: plays one random, not-yet-explored action from {@code node}'s
     * state and returns the resulting child wrapped in a singleton list.
     *
     * @throws RuntimeException if the random policy cannot find an unexplored action.
     */
    @Override
    public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player) {
        GameState nextGameState = new GameState(node.getGameState());
        Policy randomMovePolicy = new RandomMovePolicy();
        // Actions already expanded below this node; the policy must avoid them.
        Set<Action> exploredActions = node.getActions();
        Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player);
        if (Action.NONE == action) {
            throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions);
        }
        nextGameState.playStone(player, action);
        List<GameTreeNode<MonteCarloProperties>> newChildren =
                new ArrayList<GameTreeNode<MonteCarloProperties>>();
        newChildren.add(new GameTreeNode<MonteCarloProperties>(nextGameState, new MonteCarloProperties()));
        return newChildren;
    }

    /**
     * Simulation phase: plays random moves from {@code node}'s state until the policy
     * returns {@code Action.NONE} or the depth limit is hit, then scores the final
     * position. Returns 1 if {@code player} wins the playout, else 0.
     *
     * <p>Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
     * since without (super)ko detection, there is no way to guarantee a rollout will terminate.
     * Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
     *
     * <p>Fixes a defect in the previous version: the policy was queried with the root
     * node's state and the root player on every iteration, so the simulation never
     * advanced past the starting position and never alternated the player to move.
     */
    @Override
    public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player) {
        Policy randomMovePolicy = new RandomMovePolicy();
        Action action;
        int rolloutDepth = 0;
        GameState finalGameState = new GameState(node.getGameState());
        Player currentPlayer = player;
        do {
            rolloutDepth++;
            // BUG FIX: query the policy with the evolving playout state and the
            // player actually to move (was: node.getGameState(), player).
            action = randomMovePolicy.getAction(gameConfig, finalGameState, currentPlayer);
            if (action != Action.NONE) {
                finalGameState.playStone(currentPlayer, action);
                currentPlayer = GoGame.getColorToPlay(currentPlayer, true);
            }
        } while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
        if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) {
            return 1;
        } else {
            return 0;
        }
    }

    /**
     * Backpropagation phase: adds {@code reward} to the win count and increments the
     * visit count of {@code node} and every ancestor up to the root.
     *
     * <p>Fixes a defect in the previous version: the loop read {@code node}'s
     * properties on every iteration instead of {@code currentNode}'s, so the leaf
     * was updated once per ancestor and the ancestors were never updated at all.
     */
    @Override
    public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
        GameTreeNode<MonteCarloProperties> currentNode = node;
        while (currentNode != null) {
            MonteCarloProperties nodeProperties = currentNode.getProperties();
            nodeProperties.setWins(nodeProperties.getWins() + reward);
            nodeProperties.setVisits(nodeProperties.getVisits() + 1);
            currentNode = currentNode.getParent();
        }
    }

    /**
     * Prohibited-action search is not supported by this policy.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Action getAction(GameConfig gameConfig, GameState gameState,
            Collection<Action> prohibitedActions, Player player) {
        throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
    }
}