Now possible to prohibit a Collection of Actions from being included in the List of returned actions (or as a single returned value). All unit tests now pass with the exception of MonteCarloUCT. TODO: playerToMove and previousPlayerPassed should be made part of the GameState. This would remove the superfluous Player parameter from many methods and make it possible to check for the "I'm ahead and my opponent is offering to end the game" killer move.
134 lines
5.1 KiB
Java
package net.woodyfolsom.msproj.policy;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.Collection;
|
|
import java.util.List;
|
|
import java.util.Set;
|
|
|
|
import net.woodyfolsom.msproj.Action;
|
|
import net.woodyfolsom.msproj.GameConfig;
|
|
import net.woodyfolsom.msproj.GameState;
|
|
import net.woodyfolsom.msproj.GoGame;
|
|
import net.woodyfolsom.msproj.Player;
|
|
import net.woodyfolsom.msproj.StateEvaluator;
|
|
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
|
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
|
|
|
|
public class MonteCarloUCT extends MonteCarlo {
|
|
public static final double TUNING_CONSTANT = 0.50;
|
|
|
|
public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) {
|
|
super(movePolicy, searchTimeLimit);
|
|
}
|
|
|
|
@Override
|
|
public List<GameTreeNode<MonteCarloProperties>> descend(GameTreeNode<MonteCarloProperties> node) {
|
|
double bestScore = Double.NEGATIVE_INFINITY;
|
|
GameTreeNode<MonteCarloProperties> bestNode = node;
|
|
|
|
//TODO: WHAT TO DO if the optimum leaf node is actually a terminal node?
|
|
//from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout
|
|
double nodeVisits = node.getProperties().getVisits();
|
|
|
|
for (Action action : node.getActions()) {
|
|
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
|
|
|
|
MonteCarloProperties properties = childNode.getProperties();
|
|
double childScore = (double) properties.getWins() / properties.getVisits() + TUNING_CONSTANT * Math.log(nodeVisits) / childNode.getProperties().getVisits();
|
|
|
|
if (childScore >= bestScore) {
|
|
bestScore = childScore;
|
|
bestNode = childNode;
|
|
}
|
|
}
|
|
|
|
if (bestNode == node) {
|
|
List<GameTreeNode<MonteCarloProperties>> bestNodeList = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
|
bestNodeList.add(bestNode);
|
|
return bestNodeList;
|
|
} else {
|
|
return descend(bestNode);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
|
|
Action bestAction = Action.NONE;
|
|
double bestScore = Double.NEGATIVE_INFINITY;
|
|
|
|
for (Action action : node.getActions()) {
|
|
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
|
|
|
|
MonteCarloProperties properties = childNode.getProperties();
|
|
double childScore = (double) properties.getWins() / properties.getVisits();
|
|
|
|
if (childScore >= bestScore) {
|
|
bestScore = childScore;
|
|
bestAction = action;
|
|
}
|
|
}
|
|
|
|
return bestAction;
|
|
}
|
|
|
|
@Override
|
|
public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player) {
|
|
GameState nextGameState = new GameState(node.getGameState());
|
|
Policy randomMovePolicy = new RandomMovePolicy();
|
|
Set<Action> exploredActions = node.getActions();
|
|
Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player);
|
|
if (Action.NONE == action) {
|
|
throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions);
|
|
}
|
|
nextGameState.playStone(player, action);
|
|
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
|
newChildren.add(new GameTreeNode<MonteCarloProperties>(nextGameState,new MonteCarloProperties()));
|
|
return newChildren;
|
|
}
|
|
|
|
@Override
|
|
/**
|
|
* Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
|
|
* since without (super)ko detection, there is no way to guarantee a rollout will terminate.
|
|
* Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
|
|
*/
|
|
public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player) {
|
|
Policy randomMovePolicy = new RandomMovePolicy();
|
|
|
|
Action action;
|
|
int rolloutDepth = 0;
|
|
GameState finalGameState = new GameState(node.getGameState());
|
|
Player currentPlayer = player;
|
|
do {
|
|
rolloutDepth++;
|
|
action = randomMovePolicy.getAction(gameConfig, node.getGameState(), player);
|
|
if (action != Action.NONE) {
|
|
finalGameState.playStone(currentPlayer, action);
|
|
currentPlayer = GoGame.getColorToPlay(currentPlayer, true);
|
|
}
|
|
} while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
|
|
|
|
if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
|
|
GameTreeNode<MonteCarloProperties> currentNode = node;
|
|
while (currentNode != null) {
|
|
MonteCarloProperties nodeProperties = node.getProperties();
|
|
nodeProperties.setWins(nodeProperties.getWins() + reward);
|
|
nodeProperties.setVisits(nodeProperties.getVisits() + 1);
|
|
currentNode = currentNode.getParent();
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public Action getAction(GameConfig gameConfig, GameState gameState,
|
|
Collection<Action> prohibitedActions, Player player) {
|
|
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
|
|
}
|
|
} |