Updated RandomMoveGenerator to support MonteCarloUCT.

Now possible to prohibit a Collection of Actions from being included in the List of returned actions (or as a single returned value).
All unit tests now pass with the exception of MonteCarloUCT.

TODO: playerToMove and previousPlayerPassed should be made part of the GameState.
This would remove the superfluous Player parameter from many methods and make it possible
to check for the "I'm ahead and my opponent is offering to end the game" killer move.
This commit is contained in:
cs6601
2012-08-31 09:17:43 -04:00
parent 4a1c64843d
commit d3c03f2c51
17 changed files with 453 additions and 257 deletions

View File

@@ -1,36 +1,44 @@
package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.GoGame;
import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.StateEvaluator;
import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
public class MonteCarloUCT extends MonteCarlo {
/** Exploration constant C in the UCB1 term C * ln(parentVisits) / childVisits used by descend(). */
public static final double TUNING_CONSTANT = 0.50;
/**
 * Creates a UCT searcher.
 *
 * @param movePolicy policy used to generate candidate moves during search
 * @param searchTimeLimit search budget, passed through to the MonteCarlo superclass
 *        (units not visible here — presumably milliseconds; confirm against MonteCarlo)
 */
public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) {
super(movePolicy, searchTimeLimit);
}
@Override
public List<GameTreeNode<MonteCarloProperties>> descend(GameTreeNode<MonteCarloProperties> node) {
double bestScore = Double.NEGATIVE_INFINITY;
GameTreeNode<MonteCarloProperties> bestNode = node;
//This appears slightly redundant with getBestAction() but it is not -
//descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty)
//but getBestAction specifically asks for the optimum action to take from the current node,
//even if it results in a worse next state.
//TODO: WHAT TO DO if the optimum leaf node is actually a terminal node?
//from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout
double nodeVisits = node.getProperties().getVisits();
for (Action action : node.getActions()) {
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
MonteCarloProperties properties = childNode.getProperties();
double childScore = (double) properties.getWins() / properties.getVisits();
double childScore = (double) properties.getWins() / properties.getVisits() + TUNING_CONSTANT * Math.log(nodeVisits) / childNode.getProperties().getVisits();
if (childScore >= bestScore) {
bestScore = childScore;
bestNode = childNode;
bestScore = childScore;
bestNode = childNode;
}
}
@@ -64,21 +72,63 @@ public class MonteCarloUCT extends MonteCarlo {
}
@Override
public List<GameTreeNode<MonteCarloProperties>> grow(GameTreeNode<MonteCarloProperties> node) {
// TODO Auto-generated method stub
return null;
public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player) {
GameState nextGameState = new GameState(node.getGameState());
Policy randomMovePolicy = new RandomMovePolicy();
Set<Action> exploredActions = node.getActions();
Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player);
if (Action.NONE == action) {
throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions);
}
nextGameState.playStone(player, action);
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
newChildren.add(new GameTreeNode<MonteCarloProperties>(nextGameState,new MonteCarloProperties()));
return newChildren;
}
@Override
public int rollout(GameTreeNode<MonteCarloProperties> node) {
// TODO Auto-generated method stub
return 0;
/**
* Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
* since without (super)ko detection, there is no way to guarantee a rollout will terminate.
* Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
*/
public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player) {
Policy randomMovePolicy = new RandomMovePolicy();
Action action;
int rolloutDepth = 0;
GameState finalGameState = new GameState(node.getGameState());
Player currentPlayer = player;
do {
rolloutDepth++;
action = randomMovePolicy.getAction(gameConfig, node.getGameState(), player);
if (action != Action.NONE) {
finalGameState.playStone(currentPlayer, action);
currentPlayer = GoGame.getColorToPlay(currentPlayer, true);
}
} while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) {
return 1;
} else {
return 0;
}
}
/**
 * Backpropagation step of MCTS: folds a rollout reward into every node on the
 * path from {@code node} up to the root (getParent() == null terminates the walk).
 *
 * @param node   the node the rollout was run from
 * @param reward rollout result to add to each ancestor's win count (visits always +1)
 */
@Override
public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
    GameTreeNode<MonteCarloProperties> currentNode = node;
    while (currentNode != null) {
        // BUG FIX: read properties from currentNode, not the original node. The
        // original incremented the leaf's statistics once per ancestor and never
        // updated the ancestors themselves, breaking the UCT visit counts.
        MonteCarloProperties nodeProperties = currentNode.getProperties();
        nodeProperties.setWins(nodeProperties.getWins() + reward);
        nodeProperties.setVisits(nodeProperties.getVisits() + 1);
        currentNode = currentNode.getParent();
    }
}
/**
 * Not supported by this searcher: MonteCarloUCT cannot exclude a caller-supplied
 * set of prohibited actions from its move selection.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Action getAction(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedActions, Player player) {
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
}
}