From d3c03f2c5131f2b6f633eb0756b02254827ab2ca Mon Sep 17 00:00:00 2001 From: cs6601 Date: Fri, 31 Aug 2012 09:17:43 -0400 Subject: [PATCH] Updated RandomMoveGenerator to support MonteCarloUCT. Now possible to prohibit a Collection of Actions from being included in the List of returned actions (or as a single returned value). All unit tests now pass with the exception of MonteCarloUCT. TODO: playerToMove and previousPlayerPassed should be made part of the GameState. This would remove the superfluous Player parameter from many methods and make it possible to check for the "I'm ahead and my opponent is offering to end the game" killer move. --- src/net/woodyfolsom/msproj/GameScore.java | 4 +- src/net/woodyfolsom/msproj/GameState.java | 4 + .../msproj/policy/ActionGenerator.java | 4 + .../woodyfolsom/msproj/policy/AlphaBeta.java | 224 +++++++++++------- .../woodyfolsom/msproj/policy/Minimax.java | 183 +++++++------- .../woodyfolsom/msproj/policy/MonteCarlo.java | 25 +- .../msproj/policy/MonteCarloUCT.java | 84 +++++-- src/net/woodyfolsom/msproj/policy/Policy.java | 4 + .../msproj/policy/RandomMovePolicy.java | 81 ++++--- .../msproj/policy/ValidMoveGenerator.java | 26 +- .../msproj/tree/AlphaBetaProperties.java | 23 ++ .../msproj/tree/AlphaBetaPropeties.java | 18 -- .../msproj/tree/MonteCarloProperties.java | 4 + .../msproj/policy/AlphaBetaTest.java | 6 +- .../msproj/policy/MinimaxTest.java | 6 +- .../msproj/policy/MonteCarloUCTTest.java | 4 + .../woodyfolsom/msproj/policy/RandomTest.java | 10 +- 17 files changed, 453 insertions(+), 257 deletions(-) create mode 100644 src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java delete mode 100644 src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java diff --git a/src/net/woodyfolsom/msproj/GameScore.java b/src/net/woodyfolsom/msproj/GameScore.java index 7f535b8..fe08e32 100644 --- a/src/net/woodyfolsom/msproj/GameScore.java +++ b/src/net/woodyfolsom/msproj/GameScore.java @@ -42,8 +42,8 @@ public class GameScore { return 
(double)whiteScore + komi; } - public boolean isWinner(String color) { - if ("w".equals(color)) { + public boolean isWinner(Player player) { + if (Player.WHITE == player) { return getWhiteScore() < NORMALIZED_ZERO_SCORE; } else { return getBlackScore() > NORMALIZED_ZERO_SCORE; diff --git a/src/net/woodyfolsom/msproj/GameState.java b/src/net/woodyfolsom/msproj/GameState.java index 4e3ea80..ce7b926 100644 --- a/src/net/woodyfolsom/msproj/GameState.java +++ b/src/net/woodyfolsom/msproj/GameState.java @@ -87,6 +87,10 @@ public class GameState { * @return */ public boolean playStone(Player player, Action action) { + if (action == Action.PASS) { + return true; + } + char currentStone = gameBoard.getSymbolAt(action.getColumn(), action.getRow()); if (currentStone != GameBoard.EMPTY_INTERSECTION) { diff --git a/src/net/woodyfolsom/msproj/policy/ActionGenerator.java b/src/net/woodyfolsom/msproj/policy/ActionGenerator.java index adf66cc..3216ba0 100644 --- a/src/net/woodyfolsom/msproj/policy/ActionGenerator.java +++ b/src/net/woodyfolsom/msproj/policy/ActionGenerator.java @@ -1,5 +1,6 @@ package net.woodyfolsom.msproj.policy; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -12,4 +13,7 @@ public interface ActionGenerator { public List getActions(GameConfig gameConfig, GameState gameState, Player color, int numActions); + + public List getActions(GameConfig gameConfig, GameState gameState, + Collection prohibitedMoves, Player color, int numActions); } diff --git a/src/net/woodyfolsom/msproj/policy/AlphaBeta.java b/src/net/woodyfolsom/msproj/policy/AlphaBeta.java index 62e1de1..3cb0c39 100644 --- a/src/net/woodyfolsom/msproj/policy/AlphaBeta.java +++ b/src/net/woodyfolsom/msproj/policy/AlphaBeta.java @@ -1,5 +1,6 @@ package net.woodyfolsom.msproj.policy; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -8,122 +9,173 @@ import net.woodyfolsom.msproj.GameState; import 
net.woodyfolsom.msproj.GoGame; import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.StateEvaluator; - +import net.woodyfolsom.msproj.tree.AlphaBetaProperties; +import net.woodyfolsom.msproj.tree.GameTreeNode; public class AlphaBeta implements Policy { - private static final int DEFAULT_RECURSIVE_PLAYS = 1; + private static final int DEFAULT_LOOKAHEAD = 1; + private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator(); - private Action bestPick = Action.PASS; + private int lookAhead; + private int numStateEvaluations = 0; + + public AlphaBeta() { + this(DEFAULT_LOOKAHEAD); + } + + public AlphaBeta(int lookAhead) { + this.lookAhead = lookAhead; + } @Override public Action getAction(GameConfig gameConfig, GameState gameState, Player player) { - int alpha = Integer.MIN_VALUE; - int beta = Integer.MAX_VALUE; + numStateEvaluations = 0; + + StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); + + GameTreeNode rootNode = new GameTreeNode( + gameState, new AlphaBetaProperties()); if (player == Player.BLACK) { - getMaxValue(gameConfig, gameState, player, false, - DEFAULT_RECURSIVE_PLAYS * 2, alpha, beta); - return bestPick; - } else if (player == Player.WHITE) { - getMinValue(gameConfig, gameState, player, false, - DEFAULT_RECURSIVE_PLAYS * 2, alpha, beta); - return bestPick; + return getMax(lookAhead * 2, stateEvaluator, rootNode, player); } else { - return Action.PASS; + return getMin(lookAhead * 2, stateEvaluator, rootNode, player); } } - private int getMaxValue(GameConfig gameConfig, GameState gameState, - Player initialColor, boolean playAsOpponent, int recursionLevel, - int alpha, int beta) { - if (terminalTest(recursionLevel)) { - return getUtility(gameConfig, gameState); - } + private boolean isTerminal(int nValidMoves, int recursionLevels) { + return recursionLevels == 0 || nValidMoves == 0; + } - Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent); + private Action getMax(int recursionLevels, 
StateEvaluator stateEvaluator, + GameTreeNode node, Player player) { - List validMoves = validMoveGenerator.getActions(gameConfig, - gameState, colorPlaying, ActionGenerator.ALL_ACTIONS); + GameState gameState = new GameState(node.getGameState()); - int value = Integer.MIN_VALUE; + List validMoves = validMoveGenerator.getActions( + stateEvaluator.getGameConfig(), node.getGameState(), player, + ActionGenerator.ALL_ACTIONS); - for (Action nextMove : validMoves) { - GameState nextState = new GameState(gameState); + boolean terminal = isTerminal(validMoves.size(), recursionLevels); - if (!nextState.playStone(colorPlaying, nextMove)) { - throw new RuntimeException( - "Illegal move attempted during search!"); - } + double maxScore = Double.NEGATIVE_INFINITY; + Action bestAction = Action.NONE; - int minValue = getMinValue(gameConfig, nextState, initialColor, - !playAsOpponent, recursionLevel - 1, alpha, beta); + if (terminal) { + node.getProperties().setReward( + stateEvaluator.scoreGame(gameState).getAggregateScore()); + + numStateEvaluations++; + + return bestAction; + } else { - if (minValue > value) { - value = minValue; - if (recursionLevel == DEFAULT_RECURSIVE_PLAYS * 2) { - bestPick = nextMove; + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode( + nextState, new AlphaBetaProperties()); + + childNode.getProperties().setAlpha( + node.getProperties().getAlpha()); + childNode.getProperties().setBeta( + node.getProperties().getBeta()); + + node.addChild(nextMove, childNode); + getMin(recursionLevels - 1, stateEvaluator, childNode, + GoGame.getColorToPlay(player, true)); + + double gameScore = childNode.getProperties().getReward(); + + if (gameScore > maxScore) { + maxScore = gameScore; + bestAction = nextMove; } - } - if (value >= beta) { - return value; - } - alpha = Math.max(alpha, value); - } - - return value; - } - - private int 
getMinValue(GameConfig gameConfig, GameState gameState, - Player initialColor, boolean playAsOpponent, int recursionLevel, - int alpha, int beta) { - if (terminalTest(recursionLevel)) { - return getUtility(gameConfig, gameState); - } - - Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent); - - List validMoves = validMoveGenerator.getActions(gameConfig, - gameState, colorPlaying, ActionGenerator.ALL_ACTIONS); - - int value = Integer.MAX_VALUE; - - for (Action nextMove : validMoves) { - GameState nextState = new GameState(gameState); - - if (!nextState.playStone(colorPlaying, nextMove)) { - throw new RuntimeException( - "Illegal move attempted during search!"); - } - - int maxValue = getMaxValue(gameConfig, nextState, initialColor, - !playAsOpponent, recursionLevel - 1, alpha, beta); - - if (maxValue < value) { - value = maxValue; - if (recursionLevel == 2 * DEFAULT_RECURSIVE_PLAYS) { - bestPick = nextMove; + if (gameScore >= node.getProperties().getBeta()) { + node.getProperties().setReward(gameScore); + return bestAction; } + + node.getProperties().setAlpha( + Math.max(node.getProperties().getAlpha(), maxScore)); } - if (value <= alpha) { - return value; - } - beta = Math.min(beta, value); + node.getProperties().setReward(maxScore); + return bestAction; } - - return value; } - private boolean terminalTest(int recursionLevel) { - return recursionLevel < 1; - } + private Action getMin(int recursionLevels, StateEvaluator stateEvaluator, + GameTreeNode node, Player player) { - private int getUtility(GameConfig gameConfig, GameState gameState) { - StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); - return stateEvaluator.scoreGame(gameState).getAggregateScore(); + GameState gameState = new GameState(node.getGameState()); + + List validMoves = validMoveGenerator.getActions( + stateEvaluator.getGameConfig(), node.getGameState(), player, + ActionGenerator.ALL_ACTIONS); + + boolean terminal = isTerminal(validMoves.size(), recursionLevels); 
+ + double minScore = Double.POSITIVE_INFINITY; + Action bestAction = Action.NONE; + + if (terminal) { + node.getProperties().setReward( + stateEvaluator.scoreGame(gameState).getAggregateScore()); + + numStateEvaluations++; + + return bestAction; + } else { + + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode( + nextState, new AlphaBetaProperties()); + + childNode.getProperties().setAlpha( + node.getProperties().getAlpha()); + childNode.getProperties().setBeta( + node.getProperties().getBeta()); + + node.addChild(nextMove, childNode); + getMax(recursionLevels - 1, stateEvaluator, childNode, + GoGame.getColorToPlay(player, true)); + + double gameScore = childNode.getProperties().getReward(); + + if (gameScore < minScore) { + minScore = gameScore; + bestAction = nextMove; + } + + if (gameScore <= node.getProperties().getAlpha()) { + node.getProperties().setReward(gameScore); + return bestAction; + } + + node.getProperties().setBeta( + Math.min(node.getProperties().getBeta(), minScore)); + } + + node.getProperties().setReward(minScore); + return bestAction; + } + } + + @Override + public int getNumStateEvaluations() { + return numStateEvaluations; + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Collection prohibitedActions, Player player) { + throw new UnsupportedOperationException("Prohibited actions not supported by this class."); } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/Minimax.java b/src/net/woodyfolsom/msproj/policy/Minimax.java index 9b7bc6a..c132992 100644 --- a/src/net/woodyfolsom/msproj/policy/Minimax.java +++ b/src/net/woodyfolsom/msproj/policy/Minimax.java @@ -1,5 +1,6 @@ package net.woodyfolsom.msproj.policy; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -13,119 +14,141 @@ import 
net.woodyfolsom.msproj.tree.MinimaxProperties; public class Minimax implements Policy { private static final int DEFAULT_LOOKAHEAD = 1; - + private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator(); private int lookAhead; - + private int numStateEvaluations = 0; + public Minimax() { this(DEFAULT_LOOKAHEAD); } - + public Minimax(int lookAhead) { this.lookAhead = lookAhead; } - + @Override public Action getAction(GameConfig gameConfig, GameState gameState, Player player) { + numStateEvaluations = 0; StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); - - GameTreeNode rootNode = new GameTreeNode(gameState, new MinimaxProperties()); - + + GameTreeNode rootNode = new GameTreeNode( + gameState, new MinimaxProperties()); + if (player == Player.BLACK) { - return getMax( - lookAhead * 2, - stateEvaluator, - rootNode, - player); + return getMax(lookAhead * 2, stateEvaluator, rootNode, player); } else { - return getMin( - lookAhead * 2, - stateEvaluator, - rootNode, - player); + return getMin(lookAhead * 2, stateEvaluator, rootNode, player); } } - private Action getMax(int recursionLevels, - StateEvaluator stateEvaluator, - GameTreeNode node, - Player player) { + private boolean isTerminal(int nValidMoves, int recursionLevels) { + return recursionLevels == 0 || nValidMoves == 0; + } + + private Action getMax(int recursionLevels, StateEvaluator stateEvaluator, + GameTreeNode node, Player player) { GameState gameState = new GameState(node.getGameState()); - - List validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(), - node.getGameState(), player, ActionGenerator.ALL_ACTIONS); - - for (Action nextMove : validMoves) { - GameState nextState = new GameState(gameState); - nextState.playStone(player, nextMove); - GameTreeNode childNode = new GameTreeNode(nextState, new MinimaxProperties()); - node.addChild(nextMove, childNode); - if (recursionLevels > 1) { - getMin(recursionLevels - 1, stateEvaluator, childNode, 
GoGame.getColorToPlay(player, true)); - } else { - //tail condition - set reward of this leaf node - childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore()); - } - } - + + List validMoves = validMoveGenerator.getActions( + stateEvaluator.getGameConfig(), node.getGameState(), player, + ActionGenerator.ALL_ACTIONS); + + boolean terminal = isTerminal(validMoves.size(), recursionLevels); + double maxScore = Double.NEGATIVE_INFINITY; Action bestAction = Action.NONE; - - for (Action nextMove : validMoves) { - GameTreeNode childNode = node.getChild(nextMove); - double gameScore = childNode.getProperties().getReward(); + + if (terminal) { + node.getProperties().setReward( + stateEvaluator.scoreGame(gameState).getAggregateScore()); - if (gameScore > maxScore) { - maxScore = gameScore; - bestAction = nextMove; + numStateEvaluations++; + + return bestAction; + } else { + + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode( + nextState, new MinimaxProperties()); + node.addChild(nextMove, childNode); + + getMin(recursionLevels - 1, stateEvaluator, childNode, + GoGame.getColorToPlay(player, true)); + + double gameScore = childNode.getProperties().getReward(); + + if (gameScore > maxScore) { + maxScore = gameScore; + bestAction = nextMove; + } } + + node.getProperties().setReward(maxScore); + return bestAction; } - - node.getProperties().setReward(maxScore); - return bestAction; } - - private Action getMin(int recursionLevels, - StateEvaluator stateEvaluator, - GameTreeNode node, - Player player) { + + private Action getMin(int recursionLevels, StateEvaluator stateEvaluator, + GameTreeNode node, Player player) { GameState gameState = new GameState(node.getGameState()); - - List validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(), - node.getGameState(), player, ActionGenerator.ALL_ACTIONS); - - for 
(Action nextMove : validMoves) { - GameState nextState = new GameState(gameState); - nextState.playStone(player, nextMove); - GameTreeNode childNode = new GameTreeNode(nextState, new MinimaxProperties()); - node.addChild(nextMove, childNode); - if (recursionLevels > 1) { - getMax(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true)); - } else { - //tail condition - set reward of this leaf node - childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore()); - } - } - + + List validMoves = validMoveGenerator.getActions( + stateEvaluator.getGameConfig(), node.getGameState(), player, + ActionGenerator.ALL_ACTIONS); + + boolean terminal = isTerminal(validMoves.size(), recursionLevels); + double minScore = Double.POSITIVE_INFINITY; Action bestAction = Action.NONE; - - for (Action nextMove : validMoves) { - GameTreeNode childNode = node.getChild(nextMove); - double gameScore = childNode.getProperties().getReward(); + + if (terminal) { + node.getProperties().setReward( + stateEvaluator.scoreGame(gameState).getAggregateScore()); - if (gameScore < minScore) { - minScore = gameScore; - bestAction = nextMove; + numStateEvaluations++; + + return bestAction; + } else { + + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode( + nextState, new MinimaxProperties()); + node.addChild(nextMove, childNode); + + getMax(recursionLevels - 1, stateEvaluator, childNode, + GoGame.getColorToPlay(player, true)); + + double gameScore = childNode.getProperties().getReward(); + + if (gameScore < minScore) { + minScore = gameScore; + bestAction = nextMove; + } } + + node.getProperties().setReward(minScore); + return bestAction; } - - node.getProperties().setReward(minScore); - return bestAction; + } + + @Override + public int getNumStateEvaluations() { + return numStateEvaluations; + } + + @Override + public Action 
getAction(GameConfig gameConfig, GameState gameState, + Collection prohibitedActions, Player player) { + throw new UnsupportedOperationException("Prohibited actions not supported by this class."); } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarlo.java b/src/net/woodyfolsom/msproj/policy/MonteCarlo.java index 8d97afb..fe0c18e 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarlo.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarlo.java @@ -6,12 +6,18 @@ import java.util.List; import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameState; +import net.woodyfolsom.msproj.GoGame; import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.StateEvaluator; import net.woodyfolsom.msproj.tree.GameTreeNode; import net.woodyfolsom.msproj.tree.MonteCarloProperties; public abstract class MonteCarlo implements Policy { + protected static final int ROLLOUT_DEPTH_LIMIT = 20; + + protected int numStateEvaluations = 0; protected Policy movePolicy; + protected long searchTimeLimit; protected volatile long elapsedTime = 0L; @@ -30,7 +36,7 @@ public abstract class MonteCarlo implements Policy { @Override public Action getAction(GameConfig gameConfig, GameState gameState, - Player initialColor) { + Player player) { long startTime = System.currentTimeMillis(); //If for some reason no moves are evaluated within the time limit, pass. @@ -38,20 +44,23 @@ public abstract class MonteCarlo implements Policy { //result in a win. 
GameTreeNode rootNode = new GameTreeNode(gameState, new MonteCarloProperties()); - + StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); do { + //TODO these return types may need to be lists for some MC methods List> selectedNodes = descend(rootNode); List> newLeaves = new ArrayList>(); + Player nextPlayer = GoGame.getColorToPlay(player, true); + for (GameTreeNode selectedNode: selectedNodes) { - for (GameTreeNode newLeaf : grow(selectedNode)) { + for (GameTreeNode newLeaf : grow(gameConfig, selectedNode, nextPlayer)) { newLeaves.add(newLeaf); } } for (GameTreeNode newLeaf : newLeaves) { - int reward = rollout(newLeaf); + int reward = rollout(gameConfig, stateEvaluator, newLeaf, player); update(newLeaf, reward); } @@ -67,9 +76,9 @@ public abstract class MonteCarlo implements Policy { public abstract Action getBestAction(GameTreeNode node); - public abstract List> grow(GameTreeNode node); + public abstract List> grow(GameConfig gameConfig, GameTreeNode node, Player player); - public abstract int rollout(GameTreeNode node); + public abstract int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode node, Player player); public abstract void update(GameTreeNode node, int reward); @@ -80,4 +89,8 @@ public abstract class MonteCarlo implements Policy { public int doRollout() { return 0; } + + public int getNumStateEvaluations() { + return numStateEvaluations; + } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java index 3da3834..6c095d3 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java @@ -1,36 +1,44 @@ package net.woodyfolsom.msproj.policy; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Set; import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.GameConfig; +import net.woodyfolsom.msproj.GameState; 
+import net.woodyfolsom.msproj.GoGame; +import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.StateEvaluator; import net.woodyfolsom.msproj.tree.GameTreeNode; import net.woodyfolsom.msproj.tree.MonteCarloProperties; public class MonteCarloUCT extends MonteCarlo { + public static final double TUNING_CONSTANT = 0.50; public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) { super(movePolicy, searchTimeLimit); } - + @Override public List> descend(GameTreeNode node) { double bestScore = Double.NEGATIVE_INFINITY; GameTreeNode bestNode = node; - //This appears slightly redundant with getBestAction() but it is not - - //descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty) - //but getBestAction specifically asks for the optimum action to take from the current node, - //even if it results in a worse next state. + //TODO: WHAT TO DO if the optimum leaf node is actually a terminal node? + //from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout + double nodeVisits = node.getProperties().getVisits(); + for (Action action : node.getActions()) { GameTreeNode childNode = node.getChild(action); MonteCarloProperties properties = childNode.getProperties(); - double childScore = (double) properties.getWins() / properties.getVisits(); + double childScore = (double) properties.getWins() / properties.getVisits() + TUNING_CONSTANT * Math.log(nodeVisits) / childNode.getProperties().getVisits(); if (childScore >= bestScore) { - bestScore = childScore; - bestNode = childNode; + bestScore = childScore; + bestNode = childNode; } } @@ -64,21 +72,63 @@ public class MonteCarloUCT extends MonteCarlo { } @Override - public List> grow(GameTreeNode node) { - // TODO Auto-generated method stub - return null; + public List> grow(GameConfig gameConfig, GameTreeNode node, Player player) { + GameState nextGameState = new GameState(node.getGameState()); + Policy 
randomMovePolicy = new RandomMovePolicy(); + Set exploredActions = node.getActions(); + Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player); + if (Action.NONE == action) { + throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions); + } + nextGameState.playStone(player, action); + List> newChildren = new ArrayList>(); + newChildren.add(new GameTreeNode(nextGameState,new MonteCarloProperties())); + return newChildren; } @Override - public int rollout(GameTreeNode node) { - // TODO Auto-generated method stub - return 0; + /** + * Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter, + * since without (super)ko detection, there is no way to guarantee a rollout will terminate. + * Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts. + */ + public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode node, Player player) { + Policy randomMovePolicy = new RandomMovePolicy(); + + Action action; + int rolloutDepth = 0; + GameState finalGameState = new GameState(node.getGameState()); + Player currentPlayer = player; + do { + rolloutDepth++; + action = randomMovePolicy.getAction(gameConfig, node.getGameState(), player); + if (action != Action.NONE) { + finalGameState.playStone(currentPlayer, action); + currentPlayer = GoGame.getColorToPlay(currentPlayer, true); + } + } while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT); + + if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) { + return 1; + } else { + return 0; + } } @Override public void update(GameTreeNode node, int reward) { - // TODO Auto-generated method stub - + GameTreeNode currentNode = node; + while (currentNode != null) { + MonteCarloProperties nodeProperties = node.getProperties(); + nodeProperties.setWins(nodeProperties.getWins() + 
reward); + nodeProperties.setVisits(nodeProperties.getVisits() + 1); + currentNode = currentNode.getParent(); + } + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Collection prohibitedActions, Player player) { + throw new UnsupportedOperationException("Prohibited actions not supported by this class."); } - } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/Policy.java b/src/net/woodyfolsom/msproj/policy/Policy.java index 6d62e22..e6a90ef 100644 --- a/src/net/woodyfolsom/msproj/policy/Policy.java +++ b/src/net/woodyfolsom/msproj/policy/Policy.java @@ -1,5 +1,7 @@ package net.woodyfolsom.msproj.policy; +import java.util.Collection; + import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameState; @@ -8,4 +10,6 @@ import net.woodyfolsom.msproj.Player; public interface Policy { public Action getAction(GameConfig gameConfig, GameState gameState, Player player); + public Action getAction(GameConfig gameConfig, GameState gameState, Collection prohibitedActions, Player player); + public int getNumStateEvaluations(); } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java index cba21f8..1049c06 100644 --- a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java +++ b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java @@ -1,6 +1,7 @@ package net.woodyfolsom.msproj.policy; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -9,34 +10,20 @@ import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.Player; -public class RandomMovePolicy implements Policy { +public class RandomMovePolicy implements Policy, ActionGenerator { /** * Does NOT modify the gameState. 
*/ public Action getAction(GameConfig gameConfig, GameState gameState, - Player color) { - GameState gameStateCopy = new GameState(gameState); - List emptyCoordinates = gameStateCopy.getEmptyCoords(); - - while (emptyCoordinates.size() > 0) { - Action randomMove = Action.getInstance(emptyCoordinates - .get((int) (Math.random() * emptyCoordinates.size()))); - - if (gameStateCopy.playStone(color, randomMove)) { - return randomMove; - } else { - emptyCoordinates.remove(randomMove); - } - } - - return Action.PASS; + Collection prohibitedMoves, Player player) { + return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0); } /** * Attempts to generate up to nMoves random moves on behalf of the specified - * player. Will return at least one move, which may be 'pass' if random - * search does not success in discovering a valid move. Does NOT modify the + * player. Will return at least one move, which may be 'NONE' if random + * search does not succeed in discovering a valid move. Does NOT modify the * gameState. 
* * @param gameConfig @@ -45,25 +32,47 @@ public class RandomMovePolicy implements Policy { * * @return */ - public List genMoves(GameConfig gameConfig, GameState gameState, - Player color, int nMoves) { + public List getActions(GameConfig gameConfig, GameState gameState, + Collection prohibitedMoves, Player player, int nMoves) { GameState gameStateCopy = new GameState(gameState); - List emptyCoordinates = gameStateCopy.getEmptyCoords(); - List randomMoves = new ArrayList(); - - while (emptyCoordinates.size() > 0 && randomMoves.size() < nMoves) { - Action randomMove = Action.getInstance(emptyCoordinates - .get((int) (Math.random() * emptyCoordinates.size()))); - if (gameStateCopy.playStone(color, randomMove)) { - randomMoves.add(randomMove); - } - emptyCoordinates.remove(randomMove); + ActionGenerator actionGenerator = new ValidMoveGenerator(); + + List possibleActions = actionGenerator.getActions(gameConfig, gameStateCopy, prohibitedMoves, player, ActionGenerator.ALL_ACTIONS); + List randomActions = new ArrayList(); + + while (possibleActions.size() > 0 && randomActions.size() < nMoves) { + Action randomAction = possibleActions + .remove((int) (Math.random() * possibleActions.size())); + + randomActions.add(randomAction); } - if (randomMoves.size() == 0) { - randomMoves.add(Action.PASS); + if (randomActions.size() == 0) { + randomActions.add(Action.NONE); } - - return randomMoves; + + return randomActions; } -} + + /** + * RandomMoveGenerator does not evaluate any states, but simply returns elements of + * a set of uniformly distributed, distinct valid moves. 
+ * + * @return + */ + public int getNumStateEvaluations() { + return 0; + } + + @Override + public List getActions(GameConfig gameConfig, GameState gameState, + Player color, int numActions) { + return getActions(gameConfig, gameState, new ArrayList(), color, numActions); + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Player player) { + return getActions(gameConfig,gameState,player,1).get(0); + } +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/ValidMoveGenerator.java b/src/net/woodyfolsom/msproj/policy/ValidMoveGenerator.java index b7dc0a9..39b1069 100644 --- a/src/net/woodyfolsom/msproj/policy/ValidMoveGenerator.java +++ b/src/net/woodyfolsom/msproj/policy/ValidMoveGenerator.java @@ -1,6 +1,7 @@ package net.woodyfolsom.msproj.policy; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -8,29 +9,36 @@ import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.Player; -//import org.apache.log4j.Logger; - public class ValidMoveGenerator implements ActionGenerator { - //private static final Logger LOGGER = Logger.getLogger(ValidMoveGenerator.class.getName()); @Override public List getActions(GameConfig gameConfig, GameState gameState, Player color, int nMoves) { - + return getActions(gameConfig, gameState, new ArrayList(), + color, nMoves); + } + + @Override + public List getActions(GameConfig gameConfig, GameState gameState, + Collection prohibitedMoves, Player color, int nMoves) { + GameState gameStateCopy = new GameState(gameState); List emptyCoordinates = gameStateCopy.getEmptyCoords(); List validMoves = new ArrayList(); while (emptyCoordinates.size() > 0) { - Action nextMove = Action.getInstance(emptyCoordinates.remove(emptyCoordinates.size()-1)); - if (gameStateCopy.playStone(color, nextMove)) { + Action nextMove = Action.getInstance(emptyCoordinates + 
.remove(emptyCoordinates.size() - 1)); + if (!prohibitedMoves.contains(nextMove) + && gameStateCopy.playStone(color, nextMove)) { validMoves.add(nextMove); - gameStateCopy = new GameState(gameState); // play successful? regenerate copy of gameState + gameStateCopy = new GameState(gameState); // play successful? + // regenerate copy + // of gameState } } - //Passing is always a VALID move. It may not be a GOOD move. - if (validMoves.size() == 0) { + if (!prohibitedMoves.contains(Action.PASS)) { validMoves.add(Action.PASS); } diff --git a/src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java b/src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java new file mode 100644 index 0000000..d5612a0 --- /dev/null +++ b/src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java @@ -0,0 +1,23 @@ +package net.woodyfolsom.msproj.tree; + +public class AlphaBetaProperties extends MinimaxProperties { + + double alpha = Double.NEGATIVE_INFINITY; + double beta = Double.POSITIVE_INFINITY; + + public double getAlpha() { + return alpha; + } + + public void setAlpha(double d) { + this.alpha = d; + } + + public double getBeta() { + return beta; + } + + public void setBeta(double beta) { + this.beta = beta; + } +} diff --git a/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java b/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java deleted file mode 100644 index 05c60ff..0000000 --- a/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java +++ /dev/null @@ -1,18 +0,0 @@ -package net.woodyfolsom.msproj.tree; - -public class AlphaBetaPropeties extends GameTreeNodeProperties{ - int alpha = 0; - int beta = 0; - public int getAlpha() { - return alpha; - } - public void setAlpha(int alpha) { - this.alpha = alpha; - } - public int getBeta() { - return beta; - } - public void setBeta(int beta) { - this.beta = beta; - } -} diff --git a/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java b/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java index 364356b..30d2361 100644 --- 
a/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java +++ b/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java @@ -3,15 +3,19 @@ package net.woodyfolsom.msproj.tree; public class MonteCarloProperties extends GameTreeNodeProperties { int visits = 0; int wins = 0; + public int getVisits() { return visits; } + public void setVisits(int visits) { this.visits = visits; } + public int getWins() { return wins; } + public void setWins(int wins) { this.wins = wins; } diff --git a/test/net/woodyfolsom/msproj/policy/AlphaBetaTest.java b/test/net/woodyfolsom/msproj/policy/AlphaBetaTest.java index 4d58ba7..49c86d0 100644 --- a/test/net/woodyfolsom/msproj/policy/AlphaBetaTest.java +++ b/test/net/woodyfolsom/msproj/policy/AlphaBetaTest.java @@ -23,7 +23,9 @@ public class AlphaBetaTest { System.out.println(gameState); System.out.println("Generated move: " + move); - assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + + assertEquals(Action.getInstance("B3"), move); gameState.playStone(Player.WHITE, move); System.out.println("Final board state:"); System.out.println(gameState); @@ -43,6 +45,8 @@ public class AlphaBetaTest { System.out.println(gameState); System.out.println("Generated move: " + move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); gameState.playStone(Player.BLACK, move); diff --git a/test/net/woodyfolsom/msproj/policy/MinimaxTest.java b/test/net/woodyfolsom/msproj/policy/MinimaxTest.java index 83a2b2a..0beea5d 100644 --- a/test/net/woodyfolsom/msproj/policy/MinimaxTest.java +++ b/test/net/woodyfolsom/msproj/policy/MinimaxTest.java @@ -24,7 +24,9 @@ public class MinimaxTest { System.out.println(gameState); System.out.println("Generated move: " + move); - assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), 
move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + + assertEquals(Action.getInstance("B3"), move); gameState.playStone(Player.WHITE, move); System.out.println(gameState); @@ -45,6 +47,8 @@ public class MinimaxTest { System.out.println(gameState); System.out.println("Generated move: " + move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); gameState.playStone(Player.BLACK, move); diff --git a/test/net/woodyfolsom/msproj/policy/MonteCarloUCTTest.java b/test/net/woodyfolsom/msproj/policy/MonteCarloUCTTest.java index 297a047..4ead0ac 100644 --- a/test/net/woodyfolsom/msproj/policy/MonteCarloUCTTest.java +++ b/test/net/woodyfolsom/msproj/policy/MonteCarloUCTTest.java @@ -25,6 +25,8 @@ public class MonteCarloUCTTest { System.out.println(gameState); System.out.println("Generated move: " + move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + assertEquals("Expected B3 but was: " + move, "B3", move); gameState.playStone(Player.WHITE, move); @@ -45,6 +47,8 @@ public class MonteCarloUCTTest { System.out.println(gameState); System.out.println("Generated move: " + move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + assertEquals("Expected B3 but was: " + move, "B3", move); gameState.playStone(Player.BLACK, move); diff --git a/test/net/woodyfolsom/msproj/policy/RandomTest.java b/test/net/woodyfolsom/msproj/policy/RandomTest.java index 8934784..61d01bf 100644 --- a/test/net/woodyfolsom/msproj/policy/RandomTest.java +++ b/test/net/woodyfolsom/msproj/policy/RandomTest.java @@ -1,6 +1,10 @@ package net.woodyfolsom.msproj.policy; import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.List; + import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; import 
net.woodyfolsom.msproj.GameState; @@ -49,7 +53,11 @@ public class RandomTest { System.out.println("State before random WHITE move selection:"); System.out.println(gameState); //This is correct - checked vs. MFOG - assertEquals(Action.getInstance("B3"), new RandomMovePolicy().getAction(new GameConfig(), gameState, Player.WHITE)); + //PASS would otherwise be a valid move + List prohibitedMoves = new ArrayList(); + prohibitedMoves.add(Action.PASS); + + assertEquals(Action.getInstance("B3"), new RandomMovePolicy().getAction(new GameConfig(), gameState, prohibitedMoves, Player.WHITE)); System.out.println(gameState); }