diff --git a/src/net/woodyfolsom/msproj/GameScore.java b/src/net/woodyfolsom/msproj/GameScore.java index 7f535b8..fe08e32 100644 --- a/src/net/woodyfolsom/msproj/GameScore.java +++ b/src/net/woodyfolsom/msproj/GameScore.java @@ -42,8 +42,8 @@ public class GameScore { return (double)whiteScore + komi; } - public boolean isWinner(String color) { - if ("w".equals(color)) { + public boolean isWinner(Player player) { + if (Player.WHITE == player) { return getWhiteScore() < NORMALIZED_ZERO_SCORE; } else { return getBlackScore() > NORMALIZED_ZERO_SCORE; diff --git a/src/net/woodyfolsom/msproj/GameState.java b/src/net/woodyfolsom/msproj/GameState.java index 4e3ea80..ce7b926 100644 --- a/src/net/woodyfolsom/msproj/GameState.java +++ b/src/net/woodyfolsom/msproj/GameState.java @@ -87,6 +87,10 @@ public class GameState { * @return */ public boolean playStone(Player player, Action action) { + if (action == Action.PASS) { + return true; + } + char currentStone = gameBoard.getSymbolAt(action.getColumn(), action.getRow()); if (currentStone != GameBoard.EMPTY_INTERSECTION) { diff --git a/src/net/woodyfolsom/msproj/policy/ActionGenerator.java b/src/net/woodyfolsom/msproj/policy/ActionGenerator.java index adf66cc..3216ba0 100644 --- a/src/net/woodyfolsom/msproj/policy/ActionGenerator.java +++ b/src/net/woodyfolsom/msproj/policy/ActionGenerator.java @@ -1,5 +1,6 @@ package net.woodyfolsom.msproj.policy; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -12,4 +13,7 @@ public interface ActionGenerator { public List getActions(GameConfig gameConfig, GameState gameState, Player color, int numActions); + + public List getActions(GameConfig gameConfig, GameState gameState, + Collection prohibitedMoves, Player color, int numActions); } diff --git a/src/net/woodyfolsom/msproj/policy/AlphaBeta.java b/src/net/woodyfolsom/msproj/policy/AlphaBeta.java index 62e1de1..3cb0c39 100644 --- a/src/net/woodyfolsom/msproj/policy/AlphaBeta.java +++ 
b/src/net/woodyfolsom/msproj/policy/AlphaBeta.java @@ -1,5 +1,6 @@ package net.woodyfolsom.msproj.policy; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -8,122 +9,173 @@ import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.GoGame; import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.StateEvaluator; - +import net.woodyfolsom.msproj.tree.AlphaBetaProperties; +import net.woodyfolsom.msproj.tree.GameTreeNode; public class AlphaBeta implements Policy { - private static final int DEFAULT_RECURSIVE_PLAYS = 1; + private static final int DEFAULT_LOOKAHEAD = 1; + private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator(); - private Action bestPick = Action.PASS; + private int lookAhead; + private int numStateEvaluations = 0; + + public AlphaBeta() { + this(DEFAULT_LOOKAHEAD); + } + + public AlphaBeta(int lookAhead) { + this.lookAhead = lookAhead; + } @Override public Action getAction(GameConfig gameConfig, GameState gameState, Player player) { - int alpha = Integer.MIN_VALUE; - int beta = Integer.MAX_VALUE; + numStateEvaluations = 0; + + StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); + + GameTreeNode rootNode = new GameTreeNode( + gameState, new AlphaBetaProperties()); if (player == Player.BLACK) { - getMaxValue(gameConfig, gameState, player, false, - DEFAULT_RECURSIVE_PLAYS * 2, alpha, beta); - return bestPick; - } else if (player == Player.WHITE) { - getMinValue(gameConfig, gameState, player, false, - DEFAULT_RECURSIVE_PLAYS * 2, alpha, beta); - return bestPick; + return getMax(lookAhead * 2, stateEvaluator, rootNode, player); } else { - return Action.PASS; + return getMin(lookAhead * 2, stateEvaluator, rootNode, player); } } - private int getMaxValue(GameConfig gameConfig, GameState gameState, - Player initialColor, boolean playAsOpponent, int recursionLevel, - int alpha, int beta) { - if (terminalTest(recursionLevel)) { - return getUtility(gameConfig, 
gameState); - } + private boolean isTerminal(int nValidMoves, int recursionLevels) { + return recursionLevels == 0 || nValidMoves == 0; + } - Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent); + private Action getMax(int recursionLevels, StateEvaluator stateEvaluator, + GameTreeNode node, Player player) { - List validMoves = validMoveGenerator.getActions(gameConfig, - gameState, colorPlaying, ActionGenerator.ALL_ACTIONS); + GameState gameState = new GameState(node.getGameState()); - int value = Integer.MIN_VALUE; + List validMoves = validMoveGenerator.getActions( + stateEvaluator.getGameConfig(), node.getGameState(), player, + ActionGenerator.ALL_ACTIONS); - for (Action nextMove : validMoves) { - GameState nextState = new GameState(gameState); + boolean terminal = isTerminal(validMoves.size(), recursionLevels); - if (!nextState.playStone(colorPlaying, nextMove)) { - throw new RuntimeException( - "Illegal move attempted during search!"); - } + double maxScore = Double.NEGATIVE_INFINITY; + Action bestAction = Action.NONE; - int minValue = getMinValue(gameConfig, nextState, initialColor, - !playAsOpponent, recursionLevel - 1, alpha, beta); + if (terminal) { + node.getProperties().setReward( + stateEvaluator.scoreGame(gameState).getAggregateScore()); + + numStateEvaluations++; + + return bestAction; + } else { - if (minValue > value) { - value = minValue; - if (recursionLevel == DEFAULT_RECURSIVE_PLAYS * 2) { - bestPick = nextMove; + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode( + nextState, new AlphaBetaProperties()); + + childNode.getProperties().setAlpha( + node.getProperties().getAlpha()); + childNode.getProperties().setBeta( + node.getProperties().getBeta()); + + node.addChild(nextMove, childNode); + getMin(recursionLevels - 1, stateEvaluator, childNode, + GoGame.getColorToPlay(player, true)); + + double gameScore = 
childNode.getProperties().getReward(); + + if (gameScore > maxScore) { + maxScore = gameScore; + bestAction = nextMove; } - } - if (value >= beta) { - return value; - } - alpha = Math.max(alpha, value); - } - - return value; - } - - private int getMinValue(GameConfig gameConfig, GameState gameState, - Player initialColor, boolean playAsOpponent, int recursionLevel, - int alpha, int beta) { - if (terminalTest(recursionLevel)) { - return getUtility(gameConfig, gameState); - } - - Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent); - - List validMoves = validMoveGenerator.getActions(gameConfig, - gameState, colorPlaying, ActionGenerator.ALL_ACTIONS); - - int value = Integer.MAX_VALUE; - - for (Action nextMove : validMoves) { - GameState nextState = new GameState(gameState); - - if (!nextState.playStone(colorPlaying, nextMove)) { - throw new RuntimeException( - "Illegal move attempted during search!"); - } - - int maxValue = getMaxValue(gameConfig, nextState, initialColor, - !playAsOpponent, recursionLevel - 1, alpha, beta); - - if (maxValue < value) { - value = maxValue; - if (recursionLevel == 2 * DEFAULT_RECURSIVE_PLAYS) { - bestPick = nextMove; + if (gameScore >= node.getProperties().getBeta()) { + node.getProperties().setReward(gameScore); + return bestAction; } + + node.getProperties().setAlpha( + Math.max(node.getProperties().getAlpha(), maxScore)); } - if (value <= alpha) { - return value; - } - beta = Math.min(beta, value); + node.getProperties().setReward(maxScore); + return bestAction; } - - return value; } - private boolean terminalTest(int recursionLevel) { - return recursionLevel < 1; - } + private Action getMin(int recursionLevels, StateEvaluator stateEvaluator, + GameTreeNode node, Player player) { - private int getUtility(GameConfig gameConfig, GameState gameState) { - StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); - return stateEvaluator.scoreGame(gameState).getAggregateScore(); + GameState gameState = new 
GameState(node.getGameState()); + + List validMoves = validMoveGenerator.getActions( + stateEvaluator.getGameConfig(), node.getGameState(), player, + ActionGenerator.ALL_ACTIONS); + + boolean terminal = isTerminal(validMoves.size(), recursionLevels); + + double minScore = Double.POSITIVE_INFINITY; + Action bestAction = Action.NONE; + + if (terminal) { + node.getProperties().setReward( + stateEvaluator.scoreGame(gameState).getAggregateScore()); + + numStateEvaluations++; + + return bestAction; + } else { + + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode( + nextState, new AlphaBetaProperties()); + + childNode.getProperties().setAlpha( + node.getProperties().getAlpha()); + childNode.getProperties().setBeta( + node.getProperties().getBeta()); + + node.addChild(nextMove, childNode); + getMax(recursionLevels - 1, stateEvaluator, childNode, + GoGame.getColorToPlay(player, true)); + + double gameScore = childNode.getProperties().getReward(); + + if (gameScore < minScore) { + minScore = gameScore; + bestAction = nextMove; + } + + if (gameScore <= node.getProperties().getAlpha()) { + node.getProperties().setReward(gameScore); + return bestAction; + } + + node.getProperties().setBeta( + Math.min(node.getProperties().getBeta(), minScore)); + } + + node.getProperties().setReward(minScore); + return bestAction; + } + } + + @Override + public int getNumStateEvaluations() { + return numStateEvaluations; + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Collection prohibitedActions, Player player) { + throw new UnsupportedOperationException("Prohibited actions not supported by this class."); } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/Minimax.java b/src/net/woodyfolsom/msproj/policy/Minimax.java index 9b7bc6a..c132992 100644 --- a/src/net/woodyfolsom/msproj/policy/Minimax.java +++ 
b/src/net/woodyfolsom/msproj/policy/Minimax.java @@ -1,5 +1,6 @@ package net.woodyfolsom.msproj.policy; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -13,119 +14,141 @@ import net.woodyfolsom.msproj.tree.MinimaxProperties; public class Minimax implements Policy { private static final int DEFAULT_LOOKAHEAD = 1; - + private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator(); private int lookAhead; - + private int numStateEvaluations = 0; + public Minimax() { this(DEFAULT_LOOKAHEAD); } - + public Minimax(int lookAhead) { this.lookAhead = lookAhead; } - + @Override public Action getAction(GameConfig gameConfig, GameState gameState, Player player) { + numStateEvaluations = 0; StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); - - GameTreeNode rootNode = new GameTreeNode(gameState, new MinimaxProperties()); - + + GameTreeNode rootNode = new GameTreeNode( + gameState, new MinimaxProperties()); + if (player == Player.BLACK) { - return getMax( - lookAhead * 2, - stateEvaluator, - rootNode, - player); + return getMax(lookAhead * 2, stateEvaluator, rootNode, player); } else { - return getMin( - lookAhead * 2, - stateEvaluator, - rootNode, - player); + return getMin(lookAhead * 2, stateEvaluator, rootNode, player); } } - private Action getMax(int recursionLevels, - StateEvaluator stateEvaluator, - GameTreeNode node, - Player player) { + private boolean isTerminal(int nValidMoves, int recursionLevels) { + return recursionLevels == 0 || nValidMoves == 0; + } + + private Action getMax(int recursionLevels, StateEvaluator stateEvaluator, + GameTreeNode node, Player player) { GameState gameState = new GameState(node.getGameState()); - - List validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(), - node.getGameState(), player, ActionGenerator.ALL_ACTIONS); - - for (Action nextMove : validMoves) { - GameState nextState = new GameState(gameState); - nextState.playStone(player, 
nextMove); - GameTreeNode childNode = new GameTreeNode(nextState, new MinimaxProperties()); - node.addChild(nextMove, childNode); - if (recursionLevels > 1) { - getMin(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true)); - } else { - //tail condition - set reward of this leaf node - childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore()); - } - } - + + List validMoves = validMoveGenerator.getActions( + stateEvaluator.getGameConfig(), node.getGameState(), player, + ActionGenerator.ALL_ACTIONS); + + boolean terminal = isTerminal(validMoves.size(), recursionLevels); + double maxScore = Double.NEGATIVE_INFINITY; Action bestAction = Action.NONE; - - for (Action nextMove : validMoves) { - GameTreeNode childNode = node.getChild(nextMove); - double gameScore = childNode.getProperties().getReward(); + + if (terminal) { + node.getProperties().setReward( + stateEvaluator.scoreGame(gameState).getAggregateScore()); - if (gameScore > maxScore) { - maxScore = gameScore; - bestAction = nextMove; + numStateEvaluations++; + + return bestAction; + } else { + + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode( + nextState, new MinimaxProperties()); + node.addChild(nextMove, childNode); + + getMin(recursionLevels - 1, stateEvaluator, childNode, + GoGame.getColorToPlay(player, true)); + + double gameScore = childNode.getProperties().getReward(); + + if (gameScore > maxScore) { + maxScore = gameScore; + bestAction = nextMove; + } } + + node.getProperties().setReward(maxScore); + return bestAction; } - - node.getProperties().setReward(maxScore); - return bestAction; } - - private Action getMin(int recursionLevels, - StateEvaluator stateEvaluator, - GameTreeNode node, - Player player) { + + private Action getMin(int recursionLevels, StateEvaluator stateEvaluator, + GameTreeNode node, Player player) { 
GameState gameState = new GameState(node.getGameState()); - - List validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(), - node.getGameState(), player, ActionGenerator.ALL_ACTIONS); - - for (Action nextMove : validMoves) { - GameState nextState = new GameState(gameState); - nextState.playStone(player, nextMove); - GameTreeNode childNode = new GameTreeNode(nextState, new MinimaxProperties()); - node.addChild(nextMove, childNode); - if (recursionLevels > 1) { - getMax(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true)); - } else { - //tail condition - set reward of this leaf node - childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore()); - } - } - + + List validMoves = validMoveGenerator.getActions( + stateEvaluator.getGameConfig(), node.getGameState(), player, + ActionGenerator.ALL_ACTIONS); + + boolean terminal = isTerminal(validMoves.size(), recursionLevels); + double minScore = Double.POSITIVE_INFINITY; Action bestAction = Action.NONE; - - for (Action nextMove : validMoves) { - GameTreeNode childNode = node.getChild(nextMove); - double gameScore = childNode.getProperties().getReward(); + + if (terminal) { + node.getProperties().setReward( + stateEvaluator.scoreGame(gameState).getAggregateScore()); - if (gameScore < minScore) { - minScore = gameScore; - bestAction = nextMove; + numStateEvaluations++; + + return bestAction; + } else { + + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode( + nextState, new MinimaxProperties()); + node.addChild(nextMove, childNode); + + getMax(recursionLevels - 1, stateEvaluator, childNode, + GoGame.getColorToPlay(player, true)); + + double gameScore = childNode.getProperties().getReward(); + + if (gameScore < minScore) { + minScore = gameScore; + bestAction = nextMove; + } } + + node.getProperties().setReward(minScore); + 
return bestAction; } - - node.getProperties().setReward(minScore); - return bestAction; + } + + @Override + public int getNumStateEvaluations() { + return numStateEvaluations; + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Collection prohibitedActions, Player player) { + throw new UnsupportedOperationException("Prohibited actions not supported by this class."); } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarlo.java b/src/net/woodyfolsom/msproj/policy/MonteCarlo.java index 8d97afb..fe0c18e 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarlo.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarlo.java @@ -6,12 +6,18 @@ import java.util.List; import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameState; +import net.woodyfolsom.msproj.GoGame; import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.StateEvaluator; import net.woodyfolsom.msproj.tree.GameTreeNode; import net.woodyfolsom.msproj.tree.MonteCarloProperties; public abstract class MonteCarlo implements Policy { + protected static final int ROLLOUT_DEPTH_LIMIT = 20; + + protected int numStateEvaluations = 0; protected Policy movePolicy; + protected long searchTimeLimit; protected volatile long elapsedTime = 0L; @@ -30,7 +36,7 @@ public abstract class MonteCarlo implements Policy { @Override public Action getAction(GameConfig gameConfig, GameState gameState, - Player initialColor) { + Player player) { long startTime = System.currentTimeMillis(); //If for some reason no moves are evaluated within the time limit, pass. @@ -38,20 +44,23 @@ public abstract class MonteCarlo implements Policy { //result in a win. 
GameTreeNode rootNode = new GameTreeNode(gameState, new MonteCarloProperties()); - + StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); do { + //TODO these return types may need to be lists for some MC methods List> selectedNodes = descend(rootNode); List> newLeaves = new ArrayList>(); + Player nextPlayer = GoGame.getColorToPlay(player, true); + for (GameTreeNode selectedNode: selectedNodes) { - for (GameTreeNode newLeaf : grow(selectedNode)) { + for (GameTreeNode newLeaf : grow(gameConfig, selectedNode, nextPlayer)) { newLeaves.add(newLeaf); } } for (GameTreeNode newLeaf : newLeaves) { - int reward = rollout(newLeaf); + int reward = rollout(gameConfig, stateEvaluator, newLeaf, player); update(newLeaf, reward); } @@ -67,9 +76,9 @@ public abstract class MonteCarlo implements Policy { public abstract Action getBestAction(GameTreeNode node); - public abstract List> grow(GameTreeNode node); + public abstract List> grow(GameConfig gameConfig, GameTreeNode node, Player player); - public abstract int rollout(GameTreeNode node); + public abstract int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode node, Player player); public abstract void update(GameTreeNode node, int reward); @@ -80,4 +89,8 @@ public abstract class MonteCarlo implements Policy { public int doRollout() { return 0; } + + public int getNumStateEvaluations() { + return numStateEvaluations; + } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java index 3da3834..6c095d3 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java @@ -1,36 +1,44 @@ package net.woodyfolsom.msproj.policy; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Set; import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.GameConfig; +import net.woodyfolsom.msproj.GameState; 
+import net.woodyfolsom.msproj.GoGame; +import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.StateEvaluator; import net.woodyfolsom.msproj.tree.GameTreeNode; import net.woodyfolsom.msproj.tree.MonteCarloProperties; public class MonteCarloUCT extends MonteCarlo { + public static final double TUNING_CONSTANT = 0.50; public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) { super(movePolicy, searchTimeLimit); } - + @Override public List> descend(GameTreeNode node) { double bestScore = Double.NEGATIVE_INFINITY; GameTreeNode bestNode = node; - //This appears slightly redundant with getBestAction() but it is not - - //descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty) - //but getBestAction specifically asks for the optimum action to take from the current node, - //even if it results in a worse next state. + //TODO: WHAT TO DO if the optimum leaf node is actually a terminal node? + //from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout + double nodeVisits = node.getProperties().getVisits(); + for (Action action : node.getActions()) { GameTreeNode childNode = node.getChild(action); MonteCarloProperties properties = childNode.getProperties(); - double childScore = (double) properties.getWins() / properties.getVisits(); + double childScore = (double) properties.getWins() / properties.getVisits() + TUNING_CONSTANT * Math.sqrt(Math.log(nodeVisits) / childNode.getProperties().getVisits()); if (childScore >= bestScore) { - bestScore = childScore; - bestNode = childNode; + bestScore = childScore; + bestNode = childNode; } } @@ -64,21 +72,63 @@ public class MonteCarloUCT extends MonteCarlo { } @Override - public List> grow(GameTreeNode node) { - // TODO Auto-generated method stub - return null; + public List> grow(GameConfig gameConfig, GameTreeNode node, Player player) { + GameState nextGameState = new GameState(node.getGameState()); + Policy
randomMovePolicy = new RandomMovePolicy(); + Set exploredActions = node.getActions(); + Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player); + if (Action.NONE == action) { + throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions); + } + nextGameState.playStone(player, action); + List> newChildren = new ArrayList>(); + newChildren.add(new GameTreeNode(nextGameState,new MonteCarloProperties())); + return newChildren; } @Override - public int rollout(GameTreeNode node) { - // TODO Auto-generated method stub - return 0; + /** + * Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter, + * since without (super)ko detection, there is no way to guarantee a rollout will terminate. + * Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts. + */ + public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode node, Player player) { + Policy randomMovePolicy = new RandomMovePolicy(); + + Action action; + int rolloutDepth = 0; + GameState finalGameState = new GameState(node.getGameState()); + Player currentPlayer = player; + do { + rolloutDepth++; + action = randomMovePolicy.getAction(gameConfig, finalGameState, currentPlayer); + if (action != Action.NONE) { + finalGameState.playStone(currentPlayer, action); + currentPlayer = GoGame.getColorToPlay(currentPlayer, true); + } + } while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT); + + if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) { + return 1; + } else { + return 0; + } } @Override public void update(GameTreeNode node, int reward) { - // TODO Auto-generated method stub - + GameTreeNode currentNode = node; + while (currentNode != null) { + MonteCarloProperties nodeProperties = currentNode.getProperties(); + nodeProperties.setWins(nodeProperties.getWins() +
reward); + nodeProperties.setVisits(nodeProperties.getVisits() + 1); + currentNode = currentNode.getParent(); + } + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Collection prohibitedActions, Player player) { + throw new UnsupportedOperationException("Prohibited actions not supported by this class."); } - } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/Policy.java b/src/net/woodyfolsom/msproj/policy/Policy.java index 6d62e22..e6a90ef 100644 --- a/src/net/woodyfolsom/msproj/policy/Policy.java +++ b/src/net/woodyfolsom/msproj/policy/Policy.java @@ -1,5 +1,7 @@ package net.woodyfolsom.msproj.policy; +import java.util.Collection; + import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameState; @@ -8,4 +10,6 @@ import net.woodyfolsom.msproj.Player; public interface Policy { public Action getAction(GameConfig gameConfig, GameState gameState, Player player); + public Action getAction(GameConfig gameConfig, GameState gameState, Collection prohibitedActions, Player player); + public int getNumStateEvaluations(); } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java index cba21f8..1049c06 100644 --- a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java +++ b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java @@ -1,6 +1,7 @@ package net.woodyfolsom.msproj.policy; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -9,34 +10,20 @@ import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.Player; -public class RandomMovePolicy implements Policy { +public class RandomMovePolicy implements Policy, ActionGenerator { /** * Does NOT modify the gameState. 
*/ public Action getAction(GameConfig gameConfig, GameState gameState, - Player color) { - GameState gameStateCopy = new GameState(gameState); - List emptyCoordinates = gameStateCopy.getEmptyCoords(); - - while (emptyCoordinates.size() > 0) { - Action randomMove = Action.getInstance(emptyCoordinates - .get((int) (Math.random() * emptyCoordinates.size()))); - - if (gameStateCopy.playStone(color, randomMove)) { - return randomMove; - } else { - emptyCoordinates.remove(randomMove); - } - } - - return Action.PASS; + Collection prohibitedMoves, Player player) { + return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0); } /** * Attempts to generate up to nMoves random moves on behalf of the specified - * player. Will return at least one move, which may be 'pass' if random - * search does not success in discovering a valid move. Does NOT modify the + * player. Will return at least one move, which may be 'NONE' if random + * search does not succeed in discovering a valid move. Does NOT modify the * gameState.
* * @param gameConfig @@ -45,25 +32,47 @@ public class RandomMovePolicy implements Policy { * * @return */ - public List genMoves(GameConfig gameConfig, GameState gameState, - Player color, int nMoves) { + public List getActions(GameConfig gameConfig, GameState gameState, + Collection prohibitedMoves, Player player, int nMoves) { GameState gameStateCopy = new GameState(gameState); - List emptyCoordinates = gameStateCopy.getEmptyCoords(); - List randomMoves = new ArrayList(); - - while (emptyCoordinates.size() > 0 && randomMoves.size() < nMoves) { - Action randomMove = Action.getInstance(emptyCoordinates - .get((int) (Math.random() * emptyCoordinates.size()))); - if (gameStateCopy.playStone(color, randomMove)) { - randomMoves.add(randomMove); - } - emptyCoordinates.remove(randomMove); + ActionGenerator actionGenerator = new ValidMoveGenerator(); + + List possibleActions = actionGenerator.getActions(gameConfig, gameStateCopy, prohibitedMoves, player, ActionGenerator.ALL_ACTIONS); + List randomActions = new ArrayList(); + + while (possibleActions.size() > 0 && randomActions.size() < nMoves) { + Action randomAction = possibleActions + .remove((int) (Math.random() * possibleActions.size())); + + randomActions.add(randomAction); } - if (randomMoves.size() == 0) { - randomMoves.add(Action.PASS); + if (randomActions.size() == 0) { + randomActions.add(Action.NONE); } - - return randomMoves; + + return randomActions; } -} + + /** + * RandomMoveGenerator does not evaluate any states, but simply returns elements of + * a set of uniformly distributed, distinct valid moves. 
+ * + * @return + */ + public int getNumStateEvaluations() { + return 0; + } + + @Override + public List getActions(GameConfig gameConfig, GameState gameState, + Player color, int numActions) { + return getActions(gameConfig, gameState, new ArrayList(), color, numActions); + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Player player) { + return getActions(gameConfig,gameState,player,1).get(0); + } +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/ValidMoveGenerator.java b/src/net/woodyfolsom/msproj/policy/ValidMoveGenerator.java index b7dc0a9..39b1069 100644 --- a/src/net/woodyfolsom/msproj/policy/ValidMoveGenerator.java +++ b/src/net/woodyfolsom/msproj/policy/ValidMoveGenerator.java @@ -1,6 +1,7 @@ package net.woodyfolsom.msproj.policy; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import net.woodyfolsom.msproj.Action; @@ -8,29 +9,36 @@ import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.Player; -//import org.apache.log4j.Logger; - public class ValidMoveGenerator implements ActionGenerator { - //private static final Logger LOGGER = Logger.getLogger(ValidMoveGenerator.class.getName()); @Override public List getActions(GameConfig gameConfig, GameState gameState, Player color, int nMoves) { - + return getActions(gameConfig, gameState, new ArrayList(), + color, nMoves); + } + + @Override + public List getActions(GameConfig gameConfig, GameState gameState, + Collection prohibitedMoves, Player color, int nMoves) { + GameState gameStateCopy = new GameState(gameState); List emptyCoordinates = gameStateCopy.getEmptyCoords(); List validMoves = new ArrayList(); while (emptyCoordinates.size() > 0) { - Action nextMove = Action.getInstance(emptyCoordinates.remove(emptyCoordinates.size()-1)); - if (gameStateCopy.playStone(color, nextMove)) { + Action nextMove = Action.getInstance(emptyCoordinates + 
.remove(emptyCoordinates.size() - 1)); + if (!prohibitedMoves.contains(nextMove) + && gameStateCopy.playStone(color, nextMove)) { validMoves.add(nextMove); - gameStateCopy = new GameState(gameState); // play successful? regenerate copy of gameState + gameStateCopy = new GameState(gameState); // play successful? + // regenerate copy + // of gameState } } - //Passing is always a VALID move. It may not be a GOOD move. - if (validMoves.size() == 0) { + if (!prohibitedMoves.contains(Action.PASS)) { validMoves.add(Action.PASS); } diff --git a/src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java b/src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java new file mode 100644 index 0000000..d5612a0 --- /dev/null +++ b/src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java @@ -0,0 +1,23 @@ +package net.woodyfolsom.msproj.tree; + +public class AlphaBetaProperties extends MinimaxProperties { + + double alpha = Double.NEGATIVE_INFINITY; + double beta = Double.POSITIVE_INFINITY; + + public double getAlpha() { + return alpha; + } + + public void setAlpha(double d) { + this.alpha = d; + } + + public double getBeta() { + return beta; + } + + public void setBeta(double beta) { + this.beta = beta; + } +} diff --git a/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java b/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java deleted file mode 100644 index 05c60ff..0000000 --- a/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java +++ /dev/null @@ -1,18 +0,0 @@ -package net.woodyfolsom.msproj.tree; - -public class AlphaBetaPropeties extends GameTreeNodeProperties{ - int alpha = 0; - int beta = 0; - public int getAlpha() { - return alpha; - } - public void setAlpha(int alpha) { - this.alpha = alpha; - } - public int getBeta() { - return beta; - } - public void setBeta(int beta) { - this.beta = beta; - } -} diff --git a/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java b/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java index 364356b..30d2361 100644 --- 
a/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java +++ b/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java @@ -3,15 +3,19 @@ package net.woodyfolsom.msproj.tree; public class MonteCarloProperties extends GameTreeNodeProperties { int visits = 0; int wins = 0; + public int getVisits() { return visits; } + public void setVisits(int visits) { this.visits = visits; } + public int getWins() { return wins; } + public void setWins(int wins) { this.wins = wins; } diff --git a/test/net/woodyfolsom/msproj/policy/AlphaBetaTest.java b/test/net/woodyfolsom/msproj/policy/AlphaBetaTest.java index 4d58ba7..49c86d0 100644 --- a/test/net/woodyfolsom/msproj/policy/AlphaBetaTest.java +++ b/test/net/woodyfolsom/msproj/policy/AlphaBetaTest.java @@ -23,7 +23,9 @@ public class AlphaBetaTest { System.out.println(gameState); System.out.println("Generated move: " + move); - assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + + assertEquals(Action.getInstance("B3"), move); gameState.playStone(Player.WHITE, move); System.out.println("Final board state:"); System.out.println(gameState); @@ -43,6 +45,8 @@ public class AlphaBetaTest { System.out.println(gameState); System.out.println("Generated move: " + move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); gameState.playStone(Player.BLACK, move); diff --git a/test/net/woodyfolsom/msproj/policy/MinimaxTest.java b/test/net/woodyfolsom/msproj/policy/MinimaxTest.java index 83a2b2a..0beea5d 100644 --- a/test/net/woodyfolsom/msproj/policy/MinimaxTest.java +++ b/test/net/woodyfolsom/msproj/policy/MinimaxTest.java @@ -24,7 +24,9 @@ public class MinimaxTest { System.out.println(gameState); System.out.println("Generated move: " + move); - assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), 
move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + + assertEquals(Action.getInstance("B3"), move); gameState.playStone(Player.WHITE, move); System.out.println(gameState); @@ -45,6 +47,8 @@ public class MinimaxTest { System.out.println(gameState); System.out.println("Generated move: " + move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); gameState.playStone(Player.BLACK, move); diff --git a/test/net/woodyfolsom/msproj/policy/MonteCarloUCTTest.java b/test/net/woodyfolsom/msproj/policy/MonteCarloUCTTest.java index 297a047..4ead0ac 100644 --- a/test/net/woodyfolsom/msproj/policy/MonteCarloUCTTest.java +++ b/test/net/woodyfolsom/msproj/policy/MonteCarloUCTTest.java @@ -25,6 +25,8 @@ public class MonteCarloUCTTest { System.out.println(gameState); System.out.println("Generated move: " + move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + assertEquals("Expected B3 but was: " + move, "B3", move); gameState.playStone(Player.WHITE, move); @@ -45,6 +47,8 @@ public class MonteCarloUCTTest { System.out.println(gameState); System.out.println("Generated move: " + move); + System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations()); + assertEquals("Expected B3 but was: " + move, "B3", move); gameState.playStone(Player.BLACK, move); diff --git a/test/net/woodyfolsom/msproj/policy/RandomTest.java b/test/net/woodyfolsom/msproj/policy/RandomTest.java index 8934784..61d01bf 100644 --- a/test/net/woodyfolsom/msproj/policy/RandomTest.java +++ b/test/net/woodyfolsom/msproj/policy/RandomTest.java @@ -1,6 +1,10 @@ package net.woodyfolsom.msproj.policy; import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.List; + import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; import 
net.woodyfolsom.msproj.GameState; @@ -49,7 +53,11 @@ public class RandomTest { System.out.println("State before random WHITE move selection:"); System.out.println(gameState); //This is correct - checked vs. MFOG - assertEquals(Action.getInstance("B3"), new RandomMovePolicy().getAction(new GameConfig(), gameState, Player.WHITE)); + //PASS would otherwise be a valid move + List prohibitedMoves = new ArrayList(); + prohibitedMoves.add(Action.PASS); + + assertEquals(Action.getInstance("B3"), new RandomMovePolicy().getAction(new GameConfig(), gameState, prohibitedMoves, Player.WHITE)); System.out.println(gameState); }