From 4a1c64843d32bbe114c9a9a7ad017b6639c33adc Mon Sep 17 00:00:00 2001 From: cs6601 Date: Thu, 30 Aug 2012 10:51:04 -0400 Subject: [PATCH] Fixed Minimax search to use the new GameTreeNode, MinimaxProperty classes. The previous implementation was overly complicated and may have been buggy except when searching only 2 plies ahead. --- .../woodyfolsom/msproj/StateEvaluator.java | 4 + .../msproj/policy/GameTreeNode.java | 69 --------- .../woodyfolsom/msproj/policy/Minimax.java | 144 ++++++++++++------ .../woodyfolsom/msproj/policy/MonteCarlo.java | 24 +-- .../msproj/policy/MonteCarloUCT.java | 32 ++-- .../msproj/policy/MoveCandidate.java | 14 -- .../msproj/tree/AlphaBetaPropeties.java | 18 +++ .../woodyfolsom/msproj/tree/GameTreeNode.java | 57 +++++++ .../msproj/tree/GameTreeNodeProperties.java | 5 + .../msproj/tree/MinimaxProperties.java | 13 ++ .../msproj/tree/MonteCarloProperties.java | 18 +++ .../msproj/policy/MinimaxTest.java | 4 +- 12 files changed, 249 insertions(+), 153 deletions(-) delete mode 100644 src/net/woodyfolsom/msproj/policy/GameTreeNode.java delete mode 100644 src/net/woodyfolsom/msproj/policy/MoveCandidate.java create mode 100644 src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java create mode 100644 src/net/woodyfolsom/msproj/tree/GameTreeNode.java create mode 100644 src/net/woodyfolsom/msproj/tree/GameTreeNodeProperties.java create mode 100644 src/net/woodyfolsom/msproj/tree/MinimaxProperties.java create mode 100644 src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java diff --git a/src/net/woodyfolsom/msproj/StateEvaluator.java b/src/net/woodyfolsom/msproj/StateEvaluator.java index 2d7e83c..57ed496 100644 --- a/src/net/woodyfolsom/msproj/StateEvaluator.java +++ b/src/net/woodyfolsom/msproj/StateEvaluator.java @@ -7,6 +7,10 @@ public class StateEvaluator { this.gameConfig = gameConfig; } + public GameConfig getGameConfig() { + return gameConfig; + } + public GameScore scoreGame(GameState gameState) { GameBoard gameBoard; if (gameState.getGameBoard().isTerritoryMarked()) { diff --git a/src/net/woodyfolsom/msproj/policy/GameTreeNode.java b/src/net/woodyfolsom/msproj/policy/GameTreeNode.java deleted file mode 100644 index b826f95..0000000 --- a/src/net/woodyfolsom/msproj/policy/GameTreeNode.java +++ /dev/null @@ -1,69 +0,0 @@ -package net.woodyfolsom.msproj.policy; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import net.woodyfolsom.msproj.Action; -import net.woodyfolsom.msproj.GameState; - -public class GameTreeNode { - private GameState gameState; - private GameTreeNode parent; - private int numVisits; - private int numWins; - private Map children = new HashMap(); - - public GameTreeNode(GameState gameState) { - this.gameState = gameState; - } - - public void addChild(Action action, GameTreeNode child) { - children.put(action, child); - child.parent = this; - } - - public Set getActions() { - return children.keySet(); - } - - public GameTreeNode getChild(Action action) { - return children.get(action); - } - - public int getNumChildren() { - return children.size(); - } - - public GameState getGameState() { - return gameState; - } - - public int getNumVisits() { - return numVisits; - } - - public int getNumWins() { - return numWins; - } - - public GameTreeNode getParent() { - return parent; - } - - public boolean isRoot() { - return parent == null; - } - - public boolean isTerminal() { - return children.size() == 0; - } - - public void incrementVisits() { - numVisits++; - } - - public void incrementWins() { - numWins++; - } -} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/Minimax.java b/src/net/woodyfolsom/msproj/policy/Minimax.java index 4e8053e..9b7bc6a 100644 --- a/src/net/woodyfolsom/msproj/policy/Minimax.java +++ b/src/net/woodyfolsom/msproj/policy/Minimax.java @@ -1,77 +1,131 @@ package net.woodyfolsom.msproj.policy; -import java.util.ArrayList; import java.util.List; import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; -import net.woodyfolsom.msproj.GameScore; import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.GoGame; import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.StateEvaluator; +import net.woodyfolsom.msproj.tree.GameTreeNode; +import net.woodyfolsom.msproj.tree.MinimaxProperties; public class Minimax implements Policy { - private static final int DEFAULT_RECURSIVE_PLAYS = 1; + private static final int DEFAULT_LOOKAHEAD = 1; private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator(); + private int lookAhead; + + public Minimax() { + this(DEFAULT_LOOKAHEAD); + } + + public Minimax(int lookAhead) { + this.lookAhead = lookAhead; + } + @Override public Action getAction(GameConfig gameConfig, GameState gameState, - Player color) { - MoveCandidate moveCandidate = findBestMinimaxResult( - DEFAULT_RECURSIVE_PLAYS * 2, - gameConfig, gameState, color, false, Action.PASS); - - return moveCandidate.move; + Player player) { + + StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); + + GameTreeNode rootNode = new GameTreeNode(gameState, new MinimaxProperties()); + + if (player == Player.BLACK) { + return getMax( + lookAhead * 2, + stateEvaluator, + rootNode, + player); + } else { + return getMin( + lookAhead * 2, + stateEvaluator, + rootNode, + player); + } } - private MoveCandidate findBestMinimaxResult(int recursionLevels, - GameConfig gameConfig, GameState gameState, - Player initialColor, boolean playAsOpponent, Action bestPrevMove) { + private Action getMax(int recursionLevels, + StateEvaluator stateEvaluator, + GameTreeNode node, + Player player) { - StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); - List randomMoveCandidates = new ArrayList(); - - Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent); + GameState gameState = new GameState(node.getGameState()); - List validMoves = validMoveGenerator.getActions(gameConfig, - gameState, colorPlaying, ActionGenerator.ALL_ACTIONS); + List validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(), + node.getGameState(), player, ActionGenerator.ALL_ACTIONS); - for (Action randomMove : validMoves) { - GameState stateCopy = new GameState(gameState); - stateCopy.playStone(colorPlaying, randomMove); + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode(nextState, new MinimaxProperties()); + node.addChild(nextMove, childNode); if (recursionLevels > 1) { - randomMoveCandidates.add(findBestMinimaxResult(recursionLevels - 1, - gameConfig, stateCopy, initialColor, - !playAsOpponent, randomMove)); + getMin(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true)); } else { - GameScore score = stateEvaluator.scoreGame(stateCopy); - randomMoveCandidates.add(new MoveCandidate(randomMove, score)); + //tail condition - set reward of this leaf node + childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore()); } } - - // TODO use a sorted list and just return the last element - MoveCandidate bestMove = randomMoveCandidates.get(0); - double bestScoreSoFar = bestMove.score.getScore(colorPlaying); - - for (MoveCandidate moveCandidate : randomMoveCandidates) { - if (moveCandidate.score.getScore(colorPlaying) > bestScoreSoFar) { - bestMove = moveCandidate; - bestScoreSoFar = moveCandidate.score.getScore(colorPlaying); + + double maxScore = Double.NEGATIVE_INFINITY; + Action bestAction = Action.NONE; + + for (Action nextMove : validMoves) { + GameTreeNode childNode = node.getChild(nextMove); + double gameScore = childNode.getProperties().getReward(); + + if (gameScore > maxScore) { + maxScore = gameScore; + bestAction = nextMove; } } + + node.getProperties().setReward(maxScore); + return bestAction; + } + + private Action getMin(int recursionLevels, + StateEvaluator stateEvaluator, + GameTreeNode node, + Player player) { - // Fix to prevent thinking that the _opponent's_ best move is the move - // to make. - // If evaluating an opponent's move, the best move (for my opponent) is - // my previous move which gives the opponent the highest score. - // This should only happen if recursionLevels is initially odd. - if (playAsOpponent) { - return new MoveCandidate(bestPrevMove, bestMove.score); - } else { // if evaluating my own move, the move which gives me the - // highest score is the best. - return bestMove; + GameState gameState = new GameState(node.getGameState()); + + List validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(), + node.getGameState(), player, ActionGenerator.ALL_ACTIONS); + + for (Action nextMove : validMoves) { + GameState nextState = new GameState(gameState); + nextState.playStone(player, nextMove); + GameTreeNode childNode = new GameTreeNode(nextState, new MinimaxProperties()); + node.addChild(nextMove, childNode); + if (recursionLevels > 1) { + getMax(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true)); + } else { + //tail condition - set reward of this leaf node + childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore()); + } } + + double minScore = Double.POSITIVE_INFINITY; + Action bestAction = Action.NONE; + + for (Action nextMove : validMoves) { + GameTreeNode childNode = node.getChild(nextMove); + double gameScore = childNode.getProperties().getReward(); + + if (gameScore < minScore) { + minScore = gameScore; + bestAction = nextMove; + } + } + + node.getProperties().setReward(minScore); + return bestAction; } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarlo.java b/src/net/woodyfolsom/msproj/policy/MonteCarlo.java index a6d4615..8d97afb 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarlo.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarlo.java @@ -7,6 +7,8 @@ import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.tree.GameTreeNode; +import net.woodyfolsom.msproj.tree.MonteCarloProperties; public abstract class MonteCarlo implements Policy { protected Policy movePolicy; @@ -24,7 +26,7 @@ public abstract class MonteCarlo implements Policy { * @param node * @return */ - public abstract List descend(GameTreeNode node); + public abstract List> descend(GameTreeNode node); @Override public Action getAction(GameConfig gameConfig, GameState gameState, @@ -35,20 +37,20 @@ public abstract class MonteCarlo implements Policy { //Note that this may lose the game by forfeit even when picking any random move could //result in a win. - GameTreeNode rootNode = new GameTreeNode(gameState); + GameTreeNode rootNode = new GameTreeNode(gameState, new MonteCarloProperties()); do { //TODO these return types may need to be lists for some MC methods - List selectedNodes = descend(rootNode); - List newLeaves = new ArrayList(); + List> selectedNodes = descend(rootNode); + List> newLeaves = new ArrayList>(); - for (GameTreeNode selectedNode: selectedNodes) { - for (GameTreeNode newLeaf : grow(selectedNode)) { + for (GameTreeNode selectedNode: selectedNodes) { + for (GameTreeNode newLeaf : grow(selectedNode)) { newLeaves.add(newLeaf); } } - for (GameTreeNode newLeaf : newLeaves) { + for (GameTreeNode newLeaf : newLeaves) { int reward = rollout(newLeaf); update(newLeaf, reward); } @@ -63,13 +65,13 @@ public abstract class MonteCarlo implements Policy { return elapsedTime; } - public abstract Action getBestAction(GameTreeNode node); + public abstract Action getBestAction(GameTreeNode node); - public abstract List grow(GameTreeNode node); + public abstract List> grow(GameTreeNode node); - public abstract int rollout(GameTreeNode node); + public abstract int rollout(GameTreeNode node); - public abstract void update(GameTreeNode node, int reward); + public abstract void update(GameTreeNode node, int reward); public long getSearchTimeLimit() { return searchTimeLimit; diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java index b503880..3da3834 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java @@ -4,6 +4,8 @@ import java.util.ArrayList; import java.util.List; import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.tree.GameTreeNode; +import net.woodyfolsom.msproj.tree.MonteCarloProperties; public class MonteCarloUCT extends MonteCarlo { @@ -12,17 +14,20 @@ public class MonteCarloUCT extends MonteCarlo { } @Override - public List descend(GameTreeNode node) { - double bestScore = (double) node.getNumWins() / node.getNumVisits(); - GameTreeNode bestNode = node; + public List> descend(GameTreeNode node) { + double bestScore = Double.NEGATIVE_INFINITY; + GameTreeNode bestNode = node; //This appears slightly redundant with getBestAction() but it is not - //descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty) //but getBestAction specifically asks for the optimum action to take from the current node, //even if it results in a worse next state. for (Action action : node.getActions()) { - GameTreeNode childNode = node.getChild(action); - double childScore = (double) childNode.getNumWins() / childNode.getNumVisits(); + GameTreeNode childNode = node.getChild(action); + + MonteCarloProperties properties = childNode.getProperties(); + double childScore = (double) properties.getWins() / properties.getVisits(); + if (childScore >= bestScore) { bestScore = childScore; bestNode = childNode; @@ -30,7 +35,7 @@ public class MonteCarloUCT extends MonteCarlo { } if (bestNode == node) { - List bestNodeList = new ArrayList(); + List> bestNodeList = new ArrayList>(); bestNodeList.add(bestNode); return bestNodeList; } else { @@ -39,13 +44,16 @@ public class MonteCarloUCT extends MonteCarlo { } @Override - public Action getBestAction(GameTreeNode node) { + public Action getBestAction(GameTreeNode node) { Action bestAction = Action.NONE; double bestScore = Double.NEGATIVE_INFINITY; for (Action action : node.getActions()) { - GameTreeNode childNode = node.getChild(action); - double childScore = (double) childNode.getNumWins() / childNode.getNumVisits(); + GameTreeNode childNode = node.getChild(action); + + MonteCarloProperties properties = childNode.getProperties(); + double childScore = (double) properties.getWins() / properties.getVisits(); + if (childScore >= bestScore) { bestScore = childScore; bestAction = action; @@ -56,19 +64,19 @@ public class MonteCarloUCT extends MonteCarlo { } @Override - public List grow(GameTreeNode node) { + public List> grow(GameTreeNode node) { // TODO Auto-generated method stub return null; } @Override - public int rollout(GameTreeNode node) { + public int rollout(GameTreeNode node) { // TODO Auto-generated method stub return 0; } @Override - public void update(GameTreeNode node, int reward) { + public void update(GameTreeNode node, int reward) { // TODO Auto-generated method stub } diff --git a/src/net/woodyfolsom/msproj/policy/MoveCandidate.java b/src/net/woodyfolsom/msproj/policy/MoveCandidate.java deleted file mode 100644 index b79c07a..0000000 --- a/src/net/woodyfolsom/msproj/policy/MoveCandidate.java +++ /dev/null @@ -1,14 +0,0 @@ -package net.woodyfolsom.msproj.policy; - -import net.woodyfolsom.msproj.Action; -import net.woodyfolsom.msproj.GameScore; - -public class MoveCandidate { - public final Action move; - public final GameScore score; - - public MoveCandidate(Action move, GameScore score) { - this.move = move; - this.score = score; - } -} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java b/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java new file mode 100644 index 0000000..05c60ff --- /dev/null +++ b/src/net/woodyfolsom/msproj/tree/AlphaBetaPropeties.java @@ -0,0 +1,18 @@ +package net.woodyfolsom.msproj.tree; + +public class AlphaBetaPropeties extends GameTreeNodeProperties{ + int alpha = 0; + int beta = 0; + public int getAlpha() { + return alpha; + } + public void setAlpha(int alpha) { + this.alpha = alpha; + } + public int getBeta() { + return beta; + } + public void setBeta(int beta) { + this.beta = beta; + } +} diff --git a/src/net/woodyfolsom/msproj/tree/GameTreeNode.java b/src/net/woodyfolsom/msproj/tree/GameTreeNode.java new file mode 100644 index 0000000..8fc0c8a --- /dev/null +++ b/src/net/woodyfolsom/msproj/tree/GameTreeNode.java @@ -0,0 +1,57 @@ +package net.woodyfolsom.msproj.tree; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.GameState; + +public class GameTreeNode { + private GameState gameState; + private GameTreeNode parent; + private Map> children = new HashMap>(); + private T properties; + + public GameTreeNode(GameState gameState, T properties) { + this.gameState = gameState; + this.properties = properties; + } + + public void addChild(Action action, GameTreeNode child) { + children.put(action, child); + child.parent = this; + } + + public Set getActions() { + return children.keySet(); + } + + public GameTreeNode getChild(Action action) { + return children.get(action); + } + + public int getNumChildren() { + return children.size(); + } + + public GameState getGameState() { + return gameState; + } + + public GameTreeNode getParent() { + return parent; + } + + public T getProperties() { + return properties; + } + + public boolean isRoot() { + return parent == null; + } + + public boolean isTerminal() { + return children.size() == 0; + } +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/tree/GameTreeNodeProperties.java b/src/net/woodyfolsom/msproj/tree/GameTreeNodeProperties.java new file mode 100644 index 0000000..673a2f9 --- /dev/null +++ b/src/net/woodyfolsom/msproj/tree/GameTreeNodeProperties.java @@ -0,0 +1,5 @@ +package net.woodyfolsom.msproj.tree; + +public abstract class GameTreeNodeProperties { + +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/tree/MinimaxProperties.java b/src/net/woodyfolsom/msproj/tree/MinimaxProperties.java new file mode 100644 index 0000000..e4db7fc --- /dev/null +++ b/src/net/woodyfolsom/msproj/tree/MinimaxProperties.java @@ -0,0 +1,13 @@ +package net.woodyfolsom.msproj.tree; + +public class MinimaxProperties extends GameTreeNodeProperties { + private double reward = 0.0; + + public double getReward() { + return reward; + } + + public void setReward(double reward) { + this.reward = reward; + } +} diff --git a/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java b/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java new file mode 100644 index 0000000..364356b --- /dev/null +++ b/src/net/woodyfolsom/msproj/tree/MonteCarloProperties.java @@ -0,0 +1,18 @@ +package net.woodyfolsom.msproj.tree; + +public class MonteCarloProperties extends GameTreeNodeProperties { + int visits = 0; + int wins = 0; + public int getVisits() { + return visits; + } + public void setVisits(int visits) { + this.visits = visits; + } + public int getWins() { + return wins; + } + public void setWins(int wins) { + this.wins = wins; + } +} diff --git a/test/net/woodyfolsom/msproj/policy/MinimaxTest.java b/test/net/woodyfolsom/msproj/policy/MinimaxTest.java index 8dc0923..83a2b2a 100644 --- a/test/net/woodyfolsom/msproj/policy/MinimaxTest.java +++ b/test/net/woodyfolsom/msproj/policy/MinimaxTest.java @@ -24,7 +24,7 @@ public class MinimaxTest { System.out.println(gameState); System.out.println("Generated move: " + move); - assertEquals("Expected B3 but was: " + move, "B3", move); + assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); gameState.playStone(Player.WHITE, move); System.out.println(gameState); @@ -45,7 +45,7 @@ public class MinimaxTest { System.out.println(gameState); System.out.println("Generated move: " + move); - assertEquals("Expected B3 but was: " + move, "B3", move); + assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); gameState.playStone(Player.BLACK, move); System.out.println(gameState);