Updated RandomMovePolicy (the random move generator) to support MonteCarloUCT.

It is now possible to prohibit a Collection of Actions from being included in the List of returned actions (or from being returned as the single action).
All unit tests now pass, with the exception of those for MonteCarloUCT.

TODO: playerToMove and previousPlayerPassed should be made part of the GameState.
This would remove the superfluous Player parameter from many methods and make it possible
to check for the "I'm ahead and my opponent is offering to end the game" killer move.
This commit is contained in:
cs6601
2012-08-31 09:17:43 -04:00
parent 4a1c64843d
commit d3c03f2c51
17 changed files with 453 additions and 257 deletions

View File

@@ -42,8 +42,8 @@ public class GameScore {
return (double)whiteScore + komi; return (double)whiteScore + komi;
} }
public boolean isWinner(String color) { public boolean isWinner(Player player) {
if ("w".equals(color)) { if (Player.WHITE == player) {
return getWhiteScore() < NORMALIZED_ZERO_SCORE; return getWhiteScore() < NORMALIZED_ZERO_SCORE;
} else { } else {
return getBlackScore() > NORMALIZED_ZERO_SCORE; return getBlackScore() > NORMALIZED_ZERO_SCORE;

View File

@@ -87,6 +87,10 @@ public class GameState {
* @return * @return
*/ */
public boolean playStone(Player player, Action action) { public boolean playStone(Player player, Action action) {
if (action == Action.PASS) {
return true;
}
char currentStone = gameBoard.getSymbolAt(action.getColumn(), action.getRow()); char currentStone = gameBoard.getSymbolAt(action.getColumn(), action.getRow());
if (currentStone != GameBoard.EMPTY_INTERSECTION) { if (currentStone != GameBoard.EMPTY_INTERSECTION) {

View File

@@ -1,5 +1,6 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import java.util.Collection;
import java.util.List; import java.util.List;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
@@ -12,4 +13,7 @@ public interface ActionGenerator {
public List<Action> getActions(GameConfig gameConfig, GameState gameState, public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Player color, int numActions); Player color, int numActions);
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedMoves, Player color, int numActions);
} }

View File

@@ -1,5 +1,6 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import java.util.Collection;
import java.util.List; import java.util.List;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
@@ -8,122 +9,173 @@ import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.GoGame; import net.woodyfolsom.msproj.GoGame;
import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.StateEvaluator; import net.woodyfolsom.msproj.StateEvaluator;
import net.woodyfolsom.msproj.tree.AlphaBetaProperties;
import net.woodyfolsom.msproj.tree.GameTreeNode;
public class AlphaBeta implements Policy { public class AlphaBeta implements Policy {
private static final int DEFAULT_RECURSIVE_PLAYS = 1; private static final int DEFAULT_LOOKAHEAD = 1;
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator(); private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
private Action bestPick = Action.PASS; private int lookAhead;
private int numStateEvaluations = 0;
public AlphaBeta() {
this(DEFAULT_LOOKAHEAD);
}
public AlphaBeta(int lookAhead) {
this.lookAhead = lookAhead;
}
@Override @Override
public Action getAction(GameConfig gameConfig, GameState gameState, public Action getAction(GameConfig gameConfig, GameState gameState,
Player player) { Player player) {
int alpha = Integer.MIN_VALUE; numStateEvaluations = 0;
int beta = Integer.MAX_VALUE;
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
GameTreeNode<AlphaBetaProperties> rootNode = new GameTreeNode<AlphaBetaProperties>(
gameState, new AlphaBetaProperties());
if (player == Player.BLACK) { if (player == Player.BLACK) {
getMaxValue(gameConfig, gameState, player, false, return getMax(lookAhead * 2, stateEvaluator, rootNode, player);
DEFAULT_RECURSIVE_PLAYS * 2, alpha, beta);
return bestPick;
} else if (player == Player.WHITE) {
getMinValue(gameConfig, gameState, player, false,
DEFAULT_RECURSIVE_PLAYS * 2, alpha, beta);
return bestPick;
} else { } else {
return Action.PASS; return getMin(lookAhead * 2, stateEvaluator, rootNode, player);
} }
} }
private int getMaxValue(GameConfig gameConfig, GameState gameState, private boolean isTerminal(int nValidMoves, int recursionLevels) {
Player initialColor, boolean playAsOpponent, int recursionLevel, return recursionLevels == 0 || nValidMoves == 0;
int alpha, int beta) { }
if (terminalTest(recursionLevel)) {
return getUtility(gameConfig, gameState);
}
Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent); private Action getMax(int recursionLevels, StateEvaluator stateEvaluator,
GameTreeNode<AlphaBetaProperties> node, Player player) {
List<Action> validMoves = validMoveGenerator.getActions(gameConfig, GameState gameState = new GameState(node.getGameState());
gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);
int value = Integer.MIN_VALUE; List<Action> validMoves = validMoveGenerator.getActions(
stateEvaluator.getGameConfig(), node.getGameState(), player,
ActionGenerator.ALL_ACTIONS);
for (Action nextMove : validMoves) { boolean terminal = isTerminal(validMoves.size(), recursionLevels);
GameState nextState = new GameState(gameState);
if (!nextState.playStone(colorPlaying, nextMove)) { double maxScore = Double.NEGATIVE_INFINITY;
throw new RuntimeException( Action bestAction = Action.NONE;
"Illegal move attempted during search!");
}
int minValue = getMinValue(gameConfig, nextState, initialColor, if (terminal) {
!playAsOpponent, recursionLevel - 1, alpha, beta); node.getProperties().setReward(
stateEvaluator.scoreGame(gameState).getAggregateScore());
if (minValue > value) { numStateEvaluations++;
value = minValue;
if (recursionLevel == DEFAULT_RECURSIVE_PLAYS * 2) { return bestAction;
bestPick = nextMove; } else {
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<AlphaBetaProperties> childNode = new GameTreeNode<AlphaBetaProperties>(
nextState, new AlphaBetaProperties());
childNode.getProperties().setAlpha(
node.getProperties().getAlpha());
childNode.getProperties().setBeta(
node.getProperties().getBeta());
node.addChild(nextMove, childNode);
getMin(recursionLevels - 1, stateEvaluator, childNode,
GoGame.getColorToPlay(player, true));
double gameScore = childNode.getProperties().getReward();
if (gameScore > maxScore) {
maxScore = gameScore;
bestAction = nextMove;
} }
}
if (value >= beta) { if (gameScore >= node.getProperties().getBeta()) {
return value; node.getProperties().setReward(gameScore);
} return bestAction;
alpha = Math.max(alpha, value);
}
return value;
}
private int getMinValue(GameConfig gameConfig, GameState gameState,
Player initialColor, boolean playAsOpponent, int recursionLevel,
int alpha, int beta) {
if (terminalTest(recursionLevel)) {
return getUtility(gameConfig, gameState);
}
Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent);
List<Action> validMoves = validMoveGenerator.getActions(gameConfig,
gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);
int value = Integer.MAX_VALUE;
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
if (!nextState.playStone(colorPlaying, nextMove)) {
throw new RuntimeException(
"Illegal move attempted during search!");
}
int maxValue = getMaxValue(gameConfig, nextState, initialColor,
!playAsOpponent, recursionLevel - 1, alpha, beta);
if (maxValue < value) {
value = maxValue;
if (recursionLevel == 2 * DEFAULT_RECURSIVE_PLAYS) {
bestPick = nextMove;
} }
node.getProperties().setAlpha(
Math.max(node.getProperties().getAlpha(), maxScore));
} }
if (value <= alpha) { node.getProperties().setReward(maxScore);
return value; return bestAction;
}
beta = Math.min(beta, value);
} }
return value;
} }
private boolean terminalTest(int recursionLevel) { private Action getMin(int recursionLevels, StateEvaluator stateEvaluator,
return recursionLevel < 1; GameTreeNode<AlphaBetaProperties> node, Player player) {
GameState gameState = new GameState(node.getGameState());
List<Action> validMoves = validMoveGenerator.getActions(
stateEvaluator.getGameConfig(), node.getGameState(), player,
ActionGenerator.ALL_ACTIONS);
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
double minScore = Double.POSITIVE_INFINITY;
Action bestAction = Action.NONE;
if (terminal) {
node.getProperties().setReward(
stateEvaluator.scoreGame(gameState).getAggregateScore());
numStateEvaluations++;
return bestAction;
} else {
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<AlphaBetaProperties> childNode = new GameTreeNode<AlphaBetaProperties>(
nextState, new AlphaBetaProperties());
childNode.getProperties().setAlpha(
node.getProperties().getAlpha());
childNode.getProperties().setBeta(
node.getProperties().getBeta());
node.addChild(nextMove, childNode);
getMax(recursionLevels - 1, stateEvaluator, childNode,
GoGame.getColorToPlay(player, true));
double gameScore = childNode.getProperties().getReward();
if (gameScore < minScore) {
minScore = gameScore;
bestAction = nextMove;
}
if (gameScore <= node.getProperties().getAlpha()) {
node.getProperties().setReward(gameScore);
return bestAction;
}
node.getProperties().setBeta(
Math.min(node.getProperties().getBeta(), minScore));
}
node.getProperties().setReward(minScore);
return bestAction;
}
} }
private int getUtility(GameConfig gameConfig, GameState gameState) { @Override
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); public int getNumStateEvaluations() {
return stateEvaluator.scoreGame(gameState).getAggregateScore(); return numStateEvaluations;
}
@Override
public Action getAction(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedActions, Player player) {
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
} }
} }

View File

@@ -1,5 +1,6 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import java.util.Collection;
import java.util.List; import java.util.List;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
@@ -17,6 +18,7 @@ public class Minimax implements Policy {
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator(); private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
private int lookAhead; private int lookAhead;
private int numStateEvaluations = 0;
public Minimax() { public Minimax() {
this(DEFAULT_LOOKAHEAD); this(DEFAULT_LOOKAHEAD);
@@ -29,103 +31,124 @@ public class Minimax implements Policy {
@Override @Override
public Action getAction(GameConfig gameConfig, GameState gameState, public Action getAction(GameConfig gameConfig, GameState gameState,
Player player) { Player player) {
numStateEvaluations = 0;
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig); StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(gameState, new MinimaxProperties()); GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(
gameState, new MinimaxProperties());
if (player == Player.BLACK) { if (player == Player.BLACK) {
return getMax( return getMax(lookAhead * 2, stateEvaluator, rootNode, player);
lookAhead * 2,
stateEvaluator,
rootNode,
player);
} else { } else {
return getMin( return getMin(lookAhead * 2, stateEvaluator, rootNode, player);
lookAhead * 2,
stateEvaluator,
rootNode,
player);
} }
} }
private Action getMax(int recursionLevels, private boolean isTerminal(int nValidMoves, int recursionLevels) {
StateEvaluator stateEvaluator, return recursionLevels == 0 || nValidMoves == 0;
GameTreeNode<MinimaxProperties> node, }
Player player) {
private Action getMax(int recursionLevels, StateEvaluator stateEvaluator,
GameTreeNode<MinimaxProperties> node, Player player) {
GameState gameState = new GameState(node.getGameState()); GameState gameState = new GameState(node.getGameState());
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(), List<Action> validMoves = validMoveGenerator.getActions(
node.getGameState(), player, ActionGenerator.ALL_ACTIONS); stateEvaluator.getGameConfig(), node.getGameState(), player,
ActionGenerator.ALL_ACTIONS);
for (Action nextMove : validMoves) { boolean terminal = isTerminal(validMoves.size(), recursionLevels);
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
if (recursionLevels > 1) {
getMin(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
} else {
//tail condition - set reward of this leaf node
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
}
}
double maxScore = Double.NEGATIVE_INFINITY; double maxScore = Double.NEGATIVE_INFINITY;
Action bestAction = Action.NONE; Action bestAction = Action.NONE;
for (Action nextMove : validMoves) { if (terminal) {
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove); node.getProperties().setReward(
double gameScore = childNode.getProperties().getReward(); stateEvaluator.scoreGame(gameState).getAggregateScore());
if (gameScore > maxScore) { numStateEvaluations++;
maxScore = gameScore;
bestAction = nextMove; return bestAction;
} else {
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(
nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
getMin(recursionLevels - 1, stateEvaluator, childNode,
GoGame.getColorToPlay(player, true));
double gameScore = childNode.getProperties().getReward();
if (gameScore > maxScore) {
maxScore = gameScore;
bestAction = nextMove;
}
} }
}
node.getProperties().setReward(maxScore); node.getProperties().setReward(maxScore);
return bestAction; return bestAction;
}
} }
private Action getMin(int recursionLevels, private Action getMin(int recursionLevels, StateEvaluator stateEvaluator,
StateEvaluator stateEvaluator, GameTreeNode<MinimaxProperties> node, Player player) {
GameTreeNode<MinimaxProperties> node,
Player player) {
GameState gameState = new GameState(node.getGameState()); GameState gameState = new GameState(node.getGameState());
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(), List<Action> validMoves = validMoveGenerator.getActions(
node.getGameState(), player, ActionGenerator.ALL_ACTIONS); stateEvaluator.getGameConfig(), node.getGameState(), player,
ActionGenerator.ALL_ACTIONS);
for (Action nextMove : validMoves) { boolean terminal = isTerminal(validMoves.size(), recursionLevels);
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
if (recursionLevels > 1) {
getMax(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
} else {
//tail condition - set reward of this leaf node
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
}
}
double minScore = Double.POSITIVE_INFINITY; double minScore = Double.POSITIVE_INFINITY;
Action bestAction = Action.NONE; Action bestAction = Action.NONE;
for (Action nextMove : validMoves) { if (terminal) {
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove); node.getProperties().setReward(
double gameScore = childNode.getProperties().getReward(); stateEvaluator.scoreGame(gameState).getAggregateScore());
if (gameScore < minScore) { numStateEvaluations++;
minScore = gameScore;
bestAction = nextMove; return bestAction;
} else {
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(
nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
getMax(recursionLevels - 1, stateEvaluator, childNode,
GoGame.getColorToPlay(player, true));
double gameScore = childNode.getProperties().getReward();
if (gameScore < minScore) {
minScore = gameScore;
bestAction = nextMove;
}
} }
}
node.getProperties().setReward(minScore); node.getProperties().setReward(minScore);
return bestAction; return bestAction;
}
}
/**
 * Reports the value of the state-evaluation counter, which getAction()
 * resets to zero and the search increments each time it scores a terminal
 * node.
 *
 * @return the current value of numStateEvaluations
 */
@Override
public int getNumStateEvaluations() {
    return this.numStateEvaluations;
}
@Override
public Action getAction(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedActions, Player player) {
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
} }
} }

View File

@@ -6,12 +6,18 @@ import java.util.List;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.GoGame;
import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.StateEvaluator;
import net.woodyfolsom.msproj.tree.GameTreeNode; import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties; import net.woodyfolsom.msproj.tree.MonteCarloProperties;
public abstract class MonteCarlo implements Policy { public abstract class MonteCarlo implements Policy {
protected static final int ROLLOUT_DEPTH_LIMIT = 20;
protected int numStateEvaluations = 0;
protected Policy movePolicy; protected Policy movePolicy;
protected long searchTimeLimit; protected long searchTimeLimit;
protected volatile long elapsedTime = 0L; protected volatile long elapsedTime = 0L;
@@ -30,7 +36,7 @@ public abstract class MonteCarlo implements Policy {
@Override @Override
public Action getAction(GameConfig gameConfig, GameState gameState, public Action getAction(GameConfig gameConfig, GameState gameState,
Player initialColor) { Player player) {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
//If for some reason no moves are evaluated within the time limit, pass. //If for some reason no moves are evaluated within the time limit, pass.
@@ -38,20 +44,23 @@ public abstract class MonteCarlo implements Policy {
//result in a win. //result in a win.
GameTreeNode<MonteCarloProperties> rootNode = new GameTreeNode<MonteCarloProperties>(gameState, new MonteCarloProperties()); GameTreeNode<MonteCarloProperties> rootNode = new GameTreeNode<MonteCarloProperties>(gameState, new MonteCarloProperties());
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
do { do {
//TODO these return types may need to be lists for some MC methods //TODO these return types may need to be lists for some MC methods
List<GameTreeNode<MonteCarloProperties>> selectedNodes = descend(rootNode); List<GameTreeNode<MonteCarloProperties>> selectedNodes = descend(rootNode);
List<GameTreeNode<MonteCarloProperties>> newLeaves = new ArrayList<GameTreeNode<MonteCarloProperties>>(); List<GameTreeNode<MonteCarloProperties>> newLeaves = new ArrayList<GameTreeNode<MonteCarloProperties>>();
Player nextPlayer = GoGame.getColorToPlay(player, true);
for (GameTreeNode<MonteCarloProperties> selectedNode: selectedNodes) { for (GameTreeNode<MonteCarloProperties> selectedNode: selectedNodes) {
for (GameTreeNode<MonteCarloProperties> newLeaf : grow(selectedNode)) { for (GameTreeNode<MonteCarloProperties> newLeaf : grow(gameConfig, selectedNode, nextPlayer)) {
newLeaves.add(newLeaf); newLeaves.add(newLeaf);
} }
} }
for (GameTreeNode<MonteCarloProperties> newLeaf : newLeaves) { for (GameTreeNode<MonteCarloProperties> newLeaf : newLeaves) {
int reward = rollout(newLeaf); int reward = rollout(gameConfig, stateEvaluator, newLeaf, player);
update(newLeaf, reward); update(newLeaf, reward);
} }
@@ -67,9 +76,9 @@ public abstract class MonteCarlo implements Policy {
public abstract Action getBestAction(GameTreeNode<MonteCarloProperties> node); public abstract Action getBestAction(GameTreeNode<MonteCarloProperties> node);
public abstract List<GameTreeNode<MonteCarloProperties>> grow(GameTreeNode<MonteCarloProperties> node); public abstract List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player);
public abstract int rollout(GameTreeNode<MonteCarloProperties> node); public abstract int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player);
public abstract void update(GameTreeNode<MonteCarloProperties> node, int reward); public abstract void update(GameTreeNode<MonteCarloProperties> node, int reward);
@@ -80,4 +89,8 @@ public abstract class MonteCarlo implements Policy {
public int doRollout() { public int doRollout() {
return 0; return 0;
} }
/**
 * Reports the value of the protected numStateEvaluations counter that
 * subclasses may update while searching.
 *
 * @return the current value of numStateEvaluations
 */
@Override
public int getNumStateEvaluations() {
    return numStateEvaluations;
}
} }

View File

@@ -1,13 +1,21 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Set;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.GoGame;
import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.StateEvaluator;
import net.woodyfolsom.msproj.tree.GameTreeNode; import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties; import net.woodyfolsom.msproj.tree.MonteCarloProperties;
public class MonteCarloUCT extends MonteCarlo { public class MonteCarloUCT extends MonteCarlo {
public static final double TUNING_CONSTANT = 0.50;
public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) { public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) {
super(movePolicy, searchTimeLimit); super(movePolicy, searchTimeLimit);
@@ -18,19 +26,19 @@ public class MonteCarloUCT extends MonteCarlo {
double bestScore = Double.NEGATIVE_INFINITY; double bestScore = Double.NEGATIVE_INFINITY;
GameTreeNode<MonteCarloProperties> bestNode = node; GameTreeNode<MonteCarloProperties> bestNode = node;
//This appears slightly redundant with getBestAction() but it is not - //TODO: WHAT TO DO if the optimum leaf node is actually a terminal node?
//descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty) //from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout
//but getBestAction specifically asks for the optimum action to take from the current node, double nodeVisits = node.getProperties().getVisits();
//even if it results in a worse next state.
for (Action action : node.getActions()) { for (Action action : node.getActions()) {
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action); GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
MonteCarloProperties properties = childNode.getProperties(); MonteCarloProperties properties = childNode.getProperties();
double childScore = (double) properties.getWins() / properties.getVisits(); double childScore = (double) properties.getWins() / properties.getVisits() + TUNING_CONSTANT * Math.log(nodeVisits) / childNode.getProperties().getVisits();
if (childScore >= bestScore) { if (childScore >= bestScore) {
bestScore = childScore; bestScore = childScore;
bestNode = childNode; bestNode = childNode;
} }
} }
@@ -64,21 +72,63 @@ public class MonteCarloUCT extends MonteCarlo {
} }
@Override @Override
public List<GameTreeNode<MonteCarloProperties>> grow(GameTreeNode<MonteCarloProperties> node) { public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player) {
// TODO Auto-generated method stub GameState nextGameState = new GameState(node.getGameState());
return null; Policy randomMovePolicy = new RandomMovePolicy();
// Actions that already have a child under this node must not be picked again.
Set<Action> exploredActions = node.getActions();
Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player);
if (Action.NONE == action) {
    throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions);
}
nextGameState.playStone(player, action);
GameTreeNode<MonteCarloProperties> newChild = new GameTreeNode<MonteCarloProperties>(nextGameState, new MonteCarloProperties());
// Attach the new leaf to the tree. Without this, node.getActions() never
// reports the action as explored and the leaf has no path back to the root
// for update() to follow.
// NOTE(review): assumes addChild() also sets the child's parent link - confirm in GameTreeNode.
node.addChild(action, newChild);
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
newChildren.add(newChild);
return newChildren;
} }
@Override @Override
public int rollout(GameTreeNode<MonteCarloProperties> node) { /**
// TODO Auto-generated method stub * Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
return 0; * since without (super)ko detection, there is no way to guarantee a rollout will terminate.
* Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
*/
public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player) {
Policy randomMovePolicy = new RandomMovePolicy();
Action action;
int rolloutDepth = 0;
// Play out on a private copy so the state held by the tree node is untouched.
GameState finalGameState = new GameState(node.getGameState());
Player currentPlayer = player;
do {
    rolloutDepth++;
    // Pick the move from the evolving playout position, for the side that is
    // actually to move. Asking for a move from the node's original state on
    // behalf of the initial player would replay stale (possibly illegal) moves.
    action = randomMovePolicy.getAction(gameConfig, finalGameState, currentPlayer);
    if (action != Action.NONE) {
        finalGameState.playStone(currentPlayer, action);
        currentPlayer = GoGame.getColorToPlay(currentPlayer, true);
    }
} while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);

// Count the evaluation so getNumStateEvaluations() reflects rollouts, as the
// AlphaBeta and Minimax policies do for each scored state.
numStateEvaluations++;

// The reward is reported from the point of view of the player passed in.
if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) {
    return 1;
} else {
    return 0;
}
} }
@Override @Override
public void update(GameTreeNode<MonteCarloProperties> node, int reward) { public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
// TODO Auto-generated method stub GameTreeNode<MonteCarloProperties> currentNode = node;
// Back-propagate the rollout result from the given leaf towards the root:
// each node on the path records one more visit and accumulates the reward.
while (currentNode != null) {
    // Read the properties of the node currently being visited, not of the
    // starting leaf; otherwise only the leaf is incremented, once per ancestor.
    MonteCarloProperties nodeProperties = currentNode.getProperties();
    nodeProperties.setWins(nodeProperties.getWins() + reward);
    nodeProperties.setVisits(nodeProperties.getVisits() + 1);
    currentNode = currentNode.getParent();
}
} }
/**
 * Overload taking prohibited actions; this policy does not implement it.
 *
 * @param gameConfig unused
 * @param gameState unused
 * @param prohibitedActions unused
 * @param player unused
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Action getAction(GameConfig gameConfig, GameState gameState,
        Collection<Action> prohibitedActions, Player player) {
    final String reason = "Prohibited actions not supported by this class.";
    throw new UnsupportedOperationException(reason);
}
} }

View File

@@ -1,5 +1,7 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import java.util.Collection;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.GameState;
@@ -8,4 +10,6 @@ import net.woodyfolsom.msproj.Player;
public interface Policy { public interface Policy {
public Action getAction(GameConfig gameConfig, GameState gameState, Player player); public Action getAction(GameConfig gameConfig, GameState gameState, Player player);
public Action getAction(GameConfig gameConfig, GameState gameState, Collection<Action> prohibitedActions, Player player);
public int getNumStateEvaluations();
} }

View File

@@ -1,6 +1,7 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.List; import java.util.List;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
@@ -9,34 +10,20 @@ import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.Player;
public class RandomMovePolicy implements Policy { public class RandomMovePolicy implements Policy, ActionGenerator {
/** /**
* Does NOT modify the gameState. * Does NOT modify the gameState.
*/ */
public Action getAction(GameConfig gameConfig, GameState gameState, public Action getAction(GameConfig gameConfig, GameState gameState,
Player color) { Collection<Action> prohibitedMoves, Player player) {
GameState gameStateCopy = new GameState(gameState); return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0);
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords();
while (emptyCoordinates.size() > 0) {
Action randomMove = Action.getInstance(emptyCoordinates
.get((int) (Math.random() * emptyCoordinates.size())));
if (gameStateCopy.playStone(color, randomMove)) {
return randomMove;
} else {
emptyCoordinates.remove(randomMove);
}
}
return Action.PASS;
} }
/** /**
* Attempts to generate up to nMoves random moves on behalf of the specified * Attempts to generate up to nMoves random moves on behalf of the specified
* player. Will return at least one move, which may be 'pass' if random * player. Will return at least one move, which may be 'NONE' if random
* search does not success in discovering a valid move. Does NOT modify the * search does not succeed in discovering a valid move. Does NOT modify the
* gameState. * gameState.
* *
* @param gameConfig * @param gameConfig
@@ -45,25 +32,47 @@ public class RandomMovePolicy implements Policy {
* *
* @return * @return
*/ */
public List<Action> genMoves(GameConfig gameConfig, GameState gameState, public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Player color, int nMoves) { Collection<Action> prohibitedMoves, Player player, int nMoves) {
GameState gameStateCopy = new GameState(gameState); GameState gameStateCopy = new GameState(gameState);
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords(); ActionGenerator actionGenerator = new ValidMoveGenerator();
List<Action> randomMoves = new ArrayList<Action>();
while (emptyCoordinates.size() > 0 && randomMoves.size() < nMoves) { List<Action> possibleActions = actionGenerator.getActions(gameConfig, gameStateCopy, prohibitedMoves, player, ActionGenerator.ALL_ACTIONS);
Action randomMove = Action.getInstance(emptyCoordinates List<Action> randomActions = new ArrayList<Action>();
.get((int) (Math.random() * emptyCoordinates.size())));
if (gameStateCopy.playStone(color, randomMove)) { while (possibleActions.size() > 0 && randomActions.size() < nMoves) {
randomMoves.add(randomMove); Action randomAction = possibleActions
} .remove((int) (Math.random() * possibleActions.size()));
emptyCoordinates.remove(randomMove);
randomActions.add(randomAction);
} }
if (randomMoves.size() == 0) { if (randomActions.size() == 0) {
randomMoves.add(Action.PASS); randomActions.add(Action.NONE);
} }
return randomMoves; return randomActions;
}
/**
 * Reports the number of game states evaluated by this policy.
 *
 * RandomMovePolicy does not evaluate any states, but simply returns elements of
 * a set of uniformly distributed, distinct valid moves.
 *
 * @return always 0
 */
@Override
public int getNumStateEvaluations() {
    return 0;
}
/**
 * Generates random actions with nothing prohibited, by delegating to the
 * overload that accepts prohibited moves and handing it a fresh empty list.
 *
 * @param gameConfig passed through to the delegate
 * @param gameState passed through to the delegate
 * @param color the player to generate actions for
 * @param numActions the maximum number of actions requested
 * @return whatever the delegate overload returns
 */
@Override
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
        Player color, int numActions) {
    Collection<Action> nothingProhibited = new ArrayList<Action>();
    return getActions(gameConfig, gameState, nothingProhibited, color, numActions);
}
@Override
public Action getAction(GameConfig gameConfig, GameState gameState,
Player player) {
return getActions(gameConfig,gameState,player,1).get(0);
} }
} }

View File

@@ -1,6 +1,7 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.List; import java.util.List;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
@@ -8,29 +9,36 @@ import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.Player;
//import org.apache.log4j.Logger;
public class ValidMoveGenerator implements ActionGenerator { public class ValidMoveGenerator implements ActionGenerator {
//private static final Logger LOGGER = Logger.getLogger(ValidMoveGenerator.class.getName());
@Override @Override
public List<Action> getActions(GameConfig gameConfig, GameState gameState, public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Player color, int nMoves) { Player color, int nMoves) {
return getActions(gameConfig, gameState, new ArrayList<Action>(),
color, nMoves);
}
@Override
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedMoves, Player color, int nMoves) {
GameState gameStateCopy = new GameState(gameState); GameState gameStateCopy = new GameState(gameState);
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords(); List<String> emptyCoordinates = gameStateCopy.getEmptyCoords();
List<Action> validMoves = new ArrayList<Action>(); List<Action> validMoves = new ArrayList<Action>();
while (emptyCoordinates.size() > 0) { while (emptyCoordinates.size() > 0) {
Action nextMove = Action.getInstance(emptyCoordinates.remove(emptyCoordinates.size()-1)); Action nextMove = Action.getInstance(emptyCoordinates
if (gameStateCopy.playStone(color, nextMove)) { .remove(emptyCoordinates.size() - 1));
if (!prohibitedMoves.contains(nextMove)
&& gameStateCopy.playStone(color, nextMove)) {
validMoves.add(nextMove); validMoves.add(nextMove);
gameStateCopy = new GameState(gameState); // play successful? regenerate copy of gameState gameStateCopy = new GameState(gameState); // play successful?
// regenerate copy
// of gameState
} }
} }
//Passing is always a VALID move. It may not be a GOOD move. if (!prohibitedMoves.contains(Action.PASS)) {
if (validMoves.size() == 0) {
validMoves.add(Action.PASS); validMoves.add(Action.PASS);
} }

View File

@@ -0,0 +1,23 @@
package net.woodyfolsom.msproj.tree;
/**
 * Node properties that add an {@code alpha} and a {@code beta} value on top of
 * {@link MinimaxProperties}.
 *
 * {@code alpha} starts at negative infinity and {@code beta} at positive
 * infinity. NOTE(review): presumably these are the lower/upper bounds used by
 * alpha-beta pruning - confirm against the search code that reads them.
 */
public class AlphaBetaProperties extends MinimaxProperties {
    double alpha = Double.NEGATIVE_INFINITY;
    double beta = Double.POSITIVE_INFINITY;

    /** @return the current alpha value */
    public double getAlpha() {
        return this.alpha;
    }

    /** @return the current beta value */
    public double getBeta() {
        return this.beta;
    }

    /** @param d the new alpha value */
    public void setAlpha(double d) {
        this.alpha = d;
    }

    /** @param beta the new beta value */
    public void setBeta(double beta) {
        this.beta = beta;
    }
}

View File

@@ -1,18 +0,0 @@
package net.woodyfolsom.msproj.tree;
/**
 * Node properties that add integer {@code alpha} and {@code beta} values, both
 * initially 0, on top of {@link GameTreeNodeProperties}.
 *
 * NOTE(review): the class name is misspelled ("Propeties") and this file
 * appears to be superseded by AlphaBetaProperties - confirm before using it.
 */
public class AlphaBetaPropeties extends GameTreeNodeProperties {
    int alpha = 0;
    int beta = 0;

    /** @return the current alpha value */
    public int getAlpha() {
        return this.alpha;
    }

    /** @return the current beta value */
    public int getBeta() {
        return this.beta;
    }

    /** @param alpha the new alpha value */
    public void setAlpha(int alpha) {
        this.alpha = alpha;
    }

    /** @param beta the new beta value */
    public void setBeta(int beta) {
        this.beta = beta;
    }
}

View File

@@ -3,15 +3,19 @@ package net.woodyfolsom.msproj.tree;
public class MonteCarloProperties extends GameTreeNodeProperties { public class MonteCarloProperties extends GameTreeNodeProperties {
int visits = 0; int visits = 0;
int wins = 0; int wins = 0;
public int getVisits() { public int getVisits() {
return visits; return visits;
} }
public void setVisits(int visits) { public void setVisits(int visits) {
this.visits = visits; this.visits = visits;
} }
public int getWins() { public int getWins() {
return wins; return wins;
} }
public void setWins(int wins) { public void setWins(int wins) {
this.wins = wins; this.wins = wins;
} }

View File

@@ -23,7 +23,9 @@ public class AlphaBetaTest {
System.out.println(gameState); System.out.println(gameState);
System.out.println("Generated move: " + move); System.out.println("Generated move: " + move);
assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
assertEquals(Action.getInstance("B3"), move);
gameState.playStone(Player.WHITE, move); gameState.playStone(Player.WHITE, move);
System.out.println("Final board state:"); System.out.println("Final board state:");
System.out.println(gameState); System.out.println(gameState);
@@ -43,6 +45,8 @@ public class AlphaBetaTest {
System.out.println(gameState); System.out.println(gameState);
System.out.println("Generated move: " + move); System.out.println("Generated move: " + move);
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move);
gameState.playStone(Player.BLACK, move); gameState.playStone(Player.BLACK, move);

View File

@@ -24,7 +24,9 @@ public class MinimaxTest {
System.out.println(gameState); System.out.println(gameState);
System.out.println("Generated move: " + move); System.out.println("Generated move: " + move);
assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
assertEquals(Action.getInstance("B3"), move);
gameState.playStone(Player.WHITE, move); gameState.playStone(Player.WHITE, move);
System.out.println(gameState); System.out.println(gameState);
@@ -45,6 +47,8 @@ public class MinimaxTest {
System.out.println(gameState); System.out.println(gameState);
System.out.println("Generated move: " + move); System.out.println("Generated move: " + move);
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move); assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move);
gameState.playStone(Player.BLACK, move); gameState.playStone(Player.BLACK, move);

View File

@@ -25,6 +25,8 @@ public class MonteCarloUCTTest {
System.out.println(gameState); System.out.println(gameState);
System.out.println("Generated move: " + move); System.out.println("Generated move: " + move);
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
assertEquals("Expected B3 but was: " + move, "B3", move); assertEquals("Expected B3 but was: " + move, "B3", move);
gameState.playStone(Player.WHITE, move); gameState.playStone(Player.WHITE, move);
@@ -45,6 +47,8 @@ public class MonteCarloUCTTest {
System.out.println(gameState); System.out.println(gameState);
System.out.println("Generated move: " + move); System.out.println("Generated move: " + move);
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
assertEquals("Expected B3 but was: " + move, "B3", move); assertEquals("Expected B3 but was: " + move, "B3", move);
gameState.playStone(Player.BLACK, move); gameState.playStone(Player.BLACK, move);

View File

@@ -1,6 +1,10 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.GameState;
@@ -49,7 +53,11 @@ public class RandomTest {
System.out.println("State before random WHITE move selection:"); System.out.println("State before random WHITE move selection:");
System.out.println(gameState); System.out.println(gameState);
//This is correct - checked vs. MFOG //This is correct - checked vs. MFOG
assertEquals(Action.getInstance("B3"), new RandomMovePolicy().getAction(new GameConfig(), gameState, Player.WHITE)); //PASS would otherwise be a valid move
List<Action> prohibitedMoves = new ArrayList<Action>();
prohibitedMoves.add(Action.PASS);
assertEquals(Action.getInstance("B3"), new RandomMovePolicy().getAction(new GameConfig(), gameState, prohibitedMoves, Player.WHITE));
System.out.println(gameState); System.out.println(gameState);
} }