Updated RandomMoveGenerator to support MonteCarloUCT.
It is now possible to prohibit a Collection of Actions from being included in the List of returned Actions (or from being returned as the single Action). All unit tests now pass, with the exception of those for MonteCarloUCT. TODO: playerToMove and previousPlayerPassed should be made part of the GameState. This would remove the superfluous Player parameter from many methods and make it possible to check for the "I'm ahead and my opponent is offering to end the game" killer move.
This commit is contained in:
@@ -42,8 +42,8 @@ public class GameScore {
|
||||
return (double)whiteScore + komi;
|
||||
}
|
||||
|
||||
public boolean isWinner(String color) {
|
||||
if ("w".equals(color)) {
|
||||
public boolean isWinner(Player player) {
|
||||
if (Player.WHITE == player) {
|
||||
return getWhiteScore() < NORMALIZED_ZERO_SCORE;
|
||||
} else {
|
||||
return getBlackScore() > NORMALIZED_ZERO_SCORE;
|
||||
|
||||
@@ -87,6 +87,10 @@ public class GameState {
|
||||
* @return
|
||||
*/
|
||||
public boolean playStone(Player player, Action action) {
|
||||
if (action == Action.PASS) {
|
||||
return true;
|
||||
}
|
||||
|
||||
char currentStone = gameBoard.getSymbolAt(action.getColumn(), action.getRow());
|
||||
|
||||
if (currentStone != GameBoard.EMPTY_INTERSECTION) {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
@@ -12,4 +13,7 @@ public interface ActionGenerator {
|
||||
|
||||
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
|
||||
Player color, int numActions);
|
||||
|
||||
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedMoves, Player color, int numActions);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
@@ -8,122 +9,173 @@ import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.GoGame;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
import net.woodyfolsom.msproj.StateEvaluator;
|
||||
|
||||
import net.woodyfolsom.msproj.tree.AlphaBetaProperties;
|
||||
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
||||
|
||||
public class AlphaBeta implements Policy {
|
||||
private static final int DEFAULT_RECURSIVE_PLAYS = 1;
|
||||
private static final int DEFAULT_LOOKAHEAD = 1;
|
||||
|
||||
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
|
||||
|
||||
private Action bestPick = Action.PASS;
|
||||
private int lookAhead;
|
||||
private int numStateEvaluations = 0;
|
||||
|
||||
public AlphaBeta() {
|
||||
this(DEFAULT_LOOKAHEAD);
|
||||
}
|
||||
|
||||
public AlphaBeta(int lookAhead) {
|
||||
this.lookAhead = lookAhead;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Player player) {
|
||||
|
||||
int alpha = Integer.MIN_VALUE;
|
||||
int beta = Integer.MAX_VALUE;
|
||||
numStateEvaluations = 0;
|
||||
|
||||
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
|
||||
|
||||
GameTreeNode<AlphaBetaProperties> rootNode = new GameTreeNode<AlphaBetaProperties>(
|
||||
gameState, new AlphaBetaProperties());
|
||||
|
||||
if (player == Player.BLACK) {
|
||||
getMaxValue(gameConfig, gameState, player, false,
|
||||
DEFAULT_RECURSIVE_PLAYS * 2, alpha, beta);
|
||||
return bestPick;
|
||||
} else if (player == Player.WHITE) {
|
||||
getMinValue(gameConfig, gameState, player, false,
|
||||
DEFAULT_RECURSIVE_PLAYS * 2, alpha, beta);
|
||||
return bestPick;
|
||||
return getMax(lookAhead * 2, stateEvaluator, rootNode, player);
|
||||
} else {
|
||||
return Action.PASS;
|
||||
return getMin(lookAhead * 2, stateEvaluator, rootNode, player);
|
||||
}
|
||||
}
|
||||
|
||||
private int getMaxValue(GameConfig gameConfig, GameState gameState,
|
||||
Player initialColor, boolean playAsOpponent, int recursionLevel,
|
||||
int alpha, int beta) {
|
||||
if (terminalTest(recursionLevel)) {
|
||||
return getUtility(gameConfig, gameState);
|
||||
}
|
||||
private boolean isTerminal(int nValidMoves, int recursionLevels) {
|
||||
return recursionLevels == 0 || nValidMoves == 0;
|
||||
}
|
||||
|
||||
Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent);
|
||||
private Action getMax(int recursionLevels, StateEvaluator stateEvaluator,
|
||||
GameTreeNode<AlphaBetaProperties> node, Player player) {
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(gameConfig,
|
||||
gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);
|
||||
GameState gameState = new GameState(node.getGameState());
|
||||
|
||||
int value = Integer.MIN_VALUE;
|
||||
List<Action> validMoves = validMoveGenerator.getActions(
|
||||
stateEvaluator.getGameConfig(), node.getGameState(), player,
|
||||
ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
|
||||
|
||||
if (!nextState.playStone(colorPlaying, nextMove)) {
|
||||
throw new RuntimeException(
|
||||
"Illegal move attempted during search!");
|
||||
}
|
||||
double maxScore = Double.NEGATIVE_INFINITY;
|
||||
Action bestAction = Action.NONE;
|
||||
|
||||
int minValue = getMinValue(gameConfig, nextState, initialColor,
|
||||
!playAsOpponent, recursionLevel - 1, alpha, beta);
|
||||
if (terminal) {
|
||||
node.getProperties().setReward(
|
||||
stateEvaluator.scoreGame(gameState).getAggregateScore());
|
||||
|
||||
numStateEvaluations++;
|
||||
|
||||
return bestAction;
|
||||
} else {
|
||||
|
||||
if (minValue > value) {
|
||||
value = minValue;
|
||||
if (recursionLevel == DEFAULT_RECURSIVE_PLAYS * 2) {
|
||||
bestPick = nextMove;
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
nextState.playStone(player, nextMove);
|
||||
GameTreeNode<AlphaBetaProperties> childNode = new GameTreeNode<AlphaBetaProperties>(
|
||||
nextState, new AlphaBetaProperties());
|
||||
|
||||
childNode.getProperties().setAlpha(
|
||||
node.getProperties().getAlpha());
|
||||
childNode.getProperties().setBeta(
|
||||
node.getProperties().getBeta());
|
||||
|
||||
node.addChild(nextMove, childNode);
|
||||
getMin(recursionLevels - 1, stateEvaluator, childNode,
|
||||
GoGame.getColorToPlay(player, true));
|
||||
|
||||
double gameScore = childNode.getProperties().getReward();
|
||||
|
||||
if (gameScore > maxScore) {
|
||||
maxScore = gameScore;
|
||||
bestAction = nextMove;
|
||||
}
|
||||
}
|
||||
|
||||
if (value >= beta) {
|
||||
return value;
|
||||
}
|
||||
alpha = Math.max(alpha, value);
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
private int getMinValue(GameConfig gameConfig, GameState gameState,
|
||||
Player initialColor, boolean playAsOpponent, int recursionLevel,
|
||||
int alpha, int beta) {
|
||||
if (terminalTest(recursionLevel)) {
|
||||
return getUtility(gameConfig, gameState);
|
||||
}
|
||||
|
||||
Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent);
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(gameConfig,
|
||||
gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
int value = Integer.MAX_VALUE;
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
|
||||
if (!nextState.playStone(colorPlaying, nextMove)) {
|
||||
throw new RuntimeException(
|
||||
"Illegal move attempted during search!");
|
||||
}
|
||||
|
||||
int maxValue = getMaxValue(gameConfig, nextState, initialColor,
|
||||
!playAsOpponent, recursionLevel - 1, alpha, beta);
|
||||
|
||||
if (maxValue < value) {
|
||||
value = maxValue;
|
||||
if (recursionLevel == 2 * DEFAULT_RECURSIVE_PLAYS) {
|
||||
bestPick = nextMove;
|
||||
if (gameScore >= node.getProperties().getBeta()) {
|
||||
node.getProperties().setReward(gameScore);
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
node.getProperties().setAlpha(
|
||||
Math.max(node.getProperties().getAlpha(), maxScore));
|
||||
}
|
||||
|
||||
if (value <= alpha) {
|
||||
return value;
|
||||
}
|
||||
beta = Math.min(beta, value);
|
||||
node.getProperties().setReward(maxScore);
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
private boolean terminalTest(int recursionLevel) {
|
||||
return recursionLevel < 1;
|
||||
}
|
||||
private Action getMin(int recursionLevels, StateEvaluator stateEvaluator,
|
||||
GameTreeNode<AlphaBetaProperties> node, Player player) {
|
||||
|
||||
private int getUtility(GameConfig gameConfig, GameState gameState) {
|
||||
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
|
||||
return stateEvaluator.scoreGame(gameState).getAggregateScore();
|
||||
GameState gameState = new GameState(node.getGameState());
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(
|
||||
stateEvaluator.getGameConfig(), node.getGameState(), player,
|
||||
ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
|
||||
|
||||
double minScore = Double.POSITIVE_INFINITY;
|
||||
Action bestAction = Action.NONE;
|
||||
|
||||
if (terminal) {
|
||||
node.getProperties().setReward(
|
||||
stateEvaluator.scoreGame(gameState).getAggregateScore());
|
||||
|
||||
numStateEvaluations++;
|
||||
|
||||
return bestAction;
|
||||
} else {
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
nextState.playStone(player, nextMove);
|
||||
GameTreeNode<AlphaBetaProperties> childNode = new GameTreeNode<AlphaBetaProperties>(
|
||||
nextState, new AlphaBetaProperties());
|
||||
|
||||
childNode.getProperties().setAlpha(
|
||||
node.getProperties().getAlpha());
|
||||
childNode.getProperties().setBeta(
|
||||
node.getProperties().getBeta());
|
||||
|
||||
node.addChild(nextMove, childNode);
|
||||
getMax(recursionLevels - 1, stateEvaluator, childNode,
|
||||
GoGame.getColorToPlay(player, true));
|
||||
|
||||
double gameScore = childNode.getProperties().getReward();
|
||||
|
||||
if (gameScore < minScore) {
|
||||
minScore = gameScore;
|
||||
bestAction = nextMove;
|
||||
}
|
||||
|
||||
if (gameScore <= node.getProperties().getAlpha()) {
|
||||
node.getProperties().setReward(gameScore);
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
node.getProperties().setBeta(
|
||||
Math.min(node.getProperties().getBeta(), minScore));
|
||||
}
|
||||
|
||||
node.getProperties().setReward(minScore);
|
||||
return bestAction;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumStateEvaluations() {
|
||||
return numStateEvaluations;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedActions, Player player) {
|
||||
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
@@ -13,119 +14,141 @@ import net.woodyfolsom.msproj.tree.MinimaxProperties;
|
||||
|
||||
public class Minimax implements Policy {
|
||||
private static final int DEFAULT_LOOKAHEAD = 1;
|
||||
|
||||
|
||||
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
|
||||
|
||||
private int lookAhead;
|
||||
|
||||
private int numStateEvaluations = 0;
|
||||
|
||||
public Minimax() {
|
||||
this(DEFAULT_LOOKAHEAD);
|
||||
}
|
||||
|
||||
|
||||
public Minimax(int lookAhead) {
|
||||
this.lookAhead = lookAhead;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Player player) {
|
||||
numStateEvaluations = 0;
|
||||
|
||||
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
|
||||
|
||||
GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(gameState, new MinimaxProperties());
|
||||
|
||||
|
||||
GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(
|
||||
gameState, new MinimaxProperties());
|
||||
|
||||
if (player == Player.BLACK) {
|
||||
return getMax(
|
||||
lookAhead * 2,
|
||||
stateEvaluator,
|
||||
rootNode,
|
||||
player);
|
||||
return getMax(lookAhead * 2, stateEvaluator, rootNode, player);
|
||||
} else {
|
||||
return getMin(
|
||||
lookAhead * 2,
|
||||
stateEvaluator,
|
||||
rootNode,
|
||||
player);
|
||||
return getMin(lookAhead * 2, stateEvaluator, rootNode, player);
|
||||
}
|
||||
}
|
||||
|
||||
private Action getMax(int recursionLevels,
|
||||
StateEvaluator stateEvaluator,
|
||||
GameTreeNode<MinimaxProperties> node,
|
||||
Player player) {
|
||||
private boolean isTerminal(int nValidMoves, int recursionLevels) {
|
||||
return recursionLevels == 0 || nValidMoves == 0;
|
||||
}
|
||||
|
||||
private Action getMax(int recursionLevels, StateEvaluator stateEvaluator,
|
||||
GameTreeNode<MinimaxProperties> node, Player player) {
|
||||
|
||||
GameState gameState = new GameState(node.getGameState());
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(),
|
||||
node.getGameState(), player, ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
nextState.playStone(player, nextMove);
|
||||
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
|
||||
node.addChild(nextMove, childNode);
|
||||
if (recursionLevels > 1) {
|
||||
getMin(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
|
||||
} else {
|
||||
//tail condition - set reward of this leaf node
|
||||
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(
|
||||
stateEvaluator.getGameConfig(), node.getGameState(), player,
|
||||
ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
|
||||
|
||||
double maxScore = Double.NEGATIVE_INFINITY;
|
||||
Action bestAction = Action.NONE;
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove);
|
||||
double gameScore = childNode.getProperties().getReward();
|
||||
|
||||
if (terminal) {
|
||||
node.getProperties().setReward(
|
||||
stateEvaluator.scoreGame(gameState).getAggregateScore());
|
||||
|
||||
if (gameScore > maxScore) {
|
||||
maxScore = gameScore;
|
||||
bestAction = nextMove;
|
||||
numStateEvaluations++;
|
||||
|
||||
return bestAction;
|
||||
} else {
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
nextState.playStone(player, nextMove);
|
||||
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(
|
||||
nextState, new MinimaxProperties());
|
||||
node.addChild(nextMove, childNode);
|
||||
|
||||
getMin(recursionLevels - 1, stateEvaluator, childNode,
|
||||
GoGame.getColorToPlay(player, true));
|
||||
|
||||
double gameScore = childNode.getProperties().getReward();
|
||||
|
||||
if (gameScore > maxScore) {
|
||||
maxScore = gameScore;
|
||||
bestAction = nextMove;
|
||||
}
|
||||
}
|
||||
|
||||
node.getProperties().setReward(maxScore);
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
node.getProperties().setReward(maxScore);
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
private Action getMin(int recursionLevels,
|
||||
StateEvaluator stateEvaluator,
|
||||
GameTreeNode<MinimaxProperties> node,
|
||||
Player player) {
|
||||
|
||||
private Action getMin(int recursionLevels, StateEvaluator stateEvaluator,
|
||||
GameTreeNode<MinimaxProperties> node, Player player) {
|
||||
|
||||
GameState gameState = new GameState(node.getGameState());
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(),
|
||||
node.getGameState(), player, ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
nextState.playStone(player, nextMove);
|
||||
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
|
||||
node.addChild(nextMove, childNode);
|
||||
if (recursionLevels > 1) {
|
||||
getMax(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
|
||||
} else {
|
||||
//tail condition - set reward of this leaf node
|
||||
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(
|
||||
stateEvaluator.getGameConfig(), node.getGameState(), player,
|
||||
ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
|
||||
|
||||
double minScore = Double.POSITIVE_INFINITY;
|
||||
Action bestAction = Action.NONE;
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove);
|
||||
double gameScore = childNode.getProperties().getReward();
|
||||
|
||||
if (terminal) {
|
||||
node.getProperties().setReward(
|
||||
stateEvaluator.scoreGame(gameState).getAggregateScore());
|
||||
|
||||
if (gameScore < minScore) {
|
||||
minScore = gameScore;
|
||||
bestAction = nextMove;
|
||||
numStateEvaluations++;
|
||||
|
||||
return bestAction;
|
||||
} else {
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
nextState.playStone(player, nextMove);
|
||||
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(
|
||||
nextState, new MinimaxProperties());
|
||||
node.addChild(nextMove, childNode);
|
||||
|
||||
getMax(recursionLevels - 1, stateEvaluator, childNode,
|
||||
GoGame.getColorToPlay(player, true));
|
||||
|
||||
double gameScore = childNode.getProperties().getReward();
|
||||
|
||||
if (gameScore < minScore) {
|
||||
minScore = gameScore;
|
||||
bestAction = nextMove;
|
||||
}
|
||||
}
|
||||
|
||||
node.getProperties().setReward(minScore);
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
node.getProperties().setReward(minScore);
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumStateEvaluations() {
|
||||
return numStateEvaluations;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedActions, Player player) {
|
||||
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
|
||||
}
|
||||
}
|
||||
@@ -6,12 +6,18 @@ import java.util.List;
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
import net.woodyfolsom.msproj.GameConfig;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.GoGame;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
import net.woodyfolsom.msproj.StateEvaluator;
|
||||
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
||||
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
|
||||
|
||||
public abstract class MonteCarlo implements Policy {
|
||||
protected static final int ROLLOUT_DEPTH_LIMIT = 20;
|
||||
|
||||
protected int numStateEvaluations = 0;
|
||||
protected Policy movePolicy;
|
||||
|
||||
protected long searchTimeLimit;
|
||||
protected volatile long elapsedTime = 0L;
|
||||
|
||||
@@ -30,7 +36,7 @@ public abstract class MonteCarlo implements Policy {
|
||||
|
||||
@Override
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Player initialColor) {
|
||||
Player player) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
//If for some reason no moves are evaluated within the time limit, pass.
|
||||
@@ -38,20 +44,23 @@ public abstract class MonteCarlo implements Policy {
|
||||
//result in a win.
|
||||
|
||||
GameTreeNode<MonteCarloProperties> rootNode = new GameTreeNode<MonteCarloProperties>(gameState, new MonteCarloProperties());
|
||||
|
||||
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
|
||||
do {
|
||||
|
||||
//TODO these return types may need to be lists for some MC methods
|
||||
List<GameTreeNode<MonteCarloProperties>> selectedNodes = descend(rootNode);
|
||||
List<GameTreeNode<MonteCarloProperties>> newLeaves = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
||||
|
||||
Player nextPlayer = GoGame.getColorToPlay(player, true);
|
||||
|
||||
for (GameTreeNode<MonteCarloProperties> selectedNode: selectedNodes) {
|
||||
for (GameTreeNode<MonteCarloProperties> newLeaf : grow(selectedNode)) {
|
||||
for (GameTreeNode<MonteCarloProperties> newLeaf : grow(gameConfig, selectedNode, nextPlayer)) {
|
||||
newLeaves.add(newLeaf);
|
||||
}
|
||||
}
|
||||
|
||||
for (GameTreeNode<MonteCarloProperties> newLeaf : newLeaves) {
|
||||
int reward = rollout(newLeaf);
|
||||
int reward = rollout(gameConfig, stateEvaluator, newLeaf, player);
|
||||
update(newLeaf, reward);
|
||||
}
|
||||
|
||||
@@ -67,9 +76,9 @@ public abstract class MonteCarlo implements Policy {
|
||||
|
||||
public abstract Action getBestAction(GameTreeNode<MonteCarloProperties> node);
|
||||
|
||||
public abstract List<GameTreeNode<MonteCarloProperties>> grow(GameTreeNode<MonteCarloProperties> node);
|
||||
public abstract List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player);
|
||||
|
||||
public abstract int rollout(GameTreeNode<MonteCarloProperties> node);
|
||||
public abstract int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player);
|
||||
|
||||
public abstract void update(GameTreeNode<MonteCarloProperties> node, int reward);
|
||||
|
||||
@@ -80,4 +89,8 @@ public abstract class MonteCarlo implements Policy {
|
||||
public int doRollout() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public int getNumStateEvaluations() {
|
||||
return numStateEvaluations;
|
||||
}
|
||||
}
|
||||
@@ -1,36 +1,44 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
import net.woodyfolsom.msproj.GameConfig;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.GoGame;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
import net.woodyfolsom.msproj.StateEvaluator;
|
||||
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
||||
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
|
||||
|
||||
public class MonteCarloUCT extends MonteCarlo {
|
||||
public static final double TUNING_CONSTANT = 0.50;
|
||||
|
||||
public MonteCarloUCT(Policy movePolicy, long searchTimeLimit) {
|
||||
super(movePolicy, searchTimeLimit);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<GameTreeNode<MonteCarloProperties>> descend(GameTreeNode<MonteCarloProperties> node) {
|
||||
double bestScore = Double.NEGATIVE_INFINITY;
|
||||
GameTreeNode<MonteCarloProperties> bestNode = node;
|
||||
|
||||
//This appears slightly redundant with getBestAction() but it is not -
|
||||
//descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty)
|
||||
//but getBestAction specifically asks for the optimum action to take from the current node,
|
||||
//even if it results in a worse next state.
|
||||
//TODO: WHAT TO DO if the optimum leaf node is actually a terminal node?
|
||||
//from Kocsis and Szepesvari, the value of an actual terminal node is 0, unless estimated by rollout
|
||||
double nodeVisits = node.getProperties().getVisits();
|
||||
|
||||
for (Action action : node.getActions()) {
|
||||
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
|
||||
|
||||
MonteCarloProperties properties = childNode.getProperties();
|
||||
double childScore = (double) properties.getWins() / properties.getVisits();
|
||||
double childScore = (double) properties.getWins() / properties.getVisits() + TUNING_CONSTANT * Math.log(nodeVisits) / childNode.getProperties().getVisits();
|
||||
|
||||
if (childScore >= bestScore) {
|
||||
bestScore = childScore;
|
||||
bestNode = childNode;
|
||||
bestScore = childScore;
|
||||
bestNode = childNode;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,21 +72,63 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<GameTreeNode<MonteCarloProperties>> grow(GameTreeNode<MonteCarloProperties> node) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player) {
|
||||
GameState nextGameState = new GameState(node.getGameState());
|
||||
Policy randomMovePolicy = new RandomMovePolicy();
|
||||
Set<Action> exploredActions = node.getActions();
|
||||
Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player);
|
||||
if (Action.NONE == action) {
|
||||
throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions);
|
||||
}
|
||||
nextGameState.playStone(player, action);
|
||||
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
||||
newChildren.add(new GameTreeNode<MonteCarloProperties>(nextGameState,new MonteCarloProperties()));
|
||||
return newChildren;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int rollout(GameTreeNode<MonteCarloProperties> node) {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
/**
|
||||
* Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
|
||||
* since without (super)ko detection, there is no way to guarantee a rollout will terminate.
|
||||
* Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
|
||||
*/
|
||||
public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player) {
|
||||
Policy randomMovePolicy = new RandomMovePolicy();
|
||||
|
||||
Action action;
|
||||
int rolloutDepth = 0;
|
||||
GameState finalGameState = new GameState(node.getGameState());
|
||||
Player currentPlayer = player;
|
||||
do {
|
||||
rolloutDepth++;
|
||||
action = randomMovePolicy.getAction(gameConfig, node.getGameState(), player);
|
||||
if (action != Action.NONE) {
|
||||
finalGameState.playStone(currentPlayer, action);
|
||||
currentPlayer = GoGame.getColorToPlay(currentPlayer, true);
|
||||
}
|
||||
} while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
|
||||
|
||||
if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
GameTreeNode<MonteCarloProperties> currentNode = node;
|
||||
while (currentNode != null) {
|
||||
MonteCarloProperties nodeProperties = node.getProperties();
|
||||
nodeProperties.setWins(nodeProperties.getWins() + reward);
|
||||
nodeProperties.setVisits(nodeProperties.getVisits() + 1);
|
||||
currentNode = currentNode.getParent();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedActions, Player player) {
|
||||
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
import net.woodyfolsom.msproj.GameConfig;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
@@ -8,4 +10,6 @@ import net.woodyfolsom.msproj.Player;
|
||||
|
||||
public interface Policy {
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState, Player player);
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState, Collection<Action> prohibitedActions, Player player);
|
||||
public int getNumStateEvaluations();
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
@@ -9,34 +10,20 @@ import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
|
||||
|
||||
public class RandomMovePolicy implements Policy {
|
||||
public class RandomMovePolicy implements Policy, ActionGenerator {
|
||||
|
||||
/**
|
||||
* Does NOT modify the gameState.
|
||||
*/
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Player color) {
|
||||
GameState gameStateCopy = new GameState(gameState);
|
||||
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords();
|
||||
|
||||
while (emptyCoordinates.size() > 0) {
|
||||
Action randomMove = Action.getInstance(emptyCoordinates
|
||||
.get((int) (Math.random() * emptyCoordinates.size())));
|
||||
|
||||
if (gameStateCopy.playStone(color, randomMove)) {
|
||||
return randomMove;
|
||||
} else {
|
||||
emptyCoordinates.remove(randomMove);
|
||||
}
|
||||
}
|
||||
|
||||
return Action.PASS;
|
||||
Collection<Action> prohibitedMoves, Player player) {
|
||||
return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to generate up to nMoves random moves on behalf of the specified
|
||||
* player. Will return at least one move, which may be 'pass' if random
|
||||
* search does not success in discovering a valid move. Does NOT modify the
|
||||
* player. Will return at least one move, which may be 'NONE' if random
|
||||
* search does not succeed in discovering a valid move. Does NOT modify the
|
||||
* gameState.
|
||||
*
|
||||
* @param gameConfig
|
||||
@@ -45,25 +32,47 @@ public class RandomMovePolicy implements Policy {
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<Action> genMoves(GameConfig gameConfig, GameState gameState,
|
||||
Player color, int nMoves) {
|
||||
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedMoves, Player player, int nMoves) {
|
||||
GameState gameStateCopy = new GameState(gameState);
|
||||
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords();
|
||||
List<Action> randomMoves = new ArrayList<Action>();
|
||||
|
||||
while (emptyCoordinates.size() > 0 && randomMoves.size() < nMoves) {
|
||||
Action randomMove = Action.getInstance(emptyCoordinates
|
||||
.get((int) (Math.random() * emptyCoordinates.size())));
|
||||
if (gameStateCopy.playStone(color, randomMove)) {
|
||||
randomMoves.add(randomMove);
|
||||
}
|
||||
emptyCoordinates.remove(randomMove);
|
||||
ActionGenerator actionGenerator = new ValidMoveGenerator();
|
||||
|
||||
List<Action> possibleActions = actionGenerator.getActions(gameConfig, gameStateCopy, prohibitedMoves, player, ActionGenerator.ALL_ACTIONS);
|
||||
List<Action> randomActions = new ArrayList<Action>();
|
||||
|
||||
while (possibleActions.size() > 0 && randomActions.size() < nMoves) {
|
||||
Action randomAction = possibleActions
|
||||
.remove((int) (Math.random() * possibleActions.size()));
|
||||
|
||||
randomActions.add(randomAction);
|
||||
}
|
||||
|
||||
if (randomMoves.size() == 0) {
|
||||
randomMoves.add(Action.PASS);
|
||||
if (randomActions.size() == 0) {
|
||||
randomActions.add(Action.NONE);
|
||||
}
|
||||
|
||||
return randomMoves;
|
||||
|
||||
return randomActions;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* RandomMoveGenerator does not evaluate any states, but simply returns elements of
|
||||
* a set of uniformly distributed, distinct valid moves.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getNumStateEvaluations() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
|
||||
Player color, int numActions) {
|
||||
return getActions(gameConfig, gameState, new ArrayList<Action>(), color, numActions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Player player) {
|
||||
return getActions(gameConfig,gameState,player,1).get(0);
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
@@ -8,29 +9,36 @@ import net.woodyfolsom.msproj.GameConfig;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
|
||||
//import org.apache.log4j.Logger;
|
||||
|
||||
public class ValidMoveGenerator implements ActionGenerator {
|
||||
//private static final Logger LOGGER = Logger.getLogger(ValidMoveGenerator.class.getName());
|
||||
|
||||
@Override
|
||||
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
|
||||
Player color, int nMoves) {
|
||||
|
||||
return getActions(gameConfig, gameState, new ArrayList<Action>(),
|
||||
color, nMoves);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedMoves, Player color, int nMoves) {
|
||||
|
||||
GameState gameStateCopy = new GameState(gameState);
|
||||
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords();
|
||||
List<Action> validMoves = new ArrayList<Action>();
|
||||
|
||||
while (emptyCoordinates.size() > 0) {
|
||||
Action nextMove = Action.getInstance(emptyCoordinates.remove(emptyCoordinates.size()-1));
|
||||
if (gameStateCopy.playStone(color, nextMove)) {
|
||||
Action nextMove = Action.getInstance(emptyCoordinates
|
||||
.remove(emptyCoordinates.size() - 1));
|
||||
if (!prohibitedMoves.contains(nextMove)
|
||||
&& gameStateCopy.playStone(color, nextMove)) {
|
||||
validMoves.add(nextMove);
|
||||
gameStateCopy = new GameState(gameState); // play successful? regenerate copy of gameState
|
||||
gameStateCopy = new GameState(gameState); // play successful?
|
||||
// regenerate copy
|
||||
// of gameState
|
||||
}
|
||||
}
|
||||
|
||||
//Passing is always a VALID move. It may not be a GOOD move.
|
||||
if (validMoves.size() == 0) {
|
||||
if (!prohibitedMoves.contains(Action.PASS)) {
|
||||
validMoves.add(Action.PASS);
|
||||
}
|
||||
|
||||
|
||||
23
src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java
Normal file
23
src/net/woodyfolsom/msproj/tree/AlphaBetaProperties.java
Normal file
@@ -0,0 +1,23 @@
|
||||
package net.woodyfolsom.msproj.tree;
|
||||
|
||||
public class AlphaBetaProperties extends MinimaxProperties {
|
||||
|
||||
double alpha = Double.NEGATIVE_INFINITY;
|
||||
double beta = Double.POSITIVE_INFINITY;
|
||||
|
||||
public double getAlpha() {
|
||||
return alpha;
|
||||
}
|
||||
|
||||
public void setAlpha(double d) {
|
||||
this.alpha = d;
|
||||
}
|
||||
|
||||
public double getBeta() {
|
||||
return beta;
|
||||
}
|
||||
|
||||
public void setBeta(double beta) {
|
||||
this.beta = beta;
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package net.woodyfolsom.msproj.tree;
|
||||
|
||||
public class AlphaBetaPropeties extends GameTreeNodeProperties{
|
||||
int alpha = 0;
|
||||
int beta = 0;
|
||||
public int getAlpha() {
|
||||
return alpha;
|
||||
}
|
||||
public void setAlpha(int alpha) {
|
||||
this.alpha = alpha;
|
||||
}
|
||||
public int getBeta() {
|
||||
return beta;
|
||||
}
|
||||
public void setBeta(int beta) {
|
||||
this.beta = beta;
|
||||
}
|
||||
}
|
||||
@@ -3,15 +3,19 @@ package net.woodyfolsom.msproj.tree;
|
||||
public class MonteCarloProperties extends GameTreeNodeProperties {
|
||||
int visits = 0;
|
||||
int wins = 0;
|
||||
|
||||
public int getVisits() {
|
||||
return visits;
|
||||
}
|
||||
|
||||
public void setVisits(int visits) {
|
||||
this.visits = visits;
|
||||
}
|
||||
|
||||
public int getWins() {
|
||||
return wins;
|
||||
}
|
||||
|
||||
public void setWins(int wins) {
|
||||
this.wins = wins;
|
||||
}
|
||||
|
||||
@@ -23,7 +23,9 @@ public class AlphaBetaTest {
|
||||
System.out.println(gameState);
|
||||
|
||||
System.out.println("Generated move: " + move);
|
||||
assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move);
|
||||
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
|
||||
|
||||
assertEquals(Action.getInstance("B3"), move);
|
||||
gameState.playStone(Player.WHITE, move);
|
||||
System.out.println("Final board state:");
|
||||
System.out.println(gameState);
|
||||
@@ -43,6 +45,8 @@ public class AlphaBetaTest {
|
||||
System.out.println(gameState);
|
||||
|
||||
System.out.println("Generated move: " + move);
|
||||
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
|
||||
|
||||
assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move);
|
||||
gameState.playStone(Player.BLACK, move);
|
||||
|
||||
|
||||
@@ -24,7 +24,9 @@ public class MinimaxTest {
|
||||
System.out.println(gameState);
|
||||
|
||||
System.out.println("Generated move: " + move);
|
||||
assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move);
|
||||
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
|
||||
|
||||
assertEquals(Action.getInstance("B3"), move);
|
||||
gameState.playStone(Player.WHITE, move);
|
||||
|
||||
System.out.println(gameState);
|
||||
@@ -45,6 +47,8 @@ public class MinimaxTest {
|
||||
System.out.println(gameState);
|
||||
|
||||
System.out.println("Generated move: " + move);
|
||||
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
|
||||
|
||||
assertEquals("Expected B3 but was: " + move, Action.getInstance("B3"), move);
|
||||
gameState.playStone(Player.BLACK, move);
|
||||
|
||||
|
||||
@@ -25,6 +25,8 @@ public class MonteCarloUCTTest {
|
||||
System.out.println(gameState);
|
||||
|
||||
System.out.println("Generated move: " + move);
|
||||
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
|
||||
|
||||
assertEquals("Expected B3 but was: " + move, "B3", move);
|
||||
gameState.playStone(Player.WHITE, move);
|
||||
|
||||
@@ -45,6 +47,8 @@ public class MonteCarloUCTTest {
|
||||
System.out.println(gameState);
|
||||
|
||||
System.out.println("Generated move: " + move);
|
||||
System.out.println("NumStateEvaluations: " + treeSearch.getNumStateEvaluations());
|
||||
|
||||
assertEquals("Expected B3 but was: " + move, "B3", move);
|
||||
gameState.playStone(Player.BLACK, move);
|
||||
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
import net.woodyfolsom.msproj.GameConfig;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
@@ -49,7 +53,11 @@ public class RandomTest {
|
||||
System.out.println("State before random WHITE move selection:");
|
||||
System.out.println(gameState);
|
||||
//This is correct - checked vs. MFOG
|
||||
assertEquals(Action.getInstance("B3"), new RandomMovePolicy().getAction(new GameConfig(), gameState, Player.WHITE));
|
||||
//PASS would otherwise be a valid move
|
||||
List<Action> prohibitedMoves = new ArrayList<Action>();
|
||||
prohibitedMoves.add(Action.PASS);
|
||||
|
||||
assertEquals(Action.getInstance("B3"), new RandomMovePolicy().getAction(new GameConfig(), gameState, prohibitedMoves, Player.WHITE));
|
||||
|
||||
System.out.println(gameState);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user