Updated RandomMoveGenerator to support MonteCarloUCT.

It is now possible to prohibit a Collection of Actions from appearing in the returned List of actions (or from being returned as the single chosen action).
All unit tests now pass with the exception of MonteCarloUCT.

TODO: playerToMove and previousPlayerPassed should be made part of the GameState.
This would remove the superfluous Player parameter from many methods and make it possible
to check for the "I'm ahead and my opponent is offering to end the game" killer move.
This commit is contained in:
cs6601
2012-08-31 09:17:43 -04:00
parent 4a1c64843d
commit d3c03f2c51
17 changed files with 453 additions and 257 deletions

View File

@@ -1,5 +1,6 @@
package net.woodyfolsom.msproj.policy;
import java.util.Collection;
import java.util.List;
import net.woodyfolsom.msproj.Action;
@@ -13,119 +14,141 @@ import net.woodyfolsom.msproj.tree.MinimaxProperties;
/**
 * {@link Policy} that chooses an action by a depth-limited search over the
 * game tree using the mutually recursive getMax/getMin methods.
 * getAction treats Player.BLACK as the maximizing side and any other player
 * as the minimizing side.
 */
public class Minimax implements Policy {
// Look-ahead used by the no-argument constructor.
private static final int DEFAULT_LOOKAHEAD = 1;
// Supplies the candidate actions expanded at each node of the search.
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
// Search depth setting; getAction passes lookAhead * 2 as the recursion depth.
// Presumably one unit of look-ahead is one move by each player - TODO confirm.
private int lookAhead;
// Reset to 0 at the start of getAction(GameConfig, GameState, Player) and
// returned by getNumStateEvaluations().
private int numStateEvaluations = 0;
/** Creates a policy that uses {@link #DEFAULT_LOOKAHEAD}. */
public Minimax() {
this(DEFAULT_LOOKAHEAD);
}
/**
 * Creates a policy with an explicit look-ahead.
 *
 * @param lookAhead value stored as-is (no validation is performed here);
 *            it is doubled by getAction to obtain the recursion depth
 */
public Minimax(int lookAhead) {
this.lookAhead = lookAhead;
}
/**
 * Selects an action for {@code player} by searching from {@code gameState}.
 * Resets the state-evaluation counter, wraps the state in a fresh root
 * node, and delegates to getMax when the player is BLACK and to getMin
 * otherwise, with a recursion depth of {@code lookAhead * 2}.
 *
 * NOTE(review): this listing comes from a diff whose +/- markers were
 * stripped. The rootNode declaration and each return statement therefore
 * appear twice: once in the earlier layout and once reformatted. Only one
 * of each pair can exist in the real file.
 *
 * @param gameConfig configuration handed to the StateEvaluator
 * @param gameState state used for the root node of the search tree
 * @param player side to move; BLACK maximizes, any other value minimizes
 * @return the action returned by getMax or getMin for the root node
 */
@Override
public Action getAction(GameConfig gameConfig, GameState gameState,
Player player) {
// Start a fresh evaluation count for this search.
numStateEvaluations = 0;
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
// Earlier single-line layout of the root node declaration.
GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(gameState, new MinimaxProperties());
// Reformatted layout of the same declaration.
GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(
gameState, new MinimaxProperties());
if (player == Player.BLACK) {
// Earlier multi-line layout of the call, followed by the reformatted one.
return getMax(
lookAhead * 2,
stateEvaluator,
rootNode,
player);
return getMax(lookAhead * 2, stateEvaluator, rootNode, player);
} else {
// Earlier multi-line layout of the call, followed by the reformatted one.
return getMin(
lookAhead * 2,
stateEvaluator,
rootNode,
player);
return getMin(lookAhead * 2, stateEvaluator, rootNode, player);
}
}
/**
 * Maximizing step of the search: stores a reward on {@code node} and
 * returns the action whose child carries the highest reward.
 *
 * NOTE(review): this span comes from a diff whose +/- markers were
 * stripped, so two implementations are interleaved and it is not
 * compilable as shown. One expands every child up front, using a
 * {@code recursionLevels > 1} check, and then scans the children in a
 * second loop. The other is built around isTerminal. The isTerminal helper
 * also sits between the two layouts of the getMax signature. Read this as
 * a diff.
 *
 * @param recursionLevels remaining search depth
 * @param stateEvaluator scorer for game states; also supplies the GameConfig
 * @param node tree node to expand; its properties receive the reward
 * @param player side making the move at this node
 * @return the action with the highest child reward
 */
// Earlier multi-line layout of the getMax signature.
private Action getMax(int recursionLevels,
StateEvaluator stateEvaluator,
GameTreeNode<MinimaxProperties> node,
Player player) {
/**
 * Tells whether the search should stop expanding at a node.
 *
 * @param nValidMoves number of actions available at the node
 * @param recursionLevels remaining search depth
 * @return true when no depth remains or no action is available
 */
private boolean isTerminal(int nValidMoves, int recursionLevels) {
return recursionLevels == 0 || nValidMoves == 0;
}
// Reformatted layout of the getMax signature.
private Action getMax(int recursionLevels, StateEvaluator stateEvaluator,
GameTreeNode<MinimaxProperties> node, Player player) {
// Work on a copy of the node's state.
GameState gameState = new GameState(node.getGameState());
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(),
node.getGameState(), player, ActionGenerator.ALL_ACTIONS);
// Eager-expansion variant: create and attach a child for every move, then
// either recurse or score the child directly at the last level.
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
if (recursionLevels > 1) {
getMin(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
} else {
//tail condition - set reward of this leaf node
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
}
}
// Reformatted request for the candidate actions.
List<Action> validMoves = validMoveGenerator.getActions(
stateEvaluator.getGameConfig(), node.getGameState(), player,
ActionGenerator.ALL_ACTIONS);
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
// Any real reward beats negative infinity, so the first child examined wins initially.
double maxScore = Double.NEGATIVE_INFINITY;
Action bestAction = Action.NONE;
for (Action nextMove : validMoves) {
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove);
double gameScore = childNode.getProperties().getReward();
// Terminal node: score the current position itself, store it as this
// node's reward and count one state evaluation.
// NOTE(review): in the isTerminal-based variant no move has been chosen
// yet at this point, so the returned action appears to be Action.NONE -
// confirm.
if (terminal) {
node.getProperties().setReward(
stateEvaluator.scoreGame(gameState).getAggregateScore());
if (gameScore > maxScore) {
maxScore = gameScore;
bestAction = nextMove;
numStateEvaluations++;
return bestAction;
} else {
// Non-terminal node: play each move on a copy of the state, attach the
// child, let the other side (GoGame.getColorToPlay(player, true)) minimize
// below it, and keep the move with the highest resulting reward.
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(
nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
getMin(recursionLevels - 1, stateEvaluator, childNode,
GoGame.getColorToPlay(player, true));
double gameScore = childNode.getProperties().getReward();
if (gameScore > maxScore) {
maxScore = gameScore;
bestAction = nextMove;
}
}
// Store the best child reward on this node for the caller to read.
node.getProperties().setReward(maxScore);
return bestAction;
}
node.getProperties().setReward(maxScore);
return bestAction;
}
/**
 * Minimizing step of the search: stores a reward on {@code node} and
 * returns the action whose child carries the lowest reward.
 *
 * NOTE(review): this span comes from a diff whose +/- markers were
 * stripped, so two implementations are interleaved and it is not
 * compilable as shown. One expands every child up front, using a
 * {@code recursionLevels > 1} check, and then scans the children in a
 * second loop. The other is built around isTerminal. Read this as a diff.
 *
 * @param recursionLevels remaining search depth
 * @param stateEvaluator scorer for game states; also supplies the GameConfig
 * @param node tree node to expand; its properties receive the reward
 * @param player side making the move at this node
 * @return the action with the lowest child reward
 */
// Earlier multi-line layout of the getMin signature.
private Action getMin(int recursionLevels,
StateEvaluator stateEvaluator,
GameTreeNode<MinimaxProperties> node,
Player player) {
// Reformatted layout of the getMin signature.
private Action getMin(int recursionLevels, StateEvaluator stateEvaluator,
GameTreeNode<MinimaxProperties> node, Player player) {
// Work on a copy of the node's state.
GameState gameState = new GameState(node.getGameState());
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(),
node.getGameState(), player, ActionGenerator.ALL_ACTIONS);
// Eager-expansion variant: create and attach a child for every move, then
// either recurse or score the child directly at the last level.
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
if (recursionLevels > 1) {
getMax(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
} else {
//tail condition - set reward of this leaf node
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
}
}
// Reformatted request for the candidate actions.
List<Action> validMoves = validMoveGenerator.getActions(
stateEvaluator.getGameConfig(), node.getGameState(), player,
ActionGenerator.ALL_ACTIONS);
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
// Any real reward is below positive infinity, so the first child examined wins initially.
double minScore = Double.POSITIVE_INFINITY;
Action bestAction = Action.NONE;
for (Action nextMove : validMoves) {
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove);
double gameScore = childNode.getProperties().getReward();
// Terminal node: score the current position itself, store it as this
// node's reward and count one state evaluation.
// NOTE(review): in the isTerminal-based variant no move has been chosen
// yet at this point, so the returned action appears to be Action.NONE -
// confirm.
if (terminal) {
node.getProperties().setReward(
stateEvaluator.scoreGame(gameState).getAggregateScore());
if (gameScore < minScore) {
minScore = gameScore;
bestAction = nextMove;
numStateEvaluations++;
return bestAction;
} else {
// Non-terminal node: play each move on a copy of the state, attach the
// child, let the other side (GoGame.getColorToPlay(player, true)) maximize
// below it, and keep the move with the lowest resulting reward.
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(
nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
getMax(recursionLevels - 1, stateEvaluator, childNode,
GoGame.getColorToPlay(player, true));
double gameScore = childNode.getProperties().getReward();
if (gameScore < minScore) {
minScore = gameScore;
bestAction = nextMove;
}
}
// Store the best child reward on this node for the caller to read.
node.getProperties().setReward(minScore);
return bestAction;
}
node.getProperties().setReward(minScore);
return bestAction;
}
/**
 * Reports the state-evaluation counter kept by this policy.
 *
 * @return the current value of {@code numStateEvaluations}
 */
@Override
public int getNumStateEvaluations() {
    return this.numStateEvaluations;
}
/**
 * Variant of getAction that accepts a collection of prohibited actions.
 * This class does not implement it and always throws.
 *
 * @param gameConfig unused
 * @param gameState unused
 * @param prohibitedActions unused
 * @param player unused
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
public Action getAction(GameConfig gameConfig, GameState gameState,
        Collection<Action> prohibitedActions, Player player) {
    final String message = "Prohibited actions not supported by this class.";
    throw new UnsupportedOperationException(message);
}
}