Now possible to prohibit a Collection of Actions from being included in the List of returned actions (or as a single returned value). All unit tests now pass with the exception of MonteCarloUCT. TODO: playerToMove and previousPlayerPassed should be made part of the GameState. This would remove the superfluous Player parameter from many methods and make it possible to check for the "I'm ahead and my opponent is offering to end the game" killer move.
154 lines
4.5 KiB
Java
154 lines
4.5 KiB
Java
package net.woodyfolsom.msproj.policy;
|
|
|
|
import java.util.Collection;
|
|
import java.util.List;
|
|
|
|
import net.woodyfolsom.msproj.Action;
|
|
import net.woodyfolsom.msproj.GameConfig;
|
|
import net.woodyfolsom.msproj.GameState;
|
|
import net.woodyfolsom.msproj.GoGame;
|
|
import net.woodyfolsom.msproj.Player;
|
|
import net.woodyfolsom.msproj.StateEvaluator;
|
|
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
|
import net.woodyfolsom.msproj.tree.MinimaxProperties;
|
|
|
|
public class Minimax implements Policy {
|
|
private static final int DEFAULT_LOOKAHEAD = 1;
|
|
|
|
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
|
|
|
|
private int lookAhead;
|
|
private int numStateEvaluations = 0;
|
|
|
|
public Minimax() {
|
|
this(DEFAULT_LOOKAHEAD);
|
|
}
|
|
|
|
public Minimax(int lookAhead) {
|
|
this.lookAhead = lookAhead;
|
|
}
|
|
|
|
@Override
|
|
public Action getAction(GameConfig gameConfig, GameState gameState,
|
|
Player player) {
|
|
numStateEvaluations = 0;
|
|
|
|
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
|
|
|
|
GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(
|
|
gameState, new MinimaxProperties());
|
|
|
|
if (player == Player.BLACK) {
|
|
return getMax(lookAhead * 2, stateEvaluator, rootNode, player);
|
|
} else {
|
|
return getMin(lookAhead * 2, stateEvaluator, rootNode, player);
|
|
}
|
|
}
|
|
|
|
private boolean isTerminal(int nValidMoves, int recursionLevels) {
|
|
return recursionLevels == 0 || nValidMoves == 0;
|
|
}
|
|
|
|
private Action getMax(int recursionLevels, StateEvaluator stateEvaluator,
|
|
GameTreeNode<MinimaxProperties> node, Player player) {
|
|
|
|
GameState gameState = new GameState(node.getGameState());
|
|
|
|
List<Action> validMoves = validMoveGenerator.getActions(
|
|
stateEvaluator.getGameConfig(), node.getGameState(), player,
|
|
ActionGenerator.ALL_ACTIONS);
|
|
|
|
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
|
|
|
|
double maxScore = Double.NEGATIVE_INFINITY;
|
|
Action bestAction = Action.NONE;
|
|
|
|
if (terminal) {
|
|
node.getProperties().setReward(
|
|
stateEvaluator.scoreGame(gameState).getAggregateScore());
|
|
|
|
numStateEvaluations++;
|
|
|
|
return bestAction;
|
|
} else {
|
|
|
|
for (Action nextMove : validMoves) {
|
|
GameState nextState = new GameState(gameState);
|
|
nextState.playStone(player, nextMove);
|
|
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(
|
|
nextState, new MinimaxProperties());
|
|
node.addChild(nextMove, childNode);
|
|
|
|
getMin(recursionLevels - 1, stateEvaluator, childNode,
|
|
GoGame.getColorToPlay(player, true));
|
|
|
|
double gameScore = childNode.getProperties().getReward();
|
|
|
|
if (gameScore > maxScore) {
|
|
maxScore = gameScore;
|
|
bestAction = nextMove;
|
|
}
|
|
}
|
|
|
|
node.getProperties().setReward(maxScore);
|
|
return bestAction;
|
|
}
|
|
}
|
|
|
|
private Action getMin(int recursionLevels, StateEvaluator stateEvaluator,
|
|
GameTreeNode<MinimaxProperties> node, Player player) {
|
|
|
|
GameState gameState = new GameState(node.getGameState());
|
|
|
|
List<Action> validMoves = validMoveGenerator.getActions(
|
|
stateEvaluator.getGameConfig(), node.getGameState(), player,
|
|
ActionGenerator.ALL_ACTIONS);
|
|
|
|
boolean terminal = isTerminal(validMoves.size(), recursionLevels);
|
|
|
|
double minScore = Double.POSITIVE_INFINITY;
|
|
Action bestAction = Action.NONE;
|
|
|
|
if (terminal) {
|
|
node.getProperties().setReward(
|
|
stateEvaluator.scoreGame(gameState).getAggregateScore());
|
|
|
|
numStateEvaluations++;
|
|
|
|
return bestAction;
|
|
} else {
|
|
|
|
for (Action nextMove : validMoves) {
|
|
GameState nextState = new GameState(gameState);
|
|
nextState.playStone(player, nextMove);
|
|
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(
|
|
nextState, new MinimaxProperties());
|
|
node.addChild(nextMove, childNode);
|
|
|
|
getMax(recursionLevels - 1, stateEvaluator, childNode,
|
|
GoGame.getColorToPlay(player, true));
|
|
|
|
double gameScore = childNode.getProperties().getReward();
|
|
|
|
if (gameScore < minScore) {
|
|
minScore = gameScore;
|
|
bestAction = nextMove;
|
|
}
|
|
}
|
|
|
|
node.getProperties().setReward(minScore);
|
|
return bestAction;
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public int getNumStateEvaluations() {
|
|
return numStateEvaluations;
|
|
}
|
|
|
|
@Override
|
|
public Action getAction(GameConfig gameConfig, GameState gameState,
|
|
Collection<Action> prohibitedActions, Player player) {
|
|
throw new UnsupportedOperationException("Prohibited actions not supported by this class.");
|
|
}
|
|
} |