Fixed Minimax search to use the new GameTreeNode and MinimaxProperties classes.
The previous implementation was overly complicated and may have been buggy except when searching only 2 plies ahead.
This commit is contained in:
@@ -1,77 +1,131 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
import net.woodyfolsom.msproj.GameConfig;
|
||||
import net.woodyfolsom.msproj.GameScore;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.GoGame;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
import net.woodyfolsom.msproj.StateEvaluator;
|
||||
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
||||
import net.woodyfolsom.msproj.tree.MinimaxProperties;
|
||||
|
||||
public class Minimax implements Policy {
|
||||
private static final int DEFAULT_RECURSIVE_PLAYS = 1;
|
||||
private static final int DEFAULT_LOOKAHEAD = 1;
|
||||
|
||||
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
|
||||
|
||||
private int lookAhead;
|
||||
|
||||
/**
 * Creates a Minimax policy that uses the default look-ahead
 * ({@code DEFAULT_LOOKAHEAD}) by delegating to {@link #Minimax(int)}.
 */
public Minimax() {
|
||||
this(DEFAULT_LOOKAHEAD);
|
||||
}
|
||||
|
||||
/**
 * Creates a Minimax policy with the given look-ahead.
 *
 * @param lookAhead value stored in the {@code lookAhead} field; no
 *                  validation is performed here.
 *                  NOTE(review): presumably expected to be positive - confirm
 *                  against the search code that consumes it.
 */
public Minimax(int lookAhead) {
|
||||
this.lookAhead = lookAhead;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Player color) {
|
||||
MoveCandidate moveCandidate = findBestMinimaxResult(
|
||||
DEFAULT_RECURSIVE_PLAYS * 2,
|
||||
gameConfig, gameState, color, false, Action.PASS);
|
||||
|
||||
return moveCandidate.move;
|
||||
Player player) {
|
||||
|
||||
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
|
||||
|
||||
GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(gameState, new MinimaxProperties());
|
||||
|
||||
if (player == Player.BLACK) {
|
||||
return getMax(
|
||||
lookAhead * 2,
|
||||
stateEvaluator,
|
||||
rootNode,
|
||||
player);
|
||||
} else {
|
||||
return getMin(
|
||||
lookAhead * 2,
|
||||
stateEvaluator,
|
||||
rootNode,
|
||||
player);
|
||||
}
|
||||
}
|
||||
|
||||
private MoveCandidate findBestMinimaxResult(int recursionLevels,
|
||||
GameConfig gameConfig, GameState gameState,
|
||||
Player initialColor, boolean playAsOpponent, Action bestPrevMove) {
|
||||
private Action getMax(int recursionLevels,
|
||||
StateEvaluator stateEvaluator,
|
||||
GameTreeNode<MinimaxProperties> node,
|
||||
Player player) {
|
||||
|
||||
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
|
||||
List<MoveCandidate> randomMoveCandidates = new ArrayList<MoveCandidate>();
|
||||
|
||||
Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent);
|
||||
GameState gameState = new GameState(node.getGameState());
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(gameConfig,
|
||||
gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);
|
||||
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(),
|
||||
node.getGameState(), player, ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
for (Action randomMove : validMoves) {
|
||||
GameState stateCopy = new GameState(gameState);
|
||||
stateCopy.playStone(colorPlaying, randomMove);
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
nextState.playStone(player, nextMove);
|
||||
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
|
||||
node.addChild(nextMove, childNode);
|
||||
if (recursionLevels > 1) {
|
||||
randomMoveCandidates.add(findBestMinimaxResult(recursionLevels - 1,
|
||||
gameConfig, stateCopy, initialColor,
|
||||
!playAsOpponent, randomMove));
|
||||
getMin(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
|
||||
} else {
|
||||
GameScore score = stateEvaluator.scoreGame(stateCopy);
|
||||
randomMoveCandidates.add(new MoveCandidate(randomMove, score));
|
||||
//tail condition - set reward of this leaf node
|
||||
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO use a sorted list and just return the last element
|
||||
MoveCandidate bestMove = randomMoveCandidates.get(0);
|
||||
double bestScoreSoFar = bestMove.score.getScore(colorPlaying);
|
||||
|
||||
for (MoveCandidate moveCandidate : randomMoveCandidates) {
|
||||
if (moveCandidate.score.getScore(colorPlaying) > bestScoreSoFar) {
|
||||
bestMove = moveCandidate;
|
||||
bestScoreSoFar = moveCandidate.score.getScore(colorPlaying);
|
||||
|
||||
double maxScore = Double.NEGATIVE_INFINITY;
|
||||
Action bestAction = Action.NONE;
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove);
|
||||
double gameScore = childNode.getProperties().getReward();
|
||||
|
||||
if (gameScore > maxScore) {
|
||||
maxScore = gameScore;
|
||||
bestAction = nextMove;
|
||||
}
|
||||
}
|
||||
|
||||
node.getProperties().setReward(maxScore);
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
private Action getMin(int recursionLevels,
|
||||
StateEvaluator stateEvaluator,
|
||||
GameTreeNode<MinimaxProperties> node,
|
||||
Player player) {
|
||||
|
||||
// Fix to prevent thinking that the _opponent's_ best move is the move
|
||||
// to make.
|
||||
// If evaluating an opponent's move, the best move (for my opponent) is
|
||||
// my previous move which gives the opponent the highest score.
|
||||
// This should only happen if recursionLevels is initially odd.
|
||||
if (playAsOpponent) {
|
||||
return new MoveCandidate(bestPrevMove, bestMove.score);
|
||||
} else { // if evaluating my own move, the move which gives me the
|
||||
// highest score is the best.
|
||||
return bestMove;
|
||||
GameState gameState = new GameState(node.getGameState());
|
||||
|
||||
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(),
|
||||
node.getGameState(), player, ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameState nextState = new GameState(gameState);
|
||||
nextState.playStone(player, nextMove);
|
||||
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
|
||||
node.addChild(nextMove, childNode);
|
||||
if (recursionLevels > 1) {
|
||||
getMax(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
|
||||
} else {
|
||||
//tail condition - set reward of this leaf node
|
||||
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
|
||||
}
|
||||
}
|
||||
|
||||
double minScore = Double.POSITIVE_INFINITY;
|
||||
Action bestAction = Action.NONE;
|
||||
|
||||
for (Action nextMove : validMoves) {
|
||||
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove);
|
||||
double gameScore = childNode.getProperties().getReward();
|
||||
|
||||
if (gameScore < minScore) {
|
||||
minScore = gameScore;
|
||||
bestAction = nextMove;
|
||||
}
|
||||
}
|
||||
|
||||
node.getProperties().setReward(minScore);
|
||||
return bestAction;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user