Fixed Minimax search to use the new GameTreeNode and MinimaxProperties classes.

The previous implementation was overly complicated and may have been buggy except when searching only 2 plies ahead.
This commit is contained in:
cs6601
2012-08-30 10:51:04 -04:00
parent 2e40440838
commit 4a1c64843d
12 changed files with 249 additions and 153 deletions

View File

@@ -1,77 +1,131 @@
package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
import java.util.List;
import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameScore;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.GoGame;
import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.StateEvaluator;
import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MinimaxProperties;
/**
 * Policy that chooses an action by searching a tree of
 * GameTreeNode&lt;MinimaxProperties&gt; nodes: getMax picks the child with the
 * largest reward, getMin the child with the smallest, and the two call each
 * other while descending.
 *
 * NOTE(review): this listing appears to interleave lines of an earlier
 * implementation (findBestMinimaxResult, MoveCandidate, randomMoveCandidates,
 * colorPlaying, stateCopy, playAsOpponent) with the getMax/getMin version.
 * Confirm which lines are current before relying on it.
 */
public class Minimax implements Policy {
// Referenced only by the findBestMinimaxResult call inside getAction below.
private static final int DEFAULT_RECURSIVE_PLAYS = 1;
// Look-ahead used by the no-argument constructor.
private static final int DEFAULT_LOOKAHEAD = 1;
// Supplies the list of actions that getMax/getMin expand at each node.
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
// Doubled in getAction to obtain the recursion depth handed to getMax/getMin;
// presumably counted in full turns (one ply per player) - TODO confirm.
private int lookAhead;
/** Creates a policy that uses DEFAULT_LOOKAHEAD. */
public Minimax() {
this(DEFAULT_LOOKAHEAD);
}
/**
 * Creates a policy with the given look-ahead.
 *
 * @param lookAhead value stored as-is (no validation here); getAction searches
 *                  lookAhead * 2 levels
 */
public Minimax(int lookAhead) {
this.lookAhead = lookAhead;
}
/**
 * Builds a root node for the supplied game state and returns the action
 * selected by getMax when the player is Player.BLACK, or by getMin otherwise.
 * Both searches are started with a depth of lookAhead * 2.
 */
@Override
public Action getAction(GameConfig gameConfig, GameState gameState,
Player color) {
MoveCandidate moveCandidate = findBestMinimaxResult(
DEFAULT_RECURSIVE_PLAYS * 2,
gameConfig, gameState, color, false, Action.PASS);
return moveCandidate.move;
Player player) {
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
GameTreeNode<MinimaxProperties> rootNode = new GameTreeNode<MinimaxProperties>(gameState, new MinimaxProperties());
// BLACK searches for the largest reward, any other player for the smallest.
// NOTE(review): this presumes getAggregateScore() grows as BLACK's position
// improves - confirm against GameScore.
if (player == Player.BLACK) {
return getMax(
lookAhead * 2,
stateEvaluator,
rootNode,
player);
} else {
return getMin(
lookAhead * 2,
stateEvaluator,
rootNode,
player);
}
}
private MoveCandidate findBestMinimaxResult(int recursionLevels,
GameConfig gameConfig, GameState gameState,
Player initialColor, boolean playAsOpponent, Action bestPrevMove) {
/**
 * Expands every valid move for player beneath node, then returns the move
 * whose child carries the largest reward and stores that reward on node.
 *
 * @param recursionLevels remaining depth; while it is greater than 1 each
 *                        child is expanded further through getMin, otherwise
 *                        the child is scored directly
 * @param stateEvaluator  source of the game configuration and of leaf scores
 * @param node            node to expand; children are added to it and its
 *                        reward is overwritten
 * @param player          player whose moves are generated at this level
 * @return the best move found, or Action.NONE when validMoves is empty (in
 *         which case node's reward is set to Double.NEGATIVE_INFINITY)
 */
private Action getMax(int recursionLevels,
StateEvaluator stateEvaluator,
GameTreeNode<MinimaxProperties> node,
Player player) {
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
List<MoveCandidate> randomMoveCandidates = new ArrayList<MoveCandidate>();
Player colorPlaying = GoGame.getColorToPlay(initialColor, playAsOpponent);
// Copy of the node's state; each child below is built from a further copy of it.
GameState gameState = new GameState(node.getGameState());
List<Action> validMoves = validMoveGenerator.getActions(gameConfig,
gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(),
node.getGameState(), player, ActionGenerator.ALL_ACTIONS);
for (Action randomMove : validMoves) {
GameState stateCopy = new GameState(gameState);
stateCopy.playStone(colorPlaying, randomMove);
// First pass: create one child per move and give it a reward.
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
if (recursionLevels > 1) {
randomMoveCandidates.add(findBestMinimaxResult(recursionLevels - 1,
gameConfig, stateCopy, initialColor,
!playAsOpponent, randomMove));
// The returned action is ignored; the call matters because it sets the
// child's reward. getColorToPlay(player, true) presumably yields the
// opposing player - TODO confirm.
getMin(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
} else {
GameScore score = stateEvaluator.scoreGame(stateCopy);
randomMoveCandidates.add(new MoveCandidate(randomMove, score));
//tail condition - set reward of this leaf node
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
}
}
// TODO use a sorted list and just return the last element
MoveCandidate bestMove = randomMoveCandidates.get(0);
double bestScoreSoFar = bestMove.score.getScore(colorPlaying);
for (MoveCandidate moveCandidate : randomMoveCandidates) {
if (moveCandidate.score.getScore(colorPlaying) > bestScoreSoFar) {
bestMove = moveCandidate;
bestScoreSoFar = moveCandidate.score.getScore(colorPlaying);
// Second pass: scan the children for the largest reward. The comparison is
// strict, so on a tie the move that comes first in validMoves is kept.
double maxScore = Double.NEGATIVE_INFINITY;
Action bestAction = Action.NONE;
for (Action nextMove : validMoves) {
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove);
double gameScore = childNode.getProperties().getReward();
if (gameScore > maxScore) {
maxScore = gameScore;
bestAction = nextMove;
}
}
// Record the chosen reward on this node so the caller can read it.
node.getProperties().setReward(maxScore);
return bestAction;
}
/**
 * Counterpart of getMax: expands every valid move for player beneath node,
 * then returns the move whose child carries the smallest reward and stores
 * that reward on node.
 *
 * @param recursionLevels remaining depth; while it is greater than 1 each
 *                        child is expanded further through getMax, otherwise
 *                        the child is scored directly
 * @param stateEvaluator  source of the game configuration and of leaf scores
 * @param node            node to expand; children are added to it and its
 *                        reward is overwritten
 * @param player          player whose moves are generated at this level
 * @return the best move found, or Action.NONE when validMoves is empty (in
 *         which case node's reward is set to Double.POSITIVE_INFINITY)
 */
private Action getMin(int recursionLevels,
StateEvaluator stateEvaluator,
GameTreeNode<MinimaxProperties> node,
Player player) {
// Fix to prevent thinking that the _opponent's_ best move is the move
// to make.
// If evaluating an opponent's move, the best move (for my opponent) is
// my previous move which gives the opponent the highest score.
// This should only happen if recursionLevels is initially odd.
if (playAsOpponent) {
return new MoveCandidate(bestPrevMove, bestMove.score);
} else { // if evaluating my own move, the move which gives me the
// highest score is the best.
return bestMove;
// Copy of the node's state; each child below is built from a further copy of it.
GameState gameState = new GameState(node.getGameState());
List<Action> validMoves = validMoveGenerator.getActions(stateEvaluator.getGameConfig(),
node.getGameState(), player, ActionGenerator.ALL_ACTIONS);
// First pass: create one child per move and give it a reward.
for (Action nextMove : validMoves) {
GameState nextState = new GameState(gameState);
nextState.playStone(player, nextMove);
GameTreeNode<MinimaxProperties> childNode = new GameTreeNode<MinimaxProperties>(nextState, new MinimaxProperties());
node.addChild(nextMove, childNode);
if (recursionLevels > 1) {
// The returned action is ignored; the call matters because it sets the
// child's reward. getColorToPlay(player, true) presumably yields the
// opposing player - TODO confirm.
getMax(recursionLevels - 1, stateEvaluator, childNode, GoGame.getColorToPlay(player, true));
} else {
//tail condition - set reward of this leaf node
childNode.getProperties().setReward(stateEvaluator.scoreGame(nextState).getAggregateScore());
}
}
// Second pass: scan the children for the smallest reward. The comparison is
// strict, so on a tie the move that comes first in validMoves is kept.
double minScore = Double.POSITIVE_INFINITY;
Action bestAction = Action.NONE;
for (Action nextMove : validMoves) {
GameTreeNode<MinimaxProperties> childNode = node.getChild(nextMove);
double gameScore = childNode.getProperties().getReward();
if (gameScore < minScore) {
minScore = gameScore;
bestAction = nextMove;
}
}
// Record the chosen reward on this node so the caller can read it.
node.getProperties().setReward(minScore);
return bestAction;
}
}