Substantial refactoring to implement correct Naive, UCT Monte Carlo tree search methods.

Removed unnecessary distinction between policy and tree search (tree search is a special kind of policy).
Calculation of all valid moves / arbitrary sets of moves is now a separate class, as it serves a different purpose than a policy.
Introduced regression error in AlphaBeta test.
This commit is contained in:
cs6601
2012-08-28 10:40:37 -04:00
parent 36291171e5
commit bb5990a04f
39 changed files with 550 additions and 431 deletions

View File

@@ -0,0 +1,95 @@
package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
//import java.util.Arrays;
import java.util.List;
import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameScore;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.StateEvaluator;
//import org.apache.log4j.Logger;
/**
 * Fixed-depth minimax {@link Policy}.
 *
 * <p>The search alternates between the requesting player and the opponent.
 * At every ply the side to move picks the continuation with the highest
 * score from its own point of view; leaf positions are scored with a
 * {@link StateEvaluator}.
 */
public class Minimax implements Policy {
    /**
     * Number of full turns searched by {@link #getAction}; each turn is one
     * ply for the requesting player followed by one ply for the opponent.
     */
    private static final int DEFAULT_RECURSIVE_PLAYS = 1;

    private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();

    /**
     * Chooses a move for {@code color} by searching
     * {@code DEFAULT_RECURSIVE_PLAYS * 2} plies ahead.
     *
     * @param gameConfig configuration handed to the move generator and the
     *                   state evaluator
     * @param gameState  position to move from; it is copied, never modified
     * @param color      color of the player requesting a move
     * @return the selected move, or {@link Policy#PASS} when the move
     *         generator offers no action at the root
     */
    @Override
    public String getAction(GameConfig gameConfig, GameState gameState,
            String color) {
        MoveCandidate moveCandidate = findBestMinimaxResult(
                DEFAULT_RECURSIVE_PLAYS * 2,
                gameConfig, gameState, color, false, Policy.PASS);
        return moveCandidate.move;
    }

    /**
     * Recursively evaluates every action available to the side to move and
     * returns the best one from that side's point of view.
     *
     * @param recursionLevels plies left to search, including this one; the
     *                        position is scored directly once this is 1 or
     *                        less
     * @param gameConfig      configuration handed to the move generator and
     *                        the state evaluator
     * @param gameState       position to search from; each trial move is
     *                        played on a copy
     * @param initialColor    color of the player the search is run for
     * @param playAsOpponent  {@code true} when this ply belongs to the
     *                        opponent of {@code initialColor}
     * @param bestPrevMove    the move played by the parent ply to reach
     *                        {@code gameState}
     * @return on the requesting player's ply, the best candidate found; on an
     *         opponent ply, {@code bestPrevMove} paired with the score of the
     *         opponent's best reply
     */
    private MoveCandidate findBestMinimaxResult(int recursionLevels,
            GameConfig gameConfig, GameState gameState,
            String initialColor, boolean playAsOpponent, String bestPrevMove) {
        StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
        String colorPlaying = getColorToPlay(initialColor, playAsOpponent);
        List<String> validMoves = validMoveGenerator.getActions(gameConfig,
                gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);

        if (validMoves.isEmpty()) {
            // Nothing can be played from here. Score the position as it
            // stands instead of indexing into an empty candidate list, which
            // previously threw IndexOutOfBoundsException.
            String fallbackMove = playAsOpponent ? bestPrevMove : Policy.PASS;
            return new MoveCandidate(fallbackMove,
                    stateEvaluator.scoreGame(gameState));
        }

        // Single pass: keep the first candidate with the strictly highest
        // score for the side to move (ties keep the earliest move).
        MoveCandidate bestMove = null;
        double bestScoreSoFar = 0;
        for (String move : validMoves) {
            GameState stateCopy = new GameState(gameState);
            stateCopy.playStone(colorPlaying, move);

            MoveCandidate candidate;
            if (recursionLevels > 1) {
                candidate = findBestMinimaxResult(recursionLevels - 1,
                        gameConfig, stateCopy, initialColor,
                        !playAsOpponent, move);
            } else {
                GameScore score = stateEvaluator.scoreGame(stateCopy);
                candidate = new MoveCandidate(move, score);
            }

            double candidateScore = candidate.score.getScore(colorPlaying);
            if (bestMove == null || candidateScore > bestScoreSoFar) {
                bestMove = candidate;
                bestScoreSoFar = candidateScore;
            }
        }

        if (playAsOpponent) {
            // The caller must never be told to play the opponent's move.
            // Report the move that led to this position, carrying the score
            // the opponent can force with its best reply. This relabelling
            // happens on every opponent ply.
            return new MoveCandidate(bestPrevMove, bestMove.score);
        }
        // On the requesting player's own ply the highest-scoring move is the
        // answer as-is.
        return bestMove;
    }

    /**
     * Resolves which color moves at the current ply.
     *
     * @param color          color of the player the search is run for
     * @param playAsOpponent whether the current ply belongs to the opponent
     * @return {@code color} itself on the player's own ply; otherwise the
     *         opposite of {@code "w"}/{@code "b"}, or {@code "?"} when
     *         {@code color} is neither
     */
    private String getColorToPlay(String color, boolean playAsOpponent) {
        if (!playAsOpponent) {
            return color;
        }
        if ("w".equals(color)) {
            return "b";
        }
        if ("b".equals(color)) {
            return "w";
        }
        // NOTE(review): presumably the move generator only offers PASS for
        // an unrecognized color - confirm against ValidMoveGenerator.
        return "?";
    }
}