Substantial refactoring to implement correct Naive, UCT Monte Carlo tree search methods.
Removed the unnecessary distinction between policy and tree search (tree search is a special kind of policy). Calculation of all valid moves / arbitrary sets of moves is now a separate class, as it serves a different purpose than a policy. Introduced a regression error in the AlphaBeta test.
This commit is contained in:
95
src/net/woodyfolsom/msproj/policy/Minimax.java
Normal file
95
src/net/woodyfolsom/msproj/policy/Minimax.java
Normal file
@@ -0,0 +1,95 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.ArrayList;
|
||||
//import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.GameConfig;
|
||||
import net.woodyfolsom.msproj.GameScore;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.StateEvaluator;
|
||||
|
||||
//import org.apache.log4j.Logger;
|
||||
|
||||
|
||||
public class Minimax implements Policy {
|
||||
//private static final Logger LOGGER = Logger.getLogger(Minimax.class.getName());
|
||||
|
||||
private static final int DEFAULT_RECURSIVE_PLAYS = 1;
|
||||
|
||||
private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
|
||||
|
||||
@Override
|
||||
public String getAction(GameConfig gameConfig, GameState gameState,
|
||||
String color) {
|
||||
MoveCandidate moveCandidate = findBestMinimaxResult(
|
||||
DEFAULT_RECURSIVE_PLAYS * 2,
|
||||
gameConfig, gameState, color, false, Policy.PASS);
|
||||
|
||||
return moveCandidate.move;
|
||||
}
|
||||
|
||||
private MoveCandidate findBestMinimaxResult(int recursionLevels,
|
||||
GameConfig gameConfig, GameState gameState,
|
||||
String initialColor, boolean playAsOpponent, String bestPrevMove) {
|
||||
|
||||
StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
|
||||
List<MoveCandidate> randomMoveCandidates = new ArrayList<MoveCandidate>();
|
||||
|
||||
String colorPlaying = getColorToPlay(initialColor, playAsOpponent);
|
||||
|
||||
List<String> validMoves = validMoveGenerator.getActions(gameConfig,
|
||||
gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);
|
||||
|
||||
for (String randomMove : validMoves) {
|
||||
GameState stateCopy = new GameState(gameState);
|
||||
stateCopy.playStone(colorPlaying, randomMove);
|
||||
if (recursionLevels > 1) {
|
||||
randomMoveCandidates.add(findBestMinimaxResult(recursionLevels - 1,
|
||||
gameConfig, stateCopy, initialColor,
|
||||
!playAsOpponent, randomMove));
|
||||
} else {
|
||||
GameScore score = stateEvaluator.scoreGame(stateCopy);
|
||||
randomMoveCandidates.add(new MoveCandidate(randomMove, score));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO use a sorted list and just return the last element
|
||||
MoveCandidate bestMove = randomMoveCandidates.get(0);
|
||||
double bestScoreSoFar = bestMove.score.getScore(colorPlaying);
|
||||
|
||||
for (MoveCandidate moveCandidate : randomMoveCandidates) {
|
||||
if (moveCandidate.score.getScore(colorPlaying) > bestScoreSoFar) {
|
||||
bestMove = moveCandidate;
|
||||
bestScoreSoFar = moveCandidate.score.getScore(colorPlaying);
|
||||
}
|
||||
}
|
||||
|
||||
// Fix to prevent thinking that the _opponent's_ best move is the move
|
||||
// to make.
|
||||
// If evaluating an opponent's move, the best move (for my opponent) is
|
||||
// my previous move which gives the opponent the highest score.
|
||||
// This should only happen if recursionLevels is initially odd.
|
||||
if (playAsOpponent) {
|
||||
return new MoveCandidate(bestPrevMove, bestMove.score);
|
||||
} else { // if evaluating my own move, the move which gives me the
|
||||
// highest score is the best.
|
||||
return bestMove;
|
||||
}
|
||||
}
|
||||
|
||||
private String getColorToPlay(String color, boolean playAsOpponent) {
|
||||
if (playAsOpponent) {
|
||||
if ("w".equals(color)) {
|
||||
return "b";
|
||||
} else if ("b".equals(color)) {
|
||||
return "w";
|
||||
} else {
|
||||
return "?"; // invalid color will cause randomMoveGenerator to
|
||||
// PASS
|
||||
}
|
||||
} else {
|
||||
return color;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user