Substantial refactoring to implement correct Naive, UCT Monte Carlo tree search methods.

Removed unnecessary distinction between policy and tree search (tree search is a special kind of policy). Calculation of all valid moves / arbitrary sets of moves is now a separate class, as it serves a different purpose than a policy. Introduced regression error in AlphaBeta test.
2012-08-28 10:40:37 -04:00
parent 36291171e5
commit bb5990a04f
39 changed files with 550 additions and 431 deletions
--- a/src/net/woodyfolsom/msproj/policy/Minimax.java
+++ b/src/net/woodyfolsom/msproj/policy/Minimax.java
@@ -0,0 +1,95 @@
+package net.woodyfolsom.msproj.policy;
+
+import java.util.ArrayList;
+//import java.util.Arrays;
+import java.util.List;
+
+import net.woodyfolsom.msproj.GameConfig;
+import net.woodyfolsom.msproj.GameScore;
+import net.woodyfolsom.msproj.GameState;
+import net.woodyfolsom.msproj.StateEvaluator;
+
+//import org.apache.log4j.Logger;
+
+
+/** Depth-limited minimax {@link Policy}: on every ply the side to move maximizes its own score. */
+public class Minimax implements Policy {
+	/** Number of move/reply pairs searched by {@link #getAction}. */
+	private static final int DEFAULT_RECURSIVE_PLAYS = 1;
+
+	private final ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator();
+
+	/**
+	 * Picks a move for {@code color} by searching {@code DEFAULT_RECURSIVE_PLAYS * 2}
+	 * plies ahead; yields {@code Policy.PASS} when no action is available.
+	 */
+	@Override
+	public String getAction(GameConfig gameConfig, GameState gameState,
+			String color) {
+		return findBestMinimaxResult(DEFAULT_RECURSIVE_PLAYS * 2, gameConfig,
+				gameState, color, false, Policy.PASS).move;
+	}
+
+	/**
+	 * Searches {@code recursionLevels} plies below {@code gameState}.
+	 *
+	 * @param recursionLevels plies left; positions are scored once this is 1 or less
+	 * @param initialColor color the search is run on behalf of
+	 * @param playAsOpponent true when the other color moves on this ply
+	 * @param bestPrevMove move that produced {@code gameState}
+	 * @return the best candidate for the side to move; on an opponent ply it is
+	 *         relabeled with {@code bestPrevMove}, the move the caller can play
+	 */
+	private MoveCandidate findBestMinimaxResult(int recursionLevels,
+			GameConfig gameConfig, GameState gameState,
+			String initialColor, boolean playAsOpponent, String bestPrevMove) {
+		StateEvaluator stateEvaluator = new StateEvaluator(gameConfig);
+		String colorPlaying = getColorToPlay(initialColor, playAsOpponent);
+		List<String> validMoves = validMoveGenerator.getActions(gameConfig,
+				gameState, colorPlaying, ActionGenerator.ALL_ACTIONS);
+
+		// Single pass: keep the first candidate with the highest score for
+		// the side to move (ties keep the earlier move).
+		MoveCandidate bestMove = null;
+		double bestScoreSoFar = 0.0;
+		for (String move : validMoves) {
+			GameState stateCopy = new GameState(gameState);
+			stateCopy.playStone(colorPlaying, move);
+			MoveCandidate candidate;
+			if (recursionLevels > 1) {
+				candidate = findBestMinimaxResult(recursionLevels - 1,
+						gameConfig, stateCopy, initialColor,
+						!playAsOpponent, move);
+			} else {
+				candidate = new MoveCandidate(move,
+						stateEvaluator.scoreGame(stateCopy));
+			}
+			double candidateScore = candidate.score.getScore(colorPlaying);
+			if (bestMove == null || candidateScore > bestScoreSoFar) {
+				bestMove = candidate;
+				bestScoreSoFar = candidateScore;
+			}
+		}
+
+		if (bestMove == null) {
+			// No action available: score the position as it stands rather
+			// than failing on an empty candidate list.
+			GameScore standing = stateEvaluator.scoreGame(gameState);
+			bestMove = new MoveCandidate(Policy.PASS, standing);
+		}
+		// On an opponent ply, report the caller's move (bestPrevMove) with
+		// the score left after the opponent's best reply.
+		return playAsOpponent ? new MoveCandidate(bestPrevMove, bestMove.score) : bestMove;
+	}
+
+	/** Color moving on this ply: {@code color}, or its opposite when playing as opponent. */
+	private String getColorToPlay(String color, boolean playAsOpponent) {
+		if (!playAsOpponent) {
+			return color;
+		}
+		if ("w".equals(color)) {
+			return "b";
+		}
+		return "b".equals(color) ? "w" : "?"; // "?" flags an unrecognized color
+	}
+}