From c8dff2f594c705b50c4525f2118c6d57281ebb57 Mon Sep 17 00:00:00 2001
From: Woody Folsom <woody.folsom@gmail.com>
Date: Fri, 9 Nov 2012 20:20:54 -0500
Subject: [PATCH] Fixed bug in GameResult reporting (toString). 
 RootParallelization policy now explicitly excludes passing when losing unless
 it is the only valid move. GoGame beat me on 9x9 playing as white! (by 1/2
 pt using Chinese scoring).

---
 src/net/woodyfolsom/msproj/GameResult.java    |  2 +-
 .../woodyfolsom/msproj/StandAloneGame.java    |  4 +--
 .../msproj/policy/RandomMovePolicy.java       | 25 ++-----------------
 .../msproj/policy/RootParallelization.java    | 13 +++++++---
 4 files changed, 14 insertions(+), 30 deletions(-)
diff --git a/src/net/woodyfolsom/msproj/GameResult.java b/src/net/woodyfolsom/msproj/GameResult.java
index 2311262..6f99c26 100644
--- a/src/net/woodyfolsom/msproj/GameResult.java
+++ b/src/net/woodyfolsom/msproj/GameResult.java
@@ -91,7 +91,7 @@ public class GameResult {
 			double blackScore = getBlackScore();
 			double whiteScore = getWhiteScore();
 			if (blackScore > whiteScore) {
-				return "B+" + (whiteScore - blackScore);
+				return "B+" + (blackScore - whiteScore);
 			} else if (whiteScore > blackScore) {
 				return "W+" + (whiteScore - blackScore);
 			} else {
diff --git a/src/net/woodyfolsom/msproj/StandAloneGame.java b/src/net/woodyfolsom/msproj/StandAloneGame.java
index 20aadd9..3065407 100644
--- a/src/net/woodyfolsom/msproj/StandAloneGame.java
+++ b/src/net/woodyfolsom/msproj/StandAloneGame.java
@@ -51,7 +51,7 @@ public class StandAloneGame {
 		referee.setPolicy(Player.WHITE, player2);
 
 		List<GameResult> results = new ArrayList<GameResult>();
-		GameConfig gameConfig = new GameConfig(5);
+		GameConfig gameConfig = new GameConfig(9);
 		for (int round = 0; round < rounds; round++) {
 			results.add(referee.play(gameConfig));
 		}
@@ -68,7 +68,7 @@ public class StandAloneGame {
 		case HUMAN:
 			return new HumanKeyboardInput();
 		case ROOT_PAR:
-			return new RootParallelization(3, 4000L);
+			return new RootParallelization(3, 6000L);
 		case UCT_SLOW:
 			return new MonteCarloUCT(new RandomMovePolicy(), 4000L);
 		case UCT_FAST:
diff --git a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java
index 84501a9..98c76a5 100644
--- a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java
+++ b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java
@@ -10,35 +10,14 @@ import net.woodyfolsom.msproj.GameState;
 import net.woodyfolsom.msproj.Player;
 
 public class RandomMovePolicy implements Policy, ActionGenerator {
-	private final boolean passWhenLosing = false;
+	//private final boolean passWhenLosing = false;
 	
 	/**
 	 * Does NOT modify the gameState.
 	 */
 	public Action getAction(GameConfig gameConfig, GameState gameState,
 			Collection<Action> prohibitedMoves, Player player) {
-		
-		List<Action> randomActions = getActions(gameConfig, gameState, prohibitedMoves, player, 1);
-		
-		//Never randomly generate PASS when losing, because a good opponent will immediately recognize the killer
-		//move of also passing, thereby causing the current player to lose.
-		Action firstAction = randomActions.get(0);
-		
-		//Pass when losing enabled? Just return the first random action;
-		if (passWhenLosing) {
-			return firstAction;
-		}
-		
-		//But if passing is the only valid move, pass even if losing.
-		if (firstAction.isPass() && !gameState.getResult().isWinner(player)) {
-			if (randomActions.size() > 1) {
-				return randomActions.get(1);
-			} else {
-				return firstAction;
-			}
-		} else {
-			return firstAction;
-		}
+		return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0);
 	}
 
 	/**
diff --git a/src/net/woodyfolsom/msproj/policy/RootParallelization.java b/src/net/woodyfolsom/msproj/policy/RootParallelization.java
index d27a804..bcf99b3 100644
--- a/src/net/woodyfolsom/msproj/policy/RootParallelization.java
+++ b/src/net/woodyfolsom/msproj/policy/RootParallelization.java
@@ -72,17 +72,22 @@ public class RootParallelization implements Policy {
 		}
 		
 		double bestValue = 0.0;
-		int bestWins = 0;
 		int totalRollouts = 0;
+		int bestWins = 0;
+		int nValidActions = totalReward.keySet().size();
 		
 		for (Action action : totalReward.keySet())
 		{
+			if (action.isPass() && !gameState.getResult().isWinner(player) && nValidActions > 1) {
+				continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
+				//keep searching.
+			}
 			int totalWins = totalReward.get(action);
 			int totalSims = numSims.get(action);
 			
 			totalRollouts += totalSims;
 			
-			double value = ((double)totalWins) / totalSims;
+			double value = ((double)totalWins) / ((double)totalSims);
 			
 			if (bestAction.isNone() || bestValue < value) {
 				bestAction = action;
@@ -96,8 +101,8 @@ public class RootParallelization implements Policy {
 				+ player
 				+ " with simulated win ratio of "
 				+ (bestValue * 100.0 + "% among " + numTrees + " parallel simulations."));
-		System.out.println("It had a value of "
-				+ bestValue + " out of "
+		System.out.println("It won "
+				+ bestWins + " out of "
 				+ totalRollouts + " rollouts among "
 				+ " valid actions from the current state.");