From c8dff2f594c705b50c4525f2118c6d57281ebb57 Mon Sep 17 00:00:00 2001 From: Woody Folsom Date: Fri, 9 Nov 2012 20:20:54 -0500 Subject: [PATCH] Fixed bug in GameResult reporting (toString). RootParallelization policy now explicitly excludes passing when losing unless it is the only valid move. GoGame beat me on 9x9 playing as white! (by 1/2 pt using Chinese scoring). --- src/net/woodyfolsom/msproj/GameResult.java | 2 +- .../woodyfolsom/msproj/StandAloneGame.java | 4 +-- .../msproj/policy/RandomMovePolicy.java | 25 ++----------------- .../msproj/policy/RootParallelization.java | 13 +++++++--- 4 files changed, 14 insertions(+), 30 deletions(-) diff --git a/src/net/woodyfolsom/msproj/GameResult.java b/src/net/woodyfolsom/msproj/GameResult.java index 2311262..6f99c26 100644 --- a/src/net/woodyfolsom/msproj/GameResult.java +++ b/src/net/woodyfolsom/msproj/GameResult.java @@ -91,7 +91,7 @@ public class GameResult { double blackScore = getBlackScore(); double whiteScore = getWhiteScore(); if (blackScore > whiteScore) { - return "B+" + (whiteScore - blackScore); + return "B+" + (blackScore - whiteScore); } else if (whiteScore > blackScore) { return "W+" + (whiteScore - blackScore); } else { diff --git a/src/net/woodyfolsom/msproj/StandAloneGame.java b/src/net/woodyfolsom/msproj/StandAloneGame.java index 20aadd9..3065407 100644 --- a/src/net/woodyfolsom/msproj/StandAloneGame.java +++ b/src/net/woodyfolsom/msproj/StandAloneGame.java @@ -51,7 +51,7 @@ public class StandAloneGame { referee.setPolicy(Player.WHITE, player2); List results = new ArrayList(); - GameConfig gameConfig = new GameConfig(5); + GameConfig gameConfig = new GameConfig(9); for (int round = 0; round < rounds; round++) { results.add(referee.play(gameConfig)); } @@ -68,7 +68,7 @@ public class StandAloneGame { case HUMAN: return new HumanKeyboardInput(); case ROOT_PAR: - return new RootParallelization(3, 4000L); + return new RootParallelization(3, 6000L); case UCT_SLOW: return new MonteCarloUCT(new 
RandomMovePolicy(), 4000L); case UCT_FAST: diff --git a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java index 84501a9..98c76a5 100644 --- a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java +++ b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java @@ -10,35 +10,14 @@ import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.Player; public class RandomMovePolicy implements Policy, ActionGenerator { - private final boolean passWhenLosing = false; + //private final boolean passWhenLosing = false; /** * Does NOT modify the gameState. */ public Action getAction(GameConfig gameConfig, GameState gameState, Collection prohibitedMoves, Player player) { - - List randomActions = getActions(gameConfig, gameState, prohibitedMoves, player, 1); - - //Never randomly generate PASS when losing, because a good opponent will immediately recognize the killer - //move of also passing, thereby causing the current player to lose. - Action firstAction = randomActions.get(0); - - //Pass when losing enabled? Just return the first random action; - if (passWhenLosing) { - return firstAction; - } - - //But if passing is the only valid move, pass even if losing. 
- if (firstAction.isPass() && !gameState.getResult().isWinner(player)) { - if (randomActions.size() > 1) { - return randomActions.get(1); - } else { - return firstAction; - } - } else { - return firstAction; - } + return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0); } /** diff --git a/src/net/woodyfolsom/msproj/policy/RootParallelization.java b/src/net/woodyfolsom/msproj/policy/RootParallelization.java index d27a804..bcf99b3 100644 --- a/src/net/woodyfolsom/msproj/policy/RootParallelization.java +++ b/src/net/woodyfolsom/msproj/policy/RootParallelization.java @@ -72,17 +72,22 @@ public class RootParallelization implements Policy { } double bestValue = 0.0; - int bestWins = 0; int totalRollouts = 0; + int bestWins = 0; + int nValidActions = totalReward.keySet().size(); for (Action action : totalReward.keySet()) { + if (action.isPass() && !gameState.getResult().isWinner(player) && nValidActions > 1) { + continue; //If the best rated action is PASS and I'm not winning and there are other valid actions, + //keep searching. + } int totalWins = totalReward.get(action); int totalSims = numSims.get(action); totalRollouts += totalSims; - double value = ((double)totalWins) / totalSims; + double value = ((double)totalWins) / ((double)totalSims); if (bestAction.isNone() || bestValue < value) { bestAction = action; @@ -96,8 +101,8 @@ public class RootParallelization implements Policy { + player + " with simulated win ratio of " + (bestValue * 100.0 + "% among " + numTrees + " parallel simulations.")); - System.out.println("It had a value of " - + bestValue + " out of " + System.out.println("It won " + + bestWins + " out of " + totalRollouts + " rollouts among " + " valid actions from the current state.");