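Fixed bug in GameResult reporting (toString): Black's winning margin was computed as whiteScore - blackScore and is now blackScore - whiteScore. RootParallelization policy now explicitly excludes passing when the player is not winning, unless passing is the only valid move.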

GoGame beat me on 9x9 playing as white! (by 1/2 pt using Chinese scoring).
This commit is contained in:
2012-11-09 20:20:54 -05:00
parent 9b8693fc3e
commit c8dff2f594
4 changed files with 14 additions and 30 deletions

View File

@@ -91,7 +91,7 @@ public class GameResult {
double blackScore = getBlackScore(); double blackScore = getBlackScore();
double whiteScore = getWhiteScore(); double whiteScore = getWhiteScore();
if (blackScore > whiteScore) { if (blackScore > whiteScore) {
return "B+" + (whiteScore - blackScore); return "B+" + (blackScore - whiteScore);
} else if (whiteScore > blackScore) { } else if (whiteScore > blackScore) {
return "W+" + (whiteScore - blackScore); return "W+" + (whiteScore - blackScore);
} else { } else {

View File

@@ -51,7 +51,7 @@ public class StandAloneGame {
referee.setPolicy(Player.WHITE, player2); referee.setPolicy(Player.WHITE, player2);
List<GameResult> results = new ArrayList<GameResult>(); List<GameResult> results = new ArrayList<GameResult>();
GameConfig gameConfig = new GameConfig(5); GameConfig gameConfig = new GameConfig(9);
for (int round = 0; round < rounds; round++) { for (int round = 0; round < rounds; round++) {
results.add(referee.play(gameConfig)); results.add(referee.play(gameConfig));
} }
@@ -68,7 +68,7 @@ public class StandAloneGame {
case HUMAN: case HUMAN:
return new HumanKeyboardInput(); return new HumanKeyboardInput();
case ROOT_PAR: case ROOT_PAR:
return new RootParallelization(3, 4000L); return new RootParallelization(3, 6000L);
case UCT_SLOW: case UCT_SLOW:
return new MonteCarloUCT(new RandomMovePolicy(), 4000L); return new MonteCarloUCT(new RandomMovePolicy(), 4000L);
case UCT_FAST: case UCT_FAST:

View File

@@ -10,35 +10,14 @@ import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.Player; import net.woodyfolsom.msproj.Player;
public class RandomMovePolicy implements Policy, ActionGenerator { public class RandomMovePolicy implements Policy, ActionGenerator {
private final boolean passWhenLosing = false; //private final boolean passWhenLosing = false;
/** /**
* Does NOT modify the gameState. * Does NOT modify the gameState.
*/ */
public Action getAction(GameConfig gameConfig, GameState gameState, public Action getAction(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedMoves, Player player) { Collection<Action> prohibitedMoves, Player player) {
return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0);
List<Action> randomActions = getActions(gameConfig, gameState, prohibitedMoves, player, 1);
//Never randomly generate PASS when losing, because a good opponent will immediately recognize the killer
//move of also passing, thereby causing the current player to lose.
Action firstAction = randomActions.get(0);
//Pass when losing enabled? Just return the first random action;
if (passWhenLosing) {
return firstAction;
}
//But if passing is the only valid move, pass even if losing.
if (firstAction.isPass() && !gameState.getResult().isWinner(player)) {
if (randomActions.size() > 1) {
return randomActions.get(1);
} else {
return firstAction;
}
} else {
return firstAction;
}
} }
/** /**

View File

@@ -72,17 +72,22 @@ public class RootParallelization implements Policy {
} }
double bestValue = 0.0; double bestValue = 0.0;
int bestWins = 0;
int totalRollouts = 0; int totalRollouts = 0;
int bestWins = 0;
int nValidActions = totalReward.keySet().size();
for (Action action : totalReward.keySet()) for (Action action : totalReward.keySet())
{ {
if (action.isPass() && !gameState.getResult().isWinner(player) && nValidActions > 1) {
continue; //Skip PASS as a candidate when I'm not winning and there are other valid actions,
//so that only the non-PASS actions are compared below.
}
int totalWins = totalReward.get(action); int totalWins = totalReward.get(action);
int totalSims = numSims.get(action); int totalSims = numSims.get(action);
totalRollouts += totalSims; totalRollouts += totalSims;
double value = ((double)totalWins) / totalSims; double value = ((double)totalWins) / ((double)totalSims);
if (bestAction.isNone() || bestValue < value) { if (bestAction.isNone() || bestValue < value) {
bestAction = action; bestAction = action;
@@ -96,8 +101,8 @@ public class RootParallelization implements Policy {
+ player + player
+ " with simulated win ratio of " + " with simulated win ratio of "
+ (bestValue * 100.0 + "% among " + numTrees + " parallel simulations.")); + (bestValue * 100.0 + "% among " + numTrees + " parallel simulations."));
System.out.println("It had a value of " System.out.println("It won "
+ bestValue + " out of " + bestWins + " out of "
+ totalRollouts + " rollouts among " + totalRollouts + " rollouts among "
+ " valid actions from the current state."); + " valid actions from the current state.");