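Fixed bug in GameResult reporting (toString): the margin for a Black win was computed as whiteScore - blackScore and was therefore reported as a negative number. RootParallelization policy now explicitly excludes passing when not winning, unless passing is the only valid move.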
GoGame beat me on 9x9 playing as white! (by 1/2 pt using Chinese scoring).
This commit is contained in:
@@ -91,7 +91,7 @@ public class GameResult {
|
||||
double blackScore = getBlackScore();
|
||||
double whiteScore = getWhiteScore();
|
||||
if (blackScore > whiteScore) {
|
||||
return "B+" + (whiteScore - blackScore);
|
||||
return "B+" + (blackScore - whiteScore);
|
||||
} else if (whiteScore > blackScore) {
|
||||
return "W+" + (whiteScore - blackScore);
|
||||
} else {
|
||||
|
||||
@@ -51,7 +51,7 @@ public class StandAloneGame {
|
||||
referee.setPolicy(Player.WHITE, player2);
|
||||
|
||||
List<GameResult> results = new ArrayList<GameResult>();
|
||||
GameConfig gameConfig = new GameConfig(5);
|
||||
GameConfig gameConfig = new GameConfig(9);
|
||||
for (int round = 0; round < rounds; round++) {
|
||||
results.add(referee.play(gameConfig));
|
||||
}
|
||||
@@ -68,7 +68,7 @@ public class StandAloneGame {
|
||||
case HUMAN:
|
||||
return new HumanKeyboardInput();
|
||||
case ROOT_PAR:
|
||||
return new RootParallelization(3, 4000L);
|
||||
return new RootParallelization(3, 6000L);
|
||||
case UCT_SLOW:
|
||||
return new MonteCarloUCT(new RandomMovePolicy(), 4000L);
|
||||
case UCT_FAST:
|
||||
|
||||
@@ -10,35 +10,14 @@ import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
|
||||
public class RandomMovePolicy implements Policy, ActionGenerator {
|
||||
private final boolean passWhenLosing = false;
|
||||
//private final boolean passWhenLosing = false;
|
||||
|
||||
/**
|
||||
* Does NOT modify the gameState.
|
||||
*/
|
||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedMoves, Player player) {
|
||||
|
||||
List<Action> randomActions = getActions(gameConfig, gameState, prohibitedMoves, player, 1);
|
||||
|
||||
//Never randomly generate PASS when losing, because a good opponent will immediately recognize the killer
|
||||
//move of also passing, thereby causing the current player to lose.
|
||||
Action firstAction = randomActions.get(0);
|
||||
|
||||
//Pass when losing enabled? Just return the first random action;
|
||||
if (passWhenLosing) {
|
||||
return firstAction;
|
||||
}
|
||||
|
||||
//But if passing is the only valid move, pass even if losing.
|
||||
if (firstAction.isPass() && !gameState.getResult().isWinner(player)) {
|
||||
if (randomActions.size() > 1) {
|
||||
return randomActions.get(1);
|
||||
} else {
|
||||
return firstAction;
|
||||
}
|
||||
} else {
|
||||
return firstAction;
|
||||
}
|
||||
return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -72,17 +72,22 @@ public class RootParallelization implements Policy {
|
||||
}
|
||||
|
||||
double bestValue = 0.0;
|
||||
int bestWins = 0;
|
||||
int totalRollouts = 0;
|
||||
int bestWins = 0;
|
||||
int nValidActions = totalReward.keySet().size();
|
||||
|
||||
for (Action action : totalReward.keySet())
|
||||
{
|
||||
if (action.isPass() && !gameState.getResult().isWinner(player) && nValidActions > 1) {
|
||||
continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
|
||||
//keep searching.
|
||||
}
|
||||
int totalWins = totalReward.get(action);
|
||||
int totalSims = numSims.get(action);
|
||||
|
||||
totalRollouts += totalSims;
|
||||
|
||||
double value = ((double)totalWins) / totalSims;
|
||||
double value = ((double)totalWins) / ((double)totalSims);
|
||||
|
||||
if (bestAction.isNone() || bestValue < value) {
|
||||
bestAction = action;
|
||||
@@ -96,8 +101,8 @@ public class RootParallelization implements Policy {
|
||||
+ player
|
||||
+ " with simulated win ratio of "
|
||||
+ (bestValue * 100.0 + "% among " + numTrees + " parallel simulations."));
|
||||
System.out.println("It had a value of "
|
||||
+ bestValue + " out of "
|
||||
System.out.println("It won "
|
||||
+ bestWins + " out of "
|
||||
+ totalRollouts + " rollouts among "
|
||||
+ " valid actions from the current state.");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user