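Fixed a bug in GameResult reporting (toString): the margin of a Black win was computed as white minus black instead of black minus white. The RootParallelization policy now explicitly excludes passing when the player is not winning, unless passing is the only valid move.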
GoGame beat me on 9x9 playing as white! (by 1/2 pt using Chinese scoring).
This commit is contained in:
@@ -91,7 +91,7 @@ public class GameResult {
|
|||||||
double blackScore = getBlackScore();
|
double blackScore = getBlackScore();
|
||||||
double whiteScore = getWhiteScore();
|
double whiteScore = getWhiteScore();
|
||||||
if (blackScore > whiteScore) {
|
if (blackScore > whiteScore) {
|
||||||
return "B+" + (whiteScore - blackScore);
|
return "B+" + (blackScore - whiteScore);
|
||||||
} else if (whiteScore > blackScore) {
|
} else if (whiteScore > blackScore) {
|
||||||
return "W+" + (whiteScore - blackScore);
|
return "W+" + (whiteScore - blackScore);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ public class StandAloneGame {
|
|||||||
referee.setPolicy(Player.WHITE, player2);
|
referee.setPolicy(Player.WHITE, player2);
|
||||||
|
|
||||||
List<GameResult> results = new ArrayList<GameResult>();
|
List<GameResult> results = new ArrayList<GameResult>();
|
||||||
GameConfig gameConfig = new GameConfig(5);
|
GameConfig gameConfig = new GameConfig(9);
|
||||||
for (int round = 0; round < rounds; round++) {
|
for (int round = 0; round < rounds; round++) {
|
||||||
results.add(referee.play(gameConfig));
|
results.add(referee.play(gameConfig));
|
||||||
}
|
}
|
||||||
@@ -68,7 +68,7 @@ public class StandAloneGame {
|
|||||||
case HUMAN:
|
case HUMAN:
|
||||||
return new HumanKeyboardInput();
|
return new HumanKeyboardInput();
|
||||||
case ROOT_PAR:
|
case ROOT_PAR:
|
||||||
return new RootParallelization(3, 4000L);
|
return new RootParallelization(3, 6000L);
|
||||||
case UCT_SLOW:
|
case UCT_SLOW:
|
||||||
return new MonteCarloUCT(new RandomMovePolicy(), 4000L);
|
return new MonteCarloUCT(new RandomMovePolicy(), 4000L);
|
||||||
case UCT_FAST:
|
case UCT_FAST:
|
||||||
|
|||||||
@@ -10,35 +10,14 @@ import net.woodyfolsom.msproj.GameState;
|
|||||||
import net.woodyfolsom.msproj.Player;
|
import net.woodyfolsom.msproj.Player;
|
||||||
|
|
||||||
public class RandomMovePolicy implements Policy, ActionGenerator {
|
public class RandomMovePolicy implements Policy, ActionGenerator {
|
||||||
private final boolean passWhenLosing = false;
|
//private final boolean passWhenLosing = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Does NOT modify the gameState.
|
* Does NOT modify the gameState.
|
||||||
*/
|
*/
|
||||||
public Action getAction(GameConfig gameConfig, GameState gameState,
|
public Action getAction(GameConfig gameConfig, GameState gameState,
|
||||||
Collection<Action> prohibitedMoves, Player player) {
|
Collection<Action> prohibitedMoves, Player player) {
|
||||||
|
return getActions(gameConfig, gameState, prohibitedMoves, player, 1).get(0);
|
||||||
List<Action> randomActions = getActions(gameConfig, gameState, prohibitedMoves, player, 1);
|
|
||||||
|
|
||||||
//Never randomly generate PASS when losing, because a good opponent will immediately recognize the killer
|
|
||||||
//move of also passing, thereby causing the current player to lose.
|
|
||||||
Action firstAction = randomActions.get(0);
|
|
||||||
|
|
||||||
//Pass when losing enabled? Just return the first random action;
|
|
||||||
if (passWhenLosing) {
|
|
||||||
return firstAction;
|
|
||||||
}
|
|
||||||
|
|
||||||
//But if passing is the only valid move, pass even if losing.
|
|
||||||
if (firstAction.isPass() && !gameState.getResult().isWinner(player)) {
|
|
||||||
if (randomActions.size() > 1) {
|
|
||||||
return randomActions.get(1);
|
|
||||||
} else {
|
|
||||||
return firstAction;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return firstAction;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -72,17 +72,22 @@ public class RootParallelization implements Policy {
|
|||||||
}
|
}
|
||||||
|
|
||||||
double bestValue = 0.0;
|
double bestValue = 0.0;
|
||||||
int bestWins = 0;
|
|
||||||
int totalRollouts = 0;
|
int totalRollouts = 0;
|
||||||
|
int bestWins = 0;
|
||||||
|
int nValidActions = totalReward.keySet().size();
|
||||||
|
|
||||||
for (Action action : totalReward.keySet())
|
for (Action action : totalReward.keySet())
|
||||||
{
|
{
|
||||||
|
if (action.isPass() && !gameState.getResult().isWinner(player) && nValidActions > 1) {
|
||||||
|
continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
|
||||||
|
//keep searching.
|
||||||
|
}
|
||||||
int totalWins = totalReward.get(action);
|
int totalWins = totalReward.get(action);
|
||||||
int totalSims = numSims.get(action);
|
int totalSims = numSims.get(action);
|
||||||
|
|
||||||
totalRollouts += totalSims;
|
totalRollouts += totalSims;
|
||||||
|
|
||||||
double value = ((double)totalWins) / totalSims;
|
double value = ((double)totalWins) / ((double)totalSims);
|
||||||
|
|
||||||
if (bestAction.isNone() || bestValue < value) {
|
if (bestAction.isNone() || bestValue < value) {
|
||||||
bestAction = action;
|
bestAction = action;
|
||||||
@@ -96,8 +101,8 @@ public class RootParallelization implements Policy {
|
|||||||
+ player
|
+ player
|
||||||
+ " with simulated win ratio of "
|
+ " with simulated win ratio of "
|
||||||
+ (bestValue * 100.0 + "% among " + numTrees + " parallel simulations."));
|
+ (bestValue * 100.0 + "% among " + numTrees + " parallel simulations."));
|
||||||
System.out.println("It had a value of "
|
System.out.println("It won "
|
||||||
+ bestValue + " out of "
|
+ bestWins + " out of "
|
||||||
+ totalRollouts + " rollouts among "
|
+ totalRollouts + " rollouts among "
|
||||||
+ " valid actions from the current state.");
|
+ " valid actions from the current state.");
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user