Root_par beats GoFree!

This commit is contained in:
2012-11-18 18:44:09 -05:00
parent aca8320600
commit 270072006c
22 changed files with 381 additions and 126 deletions

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+13.5];B[de];W[eb];B[gf];W[db];B[dc];W[hf];B[cc];W[dh];B[bf];W[he];B[ge];W[ea];B[gd];W[ca];B[bb];W[fg];B[eg];W[gc];B[gb];W[hc];B[hd];W[fh];B[gh];W[eh];B[ef];W[ed];B[fe];W[dg];B[cg];W[gg];B[df];W[id];B[fd];W[fc];B[fa];W[hg];B[ih];W[ic];B[hb];W[ec];B[fb];W[ie];B[gi];W[dd];B[cd];W[cb];B[ba];W[di];B[ch];W[ib];B[ci];W[fi];B[hh];W[ab];B[bc];W[ha];B[ig];W[bd];B[be];W[ga];B[ee];W[ii];B[ff];W[if];B[hi];W[ii];B[hi];W[ih];B[];W[hh];B[];W[aa];B[ac];W[ab];B[];W[gb];B[gh];W[fb];B[];W[ad];B[aa];W[bh];B[ah];W[af];B[ag];W[];B[bg];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+7.5];B[bg];W[gc];B[ba];W[ic];B[gd];W[ce];B[cf];W[eb];B[gi];W[ci];B[ab];W[bf];B[eh];W[cc];B[be];W[ca];B[ag];W[fd];B[de];W[ig];B[gb];W[fc];B[eg];W[dc];B[id];W[ee];B[bb];W[gf];B[ed];W[hc];B[ei];W[he];B[ia];W[gg];B[dh];W[hd];B[bi];W[ge];B[da];W[cb];B[db];W[ec];B[ga];W[bh];B[cd];W[hh];B[af];W[hb];B[ai];W[];B[ha];W[ad];B[ih];W[cg];B[hi];W[gh];B[ah];W[fi];B[fh];W[if];B[hg];W[hf];B[dg];W[ie];B[ch];W[];B[bc];W[fe];B[bd];W[ff];B[ef];W[fa];B[di];W[dd];B[fg];W[ea];B[db];W[da];B[ae];W[];B[ib];W[fb];B[ac];W[ha];B[ia];W[ga];B[];W[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+11.5];B[cg];W[cf];B[eh];W[gh];B[bc];W[af];B[ge];W[fh];B[ci];W[ed];B[de];W[bb];B[ib];W[ab];B[ad];W[ag];B[db];W[bd];B[hg];W[gd];B[bg];W[ea];B[ha];W[ch];B[gg];W[cc];B[ac];W[dd];B[gc];W[ah];B[fe];W[bh];B[ei];W[hd];B[ba];W[gb];B[be];W[dg];B[gi];W[fg];B[id];W[];B[eb];W[fb];B[ca];W[ae];B[eg];W[hc];B[cd];W[ce];B[hf];W[fd];B[gf];W[ie];B[bc];W[ac];B[bi];W[ga];B[ff];W[he];B[fi];W[fc];B[ig];W[df];B[aa];W[ai];B[ii];W[ic];B[ef];W[dc];B[hh];W[da];B[fg];W[dh];B[fh];W[if];B[ee];W[ec];B[hb];W[];B[di];W[cb];B[ba];W[eb];B[ca];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+6.5];B[hg];W[ef];B[bg];W[ih];B[eh];W[hi];B[gi];W[dg];B[fd];W[gg];B[ic];W[cf];B[fe];W[da];B[ie];W[cd];B[be];W[ei];B[bb];W[df];B[ae];W[gc];B[hd];W[ci];B[cc];W[fg];B[ig];W[gd];B[gh];W[ce];B[gb];W[dd];B[bi];W[fc];B[ia];W[ec];B[ai];W[dc];B[if];W[ea];B[ab];W[hc];B[bh];W[fh];B[ff];W[fa];B[he];W[dh];B[ha];W[eg];B[bf];W[];B[cb];W[ad];B[bd];W[db];B[af];W[ee];B[ga];W[fi];B[hh];W[hb];B[ed];W[hf];B[cg];W[gf];B[ba];W[ah];B[ib];W[eb];B[bc];W[];B[fb];W[id];B[gb];W[ha];B[fb];W[ic];B[ca];W[ch];B[ia];W[];B[ac];W[ga];B[ag];W[fb];B[ii];W[];B[ge];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+27.5];B[ci];W[cf];B[de];W[hh];B[ec];W[hg];B[ga];W[ie];B[dh];W[da];B[df];W[ig];B[fd];W[ha];B[dd];W[bi];B[ff];W[ah];B[dc];W[ib];B[ef];W[ad];B[ce];W[eg];B[ab];W[di];B[fh];W[bd];B[eh];W[bg];B[gd];W[gi];B[he];W[hb];B[fg];W[fc];B[bf];W[ca];B[bb];W[cb];B[be];W[cc];B[id];W[ee];B[ea];W[ch];B[ae];W[gc];B[eb];W[gg];B[hd];W[ih];B[fi];W[fa];B[ba];W[af];B[ic];W[cg];B[fb];W[ei];B[dg];W[fe];B[if];W[ii];B[ed];W[hc];B[gf];W[db];B[ci];W[bc];B[gh];W[ac];B[hi];W[gb];B[hf];W[ih];B[ag];W[ei];B[bh];W[fa];B[ai];W[bg];B[cf];W[hg];B[];W[ig];B[ga];W[ii];B[];W[aa];B[ba];W[cd];B[bb];W[ab];B[ba];W[cg];B[bb];W[gg];B[hh];W[ih];B[];W[ig];B[];W[ii];B[];W[cd];B[];W[bc];B[gg];W[ca];B[];W[ab];B[];W[db];B[da];W[aa];B[cc];W[ac];B[bd];W[fa];B[ad];W[ac];B[ch];W[bg];B[cg];W[ab];B[cb];W[aa];B[ga];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+2.5];B[di];W[bd];B[fa];W[gd];B[fe];W[aa];B[ch];W[ae];B[eb];W[db];B[dh];W[fc];B[ec];W[ee];B[cd];W[ah];B[df];W[gb];B[ic];W[ff];B[af];W[ed];B[ii];W[ib];B[ac];W[ga];B[ia];W[eh];B[dg];W[ha];B[dc];W[gh];B[ig];W[ba];B[ef];W[be];B[bh];W[gc];B[de];W[cg];B[fh];W[bf];B[dd];W[gg];B[if];W[ce];B[fd];W[he];B[bg];W[bb];B[ed];W[fb];B[cb];W[hf];B[ea];W[cf];B[bc];W[ie];B[ei];W[];B[hd];W[gi];B[ai];W[hg];B[hh];W[hb];B[fi];W[ge];B[ca];W[fg];B[hc];W[ih];B[da];W[ig];B[ad];W[ci];B[bd];W[hi];B[be];W[id];B[hc];W[eg];B[ab];W[cf];B[ba];W[ic];B[bi];W[ce];B[ag];W[hd];B[cg];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+11.5];B[ch];W[id];B[gh];W[hf];B[hc];W[ei];B[fg];W[ec];B[bi];W[gi];B[fb];W[bb];B[ih];W[af];B[di];W[ah];B[ha];W[gd];B[ge];W[cg];B[db];W[df];B[dc];W[gb];B[bf];W[hh];B[hg];W[ce];B[be];W[ag];B[fc];W[fa];B[dg];W[ae];B[gc];W[bc];B[if];W[ga];B[gf];W[ba];B[ff];W[ai];B[ee];W[hb];B[cd];W[cc];B[bg];W[ca];B[ed];W[fi];B[cf];W[bd];B[da];W[hd];B[de];W[eg];B[eb];W[ac];B[bh];W[hi];B[ic];W[ea];B[ad];W[af];B[ai];W[fe];B[ia];W[ef];B[ie];W[ab];B[dh];W[eh];B[ah];W[ag];B[fd];W[cb];B[];W[ii];B[fh];W[ii];B[fi];W[gi];B[];W[eg];B[hh];W[ib];B[ha];W[df];B[];W[ef];B[];W[ae];B[ad];W[ia];B[ha];W[ea];B[hb];W[ag];B[];W[ei];B[fa];W[ae];B[];W[ga];B[];W[ib];B[];W[af];B[];W[])

View File

@@ -0,0 +1,9 @@
Cumulative results for 3 games (BLACK=RANDOM, WHITE=ROOT_PAR)
1. W+7.5
2. W+11.5
3. W+6.5
Cumulative results for 3 games (BLACK=ROOT_PAR, WHITE=RANDOM)
1. B+27.5
2. B+2.5
3. B+11.5
Elapsed Time: 300.899 seconds.

BIN
data/networks/Pass.nn Normal file

Binary file not shown.

50
data/test/ScoreTest.sgf Normal file
View File

@@ -0,0 +1,50 @@
(;SZ[9]CA[UTF-8]FF[4]GM[1]KM[6]PB[Player1]RE[W+5]PW[Player2]
;B[ga]
;W[fa]
;B[fb]
;W[ea]
;B[fc]
;W[eb]
;B[ec]
;W[db]
;B[dc]
;W[cc]
;B[ed]
;W[dd]
;B[ee]
;W[ce]
;B[de]
;W[cd]
;B[ef]
;W[bd]
;B[be]
;W[ae]
;B[bf]
;W[ad]
;B[af]
;W[bb]
;B[cf]
;W[ic]
;B[ib]
;W[id]
;B[hb]
;W[ie]
;B[hc]
;W[he]
;B[hd]
;W[hf]
;B[ge]
;W[ff]
;B[fe]
;W[gg]
;B[eg]
;W[fh]
;B[eh]
;W[hh]
;B[di]
;W[ei]
;B[ch]
;W[fi]
;B[]
;W[]
)

View File

@@ -1 +0,0 @@
(;FF[4]GM[1]SZ[5]KM[3.5]RE[W+11.5];B[ad];W[bd];B[be];W[ed];B[dd];W[ba];B[cd];W[ca];B[ee];W[cb];B[];W[dc];B[db];W[ac];B[];W[ec];B[ae];W[bc];B[ce];W[cc];B[aa];W[de];B[be];W[];B[ce];W[ad];B[dd];W[];B[ea];W[eb];B[ae];W[];B[ee];W[da];B[bb];W[de];B[];W[])

View File

@@ -124,11 +124,13 @@ public class GameState {
}
/**
* Used for setting up the board. Places the player's stone at the specified coordinates.
* Used for setting up the board. Places the player's stone at the specified
* coordinates.
*
* Returns false if the requested intersection is occupied or the resulting position is illegal.
* Returns false if the requested action is PASS, RESIGN or NONE.
* Returns false if the moveHistory's size is already >0 (method should only be used to set up board).
* Returns false if the requested intersection is occupied or the resulting
* position is illegal. Returns false if the requested action is PASS,
* RESIGN or NONE. Returns false if the moveHistory's size is already >0
* (method should only be used to set up board).
*
* Does NOT advance the playerToMove or add the action to the move history.
*
@@ -149,7 +151,7 @@ public class GameState {
playerToMove = player;
boolean validMove = playStone(player,action);
boolean validMove = playStone(player, action);
moveHistory.clear();
playerToMove = actualPTM;

View File

@@ -12,6 +12,7 @@ import java.util.List;
import net.woodyfolsom.msproj.gui.Goban;
import net.woodyfolsom.msproj.policy.HumanGuiInput;
import net.woodyfolsom.msproj.policy.HumanKeyboardInput;
import net.woodyfolsom.msproj.policy.MonteCarloAMAF;
import net.woodyfolsom.msproj.policy.MonteCarloUCT;
import net.woodyfolsom.msproj.policy.Policy;
import net.woodyfolsom.msproj.policy.RandomMovePolicy;
@@ -25,7 +26,7 @@ public class StandAloneGame {
private static final int DEFAULT_SIZE = 9;
enum PLAYER_TYPE {
HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM
HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM, RAVE
};
public static void main(String[] args) {
@@ -75,6 +76,8 @@ public class StandAloneGame {
return PLAYER_TYPE.HUMAN_GUI;
} else if ("RANDOM".equalsIgnoreCase(playerTypeStr)) {
return PLAYER_TYPE.RANDOM;
} else if ("RAVE".equalsIgnoreCase(playerTypeStr)) {
return PLAYER_TYPE.RAVE;
} else {
throw new RuntimeException("Unknown player type: " + playerTypeStr);
}
@@ -175,6 +178,8 @@ public class StandAloneGame {
turnLength * 1000L);
case RANDOM:
return new RandomMovePolicy();
case RAVE:
return new MonteCarloAMAF(new RandomMovePolicy(), turnLength * 1000L);
default:
throw new IllegalArgumentException("Invalid PLAYER_TYPE: "
+ playerType);

View File

@@ -13,7 +13,7 @@ import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
public abstract class MonteCarlo implements Policy {
protected static final int ROLLOUT_DEPTH_LIMIT = 100;
protected static final int ROLLOUT_DEPTH_LIMIT = 150;
protected int numStateEvaluations = 0;
protected Policy movePolicy;
@@ -36,6 +36,11 @@ public abstract class MonteCarlo implements Policy {
public abstract List<GameTreeNode<MonteCarloProperties>> descend(
GameTreeNode<MonteCarloProperties> node);
/**
 * Builds the root node of a fresh search tree for the given game state.
 * The node is paired with a newly created, empty MonteCarloProperties.
 *
 * @param rootGameState the game state the search starts from
 * @return a new tree node wrapping rootGameState
 */
protected GameTreeNode<MonteCarloProperties> createRootNode(GameState rootGameState) {
    MonteCarloProperties rootProperties = new MonteCarloProperties();
    GameTreeNode<MonteCarloProperties> rootNode =
            new GameTreeNode<MonteCarloProperties>(rootGameState, rootProperties);
    return rootNode;
}
private GameTreeNode<MonteCarloProperties> buildTree(GameConfig gameConfig, GameState gameState, Player player) {
//System.out.println(player + " is thinking for up to "
// + (searchTimeLimit / 1000.0) + " seconds...");
@@ -47,8 +52,7 @@ public abstract class MonteCarlo implements Policy {
+ gameState.getPlayerToMove());
}
GameTreeNode<MonteCarloProperties> rootNode = new GameTreeNode<MonteCarloProperties>(
gameState, new MonteCarloProperties());
GameTreeNode<MonteCarloProperties> rootNode = createRootNode(gameState);
do {
@@ -67,8 +71,8 @@ public abstract class MonteCarlo implements Policy {
}
for (GameTreeNode<MonteCarloProperties> newLeaf : newLeaves) {
int reward = rollout(gameConfig, newLeaf, player);
update(newLeaf, reward);
Rollout rollout = rollout(gameConfig, newLeaf, player);
update(newLeaf, rollout);
}
elapsedTime = System.currentTimeMillis() - startTime;
@@ -103,11 +107,11 @@ public abstract class MonteCarlo implements Policy {
GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node,
Player player);
public abstract int rollout(GameConfig gameConfig,
public abstract Rollout rollout(GameConfig gameConfig,
GameTreeNode<MonteCarloProperties> node, Player player);
public abstract void update(GameTreeNode<MonteCarloProperties> node,
int reward);
Rollout rollout);
public long getSearchTimeLimit() {
return searchTimeLimit;

View File

@@ -1,5 +1,14 @@
package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
import java.util.List;
import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.tree.AMAFProperties;
import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
public class MonteCarloAMAF extends MonteCarloUCT {
@@ -7,4 +16,91 @@ public class MonteCarloAMAF extends MonteCarloUCT {
super(movePolicy, searchTimeLimit);
}
/**
 * Propagates the result of a rollout from the given node up to the root.
 *
 * For every node on that path the regular win/visit counters and the AMAF
 * win/visit counters are both updated. In addition, for every other child
 * of the node's parent whose action occurs in the playout and was played
 * by the parent's player to move, only the AMAF counters are updated.
 *
 * @param node the node the rollout started from
 * @param rollout the playout and its reward
 */
@Override
public void update(GameTreeNode<MonteCarloProperties> node, Rollout rollout) {
    final List<Action> playedActions = rollout.getPlayout();
    final int outcome = rollout.getReward();

    for (GameTreeNode<MonteCarloProperties> visited = node; visited != null; visited = visited.getParent()) {
        // The node on the path always receives both kinds of statistics.
        AMAFProperties visitedProps = (AMAFProperties) visited.getProperties();
        visitedProps.setWins(visitedProps.getWins() + outcome);
        visitedProps.setVisits(visitedProps.getVisits() + 1);
        visitedProps.setAmafWins(visitedProps.getAmafWins() + outcome);
        visitedProps.setAmafVisits(visitedProps.getAmafVisits() + 1);

        GameTreeNode<MonteCarloProperties> parent = visited.getParent();
        if (parent == null) {
            // Reached the root: there are no siblings to credit.
            continue;
        }

        Player mover = parent.getGameState().getPlayerToMove();
        for (Action candidate : parent.getActions()) {
            if (!playedActions.contains(candidate)) {
                continue;
            }
            GameTreeNode<MonteCarloProperties> sibling = parent.getChild(candidate);
            if (sibling == visited) {
                // Already counted above; do not credit the path node twice.
                continue;
            }
            AMAFProperties siblingProps = (AMAFProperties) sibling.getProperties();
            // Credit the sibling only when the same player made this move in the playout.
            if (!rollout.hasPlay(mover, candidate)) {
                continue;
            }
            siblingProps.setAmafWins(siblingProps.getAmafWins() + outcome);
            siblingProps.setAmafVisits(siblingProps.getAmafVisits() + 1);
        }
    }
}
/**
 * Builds the root node of a fresh search tree, attaching AMAFProperties
 * so that the AMAF counters are available on the root.
 *
 * @param rootGameState the game state the search starts from
 * @return a new tree node wrapping rootGameState
 */
@Override
protected GameTreeNode<MonteCarloProperties> createRootNode(GameState rootGameState) {
    AMAFProperties rootProperties = new AMAFProperties();
    GameTreeNode<MonteCarloProperties> rootNode =
            new GameTreeNode<MonteCarloProperties>(rootGameState, rootProperties);
    return rootNode;
}
/**
 * Scores a node using its AMAF statistics: the AMAF win rate plus an
 * exploration bonus of TUNING_CONSTANT * sqrt(ln(parent AMAF visits) / AMAF visits).
 *
 * Terminal nodes score 0.0. A node without any AMAF visits scores
 * Double.POSITIVE_INFINITY instead of dividing by zero.
 *
 * @param gameTreeNode the node to score; its parent must not be null, and both
 *        the node and its parent must carry AMAFProperties
 * @return the score of the node
 */
@Override
protected double getNodeScore(GameTreeNode<MonteCarloProperties> gameTreeNode) {
    // Read from the parent before the terminal check, exactly as before.
    double parentAmafVisits = ((AMAFProperties) gameTreeNode.getParent().getProperties()).getAmafVisits();

    if (gameTreeNode.getGameState().isTerminal()) {
        return 0.0;
    }

    AMAFProperties properties = (AMAFProperties) gameTreeNode.getProperties();
    int amafVisits = properties.getAmafVisits();
    if (amafVisits == 0) {
        // An unvisited node has no win rate; rank it ahead of every visited node.
        return Double.POSITIVE_INFINITY;
    }

    // Cast BEFORE dividing: both counters are ints, so dividing first would
    // truncate the win rate to 0 or 1.
    double amafWinRate = (double) properties.getAmafWins() / amafVisits;
    double explorationBonus = TUNING_CONSTANT * Math.sqrt(Math.log(parentAmafVisits) / amafVisits);
    return amafWinRate + explorationBonus;
}
/**
 * Creates a single child node for the successor state, attaches it to the
 * given node under the given action, and returns it in a new list.
 * The child carries AMAFProperties.
 *
 * @param node the parent the new child is attached to
 * @param action the action leading from the parent to the child
 * @param successorState the game state reached by the action
 * @return a mutable list containing only the new child
 */
@Override
protected List<GameTreeNode<MonteCarloProperties>> addNewChildren(
        GameTreeNode<MonteCarloProperties> node, Action action,
        GameState successorState) {
    AMAFProperties childProperties = new AMAFProperties();
    GameTreeNode<MonteCarloProperties> child =
            new GameTreeNode<MonteCarloProperties>(successorState, childProperties);

    List<GameTreeNode<MonteCarloProperties>> created =
            new ArrayList<GameTreeNode<MonteCarloProperties>>();
    created.add(child);
    node.addChild(action, child);
    return created;
}
}

View File

@@ -31,7 +31,6 @@ public class MonteCarloUCT extends MonteCarlo {
// From Kocsis and Szepesvari, the value of an actual terminal node is
// 0, so it will never be grown.
double nodeVisits = node.getProperties().getVisits();
Set<Action> actionsExplored = node.getActions();
GameState gameState = node.getGameState();
@@ -46,16 +45,7 @@ public class MonteCarloUCT extends MonteCarlo {
GameTreeNode<MonteCarloProperties> childNode = node
.getChild(action);
double childScore;
if (childNode.getGameState().isTerminal()) {
childScore = 0.0;
} else {
MonteCarloProperties properties = childNode.getProperties();
childScore = (double) (properties.getWins() / properties
.getVisits())
+ (TUNING_CONSTANT * Math.sqrt(Math.log(nodeVisits)
/ childNode.getProperties().getVisits()));
}
double childScore = getNodeScore(childNode);
// TODO add random tie breaker?
// otherwise the child that is selected first will be biased
if (childScore >= bestScore) {
@@ -74,34 +64,37 @@ public class MonteCarloUCT extends MonteCarlo {
}
}
/**
 * Scores a node as its win rate plus an exploration bonus of
 * TUNING_CONSTANT * sqrt(ln(parent visits) / visits).
 *
 * Terminal nodes score 0.0. A node without any visits scores
 * Double.POSITIVE_INFINITY instead of dividing by zero.
 *
 * @param gameTreeNode the node to score; its parent must not be null
 * @return the score of the node
 */
protected double getNodeScore(GameTreeNode<MonteCarloProperties> gameTreeNode) {
    // Read from the parent before the terminal check, exactly as before.
    double parentVisits = gameTreeNode.getParent().getProperties().getVisits();

    if (gameTreeNode.getGameState().isTerminal()) {
        return 0.0;
    }

    MonteCarloProperties properties = gameTreeNode.getProperties();
    // Widen to double first so the division below is never an integer division.
    double visits = properties.getVisits();
    if (visits == 0.0) {
        // An unvisited node has no win rate; rank it ahead of every visited node.
        return Double.POSITIVE_INFINITY;
    }

    // Dividing the raw counters and casting afterwards would truncate the
    // win rate to a whole number when the counters are integers.
    double winRate = properties.getWins() / visits;
    double explorationBonus = TUNING_CONSTANT * Math.sqrt(Math.log(parentVisits) / visits);
    return winRate + explorationBonus;
}
@Override
public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
Action bestAction = Action.NONE;
double bestScore = Double.NEGATIVE_INFINITY;
GameTreeNode<MonteCarloProperties> bestChild = null;
//int nActions = node.getNumChildren();
//GameState rootGameState = node.getGameState();
//boolean playerToMoveIsWinning = rootGameState.getResult().isWinner(rootGameState.getPlayerToMove());
//playerToMove is winning or only one move (PASS) is available
//boolean allowPass = playerToMoveIsWinning || nActions == 1;
for (Action action : node.getActions()) {
///HEURISTIC - work on ways of removing this go-specific logic /////
//If action is PASS and the play who moved is not the winner while other moves are available, don't pass
//i.e. don't pass when losing
//if (action.isPass() && !allowPass) {
// continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
// //keep searching.
//}
////////////////////////////////////////////////////////////////////
GameTreeNode<MonteCarloProperties> childNode = node
.getChild(action);
MonteCarloProperties properties = childNode.getProperties();
double childScore = (double) properties.getWins()
/ properties.getVisits();
//MonteCarloProperties properties = childNode.getProperties();
//double childScore = (double) properties.getWins()
// / properties.getVisits();
double childScore = getNodeScore(childNode);
if (childScore >= bestScore) {
bestScore = childScore;
@@ -130,25 +123,42 @@ public class MonteCarloUCT extends MonteCarlo {
/**
 * Expands the given node by one new child.
 *
 * A fresh RandomMovePolicy picks an action for the node's game state, the
 * action is played on a copy of that state, and the resulting state is
 * attached to the node through addNewChildren.
 *
 * @param gameConfig the game configuration handed to the move policy
 * @param node the node to expand
 * @param player the player the action is chosen and played for
 * @return the children created by addNewChildren
 * @throws RuntimeException if the policy returns an already explored action
 *         or Action.NONE
 */
@Override
public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig,
GameTreeNode<MonteCarloProperties> node, Player player) {
Policy randomMovePolicy = new RandomMovePolicy();
Set<Action> exploredActions = node.getActions();
// The explored actions are passed along to the policy; presumably they are
// treated as prohibited moves - confirm against Policy.getAction.
Action action = randomMovePolicy.getAction(gameConfig,
node.getGameState(), exploredActions, player);
// Sanity check: the chosen action must not already have a child.
if (exploredActions.contains(action)) {
throw new RuntimeException("Bad action selection at this state: not a NEW leaf node for this Monte Carlo tree.");
}
// Action.NONE means the policy found nothing left to play from this node.
if (Action.NONE == action) {
throw new RuntimeException(
"Unable to grow node - are all actions already explored? Board state: "
+ node.getGameState() + "\nExplored actions: "
+ exploredActions);
}
// Play on a copy so the game state stored in this node is left untouched.
GameState nextGameState = new GameState(node.getGameState());
// NOTE(review): the boolean result of playStone is ignored here - confirm
// that a rejected move cannot occur at this point.
nextGameState.playStone(player, action);
//In principle, more than 1 new child could be generated from a call to grow. However,
//this algorithm only generates one. This interface is mainly for compatibility with Naive Monte Carlo.
return addNewChildren(node, action, nextGameState);
}
protected List<GameTreeNode<MonteCarloProperties>> addNewChildren(GameTreeNode<MonteCarloProperties> node, Action action, GameState successorState) {
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
GameTreeNode<MonteCarloProperties> newChild = new GameTreeNode<MonteCarloProperties>(
nextGameState, new MonteCarloProperties());
successorState, new MonteCarloProperties());
newChildren.add(newChild);
node.addChild(action, newChild);
return newChildren;
}
@@ -157,41 +167,47 @@ public class MonteCarloUCT extends MonteCarlo {
* Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
* Even with super-ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
*/
public int rollout(GameConfig gameConfig,
public Rollout rollout(GameConfig gameConfig,
GameTreeNode<MonteCarloProperties> node, Player player) {
Policy randomMovePolicy = new RandomMovePolicy();
Action action;
Action randomAction;
int rolloutDepth = 0;
GameState rolloutGameState = new GameState(node.getGameState());
GameState initialGameState = node.getGameState();
GameState rolloutGameState = new GameState(initialGameState);
Player currentPlayer = rolloutGameState.getPlayerToMove();
List<Action> rolloutActions = new ArrayList<Action>();
do {
rolloutDepth++;
action = randomMovePolicy.getAction(gameConfig, rolloutGameState,
randomAction = randomMovePolicy.getAction(gameConfig, rolloutGameState,
currentPlayer);
if (action != Action.NONE) {
if (!rolloutGameState.playStone(currentPlayer, action)) {
if (randomAction != Action.NONE) {
if (!rolloutGameState.playStone(currentPlayer, randomAction)) {
throw new RuntimeException(
"Failed to play move selected by RandomMovePolicy");
}
rolloutActions.add(randomAction);
currentPlayer = GoGame.getNextPlayer(currentPlayer);
}
} while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
} while (randomAction != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
numStateEvaluations++;
GameResult gameScore = rolloutGameState.getResult();
if (gameScore.isWinner(player)) {
return 1;
return new Rollout(initialGameState,rolloutActions,1);
} else {
return 0;
return new Rollout(initialGameState,rolloutActions,0);
}
}
@Override
public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
public void update(GameTreeNode<MonteCarloProperties> node, Rollout rollout) {
GameTreeNode<MonteCarloProperties> currentNode = node;
int reward = rollout.getReward();
while (currentNode != null) {
MonteCarloProperties nodeProperties = currentNode.getProperties();
nodeProperties.setWins(nodeProperties.getWins() + reward);

View File

@@ -34,6 +34,13 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
*/
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedMoves, Player player, int nMoves) {
List<Action> randomActions = new ArrayList<Action>();
/*if (gameState.isTerminal()) {
randomActions.add(Action.NONE);
return randomActions;
}*/
if (player != gameState.getPlayerToMove()) {
throw new IllegalArgumentException("It is not " + player
+ "'s turn to move!");
@@ -45,7 +52,6 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
List<Action> possibleActions = actionGenerator.getActions(gameConfig,
gameStateCopy, prohibitedMoves, player,
ActionGenerator.ALL_ACTIONS);
List<Action> randomActions = new ArrayList<Action>();
//
boolean playerIsWinning = gameState.getResult().isWinner(player);
@@ -60,13 +66,20 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
}
}
//if (randomActions.size() == 0) {
// randomActions.add(Action.NONE);
//}
//PASS is always the move of last resort if no valid moves exist
//Action.NONE exists for a reason - if the fail-safe was to ALWAYS return PASS, then MCTS would
//fail to descend properly because the root node would always appear to have additional unexplored actions.
if (randomActions.size() == 0) {
randomActions.add(Action.PASS);
if (prohibitedMoves.contains(Action.PASS)) {
randomActions.add(Action.NONE);
} else {
randomActions.add(Action.PASS);
}
}
//when to resign?

View File

@@ -0,0 +1,65 @@
package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
import java.util.List;
import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.Player;
/**
 * Result of a single rollout: the state it started from, the sequence of
 * actions played, and the reward obtained.
 *
 * The playout is additionally split by player. Actions are assigned
 * alternately, starting with the player to move in the initial game state.
 */
public class Rollout {
    private final GameState initialGameState;
    private final List<Action> playout;
    private final int reward;

    private final List<Action> blackPlays = new ArrayList<Action>();
    private final List<Action> whitePlays = new ArrayList<Action>();

    /**
     * @param initialGameState the state the rollout started from
     * @param playout the actions played, in order; the list is stored as is, not copied
     * @param reward the reward of the rollout
     * @throws RuntimeException if the player to move in initialGameState is
     *         neither BLACK nor WHITE
     */
    public Rollout(GameState initialGameState, List<Action> playout, int reward) {
        this.initialGameState = initialGameState;
        this.playout = playout;
        this.reward = reward;

        Player firstMover = initialGameState.getPlayerToMove();
        List<Action> firstMoverPlays;
        List<Action> secondMoverPlays;
        if (firstMover == Player.BLACK) {
            firstMoverPlays = blackPlays;
            secondMoverPlays = whitePlays;
        } else if (firstMover == Player.WHITE) {
            firstMoverPlays = whitePlays;
            secondMoverPlays = blackPlays;
        } else {
            throw new RuntimeException("Invalid player: " + firstMover);
        }

        // Even positions belong to the first mover, odd positions to the other player.
        boolean firstMoversTurn = true;
        for (Action played : playout) {
            if (firstMoversTurn) {
                firstMoverPlays.add(played);
            } else {
                secondMoverPlays.add(played);
            }
            firstMoversTurn = !firstMoversTurn;
        }
    }

    /** @return the state the rollout started from */
    public GameState getInitialGameState() {
        return initialGameState;
    }

    /** @return the list of actions passed to the constructor */
    public List<Action> getPlayout() {
        return playout;
    }

    /** @return the reward passed to the constructor */
    public int getReward() {
        return reward;
    }

    /**
     * Tells whether the given player played the given action during the playout.
     *
     * @param player BLACK or WHITE
     * @param action the action to look for
     * @return true if the action is among the plays assigned to the player
     * @throws RuntimeException if player is neither BLACK nor WHITE
     */
    public boolean hasPlay(Player player, Action action) {
        if (player == Player.BLACK) {
            return blackPlays.contains(action);
        }
        if (player == Player.WHITE) {
            return whitePlays.contains(action);
        }
        throw new RuntimeException("Invalid player: " + player);
    }
}

View File

@@ -76,22 +76,8 @@ public class RootParallelization implements Policy {
int bestWins = 0;
int bestSims = 0;
//int nActions = totalReward.size();
//boolean playerToMoveIsWinning = gameState.getResult().isWinner(player);
//playerToMove is winning or only one move (PASS) is available
//boolean allowPass = playerToMoveIsWinning || nActions == 1;
for (Action action : totalReward.keySet())
{
//HEURISTIC - work on ways of removing this go-specific logic
//
//This heuristic must be duplicated here because RootPar. does not benefit
//from MonteCarloUCT culling PASS just before returning from getAction().
//if (action.isPass() && !allowPass) {
// continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
// //keep searching.
//}
int totalWins = totalReward.get(action);
int totalSims = numSims.get(action);

View File

@@ -22,9 +22,19 @@ public class ValidMoveGenerator implements ActionGenerator {
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedMoves, Player color, int nMoves) {
List<Action> validMoves = new ArrayList<Action>();
if (gameState.isTerminal()) {
return validMoves;
}
GameState gameStateCopy = new GameState(gameState);
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords();
List<Action> validMoves = new ArrayList<Action>();
//Pass is always valid unless prohibited (or if the state is terminal, as above)
if (!prohibitedMoves.contains(Action.PASS)) {
validMoves.add(Action.PASS);
}
while (emptyCoordinates.size() > 0) {
Action nextMove = Action.getInstance(emptyCoordinates
@@ -38,10 +48,6 @@ public class ValidMoveGenerator implements ActionGenerator {
}
}
if (!prohibitedMoves.contains(Action.PASS)) {
validMoves.add(Action.PASS);
}
return validMoves;
}
}

View File

@@ -0,0 +1,18 @@
package net.woodyfolsom.msproj.tree;
/**
 * Node statistics that add a second pair of counters, the AMAF wins and
 * AMAF visits, to those of MonteCarloProperties. Both counters start at zero.
 */
public class AMAFProperties extends MonteCarloProperties {
    // int fields default to 0, so no explicit initializer is needed.
    int amafVisits;
    int amafWins;

    /** @return the number of AMAF visits recorded */
    public int getAmafVisits() {
        return this.amafVisits;
    }

    /** @param amafVisits the new number of AMAF visits */
    public void setAmafVisits(int amafVisits) {
        this.amafVisits = amafVisits;
    }

    /** @return the number of AMAF wins recorded */
    public int getAmafWins() {
        return this.amafWins;
    }

    /** @param amafWins the new number of AMAF wins */
    public void setAmafWins(int amafWins) {
        this.amafWins = amafWins;
    }
}

View File

@@ -3,38 +3,15 @@ package net.woodyfolsom.msproj;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import net.woodyfolsom.msproj.GameResult;
import net.woodyfolsom.msproj.policy.MonteCarloUCT;
import net.woodyfolsom.msproj.policy.RandomMovePolicy;
import net.woodyfolsom.msproj.policy.RootParallelization;
import net.woodyfolsom.msproj.sgf.SGFLexer;
import net.woodyfolsom.msproj.sgf.SGFNodeCollection;
import net.woodyfolsom.msproj.sgf.SGFParser;
import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.junit.Test;
public class GameScoreTest {
// public static final String endGameSGF =
// "(;FF[4]GM[1]SZ[9]KM[5.5];B[ef];W[ff];B[dg];W[aa];B[fc];W[da];B[cg];W[ei];B[gf]"
// +
// ";W[fi];B[ag];W[ii];B[bi];W[if];B[db];W[ci];B[cf];W[ih];B[bc];W[hb];B[eb];W[fh];B[ig];W[hc];B[be];W[he];B[gc];"
// +
// "W[id];B[cd];W[df];B[hf];W[ah];B[bh];W[fa];B[bg];W[fe];B[ec];W[eh];B[ee];W[bd];B[hg];W[ie];B[fg];W[ca];B[eg];"
// +
// "W[cb];B[ad];W[ba];B[ch];W[dh];B[gd];W[ic];B[ha];W[ab];B[gh];W[gb];B[ed];W[];B[])";
//public static final String endGameSGF = "(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+0.5];B[ef];W[cb];B[fe];W[da];B[cd];W[hh];B[ed];W[cc];B[ci];W[bc];B[cg];W[fi];B[be];W[ea];B[hi];W[df];B[fd];W[bg];B[cf];W[aa];B[gd];W[ch];B[ad];W[dg];B[de];W[ge];B[bh];W[fa];B[ag];W[hd];B[if];W[bi];B[gf];W[bd];B[ah];W[gc];B[ff];W[ca];B[hf];W[dd];B[ce];W[ae];B[ga];W[hc];B[ac];W[gg];B[fg];W[fb];B[ie];W[dh];B[af];W[ec];B[dc];W[id];B[dd];W[eh];B[eb];W[gb];B[ae];W[ic];B[di];W[fh];B[ig];W[ab];B[ha];W[hg];B[hb];W[gi];B[ii];W[ia];B[fc];W[ba];B[eg];W[];B[db];W[];B[])";
public static final String endGameSGF = "(;FF[4]GM[1]SZ[6]KM[1.5]RE[B+0.5];B[bb];W[];B[ec];W[ef];B[ac];W[ed];B[ba];W[dc];B[cf];W[];B[])";
@Test
public void testGetAggregateScoreZero() {
GameResult gameScore = new GameResult(0, 0, 19, 0, true);
@@ -56,21 +33,23 @@ public class GameScoreTest {
@Test
public void testScoreEndGame() throws IOException, RecognitionException {
InputStream is = new ByteArrayInputStream(endGameSGF.getBytes());
GameRecord gameRecord = Referee.replay(is);
assertEquals(11, gameRecord.getNumTurns());
//test case from:
//http://www.online-go.com/faq.php?name=rules
GameRecord gameRecord = Referee.replay(new FileInputStream(new File("data/test/ScoreTest.sgf")));
GameState gameState = gameRecord.getGameState(gameRecord.getNumTurns());
GameConfig gameConfig = gameState.getGameConfig();
GameState gameState9 = gameRecord.getGameState(9);
System.out.println(gameState);
System.out.println(gameState.getResult());
for (int i = 0; i < 5; i++) {
//Action action = new RootParallelization(4, 1000L).getAction(gameRecord.getGameConfig(), gameState9, Player.WHITE);
Action action = new MonteCarloUCT(new RandomMovePolicy(),1000L).getAction(gameRecord.getGameConfig(), gameState9, Player.WHITE);
System.out.println("Suggested action for "+Player.WHITE+": " + action);
}
GameState gameStateCopy = new GameState(gameState);
TerritoryMarker.markTerritory(gameStateCopy.getGameBoard());
System.out.println(gameStateCopy);
gameState9.playStone(Player.WHITE, Action.PASS);
gameState9.playStone(Player.BLACK, Action.PASS);
assertTrue(gameState9.isTerminal());
System.out.println(gameState9.getResult());
assertEquals(9,gameConfig.getSize());
assertEquals(6.0,gameConfig.getKomi(),0.1);
assertTrue(gameState.isTerminal());
assertTrue(gameState.getResult().isWinner(Player.WHITE));
}
}