Root_par beats GoFree!

This commit is contained in:
2012-11-18 18:44:09 -05:00
parent aca8320600
commit 270072006c
22 changed files with 381 additions and 126 deletions

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+13.5];B[de];W[eb];B[gf];W[db];B[dc];W[hf];B[cc];W[dh];B[bf];W[he];B[ge];W[ea];B[gd];W[ca];B[bb];W[fg];B[eg];W[gc];B[gb];W[hc];B[hd];W[fh];B[gh];W[eh];B[ef];W[ed];B[fe];W[dg];B[cg];W[gg];B[df];W[id];B[fd];W[fc];B[fa];W[hg];B[ih];W[ic];B[hb];W[ec];B[fb];W[ie];B[gi];W[dd];B[cd];W[cb];B[ba];W[di];B[ch];W[ib];B[ci];W[fi];B[hh];W[ab];B[bc];W[ha];B[ig];W[bd];B[be];W[ga];B[ee];W[ii];B[ff];W[if];B[hi];W[ii];B[hi];W[ih];B[];W[hh];B[];W[aa];B[ac];W[ab];B[];W[gb];B[gh];W[fb];B[];W[ad];B[aa];W[bh];B[ah];W[af];B[ag];W[];B[bg];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+7.5];B[bg];W[gc];B[ba];W[ic];B[gd];W[ce];B[cf];W[eb];B[gi];W[ci];B[ab];W[bf];B[eh];W[cc];B[be];W[ca];B[ag];W[fd];B[de];W[ig];B[gb];W[fc];B[eg];W[dc];B[id];W[ee];B[bb];W[gf];B[ed];W[hc];B[ei];W[he];B[ia];W[gg];B[dh];W[hd];B[bi];W[ge];B[da];W[cb];B[db];W[ec];B[ga];W[bh];B[cd];W[hh];B[af];W[hb];B[ai];W[];B[ha];W[ad];B[ih];W[cg];B[hi];W[gh];B[ah];W[fi];B[fh];W[if];B[hg];W[hf];B[dg];W[ie];B[ch];W[];B[bc];W[fe];B[bd];W[ff];B[ef];W[fa];B[di];W[dd];B[fg];W[ea];B[db];W[da];B[ae];W[];B[ib];W[fb];B[ac];W[ha];B[ia];W[ga];B[];W[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+11.5];B[cg];W[cf];B[eh];W[gh];B[bc];W[af];B[ge];W[fh];B[ci];W[ed];B[de];W[bb];B[ib];W[ab];B[ad];W[ag];B[db];W[bd];B[hg];W[gd];B[bg];W[ea];B[ha];W[ch];B[gg];W[cc];B[ac];W[dd];B[gc];W[ah];B[fe];W[bh];B[ei];W[hd];B[ba];W[gb];B[be];W[dg];B[gi];W[fg];B[id];W[];B[eb];W[fb];B[ca];W[ae];B[eg];W[hc];B[cd];W[ce];B[hf];W[fd];B[gf];W[ie];B[bc];W[ac];B[bi];W[ga];B[ff];W[he];B[fi];W[fc];B[ig];W[df];B[aa];W[ai];B[ii];W[ic];B[ef];W[dc];B[hh];W[da];B[fg];W[dh];B[fh];W[if];B[ee];W[ec];B[hb];W[];B[di];W[cb];B[ba];W[eb];B[ca];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+6.5];B[hg];W[ef];B[bg];W[ih];B[eh];W[hi];B[gi];W[dg];B[fd];W[gg];B[ic];W[cf];B[fe];W[da];B[ie];W[cd];B[be];W[ei];B[bb];W[df];B[ae];W[gc];B[hd];W[ci];B[cc];W[fg];B[ig];W[gd];B[gh];W[ce];B[gb];W[dd];B[bi];W[fc];B[ia];W[ec];B[ai];W[dc];B[if];W[ea];B[ab];W[hc];B[bh];W[fh];B[ff];W[fa];B[he];W[dh];B[ha];W[eg];B[bf];W[];B[cb];W[ad];B[bd];W[db];B[af];W[ee];B[ga];W[fi];B[hh];W[hb];B[ed];W[hf];B[cg];W[gf];B[ba];W[ah];B[ib];W[eb];B[bc];W[];B[fb];W[id];B[gb];W[ha];B[fb];W[ic];B[ca];W[ch];B[ia];W[];B[ac];W[ga];B[ag];W[fb];B[ii];W[];B[ge];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+27.5];B[ci];W[cf];B[de];W[hh];B[ec];W[hg];B[ga];W[ie];B[dh];W[da];B[df];W[ig];B[fd];W[ha];B[dd];W[bi];B[ff];W[ah];B[dc];W[ib];B[ef];W[ad];B[ce];W[eg];B[ab];W[di];B[fh];W[bd];B[eh];W[bg];B[gd];W[gi];B[he];W[hb];B[fg];W[fc];B[bf];W[ca];B[bb];W[cb];B[be];W[cc];B[id];W[ee];B[ea];W[ch];B[ae];W[gc];B[eb];W[gg];B[hd];W[ih];B[fi];W[fa];B[ba];W[af];B[ic];W[cg];B[fb];W[ei];B[dg];W[fe];B[if];W[ii];B[ed];W[hc];B[gf];W[db];B[ci];W[bc];B[gh];W[ac];B[hi];W[gb];B[hf];W[ih];B[ag];W[ei];B[bh];W[fa];B[ai];W[bg];B[cf];W[hg];B[];W[ig];B[ga];W[ii];B[];W[aa];B[ba];W[cd];B[bb];W[ab];B[ba];W[cg];B[bb];W[gg];B[hh];W[ih];B[];W[ig];B[];W[ii];B[];W[cd];B[];W[bc];B[gg];W[ca];B[];W[ab];B[];W[db];B[da];W[aa];B[cc];W[ac];B[bd];W[fa];B[ad];W[ac];B[ch];W[bg];B[cg];W[ab];B[cb];W[aa];B[ga];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+2.5];B[di];W[bd];B[fa];W[gd];B[fe];W[aa];B[ch];W[ae];B[eb];W[db];B[dh];W[fc];B[ec];W[ee];B[cd];W[ah];B[df];W[gb];B[ic];W[ff];B[af];W[ed];B[ii];W[ib];B[ac];W[ga];B[ia];W[eh];B[dg];W[ha];B[dc];W[gh];B[ig];W[ba];B[ef];W[be];B[bh];W[gc];B[de];W[cg];B[fh];W[bf];B[dd];W[gg];B[if];W[ce];B[fd];W[he];B[bg];W[bb];B[ed];W[fb];B[cb];W[hf];B[ea];W[cf];B[bc];W[ie];B[ei];W[];B[hd];W[gi];B[ai];W[hg];B[hh];W[hb];B[fi];W[ge];B[ca];W[fg];B[hc];W[ih];B[da];W[ig];B[ad];W[ci];B[bd];W[hi];B[be];W[id];B[hc];W[eg];B[ab];W[cf];B[ba];W[ic];B[bi];W[ce];B[ag];W[hd];B[cg];W[];B[])

View File

@@ -0,0 +1 @@
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+11.5];B[ch];W[id];B[gh];W[hf];B[hc];W[ei];B[fg];W[ec];B[bi];W[gi];B[fb];W[bb];B[ih];W[af];B[di];W[ah];B[ha];W[gd];B[ge];W[cg];B[db];W[df];B[dc];W[gb];B[bf];W[hh];B[hg];W[ce];B[be];W[ag];B[fc];W[fa];B[dg];W[ae];B[gc];W[bc];B[if];W[ga];B[gf];W[ba];B[ff];W[ai];B[ee];W[hb];B[cd];W[cc];B[bg];W[ca];B[ed];W[fi];B[cf];W[bd];B[da];W[hd];B[de];W[eg];B[eb];W[ac];B[bh];W[hi];B[ic];W[ea];B[ad];W[af];B[ai];W[fe];B[ia];W[ef];B[ie];W[ab];B[dh];W[eh];B[ah];W[ag];B[fd];W[cb];B[];W[ii];B[fh];W[ii];B[fi];W[gi];B[];W[eg];B[hh];W[ib];B[ha];W[df];B[];W[ef];B[];W[ae];B[ad];W[ia];B[ha];W[ea];B[hb];W[ag];B[];W[ei];B[fa];W[ae];B[];W[ga];B[];W[ib];B[];W[af];B[];W[])

View File

@@ -0,0 +1,9 @@
Cumulative results for 3 games (BLACK=RANDOM, WHITE=ROOT_PAR)
1. W+7.5
2. W+11.5
3. W+6.5
Cumulative results for 3 games (BLACK=ROOT_PAR, WHITE=RANDOM)
1. B+27.5
2. B+2.5
3. B+11.5
Elapsed Time: 300.899 seconds.

BIN
data/networks/Pass.nn Normal file

Binary file not shown.

50
data/test/ScoreTest.sgf Normal file
View File

@@ -0,0 +1,50 @@
(;SZ[9]CA[UTF-8]FF[4]GM[1]KM[6]PB[Player1]RE[W+5]PW[Player2]
;B[ga]
;W[fa]
;B[fb]
;W[ea]
;B[fc]
;W[eb]
;B[ec]
;W[db]
;B[dc]
;W[cc]
;B[ed]
;W[dd]
;B[ee]
;W[ce]
;B[de]
;W[cd]
;B[ef]
;W[bd]
;B[be]
;W[ae]
;B[bf]
;W[ad]
;B[af]
;W[bb]
;B[cf]
;W[ic]
;B[ib]
;W[id]
;B[hb]
;W[ie]
;B[hc]
;W[he]
;B[hd]
;W[hf]
;B[ge]
;W[ff]
;B[fe]
;W[gg]
;B[eg]
;W[fh]
;B[eh]
;W[hh]
;B[di]
;W[ei]
;B[ch]
;W[fi]
;B[]
;W[]
)

View File

@@ -1 +0,0 @@
(;FF[4]GM[1]SZ[5]KM[3.5]RE[W+11.5];B[ad];W[bd];B[be];W[ed];B[dd];W[ba];B[cd];W[ca];B[ee];W[cb];B[];W[dc];B[db];W[ac];B[];W[ec];B[ae];W[bc];B[ce];W[cc];B[aa];W[de];B[be];W[];B[ce];W[ad];B[dd];W[];B[ea];W[eb];B[ae];W[];B[ee];W[da];B[bb];W[de];B[];W[])

View File

@@ -124,11 +124,13 @@ public class GameState {
} }
/** /**
* Used for setting up the board. Places the player's stone at the specified coordinates. * Used for setting up the board. Places the player's stone at the specified
* coordinates.
* *
* Returns false if the requested intersection is occupied or the resulting position is illegal. * Returns false if the requested intersection is occupied or the resulting
* Returns false if the requested action is PASS, RESIGN or NONE. * position is illegal. Returns false if the requested action is PASS,
* Returns false if the moveHistory's size is already >0 (method should only be used to set up board). * RESIGN or NONE. Returns false if the moveHistory's size is already >0
* (method should only be used to set up board).
* *
* Does NOT advance the playerToMove or add the action to the move history. * Does NOT advance the playerToMove or add the action to the move history.
* *
@@ -149,7 +151,7 @@ public class GameState {
playerToMove = player; playerToMove = player;
boolean validMove = playStone(player,action); boolean validMove = playStone(player, action);
moveHistory.clear(); moveHistory.clear();
playerToMove = actualPTM; playerToMove = actualPTM;

View File

@@ -12,6 +12,7 @@ import java.util.List;
import net.woodyfolsom.msproj.gui.Goban; import net.woodyfolsom.msproj.gui.Goban;
import net.woodyfolsom.msproj.policy.HumanGuiInput; import net.woodyfolsom.msproj.policy.HumanGuiInput;
import net.woodyfolsom.msproj.policy.HumanKeyboardInput; import net.woodyfolsom.msproj.policy.HumanKeyboardInput;
import net.woodyfolsom.msproj.policy.MonteCarloAMAF;
import net.woodyfolsom.msproj.policy.MonteCarloUCT; import net.woodyfolsom.msproj.policy.MonteCarloUCT;
import net.woodyfolsom.msproj.policy.Policy; import net.woodyfolsom.msproj.policy.Policy;
import net.woodyfolsom.msproj.policy.RandomMovePolicy; import net.woodyfolsom.msproj.policy.RandomMovePolicy;
@@ -25,7 +26,7 @@ public class StandAloneGame {
private static final int DEFAULT_SIZE = 9; private static final int DEFAULT_SIZE = 9;
enum PLAYER_TYPE { enum PLAYER_TYPE {
HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM, RAVE
}; };
public static void main(String[] args) { public static void main(String[] args) {
@@ -75,6 +76,8 @@ public class StandAloneGame {
return PLAYER_TYPE.HUMAN_GUI; return PLAYER_TYPE.HUMAN_GUI;
} else if ("RANDOM".equalsIgnoreCase(playerTypeStr)) { } else if ("RANDOM".equalsIgnoreCase(playerTypeStr)) {
return PLAYER_TYPE.RANDOM; return PLAYER_TYPE.RANDOM;
} else if ("RAVE".equalsIgnoreCase(playerTypeStr)) {
return PLAYER_TYPE.RAVE;
} else { } else {
throw new RuntimeException("Unknown player type: " + playerTypeStr); throw new RuntimeException("Unknown player type: " + playerTypeStr);
} }
@@ -175,6 +178,8 @@ public class StandAloneGame {
turnLength * 1000L); turnLength * 1000L);
case RANDOM: case RANDOM:
return new RandomMovePolicy(); return new RandomMovePolicy();
case RAVE:
return new MonteCarloAMAF(new RandomMovePolicy(), turnLength * 1000L);
default: default:
throw new IllegalArgumentException("Invalid PLAYER_TYPE: " throw new IllegalArgumentException("Invalid PLAYER_TYPE: "
+ playerType); + playerType);

View File

@@ -13,7 +13,7 @@ import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties; import net.woodyfolsom.msproj.tree.MonteCarloProperties;
public abstract class MonteCarlo implements Policy { public abstract class MonteCarlo implements Policy {
protected static final int ROLLOUT_DEPTH_LIMIT = 100; protected static final int ROLLOUT_DEPTH_LIMIT = 150;
protected int numStateEvaluations = 0; protected int numStateEvaluations = 0;
protected Policy movePolicy; protected Policy movePolicy;
@@ -36,6 +36,11 @@ public abstract class MonteCarlo implements Policy {
public abstract List<GameTreeNode<MonteCarloProperties>> descend( public abstract List<GameTreeNode<MonteCarloProperties>> descend(
GameTreeNode<MonteCarloProperties> node); GameTreeNode<MonteCarloProperties> node);
/**
 * Builds the root node of a new search tree for the given game state.
 * The method is protected so that subclasses can supply a different
 * properties object for the root.
 *
 * @param rootGameState the game state stored in the root node
 * @return a new node holding {@code rootGameState} and a fresh
 *         {@link MonteCarloProperties} instance
 */
protected GameTreeNode<MonteCarloProperties> createRootNode(GameState rootGameState) {
    MonteCarloProperties rootProperties = new MonteCarloProperties();
    return new GameTreeNode<MonteCarloProperties>(rootGameState, rootProperties);
}
private GameTreeNode<MonteCarloProperties> buildTree(GameConfig gameConfig, GameState gameState, Player player) { private GameTreeNode<MonteCarloProperties> buildTree(GameConfig gameConfig, GameState gameState, Player player) {
//System.out.println(player + " is thinking for up to " //System.out.println(player + " is thinking for up to "
// + (searchTimeLimit / 1000.0) + " seconds..."); // + (searchTimeLimit / 1000.0) + " seconds...");
@@ -47,8 +52,7 @@ public abstract class MonteCarlo implements Policy {
+ gameState.getPlayerToMove()); + gameState.getPlayerToMove());
} }
GameTreeNode<MonteCarloProperties> rootNode = new GameTreeNode<MonteCarloProperties>( GameTreeNode<MonteCarloProperties> rootNode = createRootNode(gameState);
gameState, new MonteCarloProperties());
do { do {
@@ -67,8 +71,8 @@ public abstract class MonteCarlo implements Policy {
} }
for (GameTreeNode<MonteCarloProperties> newLeaf : newLeaves) { for (GameTreeNode<MonteCarloProperties> newLeaf : newLeaves) {
int reward = rollout(gameConfig, newLeaf, player); Rollout rollout = rollout(gameConfig, newLeaf, player);
update(newLeaf, reward); update(newLeaf, rollout);
} }
elapsedTime = System.currentTimeMillis() - startTime; elapsedTime = System.currentTimeMillis() - startTime;
@@ -103,11 +107,11 @@ public abstract class MonteCarlo implements Policy {
GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node,
Player player); Player player);
public abstract int rollout(GameConfig gameConfig, public abstract Rollout rollout(GameConfig gameConfig,
GameTreeNode<MonteCarloProperties> node, Player player); GameTreeNode<MonteCarloProperties> node, Player player);
public abstract void update(GameTreeNode<MonteCarloProperties> node, public abstract void update(GameTreeNode<MonteCarloProperties> node,
int reward); Rollout rollout);
public long getSearchTimeLimit() { public long getSearchTimeLimit() {
return searchTimeLimit; return searchTimeLimit;

View File

@@ -1,5 +1,14 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
import java.util.List;
import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.tree.AMAFProperties;
import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
public class MonteCarloAMAF extends MonteCarloUCT { public class MonteCarloAMAF extends MonteCarloUCT {
@@ -7,4 +16,91 @@ public class MonteCarloAMAF extends MonteCarloUCT {
super(movePolicy, searchTimeLimit); super(movePolicy, searchTimeLimit);
} }
/**
 * Propagates the result of a rollout from {@code node} up to the root,
 * updating both the regular and the AMAF (all-moves-as-first) counters.
 *
 * For every node on the path, wins and AMAF wins grow by the rollout's
 * reward and visits and AMAF visits grow by one. In addition, every sibling
 * of a path node (another child of the same parent) receives an AMAF update
 * when the action leading to it appears in the playout and was played there
 * by the parent's player to move.
 *
 * Every node touched here is cast to {@link AMAFProperties}; a node carrying
 * any other properties type causes a {@code ClassCastException}.
 *
 * @param node    the leaf from which the rollout was started
 * @param rollout the playout and reward produced for that leaf
 */
@Override
public void update(GameTreeNode<MonteCarloProperties> node, Rollout rollout) {
    List<Action> playedActions = rollout.getPlayout();
    int reward = rollout.getReward();

    for (GameTreeNode<MonteCarloProperties> cursor = node; cursor != null; cursor = cursor.getParent()) {
        // The node on the path always gets both its regular and AMAF counters bumped.
        AMAFProperties cursorStats = (AMAFProperties) cursor.getProperties();
        cursorStats.setWins(cursorStats.getWins() + reward);
        cursorStats.setVisits(cursorStats.getVisits() + 1);
        cursorStats.setAmafWins(cursorStats.getAmafWins() + reward);
        cursorStats.setAmafVisits(cursorStats.getAmafVisits() + 1);

        GameTreeNode<MonteCarloProperties> parent = cursor.getParent();
        if (parent == null) {
            // The root has no siblings to credit.
            continue;
        }

        Player mover = parent.getGameState().getPlayerToMove();
        for (Action siblingAction : parent.getActions()) {
            if (!playedActions.contains(siblingAction)) {
                continue;
            }

            GameTreeNode<MonteCarloProperties> sibling = parent.getChild(siblingAction);
            if (sibling == cursor) {
                // AMAF counters of the path node were already updated above.
                continue;
            }

            AMAFProperties siblingStats = (AMAFProperties) sibling.getProperties();
            if (!rollout.hasPlay(mover, siblingAction)) {
                // Same action, but played by the other player: not an AMAF match.
                continue;
            }

            siblingStats.setAmafWins(siblingStats.getAmafWins() + reward);
            siblingStats.setAmafVisits(siblingStats.getAmafVisits() + 1);
        }
    }
}
/**
 * Builds the root node of a new search tree, attaching an
 * {@link AMAFProperties} instance so the root can hold AMAF counters.
 *
 * @param rootGameState the game state stored in the root node
 * @return a new node holding {@code rootGameState} and fresh AMAF properties
 */
@Override
protected GameTreeNode<MonteCarloProperties> createRootNode(GameState rootGameState) {
    AMAFProperties amafRootProperties = new AMAFProperties();
    return new GameTreeNode<MonteCarloProperties>(rootGameState, amafRootProperties);
}
/**
 * Scores a node from its AMAF statistics: the AMAF win rate plus an
 * exploration bonus of
 * {@code TUNING_CONSTANT * sqrt(ln(parentAmafVisits) / amafVisits)}.
 *
 * Terminal nodes always score {@code 0.0}.
 *
 * @param gameTreeNode the node to score; it must have a parent, and both the
 *                     node and its parent must carry {@link AMAFProperties}
 * @return the score of the node, or {@code 0.0} if its game state is terminal
 * @throws ArithmeticException if a non-terminal node has zero AMAF visits
 */
@Override
protected double getNodeScore(GameTreeNode<MonteCarloProperties> gameTreeNode) {
    double parentAmafVisits = ((AMAFProperties) gameTreeNode.getParent().getProperties()).getAmafVisits();

    if (gameTreeNode.getGameState().isTerminal()) {
        return 0.0;
    }

    AMAFProperties properties = (AMAFProperties) gameTreeNode.getProperties();
    int amafVisits = properties.getAmafVisits();
    if (amafVisits == 0) {
        // Keeps the failure loud (integer division by zero used to throw here)
        // instead of letting NaN leak into the score comparison.
        throw new ArithmeticException("Cannot score a node with zero AMAF visits");
    }

    // Cast before dividing: both counters are ints, so dividing first would
    // truncate the win rate to 0 for every node with wins < visits.
    double amafWinRate = (double) properties.getAmafWins() / amafVisits;
    double explorationBonus = TUNING_CONSTANT * Math.sqrt(Math.log(parentAmafVisits) / amafVisits);
    return amafWinRate + explorationBonus;
}
/**
 * Creates a single child node for {@code successorState}, gives it fresh
 * {@link AMAFProperties}, and attaches it to {@code node} under
 * {@code action}.
 *
 * @param node           the parent that receives the new child
 * @param action         the action leading from {@code node} to the child
 * @param successorState the game state stored in the child
 * @return a mutable list containing only the newly created child
 */
@Override
protected List<GameTreeNode<MonteCarloProperties>> addNewChildren(
        GameTreeNode<MonteCarloProperties> node, Action action,
        GameState successorState) {
    AMAFProperties childProperties = new AMAFProperties();
    GameTreeNode<MonteCarloProperties> child =
            new GameTreeNode<MonteCarloProperties>(successorState, childProperties);

    List<GameTreeNode<MonteCarloProperties>> created =
            new ArrayList<GameTreeNode<MonteCarloProperties>>();
    created.add(child);

    node.addChild(action, child);
    return created;
}
} }

View File

@@ -31,7 +31,6 @@ public class MonteCarloUCT extends MonteCarlo {
// From Kocsis and Szepesvari, the value of an actual terminal node is // From Kocsis and Szepesvari, the value of an actual terminal node is
// 0, so it will never be grown. // 0, so it will never be grown.
double nodeVisits = node.getProperties().getVisits();
Set<Action> actionsExplored = node.getActions(); Set<Action> actionsExplored = node.getActions();
GameState gameState = node.getGameState(); GameState gameState = node.getGameState();
@@ -46,16 +45,7 @@ public class MonteCarloUCT extends MonteCarlo {
GameTreeNode<MonteCarloProperties> childNode = node GameTreeNode<MonteCarloProperties> childNode = node
.getChild(action); .getChild(action);
double childScore; double childScore = getNodeScore(childNode);
if (childNode.getGameState().isTerminal()) {
childScore = 0.0;
} else {
MonteCarloProperties properties = childNode.getProperties();
childScore = (double) (properties.getWins() / properties
.getVisits())
+ (TUNING_CONSTANT * Math.sqrt(Math.log(nodeVisits)
/ childNode.getProperties().getVisits()));
}
// TODO add random tie breaker? // TODO add random tie breaker?
// otherwise the child that is selected first will be biased // otherwise the child that is selected first will be biased
if (childScore >= bestScore) { if (childScore >= bestScore) {
@@ -74,34 +64,37 @@ public class MonteCarloUCT extends MonteCarlo {
} }
} }
/**
 * Scores a node as its win rate plus an exploration bonus of
 * {@code TUNING_CONSTANT * sqrt(ln(parentVisits) / visits)}.
 *
 * Terminal nodes always score {@code 0.0}.
 *
 * @param gameTreeNode the node to score; it must have a parent
 * @return the score of the node, or {@code 0.0} if its game state is terminal
 * @throws ArithmeticException if a non-terminal node has zero visits
 */
protected double getNodeScore(GameTreeNode<MonteCarloProperties> gameTreeNode) {
    double parentVisits = gameTreeNode.getParent().getProperties().getVisits();

    if (gameTreeNode.getGameState().isTerminal()) {
        return 0.0;
    }

    MonteCarloProperties properties = gameTreeNode.getProperties();
    double visits = properties.getVisits();
    if (visits == 0.0) {
        // NOTE(review): assumes getWins()/getVisits() return integral types, in
        // which case the former wins / visits expression already threw here.
        // Confirm against MonteCarloProperties.
        throw new ArithmeticException("Cannot score a node with zero visits");
    }

    // Divide in floating point. Casting the quotient after the division would
    // truncate an integral win rate to 0 for every node with wins < visits.
    double winRate = properties.getWins() / visits;
    double explorationBonus = TUNING_CONSTANT * Math.sqrt(Math.log(parentVisits) / visits);
    return winRate + explorationBonus;
}
@Override @Override
public Action getBestAction(GameTreeNode<MonteCarloProperties> node) { public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
Action bestAction = Action.NONE; Action bestAction = Action.NONE;
double bestScore = Double.NEGATIVE_INFINITY; double bestScore = Double.NEGATIVE_INFINITY;
GameTreeNode<MonteCarloProperties> bestChild = null; GameTreeNode<MonteCarloProperties> bestChild = null;
//int nActions = node.getNumChildren();
//GameState rootGameState = node.getGameState();
//boolean playerToMoveIsWinning = rootGameState.getResult().isWinner(rootGameState.getPlayerToMove());
//playerToMove is winning or only one move (PASS) is available
//boolean allowPass = playerToMoveIsWinning || nActions == 1;
for (Action action : node.getActions()) { for (Action action : node.getActions()) {
///HEURISTIC - work on ways of removing this go-specific logic /////
//If action is PASS and the play who moved is not the winner while other moves are available, don't pass
//i.e. don't pass when losing
//if (action.isPass() && !allowPass) {
// continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
// //keep searching.
//}
////////////////////////////////////////////////////////////////////
GameTreeNode<MonteCarloProperties> childNode = node GameTreeNode<MonteCarloProperties> childNode = node
.getChild(action); .getChild(action);
MonteCarloProperties properties = childNode.getProperties(); //MonteCarloProperties properties = childNode.getProperties();
double childScore = (double) properties.getWins() //double childScore = (double) properties.getWins()
/ properties.getVisits(); // / properties.getVisits();
double childScore = getNodeScore(childNode);
if (childScore >= bestScore) { if (childScore >= bestScore) {
bestScore = childScore; bestScore = childScore;
@@ -130,25 +123,42 @@ public class MonteCarloUCT extends MonteCarlo {
@Override @Override
public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig,
GameTreeNode<MonteCarloProperties> node, Player player) { GameTreeNode<MonteCarloProperties> node, Player player) {
Policy randomMovePolicy = new RandomMovePolicy(); Policy randomMovePolicy = new RandomMovePolicy();
Set<Action> exploredActions = node.getActions(); Set<Action> exploredActions = node.getActions();
Action action = randomMovePolicy.getAction(gameConfig, Action action = randomMovePolicy.getAction(gameConfig,
node.getGameState(), exploredActions, player); node.getGameState(), exploredActions, player);
if (exploredActions.contains(action)) {
throw new RuntimeException("Bad action selection at this state: not a NEW leaf node for this Monte Carlo tree.");
}
if (Action.NONE == action) { if (Action.NONE == action) {
throw new RuntimeException( throw new RuntimeException(
"Unable to grow node - are all actions already explored? Board state: " "Unable to grow node - are all actions already explored? Board state: "
+ node.getGameState() + "\nExplored actions: " + node.getGameState() + "\nExplored actions: "
+ exploredActions); + exploredActions);
} }
GameState nextGameState = new GameState(node.getGameState()); GameState nextGameState = new GameState(node.getGameState());
nextGameState.playStone(player, action); nextGameState.playStone(player, action);
//In principle, more than 1 new child could be generated from a call to grow. However,
//this algorithm only generates one. This interface is mainly for compatibility with Naive Monte Carlo.
return addNewChildren(node, action, nextGameState);
}
protected List<GameTreeNode<MonteCarloProperties>> addNewChildren(GameTreeNode<MonteCarloProperties> node, Action action, GameState successorState) {
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>(); List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
GameTreeNode<MonteCarloProperties> newChild = new GameTreeNode<MonteCarloProperties>( GameTreeNode<MonteCarloProperties> newChild = new GameTreeNode<MonteCarloProperties>(
nextGameState, new MonteCarloProperties()); successorState, new MonteCarloProperties());
newChildren.add(newChild); newChildren.add(newChild);
node.addChild(action, newChild); node.addChild(action, newChild);
return newChildren; return newChildren;
} }
@@ -157,41 +167,47 @@ public class MonteCarloUCT extends MonteCarlo {
* Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter, * Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
* Even with super-ko detection, a rollout might take an unrealistically long time due to unlikely playouts. * Even with super-ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
*/ */
public int rollout(GameConfig gameConfig, public Rollout rollout(GameConfig gameConfig,
GameTreeNode<MonteCarloProperties> node, Player player) { GameTreeNode<MonteCarloProperties> node, Player player) {
Policy randomMovePolicy = new RandomMovePolicy(); Policy randomMovePolicy = new RandomMovePolicy();
Action action; Action randomAction;
int rolloutDepth = 0; int rolloutDepth = 0;
GameState rolloutGameState = new GameState(node.getGameState()); GameState initialGameState = node.getGameState();
GameState rolloutGameState = new GameState(initialGameState);
Player currentPlayer = rolloutGameState.getPlayerToMove(); Player currentPlayer = rolloutGameState.getPlayerToMove();
List<Action> rolloutActions = new ArrayList<Action>();
do { do {
rolloutDepth++; rolloutDepth++;
action = randomMovePolicy.getAction(gameConfig, rolloutGameState, randomAction = randomMovePolicy.getAction(gameConfig, rolloutGameState,
currentPlayer); currentPlayer);
if (action != Action.NONE) { if (randomAction != Action.NONE) {
if (!rolloutGameState.playStone(currentPlayer, action)) { if (!rolloutGameState.playStone(currentPlayer, randomAction)) {
throw new RuntimeException( throw new RuntimeException(
"Failed to play move selected by RandomMovePolicy"); "Failed to play move selected by RandomMovePolicy");
} }
rolloutActions.add(randomAction);
currentPlayer = GoGame.getNextPlayer(currentPlayer); currentPlayer = GoGame.getNextPlayer(currentPlayer);
} }
} while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT); } while (randomAction != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
numStateEvaluations++; numStateEvaluations++;
GameResult gameScore = rolloutGameState.getResult(); GameResult gameScore = rolloutGameState.getResult();
if (gameScore.isWinner(player)) { if (gameScore.isWinner(player)) {
return 1; return new Rollout(initialGameState,rolloutActions,1);
} else { } else {
return 0; return new Rollout(initialGameState,rolloutActions,0);
} }
} }
@Override @Override
public void update(GameTreeNode<MonteCarloProperties> node, int reward) { public void update(GameTreeNode<MonteCarloProperties> node, Rollout rollout) {
GameTreeNode<MonteCarloProperties> currentNode = node; GameTreeNode<MonteCarloProperties> currentNode = node;
int reward = rollout.getReward();
while (currentNode != null) { while (currentNode != null) {
MonteCarloProperties nodeProperties = currentNode.getProperties(); MonteCarloProperties nodeProperties = currentNode.getProperties();
nodeProperties.setWins(nodeProperties.getWins() + reward); nodeProperties.setWins(nodeProperties.getWins() + reward);

View File

@@ -34,6 +34,13 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
*/ */
public List<Action> getActions(GameConfig gameConfig, GameState gameState, public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedMoves, Player player, int nMoves) { Collection<Action> prohibitedMoves, Player player, int nMoves) {
List<Action> randomActions = new ArrayList<Action>();
/*if (gameState.isTerminal()) {
randomActions.add(Action.NONE);
return randomActions;
}*/
if (player != gameState.getPlayerToMove()) { if (player != gameState.getPlayerToMove()) {
throw new IllegalArgumentException("It is not " + player throw new IllegalArgumentException("It is not " + player
+ "'s turn to move!"); + "'s turn to move!");
@@ -45,7 +52,6 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
List<Action> possibleActions = actionGenerator.getActions(gameConfig, List<Action> possibleActions = actionGenerator.getActions(gameConfig,
gameStateCopy, prohibitedMoves, player, gameStateCopy, prohibitedMoves, player,
ActionGenerator.ALL_ACTIONS); ActionGenerator.ALL_ACTIONS);
List<Action> randomActions = new ArrayList<Action>();
// //
boolean playerIsWinning = gameState.getResult().isWinner(player); boolean playerIsWinning = gameState.getResult().isWinner(player);
@@ -60,14 +66,21 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
} }
} }
//if (randomActions.size() == 0) { //if (randomActions.size() == 0) {
// randomActions.add(Action.NONE); // randomActions.add(Action.NONE);
//} //}
//PASS is always the move of last resort if no valid moves exist //PASS is always the move of last resort if no valid moves exist
//Action.NONE exists for a reason - if the fail-safe was to ALWAYS return PASS, then MCTS would
//fail to descend properly because the root node would always appear to have additional unexplored actions.
if (randomActions.size() == 0) { if (randomActions.size() == 0) {
if (prohibitedMoves.contains(Action.PASS)) {
randomActions.add(Action.NONE);
} else {
randomActions.add(Action.PASS); randomActions.add(Action.PASS);
} }
}
//when to resign? //when to resign?

View File

@@ -0,0 +1,65 @@
package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
import java.util.List;
import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.Player;
/**
 * Result of one playout: the state it started from, the actions played in
 * order, the reward, and the same actions split by the player who made them.
 *
 * The split assumes the actions alternate between the two players, starting
 * with the player to move in the initial game state.
 */
public class Rollout {
    private GameState initialGameState;
    private List<Action> blackPlays;
    private List<Action> playout;
    private List<Action> whitePlays;
    private int reward;

    /**
     * Stores the rollout data and assigns each playout action to the player
     * who made it.
     *
     * @param initialGameState the state the playout started from
     * @param playout          the actions played, in order; kept by reference
     * @param reward           the reward obtained by the playout
     * @throws RuntimeException if the player to move in
     *                          {@code initialGameState} is neither BLACK nor WHITE
     */
    public Rollout(GameState initialGameState, List<Action> playout, int reward) {
        this.initialGameState = initialGameState;
        this.playout = playout;
        this.reward = reward;
        this.blackPlays = new ArrayList<Action>();
        this.whitePlays = new ArrayList<Action>();

        Player firstMover = initialGameState.getPlayerToMove();
        List<Action> firstMoverPlays;
        List<Action> secondMoverPlays;
        if (firstMover == Player.BLACK) {
            firstMoverPlays = blackPlays;
            secondMoverPlays = whitePlays;
        } else if (firstMover == Player.WHITE) {
            firstMoverPlays = whitePlays;
            secondMoverPlays = blackPlays;
        } else {
            throw new RuntimeException("Invalid player: " + firstMover);
        }

        // Even positions belong to the first mover, odd positions to the opponent.
        boolean firstMoverTurn = true;
        for (Action play : playout) {
            if (firstMoverTurn) {
                firstMoverPlays.add(play);
            } else {
                secondMoverPlays.add(play);
            }
            firstMoverTurn = !firstMoverTurn;
        }
    }

    /** @return the game state the playout started from */
    public GameState getInitialGameState() {
        return this.initialGameState;
    }

    /** @return the actions of the playout, in the order they were played */
    public List<Action> getPlayout() {
        return this.playout;
    }

    /** @return the reward obtained by the playout */
    public int getReward() {
        return this.reward;
    }

    /**
     * Tells whether the given player made the given action during the playout.
     *
     * @param player BLACK or WHITE
     * @param action the action to look for
     * @return {@code true} if {@code action} is among the plays of {@code player}
     * @throws RuntimeException if {@code player} is neither BLACK nor WHITE
     */
    public boolean hasPlay(Player player, Action action) {
        List<Action> playsOfPlayer;
        if (player == Player.BLACK) {
            playsOfPlayer = blackPlays;
        } else if (player == Player.WHITE) {
            playsOfPlayer = whitePlays;
        } else {
            throw new RuntimeException("Invalid player: " + player);
        }
        return playsOfPlayer.contains(action);
    }
}

View File

@@ -76,22 +76,8 @@ public class RootParallelization implements Policy {
int bestWins = 0; int bestWins = 0;
int bestSims = 0; int bestSims = 0;
//int nActions = totalReward.size();
//boolean playerToMoveIsWinning = gameState.getResult().isWinner(player);
//playerToMove is winning or only one move (PASS) is available
//boolean allowPass = playerToMoveIsWinning || nActions == 1;
for (Action action : totalReward.keySet()) for (Action action : totalReward.keySet())
{ {
//HEURISTIC - work on ways of removing this go-specific logic
//
//This heuristic must be duplicated here because RootPar. does not benefit
//from MonteCarloUCT culling PASS just before returning from getAction().
//if (action.isPass() && !allowPass) {
// continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
// //keep searching.
//}
int totalWins = totalReward.get(action); int totalWins = totalReward.get(action);
int totalSims = numSims.get(action); int totalSims = numSims.get(action);

View File

@@ -22,9 +22,19 @@ public class ValidMoveGenerator implements ActionGenerator {
public List<Action> getActions(GameConfig gameConfig, GameState gameState, public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedMoves, Player color, int nMoves) { Collection<Action> prohibitedMoves, Player color, int nMoves) {
List<Action> validMoves = new ArrayList<Action>();
if (gameState.isTerminal()) {
return validMoves;
}
GameState gameStateCopy = new GameState(gameState); GameState gameStateCopy = new GameState(gameState);
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords(); List<String> emptyCoordinates = gameStateCopy.getEmptyCoords();
List<Action> validMoves = new ArrayList<Action>();
//Pass is always valid unless prohibited (or if the state is terminal, as above)
if (!prohibitedMoves.contains(Action.PASS)) {
validMoves.add(Action.PASS);
}
while (emptyCoordinates.size() > 0) { while (emptyCoordinates.size() > 0) {
Action nextMove = Action.getInstance(emptyCoordinates Action nextMove = Action.getInstance(emptyCoordinates
@@ -38,10 +48,6 @@ public class ValidMoveGenerator implements ActionGenerator {
} }
} }
if (!prohibitedMoves.contains(Action.PASS)) {
validMoves.add(Action.PASS);
}
return validMoves; return validMoves;
} }
} }

View File

@@ -0,0 +1,18 @@
package net.woodyfolsom.msproj.tree;
/**
 * Node statistics extended with AMAF (all-moves-as-first) counters, kept
 * alongside the counters inherited from {@link MonteCarloProperties}.
 */
public class AMAFProperties extends MonteCarloProperties {
    /** Accumulated AMAF reward; starts at zero. */
    int amafWins = 0;

    /** Number of AMAF updates received; starts at zero. */
    int amafVisits = 0;

    /** @return the accumulated AMAF reward */
    public int getAmafWins() {
        return this.amafWins;
    }

    /** @param amafWins the new accumulated AMAF reward */
    public void setAmafWins(int amafWins) {
        this.amafWins = amafWins;
    }

    /** @return the number of AMAF updates received */
    public int getAmafVisits() {
        return this.amafVisits;
    }

    /** @param amafVisits the new number of AMAF updates */
    public void setAmafVisits(int amafVisits) {
        this.amafVisits = amafVisits;
    }
}

View File

@@ -3,38 +3,15 @@ package net.woodyfolsom.msproj;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import net.woodyfolsom.msproj.GameResult;
import net.woodyfolsom.msproj.policy.MonteCarloUCT;
import net.woodyfolsom.msproj.policy.RandomMovePolicy;
import net.woodyfolsom.msproj.policy.RootParallelization;
import net.woodyfolsom.msproj.sgf.SGFLexer;
import net.woodyfolsom.msproj.sgf.SGFNodeCollection;
import net.woodyfolsom.msproj.sgf.SGFParser;
import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException; import org.antlr.runtime.RecognitionException;
import org.junit.Test; import org.junit.Test;
public class GameScoreTest { public class GameScoreTest {
// public static final String endGameSGF =
// "(;FF[4]GM[1]SZ[9]KM[5.5];B[ef];W[ff];B[dg];W[aa];B[fc];W[da];B[cg];W[ei];B[gf]"
// +
// ";W[fi];B[ag];W[ii];B[bi];W[if];B[db];W[ci];B[cf];W[ih];B[bc];W[hb];B[eb];W[fh];B[ig];W[hc];B[be];W[he];B[gc];"
// +
// "W[id];B[cd];W[df];B[hf];W[ah];B[bh];W[fa];B[bg];W[fe];B[ec];W[eh];B[ee];W[bd];B[hg];W[ie];B[fg];W[ca];B[eg];"
// +
// "W[cb];B[ad];W[ba];B[ch];W[dh];B[gd];W[ic];B[ha];W[ab];B[gh];W[gb];B[ed];W[];B[])";
//public static final String endGameSGF = "(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+0.5];B[ef];W[cb];B[fe];W[da];B[cd];W[hh];B[ed];W[cc];B[ci];W[bc];B[cg];W[fi];B[be];W[ea];B[hi];W[df];B[fd];W[bg];B[cf];W[aa];B[gd];W[ch];B[ad];W[dg];B[de];W[ge];B[bh];W[fa];B[ag];W[hd];B[if];W[bi];B[gf];W[bd];B[ah];W[gc];B[ff];W[ca];B[hf];W[dd];B[ce];W[ae];B[ga];W[hc];B[ac];W[gg];B[fg];W[fb];B[ie];W[dh];B[af];W[ec];B[dc];W[id];B[dd];W[eh];B[eb];W[gb];B[ae];W[ic];B[di];W[fh];B[ig];W[ab];B[ha];W[hg];B[hb];W[gi];B[ii];W[ia];B[fc];W[ba];B[eg];W[];B[db];W[];B[])";
public static final String endGameSGF = "(;FF[4]GM[1]SZ[6]KM[1.5]RE[B+0.5];B[bb];W[];B[ec];W[ef];B[ac];W[ed];B[ba];W[dc];B[cf];W[];B[])";
@Test @Test
public void testGetAggregateScoreZero() { public void testGetAggregateScoreZero() {
GameResult gameScore = new GameResult(0, 0, 19, 0, true); GameResult gameScore = new GameResult(0, 0, 19, 0, true);
@@ -56,21 +33,23 @@ public class GameScoreTest {
@Test @Test
public void testScoreEndGame() throws IOException, RecognitionException { public void testScoreEndGame() throws IOException, RecognitionException {
InputStream is = new ByteArrayInputStream(endGameSGF.getBytes()); //test case from:
GameRecord gameRecord = Referee.replay(is); //http://www.online-go.com/faq.php?name=rules
assertEquals(11, gameRecord.getNumTurns()); GameRecord gameRecord = Referee.replay(new FileInputStream(new File("data/test/ScoreTest.sgf")));
GameState gameState = gameRecord.getGameState(gameRecord.getNumTurns());
GameConfig gameConfig = gameState.getGameConfig();
GameState gameState9 = gameRecord.getGameState(9); System.out.println(gameState);
System.out.println(gameState.getResult());
for (int i = 0; i < 5; i++) { GameState gameStateCopy = new GameState(gameState);
//Action action = new RootParallelization(4, 1000L).getAction(gameRecord.getGameConfig(), gameState9, Player.WHITE); TerritoryMarker.markTerritory(gameStateCopy.getGameBoard());
Action action = new MonteCarloUCT(new RandomMovePolicy(),1000L).getAction(gameRecord.getGameConfig(), gameState9, Player.WHITE); System.out.println(gameStateCopy);
System.out.println("Suggested action for "+Player.WHITE+": " + action);
}
gameState9.playStone(Player.WHITE, Action.PASS); assertEquals(9,gameConfig.getSize());
gameState9.playStone(Player.BLACK, Action.PASS); assertEquals(6.0,gameConfig.getKomi(),0.1);
assertTrue(gameState9.isTerminal());
System.out.println(gameState9.getResult()); assertTrue(gameState.isTerminal());
assertTrue(gameState.getResult().isWinner(Player.WHITE));
} }
} }