Root_par beats GoFree!
This commit is contained in:
@@ -0,0 +1 @@
|
||||
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+13.5];B[de];W[eb];B[gf];W[db];B[dc];W[hf];B[cc];W[dh];B[bf];W[he];B[ge];W[ea];B[gd];W[ca];B[bb];W[fg];B[eg];W[gc];B[gb];W[hc];B[hd];W[fh];B[gh];W[eh];B[ef];W[ed];B[fe];W[dg];B[cg];W[gg];B[df];W[id];B[fd];W[fc];B[fa];W[hg];B[ih];W[ic];B[hb];W[ec];B[fb];W[ie];B[gi];W[dd];B[cd];W[cb];B[ba];W[di];B[ch];W[ib];B[ci];W[fi];B[hh];W[ab];B[bc];W[ha];B[ig];W[bd];B[be];W[ga];B[ee];W[ii];B[ff];W[if];B[hi];W[ii];B[hi];W[ih];B[];W[hh];B[];W[aa];B[ac];W[ab];B[];W[gb];B[gh];W[fb];B[];W[ad];B[aa];W[bh];B[ah];W[af];B[ag];W[];B[bg];W[];B[])
|
||||
@@ -0,0 +1 @@
|
||||
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+7.5];B[bg];W[gc];B[ba];W[ic];B[gd];W[ce];B[cf];W[eb];B[gi];W[ci];B[ab];W[bf];B[eh];W[cc];B[be];W[ca];B[ag];W[fd];B[de];W[ig];B[gb];W[fc];B[eg];W[dc];B[id];W[ee];B[bb];W[gf];B[ed];W[hc];B[ei];W[he];B[ia];W[gg];B[dh];W[hd];B[bi];W[ge];B[da];W[cb];B[db];W[ec];B[ga];W[bh];B[cd];W[hh];B[af];W[hb];B[ai];W[];B[ha];W[ad];B[ih];W[cg];B[hi];W[gh];B[ah];W[fi];B[fh];W[if];B[hg];W[hf];B[dg];W[ie];B[ch];W[];B[bc];W[fe];B[bd];W[ff];B[ef];W[fa];B[di];W[dd];B[fg];W[ea];B[db];W[da];B[ae];W[];B[ib];W[fb];B[ac];W[ha];B[ia];W[ga];B[];W[])
|
||||
@@ -0,0 +1 @@
|
||||
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+11.5];B[cg];W[cf];B[eh];W[gh];B[bc];W[af];B[ge];W[fh];B[ci];W[ed];B[de];W[bb];B[ib];W[ab];B[ad];W[ag];B[db];W[bd];B[hg];W[gd];B[bg];W[ea];B[ha];W[ch];B[gg];W[cc];B[ac];W[dd];B[gc];W[ah];B[fe];W[bh];B[ei];W[hd];B[ba];W[gb];B[be];W[dg];B[gi];W[fg];B[id];W[];B[eb];W[fb];B[ca];W[ae];B[eg];W[hc];B[cd];W[ce];B[hf];W[fd];B[gf];W[ie];B[bc];W[ac];B[bi];W[ga];B[ff];W[he];B[fi];W[fc];B[ig];W[df];B[aa];W[ai];B[ii];W[ic];B[ef];W[dc];B[hh];W[da];B[fg];W[dh];B[fh];W[if];B[ee];W[ec];B[hb];W[];B[di];W[cb];B[ba];W[eb];B[ca];W[];B[])
|
||||
@@ -0,0 +1 @@
|
||||
(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+6.5];B[hg];W[ef];B[bg];W[ih];B[eh];W[hi];B[gi];W[dg];B[fd];W[gg];B[ic];W[cf];B[fe];W[da];B[ie];W[cd];B[be];W[ei];B[bb];W[df];B[ae];W[gc];B[hd];W[ci];B[cc];W[fg];B[ig];W[gd];B[gh];W[ce];B[gb];W[dd];B[bi];W[fc];B[ia];W[ec];B[ai];W[dc];B[if];W[ea];B[ab];W[hc];B[bh];W[fh];B[ff];W[fa];B[he];W[dh];B[ha];W[eg];B[bf];W[];B[cb];W[ad];B[bd];W[db];B[af];W[ee];B[ga];W[fi];B[hh];W[hb];B[ed];W[hf];B[cg];W[gf];B[ba];W[ah];B[ib];W[eb];B[bc];W[];B[fb];W[id];B[gb];W[ha];B[fb];W[ic];B[ca];W[ch];B[ia];W[];B[ac];W[ga];B[ag];W[fb];B[ii];W[];B[ge];W[];B[])
|
||||
@@ -0,0 +1 @@
|
||||
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+27.5];B[ci];W[cf];B[de];W[hh];B[ec];W[hg];B[ga];W[ie];B[dh];W[da];B[df];W[ig];B[fd];W[ha];B[dd];W[bi];B[ff];W[ah];B[dc];W[ib];B[ef];W[ad];B[ce];W[eg];B[ab];W[di];B[fh];W[bd];B[eh];W[bg];B[gd];W[gi];B[he];W[hb];B[fg];W[fc];B[bf];W[ca];B[bb];W[cb];B[be];W[cc];B[id];W[ee];B[ea];W[ch];B[ae];W[gc];B[eb];W[gg];B[hd];W[ih];B[fi];W[fa];B[ba];W[af];B[ic];W[cg];B[fb];W[ei];B[dg];W[fe];B[if];W[ii];B[ed];W[hc];B[gf];W[db];B[ci];W[bc];B[gh];W[ac];B[hi];W[gb];B[hf];W[ih];B[ag];W[ei];B[bh];W[fa];B[ai];W[bg];B[cf];W[hg];B[];W[ig];B[ga];W[ii];B[];W[aa];B[ba];W[cd];B[bb];W[ab];B[ba];W[cg];B[bb];W[gg];B[hh];W[ih];B[];W[ig];B[];W[ii];B[];W[cd];B[];W[bc];B[gg];W[ca];B[];W[ab];B[];W[db];B[da];W[aa];B[cc];W[ac];B[bd];W[fa];B[ad];W[ac];B[ch];W[bg];B[cg];W[ab];B[cb];W[aa];B[ga];W[];B[])
|
||||
@@ -0,0 +1 @@
|
||||
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+2.5];B[di];W[bd];B[fa];W[gd];B[fe];W[aa];B[ch];W[ae];B[eb];W[db];B[dh];W[fc];B[ec];W[ee];B[cd];W[ah];B[df];W[gb];B[ic];W[ff];B[af];W[ed];B[ii];W[ib];B[ac];W[ga];B[ia];W[eh];B[dg];W[ha];B[dc];W[gh];B[ig];W[ba];B[ef];W[be];B[bh];W[gc];B[de];W[cg];B[fh];W[bf];B[dd];W[gg];B[if];W[ce];B[fd];W[he];B[bg];W[bb];B[ed];W[fb];B[cb];W[hf];B[ea];W[cf];B[bc];W[ie];B[ei];W[];B[hd];W[gi];B[ai];W[hg];B[hh];W[hb];B[fi];W[ge];B[ca];W[fg];B[hc];W[ih];B[da];W[ig];B[ad];W[ci];B[bd];W[hi];B[be];W[id];B[hc];W[eg];B[ab];W[cf];B[ba];W[ic];B[bi];W[ce];B[ag];W[hd];B[cg];W[];B[])
|
||||
@@ -0,0 +1 @@
|
||||
(;FF[4]GM[1]SZ[9]KM[5.5]RE[B+11.5];B[ch];W[id];B[gh];W[hf];B[hc];W[ei];B[fg];W[ec];B[bi];W[gi];B[fb];W[bb];B[ih];W[af];B[di];W[ah];B[ha];W[gd];B[ge];W[cg];B[db];W[df];B[dc];W[gb];B[bf];W[hh];B[hg];W[ce];B[be];W[ag];B[fc];W[fa];B[dg];W[ae];B[gc];W[bc];B[if];W[ga];B[gf];W[ba];B[ff];W[ai];B[ee];W[hb];B[cd];W[cc];B[bg];W[ca];B[ed];W[fi];B[cf];W[bd];B[da];W[hd];B[de];W[eg];B[eb];W[ac];B[bh];W[hi];B[ic];W[ea];B[ad];W[af];B[ai];W[fe];B[ia];W[ef];B[ie];W[ab];B[dh];W[eh];B[ah];W[ag];B[fd];W[cb];B[];W[ii];B[fh];W[ii];B[fi];W[gi];B[];W[eg];B[hh];W[ib];B[ha];W[df];B[];W[ef];B[];W[ae];B[ad];W[ia];B[ha];W[ea];B[hb];W[ag];B[];W[ei];B[fa];W[ae];B[];W[ga];B[];W[ib];B[];W[af];B[];W[])
|
||||
@@ -0,0 +1,9 @@
|
||||
Cumulative results for 3 games (BLACK=RANDOM, WHITE=ROOT_PAR)
|
||||
1. W+7.5
|
||||
2. W+11.5
|
||||
3. W+6.5
|
||||
Cumulative results for 3 games (BLACK=ROOT_PAR, WHITE=RANDOM)
|
||||
1. B+27.5
|
||||
2. B+2.5
|
||||
3. B+11.5
|
||||
Elapsed Time: 300.899 seconds.
|
||||
BIN
data/networks/Pass.nn
Normal file
BIN
data/networks/Pass.nn
Normal file
Binary file not shown.
50
data/test/ScoreTest.sgf
Normal file
50
data/test/ScoreTest.sgf
Normal file
@@ -0,0 +1,50 @@
|
||||
(;SZ[9]CA[UTF-8]FF[4]GM[1]KM[6]PB[Player1]RE[W+5]PW[Player2]
|
||||
;B[ga]
|
||||
;W[fa]
|
||||
;B[fb]
|
||||
;W[ea]
|
||||
;B[fc]
|
||||
;W[eb]
|
||||
;B[ec]
|
||||
;W[db]
|
||||
;B[dc]
|
||||
;W[cc]
|
||||
;B[ed]
|
||||
;W[dd]
|
||||
;B[ee]
|
||||
;W[ce]
|
||||
;B[de]
|
||||
;W[cd]
|
||||
;B[ef]
|
||||
;W[bd]
|
||||
;B[be]
|
||||
;W[ae]
|
||||
;B[bf]
|
||||
;W[ad]
|
||||
;B[af]
|
||||
;W[bb]
|
||||
;B[cf]
|
||||
;W[ic]
|
||||
;B[ib]
|
||||
;W[id]
|
||||
;B[hb]
|
||||
;W[ie]
|
||||
;B[hc]
|
||||
;W[he]
|
||||
;B[hd]
|
||||
;W[hf]
|
||||
;B[ge]
|
||||
;W[ff]
|
||||
;B[fe]
|
||||
;W[gg]
|
||||
;B[eg]
|
||||
;W[fh]
|
||||
;B[eh]
|
||||
;W[hh]
|
||||
;B[di]
|
||||
;W[ei]
|
||||
;B[ch]
|
||||
;W[fi]
|
||||
;B[]
|
||||
;W[]
|
||||
)
|
||||
@@ -1 +0,0 @@
|
||||
(;FF[4]GM[1]SZ[5]KM[3.5]RE[W+11.5];B[ad];W[bd];B[be];W[ed];B[dd];W[ba];B[cd];W[ca];B[ee];W[cb];B[];W[dc];B[db];W[ac];B[];W[ec];B[ae];W[bc];B[ce];W[cc];B[aa];W[de];B[be];W[];B[ce];W[ad];B[dd];W[];B[ea];W[eb];B[ae];W[];B[ee];W[da];B[bb];W[de];B[];W[])
|
||||
@@ -124,11 +124,13 @@ public class GameState {
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for setting up the board. Places the player's stone at the specified coordinates.
|
||||
* Used for setting up the board. Places the player's stone at the specified
|
||||
* coordinates.
|
||||
*
|
||||
* Returns false if the requested intersection is occupied or the resulting position is illegal.
|
||||
* Returns false if the requested action is PASS, RESIGN or NONE.
|
||||
* Returns false if the moveHistory's size is already >0 (method should only be used to set up board).
|
||||
* Returns false if the requested intersection is occupied or the resulting
|
||||
* position is illegal. Returns false if the requested action is PASS,
|
||||
* RESIGN or NONE. Returns false if the moveHistory's size is already >0
|
||||
* (method should only be used to set up board).
|
||||
*
|
||||
* Does NOT advance the playerToMove or add the action to the move history.
|
||||
*
|
||||
@@ -149,7 +151,7 @@ public class GameState {
|
||||
|
||||
playerToMove = player;
|
||||
|
||||
boolean validMove = playStone(player,action);
|
||||
boolean validMove = playStone(player, action);
|
||||
|
||||
moveHistory.clear();
|
||||
playerToMove = actualPTM;
|
||||
|
||||
@@ -12,6 +12,7 @@ import java.util.List;
|
||||
import net.woodyfolsom.msproj.gui.Goban;
|
||||
import net.woodyfolsom.msproj.policy.HumanGuiInput;
|
||||
import net.woodyfolsom.msproj.policy.HumanKeyboardInput;
|
||||
import net.woodyfolsom.msproj.policy.MonteCarloAMAF;
|
||||
import net.woodyfolsom.msproj.policy.MonteCarloUCT;
|
||||
import net.woodyfolsom.msproj.policy.Policy;
|
||||
import net.woodyfolsom.msproj.policy.RandomMovePolicy;
|
||||
@@ -25,7 +26,7 @@ public class StandAloneGame {
|
||||
private static final int DEFAULT_SIZE = 9;
|
||||
|
||||
enum PLAYER_TYPE {
|
||||
HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM
|
||||
HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM, RAVE
|
||||
};
|
||||
|
||||
public static void main(String[] args) {
|
||||
@@ -75,6 +76,8 @@ public class StandAloneGame {
|
||||
return PLAYER_TYPE.HUMAN_GUI;
|
||||
} else if ("RANDOM".equalsIgnoreCase(playerTypeStr)) {
|
||||
return PLAYER_TYPE.RANDOM;
|
||||
} else if ("RAVE".equalsIgnoreCase(playerTypeStr)) {
|
||||
return PLAYER_TYPE.RAVE;
|
||||
} else {
|
||||
throw new RuntimeException("Unknown player type: " + playerTypeStr);
|
||||
}
|
||||
@@ -175,6 +178,8 @@ public class StandAloneGame {
|
||||
turnLength * 1000L);
|
||||
case RANDOM:
|
||||
return new RandomMovePolicy();
|
||||
case RAVE:
|
||||
return new MonteCarloAMAF(new RandomMovePolicy(), turnLength * 1000L);
|
||||
default:
|
||||
throw new IllegalArgumentException("Invalid PLAYER_TYPE: "
|
||||
+ playerType);
|
||||
|
||||
@@ -13,7 +13,7 @@ import net.woodyfolsom.msproj.tree.GameTreeNode;
|
||||
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
|
||||
|
||||
public abstract class MonteCarlo implements Policy {
|
||||
protected static final int ROLLOUT_DEPTH_LIMIT = 100;
|
||||
protected static final int ROLLOUT_DEPTH_LIMIT = 150;
|
||||
|
||||
protected int numStateEvaluations = 0;
|
||||
protected Policy movePolicy;
|
||||
@@ -36,6 +36,11 @@ public abstract class MonteCarlo implements Policy {
|
||||
public abstract List<GameTreeNode<MonteCarloProperties>> descend(
|
||||
GameTreeNode<MonteCarloProperties> node);
|
||||
|
||||
protected GameTreeNode<MonteCarloProperties> createRootNode(GameState rootGameState) {
|
||||
return new GameTreeNode<MonteCarloProperties>(
|
||||
rootGameState, new MonteCarloProperties());
|
||||
}
|
||||
|
||||
private GameTreeNode<MonteCarloProperties> buildTree(GameConfig gameConfig, GameState gameState, Player player) {
|
||||
//System.out.println(player + " is thinking for up to "
|
||||
// + (searchTimeLimit / 1000.0) + " seconds...");
|
||||
@@ -47,8 +52,7 @@ public abstract class MonteCarlo implements Policy {
|
||||
+ gameState.getPlayerToMove());
|
||||
}
|
||||
|
||||
GameTreeNode<MonteCarloProperties> rootNode = new GameTreeNode<MonteCarloProperties>(
|
||||
gameState, new MonteCarloProperties());
|
||||
GameTreeNode<MonteCarloProperties> rootNode = createRootNode(gameState);
|
||||
|
||||
do {
|
||||
|
||||
@@ -67,8 +71,8 @@ public abstract class MonteCarlo implements Policy {
|
||||
}
|
||||
|
||||
for (GameTreeNode<MonteCarloProperties> newLeaf : newLeaves) {
|
||||
int reward = rollout(gameConfig, newLeaf, player);
|
||||
update(newLeaf, reward);
|
||||
Rollout rollout = rollout(gameConfig, newLeaf, player);
|
||||
update(newLeaf, rollout);
|
||||
}
|
||||
|
||||
elapsedTime = System.currentTimeMillis() - startTime;
|
||||
@@ -103,11 +107,11 @@ public abstract class MonteCarlo implements Policy {
|
||||
GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node,
|
||||
Player player);
|
||||
|
||||
public abstract int rollout(GameConfig gameConfig,
|
||||
public abstract Rollout rollout(GameConfig gameConfig,
|
||||
GameTreeNode<MonteCarloProperties> node, Player player);
|
||||
|
||||
public abstract void update(GameTreeNode<MonteCarloProperties> node,
|
||||
int reward);
|
||||
Rollout rollout);
|
||||
|
||||
public long getSearchTimeLimit() {
|
||||
return searchTimeLimit;
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
import net.woodyfolsom.msproj.tree.AMAFProperties;
|
||||
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
||||
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
|
||||
|
||||
public class MonteCarloAMAF extends MonteCarloUCT {
|
||||
|
||||
@@ -7,4 +16,91 @@ public class MonteCarloAMAF extends MonteCarloUCT {
|
||||
super(movePolicy, searchTimeLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(GameTreeNode<MonteCarloProperties> node, Rollout rollout) {
|
||||
GameTreeNode<MonteCarloProperties> currentNode = node;
|
||||
//List<Action> subTreeActions = new ArrayList<Action>(rollout.getPlayout());
|
||||
|
||||
List<Action> playout = rollout.getPlayout();
|
||||
int reward = rollout.getReward();
|
||||
while (currentNode != null) {
|
||||
AMAFProperties nodeProperties = (AMAFProperties)currentNode.getProperties();
|
||||
|
||||
//Always update props for the current node
|
||||
nodeProperties.setWins(nodeProperties.getWins() + reward);
|
||||
nodeProperties.setVisits(nodeProperties.getVisits() + 1);
|
||||
nodeProperties.setAmafWins(nodeProperties.getAmafWins() + reward);
|
||||
nodeProperties.setAmafVisits(nodeProperties.getAmafVisits() + 1);
|
||||
|
||||
GameTreeNode<MonteCarloProperties> parentNode = currentNode.getParent();
|
||||
if (parentNode != null) {
|
||||
Player playerToMove = parentNode.getGameState().getPlayerToMove();
|
||||
for (Action actionFromParent : parentNode.getActions()) {
|
||||
if (playout.contains(actionFromParent)) {
|
||||
GameTreeNode<MonteCarloProperties> subTreeChild = parentNode.getChild(actionFromParent);
|
||||
//Don't count AMAF properties for the current node twice
|
||||
if (subTreeChild == currentNode) {
|
||||
continue;
|
||||
}
|
||||
|
||||
AMAFProperties siblingProperties = (AMAFProperties)subTreeChild.getProperties();
|
||||
//Only update AMAF properties if the sibling is reached by the same action with the same player to move
|
||||
if (rollout.hasPlay(playerToMove,actionFromParent)) {
|
||||
siblingProperties.setAmafWins(siblingProperties.getAmafWins() + reward);
|
||||
siblingProperties.setAmafVisits(siblingProperties.getAmafVisits() + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
currentNode = currentNode.getParent();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected GameTreeNode<MonteCarloProperties> createRootNode(GameState rootGameState) {
|
||||
return new GameTreeNode<MonteCarloProperties>(
|
||||
rootGameState, new AMAFProperties());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double getNodeScore(GameTreeNode<MonteCarloProperties> gameTreeNode) {
|
||||
//double nodeVisits = gameTreeNode.getParent().getProperties().getVisits();
|
||||
double parentAmafVisits = ((AMAFProperties)gameTreeNode.getParent().getProperties()).getAmafVisits();
|
||||
|
||||
double nodeScore;
|
||||
if (gameTreeNode.getGameState().isTerminal()) {
|
||||
nodeScore = 0.0;
|
||||
} else {
|
||||
/*
|
||||
MonteCarloProperties properties = gameTreeNode.getProperties();
|
||||
nodeScore = (double) (properties.getWins() / properties
|
||||
.getVisits())
|
||||
+ (TUNING_CONSTANT * Math.sqrt(Math.log(nodeVisits)
|
||||
/ gameTreeNode.getProperties().getVisits()));
|
||||
*
|
||||
*/
|
||||
AMAFProperties properties = (AMAFProperties) gameTreeNode.getProperties();
|
||||
nodeScore = (double) (properties.getAmafWins() / properties
|
||||
.getAmafVisits())
|
||||
+ (TUNING_CONSTANT * Math.sqrt(Math.log(parentAmafVisits)
|
||||
/ properties.getAmafVisits()));
|
||||
|
||||
}
|
||||
return nodeScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<GameTreeNode<MonteCarloProperties>> addNewChildren(
|
||||
GameTreeNode<MonteCarloProperties> node, Action action,
|
||||
GameState successorState) {
|
||||
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
||||
|
||||
GameTreeNode<MonteCarloProperties> newChild = new GameTreeNode<MonteCarloProperties>(
|
||||
successorState, new AMAFProperties());
|
||||
|
||||
newChildren.add(newChild);
|
||||
node.addChild(action, newChild);
|
||||
return newChildren;
|
||||
}
|
||||
}
|
||||
@@ -31,7 +31,6 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
// From Kocsis and Szepesvari, the value of an actual terminal node is
|
||||
// 0, so it will never be grown.
|
||||
|
||||
double nodeVisits = node.getProperties().getVisits();
|
||||
Set<Action> actionsExplored = node.getActions();
|
||||
GameState gameState = node.getGameState();
|
||||
|
||||
@@ -46,16 +45,7 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
GameTreeNode<MonteCarloProperties> childNode = node
|
||||
.getChild(action);
|
||||
|
||||
double childScore;
|
||||
if (childNode.getGameState().isTerminal()) {
|
||||
childScore = 0.0;
|
||||
} else {
|
||||
MonteCarloProperties properties = childNode.getProperties();
|
||||
childScore = (double) (properties.getWins() / properties
|
||||
.getVisits())
|
||||
+ (TUNING_CONSTANT * Math.sqrt(Math.log(nodeVisits)
|
||||
/ childNode.getProperties().getVisits()));
|
||||
}
|
||||
double childScore = getNodeScore(childNode);
|
||||
// TODO add random tie breaker?
|
||||
// otherwise the child that is selected first will be biased
|
||||
if (childScore >= bestScore) {
|
||||
@@ -74,34 +64,37 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
}
|
||||
}
|
||||
|
||||
protected double getNodeScore(GameTreeNode<MonteCarloProperties> gameTreeNode) {
|
||||
double nodeScore;
|
||||
double parentVisits = gameTreeNode.getParent().getProperties().getVisits();
|
||||
|
||||
if (gameTreeNode.getGameState().isTerminal()) {
|
||||
nodeScore = 0.0;
|
||||
} else {
|
||||
MonteCarloProperties properties = gameTreeNode.getProperties();
|
||||
nodeScore = (double) (properties.getWins() / properties
|
||||
.getVisits())
|
||||
+ (TUNING_CONSTANT * Math.sqrt(Math.log(parentVisits)
|
||||
/ gameTreeNode.getProperties().getVisits()));
|
||||
}
|
||||
return nodeScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
|
||||
Action bestAction = Action.NONE;
|
||||
double bestScore = Double.NEGATIVE_INFINITY;
|
||||
GameTreeNode<MonteCarloProperties> bestChild = null;
|
||||
|
||||
//int nActions = node.getNumChildren();
|
||||
//GameState rootGameState = node.getGameState();
|
||||
//boolean playerToMoveIsWinning = rootGameState.getResult().isWinner(rootGameState.getPlayerToMove());
|
||||
//playerToMove is winning or only one move (PASS) is available
|
||||
//boolean allowPass = playerToMoveIsWinning || nActions == 1;
|
||||
|
||||
for (Action action : node.getActions()) {
|
||||
///HEURISTIC - work on ways of removing this go-specific logic /////
|
||||
//If action is PASS and the play who moved is not the winner while other moves are available, don't pass
|
||||
//i.e. don't pass when losing
|
||||
//if (action.isPass() && !allowPass) {
|
||||
// continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
|
||||
// //keep searching.
|
||||
//}
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
GameTreeNode<MonteCarloProperties> childNode = node
|
||||
.getChild(action);
|
||||
|
||||
MonteCarloProperties properties = childNode.getProperties();
|
||||
double childScore = (double) properties.getWins()
|
||||
/ properties.getVisits();
|
||||
//MonteCarloProperties properties = childNode.getProperties();
|
||||
//double childScore = (double) properties.getWins()
|
||||
// / properties.getVisits();
|
||||
|
||||
double childScore = getNodeScore(childNode);
|
||||
|
||||
if (childScore >= bestScore) {
|
||||
bestScore = childScore;
|
||||
@@ -130,25 +123,42 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
@Override
|
||||
public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig,
|
||||
GameTreeNode<MonteCarloProperties> node, Player player) {
|
||||
|
||||
Policy randomMovePolicy = new RandomMovePolicy();
|
||||
Set<Action> exploredActions = node.getActions();
|
||||
|
||||
Action action = randomMovePolicy.getAction(gameConfig,
|
||||
node.getGameState(), exploredActions, player);
|
||||
|
||||
if (exploredActions.contains(action)) {
|
||||
throw new RuntimeException("Bad action selection at this state: not a NEW leaf node for this Monte Carlo tree.");
|
||||
}
|
||||
|
||||
if (Action.NONE == action) {
|
||||
throw new RuntimeException(
|
||||
"Unable to grow node - are all actions already explored? Board state: "
|
||||
+ node.getGameState() + "\nExplored actions: "
|
||||
+ exploredActions);
|
||||
}
|
||||
|
||||
GameState nextGameState = new GameState(node.getGameState());
|
||||
nextGameState.playStone(player, action);
|
||||
|
||||
//In principle, more than 1 new child could be generated from a call to grow. However,
|
||||
//this algorithm only generates one. This interface is mainly for compatibility with Naive Monte Carlo.
|
||||
|
||||
|
||||
return addNewChildren(node, action, nextGameState);
|
||||
}
|
||||
|
||||
protected List<GameTreeNode<MonteCarloProperties>> addNewChildren(GameTreeNode<MonteCarloProperties> node, Action action, GameState successorState) {
|
||||
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
||||
|
||||
GameTreeNode<MonteCarloProperties> newChild = new GameTreeNode<MonteCarloProperties>(
|
||||
nextGameState, new MonteCarloProperties());
|
||||
successorState, new MonteCarloProperties());
|
||||
|
||||
newChildren.add(newChild);
|
||||
node.addChild(action, newChild);
|
||||
|
||||
return newChildren;
|
||||
}
|
||||
|
||||
@@ -157,41 +167,47 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
* Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
|
||||
* Even with super-ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
|
||||
*/
|
||||
public int rollout(GameConfig gameConfig,
|
||||
public Rollout rollout(GameConfig gameConfig,
|
||||
GameTreeNode<MonteCarloProperties> node, Player player) {
|
||||
Policy randomMovePolicy = new RandomMovePolicy();
|
||||
|
||||
Action action;
|
||||
Action randomAction;
|
||||
int rolloutDepth = 0;
|
||||
GameState rolloutGameState = new GameState(node.getGameState());
|
||||
GameState initialGameState = node.getGameState();
|
||||
GameState rolloutGameState = new GameState(initialGameState);
|
||||
Player currentPlayer = rolloutGameState.getPlayerToMove();
|
||||
List<Action> rolloutActions = new ArrayList<Action>();
|
||||
do {
|
||||
rolloutDepth++;
|
||||
action = randomMovePolicy.getAction(gameConfig, rolloutGameState,
|
||||
randomAction = randomMovePolicy.getAction(gameConfig, rolloutGameState,
|
||||
currentPlayer);
|
||||
if (action != Action.NONE) {
|
||||
if (!rolloutGameState.playStone(currentPlayer, action)) {
|
||||
if (randomAction != Action.NONE) {
|
||||
if (!rolloutGameState.playStone(currentPlayer, randomAction)) {
|
||||
throw new RuntimeException(
|
||||
"Failed to play move selected by RandomMovePolicy");
|
||||
}
|
||||
rolloutActions.add(randomAction);
|
||||
currentPlayer = GoGame.getNextPlayer(currentPlayer);
|
||||
}
|
||||
} while (action != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
|
||||
} while (randomAction != Action.NONE && rolloutDepth < ROLLOUT_DEPTH_LIMIT);
|
||||
|
||||
numStateEvaluations++;
|
||||
|
||||
GameResult gameScore = rolloutGameState.getResult();
|
||||
|
||||
if (gameScore.isWinner(player)) {
|
||||
return 1;
|
||||
return new Rollout(initialGameState,rolloutActions,1);
|
||||
} else {
|
||||
return 0;
|
||||
return new Rollout(initialGameState,rolloutActions,0);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
|
||||
public void update(GameTreeNode<MonteCarloProperties> node, Rollout rollout) {
|
||||
GameTreeNode<MonteCarloProperties> currentNode = node;
|
||||
|
||||
int reward = rollout.getReward();
|
||||
|
||||
while (currentNode != null) {
|
||||
MonteCarloProperties nodeProperties = currentNode.getProperties();
|
||||
nodeProperties.setWins(nodeProperties.getWins() + reward);
|
||||
|
||||
@@ -34,6 +34,13 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
|
||||
*/
|
||||
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedMoves, Player player, int nMoves) {
|
||||
List<Action> randomActions = new ArrayList<Action>();
|
||||
|
||||
/*if (gameState.isTerminal()) {
|
||||
randomActions.add(Action.NONE);
|
||||
return randomActions;
|
||||
}*/
|
||||
|
||||
if (player != gameState.getPlayerToMove()) {
|
||||
throw new IllegalArgumentException("It is not " + player
|
||||
+ "'s turn to move!");
|
||||
@@ -45,7 +52,6 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
|
||||
List<Action> possibleActions = actionGenerator.getActions(gameConfig,
|
||||
gameStateCopy, prohibitedMoves, player,
|
||||
ActionGenerator.ALL_ACTIONS);
|
||||
List<Action> randomActions = new ArrayList<Action>();
|
||||
|
||||
//
|
||||
boolean playerIsWinning = gameState.getResult().isWinner(player);
|
||||
@@ -60,13 +66,20 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//if (randomActions.size() == 0) {
|
||||
// randomActions.add(Action.NONE);
|
||||
//}
|
||||
|
||||
//PASS is always the move of last resort if no valid moves exist
|
||||
//Action.NONE exists for a reason - if the fail-safe was to ALWAYS return PASS, then MCTS would
|
||||
//fail to descend properly because the root node would always appear to have additional unexplored actions.
|
||||
if (randomActions.size() == 0) {
|
||||
randomActions.add(Action.PASS);
|
||||
if (prohibitedMoves.contains(Action.PASS)) {
|
||||
randomActions.add(Action.NONE);
|
||||
} else {
|
||||
randomActions.add(Action.PASS);
|
||||
}
|
||||
}
|
||||
|
||||
//when to resign?
|
||||
|
||||
65
src/net/woodyfolsom/msproj/policy/Rollout.java
Normal file
65
src/net/woodyfolsom/msproj/policy/Rollout.java
Normal file
@@ -0,0 +1,65 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
import net.woodyfolsom.msproj.GameState;
|
||||
import net.woodyfolsom.msproj.Player;
|
||||
|
||||
public class Rollout {
|
||||
private GameState initialGameState;
|
||||
private List<Action> blackPlays;
|
||||
private List<Action> playout;
|
||||
private List<Action> whitePlays;
|
||||
private int reward;
|
||||
|
||||
public Rollout(GameState initialGameState, List<Action> playout, int reward) {
|
||||
this.initialGameState = initialGameState;
|
||||
this.playout = playout;
|
||||
this.reward = reward;
|
||||
|
||||
blackPlays = new ArrayList<Action>();
|
||||
whitePlays = new ArrayList<Action>();
|
||||
|
||||
Player playerToMove = initialGameState.getPlayerToMove();
|
||||
|
||||
List<List<Action>> plays = new ArrayList<List<Action>>();
|
||||
|
||||
if (playerToMove == Player.BLACK) {
|
||||
plays.add(blackPlays);
|
||||
plays.add(whitePlays);
|
||||
} else if (playerToMove == Player.WHITE) {
|
||||
plays.add(whitePlays);
|
||||
plays.add(blackPlays);
|
||||
} else {
|
||||
throw new RuntimeException("Invalid player: " + playerToMove);
|
||||
}
|
||||
|
||||
for (int i = 0; i < playout.size(); i++) {
|
||||
plays.get(i%2).add(playout.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
public GameState getInitialGameState() {
|
||||
return initialGameState;
|
||||
}
|
||||
|
||||
public List<Action> getPlayout() {
|
||||
return playout;
|
||||
}
|
||||
|
||||
public int getReward() {
|
||||
return reward;
|
||||
}
|
||||
|
||||
public boolean hasPlay(Player player, Action action) {
|
||||
if (player == Player.BLACK) {
|
||||
return blackPlays.contains(action);
|
||||
} else if (player == Player.WHITE) {
|
||||
return whitePlays.contains(action);
|
||||
} else {
|
||||
throw new RuntimeException("Invalid player: " + player);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -76,22 +76,8 @@ public class RootParallelization implements Policy {
|
||||
int bestWins = 0;
|
||||
int bestSims = 0;
|
||||
|
||||
//int nActions = totalReward.size();
|
||||
//boolean playerToMoveIsWinning = gameState.getResult().isWinner(player);
|
||||
//playerToMove is winning or only one move (PASS) is available
|
||||
//boolean allowPass = playerToMoveIsWinning || nActions == 1;
|
||||
|
||||
for (Action action : totalReward.keySet())
|
||||
{
|
||||
//HEURISTIC - work on ways of removing this go-specific logic
|
||||
//
|
||||
//This heuristic must be duplicated here because RootPar. does not benefit
|
||||
//from MonteCarloUCT culling PASS just before returning from getAction().
|
||||
//if (action.isPass() && !allowPass) {
|
||||
// continue; //If the best rated action is PASS and I'm not winning and there are other valid actions,
|
||||
// //keep searching.
|
||||
//}
|
||||
|
||||
int totalWins = totalReward.get(action);
|
||||
int totalSims = numSims.get(action);
|
||||
|
||||
|
||||
@@ -22,9 +22,19 @@ public class ValidMoveGenerator implements ActionGenerator {
|
||||
public List<Action> getActions(GameConfig gameConfig, GameState gameState,
|
||||
Collection<Action> prohibitedMoves, Player color, int nMoves) {
|
||||
|
||||
List<Action> validMoves = new ArrayList<Action>();
|
||||
|
||||
if (gameState.isTerminal()) {
|
||||
return validMoves;
|
||||
}
|
||||
|
||||
GameState gameStateCopy = new GameState(gameState);
|
||||
List<String> emptyCoordinates = gameStateCopy.getEmptyCoords();
|
||||
List<Action> validMoves = new ArrayList<Action>();
|
||||
|
||||
//Pass is always valid unless prohibited (or if the state is terminal, as above)
|
||||
if (!prohibitedMoves.contains(Action.PASS)) {
|
||||
validMoves.add(Action.PASS);
|
||||
}
|
||||
|
||||
while (emptyCoordinates.size() > 0) {
|
||||
Action nextMove = Action.getInstance(emptyCoordinates
|
||||
@@ -38,10 +48,6 @@ public class ValidMoveGenerator implements ActionGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
if (!prohibitedMoves.contains(Action.PASS)) {
|
||||
validMoves.add(Action.PASS);
|
||||
}
|
||||
|
||||
return validMoves;
|
||||
}
|
||||
}
|
||||
|
||||
18
src/net/woodyfolsom/msproj/tree/AMAFProperties.java
Normal file
18
src/net/woodyfolsom/msproj/tree/AMAFProperties.java
Normal file
@@ -0,0 +1,18 @@
|
||||
package net.woodyfolsom.msproj.tree;
|
||||
|
||||
public class AMAFProperties extends MonteCarloProperties {
|
||||
int amafWins = 0;
|
||||
int amafVisits = 0;
|
||||
public int getAmafWins() {
|
||||
return amafWins;
|
||||
}
|
||||
public void setAmafWins(int amafWins) {
|
||||
this.amafWins = amafWins;
|
||||
}
|
||||
public int getAmafVisits() {
|
||||
return amafVisits;
|
||||
}
|
||||
public void setAmafVisits(int amafVisits) {
|
||||
this.amafVisits = amafVisits;
|
||||
}
|
||||
}
|
||||
@@ -3,38 +3,15 @@ package net.woodyfolsom.msproj;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import net.woodyfolsom.msproj.GameResult;
|
||||
import net.woodyfolsom.msproj.policy.MonteCarloUCT;
|
||||
import net.woodyfolsom.msproj.policy.RandomMovePolicy;
|
||||
import net.woodyfolsom.msproj.policy.RootParallelization;
|
||||
import net.woodyfolsom.msproj.sgf.SGFLexer;
|
||||
import net.woodyfolsom.msproj.sgf.SGFNodeCollection;
|
||||
import net.woodyfolsom.msproj.sgf.SGFParser;
|
||||
|
||||
import org.antlr.runtime.ANTLRInputStream;
|
||||
import org.antlr.runtime.ANTLRStringStream;
|
||||
import org.antlr.runtime.CommonTokenStream;
|
||||
import org.antlr.runtime.RecognitionException;
|
||||
import org.junit.Test;
|
||||
|
||||
public class GameScoreTest {
|
||||
|
||||
// public static final String endGameSGF =
|
||||
// "(;FF[4]GM[1]SZ[9]KM[5.5];B[ef];W[ff];B[dg];W[aa];B[fc];W[da];B[cg];W[ei];B[gf]"
|
||||
// +
|
||||
// ";W[fi];B[ag];W[ii];B[bi];W[if];B[db];W[ci];B[cf];W[ih];B[bc];W[hb];B[eb];W[fh];B[ig];W[hc];B[be];W[he];B[gc];"
|
||||
// +
|
||||
// "W[id];B[cd];W[df];B[hf];W[ah];B[bh];W[fa];B[bg];W[fe];B[ec];W[eh];B[ee];W[bd];B[hg];W[ie];B[fg];W[ca];B[eg];"
|
||||
// +
|
||||
// "W[cb];B[ad];W[ba];B[ch];W[dh];B[gd];W[ic];B[ha];W[ab];B[gh];W[gb];B[ed];W[];B[])";
|
||||
|
||||
//public static final String endGameSGF = "(;FF[4]GM[1]SZ[9]KM[5.5]RE[W+0.5];B[ef];W[cb];B[fe];W[da];B[cd];W[hh];B[ed];W[cc];B[ci];W[bc];B[cg];W[fi];B[be];W[ea];B[hi];W[df];B[fd];W[bg];B[cf];W[aa];B[gd];W[ch];B[ad];W[dg];B[de];W[ge];B[bh];W[fa];B[ag];W[hd];B[if];W[bi];B[gf];W[bd];B[ah];W[gc];B[ff];W[ca];B[hf];W[dd];B[ce];W[ae];B[ga];W[hc];B[ac];W[gg];B[fg];W[fb];B[ie];W[dh];B[af];W[ec];B[dc];W[id];B[dd];W[eh];B[eb];W[gb];B[ae];W[ic];B[di];W[fh];B[ig];W[ab];B[ha];W[hg];B[hb];W[gi];B[ii];W[ia];B[fc];W[ba];B[eg];W[];B[db];W[];B[])";
|
||||
|
||||
public static final String endGameSGF = "(;FF[4]GM[1]SZ[6]KM[1.5]RE[B+0.5];B[bb];W[];B[ec];W[ef];B[ac];W[ed];B[ba];W[dc];B[cf];W[];B[])";
|
||||
@Test
|
||||
public void testGetAggregateScoreZero() {
|
||||
GameResult gameScore = new GameResult(0, 0, 19, 0, true);
|
||||
@@ -56,21 +33,23 @@ public class GameScoreTest {
|
||||
|
||||
@Test
|
||||
public void testScoreEndGame() throws IOException, RecognitionException {
|
||||
InputStream is = new ByteArrayInputStream(endGameSGF.getBytes());
|
||||
GameRecord gameRecord = Referee.replay(is);
|
||||
assertEquals(11, gameRecord.getNumTurns());
|
||||
//test case from:
|
||||
//http://www.online-go.com/faq.php?name=rules
|
||||
GameRecord gameRecord = Referee.replay(new FileInputStream(new File("data/test/ScoreTest.sgf")));
|
||||
GameState gameState = gameRecord.getGameState(gameRecord.getNumTurns());
|
||||
GameConfig gameConfig = gameState.getGameConfig();
|
||||
|
||||
GameState gameState9 = gameRecord.getGameState(9);
|
||||
System.out.println(gameState);
|
||||
System.out.println(gameState.getResult());
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
//Action action = new RootParallelization(4, 1000L).getAction(gameRecord.getGameConfig(), gameState9, Player.WHITE);
|
||||
Action action = new MonteCarloUCT(new RandomMovePolicy(),1000L).getAction(gameRecord.getGameConfig(), gameState9, Player.WHITE);
|
||||
System.out.println("Suggested action for "+Player.WHITE+": " + action);
|
||||
}
|
||||
GameState gameStateCopy = new GameState(gameState);
|
||||
TerritoryMarker.markTerritory(gameStateCopy.getGameBoard());
|
||||
System.out.println(gameStateCopy);
|
||||
|
||||
gameState9.playStone(Player.WHITE, Action.PASS);
|
||||
gameState9.playStone(Player.BLACK, Action.PASS);
|
||||
assertTrue(gameState9.isTerminal());
|
||||
System.out.println(gameState9.getResult());
|
||||
assertEquals(9,gameConfig.getSize());
|
||||
assertEquals(6.0,gameConfig.getKomi(),0.1);
|
||||
|
||||
assertTrue(gameState.isTerminal());
|
||||
assertTrue(gameState.getResult().isWinner(Player.WHITE));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user