Fixed invalid move by Monte Carlo UCT. Still does not handle player's PASS.

This commit is contained in:
2012-09-24 11:08:03 -04:00
parent ded284deb4
commit 8d466d90fe
20 changed files with 4368 additions and 1380 deletions

View File

@@ -7,17 +7,20 @@ public class GameState {
private int blackPrisoners = 0; private int blackPrisoners = 0;
private int whitePrisoners = 0; private int whitePrisoners = 0;
private GameBoard gameBoard; private GameBoard gameBoard;
private Player playerToMove;
public GameState(int size) { public GameState(int size) {
if (size < 1 || size > 19) { if (size < 1 || size > 19) {
throw new IllegalArgumentException("Invalid board size: " + size); throw new IllegalArgumentException("Invalid board size: " + size);
} }
gameBoard = new GameBoard(size); gameBoard = new GameBoard(size);
playerToMove = Player.BLACK;
} }
public GameState(GameState that) { public GameState(GameState that) {
this.blackPrisoners = that.blackPrisoners; this.blackPrisoners = that.blackPrisoners;
this.whitePrisoners = that.whitePrisoners; this.whitePrisoners = that.whitePrisoners;
this.playerToMove = that.playerToMove;
gameBoard = new GameBoard(that.gameBoard); gameBoard = new GameBoard(that.gameBoard);
} }
@@ -25,6 +28,7 @@ public class GameState {
blackPrisoners = 0; blackPrisoners = 0;
whitePrisoners = 0; whitePrisoners = 0;
gameBoard.clear(); gameBoard.clear();
playerToMove = Player.BLACK;
} }
public int getBlackPrisoners() { public int getBlackPrisoners() {
@@ -49,6 +53,10 @@ public class GameState {
return gameBoard; return gameBoard;
} }
public Player getPlayerToMove() {
return playerToMove;
}
public int getWhitePrisoners() { public int getWhitePrisoners() {
return whitePrisoners; return whitePrisoners;
} }
@@ -72,12 +80,16 @@ public class GameState {
* @return * @return
*/ */
public boolean playStone(Player player, Action action) { public boolean playStone(Player player, Action action) {
if (player != playerToMove) {
throw new IllegalArgumentException("Requested " + player + " move, but it is " + playerToMove +"'s turn!");
}
if (player == Player.NONE) { if (player == Player.NONE) {
throw new IllegalArgumentException("Cannot play as " + player); throw new IllegalArgumentException("Cannot play as " + player);
} }
if (action.isPass()) { if (action.isPass()) {
playerToMove = GoGame.getNextPlayer(player);
return true; return true;
} }
@@ -189,6 +201,7 @@ public class GameState {
return false; return false;
} else { } else {
//assertCorrectHash(); //assertCorrectHash();
playerToMove = GoGame.getNextPlayer(player);
return true; return true;
} }
} }

View File

@@ -139,8 +139,8 @@ public class GoGame implements Runnable {
localOutput.println("=\n"); localOutput.println("=\n");
break; break;
case genmove: case genmove:
LOGGER.info("Generating move for:\n" + gameState); System.out.println("Generating move for:\n" + gameState);
System.out.println("It is currently " + gameState.getPlayerToMove() + "'s turn.");
String playerName = cmd.getStringField(1); String playerName = cmd.getStringField(1);
Player player; Player player;
@@ -224,6 +224,8 @@ public class GoGame implements Runnable {
executeCommand(cmd); executeCommand(cmd);
System.out.println("It is now " + gameState.getPlayerToMove() + "'s turn.\n");
if (cmd.getType() == Command.TYPE.genmove) { if (cmd.getType() == Command.TYPE.genmove) {
System.out.println("New game state:\n" + gameState); System.out.println("New game state:\n" + gameState);
} }

View File

@@ -39,6 +39,9 @@ public abstract class MonteCarlo implements Policy {
Player player) { Player player) {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
if (gameState.getPlayerToMove() != player) {
throw new RuntimeException("getAction(..." + player +") was requested but GameState.playerToMove was " + gameState.getPlayerToMove());
}
//If for some reason no moves are evaluated within the time limit, pass. //If for some reason no moves are evaluated within the time limit, pass.
//Note that this may lose the game by forfeit even when picking any random move could //Note that this may lose the game by forfeit even when picking any random move could
//result in a win. //result in a win.
@@ -51,10 +54,10 @@ public abstract class MonteCarlo implements Policy {
List<GameTreeNode<MonteCarloProperties>> selectedNodes = descend(rootNode); List<GameTreeNode<MonteCarloProperties>> selectedNodes = descend(rootNode);
List<GameTreeNode<MonteCarloProperties>> newLeaves = new ArrayList<GameTreeNode<MonteCarloProperties>>(); List<GameTreeNode<MonteCarloProperties>> newLeaves = new ArrayList<GameTreeNode<MonteCarloProperties>>();
Player nextPlayer = GoGame.getNextPlayer(player);
for (GameTreeNode<MonteCarloProperties> selectedNode: selectedNodes) { for (GameTreeNode<MonteCarloProperties> selectedNode: selectedNodes) {
for (GameTreeNode<MonteCarloProperties> newLeaf : grow(gameConfig, selectedNode, nextPlayer)) { Player playerToMove = selectedNode.getGameState().getPlayerToMove();
for (GameTreeNode<MonteCarloProperties> newLeaf : grow(gameConfig, selectedNode, playerToMove)) {
newLeaves.add(newLeaf); newLeaves.add(newLeaf);
} }
} }

View File

@@ -27,7 +27,7 @@ public class MonteCarloUCT extends MonteCarlo {
GameTreeNode<MonteCarloProperties> bestNode = node; GameTreeNode<MonteCarloProperties> bestNode = node;
//What if the optimum leaf node is actually a terminal node? //What if the optimum leaf node is actually a terminal node?
//Grom Kocsis and Szepesvari, the value of an actual terminal node is 0, so it will never be grown. //From Kocsis and Szepesvari, the value of an actual terminal node is 0, so it will never be grown.
double nodeVisits = node.getProperties().getVisits(); double nodeVisits = node.getProperties().getVisits();
@@ -74,13 +74,13 @@ public class MonteCarloUCT extends MonteCarlo {
@Override @Override
public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player) { public List<GameTreeNode<MonteCarloProperties>> grow(GameConfig gameConfig, GameTreeNode<MonteCarloProperties> node, Player player) {
GameState nextGameState = new GameState(node.getGameState());
Policy randomMovePolicy = new RandomMovePolicy(); Policy randomMovePolicy = new RandomMovePolicy();
Set<Action> exploredActions = node.getActions(); Set<Action> exploredActions = node.getActions();
Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player); Action action = randomMovePolicy.getAction(gameConfig, node.getGameState(), exploredActions, player);
if (Action.NONE == action) { if (Action.NONE == action) {
throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions); throw new RuntimeException("Unable to grow node - are all actions already explored? Board state: " + node.getGameState() + "\nExplored actions: " + exploredActions);
} }
GameState nextGameState = new GameState(node.getGameState());
nextGameState.playStone(player, action); nextGameState.playStone(player, action);
List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>(); List<GameTreeNode<MonteCarloProperties>> newChildren = new ArrayList<GameTreeNode<MonteCarloProperties>>();
GameTreeNode<MonteCarloProperties> newChild = new GameTreeNode<MonteCarloProperties>(nextGameState,new MonteCarloProperties()); GameTreeNode<MonteCarloProperties> newChild = new GameTreeNode<MonteCarloProperties>(nextGameState,new MonteCarloProperties());
@@ -92,23 +92,23 @@ public class MonteCarloUCT extends MonteCarlo {
} }
@Override @Override
/** /**
* Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter, * Rollout currently depends on the hardcoded ROLLOUT_DEPTH_LIMIT superclass parameter,
* since without (super)ko detection, there is no way to guarantee a rollout will terminate. * Even with super-ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
* Even with ko detection, a rollout might take an unrealistically long time due to unlikely playouts.
*/ */
public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player) { public int rollout(GameConfig gameConfig, StateEvaluator stateEvaluator, GameTreeNode<MonteCarloProperties> node, Player player) {
Policy randomMovePolicy = new RandomMovePolicy(); Policy randomMovePolicy = new RandomMovePolicy();
Action action; Action action;
int rolloutDepth = 0; int rolloutDepth = 0;
GameState finalGameState = new GameState(node.getGameState()); GameState rolloutGameState = new GameState(node.getGameState());
Player currentPlayer = player; Player currentPlayer = rolloutGameState.getPlayerToMove();
do { do {
rolloutDepth++; rolloutDepth++;
action = randomMovePolicy.getAction(gameConfig, finalGameState, currentPlayer); action = randomMovePolicy.getAction(gameConfig, rolloutGameState, currentPlayer);
if (action != Action.NONE) { if (action != Action.NONE) {
if (!finalGameState.playStone(currentPlayer, action)) { if (!rolloutGameState.playStone(currentPlayer, action)) {
throw new RuntimeException("Failed to play move selected by RandomMovePolicy"); throw new RuntimeException("Failed to play move selected by RandomMovePolicy");
} }
currentPlayer = GoGame.getNextPlayer(currentPlayer); currentPlayer = GoGame.getNextPlayer(currentPlayer);
@@ -117,7 +117,7 @@ public class MonteCarloUCT extends MonteCarlo {
numStateEvaluations++; numStateEvaluations++;
if (stateEvaluator.scoreGame(finalGameState).isWinner(player)) { if (stateEvaluator.scoreGame(rolloutGameState).isWinner(player)) {
return 1; return 1;
} else { } else {
return 0; return 0;

View File

@@ -34,6 +34,10 @@ public class RandomMovePolicy implements Policy, ActionGenerator {
*/ */
public List<Action> getActions(GameConfig gameConfig, GameState gameState, public List<Action> getActions(GameConfig gameConfig, GameState gameState,
Collection<Action> prohibitedMoves, Player player, int nMoves) { Collection<Action> prohibitedMoves, Player player, int nMoves) {
if (player != gameState.getPlayerToMove()) {
throw new IllegalArgumentException("It is not " + player + "'s turn to move!");
}
GameState gameStateCopy = new GameState(gameState); GameState gameStateCopy = new GameState(gameState);
ActionGenerator actionGenerator = new ValidMoveGenerator(); ActionGenerator actionGenerator = new ValidMoveGenerator();

View File

@@ -1,5 +1,3 @@
T__19=19
T__20=20
T__21=21 T__21=21
T__22=22 T__22=22
T__23=23 T__23=23
@@ -19,39 +17,53 @@ T__36=36
T__37=37 T__37=37
T__38=38 T__38=38
T__39=39 T__39=39
T__40=40
T__41=41
T__42=42
T__43=43
T__44=44
T__45=45
T__46=46
COLON=4 COLON=4
COMMA=5 COMMA=5
DIGIT=6 CR=6
LBRACKET=7 DIGIT=7
LCLETTER=8 LBRACKET=8
LPAREN=9 LCLETTER=9
MINUS=10 LPAREN=10
PERIOD=11 MINUS=11
PLUS=12 NEWLINE=12
RBRACKET=13 PERIOD=13
RPAREN=14 PLUS=14
SEMICOLON=15 RBRACKET=15
SLASH=16 RPAREN=16
SPACE=17 SEMICOLON=17
UCLETTER=18 SLASH=18
'AB'=19 SPACE=19
'AP'=20 UCLETTER=20
'BC'=21 'AB'=21
'BR'=22 'AP'=22
'CA'=23 'AW'=23
'CP'=24 'B'=24
'EV'=25 'BC'=25
'FF'=26 'BR'=26
'GM'=27 'CA'=27
'KM'=28 'CP'=28
'PB'=29 'DT'=29
'PC'=30 'EV'=30
'PW'=31 'FF'=31
'RE'=32 'GM'=32
'RU'=33 'KM'=33
'SO'=34 'PB'=34
'SZ'=35 'PC'=35
'TM'=36 'PW'=36
'US'=37 'R'=37
'WC'=38 'RE'=38
'WR'=39 'RU'=39
'SO'=40
'SZ'=41
'TM'=42
'US'=43
'W'=44
'WC'=45
'WR'=46

View File

@@ -0,0 +1,27 @@
package net.woodyfolsom.msproj.sgf;
/**
 * A board coordinate held as two characters: a column character followed by a
 * row character. Instances are immutable and compare by value.
 */
public class SGFCoord {
    private final char column;
    private final char row;

    /**
     * Builds a coordinate from its two-character text form.
     *
     * @param coords exactly two characters; the first is the column, the second the row
     * @throws IllegalArgumentException if {@code coords} is null or its length is not 2
     */
    public SGFCoord(String coords) {
        if (coords == null || coords.length() != 2) {
            // Spell out what was wrong; the raw input alone (possibly null) is not a useful message.
            String shown = (coords == null) ? "null" : "'" + coords + "'";
            throw new IllegalArgumentException(
                    "SGF coordinate must be exactly 2 characters, but was: " + shown);
        }
        column = coords.charAt(0);
        row = coords.charAt(1);
    }

    /**
     * Builds a coordinate from separate column and row characters.
     *
     * @param column the column character
     * @param row the row character
     */
    public SGFCoord(char column, char row) {
        this.column = column;
        this.row = row;
    }

    /** @return the column character */
    public char getColumn() {
        return column;
    }

    /** @return the row character */
    public char getRow() {
        return row;
    }

    /** Two coordinates are equal when both their column and row characters match. */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof SGFCoord)) {
            return false;
        }
        SGFCoord that = (SGFCoord) other;
        return column == that.column && row == that.row;
    }

    @Override
    public int hashCode() {
        return 31 * column + row;
    }

    /** @return the two-character text form, column first, as accepted by {@link #SGFCoord(String)} */
    @Override
    public String toString() {
        return String.valueOf(new char[] { column, row });
    }
}

View File

@@ -0,0 +1,35 @@
package net.woodyfolsom.msproj.sgf;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * A game tree made of an ordered sequence of nodes plus any number of
 * child game trees.
 */
public class SGFGameTree {
    private final List<SGFNode> nodeSequence = new ArrayList<SGFNode>();
    private final List<SGFGameTree> subTrees = new ArrayList<SGFGameTree>();

    /** @return the number of nodes in this tree's own node sequence */
    public int getNodeCount() {
        return nodeSequence.size();
    }

    /**
     * @return a read-only view of the node sequence; it reflects later changes
     *         made through {@link #setNodeSequence(List)}
     */
    public List<SGFNode> getNodeSequence() {
        return Collections.unmodifiableList(nodeSequence);
    }

    /**
     * Replaces the node sequence with a copy of the given list.
     *
     * @param nodeSequence the nodes to store, in order
     * @throws NullPointerException if {@code nodeSequence} is null; the current
     *         sequence is left untouched in that case
     */
    public void setNodeSequence(List<SGFNode> nodeSequence) {
        // Snapshot before clearing: the argument may be the view returned by
        // getNodeSequence(), which is backed by the very list being cleared.
        List<SGFNode> snapshot = new ArrayList<SGFNode>(nodeSequence);
        this.nodeSequence.clear();
        this.nodeSequence.addAll(snapshot);
    }

    /**
     * Appends a child game tree.
     *
     * @param subTree the child tree to add
     */
    public void addSubTree(SGFGameTree subTree) {
        subTrees.add(subTree);
    }

    /** @return the number of child game trees added so far */
    public int getSubTreeCount() {
        return subTrees.size();
    }
}

View File

@@ -0,0 +1,26 @@
package net.woodyfolsom.msproj.sgf;
/**
 * A fixed set of property identifiers, each wrapping its textual code.
 * The constructor is private, so the constants below are the only instances.
 */
public class SGFIdentifier {
    /** Identifier with text "AB". */
    public static final SGFIdentifier ADD_BLACK = new SGFIdentifier("AB");
    /** Identifier with text "AW". */
    public static final SGFIdentifier ADD_WHITE = new SGFIdentifier("AW");
    /** Identifier with text "CA". */
    public static final SGFIdentifier CHARSET = new SGFIdentifier("CA");
    /** Identifier with text "FF". */
    public static final SGFIdentifier FILE_FORMAT = new SGFIdentifier("FF");
    /** Identifier with text "GM". */
    public static final SGFIdentifier GAME = new SGFIdentifier("GM");
    /** Identifier with text "KM". */
    public static final SGFIdentifier KOMI = new SGFIdentifier("KM");
    /** Identifier with text "B". */
    public static final SGFIdentifier MOVE_BLACK = new SGFIdentifier("B");
    /** Identifier with text "W". */
    public static final SGFIdentifier MOVE_WHITE = new SGFIdentifier("W");
    /** Identifier with text "RE". */
    public static final SGFIdentifier RESULT = new SGFIdentifier("RE");
    /** Identifier with text "SZ". */
    public static final SGFIdentifier SIZE = new SGFIdentifier("SZ");
    /** Identifier with text "TM". */
    public static final SGFIdentifier TIME = new SGFIdentifier("TM");

    private final String text;

    private SGFIdentifier(String code) {
        text = code;
    }

    /** @return the identifier's textual code, e.g. "AB" */
    @Override
    public String toString() {
        return this.text;
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,16 @@
package net.woodyfolsom.msproj.sgf;
import java.util.ArrayList;
import java.util.List;
/**
 * A node that accumulates properties in the order they are added.
 */
public class SGFNode {
    private final List<SGFProperty> properties = new ArrayList<SGFProperty>();

    /** @return how many properties have been added to this node */
    public int getPropertyCount() {
        return this.properties.size();
    }

    /**
     * Appends a property to this node.
     *
     * @param property the property to append
     */
    public void addProperty(SGFProperty property) {
        this.properties.add(property);
    }
}

View File

@@ -0,0 +1,25 @@
package net.woodyfolsom.msproj.sgf;
import java.util.ArrayList;
import java.util.List;
/**
 * An ordered collection of game trees.
 */
public class SGFNodeCollection {
    private final List<SGFGameTree> gameTrees = new ArrayList<SGFGameTree>();

    /**
     * Appends a game tree to the collection.
     *
     * @param gameTree the game tree to append
     */
    public void add(SGFGameTree gameTree) {
        gameTrees.add(gameTree);
    }

    /**
     * @param index zero-based position of the game tree
     * @return the game tree stored at {@code index}
     * @throws IndexOutOfBoundsException if {@code index} is outside
     *         {@code [0, getGameTreeCount())}
     */
    public SGFGameTree getGameTree(int index) {
        return gameTrees.get(index);
    }

    /** @return the number of game trees in the collection */
    public int getGameTreeCount() {
        return gameTrees.size();
    }

    /**
     * @return a short diagnostic summary (class name and game tree count);
     *         this is not an SGF serialization of the collection
     */
    @Override
    public String toString() {
        // Replaces the former placeholder text with something usable in logs.
        return "SGFNodeCollection[gameTreeCount=" + gameTrees.size() + "]";
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
package net.woodyfolsom.msproj.sgf;
/**
 * The two players, exposed as the singleton constants {@link #BLACK} and
 * {@link #WHITE}. Each has a colour name ("Black"/"White") and a one-letter
 * code ("B"/"W") used by {@link #getInstance(String)} and {@link #toString()}.
 */
public class SGFPlayer {
    public static final SGFPlayer BLACK = new SGFPlayer("Black");
    public static final SGFPlayer WHITE = new SGFPlayer("White");

    private final String color;

    private SGFPlayer(String color) {
        this.color = color;
    }

    /**
     * Looks up a player by its one-letter code.
     *
     * @param color "B" for {@link #BLACK} or "W" for {@link #WHITE}
     * @return the matching player constant
     * @throws IllegalArgumentException if {@code color} is anything else, including null
     */
    public static SGFPlayer getInstance(String color) {
        if ("B".equals(color)) {
            return BLACK;
        }
        if ("W".equals(color)) {
            return WHITE;
        }
        throw new IllegalArgumentException(color);
    }

    /** @return the colour name, "Black" or "White" */
    public String getColor() {
        return this.color;
    }

    /** @return the one-letter code, "B" or "W" */
    @Override
    public String toString() {
        if (this == BLACK) {
            return "B";
        }
        if (this == WHITE) {
            return "W";
        }
        // Not reachable through the public API: the constructor is private.
        throw new RuntimeException("Invalid player");
    }
}

View File

@@ -1,22 +1,26 @@
package net.woodyfolsom.msproj.sgf; package net.woodyfolsom.msproj.sgf;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class SGFProperty { public class SGFProperty {
private StrIdent ident; private SGFIdentifier ident;
private StrValue value; private List<SGFValue<?>> values = new ArrayList<SGFValue<?>>();
public StrIdent getIdent() { public void addValue(SGFValue<?> value) {
this.values.add(value);
}
public SGFIdentifier getIdentifier() {
return ident; return ident;
} }
public StrValue getValue() { public List<SGFValue<?>> getValues() {
return value; return Collections.unmodifiableList(values);
} }
public void setIdent(StrIdent ident) { public void setIdentifier(SGFIdentifier ident) {
this.ident = ident; this.ident = ident;
} }
public void setValue(StrValue value) {
this.value = value;
}
} }

View File

@@ -0,0 +1,31 @@
package net.woodyfolsom.msproj.sgf;
/**
 * A parsed game result: a draw, a win by resignation, or a win by a numeric
 * score margin.
 */
public class SGFResult {
    private boolean resignation = false;
    private boolean tie = false;
    private double score;
    private SGFPlayer winner;

    /**
     * Parses a result string.
     *
     * <p>Accepted forms:
     * <ul>
     * <li>{@code "0"} or {@code "Draw"}: a drawn game</li>
     * <li>{@code "<player>+R"} or {@code "<player>+Resign"}: win by resignation</li>
     * <li>{@code "<player>+<number>"}: win by the given score margin</li>
     * </ul>
     * where {@code <player>} is whatever {@link SGFPlayer#getInstance(String)} accepts.
     *
     * @param value the result text
     * @throws IllegalArgumentException if {@code value} is null, has no
     *         {@code +outcome} part, or names an unknown player
     * @throws NumberFormatException if the outcome is neither a resignation
     *         marker nor a parseable number (for example a time or forfeit marker)
     */
    public SGFResult(String value) {
        if (value == null) {
            throw new IllegalArgumentException("SGF result must not be null");
        }
        if ("0".equals(value) || "Draw".equals(value)) {
            tie = true;
            return;
        }
        String[] params = value.split("\\+");
        if (params.length < 2) {
            // Covers inputs such as "B" or "B+", which would otherwise fail
            // with an ArrayIndexOutOfBoundsException on params[1].
            throw new IllegalArgumentException(
                    "SGF result must be a draw or '<player>+<outcome>', but was: '" + value + "'");
        }
        winner = SGFPlayer.getInstance(params[0]);
        String outcome = params[1];
        if ("R".equals(outcome) || "Resign".equals(outcome)) {
            resignation = true;
        } else {
            score = Double.parseDouble(outcome);
        }
    }

    /**
     * @return {@code "0"} for a draw, {@code "<player>+R"} for a resignation,
     *         otherwise {@code "<player>+<score>"}
     */
    @Override
    public String toString() {
        if (tie) {
            return "0";
        }
        if (resignation) {
            return winner.toString() + "+R";
        }
        return winner.toString() + "+" + score;
    }
}

View File

@@ -0,0 +1,21 @@
package net.woodyfolsom.msproj.sgf;
/**
 * Pairs a typed value with the text obtained from its {@code toString()} at
 * construction time.
 *
 * @param <T> the type of the wrapped value
 */
public class SGFValue<T> {
    /** A value wrapping the empty string. */
    public static final SGFValue<String> EMPTY = new SGFValue<String>("");

    private final T value;
    private final String text;

    /**
     * @param value the value to wrap
     * @throws NullPointerException if {@code value} is null
     */
    public SGFValue(T value) {
        this.value = value;
        this.text = value.toString();
    }

    /** @return the wrapped value */
    public T getValue() {
        return this.value;
    }

    /** @return the text captured from the value when this object was created */
    public String getText() {
        return this.text;
    }
}

View File

@@ -6,6 +6,7 @@ import java.util.Set;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.GameState;
import net.woodyfolsom.msproj.Player;
public class GameTreeNode<T extends GameTreeNodeProperties> { public class GameTreeNode<T extends GameTreeNodeProperties> {
private GameState gameState; private GameState gameState;

View File

@@ -1,5 +1,6 @@
package net.woodyfolsom.msproj; package net.woodyfolsom.msproj;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import org.junit.Test; import org.junit.Test;
@@ -9,8 +10,14 @@ public class LegalMoveTest {
public void testLegalMove1Liberty() { public void testLegalMove1Liberty() {
GameState gameState = new GameState(5); GameState gameState = new GameState(5);
gameState.playStone(Player.BLACK, Action.getInstance("A2")); gameState.playStone(Player.BLACK, Action.getInstance("A2"));
assertEquals(Player.WHITE, gameState.getPlayerToMove());
gameState.playStone(Player.WHITE, Action.PASS);
gameState.playStone(Player.BLACK, Action.getInstance("B3")); gameState.playStone(Player.BLACK, Action.getInstance("B3"));
gameState.playStone(Player.WHITE, Action.PASS);
gameState.playStone(Player.BLACK, Action.getInstance("B1")); gameState.playStone(Player.BLACK, Action.getInstance("B1"));
assertTrue(gameState.playStone(Player.WHITE, Action.getInstance("B2"))); assertTrue(gameState.playStone(Player.WHITE, Action.getInstance("B2")));
System.out.println(gameState); System.out.println(gameState);
} }

View File

@@ -1,6 +1,7 @@
package net.woodyfolsom.msproj.policy; package net.woodyfolsom.msproj.policy;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import net.woodyfolsom.msproj.Action; import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig; import net.woodyfolsom.msproj.GameConfig;
@@ -54,4 +55,25 @@ public class MonteCarloUCTTest {
System.out.println(gameState); System.out.println(gameState);
} }
@Test
public void testIllegalMoveRejection() {
Policy treeSearch = new MonteCarloUCT(new RandomMovePolicy(),2000L);
GameState gameState = new GameState(4);
gameState.playStone(Player.WHITE, Action.getInstance("A2"));
gameState.playStone(Player.WHITE, Action.getInstance("B1"));
gameState.playStone(Player.WHITE, Action.getInstance("C2"));
gameState.playStone(Player.WHITE, Action.getInstance("B3"));
Action move;
for (int i = 0; i < 10; i++) {
move = treeSearch.getAction(new GameConfig(), gameState, Player.BLACK);
System.out.println("Generated move: " + move);
GameState stateCopy = new GameState(gameState);
stateCopy.playStone(Player.BLACK, move);
System.out.println(stateCopy);
assertFalse(Action.getInstance("B2").equals(move));
}
}
} }