Fixed Minimax search to use the new GameTreeNode, MinimaxProperty classes.

The previous implementation was overly complicated and was likely buggy whenever the search looked more than 2 plies ahead.
This commit is contained in:
cs6601
2012-08-30 10:51:04 -04:00
parent 2e40440838
commit 4a1c64843d
12 changed files with 249 additions and 153 deletions

View File

@@ -4,6 +4,8 @@ import java.util.ArrayList;
import java.util.List;
import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.tree.GameTreeNode;
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
public class MonteCarloUCT extends MonteCarlo {
@@ -12,17 +14,20 @@ public class MonteCarloUCT extends MonteCarlo {
}
@Override
// NOTE(review): this span is a rendered diff hunk whose +/- markers were stripped,
// so the pre-change (raw-type) and post-change (generic) lines both appear below.
// The GameTreeNode<MonteCarloProperties> lines are the post-commit version.
public List<GameTreeNode> descend(GameTreeNode node) {
double bestScore = (double) node.getNumWins() / node.getNumVisits();
GameTreeNode bestNode = node;
// Post-commit: seed with -infinity so the first child examined always becomes
// the initial candidate, instead of comparing children against the current
// node's own win/visit ratio as the old code did.
public List<GameTreeNode<MonteCarloProperties>> descend(GameTreeNode<MonteCarloProperties> node) {
double bestScore = Double.NEGATIVE_INFINITY;
GameTreeNode<MonteCarloProperties> bestNode = node;
//This appears slightly redundant with getBestAction() but it is not -
//descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty)
//but getBestAction specifically asks for the optimum action to take from the current node,
//even if it results in a worse next state.
for (Action action : node.getActions()) {
GameTreeNode childNode = node.getChild(action);
double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
// Post-commit: win/visit statistics now live on a MonteCarloProperties object
// attached to the node rather than on the node itself.
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
MonteCarloProperties properties = childNode.getProperties();
double childScore = (double) properties.getWins() / properties.getVisits();
// '>=' means a tie is broken in favor of the most recently examined child.
if (childScore >= bestScore) {
bestScore = childScore;
bestNode = childNode;
@@ -30,7 +35,7 @@ public class MonteCarloUCT extends MonteCarlo {
}
// Base case: no child beat the current node, so the descent path ends here.
if (bestNode == node) {
List<GameTreeNode> bestNodeList = new ArrayList<GameTreeNode>();
List<GameTreeNode<MonteCarloProperties>> bestNodeList = new ArrayList<GameTreeNode<MonteCarloProperties>>();
bestNodeList.add(bestNode);
return bestNodeList;
} else {
// NOTE(review): the else branch is elided by the diff context gap below —
// presumably it recurses via descend(bestNode) and prepends/appends this
// node to the returned path; confirm against the full file.
@@ -39,13 +44,16 @@ public class MonteCarloUCT extends MonteCarlo {
}
@Override
// NOTE(review): diff hunk with +/- markers stripped — old raw-type signature
// and new generic signature both appear; the generic one is post-commit.
public Action getBestAction(GameTreeNode node) {
public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
// Start from Action.NONE / -infinity so a node with at least one child
// always yields that child's action rather than NONE.
Action bestAction = Action.NONE;
double bestScore = Double.NEGATIVE_INFINITY;
for (Action action : node.getActions()) {
GameTreeNode childNode = node.getChild(action);
double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
// Post-commit: statistics read from the child's MonteCarloProperties.
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
MonteCarloProperties properties = childNode.getProperties();
double childScore = (double) properties.getWins() / properties.getVisits();
// '>=' breaks ties in favor of the most recently examined action.
if (childScore >= bestScore) {
bestScore = childScore;
bestAction = action;
// NOTE(review): the tail of this method (closing the loop and the
// 'return bestAction;') is elided at the diff context boundary below.
@@ -56,19 +64,19 @@ public class MonteCarloUCT extends MonteCarlo {
}
@Override
public List<GameTreeNode> grow(GameTreeNode node) {
public List<GameTreeNode<MonteCarloProperties>> grow(GameTreeNode<MonteCarloProperties> node) {
// TODO Auto-generated method stub
return null;
}
@Override
public int rollout(GameTreeNode node) {
public int rollout(GameTreeNode<MonteCarloProperties> node) {
// TODO Auto-generated method stub
return 0;
}
@Override
public void update(GameTreeNode node, int reward) {
public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
// TODO Auto-generated method stub
}