Fixed Minimax search to use the new GameTreeNode and MinimaxProperty classes.
The previous implementation was overly complicated and may have been buggy except when searching only 2 plies ahead.
This commit is contained in:
@@ -4,6 +4,8 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
import net.woodyfolsom.msproj.tree.GameTreeNode;
|
||||
import net.woodyfolsom.msproj.tree.MonteCarloProperties;
|
||||
|
||||
public class MonteCarloUCT extends MonteCarlo {
|
||||
|
||||
@@ -12,17 +14,20 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<GameTreeNode> descend(GameTreeNode node) {
|
||||
double bestScore = (double) node.getNumWins() / node.getNumVisits();
|
||||
GameTreeNode bestNode = node;
|
||||
public List<GameTreeNode<MonteCarloProperties>> descend(GameTreeNode<MonteCarloProperties> node) {
|
||||
double bestScore = Double.NEGATIVE_INFINITY;
|
||||
GameTreeNode<MonteCarloProperties> bestNode = node;
|
||||
|
||||
//This appears slightly redundant with getBestAction() but it is not -
|
||||
//descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty)
|
||||
//but getBestAction specifically asks for the optimum action to take from the current node,
|
||||
//even if it results in a worse next state.
|
||||
for (Action action : node.getActions()) {
|
||||
GameTreeNode childNode = node.getChild(action);
|
||||
double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
|
||||
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
|
||||
|
||||
MonteCarloProperties properties = childNode.getProperties();
|
||||
double childScore = (double) properties.getWins() / properties.getVisits();
|
||||
|
||||
if (childScore >= bestScore) {
|
||||
bestScore = childScore;
|
||||
bestNode = childNode;
|
||||
@@ -30,7 +35,7 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
}
|
||||
|
||||
if (bestNode == node) {
|
||||
List<GameTreeNode> bestNodeList = new ArrayList<GameTreeNode>();
|
||||
List<GameTreeNode<MonteCarloProperties>> bestNodeList = new ArrayList<GameTreeNode<MonteCarloProperties>>();
|
||||
bestNodeList.add(bestNode);
|
||||
return bestNodeList;
|
||||
} else {
|
||||
@@ -39,13 +44,16 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getBestAction(GameTreeNode node) {
|
||||
public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
|
||||
Action bestAction = Action.NONE;
|
||||
double bestScore = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Action action : node.getActions()) {
|
||||
GameTreeNode childNode = node.getChild(action);
|
||||
double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
|
||||
GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
|
||||
|
||||
MonteCarloProperties properties = childNode.getProperties();
|
||||
double childScore = (double) properties.getWins() / properties.getVisits();
|
||||
|
||||
if (childScore >= bestScore) {
|
||||
bestScore = childScore;
|
||||
bestAction = action;
|
||||
@@ -56,19 +64,19 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<GameTreeNode> grow(GameTreeNode node) {
|
||||
public List<GameTreeNode<MonteCarloProperties>> grow(GameTreeNode<MonteCarloProperties> node) {
|
||||
// TODO Auto-generated method stub - not implemented yet; this placeholder ignores 'node' and always returns null.
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int rollout(GameTreeNode node) {
|
||||
public int rollout(GameTreeNode<MonteCarloProperties> node) {
|
||||
// TODO Auto-generated method stub - not implemented yet; this placeholder ignores 'node' and always returns 0.
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(GameTreeNode node, int reward) {
|
||||
public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
|
||||
// TODO Auto-generated method stub - not implemented yet; this placeholder ignores 'node' and 'reward' and does nothing.
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user