Refactoring in progress.

Player and Action classes are now singletons (factory pattern) rather than String values.
Implementing more general tree-search code for minimax, alpha-beta, and Monte Carlo using simplified backup logic.
This commit is contained in:
cs6601
2012-08-30 08:41:03 -04:00
parent b44b666663
commit 2e40440838
26 changed files with 647 additions and 433 deletions

View File

@@ -1,5 +1,9 @@
package net.woodyfolsom.msproj.policy;
import java.util.ArrayList;
import java.util.List;
import net.woodyfolsom.msproj.Action;
public class MonteCarloUCT extends MonteCarlo {
@@ -8,27 +12,65 @@ public class MonteCarloUCT extends MonteCarlo {
}
@Override
public MonteCarloTreeNode descend(MonteCarloTreeNode node) {
public List<GameTreeNode> descend(GameTreeNode node) {
    // Selects the node to work on next by following win-rate (wins / visits)
    // down the tree, and returns it wrapped in a single-element list.
    //
    // This is not the same question getBestAction() answers: descend() may
    // settle on the current node itself when none of its children matches or
    // beats the node's own win rate, whereas getBestAction() always ranks the
    // children only, even if every one of them leads to a worse state.
    GameTreeNode selected = node;
    double topRate = (double) node.getNumWins() / node.getNumVisits();

    for (Action move : node.getActions()) {
        GameTreeNode candidate = node.getChild(move);
        double candidateRate = (double) candidate.getNumWins() / candidate.getNumVisits();

        // Deliberately phrased as a negated ">=" rather than "<": a rate that is
        // NaN (e.g. 0 wins / 0 visits) compares false either way and must be
        // skipped, and a NaN topRate must reject every candidate.
        if (!(candidateRate >= topRate)) {
            continue;
        }

        // Ties go to the candidate, so the last equally-rated child wins.
        topRate = candidateRate;
        selected = candidate;
    }

    if (selected != node) {
        // A child is at least as good as this node: keep walking down from it.
        return descend(selected);
    }

    // No child matched this node's own rate, so the walk stops here.
    List<GameTreeNode> result = new ArrayList<GameTreeNode>();
    result.add(selected);
    return result;
}
/**
 * Returns the action whose child node has the highest win rate
 * (wins divided by visits).
 *
 * <p>Only the children of {@code node} are compared; the node's own
 * statistics play no part. When several children share the highest rate, the
 * one encountered last while iterating {@code node.getActions()} is chosen.
 * A child whose rate evaluates to NaN (for instance 0 wins over 0 visits) is
 * never chosen.
 *
 * @param node the node whose available actions are ranked
 * @return the best-rated action, or {@code Action.NONE} if no child qualified
 */
@Override
public Action getBestAction(GameTreeNode node) {
    double topRate = Double.NEGATIVE_INFINITY;
    Action choice = Action.NONE;

    for (Action candidate : node.getActions()) {
        GameTreeNode child = node.getChild(candidate);
        double rate = (double) child.getNumWins() / child.getNumVisits();

        // Negated ">=" (not "<") so that a NaN rate is skipped, exactly as a
        // plain ">=" test would skip it.
        if (!(rate >= topRate)) {
            continue;
        }

        topRate = rate;
        choice = candidate;
    }

    return choice;
}
/**
 * Stub for {@code grow}; not implemented yet.
 *
 * @param node the node passed in by the caller (currently ignored)
 * @return always {@code null} until this method is implemented
 */
@Override
public List<GameTreeNode> grow(GameTreeNode node) {
// TODO: implement. Callers currently receive null, not an empty list.
return null;
}
/**
 * Stub for {@code grow} taking a {@code MonteCarloTreeNode}; not implemented.
 *
 * NOTE(review): this looks like the pre-refactoring signature of grow() -
 * confirm whether it is still required alongside the GameTreeNode variant.
 *
 * @param node the node passed in by the caller (currently ignored)
 * @return always {@code null}
 */
@Override
public MonteCarloTreeNode grow(MonteCarloTreeNode node) {
// TODO: implement or remove; always returns null for now.
return null;
}
@Override
public int rollout(MonteCarloTreeNode node) {
public int rollout(GameTreeNode node) {
// Stub: not implemented yet. The node argument is ignored and 0 is
// returned unconditionally.
// TODO: implement.
return 0;
}
@Override
public void update(MonteCarloTreeNode node, int reward) {
public void update(GameTreeNode node, int reward) {
// Stub: currently a no-op. Neither node nor reward is read, so no
// statistics are changed by calling this.
// TODO: implement.
}
}