Refactoring in progress.
Player and Action classes are now singletons (factory pattern) rather than String values. Implementing more general tree-search code for minimax, alpha-beta, and Monte Carlo using simplified backup logic.
This commit is contained in:
@@ -1,5 +1,9 @@
|
||||
package net.woodyfolsom.msproj.policy;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.msproj.Action;
|
||||
|
||||
public class MonteCarloUCT extends MonteCarlo {
|
||||
|
||||
@@ -8,27 +12,65 @@ public class MonteCarloUCT extends MonteCarlo {
|
||||
}
|
||||
|
||||
@Override
|
||||
public MonteCarloTreeNode descend(MonteCarloTreeNode node) {
|
||||
public List<GameTreeNode> descend(GameTreeNode node) {
|
||||
double bestScore = (double) node.getNumWins() / node.getNumVisits();
|
||||
GameTreeNode bestNode = node;
|
||||
|
||||
//This appears slightly redundant with getBestAction() but it is not -
|
||||
//descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty)
|
||||
//but getBestAction specifically asks for the optimum action to take from the current node,
|
||||
//even if it results in a worse next state.
|
||||
for (Action action : node.getActions()) {
|
||||
GameTreeNode childNode = node.getChild(action);
|
||||
double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
|
||||
if (childScore >= bestScore) {
|
||||
bestScore = childScore;
|
||||
bestNode = childNode;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestNode == node) {
|
||||
List<GameTreeNode> bestNodeList = new ArrayList<GameTreeNode>();
|
||||
bestNodeList.add(bestNode);
|
||||
return bestNodeList;
|
||||
} else {
|
||||
return descend(bestNode);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Action getBestAction(GameTreeNode node) {
|
||||
Action bestAction = Action.NONE;
|
||||
double bestScore = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Action action : node.getActions()) {
|
||||
GameTreeNode childNode = node.getChild(action);
|
||||
double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
|
||||
if (childScore >= bestScore) {
|
||||
bestScore = childScore;
|
||||
bestAction = action;
|
||||
}
|
||||
}
|
||||
|
||||
return bestAction;
|
||||
}
|
||||
|
||||
@Override
|
||||
// Unimplemented stub: the node argument is not used and the result is always null.
public List<GameTreeNode> grow(GameTreeNode node) {
|
||||
// TODO Auto-generated method stub
|
||||
// Placeholder result until the method is implemented.
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
// Unimplemented stub taking a MonteCarloTreeNode: the argument is not used
// and the result is always null.
public MonteCarloTreeNode grow(MonteCarloTreeNode node) {
|
||||
// TODO Auto-generated method stub
|
||||
// Placeholder result until the method is implemented.
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int rollout(MonteCarloTreeNode node) {
|
||||
// Unimplemented stub: the node argument is not used and the result is always 0.
public int rollout(GameTreeNode node) {
|
||||
// TODO Auto-generated method stub
|
||||
// Placeholder result until the method is implemented.
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(MonteCarloTreeNode node, int reward) {
|
||||
// Unimplemented stub: neither node nor reward is used and nothing is changed.
public void update(GameTreeNode node, int reward) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user