Fixed Minimax search to use the new GameTreeNode and MinimaxProperty classes.

The previous implementation was overly complicated and may have been buggy at any search depth other than 2 plies.
2012-08-30 10:51:04 -04:00
parent 2e40440838
commit 4a1c64843d
12 changed files with 249 additions and 153 deletions
--- a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java
+++ b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java
@@ -4,6 +4,8 @@ import java.util.ArrayList;
 import java.util.List;

 import net.woodyfolsom.msproj.Action;
+import net.woodyfolsom.msproj.tree.GameTreeNode;
+import net.woodyfolsom.msproj.tree.MonteCarloProperties;

 public class MonteCarloUCT extends MonteCarlo {
 	
@@ -12,17 +14,20 @@ public class MonteCarloUCT extends MonteCarlo {
 	}

 	@Override
-	public List<GameTreeNode> descend(GameTreeNode node) {
-		double bestScore = (double) node.getNumWins() / node.getNumVisits();
-		GameTreeNode bestNode = node;
+	public List<GameTreeNode<MonteCarloProperties>> descend(GameTreeNode<MonteCarloProperties> node) {
+		double bestScore = Double.NEGATIVE_INFINITY;
+		GameTreeNode<MonteCarloProperties> bestNode = node;
 		
 		//This appears slightly redundant with getBestAction() but it is not -
 		//descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty)
 		//but getBestAction specifically asks for the optimum action to take from the current node,
 		//even if it results in a worse next state.
 		for (Action action : node.getActions()) {
-			GameTreeNode childNode = node.getChild(action);
-			double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
+			GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
+			
+			MonteCarloProperties properties = childNode.getProperties();
+			double childScore = (double) properties.getWins() / properties.getVisits();
+			
 			if (childScore >= bestScore) {
 				bestScore = childScore;
 				bestNode = childNode;
@@ -30,7 +35,7 @@ public class MonteCarloUCT extends MonteCarlo {
 		}
 		
 		if (bestNode == node) {
-			List<GameTreeNode> bestNodeList = new ArrayList<GameTreeNode>();
+			List<GameTreeNode<MonteCarloProperties>> bestNodeList = new ArrayList<GameTreeNode<MonteCarloProperties>>();
 			bestNodeList.add(bestNode);
 			return bestNodeList;
 		} else {
@@ -39,13 +44,16 @@ public class MonteCarloUCT extends MonteCarlo {
 	}

 	@Override
-	public Action getBestAction(GameTreeNode node) {
+	public Action getBestAction(GameTreeNode<MonteCarloProperties> node) {
 		Action bestAction = Action.NONE;
 		double bestScore = Double.NEGATIVE_INFINITY;
 		
 		for (Action action : node.getActions()) {
-			GameTreeNode childNode = node.getChild(action);
-			double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
+			GameTreeNode<MonteCarloProperties> childNode = node.getChild(action);
+			
+			MonteCarloProperties properties = childNode.getProperties();
+			double childScore = (double) properties.getWins() / properties.getVisits();
+			
 			if (childScore >= bestScore) {
 				bestScore = childScore;
 				bestAction = action;
@@ -56,19 +64,19 @@ public class MonteCarloUCT extends MonteCarlo {
 	}

 	@Override
-	public List<GameTreeNode> grow(GameTreeNode node) {
+	public List<GameTreeNode<MonteCarloProperties>> grow(GameTreeNode<MonteCarloProperties> node) {
 		// TODO Auto-generated method stub
 		return null;
 	}

 	@Override
-	public int rollout(GameTreeNode node) {
+	public int rollout(GameTreeNode<MonteCarloProperties> node) {
 		// TODO Auto-generated method stub
 		return 0;
 	}

 	@Override
-	public void update(GameTreeNode node, int reward) {
+	public void update(GameTreeNode<MonteCarloProperties> node, int reward) {
 		// TODO Auto-generated method stub
 		
 	}