Refactoring in progress.

Player and Action classes are now singletons (factory pattern) rather than String values. Implementing more general tree-search code for minimax, alpha-beta, and Monte Carlo using simplified backup logic.
2012-08-30 08:41:03 -04:00
parent b44b666663
commit 2e40440838
26 changed files with 647 additions and 433 deletions
--- a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java
+++ b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java
@@ -1,5 +1,9 @@
 package net.woodyfolsom.msproj.policy;

+import java.util.ArrayList;
+import java.util.List;
+
+import net.woodyfolsom.msproj.Action;

 public class MonteCarloUCT extends MonteCarlo {
 	
@@ -8,27 +12,65 @@ public class MonteCarloUCT extends MonteCarlo {
 	}

 	@Override
-	public MonteCarloTreeNode descend(MonteCarloTreeNode node) {
+	public List<GameTreeNode> descend(GameTreeNode node) {
+		double bestScore = (double) node.getNumWins() / node.getNumVisits();
+		GameTreeNode bestNode = node;
+		
+		//This appears slightly redundant with getBestAction() but it is not -
+		//descend() may pick the current node rather than a child to expand (if a child has a good score but high/low uncertainty)
+		//but getBestAction specifically asks for the optimum action to take from the current node,
+		//even if it results in a worse next state.
+		for (Action action : node.getActions()) {
+			GameTreeNode childNode = node.getChild(action);
+			double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
+			if (childScore >= bestScore) {
+				bestScore = childScore;
+				bestNode = childNode;
+			}
+		}
+		
+		if (bestNode == node) {
+			List<GameTreeNode> bestNodeList = new ArrayList<GameTreeNode>();
+			bestNodeList.add(bestNode);
+			return bestNodeList;
+		} else {
+			return descend(bestNode);
+		}
+	}
+
+	@Override
+	public Action getBestAction(GameTreeNode node) {
+		Action bestAction = Action.NONE;
+		double bestScore = Double.NEGATIVE_INFINITY;
+		
+		for (Action action : node.getActions()) {
+			GameTreeNode childNode = node.getChild(action);
+			double childScore = (double) childNode.getNumWins() / childNode.getNumVisits();
+			if (childScore >= bestScore) {
+				bestScore = childScore;
+				bestAction = action;
+			}
+		}
+		
+		return bestAction;
+	}
+
+	@Override
+	public List<GameTreeNode> grow(GameTreeNode node) {
 		// TODO Auto-generated method stub
 		return null;
 	}

 	@Override
-	public MonteCarloTreeNode grow(MonteCarloTreeNode node) {
-		// TODO Auto-generated method stub
-		return null;
-	}
-
-	@Override
-	public int rollout(MonteCarloTreeNode node) {
+	public int rollout(GameTreeNode node) {
 		// TODO Auto-generated method stub
 		return 0;
 	}

 	@Override
-	public void update(MonteCarloTreeNode node, int reward) {
+	public void update(GameTreeNode node, int reward) {
 		// TODO Auto-generated method stub
 		
 	}
-
+	
 }