Merge branch 'master' of woodyfolsom.net:/opt/git/cs8803p4
This commit is contained in:
123
src/model/comPlayer/AdaptiveComPlayer.java
Normal file
123
src/model/comPlayer/AdaptiveComPlayer.java
Normal file
@@ -0,0 +1,123 @@
|
||||
package model.comPlayer;
|
||||
|
||||
import model.Board;
|
||||
import model.BoardScorer;
|
||||
import model.Move;
|
||||
import model.comPlayer.generator.AlphaBetaMoveGenerator;
|
||||
import model.comPlayer.generator.MonteCarloMoveGenerator;
|
||||
import model.comPlayer.generator.MoveGenerator;
|
||||
import model.playerModel.GameGoal;
|
||||
import model.playerModel.PlayerModel;
|
||||
import aima.core.environment.gridworld.GridCell;
|
||||
import aima.core.environment.gridworld.GridWorld;
|
||||
import aima.core.environment.gridworld.GridWorldAction;
|
||||
import aima.core.environment.gridworld.GridWorldFactory;
|
||||
import aima.core.probability.example.MDPFactory;
|
||||
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||
import aima.core.probability.mdp.Policy;
|
||||
import aima.core.probability.mdp.PolicyEvaluation;
|
||||
import aima.core.probability.mdp.impl.ModifiedPolicyEvaluation;
|
||||
import aima.core.probability.mdp.search.PolicyIteration;
|
||||
|
||||
public class AdaptiveComPlayer implements Player {
|
||||
private final MoveGenerator abMoveGenerator = new AlphaBetaMoveGenerator();
|
||||
private final MoveGenerator mcMoveGenerator = new MonteCarloMoveGenerator();
|
||||
|
||||
private BoardScorer boardScorer = new BoardScorer();
|
||||
private boolean calculatePolicy = true;
|
||||
private GameGoal target = null;
|
||||
private GridWorld<Double> gw = null;
|
||||
private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;
|
||||
private Policy<GridCell<Double>, GridWorldAction> policy = null;
|
||||
private PolicyIteration<GridCell<Double>, GridWorldAction> pi = null;
|
||||
|
||||
@Override
|
||||
public void denyMove() {
|
||||
throw new UnsupportedOperationException("Not implemented");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Move getMove(Board board, PlayerModel player) {
|
||||
if (calculatePolicy) {
|
||||
System.out.println("Calculating policy for PlayerModel: " + player);
|
||||
|
||||
// take 10 turns to place 6 tiles
|
||||
double defaultPenalty = -0.25;
|
||||
|
||||
int maxScore = target.getTargetScore();
|
||||
int maxTiles = Board.NUM_COLS * Board.NUM_ROWS;
|
||||
|
||||
gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,
|
||||
maxScore, defaultPenalty);
|
||||
mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore);
|
||||
|
||||
// gamma = 1.0
|
||||
PolicyEvaluation<GridCell<Double>, GridWorldAction> pe = new ModifiedPolicyEvaluation<GridCell<Double>, GridWorldAction>(
|
||||
50, 0.9);
|
||||
pi = new PolicyIteration<GridCell<Double>, GridWorldAction>(pe);
|
||||
policy = pi.policyIteration(mdp);
|
||||
|
||||
System.out.println("Optimum policy calculated.");
|
||||
|
||||
for (int j = maxScore; j >= 1; j--) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 1; i <= maxTiles; i++) {
|
||||
sb.append(policy.action(gw.getCellAt(i, j)));
|
||||
sb.append(" ");
|
||||
}
|
||||
System.out.println(sb.toString());
|
||||
}
|
||||
|
||||
calculatePolicy = false;
|
||||
} else {
|
||||
System.out.println("Using pre-calculated policy");
|
||||
}
|
||||
|
||||
GridCell<Double> state = getState(board);
|
||||
GridWorldAction action = policy.action(state);
|
||||
|
||||
if (action == null || state == null) {
|
||||
System.out
|
||||
.println("Board state outside of parameters of MDP. Reverting to failsafe behavior.");
|
||||
action = GridWorldAction.RandomMove;
|
||||
}
|
||||
System.out.println("Performing action " + action + " at state " + state
|
||||
+ " per policy.");
|
||||
switch (action) {
|
||||
case AddTile:
|
||||
// System.out.println("Performing action #" +
|
||||
// GridWorldAction.AddTile.ordinal());
|
||||
return abMoveGenerator.genMove(board, false);
|
||||
case CaptureThree:
|
||||
// System.out.println("Performing action #" +
|
||||
// GridWorldAction.CaptureThree.ordinal());
|
||||
return mcMoveGenerator.genMove(board, false);
|
||||
case RandomMove:
|
||||
// System.out.println("Performing action #" +
|
||||
// GridWorldAction.None.ordinal());
|
||||
return mcMoveGenerator.genMove(board, false);
|
||||
default:
|
||||
// System.out.println("Performing failsafe action");
|
||||
return mcMoveGenerator.genMove(board, false);
|
||||
}
|
||||
}
|
||||
|
||||
private GridCell<Double> getState(Board board) {
|
||||
return gw.getCellAt(board.getTurn(), boardScorer.getScore(board));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReady() {
|
||||
return true; // always ready to play a random valid move
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Adaptive ComPlayer";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setGameGoal(GameGoal target) {
|
||||
this.target = target;
|
||||
}
|
||||
}
|
||||
18
src/model/mdp/Action.java
Normal file
18
src/model/mdp/Action.java
Normal file
@@ -0,0 +1,18 @@
|
||||
package model.mdp;
|
||||
|
||||
/**
 * An abstract action in the tile-game MDP, identified by name. The two
 * predefined singletons are compared by reference identity elsewhere
 * (e.g. {@code Action.playToWin == action}), so they must never be reassigned.
 */
public class Action {
	// BUG FIX: declared final -- these singletons are compared by identity,
	// so a mutable public static field would be a correctness hazard.
	public static final Action playToWin = new Action("PlayToWin");
	public static final Action playToLose = new Action("PlayToLose");
	//public static Action maintainScore = new Action();

	/** Human-readable name, used only for display. */
	private final String name;

	/**
	 * @param name display name of the action
	 */
	public Action(String name) {
		this.name = name;
	}

	@Override
	public String toString() {
		return name;
	}
}
|
||||
51
src/model/mdp/MDP.java
Normal file
51
src/model/mdp/MDP.java
Normal file
@@ -0,0 +1,51 @@
|
||||
package model.mdp;
|
||||
|
||||
public class MDP {
|
||||
public static final double nonTerminalReward = -0.25;
|
||||
|
||||
public enum MODE {
|
||||
CEIL, FLOOR
|
||||
}
|
||||
|
||||
private final int maxScore;
|
||||
private final int maxTiles;
|
||||
private final MODE mode;
|
||||
|
||||
public MDP(int maxScore, int maxTiles, MODE mode) {
|
||||
this.maxScore = maxScore;
|
||||
this.maxTiles = maxTiles;
|
||||
this.mode = mode;
|
||||
}
|
||||
|
||||
public Action[] getActions(int i, int j) {
|
||||
if (i == maxScore) {
|
||||
return new Action[0];
|
||||
}
|
||||
if (j == maxTiles) {
|
||||
return new Action[0];
|
||||
}
|
||||
return new Action[]{Action.playToLose,Action.playToWin};
|
||||
}
|
||||
|
||||
public int getMaxScore() {
|
||||
return maxScore;
|
||||
}
|
||||
|
||||
public int getMaxTiles() {
|
||||
return maxTiles;
|
||||
}
|
||||
|
||||
public double getReward(int score, int tiles) {
|
||||
if (score == maxScore && tiles == maxTiles) {
|
||||
return 10.0;
|
||||
}
|
||||
// TODO scale linearly?
|
||||
if (score == maxScore) {
|
||||
return -1.0;
|
||||
}
|
||||
if (tiles == maxTiles) {
|
||||
return -5.0;
|
||||
}
|
||||
return nonTerminalReward;
|
||||
}
|
||||
}
|
||||
5
src/model/mdp/MDPSolver.java
Normal file
5
src/model/mdp/MDPSolver.java
Normal file
@@ -0,0 +1,5 @@
|
||||
package model.mdp;
|
||||
|
||||
/**
 * Strategy interface for algorithms that solve a tile-game {@link MDP}
 * and produce a {@link Policy}.
 */
public interface MDPSolver {
	/**
	 * Solves the given MDP.
	 *
	 * @param mdp the decision process to solve; assumed non-null
	 * @return the computed policy
	 */
	Policy solve(MDP mdp);
}
|
||||
7
src/model/mdp/Policy.java
Normal file
7
src/model/mdp/Policy.java
Normal file
@@ -0,0 +1,7 @@
|
||||
package model.mdp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class Policy extends ArrayList<Action>{
|
||||
|
||||
}
|
||||
34
src/model/mdp/Transition.java
Normal file
34
src/model/mdp/Transition.java
Normal file
@@ -0,0 +1,34 @@
|
||||
package model.mdp;
|
||||
|
||||
/**
 * One stochastic outcome of taking an action: with probability {@code prob}
 * the score changes by {@code scoreChange} and the tile count by
 * {@code tileCountChange}.
 */
public class Transition {
	// Likelihood of this outcome occurring.
	private double prob;
	// Delta applied to the score dimension of the state.
	private int scoreChange;
	// Delta applied to the tile-count dimension of the state.
	private int tileCountChange;

	/**
	 * @param prob            probability of this outcome
	 * @param scoreChange     score delta
	 * @param tileCountChange tile-count delta
	 */
	public Transition(double prob, int scoreChange, int tileCountChange) {
		this.prob = prob;
		this.scoreChange = scoreChange;
		this.tileCountChange = tileCountChange;
	}

	public double getProb() {
		return prob;
	}

	public int getScoreChange() {
		return scoreChange;
	}

	public int getTileCountChange() {
		return tileCountChange;
	}

	public void setProb(double prob) {
		this.prob = prob;
	}

	public void setScoreChange(int scoreChange) {
		this.scoreChange = scoreChange;
	}

	public void setTileCountChange(int tileCountChange) {
		this.tileCountChange = tileCountChange;
	}
}
|
||||
110
src/model/mdp/ValueIterationSolver.java
Normal file
110
src/model/mdp/ValueIterationSolver.java
Normal file
@@ -0,0 +1,110 @@
|
||||
package model.mdp;
|
||||
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ValueIterationSolver implements MDPSolver {
|
||||
public int maxIterations = 10;
|
||||
public final double DEFAULT_EPS = 0.1;
|
||||
public final double GAMMA = 0.9; //discount
|
||||
|
||||
private DecimalFormat fmt = new DecimalFormat("##.00");
|
||||
public Policy solve(MDP mdp) {
|
||||
Policy policy = new Policy();
|
||||
|
||||
double[][] utility = new double[mdp.getMaxScore()+1][mdp.getMaxTiles()+1];
|
||||
double[][] utilityPrime = new double[mdp.getMaxScore()+1][mdp.getMaxTiles()+1];
|
||||
|
||||
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||
//StringBuilder sb = new StringBuilder();
|
||||
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||
utilityPrime[i][j] = mdp.getReward(i, j);
|
||||
//sb.append(fmt.format(utility[i][j]));
|
||||
//sb.append(" ");
|
||||
}
|
||||
//System.out.println(sb);
|
||||
}
|
||||
|
||||
converged:
|
||||
for (int iteration = 0; iteration < maxIterations; iteration++) {
|
||||
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||
utility[i][j] = utilityPrime[i][j];
|
||||
}
|
||||
}
|
||||
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||
Action[] actions = mdp.getActions(i,j);
|
||||
|
||||
double aMax;
|
||||
if (actions.length > 0) {
|
||||
aMax = Double.NEGATIVE_INFINITY;
|
||||
} else {
|
||||
aMax = 0;
|
||||
}
|
||||
|
||||
for (Action action : actions){
|
||||
List<Transition> transitions = getTransitions(action,mdp,i,j);
|
||||
double aSum = 0.0;
|
||||
for (Transition transition : transitions) {
|
||||
int transI = transition.getScoreChange();
|
||||
int transJ = transition.getTileCountChange();
|
||||
if (i+transI >= 0 && i+transI <= mdp.getMaxScore()
|
||||
&& j+transJ >= 0 && j+transJ <= mdp.getMaxTiles())
|
||||
aSum += utility[i+transI][j+transJ];
|
||||
}
|
||||
if (aSum > aMax) {
|
||||
aMax = aSum;
|
||||
}
|
||||
}
|
||||
utilityPrime[i][j] = mdp.getReward(i,j) + GAMMA * aMax;
|
||||
}
|
||||
}
|
||||
double maxDiff = getMaxDiff(utility,utilityPrime);
|
||||
System.out.println("Max diff |U - U'| = " + maxDiff);
|
||||
if (maxDiff < DEFAULT_EPS) {
|
||||
System.out.println("Solution to MDP converged: " + maxDiff);
|
||||
break converged;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < utility.length; i++) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int j = 0; j < utility[i].length; j++) {
|
||||
sb.append(fmt.format(utility[i][j]));
|
||||
sb.append(" ");
|
||||
}
|
||||
System.out.println(sb);
|
||||
}
|
||||
|
||||
//utility is now the utility Matrix
|
||||
//get the policy
|
||||
return policy;
|
||||
}
|
||||
|
||||
double getMaxDiff(double[][]u, double[][]uPrime) {
|
||||
double maxDiff = 0;
|
||||
for (int i = 0; i < u.length; i++) {
|
||||
for (int j = 0; j < u[i].length; j++) {
|
||||
maxDiff = Math.max(maxDiff,Math.abs(u[i][j] - uPrime[i][j]));
|
||||
}
|
||||
}
|
||||
return maxDiff;
|
||||
}
|
||||
|
||||
private List<Transition> getTransitions(Action action, MDP mdp, int score, int tiles) {
|
||||
List<Transition> transitions = new ArrayList<Transition>();
|
||||
if (Action.playToWin == action) {
|
||||
transitions.add(new Transition(0.9,1,1));
|
||||
transitions.add(new Transition(0.1,1,-3));
|
||||
} else if (Action.playToLose == action) {
|
||||
transitions.add(new Transition(0.9,1,1));
|
||||
transitions.add(new Transition(0.1,1,-3));
|
||||
} /*else if (Action.maintainScore == action) {
|
||||
transitions.add(new Transition(0.5,1,1));
|
||||
transitions.add(new Transition(0.5,1,-3));
|
||||
}*/
|
||||
return transitions;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user