Merge branch 'master' of woodyfolsom.net:/opt/git/cs8803p4
This commit is contained in:
123
src/model/comPlayer/AdaptiveComPlayer.java
Normal file
123
src/model/comPlayer/AdaptiveComPlayer.java
Normal file
@@ -0,0 +1,123 @@
|
||||
package model.comPlayer;
|
||||
|
||||
import model.Board;
|
||||
import model.BoardScorer;
|
||||
import model.Move;
|
||||
import model.comPlayer.generator.AlphaBetaMoveGenerator;
|
||||
import model.comPlayer.generator.MonteCarloMoveGenerator;
|
||||
import model.comPlayer.generator.MoveGenerator;
|
||||
import model.playerModel.GameGoal;
|
||||
import model.playerModel.PlayerModel;
|
||||
import aima.core.environment.gridworld.GridCell;
|
||||
import aima.core.environment.gridworld.GridWorld;
|
||||
import aima.core.environment.gridworld.GridWorldAction;
|
||||
import aima.core.environment.gridworld.GridWorldFactory;
|
||||
import aima.core.probability.example.MDPFactory;
|
||||
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||
import aima.core.probability.mdp.Policy;
|
||||
import aima.core.probability.mdp.PolicyEvaluation;
|
||||
import aima.core.probability.mdp.impl.ModifiedPolicyEvaluation;
|
||||
import aima.core.probability.mdp.search.PolicyIteration;
|
||||
|
||||
public class AdaptiveComPlayer implements Player {
|
||||
private final MoveGenerator abMoveGenerator = new AlphaBetaMoveGenerator();
|
||||
private final MoveGenerator mcMoveGenerator = new MonteCarloMoveGenerator();
|
||||
|
||||
private BoardScorer boardScorer = new BoardScorer();
|
||||
private boolean calculatePolicy = true;
|
||||
private GameGoal target = null;
|
||||
private GridWorld<Double> gw = null;
|
||||
private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;
|
||||
private Policy<GridCell<Double>, GridWorldAction> policy = null;
|
||||
private PolicyIteration<GridCell<Double>, GridWorldAction> pi = null;
|
||||
|
||||
@Override
|
||||
public void denyMove() {
|
||||
throw new UnsupportedOperationException("Not implemented");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Move getMove(Board board, PlayerModel player) {
|
||||
if (calculatePolicy) {
|
||||
System.out.println("Calculating policy for PlayerModel: " + player);
|
||||
|
||||
// take 10 turns to place 6 tiles
|
||||
double defaultPenalty = -0.25;
|
||||
|
||||
int maxScore = target.getTargetScore();
|
||||
int maxTiles = Board.NUM_COLS * Board.NUM_ROWS;
|
||||
|
||||
gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,
|
||||
maxScore, defaultPenalty);
|
||||
mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore);
|
||||
|
||||
// gamma = 1.0
|
||||
PolicyEvaluation<GridCell<Double>, GridWorldAction> pe = new ModifiedPolicyEvaluation<GridCell<Double>, GridWorldAction>(
|
||||
50, 0.9);
|
||||
pi = new PolicyIteration<GridCell<Double>, GridWorldAction>(pe);
|
||||
policy = pi.policyIteration(mdp);
|
||||
|
||||
System.out.println("Optimum policy calculated.");
|
||||
|
||||
for (int j = maxScore; j >= 1; j--) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 1; i <= maxTiles; i++) {
|
||||
sb.append(policy.action(gw.getCellAt(i, j)));
|
||||
sb.append(" ");
|
||||
}
|
||||
System.out.println(sb.toString());
|
||||
}
|
||||
|
||||
calculatePolicy = false;
|
||||
} else {
|
||||
System.out.println("Using pre-calculated policy");
|
||||
}
|
||||
|
||||
GridCell<Double> state = getState(board);
|
||||
GridWorldAction action = policy.action(state);
|
||||
|
||||
if (action == null || state == null) {
|
||||
System.out
|
||||
.println("Board state outside of parameters of MDP. Reverting to failsafe behavior.");
|
||||
action = GridWorldAction.RandomMove;
|
||||
}
|
||||
System.out.println("Performing action " + action + " at state " + state
|
||||
+ " per policy.");
|
||||
switch (action) {
|
||||
case AddTile:
|
||||
// System.out.println("Performing action #" +
|
||||
// GridWorldAction.AddTile.ordinal());
|
||||
return abMoveGenerator.genMove(board, false);
|
||||
case CaptureThree:
|
||||
// System.out.println("Performing action #" +
|
||||
// GridWorldAction.CaptureThree.ordinal());
|
||||
return mcMoveGenerator.genMove(board, false);
|
||||
case RandomMove:
|
||||
// System.out.println("Performing action #" +
|
||||
// GridWorldAction.None.ordinal());
|
||||
return mcMoveGenerator.genMove(board, false);
|
||||
default:
|
||||
// System.out.println("Performing failsafe action");
|
||||
return mcMoveGenerator.genMove(board, false);
|
||||
}
|
||||
}
|
||||
|
||||
private GridCell<Double> getState(Board board) {
|
||||
return gw.getCellAt(board.getTurn(), boardScorer.getScore(board));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReady() {
|
||||
return true; // always ready to play a random valid move
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Adaptive ComPlayer";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setGameGoal(GameGoal target) {
|
||||
this.target = target;
|
||||
}
|
||||
}
|
||||
18
src/model/mdp/Action.java
Normal file
18
src/model/mdp/Action.java
Normal file
@@ -0,0 +1,18 @@
|
||||
package model.mdp;
|
||||
|
||||
/**
 * An abstract action in the tile-game MDP, identified by name. The two
 * predefined singletons are compared by reference identity elsewhere
 * (e.g. {@code Action.playToWin == action}), so they must never be reassigned.
 */
public class Action {
	// BUG FIX: declared final -- these singletons are compared by identity,
	// so a mutable public static field would be a correctness hazard.
	public static final Action playToWin = new Action("PlayToWin");
	public static final Action playToLose = new Action("PlayToLose");
	//public static Action maintainScore = new Action();

	/** Human-readable name, used only for display. */
	private final String name;

	/**
	 * @param name display name of the action
	 */
	public Action(String name) {
		this.name = name;
	}

	@Override
	public String toString() {
		return name;
	}
}
|
||||
51
src/model/mdp/MDP.java
Normal file
51
src/model/mdp/MDP.java
Normal file
@@ -0,0 +1,51 @@
|
||||
package model.mdp;
|
||||
|
||||
public class MDP {
|
||||
public static final double nonTerminalReward = -0.25;
|
||||
|
||||
public enum MODE {
|
||||
CEIL, FLOOR
|
||||
}
|
||||
|
||||
private final int maxScore;
|
||||
private final int maxTiles;
|
||||
private final MODE mode;
|
||||
|
||||
public MDP(int maxScore, int maxTiles, MODE mode) {
|
||||
this.maxScore = maxScore;
|
||||
this.maxTiles = maxTiles;
|
||||
this.mode = mode;
|
||||
}
|
||||
|
||||
public Action[] getActions(int i, int j) {
|
||||
if (i == maxScore) {
|
||||
return new Action[0];
|
||||
}
|
||||
if (j == maxTiles) {
|
||||
return new Action[0];
|
||||
}
|
||||
return new Action[]{Action.playToLose,Action.playToWin};
|
||||
}
|
||||
|
||||
public int getMaxScore() {
|
||||
return maxScore;
|
||||
}
|
||||
|
||||
public int getMaxTiles() {
|
||||
return maxTiles;
|
||||
}
|
||||
|
||||
public double getReward(int score, int tiles) {
|
||||
if (score == maxScore && tiles == maxTiles) {
|
||||
return 10.0;
|
||||
}
|
||||
// TODO scale linearly?
|
||||
if (score == maxScore) {
|
||||
return -1.0;
|
||||
}
|
||||
if (tiles == maxTiles) {
|
||||
return -5.0;
|
||||
}
|
||||
return nonTerminalReward;
|
||||
}
|
||||
}
|
||||
5
src/model/mdp/MDPSolver.java
Normal file
5
src/model/mdp/MDPSolver.java
Normal file
@@ -0,0 +1,5 @@
|
||||
package model.mdp;
|
||||
|
||||
/**
 * Strategy interface for algorithms that solve a tile-game {@link MDP}
 * and produce a {@link Policy}.
 */
public interface MDPSolver {
	/**
	 * Solves the given MDP.
	 *
	 * @param mdp the decision process to solve; assumed non-null
	 * @return the computed policy
	 */
	Policy solve(MDP mdp);
}
|
||||
7
src/model/mdp/Policy.java
Normal file
7
src/model/mdp/Policy.java
Normal file
@@ -0,0 +1,7 @@
|
||||
package model.mdp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class Policy extends ArrayList<Action>{
|
||||
|
||||
}
|
||||
34
src/model/mdp/Transition.java
Normal file
34
src/model/mdp/Transition.java
Normal file
@@ -0,0 +1,34 @@
|
||||
package model.mdp;
|
||||
|
||||
/**
 * One stochastic outcome of taking an action: with probability {@code prob}
 * the score changes by {@code scoreChange} and the tile count by
 * {@code tileCountChange}.
 */
public class Transition {
	// Likelihood of this outcome occurring.
	private double prob;
	// Delta applied to the score dimension of the state.
	private int scoreChange;
	// Delta applied to the tile-count dimension of the state.
	private int tileCountChange;

	/**
	 * @param prob            probability of this outcome
	 * @param scoreChange     score delta
	 * @param tileCountChange tile-count delta
	 */
	public Transition(double prob, int scoreChange, int tileCountChange) {
		this.prob = prob;
		this.scoreChange = scoreChange;
		this.tileCountChange = tileCountChange;
	}

	public double getProb() {
		return prob;
	}

	public int getScoreChange() {
		return scoreChange;
	}

	public int getTileCountChange() {
		return tileCountChange;
	}

	public void setProb(double prob) {
		this.prob = prob;
	}

	public void setScoreChange(int scoreChange) {
		this.scoreChange = scoreChange;
	}

	public void setTileCountChange(int tileCountChange) {
		this.tileCountChange = tileCountChange;
	}
}
|
||||
110
src/model/mdp/ValueIterationSolver.java
Normal file
110
src/model/mdp/ValueIterationSolver.java
Normal file
@@ -0,0 +1,110 @@
|
||||
package model.mdp;
|
||||
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ValueIterationSolver implements MDPSolver {
|
||||
public int maxIterations = 10;
|
||||
public final double DEFAULT_EPS = 0.1;
|
||||
public final double GAMMA = 0.9; //discount
|
||||
|
||||
private DecimalFormat fmt = new DecimalFormat("##.00");
|
||||
public Policy solve(MDP mdp) {
|
||||
Policy policy = new Policy();
|
||||
|
||||
double[][] utility = new double[mdp.getMaxScore()+1][mdp.getMaxTiles()+1];
|
||||
double[][] utilityPrime = new double[mdp.getMaxScore()+1][mdp.getMaxTiles()+1];
|
||||
|
||||
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||
//StringBuilder sb = new StringBuilder();
|
||||
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||
utilityPrime[i][j] = mdp.getReward(i, j);
|
||||
//sb.append(fmt.format(utility[i][j]));
|
||||
//sb.append(" ");
|
||||
}
|
||||
//System.out.println(sb);
|
||||
}
|
||||
|
||||
converged:
|
||||
for (int iteration = 0; iteration < maxIterations; iteration++) {
|
||||
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||
utility[i][j] = utilityPrime[i][j];
|
||||
}
|
||||
}
|
||||
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||
Action[] actions = mdp.getActions(i,j);
|
||||
|
||||
double aMax;
|
||||
if (actions.length > 0) {
|
||||
aMax = Double.NEGATIVE_INFINITY;
|
||||
} else {
|
||||
aMax = 0;
|
||||
}
|
||||
|
||||
for (Action action : actions){
|
||||
List<Transition> transitions = getTransitions(action,mdp,i,j);
|
||||
double aSum = 0.0;
|
||||
for (Transition transition : transitions) {
|
||||
int transI = transition.getScoreChange();
|
||||
int transJ = transition.getTileCountChange();
|
||||
if (i+transI >= 0 && i+transI <= mdp.getMaxScore()
|
||||
&& j+transJ >= 0 && j+transJ <= mdp.getMaxTiles())
|
||||
aSum += utility[i+transI][j+transJ];
|
||||
}
|
||||
if (aSum > aMax) {
|
||||
aMax = aSum;
|
||||
}
|
||||
}
|
||||
utilityPrime[i][j] = mdp.getReward(i,j) + GAMMA * aMax;
|
||||
}
|
||||
}
|
||||
double maxDiff = getMaxDiff(utility,utilityPrime);
|
||||
System.out.println("Max diff |U - U'| = " + maxDiff);
|
||||
if (maxDiff < DEFAULT_EPS) {
|
||||
System.out.println("Solution to MDP converged: " + maxDiff);
|
||||
break converged;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < utility.length; i++) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int j = 0; j < utility[i].length; j++) {
|
||||
sb.append(fmt.format(utility[i][j]));
|
||||
sb.append(" ");
|
||||
}
|
||||
System.out.println(sb);
|
||||
}
|
||||
|
||||
//utility is now the utility Matrix
|
||||
//get the policy
|
||||
return policy;
|
||||
}
|
||||
|
||||
double getMaxDiff(double[][]u, double[][]uPrime) {
|
||||
double maxDiff = 0;
|
||||
for (int i = 0; i < u.length; i++) {
|
||||
for (int j = 0; j < u[i].length; j++) {
|
||||
maxDiff = Math.max(maxDiff,Math.abs(u[i][j] - uPrime[i][j]));
|
||||
}
|
||||
}
|
||||
return maxDiff;
|
||||
}
|
||||
|
||||
private List<Transition> getTransitions(Action action, MDP mdp, int score, int tiles) {
|
||||
List<Transition> transitions = new ArrayList<Transition>();
|
||||
if (Action.playToWin == action) {
|
||||
transitions.add(new Transition(0.9,1,1));
|
||||
transitions.add(new Transition(0.1,1,-3));
|
||||
} else if (Action.playToLose == action) {
|
||||
transitions.add(new Transition(0.9,1,1));
|
||||
transitions.add(new Transition(0.1,1,-3));
|
||||
} /*else if (Action.maintainScore == action) {
|
||||
transitions.add(new Transition(0.5,1,1));
|
||||
transitions.add(new Transition(0.5,1,-3));
|
||||
}*/
|
||||
return transitions;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user