From 8f92ae65d8aae6fa3e08c82a6dfd832136a0b6e6 Mon Sep 17 00:00:00 2001 From: Woody Folsom Date: Mon, 30 Apr 2012 17:37:37 -0400 Subject: [PATCH] Fixed unit tests, changed MDP generation to more reasonably seek the goal state, avoiding premature end of game. Removed unused google-code classes. Regenerate policy when AdaptiveComPlayer.setTarget() is called. --- src/aima/core/environment/cellworld/Cell.java | 87 ----------- .../core/environment/cellworld/CellWorld.java | 123 --------------- .../cellworld/CellWorldAction.java | 142 ------------------ .../cellworld/CellWorldFactory.java | 27 ---- .../gridworld/GridWorldFactory.java | 7 +- .../core/probability/example/MDPFactory.java | 133 +--------------- src/model/comPlayer/AdaptiveComPlayer.java | 1 + src/model/mdp/Action.java | 18 --- src/model/mdp/MDP.java | 51 ------- src/model/mdp/MDPSolver.java | 5 - src/model/mdp/Policy.java | 7 - src/model/mdp/Transition.java | 34 ----- src/model/mdp/ValueIterationSolver.java | 110 -------------- test/PlayerModel.dat | Bin 0 -> 419207 bytes .../mdp/MarkovDecisionProcessTest.java | 74 +++------ .../probability/mdp/PolicyIterationTest.java | 45 ++---- .../probability/mdp/ValueIterationTest.java | 64 -------- .../probability/mdp/ValueIterationTest2.java | 38 ++--- test/model/mdp/ValueIterationSolverTest.java | 26 ---- 19 files changed, 53 insertions(+), 939 deletions(-) delete mode 100644 src/aima/core/environment/cellworld/Cell.java delete mode 100644 src/aima/core/environment/cellworld/CellWorld.java delete mode 100644 src/aima/core/environment/cellworld/CellWorldAction.java delete mode 100644 src/aima/core/environment/cellworld/CellWorldFactory.java delete mode 100644 src/model/mdp/Action.java delete mode 100644 src/model/mdp/MDP.java delete mode 100644 src/model/mdp/MDPSolver.java delete mode 100644 src/model/mdp/Policy.java delete mode 100644 src/model/mdp/Transition.java delete mode 100644 src/model/mdp/ValueIterationSolver.java create mode 100644 test/PlayerModel.dat delete mode 100644 test/aima/core/probability/mdp/ValueIterationTest.java delete mode 100644 test/model/mdp/ValueIterationSolverTest.java diff --git a/src/aima/core/environment/cellworld/Cell.java b/src/aima/core/environment/cellworld/Cell.java deleted file mode 100644 index fa6c4ea..0000000 --- a/src/aima/core/environment/cellworld/Cell.java +++ /dev/null @@ -1,87 +0,0 @@ -package aima.core.environment.cellworld; - -/** - * Artificial Intelligence A Modern Approach (3rd Edition): page 645.
- *
- * A representation of a Cell in the environment detailed in Figure 17.1. - * - * @param - * the content type of the cell. - * - * @author Ciaran O'Reilly - * @author Ravi Mohan - */ -public class Cell { - private int x = 1; - private int y = 1; - private C content = null; - - /** - * Construct a Cell. - * - * @param x - * the x position of the cell. - * @param y - * the y position of the cell. - * @param content - * the initial content of the cell. - */ - public Cell(int x, int y, C content) { - this.x = x; - this.y = y; - this.content = content; - } - - /** - * - * @return the x position of the cell. - */ - public int getX() { - return x; - } - - /** - * - * @return the y position of the cell. - */ - public int getY() { - return y; - } - - /** - * - * @return the content of the cell. - */ - public C getContent() { - return content; - } - - /** - * Set the cell's content. - * - * @param content - * the content to be placed in the cell. - */ - public void setContent(C content) { - this.content = content; - } - - @Override - public String toString() { - return ""; - } - - @Override - public boolean equals(Object o) { - if (o instanceof Cell) { - Cell c = (Cell) o; - return x == c.x && y == c.y && content.equals(c.content); - } - return false; - } - - @Override - public int hashCode() { - return x + 23 + y + 31 * content.hashCode(); - } -} diff --git a/src/aima/core/environment/cellworld/CellWorld.java b/src/aima/core/environment/cellworld/CellWorld.java deleted file mode 100644 index 20d8a78..0000000 --- a/src/aima/core/environment/cellworld/CellWorld.java +++ /dev/null @@ -1,123 +0,0 @@ -package aima.core.environment.cellworld; - -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.Map; -import java.util.Set; - -/** - * Artificial Intelligence A Modern Approach (3rd Edition): page 645.
- *
- * - * A representation for the environment depicted in figure 17.1.
- *
- * Note: the x and y coordinates are always positive integers starting at - * 1.
- * Note: If looking at a rectangle - the coordinate (x=1, y=1) will be the - * bottom left hand corner.
- * - * - * @param - * the type of content for the Cells in the world. - * - * @author Ciaran O'Reilly - * @author Ravi Mohan - */ -public class CellWorld { - private Set> cells = new LinkedHashSet>(); - private Map>> cellLookup = new HashMap>>(); - - /** - * Construct a Cell World with size xDimension * y Dimension cells, all with - * their values set to a default content value. - * - * @param xDimension - * the size of the x dimension. - * @param yDimension - * the size of the y dimension. - * - * @param defaultCellContent - * the default content to assign to each cell created. - */ - public CellWorld(int xDimension, int yDimension, C defaultCellContent) { - for (int x = 1; x <= xDimension; x++) { - Map> xCol = new HashMap>(); - for (int y = 1; y <= yDimension; y++) { - Cell c = new Cell(x, y, defaultCellContent); - cells.add(c); - xCol.put(y, c); - } - cellLookup.put(x, xCol); - } - } - - /** - * - * @return all the cells in this world. - */ - public Set> getCells() { - return cells; - } - - /** - * Determine what cell would be moved into if the specified action is - * performed in the specified cell. Normally, this will be the cell adjacent - * in the appropriate direction. However, if there is no cell in the - * adjacent direction of the action then the outcome of the action is to - * stay in the same cell as the action was performed in. - * - * @param s - * the cell location from which the action is to be performed. - * @param a - * the action to perform (Up, Down, Left, or Right). - * @return the Cell an agent would end up in if they performed the specified - * action from the specified cell location. - */ - public Cell result(Cell s, CellWorldAction a) { - Cell sDelta = getCellAt(a.getXResult(s.getX()), a.getYResult(s - .getY())); - if (null == sDelta) { - // Default to no effect - // (i.e. bumps back in place as no adjoining cell). - sDelta = s; - } - - return sDelta; - } - - /** - * Remove the cell at the specified location from this Cell World. This - * allows you to introduce barriers into different location. - * - * @param x - * the x dimension of the cell to be removed. - * @param y - * the y dimension of the cell to be removed. - */ - public void removeCell(int x, int y) { - Map> xCol = cellLookup.get(x); - if (null != xCol) { - cells.remove(xCol.remove(y)); - } - } - - /** - * Get the cell at the specified x and y locations. - * - * @param x - * the x dimension of the cell to be retrieved. - * @param y - * the y dimension of the cell to be retrieved. - * @return the cell at the specified x,y location, null if no cell exists at - * this location. - */ - public Cell getCellAt(int x, int y) { - Cell c = null; - Map> xCol = cellLookup.get(x); - if (null != xCol) { - c = xCol.get(y); - } - - return c; - } -} diff --git a/src/aima/core/environment/cellworld/CellWorldAction.java b/src/aima/core/environment/cellworld/CellWorldAction.java deleted file mode 100644 index ae14bd8..0000000 --- a/src/aima/core/environment/cellworld/CellWorldAction.java +++ /dev/null @@ -1,142 +0,0 @@ -package aima.core.environment.cellworld; - -import java.util.LinkedHashSet; -import java.util.Set; - -import aima.core.agent.Action; - -/** - * Artificial Intelligence A Modern Approach (3rd Edition): page 645.
- *
- * - * The actions in every state are Up, Down, Left, and Right.
- *
- * Note: Moving 'North' causes y to increase by 1, 'Down' y to decrease by - * 1, 'Left' x to decrease by 1, and 'Right' x to increase by 1 within a Cell - * World. - * - * @author Ciaran O'Reilly - * - */ -public enum CellWorldAction implements Action { - Up, Down, Left, Right, None; - - private static final Set _actions = new LinkedHashSet(); - static { - _actions.add(Up); - _actions.add(Down); - _actions.add(Left); - _actions.add(Right); - _actions.add(None); - } - - /** - * - * @return a set of the actual actions. - */ - public static final Set actions() { - return _actions; - } - - // - // START-Action - //@Override - //public boolean isNoOp() { - // if (None == this) { - // return true; - // } - // return false; - //} - // END-Action - // - - /** - * - * @param curX - * the current x position. - * @return the result on the x position of applying this action. - */ - public int getXResult(int curX) { - int newX = curX; - - switch (this) { - case Left: - newX--; - break; - case Right: - newX++; - break; - } - - return newX; - } - - /** - * - * @param curY - * the current y position. - * @return the result on the y position of applying this action. - */ - public int getYResult(int curY) { - int newY = curY; - - switch (this) { - case Up: - newY++; - break; - case Down: - newY--; - break; - } - - return newY; - } - - /** - * - * @return the first right angled action related to this action. - */ - public CellWorldAction getFirstRightAngledAction() { - CellWorldAction a = null; - - switch (this) { - case Up: - case Down: - a = Left; - break; - case Left: - case Right: - a = Down; - break; - case None: - a = None; - break; - } - - return a; - } - - /** - * - * @return the second right angled action related to this action. - */ - public CellWorldAction getSecondRightAngledAction() { - CellWorldAction a = null; - - switch (this) { - case Up: - case Down: - a = Right; - break; - case Left: - case Right: - a = Up; - break; - case None: - a = None; - break; - } - - return a; - } -} diff --git a/src/aima/core/environment/cellworld/CellWorldFactory.java b/src/aima/core/environment/cellworld/CellWorldFactory.java deleted file mode 100644 index 16ad6ac..0000000 --- a/src/aima/core/environment/cellworld/CellWorldFactory.java +++ /dev/null @@ -1,27 +0,0 @@ -package aima.core.environment.cellworld; - -/** - * - * @author Ciaran O'Reilly - * - */ -public class CellWorldFactory { - - /** - * Create the cell world as defined in Figure 17.1 in AIMA3e. (a) A simple 4 - * x 3 environment that presents the agent with a sequential decision - * problem. - * - * @return a cell world representation of Fig 17.1 in AIMA3e. - */ - public static CellWorld createCellWorldForFig17_1() { - CellWorld cw = new CellWorld(4, 3, -0.04); - - cw.removeCell(2, 2); - - cw.getCellAt(4, 3).setContent(1.0); - cw.getCellAt(4, 2).setContent(-1.0); - - return cw; - } -} \ No newline at end of file diff --git a/src/aima/core/environment/gridworld/GridWorldFactory.java b/src/aima/core/environment/gridworld/GridWorldFactory.java index 35afccf..0d5f767 100644 --- a/src/aima/core/environment/gridworld/GridWorldFactory.java +++ b/src/aima/core/environment/gridworld/GridWorldFactory.java @@ -17,7 +17,12 @@ public class GridWorldFactory { GridWorld cw = new GridWorld(maxTiles, maxScore, nonTerminalReward); cw.getCellAt(maxTiles, maxScore).setContent(1.0); - + for (int score = 1; score < maxScore; score++) { + cw.getCellAt(maxTiles, score).setContent(-0.2); + } + for (int tiles = 1; tiles < maxTiles; tiles++) { + cw.getCellAt(tiles, maxScore).setContent(-0.2); + } return cw; } } \ No newline at end of file diff --git a/src/aima/core/probability/example/MDPFactory.java b/src/aima/core/probability/example/MDPFactory.java index 5494a73..ab21fb0 100644 --- a/src/aima/core/probability/example/MDPFactory.java +++ b/src/aima/core/probability/example/MDPFactory.java @@ -6,9 +6,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import aima.core.environment.cellworld.Cell; -import aima.core.environment.cellworld.CellWorld; -import aima.core.environment.cellworld.CellWorldAction; import aima.core.environment.gridworld.GridCell; import aima.core.environment.gridworld.GridWorld; import aima.core.environment.gridworld.GridWorldAction; @@ -19,30 +16,11 @@ import aima.core.probability.mdp.TransitionProbabilityFunction; import aima.core.probability.mdp.impl.MDP; /** - * - * @author Ciaran O'Reilly - * @author Ravi Mohan + * Based on MDPFactory by Ciaran O'Reilly and Ravi Mohan. + * @author Woody */ public class MDPFactory { - /** - * Constructs an MDP that can be used to generate the utility values - * detailed in Fig 17.3. - * - * @param cw - * the cell world from figure 17.1. - * @return an MDP that can be used to generate the utility values detailed - * in Fig 17.3. - */ - public static MarkovDecisionProcess, CellWorldAction> createMDPForFigure17_3( - final CellWorld cw) { - - return new MDP, CellWorldAction>(cw.getCells(), - cw.getCellAt(1, 1), createActionsFunctionForFigure17_1(cw), - createTransitionProbabilityFunctionForFigure17_1(cw), - createRewardFunctionForFigure17_1()); - } - public static MarkovDecisionProcess, GridWorldAction> createMDPForTileGame( final GridWorld cw, int maxTiles, int maxScore) { @@ -52,36 +30,6 @@ public class MDPFactory { createRewardFunctionForTileGame()); } - /** - * Returns the allowed actions from a specified cell within the cell world - * described in Fig 17.1. - * - * @param cw - * the cell world from figure 17.1. - * @return the set of actions allowed at a particular cell. This set will be - * empty if at a terminal state. - */ - public static ActionsFunction, CellWorldAction> createActionsFunctionForFigure17_1( - final CellWorld cw) { - final Set> terminals = new HashSet>(); - terminals.add(cw.getCellAt(4, 3)); - terminals.add(cw.getCellAt(4, 2)); - - ActionsFunction, CellWorldAction> af = new ActionsFunction, CellWorldAction>() { - - @Override - public Set actions(Cell s) { - // All actions can be performed in each cell - // (except terminal states) - if (terminals.contains(s)) { - return Collections.emptySet(); - } - return CellWorldAction.actions(); - } - }; - return af; - } - public static ActionsFunction, GridWorldAction> createActionsFunctionForTileGame( final GridWorld cw, int maxTiles, int maxScore) { final Set> terminals = new HashSet>(); @@ -102,59 +50,6 @@ public class MDPFactory { return af; } - - /** - * Figure 17.1 (b) Illustration of the transition model of the environment: - * the 'intended' outcome occurs with probability 0.8, but with probability - * 0.2 the agent moves at right angles to the intended direction. A - * collision with a wall results in no movement. - * - * @param cw - * the cell world from figure 17.1. - * @return the transition probability function as described in figure 17.1. - */ - public static TransitionProbabilityFunction, CellWorldAction> createTransitionProbabilityFunctionForFigure17_1( - final CellWorld cw) { - TransitionProbabilityFunction, CellWorldAction> tf = new TransitionProbabilityFunction, CellWorldAction>() { - private double[] distribution = new double[] { 0.8, 0.1, 0.1 }; - - @Override - public double probability(Cell sDelta, Cell s, - CellWorldAction a) { - double prob = 0; - - List> outcomes = possibleOutcomes(s, a); - for (int i = 0; i < outcomes.size(); i++) { - if (sDelta.equals(outcomes.get(i))) { - // Note: You have to sum the matches to - // sDelta as the different actions - // could have the same effect (i.e. - // staying in place due to there being - // no adjacent cells), which increases - // the probability of the transition for - // that state. - prob += distribution[i]; - } - } - - return prob; - } - - private List> possibleOutcomes(Cell c, - CellWorldAction a) { - // There can be three possible outcomes for the planned action - List> outcomes = new ArrayList>(); - - outcomes.add(cw.result(c, a)); - outcomes.add(cw.result(c, a.getFirstRightAngledAction())); - outcomes.add(cw.result(c, a.getSecondRightAngledAction())); - - return outcomes; - } - }; - - return tf; - } public static TransitionProbabilityFunction, GridWorldAction> createTransitionProbabilityFunctionForTileGame( final GridWorld cw) { @@ -170,13 +65,6 @@ public class MDPFactory { List> outcomes = possibleOutcomes(s, a); for (int i = 0; i < outcomes.size(); i++) { if (sDelta.equals(outcomes.get(i))) { - // Note: You have to sum the matches to - // sDelta as the different actions - // could have the same effect (i.e. - // staying in place due to there being - // no adjacent cells), which increases - // the probability of the transition for - // that state. prob += distribution[i]; } } @@ -198,7 +86,7 @@ public class MDPFactory { } private List> possibleOutcomes(GridCell c, GridWorldAction a) { - // There can be three possible outcomes for the planned action + List> outcomes = new ArrayList>(); switch (a) { @@ -224,21 +112,6 @@ public class MDPFactory { return tf; } - /** - * - * @return the reward function which takes the content of the cell as being - * the reward value. - */ - public static RewardFunction> createRewardFunctionForFigure17_1() { - RewardFunction> rf = new RewardFunction>() { - @Override - public double reward(Cell s) { - return s.getContent(); - } - }; - return rf; - } - public static RewardFunction> createRewardFunctionForTileGame() { RewardFunction> rf = new RewardFunction>() { @Override diff --git a/src/model/comPlayer/AdaptiveComPlayer.java b/src/model/comPlayer/AdaptiveComPlayer.java index 5413f1b..03725c5 100644 --- a/src/model/comPlayer/AdaptiveComPlayer.java +++ b/src/model/comPlayer/AdaptiveComPlayer.java @@ -119,5 +119,6 @@ public class AdaptiveComPlayer implements Player { @Override public void setGameGoal(GameGoal target) { this.target = target; + this.calculatePolicy = true; } } \ No newline at end of file diff --git a/src/model/mdp/Action.java b/src/model/mdp/Action.java deleted file mode 100644 index cda7b6d..0000000 --- a/src/model/mdp/Action.java +++ /dev/null @@ -1,18 +0,0 @@ -package model.mdp; - -public class Action { - public static Action playToWin = new Action("PlayToWin"); - public static Action playToLose = new Action("PlayToLose"); - //public static Action maintainScore = new Action(); - - private final String name; - - public Action(String name) { - this.name = name; - } - - @Override - public String toString() { - return name; - } -} diff --git a/src/model/mdp/MDP.java b/src/model/mdp/MDP.java deleted file mode 100644 index fe534b7..0000000 --- a/src/model/mdp/MDP.java +++ /dev/null @@ -1,51 +0,0 @@ -package model.mdp; - -public class MDP { - public static final double nonTerminalReward = -0.25; - - public enum MODE { - CEIL, FLOOR - } - - private final int maxScore; - private final int maxTiles; - private final MODE mode; - - public MDP(int maxScore, int maxTiles, MODE mode) { - this.maxScore = maxScore; - this.maxTiles = maxTiles; - this.mode = mode; - } - - public Action[] getActions(int i, int j) { - if (i == maxScore) { - return new Action[0]; - } - if (j == maxTiles) { - return new Action[0]; - } - return new Action[]{Action.playToLose,Action.playToWin}; - } - - public int getMaxScore() { - return maxScore; - } - - public int getMaxTiles() { - return maxTiles; - } - - public double getReward(int score, int tiles) { - if (score == maxScore && tiles == maxTiles) { - return 10.0; - } - // TODO scale linearly? - if (score == maxScore) { - return -1.0; - } - if (tiles == maxTiles) { - return -5.0; - } - return nonTerminalReward; - } -} \ No newline at end of file diff --git a/src/model/mdp/MDPSolver.java b/src/model/mdp/MDPSolver.java deleted file mode 100644 index 812fed2..0000000 --- a/src/model/mdp/MDPSolver.java +++ /dev/null @@ -1,5 +0,0 @@ -package model.mdp; - -public interface MDPSolver { - Policy solve(MDP mdp); -} diff --git a/src/model/mdp/Policy.java b/src/model/mdp/Policy.java deleted file mode 100644 index 66b9b0c..0000000 --- a/src/model/mdp/Policy.java +++ /dev/null @@ -1,7 +0,0 @@ -package model.mdp; - -import java.util.ArrayList; - -public class Policy extends ArrayList{ - -} diff --git a/src/model/mdp/Transition.java b/src/model/mdp/Transition.java deleted file mode 100644 index 5148b8f..0000000 --- a/src/model/mdp/Transition.java +++ /dev/null @@ -1,34 +0,0 @@ -package model.mdp; - -public class Transition { - private double prob; - private int scoreChange; - private int tileCountChange; - - public Transition(double prob, int scoreChange, int tileCountChange) { - super(); - this.prob = prob; - this.scoreChange = scoreChange; - this.tileCountChange = tileCountChange; - } - - public double getProb() { - return prob; - } - public void setProb(double prob) { - this.prob = prob; - } - public int getScoreChange() { - return scoreChange; - } - public void setScoreChange(int scoreChange) { - this.scoreChange = scoreChange; - } - public int getTileCountChange() { - return tileCountChange; - } - public void setTileCountChange(int tileCountChange) { - this.tileCountChange = tileCountChange; - } - -} \ No newline at end of file diff --git a/src/model/mdp/ValueIterationSolver.java b/src/model/mdp/ValueIterationSolver.java deleted file mode 100644 index 35e9d87..0000000 --- a/src/model/mdp/ValueIterationSolver.java +++ /dev/null @@ -1,110 +0,0 @@ -package model.mdp; - -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.List; - -public class ValueIterationSolver implements MDPSolver { - public int maxIterations = 10; - public final double DEFAULT_EPS = 0.1; - public final double GAMMA = 0.9; //discount - - private DecimalFormat fmt = new DecimalFormat("##.00"); - public Policy solve(MDP mdp) { - Policy policy = new Policy(); - - double[][] utility = new double[mdp.getMaxScore()+1][mdp.getMaxTiles()+1]; - double[][] utilityPrime = new double[mdp.getMaxScore()+1][mdp.getMaxTiles()+1]; - - for (int i = 0; i <= mdp.getMaxScore(); i++) { - //StringBuilder sb = new StringBuilder(); - for (int j = 0; j <= mdp.getMaxTiles(); j++) { - utilityPrime[i][j] = mdp.getReward(i, j); - //sb.append(fmt.format(utility[i][j])); - //sb.append(" "); - } - //System.out.println(sb); - } - - converged: - for (int iteration = 0; iteration < maxIterations; iteration++) { - for (int i = 0; i <= mdp.getMaxScore(); i++) { - for (int j = 0; j <= mdp.getMaxTiles(); j++) { - utility[i][j] = utilityPrime[i][j]; - } - } - for (int i = 0; i <= mdp.getMaxScore(); i++) { - for (int j = 0; j <= mdp.getMaxTiles(); j++) { - Action[] actions = mdp.getActions(i,j); - - double aMax; - if (actions.length > 0) { - aMax = Double.NEGATIVE_INFINITY; - } else { - aMax = 0; - } - - for (Action action : actions){ - List transitions = getTransitions(action,mdp,i,j); - double aSum = 0.0; - for (Transition transition : transitions) { - int transI = transition.getScoreChange(); - int transJ = transition.getTileCountChange(); - if (i+transI >= 0 && i+transI <= mdp.getMaxScore() - && j+transJ >= 0 && j+transJ <= mdp.getMaxTiles()) - aSum += utility[i+transI][j+transJ]; - } - if (aSum > aMax) { - aMax = aSum; - } - } - utilityPrime[i][j] = mdp.getReward(i,j) + GAMMA * aMax; - } - } - double maxDiff = getMaxDiff(utility,utilityPrime); - System.out.println("Max diff |U - U'| = " + maxDiff); - if (maxDiff < DEFAULT_EPS) { - System.out.println("Solution to MDP converged: " + maxDiff); - break converged; - } - } - - for (int i = 0; i < utility.length; i++) { - StringBuilder sb = new StringBuilder(); - for (int j = 0; j < utility[i].length; j++) { - sb.append(fmt.format(utility[i][j])); - sb.append(" "); - } - System.out.println(sb); - } - - //utility is now the utility Matrix - //get the policy - return policy; - } - - double getMaxDiff(double[][]u, double[][]uPrime) { - double maxDiff = 0; - for (int i = 0; i < u.length; i++) { - for (int j = 0; j < u[i].length; j++) { - maxDiff = Math.max(maxDiff,Math.abs(u[i][j] - uPrime[i][j])); - } - } - return maxDiff; - } - - private List getTransitions(Action action, MDP mdp, int score, int tiles) { - List transitions = new ArrayList(); - if (Action.playToWin == action) { - transitions.add(new Transition(0.9,1,1)); - transitions.add(new Transition(0.1,1,-3)); - } else if (Action.playToLose == action) { - transitions.add(new Transition(0.9,1,1)); - transitions.add(new Transition(0.1,1,-3)); - } /*else if (Action.maintainScore == action) { - transitions.add(new Transition(0.5,1,1)); - transitions.add(new Transition(0.5,1,-3)); - }*/ - return transitions; - } -} \ No newline at end of file diff --git a/test/PlayerModel.dat b/test/PlayerModel.dat new file mode 100644 index 0000000000000000000000000000000000000000..71609408e3866791aab473ffad3703e7a6885252 GIT binary patch literal 419207 zcmeI5afnv;p{9?a>vBjvX9`2zWv5dp1kKgzUQ0yzDxhLdUR)R_3RtlyEpc<@90_o>y5oXAN=*QKm59C zehe8}wY^q7`LkDl{?mWe{+~_xhK5)^B_L zrC;>*ZrS#F{ii*#eOKS_|F}1+foe!s_1MnAPrR>sx_SJrzAZg3z1rKm{?}bwcJ{s8 zzoY&a|62WX*zbP#yZ^qc_sh?_1yuRG+rRpGzwocmkNKyMK74a%RSo_L7iz$h67c`} zaetsZY+*Iz)rHmLD;6*R=`R-lN!P;q_dC0)$7_Jyy<7S=?ri;p7q8#Bxo`c3o{jY< z-0}Xq<;SUxhs__?@kR5``crtSAZaiEitS&1#8ZDh@`dM?{Pp!ms-cUjCwjK8?_RY2 zSAE-iU#lMP+g$%A-Mqc0yZ%Ev2mg?F-fFA=`EmWTyZ+-Q{*iy9FaK){eoR%>tr@bq zYU6XSiPRtbjls{Q{O4(3xP4dssqg!DHSOzum|Dz)N2d{u_ zfAnugg~{;zD^7_=wEmrDXn95}GFq9@s*F}= zv?imq8Li7`Lq^>hZOUkKMq4xL$!J?fJ2L9cs4t`488w%MFZV(@xA)|u_GUDY(Y}oK zXLKN=gBcyl=`>5^jgBXo{@@%*Mn9#&AF(eRW#4A6cQ?$(?vx?Vo&RZ zqts+?Mgtk`%V>W_2QoUC(V>jq&ggJPM>0B^(XotFGzGYvaIR!@H6s-b!g~qlenu*~ z`K#UJMB|F4N-7!>DjE_h8WJb+(&35*j*5oF-K>dL(M-xnMN=hjbVI@$-H>Q* zu6d)|8{MT5&2`lk4T;ZFCwDSZ(M@kABpMY>m3%H5605Q%t20v3@KVu`@J2TzR5T=1 zbR*w!c?lh#k&31_a}$n=29An`L|@iqcSb52UMd1Y=$wpH zG-uGtgrlN?qoN^kG;89Qivj0@grlN?qoP%`imv}=vMjF>%QIS$k&5P8(wlHpG;mZj zBrau5R5WmICma>6qE)nt{{71;mfI*jd6n3f(TC&FjjEqz?CzXnZ z#MZ2diUy8~hJ=cS#3y;esAv_fqU+lzi}Ku9oY9hux-wGHTya!1Bo1axR5Wl@G$d5C z?-$+b<7s)}a7A;vsAx!ZXH7O`v^gUc4MG(SiE~*K6%8B}t)f+QJ((BgRl*g`RboTJ zQPIHJmvHuHbRZ)Y4MG(Si5poH6|JIGw2J=93yG}DtAvW?5;~A@R5Wl-C!8}Goy|x^ zgHT0xJS;4#s^j;Uq`|ff@8}#1-+8O8{^!T_PrstO<$e0R(OvpczRKSl-H>oaLqbJE zLPbO3dj2)!!;C)4NJXpY*2z3IOX73UoM+zXhJ-5`60T@SoXSgwiUy8~hQ$4>$%EFY zfAF_$9q+$eewsA%A*Xh^7NNZid6Mn$XW z*2z3COQNDVgH$vmc4tlcGg8s;I+1WxG;mZjBvf?c;|Y1XxS}bEiiX6htci*Sj*5oF zv8>7Qj8rtdt|S~44ICA%qFX2P+&o=WG^dM-hD2Z1L`4HfMML6)tjXn!R5ZNqBpem3 zqE&SL&0s6?+*p~Biso9Pq9JiKYoeloqoN^kJ8SZJMk>1T@%TjJjc%&siiU)WhD2|k zFe(~2DjE`(vL-4TI4Zi4@7OHKxQtXZy_u76R5Wl@G$d3sB#z_>qoRSMq9O5F)8bu!P)lBj6TAQcUXo~+5Xj8rtdR5T>s%bKWY;Hc=v z$D^|(u4qc4q9L&~YoeloqoN^kC~NX|Mk*R!DjE`>W=&MIif*0EGqNNqnlng6Lt<;z zL`4HfMML6z*5pD)DjHrYy7BR-EQu?clBj4%bY)FcG;mZjBo1axR5Wl@G$cOBn%vAt zMXTu6$=seLQPG@KDjE`-vnDDUI4T+v=dva$8aOI?pc>xMS#VVEywz48KdygvzgfSo z(}?ViE4pwha8xuTmSjy-G;mZjBo1UvR5Wl@G$d|hO;ogsZr#VzvLw?pQqi1LDjE`- zvL-4TI4T+vXR{_M8aOJt<6&V@RUKb6{~T-iDjGN{8WN|nCZ{t}(eP5yka*Bq6BS*+sOZ+42d-#3v9|SeDNn(xXyEKk zI4T-ADjE{kvL@FvQqk~I(T$HMXGvVqlte{CLPbMjPo6L;8aO8tj*143iiX6!tjYb1 zRJ4k2oy?tC5*5u!rJ^CBq9Ji2PZ$*qoT~{(MFU4gH}ajBC7G0wE1KS@Xh^Kdny6^t zsAx#2Xh>Yi6GlY?=WfDL(JH!iAJ5B@%+E+gb5f~jNHkxfGV-gh2cV*XqoN_9q9Ji7 zud6D$@$rO2rDQqk~I(U3TvHBr&PQPGf4(T$JC=jo!NDaqV~qoRSM zq9M_jHQAk!iiVephQ#HpiHZh}idNCBx3K5r>7t@JT~;O>6%8B}4T+;!lVcgFXn3h; zNPM0(QPC<|Mc3bwygbj16&a~$t|h$*M@0iiMML6J*5reXR5ZL)bmQZ3SrTt_QxaD+ zBvdpccH{}8qJeWH;izcfsAx!hmNmJZk&0H)t&@3nmPAE!QmJT2sAxzW&J#vO1LtDG zQPIFr(T#k^WJ$(m&I4T-ADjE`7vnD+msc3kqXh^7NNZia5Mn$XW)?3)^SrQe^ z8Pt_j*3>%t^0UtmSkE+Dw>l@MMFYG zLtYony6^t zsOZMWld~i$nv%GpA+a`VqN0JbH{qyg;G9Z0DjGN{8WQ)jCMsG*x8BU}%#x^R&L9;H zi9J~p6%Cw|2}eZ(=UT#1(ZEsBjgKc~NnFvCL`6elP1Zz314l(e;zZU&MFZz*!coz{ zxtDNMw2E$>%=5D(Dw;D$MMI)LYoeloqoN^kC2OLhfpa(EsOZMW6BCUqn%<~rNUYA9 zsA%A*Xh#>W#9jf$o>^Ae7V29An`#O|z#iUy8~hQ#HpiHZh} zif(*7K1-saDaqV~qoRScD&eST;HYRw9Lt)hXyB-5NPM0(QPC>8^(o~!SrQe^8MHFt zsA%ByB^(tE92E_T53(jI8aOJt@$tAUi7T3tsAx#6$eO5V;PfUO6%CxD2}eZ(M@2*8 zcGg5itLWCrJUdIGqB(<9G$eLpO;j{+jwBov4V+5}M@0iiMK?YkntvplB~j6wK`I&&+p;Dq8aOH%67OYAR5WmI zB^(vq_;^gBaYfS`6%C1HSrZiv92E_Tx3eZH8aOH%5}#&GRCMFx(TPSy)0>$IM@0ii zMMI(|YoeloqoN^kA#0+dfuo`uACJnCsAx(uBjKoM;4DozDjGN{8WM-HCMp^@DjE_u zvnDE9MYles+@2*-(VRhD2}eZ(XKTVy(ZEsBkT{<;QPIFr(N*`G)j%~OOX7;ABq|yb zOR^>^8aSI1j*14(!Gxotfuo`!@k!Q1MXTu6$vizvqM|v2R5TcWKC2wa6U>nD!Sug!BbUi#6d=` zXmFIL;8k=fNFlK(&ma{I92E_TeOVI~4IC8>i4U_TD!Rbw9|Y6Z@&3Ez$0-%vI+>?t zZ&WlVm5PSMhOCK-29An`#ObVwiUy8~hQx!`nyBakrdfqkTG5oDsG@`6E(8aO8tj*143iiX6!tci+N(XEqtewIWVl5)}=J)mal24IC8>iQ`!l6%Cv#2}eZ(=WfDL(JH!iGSADBsA$e06%C2q zSrZiv92E_T%UKf@4V*g(M@2V2o{(r<(ey?|Lt<6dL`4HfMML6P)(I4T-AD-({2 z29An`#L=vYiUy8~hQ#fxiHcUytxqY>&XTBT&Y%?uM@0jtH{qyg;HYRwT*{iLXyB;m z#>Zo`B(7*mqM{+OJZqw&fwLpwsA%9ENjNGRI4T+vpJh!{w2E$>%(Jp2Dw;D$MMGj+ z)br-92E^56%C24SrZiv92E_T^H~!W4IC9+ zb-!5+R3ow^Dw>kCCmau{r)No2G-uF~ zgrlN?vpM0YXyB-5NSw=>sA%A*=z(f@M`yuNz4KOEef+ro>53lN@kK$>+}~HxkXW48 z5)}=cO$kRu1Lr`(QPIFr(U7>2HBr$jx^*&7%aW*Q&L9;HiSDe4iU!X9grlN?b2j0q zXyB;mj)#RsRk0BV+v17_M|lcfMVEpU5{vR$qN0JLq9L&_Yoelob0*=aXyAO5a8$I4 zZk^0ivm`2-Ge|{4VnfzMMFU4gL*jJSL`4JV!-S)v3!MHzFl`<0zgvEsQqd~9-p32_ zbXk~@istg7q9HMmHBr&PQPGgNo;6X?zkWldBx za8xuTPGwD0G;mZjB<^QTRCMFx$%#fq(;HVbB-Un4R5Wn*CL9$F92E_TYgrQ&4ICBS z_;^y5L`74Q&V-|)fuo`!u_tSyqJeWV;izcfsAx#s%bKWY72W#e`ur@3islSjlWtvpnB~j6wK`I&&yR#-L8aT%jj*143iiX6Utci+ld^{o1xT5KeiiX6htci*S zj*5oFv8;)T2F~S#qoRSMq8lHN&yuKUN-{U$sA%A*Xh`&BO;j{+R5T<$$eO5V;C!BN zRJ4k2y@fp|OQNDVgH|RS6%8B}4T+;!6BP{{6%C2oSrZlA_;_5RaYfS`6%C0MSrZiv zoZf_^qJg8LA#o{dqN0JLq8lHN&622SN-{g)sA%A*Xh`hHny6^t97#AT8aOH%5}##F zRJ4k2y@fq1OQNDVgO(>86%8B}4T-~96BP}diwQ?X14l(SJ|2@LaYa)S6%C1HSrZiv zoNWn5MFU4gL*l)xiHZi!t%ReZRdnlQo|z?4(VRgl8WKHO6BP}dw-b(v29An`#HU#k z72WuFbfR%Z(;F2HiKST+6%8B}4T(cp6BP}d3kgR>14l(SJ|2}NQPGrSM#53iz){hV z*qSv_(ZEsBkT{<;QPIG;nQ&CJif+Ay-JT^;(VRhD2}eZ(M@2*8VAe!M14l(e;*+e2 zimtlftOlwPiN+O8Z&WlSmSjy-G;lU292E^56%C1VSrZiv92GrK4e#hIII4HvYO9YQ z*FRPC$c`@xlIG^Y^gM%9G;mZjBsOJDR5Wl7BpekD92E_T8(9+-t)g3RVNc7FsA$fh z#R*4614l(eVt>{|MFZz-!coz{QPCX_3yZ2^BM!F36%CGZe_utHf)o;q@>-&zfzzFE zR5Wl@G$hVsO;j{+K1w($T1B@`=BZf{70nr>q9L&%YoelovoGPOXyB-5NPL(zQPBlX z{~(yQj`!a!KTfG=6-iDjGN{8WN|nCMp^@*AtG429Anud^|ZzqM|8@D;g4OvnDDU zIC~S0iUy8~hQzh3iHZi!{e+{URdnmk{LUa~*B-Uh2R5Wl@G$c-BO;j{+t|lB64ICBS_;_NLL`74Q`3Xlw14l(e zqCab*qJg8LA#o*ZqN0IwH{qyg72SFZdtR1AMRNwNPB8aN*$92E^5 z72WuFT$aQYO-WQVBvxciR5WmU6OM`oj*5oFrL2jH2F~q-qoP%G>tvptB~j6wK`I&& zJF+G!8aPK1j*143iiX5zSrZlA_;_rhaYfS`6%C2ySrZiv92E_T!&ws*4V;S!M@0ii zMK?YklO<8nlw?-IQPIFr(U91dHBr&PQPGfiFKeQrfpaV2sAv`4dJB7ImPAE!1}#fC zDjGN{8WL}3O;j{+R5T<$&6=p_#>b-*jVqeosAxzm&6=oa;PfOM6%8B}4T%d`6BP{{ z72WuFRF*_VQ<518M@0iiMMGk1)8^%iz}mPAE!26ZJI z6%8B}4T*zU6BP}d^9e^q14l(y-EUR{)rc&KE1Ht1XhWXrbXk~@istgNA>pWK;HYRw zoX(o4XyB-5NIYn*iHa^@n!B`9TG5mnx2|a5sAx#6%bKWY;0z=j6%8B}4TgASrQe^8Kj~i zu_tSyqJeWV;izcfTuV4A8aOJt@$sZAi7T3tsAx#6$(pEW;HYRwoXDD}Xy9B;I4T-A z_Y#hZR?)4Kd485eMRNwJXh`&DO;j{+R5T>6WKC2waPB4?72WuFVxn3ir&>*J^#zDH@0_g>}lW8v;Nl`dw)Lo>uvS1{k3g7 zcJ(c-fA#Y0O()*kfA(MeVrcz+T082`Yw-2OJ}qDLyrCR=%i%~lTrP)2<#4zhR+Yo) zau{0FNZ_ru(urMl*5X0m{$&? z%Hdu)94&|C<s1-IczJ356WRdIb0}*Ps*XQ9M+Y?h;n$Z9Ojlo zPdThEhrxgSspVG(%3*CeTq=j@<#3}M&X&Wza+p~TpO?e#a`>hr5whU!?|*}S`N3%VN5x+m&4+6I93jCm&5ULxL6M7 z%V9@3j4p?RqtanZbtO#?TDu3cbLyT6OVf4a751?h-loZh#o&3(F@}unwO6n zy*3{8qbDMI>|R9A{B1g9;O_b{SI6ZF?l#-qO9AJNPU z5w+#tS$^~&9@Y8Vh<@l_3bYr`dXfk-dZ^poIJN*?T!!p-sp%PpB>RRk4N;(l!&U|MD*>qBYJK^ zL@$nsXvFP^UcMXAcmFz~?;MP1^rsO`xf0PUcOq&V7SXg1B6=jRzCX?j-}m;!2UgkD zC-Re+Jug0RUj7#vIU^o5BL8cDHN4Y9`&7x5xtx>8I~XT?aA?h&ySC2_^F6~ zv?HSL{Fm0KfAF^sk$PD2f7d`1KkJ|hANpAmtC&xk<6XG9?3 zGa``i84*bMj7UwQULAgE7LD*rvmoJ@W<|Ht zLj)2I5lA>hAmI?HNjOBl@`le25zY;V2qYXLkZ_1V!XW|)hX^DbB9QQVL?Ge!h(N;c z5rKr?BT|#7SBKxMP9ywgbx8Ql>X7i8)gj?Gt3$$XR)>V&tPY9C{ATrnqW=4Y->gn2 z{AP7X_|58&@SD|}tAwwbMI(IGEJ*mOS&;BmvmoKCWB|Ap!}92qYXLkZ_1V!XW|)hX^DbB9L&1 zK*Avc35Q5c!XfgNFS>f}Kuj`+2&aof1QHGrNH|0w;Shm@Lj)2I5lA>hAmI>!ghK=p z4w0IKL&PD{u_^Y~#Oo0`M7VG`L?Gc1frLW@5)KhaI7A@f5P^h41QHGrNH|1l5)KiE zh(m-+!i#?u`|HUnB8LdP93qf#h(N+20ttr*Bpf1;aEL&{Ap!}9NKL{a;t+9&a7l28 zK;m0}7yIk!zmLcv0xyRMBpf1;aEL&{Ap!}92qYXLkZ_39Bpf0R5r+tu1cwMD93qft z+Zy|8>g9+WBJgsEK*Avc35N(I93qf#h(N+2Qj>6qI7A#GToN22kZ_1V!XW~QaW`Ur z%~%kTLj+z95lA>hAmI>!ghK=p4w0IKL&PEC5aE*G5P^h41QHGrNH|0w(S9-Z*AJhJ z$RPqRhX^DbB9L&1K*AwXlW>SQL>wYq5*#9saEL&{Ap!}92qYXLkoex{*k6y&j>sVb zFNX*u93qf#h}0w;A`TIU2$uwh2qYXLkZ_1V!XW|)hX^DbB9M6HPVBF?VG%h*;N=j3 zghQkz;Sh0%I7GN4I7A@f5P^h41QHGrNH|0w;Shm@Lj)4v9UJ@Wk$Dk0MBwERsYy6Q z93l=8E(s11NH|0w;Shm@Lj)2I5lA>hAmI>!ghK=pKW>Zt^___kIYeq+4iSflLxc;5 zLj)2I5lA>hAmI>!ghK=p4iQK=L?Gc1frLY(CQ(m}=l93@dSYlquWXL!>%Wa?Mk%-3JjOf+d5lveh(cCi;J##ps=MF?PZdF8&pN{B- zaS_eSM~z+^kNVLQ5j}Ph zAmI>!ghK=p4iQK=L?Gc1sYy6QzVcj$Lxgj~Ap(ivvtp7tM1bQEfrLW@5)KhaI7A@f z5P^h41QHGrNH|1l5)KiEh|h>{N%+ZQu{shAmI>! zghK=p4w0IKL&PEC5aE*G5P`&;xv@GNBET8HGT=Bwfa4H>ghK=p4iQK=L?Gc1frLY( zCgBiqh&V*JBsfGM;Shnuf*G+o93sFObt~XFM1bQEfrLW@5)KhaI7A@v|IQHUt)AMt ze)szJU42`6+F$MMUH|K@Ej#=A|JA)`-~EgA|Mrn;$ZOTGom=*7?B7xUyM67gVfFEe zoxRmFZ*1@0*wenFXZ^1?_WpeE*FRhT#>TGguMZ-qsv$$Gp|4e6d%X^q?t0_3>d~FQ z+TPpzDUa0;8GKdeKRsXn>XG{A;P*Wb{%%P9y9q@=gTH&U<#$8t-wl3G_Tc|k|E7B8 M%SV>aYfS(D1PWHUO#lD@ literal 0 HcmV?d00001 diff --git a/test/aima/core/probability/mdp/MarkovDecisionProcessTest.java b/test/aima/core/probability/mdp/MarkovDecisionProcessTest.java index e266e92..200c644 100644 --- a/test/aima/core/probability/mdp/MarkovDecisionProcessTest.java +++ b/test/aima/core/probability/mdp/MarkovDecisionProcessTest.java @@ -5,91 +5,57 @@ import junit.framework.Assert; import org.junit.Before; import org.junit.Test; -import aima.core.environment.cellworld.Cell; -import aima.core.environment.cellworld.CellWorld; -import aima.core.environment.cellworld.CellWorldAction; -import aima.core.environment.cellworld.CellWorldFactory; +import aima.core.environment.gridworld.GridCell; +import aima.core.environment.gridworld.GridWorld; +import aima.core.environment.gridworld.GridWorldAction; +import aima.core.environment.gridworld.GridWorldFactory; import aima.core.probability.example.MDPFactory; import aima.core.probability.mdp.MarkovDecisionProcess; /** - * - * @author Ciaran O'Reilly - * @author Ravi Mohan - * + * Based on MarkovDecisionProcessTest by Ciaran O'Reilly and Ravi Mohan. Used under MIT license. */ public class MarkovDecisionProcessTest { public static final double DELTA_THRESHOLD = 1e-3; - private CellWorld cw = null; - private MarkovDecisionProcess, CellWorldAction> mdp = null; + private double nonTerminalReward = -0.04; + private GridWorld gw = null; + private MarkovDecisionProcess, GridWorldAction> mdp = null; @Before public void setUp() { - cw = CellWorldFactory.createCellWorldForFig17_1(); - mdp = MDPFactory.createMDPForFigure17_3(cw); + int maxTiles = 6; + int maxScore = 10; + + gw = GridWorldFactory.createGridWorldForTileGame(maxTiles, maxScore, nonTerminalReward); + mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore); } @Test public void testActions() { // Ensure all actions can be performed in each cell // except for the terminal states. - for (Cell s : cw.getCells()) { - if (4 == s.getX() && (3 == s.getY() || 2 == s.getY())) { + for (GridCell s : gw.getCells()) { + if (6 == s.getX() && 10 == s.getY()) { Assert.assertEquals(0, mdp.actions(s).size()); } else { - Assert.assertEquals(5, mdp.actions(s).size()); + Assert.assertEquals(3, mdp.actions(s).size()); } } } @Test public void testMDPTransitionModel() { - Assert.assertEquals(0.8, mdp.transitionProbability(cw.getCellAt(1, 2), - cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD); - Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 1), - cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD); - Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(2, 1), - cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD); - Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 3), - cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD); - - Assert.assertEquals(0.9, mdp.transitionProbability(cw.getCellAt(1, 1), - cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD); - Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(2, 1), - cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD); - Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(3, 1), - cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD); - Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 2), - cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD); - - Assert.assertEquals(0.9, mdp.transitionProbability(cw.getCellAt(1, 1), - cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD); - Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(2, 1), - cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD); - Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(3, 1), - cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD); - Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 2), - cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD); - - Assert.assertEquals(0.8, mdp.transitionProbability(cw.getCellAt(2, 1), - cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD); - Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 1), - cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD); - Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 2), - cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD); - Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 3), - cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD); + Assert.assertEquals(0.66, mdp.transitionProbability(gw.getCellAt(2, 2), + gw.getCellAt(1, 1), GridWorldAction.AddTile), DELTA_THRESHOLD); } @Test public void testRewardFunction() { // Ensure all actions can be performed in each cell. - for (Cell s : cw.getCells()) { - if (4 == s.getX() && 3 == s.getY()) { + for (GridCell s : gw.getCells()) { + if (6 == s.getX() && 10 == s.getY()) { Assert.assertEquals(1.0, mdp.reward(s), DELTA_THRESHOLD); - } else if (4 == s.getX() && 2 == s.getY()) { - Assert.assertEquals(-1.0, mdp.reward(s), DELTA_THRESHOLD); } else { Assert.assertEquals(-0.04, mdp.reward(s), DELTA_THRESHOLD); } diff --git a/test/aima/core/probability/mdp/PolicyIterationTest.java b/test/aima/core/probability/mdp/PolicyIterationTest.java index 255f403..f9cbe22 100644 --- a/test/aima/core/probability/mdp/PolicyIterationTest.java +++ b/test/aima/core/probability/mdp/PolicyIterationTest.java @@ -1,15 +1,8 @@ package aima.core.probability.mdp; -import java.util.Map; - -import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import aima.core.environment.cellworld.Cell; -import aima.core.environment.cellworld.CellWorld; -import aima.core.environment.cellworld.CellWorldAction; -import aima.core.environment.cellworld.CellWorldFactory; import aima.core.environment.gridworld.GridCell; import aima.core.environment.gridworld.GridWorld; import aima.core.environment.gridworld.GridWorldAction; @@ -18,7 +11,6 @@ import aima.core.probability.example.MDPFactory; import aima.core.probability.mdp.MarkovDecisionProcess; import aima.core.probability.mdp.impl.ModifiedPolicyEvaluation; import aima.core.probability.mdp.search.PolicyIteration; -import aima.core.probability.mdp.search.ValueIteration; /** * @author Ravi Mohan @@ -29,28 +21,31 @@ public class PolicyIterationTest { public static final double DELTA_THRESHOLD = 1e-3; private GridWorld gw = null; - private MarkovDecisionProcess, GridWorldAction> mdp = null; + private MarkovDecisionProcess, GridWorldAction> mdp = null; private PolicyIteration, GridWorldAction> pi = null; final int maxTiles = 6; final int maxScore = 10; - + @Before public void setUp() { - //take 10 turns to place 6 tiles + // take 10 turns to place 6 tiles double defaultPenalty = -0.04; - - gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,maxScore,defaultPenalty); + + gw = GridWorldFactory.createGridWorldForTileGame(maxTiles, maxScore, + defaultPenalty); mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore); - - //gamma = 1.0 - PolicyEvaluation,GridWorldAction> pe = new ModifiedPolicyEvaluation, GridWorldAction>(100,0.9); + + // gamma = 1.0 + PolicyEvaluation, GridWorldAction> pe = new ModifiedPolicyEvaluation, GridWorldAction>( + 100, 0.9); pi = new PolicyIteration, GridWorldAction>(pe); } @Test public void testPolicyIterationForTileGame() { - Policy, GridWorldAction> policy = pi.policyIteration(mdp); + Policy, GridWorldAction> policy = pi + .policyIteration(mdp); for (int j = maxScore; j >= 1; j--) { StringBuilder sb = new StringBuilder(); @@ -60,21 +55,5 @@ public class PolicyIterationTest { } System.out.println(sb.toString()); } - - //Assert.assertEquals(0.705, U.get(gw.getCellAt(1, 1)), DELTA_THRESHOLD); - /* - Assert.assertEquals(0.762, U.get(cw1.getCellAt(1, 2)), DELTA_THRESHOLD); - Assert.assertEquals(0.812, U.get(cw1.getCellAt(1, 3)), DELTA_THRESHOLD); - - Assert.assertEquals(0.655, U.get(cw1.getCellAt(2, 1)), DELTA_THRESHOLD); - Assert.assertEquals(0.868, U.get(cw1.getCellAt(2, 3)), DELTA_THRESHOLD); - - Assert.assertEquals(0.611, U.get(cw1.getCellAt(3, 1)), DELTA_THRESHOLD); - Assert.assertEquals(0.660, U.get(cw1.getCellAt(3, 2)), DELTA_THRESHOLD); - Assert.assertEquals(0.918, U.get(cw1.getCellAt(3, 3)), DELTA_THRESHOLD); - - Assert.assertEquals(0.388, U.get(cw1.getCellAt(4, 1)), DELTA_THRESHOLD); - Assert.assertEquals(-1.0, U.get(cw1.getCellAt(4, 2)), DELTA_THRESHOLD); - Assert.assertEquals(1.0, U.get(cw1.getCellAt(4, 3)), DELTA_THRESHOLD);*/ } } diff --git a/test/aima/core/probability/mdp/ValueIterationTest.java b/test/aima/core/probability/mdp/ValueIterationTest.java deleted file mode 100644 index 9d1215e..0000000 --- a/test/aima/core/probability/mdp/ValueIterationTest.java +++ /dev/null @@ -1,64 +0,0 @@ -package aima.core.probability.mdp; - -import java.util.Map; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import aima.core.environment.cellworld.Cell; -import aima.core.environment.cellworld.CellWorld; -import aima.core.environment.cellworld.CellWorldAction; -import aima.core.environment.cellworld.CellWorldFactory; -import aima.core.probability.example.MDPFactory; -import aima.core.probability.mdp.MarkovDecisionProcess; -import aima.core.probability.mdp.search.ValueIteration; - -/** - * @author Ravi Mohan - * @author Ciaran O'Reilly - * - */ -public class ValueIterationTest { - public static final double DELTA_THRESHOLD = 1e-3; - - private CellWorld cw = null; - private MarkovDecisionProcess, CellWorldAction> mdp = null; - private ValueIteration, CellWorldAction> vi = null; - - @Before - public void setUp() { - cw = CellWorldFactory.createCellWorldForFig17_1(); - mdp = MDPFactory.createMDPForFigure17_3(cw); - vi = new ValueIteration, CellWorldAction>(1.0); - } - - @Test - public void testValueIterationForFig17_3() { - Map, Double> U = vi.valueIteration(mdp, 0.0001); - - Assert.assertEquals(0.705, U.get(cw.getCellAt(1, 1)), DELTA_THRESHOLD); - Assert.assertEquals(0.762, U.get(cw.getCellAt(1, 2)), DELTA_THRESHOLD); - Assert.assertEquals(0.812, U.get(cw.getCellAt(1, 3)), DELTA_THRESHOLD); - - Assert.assertEquals(0.655, U.get(cw.getCellAt(2, 1)), DELTA_THRESHOLD); - Assert.assertEquals(0.868, U.get(cw.getCellAt(2, 3)), DELTA_THRESHOLD); - - Assert.assertEquals(0.611, U.get(cw.getCellAt(3, 1)), DELTA_THRESHOLD); - Assert.assertEquals(0.660, U.get(cw.getCellAt(3, 2)), DELTA_THRESHOLD); - Assert.assertEquals(0.918, U.get(cw.getCellAt(3, 3)), DELTA_THRESHOLD); - - Assert.assertEquals(0.388, U.get(cw.getCellAt(4, 1)), DELTA_THRESHOLD); - Assert.assertEquals(-1.0, U.get(cw.getCellAt(4, 2)), DELTA_THRESHOLD); - Assert.assertEquals(1.0, U.get(cw.getCellAt(4, 3)), DELTA_THRESHOLD); - - for (int j = 3; j >= 1; j--) { - StringBuilder sb = new StringBuilder(); - for (int i = 1; i <= 4; i++) { - sb.append(U.get(cw.getCellAt(i, j))); - sb.append(" "); - } - System.out.println(sb.toString()); - } - } -} diff --git a/test/aima/core/probability/mdp/ValueIterationTest2.java b/test/aima/core/probability/mdp/ValueIterationTest2.java index a0c6ce1..7b1e0e2 100644 --- a/test/aima/core/probability/mdp/ValueIterationTest2.java +++ b/test/aima/core/probability/mdp/ValueIterationTest2.java @@ -6,10 +6,6 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import aima.core.environment.cellworld.Cell; -import aima.core.environment.cellworld.CellWorld; -import aima.core.environment.cellworld.CellWorldAction; -import aima.core.environment.cellworld.CellWorldFactory; import aima.core.environment.gridworld.GridCell; import aima.core.environment.gridworld.GridWorld; import aima.core.environment.gridworld.GridWorldAction; @@ -19,29 +15,30 @@ import aima.core.probability.mdp.MarkovDecisionProcess; import aima.core.probability.mdp.search.ValueIteration; /** - * @author Ravi Mohan - * @author Ciaran O'Reilly * + * @author Woody + * */ public class ValueIterationTest2 { public static final double DELTA_THRESHOLD = 1e-3; private GridWorld gw = null; - private MarkovDecisionProcess, GridWorldAction> mdp = null; + private MarkovDecisionProcess, GridWorldAction> mdp = null; private ValueIteration, GridWorldAction> vi = null; final int maxTiles = 6; final int maxScore = 10; - + @Before public void setUp() { - //take 10 turns to place 6 tiles + // take 10 turns to place 6 tiles double defaultPenalty = -0.04; - - gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,maxScore,defaultPenalty); + + gw = GridWorldFactory.createGridWorldForTileGame(maxTiles, maxScore, + defaultPenalty); mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore); - - //gamma = 1.0 + + // gamma = 1.0 vi = new ValueIteration, GridWorldAction>(0.9); } @@ -57,20 +54,7 @@ public class ValueIterationTest2 { } System.out.println(sb.toString()); } - - Assert.assertEquals(0.705, U.get(gw.getCellAt(1, 1)), DELTA_THRESHOLD);/* - Assert.assertEquals(0.762, U.get(cw1.getCellAt(1, 2)), DELTA_THRESHOLD); - Assert.assertEquals(0.812, U.get(cw1.getCellAt(1, 3)), DELTA_THRESHOLD); - Assert.assertEquals(0.655, U.get(cw1.getCellAt(2, 1)), DELTA_THRESHOLD); - Assert.assertEquals(0.868, U.get(cw1.getCellAt(2, 3)), DELTA_THRESHOLD); - - Assert.assertEquals(0.611, U.get(cw1.getCellAt(3, 1)), DELTA_THRESHOLD); - Assert.assertEquals(0.660, U.get(cw1.getCellAt(3, 2)), DELTA_THRESHOLD); - Assert.assertEquals(0.918, U.get(cw1.getCellAt(3, 3)), DELTA_THRESHOLD); - - Assert.assertEquals(0.388, U.get(cw1.getCellAt(4, 1)), DELTA_THRESHOLD); - Assert.assertEquals(-1.0, U.get(cw1.getCellAt(4, 2)), DELTA_THRESHOLD); - Assert.assertEquals(1.0, U.get(cw1.getCellAt(4, 3)), DELTA_THRESHOLD);*/ + Assert.assertEquals(-0.1874236, U.get(gw.getCellAt(1, 1)), DELTA_THRESHOLD); } } diff --git a/test/model/mdp/ValueIterationSolverTest.java b/test/model/mdp/ValueIterationSolverTest.java deleted file mode 100644 index dac3656..0000000 --- a/test/model/mdp/ValueIterationSolverTest.java +++ /dev/null @@ -1,26 +0,0 @@ -package model.mdp; - -import static org.junit.Assert.assertTrue; -import model.mdp.MDP.MODE; - -import org.junit.Test; - -public class ValueIterationSolverTest { - - @Test - public void testSolve() { - MDPSolver solver = new ValueIterationSolver(); - - //solve for a score of 25 in at most 35 turns - int maxScore = 6; - int maxTurns = 10; - - MDP mdp = new MDP(maxScore,maxTurns,MODE.CEIL); - Policy policy = solver.solve(mdp); - - assertTrue(policy.size() >= maxScore); - assertTrue(policy.size() <= maxTurns); - - System.out.println("Policy: " + policy); - } -}