Implemented an agent which chooses to play winning, losing, or random moves by solving a simplified MDP model of the game using policy iteration.

Portions of MDP/solver code by Ciaran O'Reilly and Ravi Mohan used under MIT license.
This commit is contained in:
Woody Folsom
2012-04-30 13:35:40 -04:00
parent c06f7ab38e
commit d0ee1e647b
35 changed files with 2500 additions and 3 deletions

View File

@@ -0,0 +1,19 @@
package aima.core.agent;
/**
 * Describes an Action that can or has been taken by an Agent via one of its
 * Actuators.
 * <p>
 * Currently a marker interface: the isNoOp() query below is commented out, so
 * implementors (e.g. the cell/grid world action enums in this commit) need not
 * supply any methods.
 *
 * @author Ciaran O'Reilly
 */
public interface Action {
/**
 * Indicates whether or not this Action is a 'No Operation'.<br>
 * Note: AIMA3e - NoOp, or no operation, is the name of an assembly language
 * instruction that does nothing.
 *
 * @return true if this is a NoOp Action.
 */
//boolean isNoOp();
}

View File

@@ -0,0 +1,87 @@
package aima.core.environment.cellworld;
/**
* Artificial Intelligence A Modern Approach (3rd Edition): page 645.<br>
* <br>
* A representation of a Cell in the environment detailed in Figure 17.1.
*
* @param <C>
* the content type of the cell.
*
* @author Ciaran O'Reilly
* @author Ravi Mohan
*/
public class Cell<C> {
private int x = 1;
private int y = 1;
private C content = null;
/**
* Construct a Cell.
*
* @param x
* the x position of the cell.
* @param y
* the y position of the cell.
* @param content
* the initial content of the cell.
*/
public Cell(int x, int y, C content) {
this.x = x;
this.y = y;
this.content = content;
}
/**
*
* @return the x position of the cell.
*/
public int getX() {
return x;
}
/**
*
* @return the y position of the cell.
*/
public int getY() {
return y;
}
/**
*
* @return the content of the cell.
*/
public C getContent() {
return content;
}
/**
* Set the cell's content.
*
* @param content
* the content to be placed in the cell.
*/
public void setContent(C content) {
this.content = content;
}
@Override
public String toString() {
return "<x=" + x + ", y=" + y + ", content=" + content + ">";
}
@Override
public boolean equals(Object o) {
if (o instanceof Cell<?>) {
Cell<?> c = (Cell<?>) o;
return x == c.x && y == c.y && content.equals(c.content);
}
return false;
}
@Override
public int hashCode() {
return x + 23 + y + 31 * content.hashCode();
}
}

View File

@@ -0,0 +1,123 @@
package aima.core.environment.cellworld;

import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 645.<br>
 * <br>
 * A representation for the environment depicted in figure 17.1.<br>
 * <br>
 * <b>Note:</b> the x and y coordinates are always positive integers starting
 * at 1.<br>
 * <b>Note:</b> If looking at a rectangle - the coordinate (x=1, y=1) will be
 * the bottom left hand corner.
 *
 * @param <C>
 *            the type of content for the Cells in the world.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public class CellWorld<C> {
    // All cells currently in the world, in insertion (column-major) order.
    private Set<Cell<C>> cells = new LinkedHashSet<Cell<C>>();
    // Two-level index: x -> (y -> cell).
    private Map<Integer, Map<Integer, Cell<C>>> cellLookup = new HashMap<Integer, Map<Integer, Cell<C>>>();

    /**
     * Construct a Cell World with size xDimension * yDimension cells, all with
     * their values set to a default content value.
     *
     * @param xDimension
     *            the size of the x dimension.
     * @param yDimension
     *            the size of the y dimension.
     * @param defaultCellContent
     *            the default content to assign to each cell created.
     */
    public CellWorld(int xDimension, int yDimension, C defaultCellContent) {
        for (int col = 1; col <= xDimension; col++) {
            Map<Integer, Cell<C>> column = new HashMap<Integer, Cell<C>>();
            for (int row = 1; row <= yDimension; row++) {
                Cell<C> cell = new Cell<C>(col, row, defaultCellContent);
                cells.add(cell);
                column.put(row, cell);
            }
            cellLookup.put(col, column);
        }
    }

    /**
     * @return all the cells in this world.
     */
    public Set<Cell<C>> getCells() {
        return cells;
    }

    /**
     * Determine what cell would be moved into if the specified action is
     * performed in the specified cell. Normally, this will be the cell
     * adjacent in the appropriate direction. However, if there is no cell in
     * the adjacent direction of the action then the outcome of the action is
     * to stay in the same cell as the action was performed in.
     *
     * @param s
     *            the cell location from which the action is to be performed.
     * @param a
     *            the action to perform (Up, Down, Left, or Right).
     * @return the Cell an agent would end up in if they performed the
     *         specified action from the specified cell location.
     */
    public Cell<C> result(Cell<C> s, CellWorldAction a) {
        Cell<C> target = getCellAt(a.getXResult(s.getX()), a.getYResult(s.getY()));
        // No adjoining cell in that direction: bump back in place.
        return (target == null) ? s : target;
    }

    /**
     * Remove the cell at the specified location from this Cell World. This
     * allows you to introduce barriers into different locations.
     *
     * @param x
     *            the x dimension of the cell to be removed.
     * @param y
     *            the y dimension of the cell to be removed.
     */
    public void removeCell(int x, int y) {
        Map<Integer, Cell<C>> column = cellLookup.get(x);
        if (column != null) {
            cells.remove(column.remove(y));
        }
    }

    /**
     * Get the cell at the specified x and y locations.
     *
     * @param x
     *            the x dimension of the cell to be retrieved.
     * @param y
     *            the y dimension of the cell to be retrieved.
     * @return the cell at the specified x,y location, null if no cell exists
     *         at this location.
     */
    public Cell<C> getCellAt(int x, int y) {
        Map<Integer, Cell<C>> column = cellLookup.get(x);
        return (column == null) ? null : column.get(y);
    }
}

View File

@@ -0,0 +1,142 @@
package aima.core.environment.cellworld;

import java.util.LinkedHashSet;
import java.util.Set;

import aima.core.agent.Action;

/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 645.<br>
 * <br>
 * The actions in every state are Up, Down, Left, and Right.<br>
 * <br>
 * <b>Note:</b> Up causes y to increase by 1, Down causes y to decrease by 1,
 * Left causes x to decrease by 1, and Right causes x to increase by 1 within
 * a Cell World.
 *
 * @author Ciaran O'Reilly
 */
public enum CellWorldAction implements Action {
    Up, Down, Left, Right, None;

    private static final Set<CellWorldAction> _actions = new LinkedHashSet<CellWorldAction>();
    static {
        _actions.add(Up);
        _actions.add(Down);
        _actions.add(Left);
        _actions.add(Right);
        _actions.add(None);
    }

    /**
     * @return a set of the actual actions.
     */
    public static final Set<CellWorldAction> actions() {
        return _actions;
    }

    /**
     * @param curX
     *            the current x position.
     * @return the result on the x position of applying this action.
     */
    public int getXResult(int curX) {
        if (this == Left) {
            return curX - 1;
        }
        if (this == Right) {
            return curX + 1;
        }
        // Up, Down and None leave x unchanged.
        return curX;
    }

    /**
     * @param curY
     *            the current y position.
     * @return the result on the y position of applying this action.
     */
    public int getYResult(int curY) {
        if (this == Up) {
            return curY + 1;
        }
        if (this == Down) {
            return curY - 1;
        }
        // Left, Right and None leave y unchanged.
        return curY;
    }

    /**
     * @return the first right angled action related to this action.
     */
    public CellWorldAction getFirstRightAngledAction() {
        if (this == Up || this == Down) {
            return Left;
        }
        if (this == Left || this == Right) {
            return Down;
        }
        // None has no right angle; it maps to itself.
        return None;
    }

    /**
     * @return the second right angled action related to this action.
     */
    public CellWorldAction getSecondRightAngledAction() {
        if (this == Up || this == Down) {
            return Right;
        }
        if (this == Left || this == Right) {
            return Up;
        }
        // None has no right angle; it maps to itself.
        return None;
    }
}

View File

@@ -0,0 +1,27 @@
package aima.core.environment.cellworld;

/**
 * Factory for constructing standard cell worlds from AIMA3e.
 *
 * @author Ciaran O'Reilly
 */
public class CellWorldFactory {

    // Utility class; not intended to be instantiated.
    private CellWorldFactory() {
    }

    /**
     * Create the cell world as defined in Figure 17.1 in AIMA3e. (a) A simple
     * 4 x 3 environment that presents the agent with a sequential decision
     * problem.
     *
     * @return a cell world representation of Fig 17.1 in AIMA3e.
     */
    public static CellWorld<Double> createCellWorldForFig17_1() {
        // Every cell starts with the non-terminal reward of -0.04.
        CellWorld<Double> cw = new CellWorld<Double>(4, 3, -0.04);
        // (2,2) is the obstacle/barrier square.
        cw.removeCell(2, 2);
        // Terminal rewards: +1 at (4,3) and -1 at (4,2).
        cw.getCellAt(4, 3).setContent(1.0);
        cw.getCellAt(4, 2).setContent(-1.0);
        return cw;
    }
}

View File

@@ -0,0 +1,87 @@
package aima.core.environment.gridworld;
/**
* Artificial Intelligence A Modern Approach (3rd Edition): page 645.<br>
* <br>
* A representation of a Cell in the environment detailed in Figure 17.1.
*
* @param <C>
* the content type of the cell.
*
* @author Ciaran O'Reilly
* @author Ravi Mohan
*/
public class GridCell<C> {
private int x = 1;
private int y = 1;
private C content = null;
/**
* Construct a Cell.
*
* @param x
* the x position of the cell.
* @param y
* the y position of the cell.
* @param content
* the initial content of the cell.
*/
public GridCell(int x, int y, C content) {
this.x = x;
this.y = y;
this.content = content;
}
/**
*
* @return the x position of the cell.
*/
public int getX() {
return x;
}
/**
*
* @return the y position of the cell.
*/
public int getY() {
return y;
}
/**
*
* @return the content of the cell.
*/
public C getContent() {
return content;
}
/**
* Set the cell's content.
*
* @param content
* the content to be placed in the cell.
*/
public void setContent(C content) {
this.content = content;
}
@Override
public String toString() {
return "<x=" + x + ", y=" + y + ", content=" + content + ">";
}
@Override
public boolean equals(Object o) {
if (o instanceof GridCell<?>) {
GridCell<?> c = (GridCell<?>) o;
return x == c.x && y == c.y && content.equals(c.content);
}
return false;
}
@Override
public int hashCode() {
return x + 23 + y + 31 * content.hashCode();
}
}

View File

@@ -0,0 +1,56 @@
package aima.core.environment.gridworld;

import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

/**
 * A rectangular world of GridCells indexed by (x, y), with both coordinates
 * starting at 1. Mirrors the structure of the AIMA3e cell world; in this
 * commit it models the tile-game state space.
 *
 * @param <C>
 *            the type of content for the cells in the world.
 */
public class GridWorld<C> {
    // All cells in the world, in insertion (column-major) order.
    private Set<GridCell<C>> cells = new LinkedHashSet<GridCell<C>>();
    // Two-level index: x -> (y -> cell).
    private Map<Integer, Map<Integer, GridCell<C>>> cellLookup = new HashMap<Integer, Map<Integer, GridCell<C>>>();

    /**
     * Construct a grid world of xDimension * yDimension cells, all initially
     * holding the supplied default content.
     *
     * @param xDimension
     *            the size of the x dimension.
     * @param yDimension
     *            the size of the y dimension.
     * @param defaultCellContent
     *            the content to assign to each cell created.
     */
    public GridWorld(int xDimension, int yDimension, C defaultCellContent) {
        for (int col = 1; col <= xDimension; col++) {
            Map<Integer, GridCell<C>> column = new HashMap<Integer, GridCell<C>>();
            for (int row = 1; row <= yDimension; row++) {
                GridCell<C> cell = new GridCell<C>(col, row, defaultCellContent);
                cells.add(cell);
                column.put(row, cell);
            }
            cellLookup.put(col, column);
        }
    }

    /** @return all the cells in this world. */
    public Set<GridCell<C>> getCells() {
        return cells;
    }

    /**
     * Determine the cell reached by performing action a in cell s. If there is
     * no cell at the resulting coordinates, the agent stays where it is.
     *
     * @param s
     *            the cell the action is performed from.
     * @param a
     *            the action performed.
     * @return the resulting cell, or s itself when the move leaves the grid.
     */
    public GridCell<C> result(GridCell<C> s, GridWorldAction a) {
        GridCell<C> target = getCellAt(a.getXResult(s.getX()), a.getYResult(s.getY()));
        // No adjoining cell at the target coordinates: bump back in place.
        return (target == null) ? s : target;
    }

    /**
     * Remove the cell at the specified location from this world.
     *
     * @param x
     *            the x dimension of the cell to be removed.
     * @param y
     *            the y dimension of the cell to be removed.
     */
    public void removeCell(int x, int y) {
        Map<Integer, GridCell<C>> column = cellLookup.get(x);
        if (column != null) {
            cells.remove(column.remove(y));
        }
    }

    /**
     * Get the cell at the specified x and y locations.
     *
     * @param x
     *            the x dimension of the cell to be retrieved.
     * @param y
     *            the y dimension of the cell to be retrieved.
     * @return the cell at (x, y), or null if no such cell exists.
     */
    public GridCell<C> getCellAt(int x, int y) {
        Map<Integer, GridCell<C>> column = cellLookup.get(x);
        return (column == null) ? null : column.get(y);
    }
}

View File

@@ -0,0 +1,56 @@
package aima.core.environment.gridworld;

import java.util.LinkedHashSet;
import java.util.Set;

import aima.core.agent.Action;

/**
 * The actions available in the simplified tile-game MDP. Each action changes
 * the tile count (the x coordinate) and always advances the score (the y
 * coordinate) by 1.
 */
public enum GridWorldAction implements Action {
    AddTile, CaptureThree, RandomMove;

    private static final Set<GridWorldAction> _actions = new LinkedHashSet<GridWorldAction>();
    static {
        // AddTile: try to add a tile (low chance of capture).
        _actions.add(AddTile);
        // CaptureThree: try to subtract two tiles (high chance of capture).
        _actions.add(CaptureThree);
        // RandomMove: even chance of add/capture.
        _actions.add(RandomMove);
    }

    /** @return the set of the actual actions. */
    public static final Set<GridWorldAction> actions() {
        return _actions;
    }

    /**
     * @param curX
     *            the current x (tile count) position.
     * @return the effect of this action on x: AddTile +1, CaptureThree -2,
     *         RandomMove -1.
     */
    public int getXResult(int curX) {
        if (this == AddTile) {
            return curX + 1;
        }
        if (this == CaptureThree) {
            return curX - 2;
        }
        // RandomMove.
        return curX - 1;
    }

    /**
     * @param curY
     *            the current y (score) position.
     * @return curY + 1; the score increments by 1 at every action, regardless
     *         of which action is taken.
     */
    public int getYResult(int curY) {
        return curY + 1;
    }
}

View File

@@ -0,0 +1,23 @@
package aima.core.environment.gridworld;
/**
 * Factory for constructing the tile-game grid world.
 *
 * @author Woody Folsom
 *
 */
public class GridWorldFactory {
/**
 * Create a GridWorld modeling a tile game where the x dimension is the tile
 * count (1..maxTiles) and the y dimension is the score (1..maxScore). Every
 * cell starts with the supplied non-terminal reward; the single goal cell
 * (maxTiles, maxScore) is given a reward of 1.0.
 *
 * @param maxTiles
 *            the maximum number of tiles (size of the x dimension).
 * @param maxScore
 *            the maximum score (size of the y dimension).
 * @param nonTerminalReward
 *            the reward assigned to every non-goal cell.
 * @return a grid world representation of the tile game.
 */
public static GridWorld<Double> createGridWorldForTileGame(int maxTiles, int maxScore, double nonTerminalReward) {
GridWorld<Double> cw = new GridWorld<Double>(maxTiles, maxScore, nonTerminalReward);
cw.getCellAt(maxTiles, maxScore).setContent(1.0);
return cw;
}
}

View File

@@ -0,0 +1,251 @@
package aima.core.probability.example;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.environment.gridworld.GridCell;
import aima.core.environment.gridworld.GridWorld;
import aima.core.environment.gridworld.GridWorldAction;
import aima.core.probability.mdp.ActionsFunction;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.RewardFunction;
import aima.core.probability.mdp.TransitionProbabilityFunction;
import aima.core.probability.mdp.impl.MDP;

/**
 * Factory methods for constructing MDP instances: the 4x3 cell world of
 * AIMA3e Fig 17.1/17.3, and a simplified tile-game grid world.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public class MDPFactory {
    /**
     * Constructs an MDP that can be used to generate the utility values
     * detailed in Fig 17.3.
     *
     * @param cw
     *            the cell world from figure 17.1.
     * @return an MDP that can be used to generate the utility values detailed
     *         in Fig 17.3.
     */
    public static MarkovDecisionProcess<Cell<Double>, CellWorldAction> createMDPForFigure17_3(
            final CellWorld<Double> cw) {
        return new MDP<Cell<Double>, CellWorldAction>(cw.getCells(),
                cw.getCellAt(1, 1), createActionsFunctionForFigure17_1(cw),
                createTransitionProbabilityFunctionForFigure17_1(cw),
                createRewardFunctionForFigure17_1());
    }

    /**
     * Constructs an MDP for the simplified tile game played on the given grid
     * world, starting from cell (1, 1).
     *
     * @param cw
     *            the tile-game grid world.
     * @param maxTiles
     *            the maximum tile count (x dimension of the goal cell).
     * @param maxScore
     *            the maximum score (y dimension of the goal cell).
     * @return an MDP for the tile game.
     */
    public static MarkovDecisionProcess<GridCell<Double>, GridWorldAction> createMDPForTileGame(
            final GridWorld<Double> cw, int maxTiles, int maxScore) {
        return new MDP<GridCell<Double>, GridWorldAction>(cw.getCells(),
                cw.getCellAt(1, 1),
                createActionsFunctionForTileGame(cw, maxTiles, maxScore),
                createTransitionProbabilityFunctionForTileGame(cw),
                createRewardFunctionForTileGame());
    }

    /**
     * Returns the allowed actions from a specified cell within the cell world
     * described in Fig 17.1.
     *
     * @param cw
     *            the cell world from figure 17.1.
     * @return the set of actions allowed at a particular cell. This set will
     *         be empty if at a terminal state.
     */
    public static ActionsFunction<Cell<Double>, CellWorldAction> createActionsFunctionForFigure17_1(
            final CellWorld<Double> cw) {
        final Set<Cell<Double>> terminals = new HashSet<Cell<Double>>();
        terminals.add(cw.getCellAt(4, 3));
        terminals.add(cw.getCellAt(4, 2));
        ActionsFunction<Cell<Double>, CellWorldAction> af = new ActionsFunction<Cell<Double>, CellWorldAction>() {
            @Override
            public Set<CellWorldAction> actions(Cell<Double> s) {
                // All actions can be performed in each cell
                // (except terminal states)
                if (terminals.contains(s)) {
                    return Collections.emptySet();
                }
                return CellWorldAction.actions();
            }
        };
        return af;
    }

    /**
     * Returns the allowed actions from a specified cell of the tile game. The
     * only terminal state is the goal cell (maxTiles, maxScore).
     *
     * @param cw
     *            the tile-game grid world.
     * @param maxTiles
     *            the x coordinate of the goal cell.
     * @param maxScore
     *            the y coordinate of the goal cell.
     * @return the set of actions allowed at a particular cell; empty at the
     *         terminal state.
     */
    public static ActionsFunction<GridCell<Double>, GridWorldAction> createActionsFunctionForTileGame(
            final GridWorld<Double> cw, int maxTiles, int maxScore) {
        final Set<GridCell<Double>> terminals = new HashSet<GridCell<Double>>();
        terminals.add(cw.getCellAt(maxTiles, maxScore));
        ActionsFunction<GridCell<Double>, GridWorldAction> af = new ActionsFunction<GridCell<Double>, GridWorldAction>() {
            @Override
            public Set<GridWorldAction> actions(GridCell<Double> s) {
                // All actions can be performed in each cell
                // (except terminal states)
                if (terminals.contains(s)) {
                    return Collections.emptySet();
                }
                return GridWorldAction.actions();
            }
        };
        return af;
    }

    /**
     * Figure 17.1 (b) Illustration of the transition model of the
     * environment: the 'intended' outcome occurs with probability 0.8, but
     * with probability 0.2 the agent moves at right angles to the intended
     * direction. A collision with a wall results in no movement.
     *
     * @param cw
     *            the cell world from figure 17.1.
     * @return the transition probability function as described in figure 17.1.
     */
    public static TransitionProbabilityFunction<Cell<Double>, CellWorldAction> createTransitionProbabilityFunctionForFigure17_1(
            final CellWorld<Double> cw) {
        TransitionProbabilityFunction<Cell<Double>, CellWorldAction> tf = new TransitionProbabilityFunction<Cell<Double>, CellWorldAction>() {
            // 0.8 intended, 0.1 each right-angled deviation.
            private double[] distribution = new double[] { 0.8, 0.1, 0.1 };

            @Override
            public double probability(Cell<Double> sDelta, Cell<Double> s,
                    CellWorldAction a) {
                double prob = 0;
                List<Cell<Double>> outcomes = possibleOutcomes(s, a);
                for (int i = 0; i < outcomes.size(); i++) {
                    if (sDelta.equals(outcomes.get(i))) {
                        // Note: You have to sum the matches to sDelta as the
                        // different actions could have the same effect (i.e.
                        // staying in place due to there being no adjacent
                        // cells), which increases the probability of the
                        // transition for that state.
                        prob += distribution[i];
                    }
                }
                return prob;
            }

            private List<Cell<Double>> possibleOutcomes(Cell<Double> c,
                    CellWorldAction a) {
                // There can be three possible outcomes for the planned action
                List<Cell<Double>> outcomes = new ArrayList<Cell<Double>>();
                outcomes.add(cw.result(c, a));
                outcomes.add(cw.result(c, a.getFirstRightAngledAction()));
                outcomes.add(cw.result(c, a.getSecondRightAngledAction()));
                return outcomes;
            }
        };
        return tf;
    }

    /**
     * The transition model for the tile game: every recognized action can
     * result either in a tile being added or in a capture, with
     * action-specific probabilities (see getDistribution below).
     *
     * @param cw
     *            the tile-game grid world.
     * @return the transition probability function for the tile game.
     */
    public static TransitionProbabilityFunction<GridCell<Double>, GridWorldAction> createTransitionProbabilityFunctionForTileGame(
            final GridWorld<Double> cw) {
        TransitionProbabilityFunction<GridCell<Double>, GridWorldAction> tf = new TransitionProbabilityFunction<GridCell<Double>, GridWorldAction>() {
            @Override
            public double probability(GridCell<Double> sDelta,
                    GridCell<Double> s, GridWorldAction a) {
                double prob = 0;
                double[] distribution = getDistribution(a);
                List<GridCell<Double>> outcomes = possibleOutcomes(s, a);
                for (int i = 0; i < outcomes.size(); i++) {
                    if (sDelta.equals(outcomes.get(i))) {
                        // Note: You have to sum the matches to sDelta as the
                        // different actions could have the same effect (i.e.
                        // staying in place due to there being no adjacent
                        // cells), which increases the probability of the
                        // transition for that state.
                        prob += distribution[i];
                    }
                }
                return prob;
            }

            // Probabilities of [add, capture] for each action.
            private double[] getDistribution(GridWorldAction a) {
                switch (a) {
                case AddTile:
                    return new double[] { 0.66, 0.34 };
                case CaptureThree:
                    return new double[] { 0.34, 0.66 };
                case RandomMove:
                    return new double[] { 0.50, 0.50 };
                default:
                    throw new RuntimeException("Unrecognized action: " + a);
                }
            }

            private List<GridCell<Double>> possibleOutcomes(GridCell<Double> c,
                    GridWorldAction a) {
                // Every recognized action has the same two possible outcomes
                // (add or capture); only the probabilities differ.
                List<GridCell<Double>> outcomes = new ArrayList<GridCell<Double>>();
                switch (a) {
                case AddTile:
                case CaptureThree:
                case RandomMove:
                    outcomes.add(cw.result(c, GridWorldAction.AddTile));
                    outcomes.add(cw.result(c, GridWorldAction.CaptureThree));
                    break; // was missing for RandomMove (fell through to default)
                default:
                    // no possible outcomes for unrecognized actions
                }
                return outcomes;
            }
        };
        return tf;
    }

    /**
     * @return the reward function which takes the content of the cell as
     *         being the reward value.
     */
    public static RewardFunction<Cell<Double>> createRewardFunctionForFigure17_1() {
        RewardFunction<Cell<Double>> rf = new RewardFunction<Cell<Double>>() {
            @Override
            public double reward(Cell<Double> s) {
                return s.getContent();
            }
        };
        return rf;
    }

    /**
     * @return the reward function for the tile game, which takes the content
     *         of the grid cell as being the reward value.
     */
    public static RewardFunction<GridCell<Double>> createRewardFunctionForTileGame() {
        RewardFunction<GridCell<Double>> rf = new RewardFunction<GridCell<Double>>() {
            @Override
            public double reward(GridCell<Double> s) {
                return s.getContent();
            }
        };
        return rf;
    }
}

View File

@@ -0,0 +1,27 @@
package aima.core.probability.mdp;
import java.util.Set;
import aima.core.agent.Action;
/**
 * An interface for MDP action functions: A(s), the set of actions applicable
 * in a given state.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public interface ActionsFunction<S, A extends Action> {
/**
 * Get the set of actions for state s.
 *
 * @param s
 *            the state.
 * @return the set of actions for state s; may be empty, e.g. when s is a
 *         terminal state.
 */
Set<A> actions(S s);
}

View File

@@ -0,0 +1,79 @@
package aima.core.probability.mdp;
import java.util.Set;
import aima.core.agent.Action;
/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 647.<br>
 * <br>
 *
 * A sequential decision problem for a fully observable, stochastic environment
 * with a Markovian transition model and additive rewards is called a <b>Markov
 * decision process</b>, or <b>MDP</b>, and consists of a set of states (with an
 * initial state s<sub>0</sub>; a set ACTIONS(s) of actions in each state; a
 * transition model P(s' | s, a); and a reward function R(s).<br>
 * <br>
 * <b>Note:</b> Some definitions of MDPs allow the reward to depend on the
 * action and outcome too, so the reward function is R(s, a, s'). This
 * simplifies the description of some environments but does not change the
 * problem in any fundamental way.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 *
 */
public interface MarkovDecisionProcess<S, A extends Action> {
/**
 * Get the set of states associated with the Markov decision process.
 *
 * @return the set of states associated with the Markov decision process.
 */
Set<S> states();
/**
 * Get the initial state s<sub>0</sub> for this instance of a Markov
 * decision process.
 *
 * @return the initial state s<sub>0</sub>.
 */
S getInitialState();
/**
 * Get the set of actions for state s.
 *
 * @param s
 *            the state.
 * @return the set of actions for state s; may be empty, e.g. at a terminal
 *         state.
 */
Set<A> actions(S s);
/**
 * Return the probability of going from state s using action a to s' based
 * on the underlying transition model P(s' | s, a).
 *
 * @param sDelta
 *            the state s' being transitioned to.
 * @param s
 *            the state s being transitioned from.
 * @param a
 *            the action used to move from state s to s'.
 * @return the probability of going from state s using action a to s'.
 */
double transitionProbability(S sDelta, S s, A a);
/**
 * Get the reward associated with being in state s.
 *
 * @param s
 *            the state whose reward is sought.
 * @return the reward associated with being in state s.
 */
double reward(S s);
}

View File

@@ -0,0 +1,34 @@
package aima.core.probability.mdp;
import aima.core.agent.Action;
/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 647.<br>
 * <br>
 *
 * A solution to a Markov decision process is called a <b>policy</b>. It
 * specifies what the agent should do for any state that the agent might reach.
 * It is traditional to denote a policy by &pi;, and &pi;(s) is the action
 * recommended by the policy &pi; for state s. If the agent has a complete
 * policy, then no matter what the outcome of any action, the agent will always
 * know what to do next.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 *
 */
public interface Policy<S, A extends Action> {
/**
 * &pi;(s) is the action recommended by the policy &pi; for state s.
 *
 * @param s
 *            the state s.
 * @return the action recommended by the policy &pi; for state s; may be
 *         null if the policy does not cover s (implementation-dependent).
 */
A action(S s);
}

View File

@@ -0,0 +1,39 @@
package aima.core.probability.mdp;
import java.util.Map;
import aima.core.agent.Action;
/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 656.<br>
 * <br>
 * Given a policy &pi;<sub>i</sub>, calculate
 * U<sub>i</sub>=U<sup>&pi;<sub>i</sub></sup>, the utility of each state if
 * &pi;<sub>i</sub> were to be executed.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public interface PolicyEvaluation<S, A extends Action> {
/**
 * <b>Policy evaluation:</b> given a policy &pi;<sub>i</sub>, calculate
 * U<sub>i</sub>=U<sup>&pi;<sub>i</sub></sup>, the utility of each state if
 * &pi;<sub>i</sub> were to be executed.
 *
 * @param pi_i
 *            a policy vector indexed by state.
 * @param U
 *            a vector of utilities for states in S, used as the starting
 *            estimate for the evaluation.
 * @param mdp
 *            an MDP with states S, actions A(s), transition model P(s'|s,a).
 * @return U<sub>i</sub>=U<sup>&pi;<sub>i</sub></sup>, the utility of each
 *         state if &pi;<sub>i</sub> were to be executed.
 */
Map<S, Double> evaluate(Map<S, A> pi_i, Map<S, Double> U,
MarkovDecisionProcess<S, A> mdp);
}

View File

@@ -0,0 +1,21 @@
package aima.core.probability.mdp;
/**
 * An interface for MDP reward functions R(s).
 *
 * @param <S>
 *            the state type.
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public interface RewardFunction<S> {
/**
 * Get the reward associated with being in state s.
 *
 * @param s
 *            the state whose reward is sought.
 * @return the reward associated with being in state s.
 */
double reward(S s);
}

View File

@@ -0,0 +1,31 @@
package aima.core.probability.mdp;
import aima.core.agent.Action;
/**
 * An interface for MDP transition probability functions P(s' | s, a).
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public interface TransitionProbabilityFunction<S, A extends Action> {
/**
 * Return the probability of going from state s using action a to s' based
 * on the underlying transition model P(s' | s, a).
 *
 * @param sDelta
 *            the state s' being transitioned to.
 * @param s
 *            the state s being transitioned from.
 * @param a
 *            the action used to move from state s to s'.
 * @return the probability of going from state s using action a to s'.
 */
double probability(S sDelta, S s, A a);
}

View File

@@ -0,0 +1,36 @@
package aima.core.probability.mdp.impl;

import java.util.HashMap;
import java.util.Map;

import aima.core.agent.Action;
import aima.core.probability.mdp.Policy;

/**
 * Default implementation of the Policy interface using an underlying Map to
 * look up an action associated with a state.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 */
public class LookupPolicy<S, A extends Action> implements Policy<S, A> {
    // Defensive copy of the supplied policy; the reference never changes.
    private final Map<S, A> policy = new HashMap<S, A>();

    /**
     * Construct a lookup policy backed by a copy of the given map.
     *
     * @param aPolicy
     *            a map from each state to the action the policy recommends
     *            for it.
     */
    public LookupPolicy(Map<S, A> aPolicy) {
        policy.putAll(aPolicy);
    }

    //
    // START-Policy
    /**
     * {@inheritDoc} Returns null when the policy has no entry for s.
     */
    @Override
    public A action(S s) {
        return policy.get(s);
    }
    // END-Policy
    //
}

View File

@@ -0,0 +1,69 @@
package aima.core.probability.mdp.impl;

import java.util.Set;

import aima.core.agent.Action;
import aima.core.probability.mdp.ActionsFunction;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.RewardFunction;
import aima.core.probability.mdp.TransitionProbabilityFunction;

/**
 * Default implementation of the MarkovDecisionProcess&lt;S, A&gt; interface.
 * Simply delegates to the functions supplied at construction time.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public class MDP<S, A extends Action> implements MarkovDecisionProcess<S, A> {
    // All collaborators are fixed at construction time.
    private final Set<S> states;
    private final S initialState;
    private final ActionsFunction<S, A> actionsFunction;
    private final TransitionProbabilityFunction<S, A> transitionProbabilityFunction;
    private final RewardFunction<S> rewardFunction;

    /**
     * Construct an MDP from its constituent parts.
     *
     * @param states
     *            the set of states S.
     * @param initialState
     *            the initial state s<sub>0</sub>.
     * @param actionsFunction
     *            A(s), the actions applicable in each state.
     * @param transitionProbabilityFunction
     *            the transition model P(s'|s,a).
     * @param rewardFunction
     *            the reward function R(s).
     */
    public MDP(Set<S> states, S initialState,
            ActionsFunction<S, A> actionsFunction,
            TransitionProbabilityFunction<S, A> transitionProbabilityFunction,
            RewardFunction<S> rewardFunction) {
        this.states = states;
        this.initialState = initialState;
        this.actionsFunction = actionsFunction;
        this.transitionProbabilityFunction = transitionProbabilityFunction;
        this.rewardFunction = rewardFunction;
    }

    //
    // START-MarkovDecisionProcess
    @Override
    public Set<S> states() {
        return states;
    }

    @Override
    public S getInitialState() {
        return initialState;
    }

    @Override
    public Set<A> actions(S s) {
        return actionsFunction.actions(s);
    }

    @Override
    public double transitionProbability(S sDelta, S s, A a) {
        return transitionProbabilityFunction.probability(sDelta, s, a);
    }

    @Override
    public double reward(S s) {
        return rewardFunction.reward(s);
    }
    // END-MarkovDecisionProcess
    //
}

View File

@@ -0,0 +1,93 @@
package aima.core.probability.mdp.impl;

import java.util.HashMap;
import java.util.Map;

import aima.core.agent.Action;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.PolicyEvaluation;

/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 657.<br>
 * <br>
 * For small state spaces, policy evaluation using exact solution methods is
 * often the most efficient approach. For large state spaces, O(n<sup>3</sup>)
 * time might be prohibitive. Fortunately, it is not necessary to do exact
 * policy evaluation. Instead, we can perform some number of simplified value
 * iteration steps (simplified because the policy is fixed) to give a reasonably
 * good approximation of utilities. The simplified Bellman update for this
 * process is:<br>
 * <br>
 *
 * <pre>
 * U<sub>i+1</sub>(s) <- R(s) + &gamma;&Sigma;<sub>s'</sub>P(s'|s,&pi;<sub>i</sub>(s))U<sub>i</sub>(s')
 * </pre>
 *
 * and this is repeated k times to produce the next utility estimate. The
 * resulting algorithm is called <b>modified policy iteration</b>. It is often
 * much more efficient than standard policy iteration or value iteration.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 *
 */
public class ModifiedPolicyEvaluation<S, A extends Action> implements
        PolicyEvaluation<S, A> {
    // # iterations to use to produce the next utility estimate
    private final int k;
    // discount &gamma; to be used.
    private final double gamma;

    /**
     * Constructor.
     *
     * @param k
     *            number iterations to use to produce the next utility estimate
     * @param gamma
     *            discount &gamma; to be used
     * @throws IllegalArgumentException
     *             if gamma is not in the interval (0, 1].
     */
    public ModifiedPolicyEvaluation(int k, double gamma) {
        if (gamma > 1.0 || gamma <= 0.0) {
            throw new IllegalArgumentException("Gamma must be > 0 and <= 1.0");
        }
        this.k = k;
        this.gamma = gamma;
    }

    //
    // START-PolicyEvaluation
    @Override
    public Map<S, Double> evaluate(Map<S, A> pi_i, Map<S, Double> U,
            MarkovDecisionProcess<S, A> mdp) {
        // Double-buffer: U_i holds the current estimate, U_ip1 the one being
        // computed; the input map U is never mutated.
        Map<S, Double> U_i = new HashMap<S, Double>(U);
        Map<S, Double> U_ip1 = new HashMap<S, Double>(U);
        // repeat k times to produce the next utility estimate
        for (int i = 0; i < k; i++) {
            // U<sub>i+1</sub>(s) <- R(s) +
            // &gamma;&Sigma;<sub>s'</sub>P(s'|s,&pi;<sub>i</sub>(s))U<sub>i</sub>(s')
            for (S s : U.keySet()) {
                A ap_i = pi_i.get(s);
                double aSum = 0;
                // Handle terminal states (i.e. no actions), whose utility is
                // just their reward.
                if (null != ap_i) {
                    for (S sDelta : U.keySet()) {
                        aSum += mdp.transitionProbability(sDelta, s, ap_i)
                                * U_i.get(sDelta);
                    }
                }
                U_ip1.put(s, mdp.reward(s) + gamma * aSum);
            }
            // Promote the freshly-computed estimate for the next sweep.
            U_i.putAll(U_ip1);
        }
        return U_ip1;
    }
    // END-PolicyEvaluation
    //
}

View File

@@ -0,0 +1,144 @@
package aima.core.probability.mdp.search;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import aima.core.agent.Action;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.Policy;
import aima.core.probability.mdp.PolicyEvaluation;
import aima.core.probability.mdp.impl.LookupPolicy;
import aima.core.util.Util;
/**
* Artificial Intelligence A Modern Approach (3rd Edition): page 657.<br>
* <br>
*
* <pre>
* function POLICY-ITERATION(mdp) returns a policy
* inputs: mdp, an MDP with states S, actions A(s), transition model P(s' | s, a)
* local variables: U, a vector of utilities for states in S, initially zero
* &pi;, a policy vector indexed by state, initially random
*
* repeat
* U <- POLICY-EVALUATION(&pi;, U, mdp)
* unchanged? <- true
* for each state s in S do
* if max<sub>a &isin; A(s)</sub> &Sigma;<sub>s'</sub>P(s'|s,a)U[s'] > &Sigma;<sub>s'</sub>P(s'|s,&pi;[s])U[s'] then do
* &pi;[s] <- argmax<sub>a &isin; A(s)</sub> &Sigma;<sub>s'</sub>P(s'|s,a)U[s']
* unchanged? <- false
* until unchanged?
* return &pi;
* </pre>
*
* Figure 17.7 The policy iteration algorithm for calculating an optimal policy.
*
* @param <S>
* the state type.
* @param <A>
* the action type.
*
* @author Ciaran O'Reilly
* @author Ravi Mohan
*
*/
public class PolicyIteration<S, A extends Action> {
	// The policy evaluation step (e.g. ModifiedPolicyEvaluation) used to
	// compute utilities U for the current policy pi.
	private PolicyEvaluation<S, A> policyEvaluation = null;

	/**
	 * Constructor.
	 *
	 * @param policyEvaluation
	 *            the policy evaluation function to use.
	 */
	public PolicyIteration(PolicyEvaluation<S, A> policyEvaluation) {
		this.policyEvaluation = policyEvaluation;
	}

	// function POLICY-ITERATION(mdp) returns a policy
	/**
	 * The policy iteration algorithm for calculating an optimal policy:
	 * alternate a policy-evaluation step with a greedy policy-improvement
	 * sweep until no state's action changes.
	 *
	 * @param mdp
	 *            an MDP with states S, actions A(s), transition model P(s'|s,a)
	 * @return an optimal policy
	 */
	public Policy<S, A> policyIteration(MarkovDecisionProcess<S, A> mdp) {
		// local variables: U, a vector of utilities for states in S, initially
		// zero
		Map<S, Double> U = Util.create(mdp.states(), new Double(0));
		// &pi;, a policy vector indexed by state, initially random
		Map<S, A> pi = initialPolicyVector(mdp);
		boolean unchanged;
		// repeat
		do {
			// U <- POLICY-EVALUATION(&pi;, U, mdp)
			U = policyEvaluation.evaluate(pi, U, mdp);
			// unchanged? <- true
			unchanged = true;
			// for each state s in S do
			for (S s : mdp.states()) {
				// calculate:
				// max<sub>a &isin; A(s)</sub>
				// &Sigma;<sub>s'</sub>P(s'|s,a)U[s']
				// For terminal states A(s) is empty, so aMax remains
				// NEGATIVE_INFINITY and piVal remains 0; the strict
				// comparison below then leaves the (absent) policy entry
				// untouched.
				double aMax = Double.NEGATIVE_INFINITY, piVal = 0;
				A aArgmax = pi.get(s);
				for (A a : mdp.actions(s)) {
					// Expected utility of taking action a in s.
					double aSum = 0;
					for (S sDelta : mdp.states()) {
						aSum += mdp.transitionProbability(sDelta, s, a)
								* U.get(sDelta);
					}
					if (aSum > aMax) {
						aMax = aSum;
						aArgmax = a;
					}
					// track:
					// &Sigma;<sub>s'</sub>P(s'|s,&pi;[s])U[s']
					if (a.equals(pi.get(s))) {
						piVal = aSum;
					}
				}
				// if max<sub>a &isin; A(s)</sub>
				// &Sigma;<sub>s'</sub>P(s'|s,a)U[s']
				// > &Sigma;<sub>s'</sub>P(s'|s,&pi;[s])U[s'] then do
				// NOTE(review): strict floating-point '>' comparison; with an
				// approximate evaluator (e.g. the k-step modified evaluation)
				// two near-equal actions could in principle alternate between
				// sweeps - confirm convergence for the MDPs in use.
				if (aMax > piVal) {
					// &pi;[s] <- argmax<sub>a &isin;A(s)</sub>
					// &Sigma;<sub>s'</sub>P(s'|s,a)U[s']
					pi.put(s, aArgmax);
					// unchanged? <- false
					unchanged = false;
				}
			}
			// until unchanged?
		} while (!unchanged);
		// return &pi;
		return new LookupPolicy<S, A>(pi);
	}

	/**
	 * Create a policy vector indexed by state, initially random. Terminal
	 * states (no applicable actions) get no entry at all.
	 *
	 * @param mdp
	 *            an MDP with states S, actions A(s), transition model P(s'|s,a)
	 * @return a policy vector indexed by state, initially random.
	 */
	public static <S, A extends Action> Map<S, A> initialPolicyVector(
			MarkovDecisionProcess<S, A> mdp) {
		Map<S, A> pi = new LinkedHashMap<S, A>();
		List<A> actions = new ArrayList<A>();
		for (S s : mdp.states()) {
			actions.clear();
			actions.addAll(mdp.actions(s));
			// Handle terminal states (i.e. no actions).
			if (actions.size() > 0) {
				pi.put(s, Util.selectRandomlyFromList(actions));
			}
		}
		return pi;
	}
}

View File

@@ -0,0 +1,129 @@
package aima.core.probability.mdp.search;
import java.util.Map;
import java.util.Set;
import aima.core.agent.Action;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.util.Util;
/**
* Artificial Intelligence A Modern Approach (3rd Edition): page 653.<br>
* <br>
*
* <pre>
* function VALUE-ITERATION(mdp, &epsilon;) returns a utility function
* inputs: mdp, an MDP with states S, actions A(s), transition model P(s' | s, a),
* rewards R(s), discount &gamma;
* &epsilon; the maximum error allowed in the utility of any state
* local variables: U, U', vectors of utilities for states in S, initially zero
* &delta; the maximum change in the utility of any state in an iteration
*
* repeat
* U <- U'; &delta; <- 0
* for each state s in S do
* U'[s] <- R(s) + &gamma; max<sub>a &isin; A(s)</sub> &Sigma;<sub>s'</sub>P(s' | s, a) U[s']
* if |U'[s] - U[s]| > &delta; then &delta; <- |U'[s] - U[s]|
* until &delta; < &epsilon;(1 - &gamma;)/&gamma;
* return U
* </pre>
*
* Figure 17.4 The value iteration algorithm for calculating utilities of
* states. The termination condition is from Equation (17.8):<br>
*
* <pre>
* if ||U<sub>i+1</sub> - U<sub>i</sub>|| < &epsilon;(1 - &gamma;)/&gamma; then ||U<sub>i+1</sub> - U|| < &epsilon;
* </pre>
*
* @param <S>
* the state type.
* @param <A>
* the action type.
*
* @author Ciaran O'Reilly
* @author Ravi Mohan
*
*/
public class ValueIteration<S, A extends Action> {
	// discount &gamma; to be used; fixed at construction.
	private final double gamma;

	/**
	 * Constructor.
	 *
	 * @param gamma
	 *            discount &gamma; to be used; must satisfy 0 &lt; &gamma; &lt;= 1.
	 * @throws IllegalArgumentException
	 *             if gamma is outside (0, 1].
	 */
	public ValueIteration(double gamma) {
		if (gamma > 1.0 || gamma <= 0.0) {
			throw new IllegalArgumentException("Gamma must be > 0 and <= 1.0");
		}
		this.gamma = gamma;
	}

	// function VALUE-ITERATION(mdp, &epsilon;) returns a utility function
	/**
	 * The value iteration algorithm for calculating the utility of states.
	 *
	 * @param mdp
	 *            an MDP with states S, actions A(s), <br>
	 *            transition model P(s' | s, a), rewards R(s)
	 * @param epsilon
	 *            the maximum error allowed in the utility of any state
	 * @return a vector of utilities for states in S
	 */
	public Map<S, Double> valueIteration(MarkovDecisionProcess<S, A> mdp,
			double epsilon) {
		//
		// local variables: U, U', vectors of utilities for states in S,
		// initially zero
		Map<S, Double> U = Util.create(mdp.states(), Double.valueOf(0));
		Map<S, Double> Udelta = Util.create(mdp.states(), Double.valueOf(0));
		// &delta; the maximum change in the utility of any state in an
		// iteration
		double delta = 0;
		// Termination threshold &epsilon;(1 - &gamma;)/&gamma; from Equation
		// (17.8), computed once for efficiency. For gamma == 1.0 that
		// expression degenerates to 0 and the loop would only terminate on an
		// exact fixed point (potentially never); fall back to requiring the
		// maximum change itself to drop below epsilon in that case.
		double minDelta = (gamma == 1.0) ? epsilon : epsilon * (1 - gamma)
				/ gamma;
		// repeat
		do {
			// U <- U'; &delta; <- 0
			U.putAll(Udelta);
			delta = 0;
			// for each state s in S do
			for (S s : mdp.states()) {
				// max<sub>a &isin; A(s)</sub>
				Set<A> actions = mdp.actions(s);
				// Handle terminal states (i.e. no actions): their expected
				// future utility contribution is 0.
				double aMax = 0;
				if (actions.size() > 0) {
					aMax = Double.NEGATIVE_INFINITY;
				}
				for (A a : actions) {
					// &Sigma;<sub>s'</sub>P(s' | s, a) U[s']
					double aSum = 0;
					for (S sDelta : mdp.states()) {
						aSum += mdp.transitionProbability(sDelta, s, a)
								* U.get(sDelta);
					}
					if (aSum > aMax) {
						aMax = aSum;
					}
				}
				// U'[s] <- R(s) + &gamma;
				// max<sub>a &isin; A(s)</sub>
				Udelta.put(s, mdp.reward(s) + gamma * aMax);
				// if |U'[s] - U[s]| > &delta; then &delta; <- |U'[s] - U[s]|
				double aDiff = Math.abs(Udelta.get(s) - U.get(s));
				if (aDiff > delta) {
					delta = aDiff;
				}
			}
			// until &delta; < &epsilon;(1 - &gamma;)/&gamma;
		} while (delta > minDelta);
		// return U
		return U;
	}
}

View File

@@ -0,0 +1,240 @@
package aima.core.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
/**
* @author Ravi Mohan
*
*/
public class Util {
	public static final String NO = "No";
	public static final String YES = "Yes";
	//
	// Shared random source for the random* helpers; unseeded, so results
	// differ between runs.
	private static Random _r = new Random();

	/**
	 * Get the first element from a list.
	 *
	 * @param l
	 *            the list the first element is to be extracted from.
	 * @return the first element of the passed in list.
	 */
	public static <T> T first(List<T> l) {
		return l.get(0);
	}

	/**
	 * Get a sublist of all of the elements in the list except for first.
	 *
	 * @param l
	 *            the list the rest of the elements are to be extracted from.
	 * @return a view (backed by l) of all of the elements in the passed in
	 *         list except for the first element.
	 */
	public static <T> List<T> rest(List<T> l) {
		return l.subList(1, l.size());
	}

	/**
	 * Create a Map&lt;K, V&gt; with the passed in keys having their values
	 * initialized to the passed in value.
	 *
	 * @param keys
	 *            the keys for the newly constructed map.
	 * @param value
	 *            the value to be associated with each of the maps keys.
	 * @return a map (insertion ordered) with the passed in keys initialized
	 *         to value.
	 */
	public static <K, V> Map<K, V> create(Collection<K> keys, V value) {
		Map<K, V> map = new LinkedHashMap<K, V>();
		for (K k : keys) {
			map.put(k, value);
		}
		return map;
	}

	/**
	 * Randomly select an element from a list.
	 *
	 * @param <T>
	 *            the type of element to be returned from the list l.
	 * @param l
	 *            a non-empty list of type T from which an element is to be
	 *            selected randomly.
	 * @return a randomly selected element from l.
	 */
	public static <T> T selectRandomlyFromList(List<T> l) {
		return l.get(_r.nextInt(l.size()));
	}

	/** @return a uniformly distributed random boolean. */
	public static boolean randomBoolean() {
		// Direct equivalent of the previous nextInt(2) != 0 idiom.
		return _r.nextBoolean();
	}

	/**
	 * Normalize an array of weights so they sum to 1.
	 *
	 * @param probDist
	 *            the raw weights.
	 * @return a new array of the weights divided by their total; all zeros
	 *         when the total is 0 (avoids division by zero).
	 */
	public static double[] normalize(double[] probDist) {
		int len = probDist.length;
		double total = 0.0;
		for (double d : probDist) {
			total = total + d;
		}
		double[] normalized = new double[len];
		if (total != 0) {
			for (int i = 0; i < len; i++) {
				normalized[i] = probDist[i] / total;
			}
		}
		return normalized;
	}

	/** List-based variant of {@link #normalize(double[])}. */
	public static List<Double> normalize(List<Double> values) {
		double[] valuesAsArray = new double[values.size()];
		for (int i = 0; i < valuesAsArray.length; i++) {
			valuesAsArray[i] = values.get(i);
		}
		double[] normalized = normalize(valuesAsArray);
		List<Double> results = new ArrayList<Double>();
		for (int i = 0; i < normalized.length; i++) {
			results.add(normalized[i]);
		}
		return results;
	}

	/** @return the smaller of i and j. */
	public static int min(int i, int j) {
		return Math.min(i, j);
	}

	/** @return the larger of i and j. */
	public static int max(int i, int j) {
		return Math.max(i, j);
	}

	/** @return the largest of i, j and k. */
	public static int max(int i, int j, int k) {
		return Math.max(Math.max(i, j), k);
	}

	/** @return the smallest of i, j and k. */
	public static int min(int i, int j, int k) {
		return Math.min(Math.min(i, j), k);
	}

	/**
	 * @return the most frequently occurring element of l (ties broken by hash
	 *         iteration order). The list must be non-empty.
	 */
	public static <T> T mode(List<T> l) {
		Hashtable<T, Integer> hash = new Hashtable<T, Integer>();
		for (T obj : l) {
			if (hash.containsKey(obj)) {
				hash.put(obj, hash.get(obj) + 1);
			} else {
				hash.put(obj, 1);
			}
		}
		T maxkey = hash.keySet().iterator().next();
		for (T key : hash.keySet()) {
			if (hash.get(key) > hash.get(maxkey)) {
				maxkey = key;
			}
		}
		return maxkey;
	}

	/** @return the two possible answers, { "Yes", "No" }. */
	public static String[] yesno() {
		return new String[] { YES, NO };
	}

	/** @return the base-2 logarithm of d. */
	public static double log2(double d) {
		return Math.log(d) / Math.log(2);
	}

	/**
	 * Shannon information of a probability distribution. All probabilities
	 * must be &gt; 0, otherwise the result is NaN.
	 */
	public static double information(double[] probabilities) {
		double total = 0.0;
		for (double d : probabilities) {
			total += (-1.0 * log2(d) * d);
		}
		return total;
	}

	/** @return a copy of list with the first occurrence of member removed. */
	public static <T> List<T> removeFrom(List<T> list, T member) {
		List<T> newList = new ArrayList<T>(list);
		newList.remove(member);
		return newList;
	}

	/** @return the sum of the squares of the numbers in list. */
	public static <T extends Number> double sumOfSquares(List<T> list) {
		double accum = 0;
		for (T item : list) {
			accum += item.doubleValue() * item.doubleValue();
		}
		return accum;
	}

	/** @return s concatenated with itself n times ("" when n == 0). */
	public static String ntimes(String s, int n) {
		// StringBuilder preferred over the synchronized StringBuffer.
		StringBuilder buf = new StringBuilder();
		for (int i = 0; i < n; i++) {
			buf.append(s);
		}
		return buf.toString();
	}

	/**
	 * @throws RuntimeException
	 *             if d is NaN or infinite.
	 */
	public static void checkForNanOrInfinity(double d) {
		if (Double.isNaN(d)) {
			throw new RuntimeException("Not a Number");
		}
		if (Double.isInfinite(d)) {
			throw new RuntimeException("Infinite Number");
		}
	}

	/** @return a uniformly random int in [i, j], both bounds inclusive. */
	public static int randomNumberBetween(int i, int j) {
		return _r.nextInt(j - i + 1) + i;
	}

	/**
	 * @return the arithmetic mean of lst (NaN for an empty list).
	 */
	public static double calculateMean(List<Double> lst) {
		// primitive accumulator avoids repeated autoboxing.
		double sum = 0.0;
		for (Double d : lst) {
			sum += d.doubleValue();
		}
		return sum / lst.size();
	}

	/**
	 * Sample standard deviation (divides by n - 1); assumes at least 2
	 * elements in values.
	 */
	public static double calculateStDev(List<Double> values, double mean) {
		int listSize = values.size();
		double sumOfDiffSquared = 0.0;
		for (Double value : values) {
			double diffFromMean = value - mean;
			// Divide inside the loop to keep the running sum from growing
			// too large; if precision suffers, switch to an incremental
			// (Welford-style) formulation.
			sumOfDiffSquared += (diffFromMean * diffFromMean) / (listSize - 1);
		}
		return Math.sqrt(sumOfDiffSquared);
	}

	/** @return each value standardized as (v - mean) / stdev. */
	public static List<Double> normalizeFromMeanAndStdev(List<Double> values,
			double mean, double stdev) {
		List<Double> normalized = new ArrayList<Double>();
		for (Double d : values) {
			normalized.add((d - mean) / stdev);
		}
		return normalized;
	}

	/** @return a uniformly random double in [lowerLimit, upperLimit). */
	public static double generateRandomDoubleBetween(double lowerLimit,
			double upperLimit) {
		return lowerLimit + ((upperLimit - lowerLimit) * _r.nextDouble());
	}
}

View File

@@ -0,0 +1,111 @@
package model.comPlayer;
import model.Board;
import model.BoardScorer;
import model.Move;
import model.comPlayer.generator.AlphaBetaMoveGenerator;
import model.comPlayer.generator.MonteCarloMoveGenerator;
import model.comPlayer.generator.MoveGenerator;
import model.playerModel.PlayerModel;
import aima.core.environment.gridworld.GridCell;
import aima.core.environment.gridworld.GridWorld;
import aima.core.environment.gridworld.GridWorldAction;
import aima.core.environment.gridworld.GridWorldFactory;
import aima.core.probability.example.MDPFactory;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.Policy;
import aima.core.probability.mdp.PolicyEvaluation;
import aima.core.probability.mdp.impl.ModifiedPolicyEvaluation;
import aima.core.probability.mdp.search.PolicyIteration;
public class AdaptiveComPlayer implements Player {
	// Concrete generators used to realize the abstract MDP actions.
	private final MoveGenerator abMoveGenerator = new AlphaBetaMoveGenerator();
	private final MoveGenerator mcMoveGenerator = new MonteCarloMoveGenerator();
	private BoardScorer boardScorer = new BoardScorer();
	// The policy is computed lazily on the first getMove() call and cached.
	private boolean calculatePolicy = true;
	private GridWorld<Double> gw = null;
	private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;
	private Policy<GridCell<Double>, GridWorldAction> policy = null;
	private PolicyIteration<GridCell<Double>, GridWorldAction> pi = null;

	@Override
	public void denyMove() {
		throw new UnsupportedOperationException("Not implemented");
	}

	/**
	 * Choose a move for the given board/player. On the first invocation a
	 * simplified MDP model of the game is built and solved with policy
	 * iteration; subsequent calls reuse the cached policy. If the current
	 * board state falls outside the MDP model, a random-move fallback is
	 * used.
	 */
	@Override
	public Move getMove(Board board, PlayerModel player) {
		if (calculatePolicy) {
			System.out.println("Calculating policy for PlayerModel: " + player);
			// take 10 turns to place 6 tiles
			double defaultPenalty = -0.25;
			int maxScore = player.getTargetScore().getTargetScore();
			int maxTiles = Board.NUM_COLS * Board.NUM_ROWS;
			gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,
					maxScore, defaultPenalty);
			mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore);
			// 50 evaluation iterations with discount gamma = 0.9
			PolicyEvaluation<GridCell<Double>, GridWorldAction> pe = new ModifiedPolicyEvaluation<GridCell<Double>, GridWorldAction>(
					50, 0.9);
			pi = new PolicyIteration<GridCell<Double>, GridWorldAction>(pe);
			policy = pi.policyIteration(mdp);
			System.out.println("Optimum policy calculated.");
			// Debug dump of the policy grid, highest score row first.
			for (int j = maxScore; j >= 1; j--) {
				StringBuilder sb = new StringBuilder();
				for (int i = 1; i <= maxTiles; i++) {
					sb.append(policy.action(gw.getCellAt(i, j)));
					sb.append(" ");
				}
				System.out.println(sb.toString());
			}
			calculatePolicy = false;
		} else {
			System.out.println("Using pre-calculated policy");
		}
		GridCell<Double> state = getState(board);
		// Only consult the policy for a known state; previously the lookup
		// ran before the null check, so a state outside the MDP could cause
		// an NPE instead of triggering the failsafe below.
		GridWorldAction action = (state != null) ? policy.action(state) : null;
		if (action == null) {
			System.out.println("Board state outside of parameters of MDP. Reverting to failsafe behavior.");
			action = GridWorldAction.RandomMove;
		}
		System.out.println("Performing action " + action + " at state " + state + " per policy.");
		switch (action) {
		case AddTile:
			return abMoveGenerator.genMove(board, false);
		case CaptureThree:
			return mcMoveGenerator.genMove(board, false);
		case RandomMove:
			return mcMoveGenerator.genMove(board, false);
		default:
			// Failsafe for any unmapped action.
			return mcMoveGenerator.genMove(board, false);
		}
	}

	// Map the current board onto the MDP's (turn, score) state grid; may
	// return null when the board is outside the modeled grid.
	private GridCell<Double> getState(Board board) {
		return gw.getCellAt(board.getTurn(), boardScorer.getScore(board));
	}

	@Override
	public boolean isReady() {
		return true; // always ready to play a random valid move
	}

	@Override
	public String toString() {
		// Fixed copy-paste: previously reported "Alpha-Beta ComPlayer".
		return "Adaptive ComPlayer";
	}
}

18
src/model/mdp/Action.java Normal file
View File

@@ -0,0 +1,18 @@
package model.mdp;
/**
 * An action in the simplified game MDP. The two canonical actions are exposed
 * as shared constants; the solver compares them by identity (==), so they are
 * declared final to guarantee the references cannot be reassigned.
 */
public class Action {
	public static final Action playToWin = new Action("PlayToWin");
	public static final Action playToLose = new Action("PlayToLose");

	// Display name, used only by toString().
	private final String name;

	/**
	 * Construct an action.
	 *
	 * @param name
	 *            the display name of the action.
	 */
	public Action(String name) {
		this.name = name;
	}

	@Override
	public String toString() {
		return name;
	}
}

51
src/model/mdp/MDP.java Normal file
View File

@@ -0,0 +1,51 @@
package model.mdp;
/**
 * A simplified MDP over a (score, tiles) grid. States with score == maxScore
 * or tiles == maxTiles are terminal (no actions available).
 */
public class MDP {
	// Reward received in every non-terminal state.
	public static final double nonTerminalReward = -0.25;

	public enum MODE {
		CEIL, FLOOR
	}

	private final int maxScore;
	private final int maxTiles;
	private final MODE mode;

	/**
	 * Construct an MDP.
	 *
	 * @param maxScore
	 *            the terminal score bound.
	 * @param maxTiles
	 *            the terminal tile-count bound.
	 * @param mode
	 *            rounding mode (currently unused by this class).
	 */
	public MDP(int maxScore, int maxTiles, MODE mode) {
		this.maxScore = maxScore;
		this.maxTiles = maxTiles;
		this.mode = mode;
	}

	/**
	 * @return the actions applicable in state (i, j); empty for terminal
	 *         states.
	 */
	public Action[] getActions(int i, int j) {
		// Terminal states (score or tile bound reached) offer no actions.
		if (i == maxScore || j == maxTiles) {
			return new Action[0];
		}
		return new Action[] { Action.playToLose, Action.playToWin };
	}

	public int getMaxScore() {
		return maxScore;
	}

	public int getMaxTiles() {
		return maxTiles;
	}

	/**
	 * Reward function: +10 for winning (both bounds reached together), -1
	 * for hitting the score bound alone, -5 for running out of tiles, and a
	 * small step penalty otherwise.
	 */
	public double getReward(int score, int tiles) {
		if (score == maxScore) {
			// TODO scale linearly?
			return (tiles == maxTiles) ? 10.0 : -1.0;
		}
		if (tiles == maxTiles) {
			return -5.0;
		}
		return nonTerminalReward;
	}
}

View File

@@ -0,0 +1,5 @@
package model.mdp;
/**
 * A solver for the simplified game {@link MDP}.
 */
public interface MDPSolver {
	/**
	 * Solve the given MDP.
	 *
	 * @param mdp
	 *            the MDP to solve.
	 * @return the computed policy.
	 */
	Policy solve(MDP mdp);
}

View File

@@ -0,0 +1,7 @@
package model.mdp;
import java.util.ArrayList;
/**
 * A policy represented as a list of {@link Action}s.
 * <p>
 * Note: extending ArrayList exposes the entire mutable list API; since
 * ArrayList is Serializable, a serialVersionUID is declared to avoid the
 * compiler-generated default changing across builds.
 */
public class Policy extends ArrayList<Action> {
	private static final long serialVersionUID = 1L;
}

View File

@@ -0,0 +1,34 @@
package model.mdp;
/**
 * A single stochastic transition in the simplified game MDP: with the given
 * probability the state moves by (scoreChange, tileCountChange).
 */
public class Transition {
	private double prob;
	private int scoreChange;
	private int tileCountChange;

	/**
	 * Construct a transition.
	 *
	 * @param probability
	 *            the probability of this transition occurring.
	 * @param scoreDelta
	 *            the change applied to the score.
	 * @param tileCountDelta
	 *            the change applied to the tile count.
	 */
	public Transition(double probability, int scoreDelta, int tileCountDelta) {
		this.prob = probability;
		this.scoreChange = scoreDelta;
		this.tileCountChange = tileCountDelta;
	}

	/** @return the probability of this transition. */
	public double getProb() {
		return prob;
	}

	/** Set the probability of this transition. */
	public void setProb(double prob) {
		this.prob = prob;
	}

	/** @return the change applied to the score. */
	public int getScoreChange() {
		return scoreChange;
	}

	/** Set the change applied to the score. */
	public void setScoreChange(int scoreChange) {
		this.scoreChange = scoreChange;
	}

	/** @return the change applied to the tile count. */
	public int getTileCountChange() {
		return tileCountChange;
	}

	/** Set the change applied to the tile count. */
	public void setTileCountChange(int tileCountChange) {
		this.tileCountChange = tileCountChange;
	}
}

View File

@@ -0,0 +1,110 @@
package model.mdp;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
/**
 * Value-iteration solver for the simplified (score, tiles) game MDP.
 * <p>
 * NOTE(review): policy extraction from the converged utilities is still
 * unimplemented - solve() currently returns an empty Policy.
 */
public class ValueIterationSolver implements MDPSolver {
	// Maximum number of Bellman-update sweeps before giving up on convergence.
	public int maxIterations = 10;
	// Convergence threshold on the max change |U - U'| between sweeps.
	public final double DEFAULT_EPS = 0.1;
	// Discount factor.
	public final double GAMMA = 0.9;
	// Formatter used only for the debug dump of the utility matrix.
	private DecimalFormat fmt = new DecimalFormat("##.00");

	/**
	 * Solve the MDP by iterating the Bellman update over the full
	 * (score, tiles) grid until the utilities converge or maxIterations is
	 * reached.
	 *
	 * @param mdp
	 *            the MDP to solve.
	 * @return the policy (currently always empty - see class note).
	 */
	public Policy solve(MDP mdp) {
		Policy policy = new Policy();
		double[][] utility = new double[mdp.getMaxScore() + 1][mdp.getMaxTiles() + 1];
		double[][] utilityPrime = new double[mdp.getMaxScore() + 1][mdp.getMaxTiles() + 1];
		// Initialize U' to the immediate rewards.
		for (int i = 0; i <= mdp.getMaxScore(); i++) {
			for (int j = 0; j <= mdp.getMaxTiles(); j++) {
				utilityPrime[i][j] = mdp.getReward(i, j);
			}
		}
		for (int iteration = 0; iteration < maxIterations; iteration++) {
			// U <- U'
			for (int i = 0; i <= mdp.getMaxScore(); i++) {
				for (int j = 0; j <= mdp.getMaxTiles(); j++) {
					utility[i][j] = utilityPrime[i][j];
				}
			}
			for (int i = 0; i <= mdp.getMaxScore(); i++) {
				for (int j = 0; j <= mdp.getMaxTiles(); j++) {
					Action[] actions = mdp.getActions(i, j);
					// Terminal states (no actions) contribute 0 expected
					// future utility.
					double aMax = (actions.length > 0) ? Double.NEGATIVE_INFINITY
							: 0;
					for (Action action : actions) {
						List<Transition> transitions = getTransitions(action,
								mdp, i, j);
						double aSum = 0.0;
						for (Transition transition : transitions) {
							int transI = transition.getScoreChange();
							int transJ = transition.getTileCountChange();
							// Weight each successor by its transition
							// probability (previously the probability was
							// ignored, overstating the expected utility).
							// Successors outside the grid simply drop their
							// probability mass - TODO confirm that is the
							// intended boundary behavior.
							if (i + transI >= 0 && i + transI <= mdp.getMaxScore()
									&& j + transJ >= 0
									&& j + transJ <= mdp.getMaxTiles()) {
								aSum += transition.getProb()
										* utility[i + transI][j + transJ];
							}
						}
						if (aSum > aMax) {
							aMax = aSum;
						}
					}
					// Bellman update: U'[s] <- R(s) + gamma * max_a E[U]
					utilityPrime[i][j] = mdp.getReward(i, j) + GAMMA * aMax;
				}
			}
			double maxDiff = getMaxDiff(utility, utilityPrime);
			System.out.println("Max diff |U - U'| = " + maxDiff);
			if (maxDiff < DEFAULT_EPS) {
				System.out.println("Solution to MDP converged: " + maxDiff);
				break;
			}
		}
		// Debug dump of the utility matrix, one score row per line.
		for (int i = 0; i < utility.length; i++) {
			StringBuilder sb = new StringBuilder();
			for (int j = 0; j < utility[i].length; j++) {
				sb.append(fmt.format(utility[i][j]));
				sb.append(" ");
			}
			System.out.println(sb);
		}
		// utility is now the utility matrix.
		// TODO: derive the greedy policy from it.
		return policy;
	}

	/** @return the maximum absolute element-wise difference |u - uPrime|. */
	double getMaxDiff(double[][] u, double[][] uPrime) {
		double maxDiff = 0;
		for (int i = 0; i < u.length; i++) {
			for (int j = 0; j < u[i].length; j++) {
				maxDiff = Math.max(maxDiff, Math.abs(u[i][j] - uPrime[i][j]));
			}
		}
		return maxDiff;
	}

	/**
	 * Transition model for the given action at state (score, tiles).
	 * <p>
	 * NOTE(review): playToWin and playToLose currently share identical
	 * placeholder transitions - presumably these should differ; confirm the
	 * intended model.
	 */
	private List<Transition> getTransitions(Action action, MDP mdp, int score,
			int tiles) {
		List<Transition> transitions = new ArrayList<Transition>();
		if (Action.playToWin == action) {
			transitions.add(new Transition(0.9, 1, 1));
			transitions.add(new Transition(0.1, 1, -3));
		} else if (Action.playToLose == action) {
			transitions.add(new Transition(0.9, 1, 1));
			transitions.add(new Transition(0.1, 1, -3));
		}
		return transitions;
	}
}

View File

@@ -1,5 +1,6 @@
package view; package view;
import model.comPlayer.AdaptiveComPlayer;
import model.comPlayer.AlphaBetaComPlayer; import model.comPlayer.AlphaBetaComPlayer;
import model.comPlayer.MinimaxComPlayer; import model.comPlayer.MinimaxComPlayer;
import model.comPlayer.MonteCarloComPlayer; import model.comPlayer.MonteCarloComPlayer;
@@ -7,16 +8,19 @@ import model.comPlayer.Player;
import model.comPlayer.RandomComPlayer; import model.comPlayer.RandomComPlayer;
public class ParsedArgs { public class ParsedArgs {
public static final String COM_RANDOM = "RANDOM"; public static final String COM_ADAPTIVE = "ADAPTIVE";
public static final String COM_MINIMAX = "MINIMAX";
public static final String COM_ALPHABETA = "ALPHABETA"; public static final String COM_ALPHABETA = "ALPHABETA";
public static final String COM_MINIMAX = "MINIMAX";
public static final String COM_MONTECARLO = "MONTECARLO"; public static final String COM_MONTECARLO = "MONTECARLO";
public static final String COM_RANDOM = "RANDOM";
public static final String COM_DEFAULT = COM_ALPHABETA; public static final String COM_DEFAULT = COM_ALPHABETA;
private String comPlayer = COM_DEFAULT; private String comPlayer = COM_DEFAULT;
public Player getComPlayer() { public Player getComPlayer() {
if (COM_RANDOM.equalsIgnoreCase(comPlayer)) { if (COM_ADAPTIVE.equalsIgnoreCase(comPlayer)) {
return new AdaptiveComPlayer();
} else if (COM_RANDOM.equalsIgnoreCase(comPlayer)) {
return new RandomComPlayer(); return new RandomComPlayer();
} else if (COM_MINIMAX.equalsIgnoreCase(comPlayer)) { } else if (COM_MINIMAX.equalsIgnoreCase(comPlayer)) {
return new MinimaxComPlayer(); return new MinimaxComPlayer();

View File

@@ -0,0 +1,98 @@
package aima.core.probability.mdp;
import junit.framework.Assert;
import org.junit.Before;
import org.junit.Test;
import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.environment.cellworld.CellWorldFactory;
import aima.core.probability.example.MDPFactory;
import aima.core.probability.mdp.MarkovDecisionProcess;
/**
*
* @author Ciaran O'Reilly
* @author Ravi Mohan
*
*/
public class MarkovDecisionProcessTest {
	public static final double DELTA_THRESHOLD = 1e-3;
	// The 4x3 cell world of AIMA3e Figure 17.1 and its MDP (Figure 17.3).
	// NOTE(review): this class uses the deprecated junit.framework.Assert;
	// the sibling tests use org.junit.Assert - consider aligning.
	private CellWorld<Double> cw = null;
	private MarkovDecisionProcess<Cell<Double>, CellWorldAction> mdp = null;

	@Before
	public void setUp() {
		cw = CellWorldFactory.createCellWorldForFig17_1();
		mdp = MDPFactory.createMDPForFigure17_3(cw);
	}

	@Test
	public void testActions() {
		// Ensure all actions can be performed in each cell
		// except for the terminal states.
		for (Cell<Double> s : cw.getCells()) {
			// (4,3) and (4,2) are the terminal states of Figure 17.1.
			if (4 == s.getX() && (3 == s.getY() || 2 == s.getY())) {
				Assert.assertEquals(0, mdp.actions(s).size());
			} else {
				Assert.assertEquals(5, mdp.actions(s).size());
			}
		}
	}

	@Test
	public void testMDPTransitionModel() {
		// Transition model from cell (1,1): the intended direction succeeds
		// with probability 0.8, each perpendicular alternative with 0.1, and
		// bumping a wall leaves the agent in place.
		// Up from (1,1):
		Assert.assertEquals(0.8, mdp.transitionProbability(cw.getCellAt(1, 2),
				cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 1),
				cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(2, 1),
				cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 3),
				cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD);
		// Down from (1,1): both Down (0.8) and Left (0.1) hit walls, so the
		// agent stays put with probability 0.9.
		Assert.assertEquals(0.9, mdp.transitionProbability(cw.getCellAt(1, 1),
				cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(2, 1),
				cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(3, 1),
				cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 2),
				cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD);
		// Left from (1,1): Left (0.8) and Down (0.1) hit walls -> stay 0.9.
		Assert.assertEquals(0.9, mdp.transitionProbability(cw.getCellAt(1, 1),
				cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(2, 1),
				cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(3, 1),
				cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 2),
				cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD);
		// Right from (1,1):
		Assert.assertEquals(0.8, mdp.transitionProbability(cw.getCellAt(2, 1),
				cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 1),
				cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 2),
				cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 3),
				cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD);
	}

	@Test
	public void testRewardFunction() {
		// Terminal rewards are +1 at (4,3) and -1 at (4,2); every other cell
		// carries the -0.04 step penalty (Figure 17.1).
		for (Cell<Double> s : cw.getCells()) {
			if (4 == s.getX() && 3 == s.getY()) {
				Assert.assertEquals(1.0, mdp.reward(s), DELTA_THRESHOLD);
			} else if (4 == s.getX() && 2 == s.getY()) {
				Assert.assertEquals(-1.0, mdp.reward(s), DELTA_THRESHOLD);
			} else {
				Assert.assertEquals(-0.04, mdp.reward(s), DELTA_THRESHOLD);
			}
		}
	}
}

View File

@@ -0,0 +1,80 @@
package aima.core.probability.mdp;
import java.util.Map;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.environment.cellworld.CellWorldFactory;
import aima.core.environment.gridworld.GridCell;
import aima.core.environment.gridworld.GridWorld;
import aima.core.environment.gridworld.GridWorldAction;
import aima.core.environment.gridworld.GridWorldFactory;
import aima.core.probability.example.MDPFactory;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.impl.ModifiedPolicyEvaluation;
import aima.core.probability.mdp.search.PolicyIteration;
import aima.core.probability.mdp.search.ValueIteration;
/**
* @author Ravi Mohan
* @author Ciaran O'Reilly
*
*/
public class PolicyIterationTest {
	public static final double DELTA_THRESHOLD = 1e-3;
	// Grid-world MDP model of the tile game (tiles x score state grid).
	private GridWorld<Double> gw = null;
	private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;
	private PolicyIteration<GridCell<Double>, GridWorldAction> pi = null;
	final int maxTiles = 6;
	final int maxScore = 10;

	@Before
	public void setUp() {
		//take 10 turns to place 6 tiles
		double defaultPenalty = -0.04;
		gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,maxScore,defaultPenalty);
		mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore);
		// 100 evaluation iterations with discount gamma = 0.9.
		// NOTE(review): the comment said "gamma = 1.0" but 0.9 is passed.
		PolicyEvaluation<GridCell<Double>,GridWorldAction> pe = new ModifiedPolicyEvaluation<GridCell<Double>, GridWorldAction>(100,0.9);
		pi = new PolicyIteration<GridCell<Double>, GridWorldAction>(pe);
	}

	@Test
	public void testPolicyIterationForTileGame() {
		// NOTE(review): this test currently makes no assertions - it only
		// prints the computed policy grid (highest score row first) and
		// verifies that policy iteration terminates without throwing.
		Policy<GridCell<Double>, GridWorldAction> policy = pi.policyIteration(mdp);
		for (int j = maxScore; j >= 1; j--) {
			StringBuilder sb = new StringBuilder();
			for (int i = 1; i <= maxTiles; i++) {
				sb.append(policy.action(gw.getCellAt(i, j)));
				sb.append(" ");
			}
			System.out.println(sb.toString());
		}
		// Expected-utility assertions carried over from the cell-world
		// example; disabled until expected values for the tile game exist.
		//Assert.assertEquals(0.705, U.get(gw.getCellAt(1, 1)), DELTA_THRESHOLD);
		/*
		Assert.assertEquals(0.762, U.get(cw1.getCellAt(1, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(0.812, U.get(cw1.getCellAt(1, 3)), DELTA_THRESHOLD);
		Assert.assertEquals(0.655, U.get(cw1.getCellAt(2, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(0.868, U.get(cw1.getCellAt(2, 3)), DELTA_THRESHOLD);
		Assert.assertEquals(0.611, U.get(cw1.getCellAt(3, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(0.660, U.get(cw1.getCellAt(3, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(0.918, U.get(cw1.getCellAt(3, 3)), DELTA_THRESHOLD);
		Assert.assertEquals(0.388, U.get(cw1.getCellAt(4, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(-1.0, U.get(cw1.getCellAt(4, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(1.0, U.get(cw1.getCellAt(4, 3)), DELTA_THRESHOLD);*/
	}
}

View File

@@ -0,0 +1,64 @@
package aima.core.probability.mdp;
import java.util.Map;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.environment.cellworld.CellWorldFactory;
import aima.core.probability.example.MDPFactory;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.search.ValueIteration;
/**
* @author Ravi Mohan
* @author Ciaran O'Reilly
*
*/
public class ValueIterationTest {
	public static final double DELTA_THRESHOLD = 1e-3;
	// 4x3 cell world of AIMA3e Figure 17.1 and corresponding MDP (Fig 17.3).
	private CellWorld<Double> cw = null;
	private MarkovDecisionProcess<Cell<Double>, CellWorldAction> mdp = null;
	private ValueIteration<Cell<Double>, CellWorldAction> vi = null;

	@Before
	public void setUp() {
		cw = CellWorldFactory.createCellWorldForFig17_1();
		mdp = MDPFactory.createMDPForFigure17_3(cw);
		// Undiscounted (gamma = 1.0), matching the book's example.
		vi = new ValueIteration<Cell<Double>, CellWorldAction>(1.0);
	}

	@Test
	public void testValueIterationForFig17_3() {
		// Expected utilities are the published values of AIMA3e Figure 17.3.
		Map<Cell<Double>, Double> U = vi.valueIteration(mdp, 0.0001);
		Assert.assertEquals(0.705, U.get(cw.getCellAt(1, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(0.762, U.get(cw.getCellAt(1, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(0.812, U.get(cw.getCellAt(1, 3)), DELTA_THRESHOLD);
		Assert.assertEquals(0.655, U.get(cw.getCellAt(2, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(0.868, U.get(cw.getCellAt(2, 3)), DELTA_THRESHOLD);
		Assert.assertEquals(0.611, U.get(cw.getCellAt(3, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(0.660, U.get(cw.getCellAt(3, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(0.918, U.get(cw.getCellAt(3, 3)), DELTA_THRESHOLD);
		Assert.assertEquals(0.388, U.get(cw.getCellAt(4, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(-1.0, U.get(cw.getCellAt(4, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(1.0, U.get(cw.getCellAt(4, 3)), DELTA_THRESHOLD);
		// Debug: print the utility grid row by row, top row (y = 3) first.
		for (int j = 3; j >= 1; j--) {
			StringBuilder sb = new StringBuilder();
			for (int i = 1; i <= 4; i++) {
				sb.append(U.get(cw.getCellAt(i, j)));
				sb.append(" ");
			}
			System.out.println(sb.toString());
		}
	}
}

View File

@@ -0,0 +1,76 @@
package aima.core.probability.mdp;
import java.util.Map;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.environment.cellworld.CellWorldFactory;
import aima.core.environment.gridworld.GridCell;
import aima.core.environment.gridworld.GridWorld;
import aima.core.environment.gridworld.GridWorldAction;
import aima.core.environment.gridworld.GridWorldFactory;
import aima.core.probability.example.MDPFactory;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.search.ValueIteration;
/**
* @author Ravi Mohan
* @author Ciaran O'Reilly
*
*/
public class ValueIterationTest2 {

	/** Tolerance used when comparing computed utilities to expected values. */
	public static final double DELTA_THRESHOLD = 1e-3;

	private GridWorld<Double> gw = null;
	private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;
	private ValueIteration<GridCell<Double>, GridWorldAction> vi = null;

	// Tile-game dimensions: up to 6 tiles placed, scores from 1 to 10.
	final int maxTiles = 6;
	final int maxScore = 10;

	/**
	 * Builds a tile-game grid world and its MDP, and creates a
	 * value-iteration solver with discount factor gamma = 0.9.
	 */
	@Before
	public void setUp() {
		// Small per-step penalty to encourage shorter games.
		double defaultPenalty = -0.04;
		gw = GridWorldFactory.createGridWorldForTileGame(maxTiles, maxScore, defaultPenalty);
		mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore);
		// NOTE: gamma = 0.9 — future rewards ARE discounted here, unlike the
		// undiscounted Figure 17.3 test.
		vi = new ValueIteration<GridCell<Double>, GridWorldAction>(0.9);
	}

	/**
	 * Runs value iteration (epsilon = 1.0, the maximum allowed error in any
	 * state utility), prints the utility grid for inspection, and spot-checks
	 * the utility of cell (1, 1).
	 */
	@Test
	public void testValueIterationForTileGame() {
		Map<GridCell<Double>, Double> U = vi.valueIteration(mdp, 1.0);

		// Dump utilities row by row, highest score first, for visual inspection.
		for (int j = maxScore; j >= 1; j--) {
			StringBuilder sb = new StringBuilder();
			for (int i = 1; i <= maxTiles; i++) {
				sb.append(U.get(gw.getCellAt(i, j)));
				sb.append(" ");
			}
			System.out.println(sb.toString());
		}

		// TODO(review): 0.705 is copied from the Figure 17.3 cell-world test;
		// confirm it is actually the expected utility for this tile game.
		Assert.assertEquals(0.705, U.get(gw.getCellAt(1, 1)), DELTA_THRESHOLD);
	}
}

View File

@@ -0,0 +1,26 @@
package model.mdp;
import static org.junit.Assert.assertTrue;
import model.mdp.MDP.MODE;
import org.junit.Test;
public class ValueIterationSolverTest {

	/**
	 * Solves a small tile-game MDP with value iteration and sanity-checks the
	 * resulting policy length: the game needs at least one move per point of
	 * score, and may not exceed the allowed number of turns.
	 */
	@Test
	public void testSolve() {
		MDPSolver solver = new ValueIterationSolver();
		// Solve for a score of 6 in at most 10 turns.
		int maxScore = 6;
		int maxTurns = 10;
		MDP mdp = new MDP(maxScore, maxTurns, MODE.CEIL);
		Policy policy = solver.solve(mdp);
		assertTrue(policy.size() >= maxScore);
		assertTrue(policy.size() <= maxTurns);
		System.out.println("Policy: " + policy);
	}
}