Implemented an agent that chooses to play winning, losing, or random moves by solving a simplified MDP model of the game with policy iteration.
Portions of MDP/solver code by Ciaran O'Reilly and Ravi Mohan used under MIT license.
This commit is contained in:
19
src/aima/core/agent/Action.java
Normal file
19
src/aima/core/agent/Action.java
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
package aima.core.agent;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Describes an Action that can or has been taken by an Agent via one of its
|
||||||
|
* Actuators.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
*/
|
||||||
|
public interface Action {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicates whether or not this Action is a 'No Operation'.<br>
|
||||||
|
* Note: AIMA3e - NoOp, or no operation, is the name of an assembly language
|
||||||
|
* instruction that does nothing.
|
||||||
|
*
|
||||||
|
* @return true if this is a NoOp Action.
|
||||||
|
*/
|
||||||
|
//boolean isNoOp();
|
||||||
|
}
|
||||||
87
src/aima/core/environment/cellworld/Cell.java
Normal file
87
src/aima/core/environment/cellworld/Cell.java
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
package aima.core.environment.cellworld;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Artificial Intelligence A Modern Approach (3rd Edition): page 645.<br>
|
||||||
|
* <br>
|
||||||
|
* A representation of a Cell in the environment detailed in Figure 17.1.
|
||||||
|
*
|
||||||
|
* @param <C>
|
||||||
|
* the content type of the cell.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*/
|
||||||
|
public class Cell<C> {
|
||||||
|
private int x = 1;
|
||||||
|
private int y = 1;
|
||||||
|
private C content = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a Cell.
|
||||||
|
*
|
||||||
|
* @param x
|
||||||
|
* the x position of the cell.
|
||||||
|
* @param y
|
||||||
|
* the y position of the cell.
|
||||||
|
* @param content
|
||||||
|
* the initial content of the cell.
|
||||||
|
*/
|
||||||
|
public Cell(int x, int y, C content) {
|
||||||
|
this.x = x;
|
||||||
|
this.y = y;
|
||||||
|
this.content = content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the x position of the cell.
|
||||||
|
*/
|
||||||
|
public int getX() {
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the y position of the cell.
|
||||||
|
*/
|
||||||
|
public int getY() {
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the content of the cell.
|
||||||
|
*/
|
||||||
|
public C getContent() {
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the cell's content.
|
||||||
|
*
|
||||||
|
* @param content
|
||||||
|
* the content to be placed in the cell.
|
||||||
|
*/
|
||||||
|
public void setContent(C content) {
|
||||||
|
this.content = content;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "<x=" + x + ", y=" + y + ", content=" + content + ">";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (o instanceof Cell<?>) {
|
||||||
|
Cell<?> c = (Cell<?>) o;
|
||||||
|
return x == c.x && y == c.y && content.equals(c.content);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return x + 23 + y + 31 * content.hashCode();
|
||||||
|
}
|
||||||
|
}
|
||||||
123
src/aima/core/environment/cellworld/CellWorld.java
Normal file
123
src/aima/core/environment/cellworld/CellWorld.java
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
package aima.core.environment.cellworld;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Artificial Intelligence A Modern Approach (3rd Edition): page 645.<br>
|
||||||
|
* <br>
|
||||||
|
*
|
||||||
|
* A representation for the environment depicted in figure 17.1.<br>
|
||||||
|
* <br>
|
||||||
|
* <b>Note:<b> the x and y coordinates are always positive integers starting at
|
||||||
|
* 1.<br>
|
||||||
|
* <b>Note:<b> If looking at a rectangle - the coordinate (x=1, y=1) will be the
|
||||||
|
* bottom left hand corner.<br>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* @param <C>
|
||||||
|
* the type of content for the Cells in the world.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*/
|
||||||
|
public class CellWorld<C> {
|
||||||
|
private Set<Cell<C>> cells = new LinkedHashSet<Cell<C>>();
|
||||||
|
private Map<Integer, Map<Integer, Cell<C>>> cellLookup = new HashMap<Integer, Map<Integer, Cell<C>>>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a Cell World with size xDimension * y Dimension cells, all with
|
||||||
|
* their values set to a default content value.
|
||||||
|
*
|
||||||
|
* @param xDimension
|
||||||
|
* the size of the x dimension.
|
||||||
|
* @param yDimension
|
||||||
|
* the size of the y dimension.
|
||||||
|
*
|
||||||
|
* @param defaultCellContent
|
||||||
|
* the default content to assign to each cell created.
|
||||||
|
*/
|
||||||
|
public CellWorld(int xDimension, int yDimension, C defaultCellContent) {
|
||||||
|
for (int x = 1; x <= xDimension; x++) {
|
||||||
|
Map<Integer, Cell<C>> xCol = new HashMap<Integer, Cell<C>>();
|
||||||
|
for (int y = 1; y <= yDimension; y++) {
|
||||||
|
Cell<C> c = new Cell<C>(x, y, defaultCellContent);
|
||||||
|
cells.add(c);
|
||||||
|
xCol.put(y, c);
|
||||||
|
}
|
||||||
|
cellLookup.put(x, xCol);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return all the cells in this world.
|
||||||
|
*/
|
||||||
|
public Set<Cell<C>> getCells() {
|
||||||
|
return cells;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine what cell would be moved into if the specified action is
|
||||||
|
* performed in the specified cell. Normally, this will be the cell adjacent
|
||||||
|
* in the appropriate direction. However, if there is no cell in the
|
||||||
|
* adjacent direction of the action then the outcome of the action is to
|
||||||
|
* stay in the same cell as the action was performed in.
|
||||||
|
*
|
||||||
|
* @param s
|
||||||
|
* the cell location from which the action is to be performed.
|
||||||
|
* @param a
|
||||||
|
* the action to perform (Up, Down, Left, or Right).
|
||||||
|
* @return the Cell an agent would end up in if they performed the specified
|
||||||
|
* action from the specified cell location.
|
||||||
|
*/
|
||||||
|
public Cell<C> result(Cell<C> s, CellWorldAction a) {
|
||||||
|
Cell<C> sDelta = getCellAt(a.getXResult(s.getX()), a.getYResult(s
|
||||||
|
.getY()));
|
||||||
|
if (null == sDelta) {
|
||||||
|
// Default to no effect
|
||||||
|
// (i.e. bumps back in place as no adjoining cell).
|
||||||
|
sDelta = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sDelta;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove the cell at the specified location from this Cell World. This
|
||||||
|
* allows you to introduce barriers into different location.
|
||||||
|
*
|
||||||
|
* @param x
|
||||||
|
* the x dimension of the cell to be removed.
|
||||||
|
* @param y
|
||||||
|
* the y dimension of the cell to be removed.
|
||||||
|
*/
|
||||||
|
public void removeCell(int x, int y) {
|
||||||
|
Map<Integer, Cell<C>> xCol = cellLookup.get(x);
|
||||||
|
if (null != xCol) {
|
||||||
|
cells.remove(xCol.remove(y));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the cell at the specified x and y locations.
|
||||||
|
*
|
||||||
|
* @param x
|
||||||
|
* the x dimension of the cell to be retrieved.
|
||||||
|
* @param y
|
||||||
|
* the y dimension of the cell to be retrieved.
|
||||||
|
* @return the cell at the specified x,y location, null if no cell exists at
|
||||||
|
* this location.
|
||||||
|
*/
|
||||||
|
public Cell<C> getCellAt(int x, int y) {
|
||||||
|
Cell<C> c = null;
|
||||||
|
Map<Integer, Cell<C>> xCol = cellLookup.get(x);
|
||||||
|
if (null != xCol) {
|
||||||
|
c = xCol.get(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
}
|
||||||
142
src/aima/core/environment/cellworld/CellWorldAction.java
Normal file
142
src/aima/core/environment/cellworld/CellWorldAction.java
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
package aima.core.environment.cellworld;
|
||||||
|
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Artificial Intelligence A Modern Approach (3rd Edition): page 645.<br>
|
||||||
|
* <br>
|
||||||
|
*
|
||||||
|
* The actions in every state are Up, Down, Left, and Right.<br>
|
||||||
|
* <br>
|
||||||
|
* <b>Note:<b> Moving 'North' causes y to increase by 1, 'Down' y to decrease by
|
||||||
|
* 1, 'Left' x to decrease by 1, and 'Right' x to increase by 1 within a Cell
|
||||||
|
* World.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public enum CellWorldAction implements Action {
|
||||||
|
Up, Down, Left, Right, None;
|
||||||
|
|
||||||
|
private static final Set<CellWorldAction> _actions = new LinkedHashSet<CellWorldAction>();
|
||||||
|
static {
|
||||||
|
_actions.add(Up);
|
||||||
|
_actions.add(Down);
|
||||||
|
_actions.add(Left);
|
||||||
|
_actions.add(Right);
|
||||||
|
_actions.add(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return a set of the actual actions.
|
||||||
|
*/
|
||||||
|
public static final Set<CellWorldAction> actions() {
|
||||||
|
return _actions;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// START-Action
|
||||||
|
//@Override
|
||||||
|
//public boolean isNoOp() {
|
||||||
|
// if (None == this) {
|
||||||
|
// return true;
|
||||||
|
// }
|
||||||
|
// return false;
|
||||||
|
//}
|
||||||
|
// END-Action
|
||||||
|
//
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param curX
|
||||||
|
* the current x position.
|
||||||
|
* @return the result on the x position of applying this action.
|
||||||
|
*/
|
||||||
|
public int getXResult(int curX) {
|
||||||
|
int newX = curX;
|
||||||
|
|
||||||
|
switch (this) {
|
||||||
|
case Left:
|
||||||
|
newX--;
|
||||||
|
break;
|
||||||
|
case Right:
|
||||||
|
newX++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return newX;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param curY
|
||||||
|
* the current y position.
|
||||||
|
* @return the result on the y position of applying this action.
|
||||||
|
*/
|
||||||
|
public int getYResult(int curY) {
|
||||||
|
int newY = curY;
|
||||||
|
|
||||||
|
switch (this) {
|
||||||
|
case Up:
|
||||||
|
newY++;
|
||||||
|
break;
|
||||||
|
case Down:
|
||||||
|
newY--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return newY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the first right angled action related to this action.
|
||||||
|
*/
|
||||||
|
public CellWorldAction getFirstRightAngledAction() {
|
||||||
|
CellWorldAction a = null;
|
||||||
|
|
||||||
|
switch (this) {
|
||||||
|
case Up:
|
||||||
|
case Down:
|
||||||
|
a = Left;
|
||||||
|
break;
|
||||||
|
case Left:
|
||||||
|
case Right:
|
||||||
|
a = Down;
|
||||||
|
break;
|
||||||
|
case None:
|
||||||
|
a = None;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the second right angled action related to this action.
|
||||||
|
*/
|
||||||
|
public CellWorldAction getSecondRightAngledAction() {
|
||||||
|
CellWorldAction a = null;
|
||||||
|
|
||||||
|
switch (this) {
|
||||||
|
case Up:
|
||||||
|
case Down:
|
||||||
|
a = Right;
|
||||||
|
break;
|
||||||
|
case Left:
|
||||||
|
case Right:
|
||||||
|
a = Up;
|
||||||
|
break;
|
||||||
|
case None:
|
||||||
|
a = None;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
}
|
||||||
27
src/aima/core/environment/cellworld/CellWorldFactory.java
Normal file
27
src/aima/core/environment/cellworld/CellWorldFactory.java
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
package aima.core.environment.cellworld;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class CellWorldFactory {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create the cell world as defined in Figure 17.1 in AIMA3e. (a) A simple 4
|
||||||
|
* x 3 environment that presents the agent with a sequential decision
|
||||||
|
* problem.
|
||||||
|
*
|
||||||
|
* @return a cell world representation of Fig 17.1 in AIMA3e.
|
||||||
|
*/
|
||||||
|
public static CellWorld<Double> createCellWorldForFig17_1() {
|
||||||
|
CellWorld<Double> cw = new CellWorld<Double>(4, 3, -0.04);
|
||||||
|
|
||||||
|
cw.removeCell(2, 2);
|
||||||
|
|
||||||
|
cw.getCellAt(4, 3).setContent(1.0);
|
||||||
|
cw.getCellAt(4, 2).setContent(-1.0);
|
||||||
|
|
||||||
|
return cw;
|
||||||
|
}
|
||||||
|
}
|
||||||
87
src/aima/core/environment/gridworld/GridCell.java
Normal file
87
src/aima/core/environment/gridworld/GridCell.java
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
package aima.core.environment.gridworld;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Artificial Intelligence A Modern Approach (3rd Edition): page 645.<br>
|
||||||
|
* <br>
|
||||||
|
* A representation of a Cell in the environment detailed in Figure 17.1.
|
||||||
|
*
|
||||||
|
* @param <C>
|
||||||
|
* the content type of the cell.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*/
|
||||||
|
public class GridCell<C> {
|
||||||
|
private int x = 1;
|
||||||
|
private int y = 1;
|
||||||
|
private C content = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a Cell.
|
||||||
|
*
|
||||||
|
* @param x
|
||||||
|
* the x position of the cell.
|
||||||
|
* @param y
|
||||||
|
* the y position of the cell.
|
||||||
|
* @param content
|
||||||
|
* the initial content of the cell.
|
||||||
|
*/
|
||||||
|
public GridCell(int x, int y, C content) {
|
||||||
|
this.x = x;
|
||||||
|
this.y = y;
|
||||||
|
this.content = content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the x position of the cell.
|
||||||
|
*/
|
||||||
|
public int getX() {
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the y position of the cell.
|
||||||
|
*/
|
||||||
|
public int getY() {
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the content of the cell.
|
||||||
|
*/
|
||||||
|
public C getContent() {
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the cell's content.
|
||||||
|
*
|
||||||
|
* @param content
|
||||||
|
* the content to be placed in the cell.
|
||||||
|
*/
|
||||||
|
public void setContent(C content) {
|
||||||
|
this.content = content;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "<x=" + x + ", y=" + y + ", content=" + content + ">";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (o instanceof GridCell<?>) {
|
||||||
|
GridCell<?> c = (GridCell<?>) o;
|
||||||
|
return x == c.x && y == c.y && content.equals(c.content);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return x + 23 + y + 31 * content.hashCode();
|
||||||
|
}
|
||||||
|
}
|
||||||
56
src/aima/core/environment/gridworld/GridWorld.java
Normal file
56
src/aima/core/environment/gridworld/GridWorld.java
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
package aima.core.environment.gridworld;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class GridWorld<C> {
|
||||||
|
private Set<GridCell<C>> cells = new LinkedHashSet<GridCell<C>>();
|
||||||
|
private Map<Integer, Map<Integer, GridCell<C>>> cellLookup = new HashMap<Integer, Map<Integer, GridCell<C>>>();
|
||||||
|
|
||||||
|
public GridWorld(int xDimension, int yDimension, C defaultCellContent) {
|
||||||
|
for (int x = 1; x <= xDimension; x++) {
|
||||||
|
Map<Integer, GridCell<C>> xCol = new HashMap<Integer, GridCell<C>>();
|
||||||
|
for (int y = 1; y <= yDimension; y++) {
|
||||||
|
GridCell<C> c = new GridCell<C>(x, y, defaultCellContent);
|
||||||
|
cells.add(c);
|
||||||
|
xCol.put(y, c);
|
||||||
|
}
|
||||||
|
cellLookup.put(x, xCol);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<GridCell<C>> getCells() {
|
||||||
|
return cells;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GridCell<C> result(GridCell<C> s, GridWorldAction a) {
|
||||||
|
GridCell<C> sDelta = getCellAt(a.getXResult(s.getX()), a.getYResult(s
|
||||||
|
.getY()));
|
||||||
|
if (null == sDelta) {
|
||||||
|
// Default to no effect
|
||||||
|
// (i.e. bumps back in place as no adjoining cell).
|
||||||
|
sDelta = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sDelta;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void removeCell(int x, int y) {
|
||||||
|
Map<Integer, GridCell<C>> xCol = cellLookup.get(x);
|
||||||
|
if (null != xCol) {
|
||||||
|
cells.remove(xCol.remove(y));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public GridCell<C> getCellAt(int x, int y) {
|
||||||
|
GridCell<C> c = null;
|
||||||
|
Map<Integer, GridCell<C>> xCol = cellLookup.get(x);
|
||||||
|
if (null != xCol) {
|
||||||
|
c = xCol.get(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
}
|
||||||
56
src/aima/core/environment/gridworld/GridWorldAction.java
Normal file
56
src/aima/core/environment/gridworld/GridWorldAction.java
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
package aima.core.environment.gridworld;
|
||||||
|
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
|
||||||
|
public enum GridWorldAction implements Action {
|
||||||
|
AddTile,CaptureThree,RandomMove;
|
||||||
|
|
||||||
|
private static final Set<GridWorldAction> _actions = new LinkedHashSet<GridWorldAction>();
|
||||||
|
static {
|
||||||
|
_actions.add(AddTile); // try to add a tile, turn (low chance of capture)
|
||||||
|
_actions.add(CaptureThree); // try to subtract two tiles, add a turn (high chance of capture)
|
||||||
|
_actions.add(RandomMove); // try add a tile, add a turn (even chance of add/capture)
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final Set<GridWorldAction> actions() {
|
||||||
|
return _actions;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// START-Action
|
||||||
|
//@Override
|
||||||
|
//public boolean isNoOp() {
|
||||||
|
// if (None == this) {
|
||||||
|
// return true;
|
||||||
|
// }
|
||||||
|
// return false;
|
||||||
|
//}
|
||||||
|
// END-Action
|
||||||
|
//
|
||||||
|
|
||||||
|
public int getXResult(int curX) {
|
||||||
|
int newX = curX;
|
||||||
|
|
||||||
|
switch (this) {
|
||||||
|
case AddTile:
|
||||||
|
newX++;
|
||||||
|
break;
|
||||||
|
case CaptureThree:
|
||||||
|
newX-=2;
|
||||||
|
break;
|
||||||
|
case RandomMove:
|
||||||
|
newX--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return newX;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getYResult(int curY) {
|
||||||
|
//the score increments by 1 at every action, regardless
|
||||||
|
return curY+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
23
src/aima/core/environment/gridworld/GridWorldFactory.java
Normal file
23
src/aima/core/environment/gridworld/GridWorldFactory.java
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package aima.core.environment.gridworld;
|
||||||
|
|
||||||
|
/**
 * Factory for constructing tile-game GridWorld instances.
 *
 * @author Woody Folsom
 */
public class GridWorldFactory {

	/**
	 * Create a GridWorld modeling a tile game where the objective is to reach
	 * the maximum number of tiles without the score exceeding maxScore.
	 *
	 * @param maxTiles
	 *            the x dimension of the grid (maximum tile count).
	 * @param maxScore
	 *            the y dimension of the grid (maximum score/turn count).
	 * @param nonTerminalReward
	 *            the content (reward) assigned to every cell initially.
	 * @return a GridWorld whose (maxTiles, maxScore) cell holds content 1.0
	 *         (the goal state); all other cells hold nonTerminalReward.
	 */
	public static GridWorld<Double> createGridWorldForTileGame(int maxTiles, int maxScore, double nonTerminalReward) {
		GridWorld<Double> cw = new GridWorld<Double>(maxTiles, maxScore, nonTerminalReward);

		// Mark the goal state: maxTiles tiles at exactly maxScore turns.
		cw.getCellAt(maxTiles, maxScore).setContent(1.0);

		return cw;
	}
}
|
||||||
251
src/aima/core/probability/example/MDPFactory.java
Normal file
251
src/aima/core/probability/example/MDPFactory.java
Normal file
@@ -0,0 +1,251 @@
|
|||||||
|
package aima.core.probability.example;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import aima.core.environment.cellworld.Cell;
|
||||||
|
import aima.core.environment.cellworld.CellWorld;
|
||||||
|
import aima.core.environment.cellworld.CellWorldAction;
|
||||||
|
import aima.core.environment.gridworld.GridCell;
|
||||||
|
import aima.core.environment.gridworld.GridWorld;
|
||||||
|
import aima.core.environment.gridworld.GridWorldAction;
|
||||||
|
import aima.core.probability.mdp.ActionsFunction;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.probability.mdp.RewardFunction;
|
||||||
|
import aima.core.probability.mdp.TransitionProbabilityFunction;
|
||||||
|
import aima.core.probability.mdp.impl.MDP;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*/
|
||||||
|
public class MDPFactory {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs an MDP that can be used to generate the utility values
|
||||||
|
* detailed in Fig 17.3.
|
||||||
|
*
|
||||||
|
* @param cw
|
||||||
|
* the cell world from figure 17.1.
|
||||||
|
* @return an MDP that can be used to generate the utility values detailed
|
||||||
|
* in Fig 17.3.
|
||||||
|
*/
|
||||||
|
public static MarkovDecisionProcess<Cell<Double>, CellWorldAction> createMDPForFigure17_3(
|
||||||
|
final CellWorld<Double> cw) {
|
||||||
|
|
||||||
|
return new MDP<Cell<Double>, CellWorldAction>(cw.getCells(),
|
||||||
|
cw.getCellAt(1, 1), createActionsFunctionForFigure17_1(cw),
|
||||||
|
createTransitionProbabilityFunctionForFigure17_1(cw),
|
||||||
|
createRewardFunctionForFigure17_1());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MarkovDecisionProcess<GridCell<Double>, GridWorldAction> createMDPForTileGame(
|
||||||
|
final GridWorld<Double> cw, int maxTiles, int maxScore) {
|
||||||
|
|
||||||
|
return new MDP<GridCell<Double>, GridWorldAction>(cw.getCells(),
|
||||||
|
cw.getCellAt(1, 1), createActionsFunctionForTileGame(cw,maxTiles,maxScore),
|
||||||
|
createTransitionProbabilityFunctionForTileGame(cw),
|
||||||
|
createRewardFunctionForTileGame());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the allowed actions from a specified cell within the cell world
|
||||||
|
* described in Fig 17.1.
|
||||||
|
*
|
||||||
|
* @param cw
|
||||||
|
* the cell world from figure 17.1.
|
||||||
|
* @return the set of actions allowed at a particular cell. This set will be
|
||||||
|
* empty if at a terminal state.
|
||||||
|
*/
|
||||||
|
public static ActionsFunction<Cell<Double>, CellWorldAction> createActionsFunctionForFigure17_1(
|
||||||
|
final CellWorld<Double> cw) {
|
||||||
|
final Set<Cell<Double>> terminals = new HashSet<Cell<Double>>();
|
||||||
|
terminals.add(cw.getCellAt(4, 3));
|
||||||
|
terminals.add(cw.getCellAt(4, 2));
|
||||||
|
|
||||||
|
ActionsFunction<Cell<Double>, CellWorldAction> af = new ActionsFunction<Cell<Double>, CellWorldAction>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<CellWorldAction> actions(Cell<Double> s) {
|
||||||
|
// All actions can be performed in each cell
|
||||||
|
// (except terminal states)
|
||||||
|
if (terminals.contains(s)) {
|
||||||
|
return Collections.emptySet();
|
||||||
|
}
|
||||||
|
return CellWorldAction.actions();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return af;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ActionsFunction<GridCell<Double>, GridWorldAction> createActionsFunctionForTileGame(
|
||||||
|
final GridWorld<Double> cw, int maxTiles, int maxScore) {
|
||||||
|
final Set<GridCell<Double>> terminals = new HashSet<GridCell<Double>>();
|
||||||
|
terminals.add(cw.getCellAt(maxTiles,maxScore));
|
||||||
|
|
||||||
|
ActionsFunction<GridCell<Double>, GridWorldAction> af = new ActionsFunction<GridCell<Double>, GridWorldAction>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<GridWorldAction> actions(GridCell<Double> s) {
|
||||||
|
// All actions can be performed in each cell
|
||||||
|
// (except terminal states)
|
||||||
|
if (terminals.contains(s)) {
|
||||||
|
return Collections.emptySet();
|
||||||
|
}
|
||||||
|
return GridWorldAction.actions();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return af;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Figure 17.1 (b) Illustration of the transition model of the environment:
|
||||||
|
* the 'intended' outcome occurs with probability 0.8, but with probability
|
||||||
|
* 0.2 the agent moves at right angles to the intended direction. A
|
||||||
|
* collision with a wall results in no movement.
|
||||||
|
*
|
||||||
|
* @param cw
|
||||||
|
* the cell world from figure 17.1.
|
||||||
|
* @return the transition probability function as described in figure 17.1.
|
||||||
|
*/
|
||||||
|
public static TransitionProbabilityFunction<Cell<Double>, CellWorldAction> createTransitionProbabilityFunctionForFigure17_1(
|
||||||
|
final CellWorld<Double> cw) {
|
||||||
|
TransitionProbabilityFunction<Cell<Double>, CellWorldAction> tf = new TransitionProbabilityFunction<Cell<Double>, CellWorldAction>() {
|
||||||
|
private double[] distribution = new double[] { 0.8, 0.1, 0.1 };
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double probability(Cell<Double> sDelta, Cell<Double> s,
|
||||||
|
CellWorldAction a) {
|
||||||
|
double prob = 0;
|
||||||
|
|
||||||
|
List<Cell<Double>> outcomes = possibleOutcomes(s, a);
|
||||||
|
for (int i = 0; i < outcomes.size(); i++) {
|
||||||
|
if (sDelta.equals(outcomes.get(i))) {
|
||||||
|
// Note: You have to sum the matches to
|
||||||
|
// sDelta as the different actions
|
||||||
|
// could have the same effect (i.e.
|
||||||
|
// staying in place due to there being
|
||||||
|
// no adjacent cells), which increases
|
||||||
|
// the probability of the transition for
|
||||||
|
// that state.
|
||||||
|
prob += distribution[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return prob;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Cell<Double>> possibleOutcomes(Cell<Double> c,
|
||||||
|
CellWorldAction a) {
|
||||||
|
// There can be three possible outcomes for the planned action
|
||||||
|
List<Cell<Double>> outcomes = new ArrayList<Cell<Double>>();
|
||||||
|
|
||||||
|
outcomes.add(cw.result(c, a));
|
||||||
|
outcomes.add(cw.result(c, a.getFirstRightAngledAction()));
|
||||||
|
outcomes.add(cw.result(c, a.getSecondRightAngledAction()));
|
||||||
|
|
||||||
|
return outcomes;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return tf;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static TransitionProbabilityFunction<GridCell<Double>, GridWorldAction> createTransitionProbabilityFunctionForTileGame(
|
||||||
|
final GridWorld<Double> cw) {
|
||||||
|
|
||||||
|
TransitionProbabilityFunction<GridCell<Double>, GridWorldAction> tf = new TransitionProbabilityFunction<GridCell<Double>, GridWorldAction>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double probability(GridCell<Double> sDelta, GridCell<Double> s,
|
||||||
|
GridWorldAction a) {
|
||||||
|
double prob = 0;
|
||||||
|
|
||||||
|
double[] distribution = getDistribution(a);
|
||||||
|
List<GridCell<Double>> outcomes = possibleOutcomes(s, a);
|
||||||
|
for (int i = 0; i < outcomes.size(); i++) {
|
||||||
|
if (sDelta.equals(outcomes.get(i))) {
|
||||||
|
// Note: You have to sum the matches to
|
||||||
|
// sDelta as the different actions
|
||||||
|
// could have the same effect (i.e.
|
||||||
|
// staying in place due to there being
|
||||||
|
// no adjacent cells), which increases
|
||||||
|
// the probability of the transition for
|
||||||
|
// that state.
|
||||||
|
prob += distribution[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return prob;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double[] getDistribution(GridWorldAction a) {
|
||||||
|
switch (a) {
|
||||||
|
case AddTile :
|
||||||
|
return new double[] { 0.66, 0.34 };
|
||||||
|
case CaptureThree :
|
||||||
|
return new double[] { 0.34, 0.66 };
|
||||||
|
case RandomMove :
|
||||||
|
return new double[] { 0.50, 0.50 };
|
||||||
|
default :
|
||||||
|
throw new RuntimeException("Unrecognized action: " + a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private List<GridCell<Double>> possibleOutcomes(GridCell<Double> c,
|
||||||
|
GridWorldAction a) {
|
||||||
|
// There can be three possible outcomes for the planned action
|
||||||
|
List<GridCell<Double>> outcomes = new ArrayList<GridCell<Double>>();
|
||||||
|
|
||||||
|
switch (a) {
|
||||||
|
case AddTile :
|
||||||
|
outcomes.add(cw.result(c, GridWorldAction.AddTile));
|
||||||
|
outcomes.add(cw.result(c, GridWorldAction.CaptureThree));
|
||||||
|
break;
|
||||||
|
case CaptureThree :
|
||||||
|
outcomes.add(cw.result(c, GridWorldAction.AddTile));
|
||||||
|
outcomes.add(cw.result(c, GridWorldAction.CaptureThree));
|
||||||
|
break;
|
||||||
|
case RandomMove :
|
||||||
|
outcomes.add(cw.result(c, GridWorldAction.AddTile));
|
||||||
|
outcomes.add(cw.result(c, GridWorldAction.CaptureThree));
|
||||||
|
default :
|
||||||
|
//no possible outcomes for unrecognized actions
|
||||||
|
}
|
||||||
|
|
||||||
|
return outcomes;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return tf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the reward function which takes the content of the cell as being
|
||||||
|
* the reward value.
|
||||||
|
*/
|
||||||
|
public static RewardFunction<Cell<Double>> createRewardFunctionForFigure17_1() {
|
||||||
|
RewardFunction<Cell<Double>> rf = new RewardFunction<Cell<Double>>() {
|
||||||
|
@Override
|
||||||
|
public double reward(Cell<Double> s) {
|
||||||
|
return s.getContent();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return rf;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static RewardFunction<GridCell<Double>> createRewardFunctionForTileGame() {
|
||||||
|
RewardFunction<GridCell<Double>> rf = new RewardFunction<GridCell<Double>>() {
|
||||||
|
@Override
|
||||||
|
public double reward(GridCell<Double> s) {
|
||||||
|
return s.getContent();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return rf;
|
||||||
|
}
|
||||||
|
}
|
||||||
27
src/aima/core/probability/mdp/ActionsFunction.java
Normal file
27
src/aima/core/probability/mdp/ActionsFunction.java
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
|
||||||
|
/**
 * An interface for MDP action functions: ACTIONS(s) in AIMA3e terminology.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public interface ActionsFunction<S, A extends Action> {
	/**
	 * Get the set of actions for state s.
	 *
	 * @param s
	 *            the state.
	 * @return the set of actions for state s; implementations typically
	 *         return an empty set when s is a terminal state.
	 */
	Set<A> actions(S s);
}
|
||||||
79
src/aima/core/probability/mdp/MarkovDecisionProcess.java
Normal file
79
src/aima/core/probability/mdp/MarkovDecisionProcess.java
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
|
||||||
|
/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 647.<br>
 * <br>
 *
 * A sequential decision problem for a fully observable, stochastic environment
 * with a Markovian transition model and additive rewards is called a <b>Markov
 * decision process</b>, or <b>MDP</b>, and consists of a set of states (with an
 * initial state s<sub>0</sub>; a set ACTIONS(s) of actions in each state; a
 * transition model P(s' | s, a); and a reward function R(s).<br>
 * <br>
 * <b>Note:</b> Some definitions of MDPs allow the reward to depend on the
 * action and outcome too, so the reward function is R(s, a, s'). This
 * simplifies the description of some environments but does not change the
 * problem in any fundamental way.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 *
 */
public interface MarkovDecisionProcess<S, A extends Action> {

	/**
	 * Get the set of states associated with the Markov decision process.
	 *
	 * @return the set of states associated with the Markov decision process.
	 */
	Set<S> states();

	/**
	 * Get the initial state s<sub>0</sub> for this instance of a Markov
	 * decision process.
	 *
	 * @return the initial state s<sub>0</sub>.
	 */
	S getInitialState();

	/**
	 * Get the set of actions ACTIONS(s) for state s.
	 *
	 * @param s
	 *            the state.
	 * @return the set of actions for state s. An empty set indicates a
	 *         terminal state.
	 */
	Set<A> actions(S s);

	/**
	 * Return the probability of going from state s using action a to s' based
	 * on the underlying transition model P(s' | s, a).
	 *
	 * @param sDelta
	 *            the state s' being transitioned to.
	 * @param s
	 *            the state s being transitioned from.
	 * @param a
	 *            the action used to move from state s to s'.
	 * @return the probability of going from state s using action a to s'.
	 */
	double transitionProbability(S sDelta, S s, A a);

	/**
	 * Get the reward R(s) associated with being in state s.
	 *
	 * @param s
	 *            the state whose reward is sought.
	 * @return the reward associated with being in state s.
	 */
	double reward(S s);
}
|
||||||
34
src/aima/core/probability/mdp/Policy.java
Normal file
34
src/aima/core/probability/mdp/Policy.java
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
|
||||||
|
/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 647.<br>
 * <br>
 *
 * A solution to a Markov decision process is called a <b>policy</b>. It
 * specifies what the agent should do for any state that the agent might reach.
 * It is traditional to denote a policy by π, and π(s) is the action
 * recommended by the policy π for state s. If the agent has a complete
 * policy, then no matter what the outcome of any action, the agent will always
 * know what to do next.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 *
 */
public interface Policy<S, A extends Action> {
	/**
	 * π(s) is the action recommended by the policy π for state s.
	 *
	 * @param s
	 *            the state s.
	 * @return the action recommended by the policy π for state s.
	 */
	A action(S s);
}
|
||||||
39
src/aima/core/probability/mdp/PolicyEvaluation.java
Normal file
39
src/aima/core/probability/mdp/PolicyEvaluation.java
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
|
||||||
|
/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 656.<br>
 * <br>
 * Given a policy π<sub>i</sub>, calculate
 * U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each state if
 * π<sub>i</sub> were to be executed.
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public interface PolicyEvaluation<S, A extends Action> {
	/**
	 * <b>Policy evaluation:</b> given a policy π<sub>i</sub>, calculate
	 * U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each state if
	 * π<sub>i</sub> were to be executed.
	 *
	 * @param pi_i
	 *            a policy vector indexed by state.
	 * @param U
	 *            a vector of utilities for states in S, used as the starting
	 *            estimate.
	 * @param mdp
	 *            an MDP with states S, actions A(s), transition model P(s'|s,a).
	 * @return U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each
	 *         state if π<sub>i</sub> were to be executed.
	 */
	Map<S, Double> evaluate(Map<S, A> pi_i, Map<S, Double> U,
			MarkovDecisionProcess<S, A> mdp);
}
|
||||||
21
src/aima/core/probability/mdp/RewardFunction.java
Normal file
21
src/aima/core/probability/mdp/RewardFunction.java
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
/**
 * An interface for MDP reward functions, i.e. R(s): the reward the agent
 * receives for being in a given state.
 *
 * @param <S>
 *            the state type.
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public interface RewardFunction<S> {

	/**
	 * Get the reward associated with being in state s.
	 *
	 * @param s
	 *            the state whose reward is sought.
	 * @return the reward associated with being in state s.
	 */
	double reward(S s);
}
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
|
||||||
|
/**
 * An interface for MDP transition probability functions, i.e. the transition
 * model P(s' | s, a).
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public interface TransitionProbabilityFunction<S, A extends Action> {

	/**
	 * Return the probability of going from state s using action a to s' based
	 * on the underlying transition model P(s' | s, a).
	 *
	 * @param sDelta
	 *            the state s' being transitioned to.
	 * @param s
	 *            the state s being transitioned from.
	 * @param a
	 *            the action used to move from state s to s'.
	 * @return the probability of going from state s using action a to s'.
	 */
	double probability(S sDelta, S s, A a);
}
|
||||||
36
src/aima/core/probability/mdp/impl/LookupPolicy.java
Normal file
36
src/aima/core/probability/mdp/impl/LookupPolicy.java
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
package aima.core.probability.mdp.impl;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
import aima.core.probability.mdp.Policy;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default implementation of the Policy interface using an underlying Map to
|
||||||
|
* look up an action associated with a state.
|
||||||
|
*
|
||||||
|
* @param <S>
|
||||||
|
* the state type.
|
||||||
|
* @param <A>
|
||||||
|
* the action type.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
*/
|
||||||
|
public class LookupPolicy<S, A extends Action> implements Policy<S, A> {
|
||||||
|
private Map<S, A> policy = new HashMap<S, A>();
|
||||||
|
|
||||||
|
public LookupPolicy(Map<S, A> aPolicy) {
|
||||||
|
policy.putAll(aPolicy);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// START-Policy
|
||||||
|
@Override
|
||||||
|
public A action(S s) {
|
||||||
|
return policy.get(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
// END-Policy
|
||||||
|
//
|
||||||
|
}
|
||||||
69
src/aima/core/probability/mdp/impl/MDP.java
Normal file
69
src/aima/core/probability/mdp/impl/MDP.java
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
package aima.core.probability.mdp.impl;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
import aima.core.probability.mdp.ActionsFunction;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.probability.mdp.RewardFunction;
|
||||||
|
import aima.core.probability.mdp.TransitionProbabilityFunction;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default implementation of the MarkovDecisionProcess<S, A> interface.
|
||||||
|
*
|
||||||
|
* @param <S>
|
||||||
|
* the state type.
|
||||||
|
* @param <A>
|
||||||
|
* the action type.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*/
|
||||||
|
public class MDP<S, A extends Action> implements MarkovDecisionProcess<S, A> {
|
||||||
|
private Set<S> states = null;
|
||||||
|
private S initialState = null;
|
||||||
|
private ActionsFunction<S, A> actionsFunction = null;
|
||||||
|
private TransitionProbabilityFunction<S, A> transitionProbabilityFunction = null;
|
||||||
|
private RewardFunction<S> rewardFunction = null;
|
||||||
|
|
||||||
|
public MDP(Set<S> states, S initialState,
|
||||||
|
ActionsFunction<S, A> actionsFunction,
|
||||||
|
TransitionProbabilityFunction<S, A> transitionProbabilityFunction,
|
||||||
|
RewardFunction<S> rewardFunction) {
|
||||||
|
this.states = states;
|
||||||
|
this.initialState = initialState;
|
||||||
|
this.actionsFunction = actionsFunction;
|
||||||
|
this.transitionProbabilityFunction = transitionProbabilityFunction;
|
||||||
|
this.rewardFunction = rewardFunction;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// START-MarkovDecisionProcess
|
||||||
|
@Override
|
||||||
|
public Set<S> states() {
|
||||||
|
return states;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public S getInitialState() {
|
||||||
|
return initialState;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<A> actions(S s) {
|
||||||
|
return actionsFunction.actions(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double transitionProbability(S sDelta, S s, A a) {
|
||||||
|
return transitionProbabilityFunction.probability(sDelta, s, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double reward(S s) {
|
||||||
|
return rewardFunction.reward(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
// END-MarkovDecisionProcess
|
||||||
|
//
|
||||||
|
}
|
||||||
@@ -0,0 +1,93 @@
|
|||||||
|
package aima.core.probability.mdp.impl;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.probability.mdp.PolicyEvaluation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Artificial Intelligence A Modern Approach (3rd Edition): page 657.<br>
|
||||||
|
* <br>
|
||||||
|
* For small state spaces, policy evaluation using exact solution methods is
|
||||||
|
* often the most efficient approach. For large state spaces, O(n<sup>3</sup>)
|
||||||
|
* time might be prohibitive. Fortunately, it is not necessary to do exact
|
||||||
|
* policy evaluation. Instead, we can perform some number of simplified value
|
||||||
|
* iteration steps (simplified because the policy is fixed) to give a reasonably
|
||||||
|
* good approximation of utilities. The simplified Bellman update for this
|
||||||
|
* process is:<br>
|
||||||
|
* <br>
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* U<sub>i+1</sub>(s) <- R(s) + γΣ<sub>s'</sub>P(s'|s,π<sub>i</sub>(s))U<sub>i</sub>(s')
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* and this is repeated k times to produce the next utility estimate. The
|
||||||
|
* resulting algorithm is called <b>modified policy iteration</b>. It is often
|
||||||
|
* much more efficient than standard policy iteration or value iteration.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* @param <S>
|
||||||
|
* the state type.
|
||||||
|
* @param <A>
|
||||||
|
* the action type.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class ModifiedPolicyEvaluation<S, A extends Action> implements PolicyEvaluation<S, A> {
|
||||||
|
// # iterations to use to produce the next utility estimate
|
||||||
|
private int k;
|
||||||
|
// discount γ to be used.
|
||||||
|
private double gamma;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
*
|
||||||
|
* @param k
|
||||||
|
* number iterations to use to produce the next utility estimate
|
||||||
|
* @param gamma
|
||||||
|
* discount γ to be used
|
||||||
|
*/
|
||||||
|
public ModifiedPolicyEvaluation(int k, double gamma) {
|
||||||
|
if (gamma > 1.0 || gamma <= 0.0) {
|
||||||
|
throw new IllegalArgumentException("Gamma must be > 0 and <= 1.0");
|
||||||
|
}
|
||||||
|
this.k = k;
|
||||||
|
this.gamma = gamma;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// START-PolicyEvaluation
|
||||||
|
@Override
|
||||||
|
public Map<S, Double> evaluate(Map<S, A> pi_i, Map<S, Double> U,
|
||||||
|
MarkovDecisionProcess<S, A> mdp) {
|
||||||
|
Map<S, Double> U_i = new HashMap<S, Double>(U);
|
||||||
|
Map<S, Double> U_ip1 = new HashMap<S, Double>(U);
|
||||||
|
// repeat k times to produce the next utility estimate
|
||||||
|
for (int i = 0; i < k; i++) {
|
||||||
|
// U<sub>i+1</sub>(s) <- R(s) +
|
||||||
|
// γΣ<sub>s'</sub>P(s'|s,π<sub>i</sub>(s))U<sub>i</sub>(s')
|
||||||
|
for (S s : U.keySet()) {
|
||||||
|
A ap_i = pi_i.get(s);
|
||||||
|
double aSum = 0;
|
||||||
|
// Handle terminal states (i.e. no actions)
|
||||||
|
if (null != ap_i) {
|
||||||
|
for (S sDelta : U.keySet()) {
|
||||||
|
aSum += mdp.transitionProbability(sDelta, s, ap_i)
|
||||||
|
* U_i.get(sDelta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
U_ip1.put(s, mdp.reward(s) + gamma * aSum);
|
||||||
|
}
|
||||||
|
|
||||||
|
U_i.putAll(U_ip1);
|
||||||
|
}
|
||||||
|
return U_ip1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// END-PolicyEvaluation
|
||||||
|
//
|
||||||
|
}
|
||||||
144
src/aima/core/probability/mdp/search/PolicyIteration.java
Normal file
144
src/aima/core/probability/mdp/search/PolicyIteration.java
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package aima.core.probability.mdp.search;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.probability.mdp.Policy;
|
||||||
|
import aima.core.probability.mdp.PolicyEvaluation;
|
||||||
|
import aima.core.probability.mdp.impl.LookupPolicy;
|
||||||
|
import aima.core.util.Util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Artificial Intelligence A Modern Approach (3rd Edition): page 657.<br>
|
||||||
|
* <br>
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* function POLICY-ITERATION(mdp) returns a policy
|
||||||
|
* inputs: mdp, an MDP with states S, actions A(s), transition model P(s' | s, a)
|
||||||
|
* local variables: U, a vector of utilities for states in S, initially zero
|
||||||
|
* π, a policy vector indexed by state, initially random
|
||||||
|
*
|
||||||
|
* repeat
|
||||||
|
* U <- POLICY-EVALUATION(π, U, mdp)
|
||||||
|
* unchanged? <- true
|
||||||
|
* for each state s in S do
|
||||||
|
* if max<sub>a ∈ A(s)</sub> Σ<sub>s'</sub>P(s'|s,a)U[s'] > Σ<sub>s'</sub>P(s'|s,π[s])U[s'] then do
|
||||||
|
* π[s] <- argmax<sub>a ∈ A(s)</sub> Σ<sub>s'</sub>P(s'|s,a)U[s']
|
||||||
|
* unchanged? <- false
|
||||||
|
* until unchanged?
|
||||||
|
* return π
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* Figure 17.7 The policy iteration algorithm for calculating an optimal policy.
|
||||||
|
*
|
||||||
|
* @param <S>
|
||||||
|
* the state type.
|
||||||
|
* @param <A>
|
||||||
|
* the action type.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class PolicyIteration<S, A extends Action> {
|
||||||
|
|
||||||
|
private PolicyEvaluation<S, A> policyEvaluation = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
*
|
||||||
|
* @param policyEvaluation
|
||||||
|
* the policy evaluation function to use.
|
||||||
|
*/
|
||||||
|
public PolicyIteration(PolicyEvaluation<S, A> policyEvaluation) {
|
||||||
|
this.policyEvaluation = policyEvaluation;
|
||||||
|
}
|
||||||
|
|
||||||
|
// function POLICY-ITERATION(mdp) returns a policy
|
||||||
|
/**
|
||||||
|
* The policy iteration algorithm for calculating an optimal policy.
|
||||||
|
*
|
||||||
|
* @param mdp
|
||||||
|
* an MDP with states S, actions A(s), transition model P(s'|s,a)
|
||||||
|
* @return an optimal policy
|
||||||
|
*/
|
||||||
|
public Policy<S, A> policyIteration(MarkovDecisionProcess<S, A> mdp) {
|
||||||
|
// local variables: U, a vector of utilities for states in S, initially
|
||||||
|
// zero
|
||||||
|
Map<S, Double> U = Util.create(mdp.states(), new Double(0));
|
||||||
|
// π, a policy vector indexed by state, initially random
|
||||||
|
Map<S, A> pi = initialPolicyVector(mdp);
|
||||||
|
boolean unchanged;
|
||||||
|
// repeat
|
||||||
|
do {
|
||||||
|
// U <- POLICY-EVALUATION(π, U, mdp)
|
||||||
|
U = policyEvaluation.evaluate(pi, U, mdp);
|
||||||
|
// unchanged? <- true
|
||||||
|
unchanged = true;
|
||||||
|
// for each state s in S do
|
||||||
|
for (S s : mdp.states()) {
|
||||||
|
// calculate:
|
||||||
|
// max<sub>a ∈ A(s)</sub>
|
||||||
|
// Σ<sub>s'</sub>P(s'|s,a)U[s']
|
||||||
|
double aMax = Double.NEGATIVE_INFINITY, piVal = 0;
|
||||||
|
A aArgmax = pi.get(s);
|
||||||
|
for (A a : mdp.actions(s)) {
|
||||||
|
double aSum = 0;
|
||||||
|
for (S sDelta : mdp.states()) {
|
||||||
|
aSum += mdp.transitionProbability(sDelta, s, a)
|
||||||
|
* U.get(sDelta);
|
||||||
|
}
|
||||||
|
if (aSum > aMax) {
|
||||||
|
aMax = aSum;
|
||||||
|
aArgmax = a;
|
||||||
|
}
|
||||||
|
// track:
|
||||||
|
// Σ<sub>s'</sub>P(s'|s,π[s])U[s']
|
||||||
|
if (a.equals(pi.get(s))) {
|
||||||
|
piVal = aSum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if max<sub>a ∈ A(s)</sub>
|
||||||
|
// Σ<sub>s'</sub>P(s'|s,a)U[s']
|
||||||
|
// > Σ<sub>s'</sub>P(s'|s,π[s])U[s'] then do
|
||||||
|
if (aMax > piVal) {
|
||||||
|
// π[s] <- argmax<sub>a ∈A(s)</sub>
|
||||||
|
// Σ<sub>s'</sub>P(s'|s,a)U[s']
|
||||||
|
pi.put(s, aArgmax);
|
||||||
|
// unchanged? <- false
|
||||||
|
unchanged = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// until unchanged?
|
||||||
|
} while (!unchanged);
|
||||||
|
|
||||||
|
// return π
|
||||||
|
return new LookupPolicy<S, A>(pi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a policy vector indexed by state, initially random.
|
||||||
|
*
|
||||||
|
* @param mdp
|
||||||
|
* an MDP with states S, actions A(s), transition model P(s'|s,a)
|
||||||
|
* @return a policy vector indexed by state, initially random.
|
||||||
|
*/
|
||||||
|
public static <S, A extends Action> Map<S, A> initialPolicyVector(
|
||||||
|
MarkovDecisionProcess<S, A> mdp) {
|
||||||
|
Map<S, A> pi = new LinkedHashMap<S, A>();
|
||||||
|
List<A> actions = new ArrayList<A>();
|
||||||
|
for (S s : mdp.states()) {
|
||||||
|
actions.clear();
|
||||||
|
actions.addAll(mdp.actions(s));
|
||||||
|
// Handle terminal states (i.e. no actions).
|
||||||
|
if (actions.size() > 0) {
|
||||||
|
pi.put(s, Util.selectRandomlyFromList(actions));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pi;
|
||||||
|
}
|
||||||
|
}
|
||||||
129
src/aima/core/probability/mdp/search/ValueIteration.java
Normal file
129
src/aima/core/probability/mdp/search/ValueIteration.java
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
package aima.core.probability.mdp.search;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import aima.core.agent.Action;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.util.Util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Artificial Intelligence A Modern Approach (3rd Edition): page 653.<br>
|
||||||
|
* <br>
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* function VALUE-ITERATION(mdp, ε) returns a utility function
|
||||||
|
* inputs: mdp, an MDP with states S, actions A(s), transition model P(s' | s, a),
|
||||||
|
* rewards R(s), discount γ
|
||||||
|
* ε the maximum error allowed in the utility of any state
|
||||||
|
* local variables: U, U', vectors of utilities for states in S, initially zero
|
||||||
|
* δ the maximum change in the utility of any state in an iteration
|
||||||
|
*
|
||||||
|
* repeat
|
||||||
|
* U <- U'; δ <- 0
|
||||||
|
* for each state s in S do
|
||||||
|
* U'[s] <- R(s) + γ max<sub>a ∈ A(s)</sub> Σ<sub>s'</sub>P(s' | s, a) U[s']
|
||||||
|
* if |U'[s] - U[s]| > δ then δ <- |U'[s] - U[s]|
|
||||||
|
* until δ < ε(1 - γ)/γ
|
||||||
|
* return U
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* Figure 17.4 The value iteration algorithm for calculating utilities of
|
||||||
|
* states. The termination condition is from Equation (17.8):<br>
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* if ||U<sub>i+1</sub> - U<sub>i</sub>|| < ε(1 - γ)/γ then ||U<sub>i+1</sub> - U|| < ε
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @param <S>
|
||||||
|
* the state type.
|
||||||
|
* @param <A>
|
||||||
|
* the action type.
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class ValueIteration<S, A extends Action> {
|
||||||
|
// discount γ to be used.
|
||||||
|
private double gamma = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
*
|
||||||
|
* @param gamma
|
||||||
|
* discount γ to be used.
|
||||||
|
*/
|
||||||
|
public ValueIteration(double gamma) {
|
||||||
|
if (gamma > 1.0 || gamma <= 0.0) {
|
||||||
|
throw new IllegalArgumentException("Gamma must be > 0 and <= 1.0");
|
||||||
|
}
|
||||||
|
this.gamma = gamma;
|
||||||
|
}
|
||||||
|
|
||||||
|
// function VALUE-ITERATION(mdp, ε) returns a utility function
|
||||||
|
/**
|
||||||
|
* The value iteration algorithm for calculating the utility of states.
|
||||||
|
*
|
||||||
|
* @param mdp
|
||||||
|
* an MDP with states S, actions A(s), <br>
|
||||||
|
* transition model P(s' | s, a), rewards R(s)
|
||||||
|
* @param epsilon
|
||||||
|
* the maximum error allowed in the utility of any state
|
||||||
|
* @return a vector of utilities for states in S
|
||||||
|
*/
|
||||||
|
public Map<S, Double> valueIteration(MarkovDecisionProcess<S, A> mdp,
|
||||||
|
double epsilon) {
|
||||||
|
//
|
||||||
|
// local variables: U, U', vectors of utilities for states in S,
|
||||||
|
// initially zero
|
||||||
|
Map<S, Double> U = Util.create(mdp.states(), new Double(0));
|
||||||
|
Map<S, Double> Udelta = Util.create(mdp.states(), new Double(0));
|
||||||
|
// δ the maximum change in the utility of any state in an
|
||||||
|
// iteration
|
||||||
|
double delta = 0;
|
||||||
|
// Note: Just calculate this once for efficiency purposes:
|
||||||
|
// ε(1 - γ)/γ
|
||||||
|
double minDelta = epsilon * (1 - gamma) / gamma;
|
||||||
|
|
||||||
|
// repeat
|
||||||
|
do {
|
||||||
|
// U <- U'; δ <- 0
|
||||||
|
U.putAll(Udelta);
|
||||||
|
delta = 0;
|
||||||
|
// for each state s in S do
|
||||||
|
for (S s : mdp.states()) {
|
||||||
|
// max<sub>a ∈ A(s)</sub>
|
||||||
|
Set<A> actions = mdp.actions(s);
|
||||||
|
// Handle terminal states (i.e. no actions).
|
||||||
|
double aMax = 0;
|
||||||
|
if (actions.size() > 0) {
|
||||||
|
aMax = Double.NEGATIVE_INFINITY;
|
||||||
|
}
|
||||||
|
for (A a : actions) {
|
||||||
|
// Σ<sub>s'</sub>P(s' | s, a) U[s']
|
||||||
|
double aSum = 0;
|
||||||
|
for (S sDelta : mdp.states()) {
|
||||||
|
aSum += mdp.transitionProbability(sDelta, s, a)
|
||||||
|
* U.get(sDelta);
|
||||||
|
}
|
||||||
|
if (aSum > aMax) {
|
||||||
|
aMax = aSum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// U'[s] <- R(s) + γ
|
||||||
|
// max<sub>a ∈ A(s)</sub>
|
||||||
|
Udelta.put(s, mdp.reward(s) + gamma * aMax);
|
||||||
|
// if |U'[s] - U[s]| > δ then δ <- |U'[s] - U[s]|
|
||||||
|
double aDiff = Math.abs(Udelta.get(s) - U.get(s));
|
||||||
|
if (aDiff > delta) {
|
||||||
|
delta = aDiff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// until δ < ε(1 - γ)/γ
|
||||||
|
} while (delta > minDelta);
|
||||||
|
|
||||||
|
// return U
|
||||||
|
return U;
|
||||||
|
}
|
||||||
|
}
|
||||||
240
src/aima/core/util/Util.java
Normal file
240
src/aima/core/util/Util.java
Normal file
@@ -0,0 +1,240 @@
|
|||||||
|
package aima.core.util;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
/**
 * Miscellaneous static utility methods: list helpers, map construction,
 * randomness, normalization and simple statistics.
 *
 * @author Ravi Mohan
 *
 */
public class Util {
	public static final String NO = "No";
	public static final String YES = "Yes";
	//
	// Shared source of randomness for the random* helper methods.
	private static Random _r = new Random();

	/**
	 * Get the first element from a list.
	 *
	 * @param l
	 *            the list the first element is to be extracted from; must be
	 *            non-empty.
	 * @return the first element of the passed in list.
	 */
	public static <T> T first(List<T> l) {
		return l.get(0);
	}

	/**
	 * Get a sublist of all of the elements in the list except for first.
	 *
	 * @param l
	 *            the list the rest of the elements are to be extracted from;
	 *            must be non-empty.
	 * @return a view of all of the elements in the passed in list except for
	 *         the first element.
	 */
	public static <T> List<T> rest(List<T> l) {
		return l.subList(1, l.size());
	}

	/**
	 * Create a Map&lt;K, V&gt; with the passed in keys having their values
	 * initialized to the passed in value.
	 *
	 * @param keys
	 *            the keys for the newly constructed map.
	 * @param value
	 *            the value to be associated with each of the maps keys.
	 * @return a map (preserving the iteration order of keys) with the passed
	 *         in keys initialized to value.
	 */
	public static <K, V> Map<K, V> create(Collection<K> keys, V value) {
		Map<K, V> map = new LinkedHashMap<K, V>();

		for (K k : keys) {
			map.put(k, value);
		}

		return map;
	}

	/**
	 * Randomly select an element from a list.
	 *
	 * @param <T>
	 *            the type of element to be returned from the list l.
	 * @param l
	 *            a non-empty list of type T from which an element is to be
	 *            selected randomly.
	 * @return a randomly selected element from l.
	 */
	public static <T> T selectRandomlyFromList(List<T> l) {
		return l.get(_r.nextInt(l.size()));
	}

	/** @return true or false with equal probability. */
	public static boolean randomBoolean() {
		int trueOrFalse = _r.nextInt(2);
		return (!(trueOrFalse == 0));
	}

	/**
	 * Normalize an array of non-negative weights so they sum to 1.
	 *
	 * @param probDist
	 *            the weights to normalize.
	 * @return a new array of the weights divided by their total; if the total
	 *         is 0 an array of zeros is returned.
	 */
	public static double[] normalize(double[] probDist) {
		int len = probDist.length;
		double total = 0.0;
		for (double d : probDist) {
			total = total + d;
		}

		double[] normalized = new double[len];
		if (total != 0) {
			for (int i = 0; i < len; i++) {
				normalized[i] = probDist[i] / total;
			}
		}

		return normalized;
	}

	/**
	 * List based variant of {@link #normalize(double[])}.
	 *
	 * @param values
	 *            the weights to normalize.
	 * @return a new list of the weights divided by their total.
	 */
	public static List<Double> normalize(List<Double> values) {
		double[] valuesAsArray = new double[values.size()];
		for (int i = 0; i < valuesAsArray.length; i++) {
			valuesAsArray[i] = values.get(i);
		}
		double[] normalized = normalize(valuesAsArray);
		List<Double> results = new ArrayList<Double>();
		for (int i = 0; i < normalized.length; i++) {
			results.add(normalized[i]);
		}
		return results;
	}

	/** @return the minimum of i and j. */
	public static int min(int i, int j) {
		return Math.min(i, j);
	}

	/** @return the maximum of i and j. */
	public static int max(int i, int j) {
		return Math.max(i, j);
	}

	/** @return the maximum of i, j and k. */
	public static int max(int i, int j, int k) {
		return max(max(i, j), k);
	}

	/** @return the minimum of i, j and k. */
	public static int min(int i, int j, int k) {
		return min(min(i, j), k);
	}

	/**
	 * Get the most frequently occurring element of a list.
	 *
	 * @param l
	 *            a non-empty list.
	 * @return the element of l with the highest count. Ties are broken in
	 *         favor of the element that first appears in the list (counts are
	 *         kept in a LinkedHashMap so iteration order is deterministic).
	 */
	public static <T> T mode(List<T> l) {
		Map<T, Integer> hash = new LinkedHashMap<T, Integer>();
		for (T obj : l) {
			if (hash.containsKey(obj)) {
				hash.put(obj, hash.get(obj).intValue() + 1);
			} else {
				hash.put(obj, 1);
			}
		}

		T maxkey = hash.keySet().iterator().next();
		for (T key : hash.keySet()) {
			if (hash.get(key) > hash.get(maxkey)) {
				maxkey = key;
			}
		}
		return maxkey;
	}

	/** @return a two element array containing {@link #YES} and {@link #NO}. */
	public static String[] yesno() {
		return new String[] { YES, NO };
	}

	/** @return the base-2 logarithm of d. */
	public static double log2(double d) {
		return Math.log(d) / Math.log(2);
	}

	/**
	 * Calculate the information content (entropy, in bits) of a probability
	 * distribution.
	 *
	 * @param probabilities
	 *            the probabilities, expected to be in (0, 1]; a probability of
	 *            exactly 0 produces NaN (0 * log2(0) is not special-cased).
	 * @return -Σ p * log2(p) over the given probabilities.
	 */
	public static double information(double[] probabilities) {
		double total = 0.0;
		for (double d : probabilities) {
			total += (-1.0 * log2(d) * d);
		}
		return total;
	}

	/**
	 * Create a copy of list with the first occurrence of member removed.
	 *
	 * @param list
	 *            the source list (unmodified).
	 * @param member
	 *            the element to remove.
	 * @return a new list without member.
	 */
	public static <T> List<T> removeFrom(List<T> list, T member) {
		List<T> newList = new ArrayList<T>(list);
		newList.remove(member);
		return newList;
	}

	/** @return the sum of the squares of the numbers in the list. */
	public static <T extends Number> double sumOfSquares(List<T> list) {
		double accum = 0;
		for (T item : list) {
			accum = accum + (item.doubleValue() * item.doubleValue());
		}
		return accum;
	}

	/**
	 * Repeat a string n times.
	 *
	 * @param s
	 *            the string to repeat.
	 * @param n
	 *            the number of repetitions (0 yields the empty string).
	 * @return s concatenated with itself n times.
	 */
	public static String ntimes(String s, int n) {
		// StringBuilder - no synchronization needed for a local buffer.
		StringBuilder buf = new StringBuilder();
		for (int i = 0; i < n; i++) {
			buf.append(s);
		}
		return buf.toString();
	}

	/**
	 * Throw a RuntimeException if d is NaN or infinite.
	 *
	 * @param d
	 *            the value to check.
	 */
	public static void checkForNanOrInfinity(double d) {
		if (Double.isNaN(d)) {
			throw new RuntimeException("Not a Number");
		}
		if (Double.isInfinite(d)) {
			throw new RuntimeException("Infinite Number");
		}
	}

	/**
	 * @param i
	 *            lower bound, inclusive.
	 * @param j
	 *            upper bound, inclusive; must be &gt;= i.
	 * @return a uniformly random integer in [i, j].
	 */
	public static int randomNumberBetween(int i, int j) {
		/* i,j bothinclusive */
		return _r.nextInt(j - i + 1) + i;
	}

	/**
	 * @param lst
	 *            a non-empty list (an empty list yields NaN).
	 * @return the arithmetic mean of the values.
	 */
	public static double calculateMean(List<Double> lst) {
		Double sum = 0.0;
		for (Double d : lst) {
			sum = sum + d.doubleValue();
		}
		return sum / lst.size();
	}

	/**
	 * Calculate the sample standard deviation of values about the given mean.
	 *
	 * @param values
	 *            the values; assumes at least 2 members in the list.
	 * @param mean
	 *            the (pre-computed) mean of values.
	 * @return the sample standard deviation (n - 1 in the denominator).
	 */
	public static double calculateStDev(List<Double> values, double mean) {

		int listSize = values.size();

		Double sumOfDiffSquared = 0.0;
		for (Double value : values) {
			double diffFromMean = value - mean;
			sumOfDiffSquared += ((diffFromMean * diffFromMean) / (listSize - 1));
			// division moved here to avoid sum becoming too big if this
			// doesn't work use incremental formulation

		}
		double variance = sumOfDiffSquared;
		// (listSize - 1);
		// assumes at least 2 members in list.
		return Math.sqrt(variance);
	}

	/**
	 * Standardize values to zero mean and unit standard deviation.
	 *
	 * @param values
	 *            the values to normalize.
	 * @param mean
	 *            the mean of values.
	 * @param stdev
	 *            the standard deviation of values; must be non-zero.
	 * @return a new list of (v - mean) / stdev for each v in values.
	 */
	public static List<Double> normalizeFromMeanAndStdev(List<Double> values,
			double mean, double stdev) {
		List<Double> normalized = new ArrayList<Double>();
		for (Double d : values) {
			normalized.add((d - mean) / stdev);
		}
		return normalized;
	}

	/**
	 * @param lowerLimit
	 *            lower bound, inclusive.
	 * @param upperLimit
	 *            upper bound, exclusive.
	 * @return a uniformly random double in [lowerLimit, upperLimit).
	 */
	public static double generateRandomDoubleBetween(double lowerLimit,
			double upperLimit) {

		return lowerLimit + ((upperLimit - lowerLimit) * _r.nextDouble());
	}
}
|
||||||
111
src/model/comPlayer/AdaptiveComPlayer.java
Normal file
111
src/model/comPlayer/AdaptiveComPlayer.java
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
package model.comPlayer;
|
||||||
|
|
||||||
|
import model.Board;
|
||||||
|
import model.BoardScorer;
|
||||||
|
import model.Move;
|
||||||
|
import model.comPlayer.generator.AlphaBetaMoveGenerator;
|
||||||
|
import model.comPlayer.generator.MonteCarloMoveGenerator;
|
||||||
|
import model.comPlayer.generator.MoveGenerator;
|
||||||
|
import model.playerModel.PlayerModel;
|
||||||
|
import aima.core.environment.gridworld.GridCell;
|
||||||
|
import aima.core.environment.gridworld.GridWorld;
|
||||||
|
import aima.core.environment.gridworld.GridWorldAction;
|
||||||
|
import aima.core.environment.gridworld.GridWorldFactory;
|
||||||
|
import aima.core.probability.example.MDPFactory;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.probability.mdp.Policy;
|
||||||
|
import aima.core.probability.mdp.PolicyEvaluation;
|
||||||
|
import aima.core.probability.mdp.impl.ModifiedPolicyEvaluation;
|
||||||
|
import aima.core.probability.mdp.search.PolicyIteration;
|
||||||
|
|
||||||
|
public class AdaptiveComPlayer implements Player {
|
||||||
|
private final MoveGenerator abMoveGenerator = new AlphaBetaMoveGenerator();
|
||||||
|
private final MoveGenerator mcMoveGenerator = new MonteCarloMoveGenerator();
|
||||||
|
|
||||||
|
private BoardScorer boardScorer = new BoardScorer();
|
||||||
|
private boolean calculatePolicy = true;
|
||||||
|
private GridWorld<Double> gw = null;
|
||||||
|
private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;
|
||||||
|
private Policy<GridCell<Double>, GridWorldAction> policy = null;
|
||||||
|
private PolicyIteration<GridCell<Double>, GridWorldAction> pi = null;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void denyMove() {
|
||||||
|
throw new UnsupportedOperationException("Not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Move getMove(Board board, PlayerModel player) {
|
||||||
|
if (calculatePolicy) {
|
||||||
|
System.out.println("Calculating policy for PlayerModel: " + player);
|
||||||
|
|
||||||
|
// take 10 turns to place 6 tiles
|
||||||
|
double defaultPenalty = -0.25;
|
||||||
|
|
||||||
|
int maxScore = player.getTargetScore().getTargetScore();
|
||||||
|
int maxTiles = Board.NUM_COLS * Board.NUM_ROWS;
|
||||||
|
|
||||||
|
gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,
|
||||||
|
maxScore, defaultPenalty);
|
||||||
|
mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore);
|
||||||
|
|
||||||
|
// gamma = 1.0
|
||||||
|
PolicyEvaluation<GridCell<Double>, GridWorldAction> pe = new ModifiedPolicyEvaluation<GridCell<Double>, GridWorldAction>(
|
||||||
|
50, 0.9);
|
||||||
|
pi = new PolicyIteration<GridCell<Double>, GridWorldAction>(pe);
|
||||||
|
policy = pi.policyIteration(mdp);
|
||||||
|
|
||||||
|
System.out.println("Optimum policy calculated.");
|
||||||
|
|
||||||
|
for (int j = maxScore; j >= 1; j--) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int i = 1; i <= maxTiles; i++) {
|
||||||
|
sb.append(policy.action(gw.getCellAt(i, j)));
|
||||||
|
sb.append(" ");
|
||||||
|
}
|
||||||
|
System.out.println(sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
calculatePolicy = false;
|
||||||
|
} else {
|
||||||
|
System.out.println("Using pre-calculated policy");
|
||||||
|
}
|
||||||
|
|
||||||
|
GridCell<Double> state = getState(board);
|
||||||
|
GridWorldAction action = policy.action(state);
|
||||||
|
|
||||||
|
if (action == null || state == null) {
|
||||||
|
System.out.println("Board state outside of parameters of MDP. Reverting to failsafe behavior.");
|
||||||
|
action = GridWorldAction.RandomMove;
|
||||||
|
}
|
||||||
|
System.out.println("Performing action " + action + " at state " + state + " per policy.");
|
||||||
|
switch (action) {
|
||||||
|
case AddTile:
|
||||||
|
//System.out.println("Performing action #" + GridWorldAction.AddTile.ordinal());
|
||||||
|
return abMoveGenerator.genMove(board, false);
|
||||||
|
case CaptureThree:
|
||||||
|
//System.out.println("Performing action #" + GridWorldAction.CaptureThree.ordinal());
|
||||||
|
return mcMoveGenerator.genMove(board, false);
|
||||||
|
case RandomMove:
|
||||||
|
//System.out.println("Performing action #" + GridWorldAction.None.ordinal());
|
||||||
|
return mcMoveGenerator.genMove(board, false);
|
||||||
|
default:
|
||||||
|
//System.out.println("Performing failsafe action");
|
||||||
|
return mcMoveGenerator.genMove(board, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private GridCell<Double> getState(Board board) {
|
||||||
|
return gw.getCellAt(board.getTurn(), boardScorer.getScore(board));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isReady() {
|
||||||
|
return true; // always ready to play a random valid move
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "Alpha-Beta ComPlayer";
|
||||||
|
}
|
||||||
|
}
|
||||||
18
src/model/mdp/Action.java
Normal file
18
src/model/mdp/Action.java
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
package model.mdp;
|
||||||
|
|
||||||
|
/**
 * An action in the simplified game MDP, identified by a display name.
 * The two supported actions are exposed as shared singleton constants.
 */
public class Action {
	// BUGFIX: the shared constants were mutable "public static" fields,
	// reassignable by any code; "final" pins them and guarantees safe
	// publication. (Reads by existing callers are unaffected.)
	/** Play moves that drive toward the winning condition. */
	public static final Action playToWin = new Action("PlayToWin");
	/** Play moves that drive away from the winning condition. */
	public static final Action playToLose = new Action("PlayToLose");
	//public static Action maintainScore = new Action();

	/** Display name; also used by toString(). */
	private final String name;

	public Action(String name) {
		this.name = name;
	}

	@Override
	public String toString() {
		return name;
	}
}
|
||||||
51
src/model/mdp/MDP.java
Normal file
51
src/model/mdp/MDP.java
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
package model.mdp;
|
||||||
|
|
||||||
|
public class MDP {
|
||||||
|
public static final double nonTerminalReward = -0.25;
|
||||||
|
|
||||||
|
public enum MODE {
|
||||||
|
CEIL, FLOOR
|
||||||
|
}
|
||||||
|
|
||||||
|
private final int maxScore;
|
||||||
|
private final int maxTiles;
|
||||||
|
private final MODE mode;
|
||||||
|
|
||||||
|
public MDP(int maxScore, int maxTiles, MODE mode) {
|
||||||
|
this.maxScore = maxScore;
|
||||||
|
this.maxTiles = maxTiles;
|
||||||
|
this.mode = mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Action[] getActions(int i, int j) {
|
||||||
|
if (i == maxScore) {
|
||||||
|
return new Action[0];
|
||||||
|
}
|
||||||
|
if (j == maxTiles) {
|
||||||
|
return new Action[0];
|
||||||
|
}
|
||||||
|
return new Action[]{Action.playToLose,Action.playToWin};
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMaxScore() {
|
||||||
|
return maxScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMaxTiles() {
|
||||||
|
return maxTiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getReward(int score, int tiles) {
|
||||||
|
if (score == maxScore && tiles == maxTiles) {
|
||||||
|
return 10.0;
|
||||||
|
}
|
||||||
|
// TODO scale linearly?
|
||||||
|
if (score == maxScore) {
|
||||||
|
return -1.0;
|
||||||
|
}
|
||||||
|
if (tiles == maxTiles) {
|
||||||
|
return -5.0;
|
||||||
|
}
|
||||||
|
return nonTerminalReward;
|
||||||
|
}
|
||||||
|
}
|
||||||
5
src/model/mdp/MDPSolver.java
Normal file
5
src/model/mdp/MDPSolver.java
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
package model.mdp;
|
||||||
|
|
||||||
|
/**
 * Strategy interface for algorithms that solve a simplified tile-game
 * {@link MDP}, producing a {@link Policy}.
 */
public interface MDPSolver {

	/**
	 * Solve the given MDP.
	 *
	 * @param mdp the MDP model to solve.
	 * @return the resulting policy.
	 */
	Policy solve(MDP mdp);
}
|
||||||
7
src/model/mdp/Policy.java
Normal file
7
src/model/mdp/Policy.java
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
package model.mdp;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
 * A policy for the simplified tile-game MDP, represented as an ordered
 * sequence of {@link Action}s.
 *
 * NOTE(review): extending ArrayList exposes the entire mutable List API;
 * composition would give a tighter interface. It also inherits
 * Serializable without declaring a serialVersionUID — confirm whether
 * serialization is actually needed.
 */
public class Policy extends ArrayList<Action>{

}
|
||||||
34
src/model/mdp/Transition.java
Normal file
34
src/model/mdp/Transition.java
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package model.mdp;
|
||||||
|
|
||||||
|
/**
 * One stochastic outcome of taking an action: with probability
 * {@code prob} the score changes by {@code scoreChange} and the placed
 * tile count changes by {@code tileCountChange}.
 */
public class Transition {

	private double prob;
	private int scoreChange;
	private int tileCountChange;

	/**
	 * @param prob probability of this outcome.
	 * @param scoreChange delta applied to the score.
	 * @param tileCountChange delta applied to the tile count.
	 */
	public Transition(double prob, int scoreChange, int tileCountChange) {
		this.prob = prob;
		this.scoreChange = scoreChange;
		this.tileCountChange = tileCountChange;
	}

	public double getProb() {
		return prob;
	}

	public int getScoreChange() {
		return scoreChange;
	}

	public int getTileCountChange() {
		return tileCountChange;
	}

	public void setProb(double prob) {
		this.prob = prob;
	}

	public void setScoreChange(int scoreChange) {
		this.scoreChange = scoreChange;
	}

	public void setTileCountChange(int tileCountChange) {
		this.tileCountChange = tileCountChange;
	}

}
|
||||||
110
src/model/mdp/ValueIterationSolver.java
Normal file
110
src/model/mdp/ValueIterationSolver.java
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
package model.mdp;
|
||||||
|
|
||||||
|
import java.text.DecimalFormat;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ValueIterationSolver implements MDPSolver {
|
||||||
|
public int maxIterations = 10;
|
||||||
|
public final double DEFAULT_EPS = 0.1;
|
||||||
|
public final double GAMMA = 0.9; //discount
|
||||||
|
|
||||||
|
private DecimalFormat fmt = new DecimalFormat("##.00");
|
||||||
|
public Policy solve(MDP mdp) {
|
||||||
|
Policy policy = new Policy();
|
||||||
|
|
||||||
|
double[][] utility = new double[mdp.getMaxScore()+1][mdp.getMaxTiles()+1];
|
||||||
|
double[][] utilityPrime = new double[mdp.getMaxScore()+1][mdp.getMaxTiles()+1];
|
||||||
|
|
||||||
|
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||||
|
//StringBuilder sb = new StringBuilder();
|
||||||
|
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||||
|
utilityPrime[i][j] = mdp.getReward(i, j);
|
||||||
|
//sb.append(fmt.format(utility[i][j]));
|
||||||
|
//sb.append(" ");
|
||||||
|
}
|
||||||
|
//System.out.println(sb);
|
||||||
|
}
|
||||||
|
|
||||||
|
converged:
|
||||||
|
for (int iteration = 0; iteration < maxIterations; iteration++) {
|
||||||
|
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||||
|
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||||
|
utility[i][j] = utilityPrime[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i <= mdp.getMaxScore(); i++) {
|
||||||
|
for (int j = 0; j <= mdp.getMaxTiles(); j++) {
|
||||||
|
Action[] actions = mdp.getActions(i,j);
|
||||||
|
|
||||||
|
double aMax;
|
||||||
|
if (actions.length > 0) {
|
||||||
|
aMax = Double.NEGATIVE_INFINITY;
|
||||||
|
} else {
|
||||||
|
aMax = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Action action : actions){
|
||||||
|
List<Transition> transitions = getTransitions(action,mdp,i,j);
|
||||||
|
double aSum = 0.0;
|
||||||
|
for (Transition transition : transitions) {
|
||||||
|
int transI = transition.getScoreChange();
|
||||||
|
int transJ = transition.getTileCountChange();
|
||||||
|
if (i+transI >= 0 && i+transI <= mdp.getMaxScore()
|
||||||
|
&& j+transJ >= 0 && j+transJ <= mdp.getMaxTiles())
|
||||||
|
aSum += utility[i+transI][j+transJ];
|
||||||
|
}
|
||||||
|
if (aSum > aMax) {
|
||||||
|
aMax = aSum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
utilityPrime[i][j] = mdp.getReward(i,j) + GAMMA * aMax;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
double maxDiff = getMaxDiff(utility,utilityPrime);
|
||||||
|
System.out.println("Max diff |U - U'| = " + maxDiff);
|
||||||
|
if (maxDiff < DEFAULT_EPS) {
|
||||||
|
System.out.println("Solution to MDP converged: " + maxDiff);
|
||||||
|
break converged;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < utility.length; i++) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int j = 0; j < utility[i].length; j++) {
|
||||||
|
sb.append(fmt.format(utility[i][j]));
|
||||||
|
sb.append(" ");
|
||||||
|
}
|
||||||
|
System.out.println(sb);
|
||||||
|
}
|
||||||
|
|
||||||
|
//utility is now the utility Matrix
|
||||||
|
//get the policy
|
||||||
|
return policy;
|
||||||
|
}
|
||||||
|
|
||||||
|
double getMaxDiff(double[][]u, double[][]uPrime) {
|
||||||
|
double maxDiff = 0;
|
||||||
|
for (int i = 0; i < u.length; i++) {
|
||||||
|
for (int j = 0; j < u[i].length; j++) {
|
||||||
|
maxDiff = Math.max(maxDiff,Math.abs(u[i][j] - uPrime[i][j]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return maxDiff;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Transition> getTransitions(Action action, MDP mdp, int score, int tiles) {
|
||||||
|
List<Transition> transitions = new ArrayList<Transition>();
|
||||||
|
if (Action.playToWin == action) {
|
||||||
|
transitions.add(new Transition(0.9,1,1));
|
||||||
|
transitions.add(new Transition(0.1,1,-3));
|
||||||
|
} else if (Action.playToLose == action) {
|
||||||
|
transitions.add(new Transition(0.9,1,1));
|
||||||
|
transitions.add(new Transition(0.1,1,-3));
|
||||||
|
} /*else if (Action.maintainScore == action) {
|
||||||
|
transitions.add(new Transition(0.5,1,1));
|
||||||
|
transitions.add(new Transition(0.5,1,-3));
|
||||||
|
}*/
|
||||||
|
return transitions;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
package view;
|
package view;
|
||||||
|
|
||||||
|
import model.comPlayer.AdaptiveComPlayer;
|
||||||
import model.comPlayer.AlphaBetaComPlayer;
|
import model.comPlayer.AlphaBetaComPlayer;
|
||||||
import model.comPlayer.MinimaxComPlayer;
|
import model.comPlayer.MinimaxComPlayer;
|
||||||
import model.comPlayer.MonteCarloComPlayer;
|
import model.comPlayer.MonteCarloComPlayer;
|
||||||
@@ -7,16 +8,19 @@ import model.comPlayer.Player;
|
|||||||
import model.comPlayer.RandomComPlayer;
|
import model.comPlayer.RandomComPlayer;
|
||||||
|
|
||||||
public class ParsedArgs {
|
public class ParsedArgs {
|
||||||
public static final String COM_RANDOM = "RANDOM";
|
public static final String COM_ADAPTIVE = "ADAPTIVE";
|
||||||
public static final String COM_MINIMAX = "MINIMAX";
|
|
||||||
public static final String COM_ALPHABETA = "ALPHABETA";
|
public static final String COM_ALPHABETA = "ALPHABETA";
|
||||||
|
public static final String COM_MINIMAX = "MINIMAX";
|
||||||
public static final String COM_MONTECARLO = "MONTECARLO";
|
public static final String COM_MONTECARLO = "MONTECARLO";
|
||||||
|
public static final String COM_RANDOM = "RANDOM";
|
||||||
public static final String COM_DEFAULT = COM_ALPHABETA;
|
public static final String COM_DEFAULT = COM_ALPHABETA;
|
||||||
|
|
||||||
private String comPlayer = COM_DEFAULT;
|
private String comPlayer = COM_DEFAULT;
|
||||||
|
|
||||||
public Player getComPlayer() {
|
public Player getComPlayer() {
|
||||||
if (COM_RANDOM.equalsIgnoreCase(comPlayer)) {
|
if (COM_ADAPTIVE.equalsIgnoreCase(comPlayer)) {
|
||||||
|
return new AdaptiveComPlayer();
|
||||||
|
} else if (COM_RANDOM.equalsIgnoreCase(comPlayer)) {
|
||||||
return new RandomComPlayer();
|
return new RandomComPlayer();
|
||||||
} else if (COM_MINIMAX.equalsIgnoreCase(comPlayer)) {
|
} else if (COM_MINIMAX.equalsIgnoreCase(comPlayer)) {
|
||||||
return new MinimaxComPlayer();
|
return new MinimaxComPlayer();
|
||||||
|
|||||||
@@ -0,0 +1,98 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import aima.core.environment.cellworld.Cell;
|
||||||
|
import aima.core.environment.cellworld.CellWorld;
|
||||||
|
import aima.core.environment.cellworld.CellWorldAction;
|
||||||
|
import aima.core.environment.cellworld.CellWorldFactory;
|
||||||
|
import aima.core.probability.example.MDPFactory;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
* @author Ravi Mohan
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/* Tests the MDP built for the 4x3 cell world of AIMA3e Figures 17.1/17.3. */
public class MarkovDecisionProcessTest {
	// Tolerance for floating-point probability/reward comparisons.
	public static final double DELTA_THRESHOLD = 1e-3;

	private CellWorld<Double> cw = null;
	private MarkovDecisionProcess<Cell<Double>, CellWorldAction> mdp = null;

	@Before
	public void setUp() {
		// Cell world with terminal states at (4,3) and (4,2).
		cw = CellWorldFactory.createCellWorldForFig17_1();
		mdp = MDPFactory.createMDPForFigure17_3(cw);
	}

	@Test
	public void testActions() {
		// Ensure all actions can be performed in each cell
		// except for the terminal states.
		for (Cell<Double> s : cw.getCells()) {
			if (4 == s.getX() && (3 == s.getY() || 2 == s.getY())) {
				Assert.assertEquals(0, mdp.actions(s).size());
			} else {
				// 5 actions per non-terminal state — presumably the four
				// directions plus a no-op; TODO confirm against MDPFactory.
				Assert.assertEquals(5, mdp.actions(s).size());
			}
		}
	}

	@Test
	public void testMDPTransitionModel() {
		// Spot-checks the stochastic motion model into cell (1,1):
		// the asserted values show 0.8 for the intended direction, 0.1 for
		// each perpendicular slip, and self-transitions where a wall blocks
		// movement.
		Assert.assertEquals(0.8, mdp.transitionProbability(cw.getCellAt(1, 2),
				cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 1),
				cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(2, 1),
				cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 3),
				cw.getCellAt(1, 1), CellWorldAction.Up), DELTA_THRESHOLD);

		Assert.assertEquals(0.9, mdp.transitionProbability(cw.getCellAt(1, 1),
				cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(2, 1),
				cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(3, 1),
				cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 2),
				cw.getCellAt(1, 1), CellWorldAction.Down), DELTA_THRESHOLD);

		Assert.assertEquals(0.9, mdp.transitionProbability(cw.getCellAt(1, 1),
				cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(2, 1),
				cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(3, 1),
				cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 2),
				cw.getCellAt(1, 1), CellWorldAction.Left), DELTA_THRESHOLD);

		Assert.assertEquals(0.8, mdp.transitionProbability(cw.getCellAt(2, 1),
				cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 1),
				cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD);
		Assert.assertEquals(0.1, mdp.transitionProbability(cw.getCellAt(1, 2),
				cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD);
		Assert.assertEquals(0.0, mdp.transitionProbability(cw.getCellAt(1, 3),
				cw.getCellAt(1, 1), CellWorldAction.Right), DELTA_THRESHOLD);
	}

	@Test
	public void testRewardFunction() {
		// Terminals: +1 at (4,3), -1 at (4,2); every other cell costs -0.04.
		for (Cell<Double> s : cw.getCells()) {
			if (4 == s.getX() && 3 == s.getY()) {
				Assert.assertEquals(1.0, mdp.reward(s), DELTA_THRESHOLD);
			} else if (4 == s.getX() && 2 == s.getY()) {
				Assert.assertEquals(-1.0, mdp.reward(s), DELTA_THRESHOLD);
			} else {
				Assert.assertEquals(-0.04, mdp.reward(s), DELTA_THRESHOLD);
			}
		}
	}
}
|
||||||
80
test/aima/core/probability/mdp/PolicyIterationTest.java
Normal file
80
test/aima/core/probability/mdp/PolicyIterationTest.java
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import aima.core.environment.cellworld.Cell;
|
||||||
|
import aima.core.environment.cellworld.CellWorld;
|
||||||
|
import aima.core.environment.cellworld.CellWorldAction;
|
||||||
|
import aima.core.environment.cellworld.CellWorldFactory;
|
||||||
|
import aima.core.environment.gridworld.GridCell;
|
||||||
|
import aima.core.environment.gridworld.GridWorld;
|
||||||
|
import aima.core.environment.gridworld.GridWorldAction;
|
||||||
|
import aima.core.environment.gridworld.GridWorldFactory;
|
||||||
|
import aima.core.probability.example.MDPFactory;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.probability.mdp.impl.ModifiedPolicyEvaluation;
|
||||||
|
import aima.core.probability.mdp.search.PolicyIteration;
|
||||||
|
import aima.core.probability.mdp.search.ValueIteration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Ravi Mohan
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/* Exercises PolicyIteration on the simplified tile-game grid-world MDP. */
public class PolicyIterationTest {
	public static final double DELTA_THRESHOLD = 1e-3;

	private GridWorld<Double> gw = null;
	private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;
	private PolicyIteration<GridCell<Double>, GridWorldAction> pi = null;

	// Grid dimensions: 6 tile positions, target score 10.
	final int maxTiles = 6;
	final int maxScore = 10;

	@Before
	public void setUp() {
		// Small negative reward for every non-terminal step.
		double defaultPenalty = -0.04;

		gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,maxScore,defaultPenalty);
		mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore);

		// 100 evaluation sweeps per policy-evaluation step, discount 0.9.
		// (The original comment said "gamma = 1.0", which did not match
		// the 0.9 actually passed below.)
		PolicyEvaluation<GridCell<Double>,GridWorldAction> pe = new ModifiedPolicyEvaluation<GridCell<Double>, GridWorldAction>(100,0.9);
		pi = new PolicyIteration<GridCell<Double>, GridWorldAction>(pe);
	}

	@Test
	public void testPolicyIterationForTileGame() {
		Policy<GridCell<Double>, GridWorldAction> policy = pi.policyIteration(mdp);

		// Print the policy grid (rows = score descending, cols = tiles)
		// for manual inspection.
		for (int j = maxScore; j >= 1; j--) {
			StringBuilder sb = new StringBuilder();
			for (int i = 1; i <= maxTiles; i++) {
				sb.append(policy.action(gw.getCellAt(i, j)));
				sb.append(" ");
			}
			System.out.println(sb.toString());
		}

		// NOTE(review): no assertions yet — the commented-out expectations
		// were copied from the Fig 17.3 cell-world test and do not apply to
		// this tile-game grid; expected actions still need to be derived.
	}
}
|
||||||
64
test/aima/core/probability/mdp/ValueIterationTest.java
Normal file
64
test/aima/core/probability/mdp/ValueIterationTest.java
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import aima.core.environment.cellworld.Cell;
|
||||||
|
import aima.core.environment.cellworld.CellWorld;
|
||||||
|
import aima.core.environment.cellworld.CellWorldAction;
|
||||||
|
import aima.core.environment.cellworld.CellWorldFactory;
|
||||||
|
import aima.core.probability.example.MDPFactory;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.probability.mdp.search.ValueIteration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Ravi Mohan
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/* Regression test: value iteration on the AIMA3e Fig 17.3 cell world. */
public class ValueIterationTest {
	// Tolerance for comparing computed utilities against book values.
	public static final double DELTA_THRESHOLD = 1e-3;

	private CellWorld<Double> cw = null;
	private MarkovDecisionProcess<Cell<Double>, CellWorldAction> mdp = null;
	private ValueIteration<Cell<Double>, CellWorldAction> vi = null;

	@Before
	public void setUp() {
		cw = CellWorldFactory.createCellWorldForFig17_1();
		mdp = MDPFactory.createMDPForFigure17_3(cw);
		// Undiscounted (gamma = 1.0), matching the book's setup.
		vi = new ValueIteration<Cell<Double>, CellWorldAction>(1.0);
	}

	@Test
	public void testValueIterationForFig17_3() {
		Map<Cell<Double>, Double> U = vi.valueIteration(mdp, 0.0001);

		// Expected utilities per cell, as asserted below.
		// (Cell (2,2) is the wall and (4,2)/(4,3) are terminals.)
		Assert.assertEquals(0.705, U.get(cw.getCellAt(1, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(0.762, U.get(cw.getCellAt(1, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(0.812, U.get(cw.getCellAt(1, 3)), DELTA_THRESHOLD);

		Assert.assertEquals(0.655, U.get(cw.getCellAt(2, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(0.868, U.get(cw.getCellAt(2, 3)), DELTA_THRESHOLD);

		Assert.assertEquals(0.611, U.get(cw.getCellAt(3, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(0.660, U.get(cw.getCellAt(3, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(0.918, U.get(cw.getCellAt(3, 3)), DELTA_THRESHOLD);

		Assert.assertEquals(0.388, U.get(cw.getCellAt(4, 1)), DELTA_THRESHOLD);
		Assert.assertEquals(-1.0, U.get(cw.getCellAt(4, 2)), DELTA_THRESHOLD);
		Assert.assertEquals(1.0, U.get(cw.getCellAt(4, 3)), DELTA_THRESHOLD);

		// Print the utility grid (top row first) for manual inspection.
		for (int j = 3; j >= 1; j--) {
			StringBuilder sb = new StringBuilder();
			for (int i = 1; i <= 4; i++) {
				sb.append(U.get(cw.getCellAt(i, j)));
				sb.append(" ");
			}
			System.out.println(sb.toString());
		}
	}
}
|
||||||
76
test/aima/core/probability/mdp/ValueIterationTest2.java
Normal file
76
test/aima/core/probability/mdp/ValueIterationTest2.java
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
package aima.core.probability.mdp;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import aima.core.environment.cellworld.Cell;
|
||||||
|
import aima.core.environment.cellworld.CellWorld;
|
||||||
|
import aima.core.environment.cellworld.CellWorldAction;
|
||||||
|
import aima.core.environment.cellworld.CellWorldFactory;
|
||||||
|
import aima.core.environment.gridworld.GridCell;
|
||||||
|
import aima.core.environment.gridworld.GridWorld;
|
||||||
|
import aima.core.environment.gridworld.GridWorldAction;
|
||||||
|
import aima.core.environment.gridworld.GridWorldFactory;
|
||||||
|
import aima.core.probability.example.MDPFactory;
|
||||||
|
import aima.core.probability.mdp.MarkovDecisionProcess;
|
||||||
|
import aima.core.probability.mdp.search.ValueIteration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Ravi Mohan
|
||||||
|
* @author Ciaran O'Reilly
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/* Exercises ValueIteration on the simplified tile-game grid-world MDP. */
public class ValueIterationTest2 {
	public static final double DELTA_THRESHOLD = 1e-3;

	private GridWorld<Double> gw = null;
	private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;
	private ValueIteration<GridCell<Double>, GridWorldAction> vi = null;

	// Grid dimensions: 6 tile positions, target score 10.
	final int maxTiles = 6;
	final int maxScore = 10;

	@Before
	public void setUp() {
		// Small negative reward for every non-terminal step.
		double defaultPenalty = -0.04;

		gw = GridWorldFactory.createGridWorldForTileGame(maxTiles,maxScore,defaultPenalty);
		mdp = MDPFactory.createMDPForTileGame(gw, maxTiles, maxScore);

		// Discount factor 0.9. (The original comment said "gamma = 1.0",
		// which did not match the value actually passed.)
		vi = new ValueIteration<GridCell<Double>, GridWorldAction>(0.9);
	}

	@Test
	public void testValueIterationForTileGame() {
		Map<GridCell<Double>, Double> U = vi.valueIteration(mdp, 1.0);

		// Print the utility grid (rows = score descending, cols = tiles)
		// for manual inspection.
		for (int j = maxScore; j >= 1; j--) {
			StringBuilder sb = new StringBuilder();
			for (int i = 1; i <= maxTiles; i++) {
				sb.append(U.get(gw.getCellAt(i, j)));
				sb.append(" ");
			}
			System.out.println(sb.toString());
		}

		// NOTE(review): the 0.705 expectation below appears copied from the
		// Fig 17.3 cell-world test; confirm the correct expected utility for
		// cell (1,1) of the tile game. The remaining copied expectations
		// were left commented out for the same reason.
		Assert.assertEquals(0.705, U.get(gw.getCellAt(1, 1)), DELTA_THRESHOLD);
	}
}
|
||||||
26
test/model/mdp/ValueIterationSolverTest.java
Normal file
26
test/model/mdp/ValueIterationSolverTest.java
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
package model.mdp;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import model.mdp.MDP.MODE;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/* Smoke test for ValueIterationSolver on a small tile-game MDP. */
public class ValueIterationSolverTest {

	@Test
	public void testSolve() {
		MDPSolver solver = new ValueIterationSolver();

		// Solve for a target score of 6 within at most 10 turns.
		// (The original comment said "score of 25 in at most 35 turns",
		// which did not match the values below.)
		int maxScore = 6;
		int maxTurns = 10;

		MDP mdp = new MDP(maxScore,maxTurns,MODE.CEIL);
		Policy policy = solver.solve(mdp);

		// A feasible policy needs at least maxScore steps and at most
		// maxTurns steps.
		// NOTE(review): ValueIterationSolver.solve currently returns an
		// empty Policy (extraction is not implemented), so the first bound
		// check will fail — confirm intent.
		assertTrue(policy.size() >= maxScore);
		assertTrue(policy.size() <= maxTurns);

		System.out.println("Policy: " + policy);
	}
}
|
||||||
Reference in New Issue
Block a user