package aima.core.probability.mdp;

import junit.framework.Assert;

import org.junit.Before;
import org.junit.Test;

import aima.core.environment.gridworld.GridCell;
import aima.core.environment.gridworld.GridWorld;
import aima.core.environment.gridworld.GridWorldAction;
import aima.core.environment.gridworld.GridWorldFactory;
import aima.core.probability.example.MDPFactory;
import aima.core.probability.mdp.MarkovDecisionProcess;

/**
 * Based on MarkovDecisionProcessTest by Ciaran O'Reilly and Ravi Mohan.  Used under MIT license.
 *
 * <p>Exercises the action set, transition model and reward function of the
 * tile-game MDP built by {@link MDPFactory#createMDPForTileGame} on top of the
 * grid world built by {@link GridWorldFactory#createGridWorldForTileGame}.
 */
public class MarkovDecisionProcessTest {
	/** Absolute tolerance used when comparing floating point values. */
	public static final double DELTA_THRESHOLD = 1e-3;

	/** Tile limit handed to both factories in {@link #setUp()}. */
	private static final int MAX_TILES = 6;
	/** Score limit handed to both factories in {@link #setUp()}. */
	private static final int MAX_SCORE = 10;

	/** Number of actions expected in every non-terminal cell. */
	private static final int NON_TERMINAL_ACTION_COUNT = 3;
	/** Reward expected in the terminal cell. */
	private static final double TERMINAL_REWARD = 1.0;

	/** Reward handed to the grid world factory for non-terminal cells. */
	private final double nonTerminalReward = -0.04;
	private GridWorld<Double> gw = null;
	private MarkovDecisionProcess<GridCell<Double>, GridWorldAction> mdp = null;

	/** Builds a fresh grid world and its MDP before each test. */
	@Before
	public void setUp() {
		gw = GridWorldFactory.createGridWorldForTileGame(MAX_TILES, MAX_SCORE, nonTerminalReward);
		mdp = MDPFactory.createMDPForTileGame(gw, MAX_TILES, MAX_SCORE);
	}

	/**
	 * Checks that the terminal cell offers no actions and every other cell
	 * offers {@link #NON_TERMINAL_ACTION_COUNT} actions.
	 */
	@Test
	public void testActions() {
		for (GridCell<Double> s : gw.getCells()) {
			int expectedActions = isTerminal(s) ? 0 : NON_TERMINAL_ACTION_COUNT;
			Assert.assertEquals(expectedActions, mdp.actions(s).size());
		}
	}

	/**
	 * Spot-checks a single entry of the transition model: the cell at (2, 2),
	 * the cell at (1, 1) and the AddTile action must yield probability 0.66.
	 */
	@Test
	public void testMDPTransitionModel() {
		Assert.assertEquals(0.66, mdp.transitionProbability(gw.getCellAt(2, 2),
				gw.getCellAt(1, 1), GridWorldAction.AddTile), DELTA_THRESHOLD);
	}

	/**
	 * Checks that the terminal cell yields {@link #TERMINAL_REWARD} and every
	 * other cell yields the non-terminal reward the grid world was built with.
	 */
	@Test
	public void testRewardFunction() {
		for (GridCell<Double> s : gw.getCells()) {
			double expectedReward = isTerminal(s) ? TERMINAL_REWARD : nonTerminalReward;
			Assert.assertEquals(expectedReward, mdp.reward(s), DELTA_THRESHOLD);
		}
	}

	/**
	 * Tells whether the given cell is the one these tests treat as terminal.
	 *
	 * <p>NOTE(review): the terminal coordinates are assumed to coincide with
	 * (MAX_TILES, MAX_SCORE) because the original literals matched those
	 * values - confirm against the factory implementation.
	 *
	 * @param cell the grid cell to classify
	 * @return {@code true} if the cell sits at (MAX_TILES, MAX_SCORE)
	 */
	private static boolean isTerminal(GridCell<Double> cell) {
		return MAX_TILES == cell.getX() && MAX_SCORE == cell.getY();
	}
}