Implemented an agent that chooses to play winning, losing, or random moves by solving a simplified MDP model of the game using policy iteration.
Portions of MDP/solver code by Ciaran O'Reilly and Ravi Mohan used under MIT license.
This commit is contained in:
39
src/aima/core/probability/mdp/PolicyEvaluation.java
Normal file
39
src/aima/core/probability/mdp/PolicyEvaluation.java
Normal file
@@ -0,0 +1,39 @@
|
||||
package aima.core.probability.mdp;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import aima.core.agent.Action;
|
||||
|
||||
/**
|
||||
* Artificial Intelligence A Modern Approach (3rd Edition): page 656.<br>
|
||||
* <br>
|
||||
* Given a policy π<sub>i</sub>, calculate
|
||||
* U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each state if
|
||||
* π<sub>i</sub> were to be executed.
|
||||
*
|
||||
* @param <S>
|
||||
* the state type.
|
||||
* @param <A>
|
||||
* the action type.
|
||||
*
|
||||
* @author Ciaran O'Reilly
|
||||
* @author Ravi Mohan
|
||||
*/
|
||||
public interface PolicyEvaluation<S, A extends Action> {
|
||||
/**
|
||||
* <b>Policy evaluation:</b> given a policy π<sub>i</sub>, calculate
|
||||
* U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each state if
|
||||
* π<sub>i</sub> were to be executed.
|
||||
*
|
||||
* @param pi_i
|
||||
* a policy vector indexed by state
|
||||
* @param U
|
||||
* a vector of utilities for states in S
|
||||
* @param mdp
|
||||
* an MDP with states S, actions A(s), transition model P(s'|s,a)
|
||||
* @return U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each
|
||||
* state if π<sub>i</sub> were to be executed.
|
||||
*/
|
||||
Map<S, Double> evaluate(Map<S, A> pi_i, Map<S, Double> U,
|
||||
MarkovDecisionProcess<S, A> mdp);
|
||||
}
|
||||
Reference in New Issue
Block a user