Implemented an agent that chooses to play winning, losing, or random moves by solving a simplified MDP model of the game using policy iteration.
Portions of MDP/solver code by Ciaran O'Reilly and Ravi Mohan used under MIT license.
This commit is contained in:
39
src/aima/core/probability/mdp/PolicyEvaluation.java
Normal file
39
src/aima/core/probability/mdp/PolicyEvaluation.java
Normal file
@@ -0,0 +1,39 @@
|
||||
package aima.core.probability.mdp;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import aima.core.agent.Action;
|
||||
|
||||
/**
|
||||
* Artificial Intelligence A Modern Approach (3rd Edition): page 656.<br>
|
||||
* <br>
|
||||
* Given a policy π<sub>i</sub>, calculate
|
||||
* U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each state if
|
||||
* π<sub>i</sub> were to be executed.
|
||||
*
|
||||
* @param <S>
|
||||
* the state type.
|
||||
* @param <A>
|
||||
* the action type.
|
||||
*
|
||||
* @author Ciaran O'Reilly
|
||||
* @author Ravi Mohan
|
||||
*/
|
||||
public interface PolicyEvaluation<S, A extends Action> {
|
||||
/**
|
||||
* <b>Policy evaluation:</b> given a policy π<sub>i</sub>, calculate
|
||||
* U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each state if
|
||||
* π<sub>i</sub> were to be executed.
|
||||
*
|
||||
* @param pi_i
|
||||
* a policy vector indexed by state
|
||||
* @param U
|
||||
* a vector of utilities for states in S
|
||||
* @param mdp
|
||||
* an MDP with states S, actions A(s), transition model P(s'|s,a)
|
||||
* @return U<sub>i</sub>=U<sup>π<sub>i</sub></sup>, the utility of each
|
||||
* state if π<sub>i</sub> were to be executed.
|
||||
*/
|
||||
Map<S, Double> evaluate(Map<S, A> pi_i, Map<S, Double> U,
|
||||
MarkovDecisionProcess<S, A> mdp);
|
||||
}
|
||||
Reference in New Issue
Block a user