Added Daniel's Bayes Net code. Converted example code to unit tests. Minor code clean-up.
This commit is contained in:
45
src/dkohl/bayes/bayesnet/BayesNet.java
Normal file
45
src/dkohl/bayes/bayesnet/BayesNet.java
Normal file
@@ -0,0 +1,45 @@
|
||||
package dkohl.bayes.bayesnet;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import dkohl.bayes.probability.Variable;
|
||||
import dkohl.bayes.probability.distribution.ProbabilityDistribution;
|
||||
|
||||
/**
|
||||
* Represents a Bayes net as a graph with a probability table associated with
|
||||
* each node.
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class BayesNet extends NamedGraph {
|
||||
|
||||
/**
|
||||
* The probability tables for each node
|
||||
*/
|
||||
private HashMap<String, ProbabilityDistribution> nodes;
|
||||
private LinkedList<Variable> variables;
|
||||
|
||||
public BayesNet(String names[]) {
|
||||
super(names);
|
||||
this.nodes = new HashMap<String, ProbabilityDistribution>();
|
||||
this.variables = new LinkedList<Variable>();
|
||||
}
|
||||
|
||||
public void setDistribution(Variable node, ProbabilityDistribution dist) {
|
||||
nodes.put(node.getName(), dist);
|
||||
variables.add(node);
|
||||
}
|
||||
|
||||
public void updateDistribution(Variable node, ProbabilityDistribution dist) {
|
||||
nodes.put(node.getName(), dist);
|
||||
}
|
||||
|
||||
public HashMap<String, ProbabilityDistribution> getNodes() {
|
||||
return nodes;
|
||||
}
|
||||
|
||||
public LinkedList<Variable> getVariables() {
|
||||
return variables;
|
||||
}
|
||||
}
|
||||
98
src/dkohl/bayes/bayesnet/NamedGraph.java
Normal file
98
src/dkohl/bayes/bayesnet/NamedGraph.java
Normal file
@@ -0,0 +1,98 @@
|
||||
package dkohl.bayes.bayesnet;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* A Graph: G(V, E) implemented as a |V| x |V| matrix.
|
||||
*
|
||||
* Just one node type !!!!
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class NamedGraph {
|
||||
|
||||
/**
|
||||
* net[i][j]: variable i is connected to j.
|
||||
*/
|
||||
private boolean net[][];
|
||||
|
||||
/**
|
||||
* Mapping variable names to positions in the graph's matrix.
|
||||
*/
|
||||
private HashMap<String, Integer> variable2pos;
|
||||
|
||||
/**
|
||||
* Mapping positions in the graph to variable names.
|
||||
*/
|
||||
private HashMap<Integer, String> pos2variable;
|
||||
|
||||
/**
|
||||
* Initializes the graph of size: |VariableNames| x |VariableNames|
|
||||
*
|
||||
* @param variableNames
|
||||
* The names of the variables
|
||||
*/
|
||||
public NamedGraph(String variableNames[]) {
|
||||
variable2pos = new HashMap<String, Integer>();
|
||||
pos2variable = new HashMap<Integer, String>();
|
||||
int num_nodes = variableNames.length;
|
||||
|
||||
net = new boolean[num_nodes][num_nodes];
|
||||
|
||||
for (int i = 0; i < num_nodes; i++) {
|
||||
variable2pos.put(variableNames[i], i);
|
||||
pos2variable.put(i, variableNames[i]);
|
||||
|
||||
for (int j = 0; j < num_nodes; j++) {
|
||||
net[i][j] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Connects two existing vertices in the graph.
|
||||
*
|
||||
* @param x
|
||||
* the variable to connect
|
||||
* @param y
|
||||
* the parent (or other node for undirected graphs)
|
||||
*/
|
||||
public void connect(String x, String y) {
|
||||
Preconditions.checkArgument(variable2pos.containsKey(x),
|
||||
"Variable not known: " + x);
|
||||
Preconditions.checkArgument(variable2pos.containsKey(y),
|
||||
"Variable not known: " + y);
|
||||
|
||||
int variable_index = variable2pos.get(x);
|
||||
int bias_index = variable2pos.get(y);
|
||||
|
||||
net[bias_index][variable_index] = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the names of the variable's parents
|
||||
*
|
||||
* @param variable
|
||||
* the target variable
|
||||
* @return list of variable names
|
||||
*/
|
||||
public LinkedList<String> getParents(String variable) {
|
||||
Preconditions.checkArgument(variable2pos.containsKey(variable),
|
||||
"Variable not known: " + variable);
|
||||
|
||||
LinkedList<String> parents = new LinkedList<String>();
|
||||
int variable_pos = variable2pos.get(variable);
|
||||
|
||||
for (int i = 0; i < net.length; i++) {
|
||||
if (net[i][variable_pos]) {
|
||||
String variableName = pos2variable.get(i);
|
||||
parents.add(variableName);
|
||||
}
|
||||
}
|
||||
return parents;
|
||||
}
|
||||
|
||||
}
|
||||
119
src/dkohl/bayes/estimation/MaximumLikelihoodEstimation.java
Normal file
119
src/dkohl/bayes/estimation/MaximumLikelihoodEstimation.java
Normal file
@@ -0,0 +1,119 @@
|
||||
package dkohl.bayes.estimation;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import dkohl.bayes.bayesnet.BayesNet;
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.Probability;
|
||||
import dkohl.bayes.probability.Variable;
|
||||
import dkohl.bayes.probability.distribution.ContinousDistribution;
|
||||
import dkohl.bayes.probability.distribution.ProbabilityDistribution;
|
||||
import dkohl.bayes.probability.distribution.ProbabilityTable;
|
||||
import dkohl.bayes.probability.distribution.ProbabilityTree;
|
||||
import dkohl.bayes.statistic.DataSet;
|
||||
|
||||
/**
|
||||
* Works for fully observable bayes nets
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class MaximumLikelihoodEstimation {
|
||||
|
||||
/**
|
||||
* Enumerates all possible assignments for a set of variables using depth
|
||||
* first + back tracking. Estimates the probability for each assignment
|
||||
* given data and inserts the values in a probability function.
|
||||
*
|
||||
* @param target
|
||||
* The target variable
|
||||
* @param assignments
|
||||
* A list built by traversing the tree. Adding an assignment on
|
||||
* each layer
|
||||
* @param variables
|
||||
* All variables to assign
|
||||
* @param current
|
||||
* The current variable
|
||||
* @param data
|
||||
* the data for estimation
|
||||
* @param table
|
||||
* the probability table
|
||||
*/
|
||||
private static void enumerate(Assignment target,
|
||||
LinkedList<Assignment> assignments, LinkedList<Variable> variables,
|
||||
int current, DataSet data, ProbabilityDistribution dist) {
|
||||
|
||||
if (assignments.size() == variables.size()) {
|
||||
if (dist instanceof ProbabilityTable) {
|
||||
double likelihood = data.prob(target, assignments)
|
||||
.getProbability();
|
||||
assignments.add(target);
|
||||
ProbabilityTable table = (ProbabilityTable) dist;
|
||||
table.setProbabilityForAssignment(assignments, new Probability(
|
||||
likelihood));
|
||||
}
|
||||
if (dist instanceof ProbabilityTree) {
|
||||
double likelihood = data.prob(target, assignments)
|
||||
.getProbability();
|
||||
assignments.add(target);
|
||||
|
||||
ProbabilityTree tree = (ProbabilityTree) dist;
|
||||
tree.setProbabilityForAssignment(assignments, new Probability(
|
||||
likelihood));
|
||||
}
|
||||
if (dist instanceof ContinousDistribution) {
|
||||
ContinousDistribution pdf = (ContinousDistribution) dist;
|
||||
for (LinkedList<Assignment> assignment : data
|
||||
.getAssignmentMatchesForQuery(assignments)) {
|
||||
pdf.pushAssignment(assignment);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
Variable variable = variables.get(assignments.size());
|
||||
for (String value : variable.getDomain()) {
|
||||
LinkedList<Assignment> new_assignments = new LinkedList<Assignment>(
|
||||
assignments);
|
||||
new_assignments.add(new Assignment(variable, value));
|
||||
enumerate(target, new_assignments, variables, current + 1, data,
|
||||
dist);
|
||||
}
|
||||
}
|
||||
|
||||
public static void estimate(DataSet data, BayesNet net, String targetName) {
|
||||
Variable target = null;
|
||||
// Search parent variables
|
||||
LinkedList<String> parentNames = net.getParents(targetName);
|
||||
LinkedList<Variable> allVariables = net.getVariables();
|
||||
LinkedList<Variable> parentVariables = new LinkedList<Variable>();
|
||||
for (Variable variable : allVariables) {
|
||||
if (parentNames.contains(variable.getName())) {
|
||||
parentVariables.add(variable);
|
||||
}
|
||||
if (variable.getName().equals(targetName)) {
|
||||
target = variable;
|
||||
}
|
||||
}
|
||||
Preconditions.checkState(target != null, "MLE: variable " + targetName
|
||||
+ "Not in net");
|
||||
|
||||
/**
|
||||
* For all assignments of this variable list all assignments of it's
|
||||
* parents and estimate the probability for each assignment using data.
|
||||
*/
|
||||
ProbabilityDistribution dist = net.getNodes().get(targetName);
|
||||
if (dist instanceof ContinousDistribution) {
|
||||
enumerate(null, new LinkedList<Assignment>(), parentVariables, 0,
|
||||
data, dist);
|
||||
} else {
|
||||
for (String value : target.getDomain()) {
|
||||
enumerate(new Assignment(target, value),
|
||||
new LinkedList<Assignment>(), parentVariables, 0, data,
|
||||
dist);
|
||||
}
|
||||
}
|
||||
net.updateDistribution(target, dist);
|
||||
}
|
||||
|
||||
}
|
||||
118
src/dkohl/bayes/inference/EnumerateAll.java
Normal file
118
src/dkohl/bayes/inference/EnumerateAll.java
Normal file
@@ -0,0 +1,118 @@
|
||||
package dkohl.bayes.inference;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import dkohl.bayes.bayesnet.BayesNet;
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.ProbabilityAssignment;
|
||||
import dkohl.bayes.probability.Variable;
|
||||
|
||||
/**
|
||||
* Enumeration Algorithm: Exact inference in Baysian Networks.
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*
|
||||
*/
|
||||
public class EnumerateAll {
|
||||
|
||||
/**
|
||||
* The probability distribution of a variable.
|
||||
*
|
||||
* @param query
|
||||
* the variable
|
||||
* @param net
|
||||
* the bayes net defining the independence
|
||||
* @param assignments
|
||||
* a set of assignments in this net.
|
||||
* @return
|
||||
*/
|
||||
public static LinkedList<ProbabilityAssignment> enumerateAsk(
|
||||
Variable query, BayesNet net, LinkedList<Assignment> assignments) {
|
||||
LinkedList<Variable> variables = net.getVariables();
|
||||
LinkedList<ProbabilityAssignment> result = new LinkedList<ProbabilityAssignment>();
|
||||
|
||||
// Evaluate probability for each possible
|
||||
// value of the variable by enumeration
|
||||
for (String value : query.getDomain()) {
|
||||
LinkedList<Assignment> temp = new LinkedList<Assignment>();
|
||||
temp.addAll(assignments);
|
||||
temp.add(new Assignment(query, value));
|
||||
double prob = enumerateAll(net, variables, temp);
|
||||
result.add(new ProbabilityAssignment(query, value, prob));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decides if a variable is hidden or not. A variable is hidden if it is not
|
||||
* assigned.
|
||||
*
|
||||
* @param variable
|
||||
* the variable in question.
|
||||
* @param assignments
|
||||
* all the assignments
|
||||
* @return true if not assigned
|
||||
*/
|
||||
private static boolean hidden(Variable variable,
|
||||
LinkedList<Assignment> assignments) {
|
||||
for (Assignment assignment : assignments) {
|
||||
if (assignment.getVariable().getName().equals(variable.getName())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively evaluate probability of an assignment.
|
||||
*
|
||||
* Can be seen as depth first search + backtracking (branching on hidden
|
||||
* nodes)
|
||||
*
|
||||
* @param net
|
||||
* a bayes net defining independence
|
||||
* @param variables
|
||||
* the variables left to evaluate
|
||||
* @param assignments
|
||||
* all assignments
|
||||
* @return
|
||||
*/
|
||||
public static double enumerateAll(BayesNet net, List<Variable> variables,
|
||||
LinkedList<Assignment> assignments) {
|
||||
// if no variables left to evaluate,
|
||||
// leaf node reached.
|
||||
if (variables.isEmpty()) {
|
||||
return 1;
|
||||
}
|
||||
// evaluate variable, recurse on rest
|
||||
// PROLOG: [Variable|Rest].
|
||||
Variable variable = variables.get(0);
|
||||
List<Variable> rest = variables.subList(1, variables.size());
|
||||
// if current variable is hidden
|
||||
if (hidden(variable, assignments)) {
|
||||
// sum out all possible values for that variable
|
||||
double sumOut = 0;
|
||||
for (String value : variable.getDomain()) {
|
||||
// by temporarily adding each value to the asigned variable set
|
||||
LinkedList<Assignment> temp = new LinkedList<Assignment>();
|
||||
temp.addAll(assignments);
|
||||
temp.add(new Assignment(variable, value));
|
||||
|
||||
// then evaluate this variable
|
||||
double val = net.getNodes().get(variable.getName()).eval(temp)
|
||||
.getProbability();
|
||||
// and all that depend on it
|
||||
val *= enumerateAll(net, rest, temp);
|
||||
|
||||
sumOut += val;
|
||||
}
|
||||
return sumOut;
|
||||
}
|
||||
// if not just evaluate variable and continue.
|
||||
return net.getNodes().get(variable.getName()).eval(assignments)
|
||||
.getProbability()
|
||||
* enumerateAll(net, rest, assignments);
|
||||
}
|
||||
|
||||
}
|
||||
66
src/dkohl/bayes/probability/Assignment.java
Normal file
66
src/dkohl/bayes/probability/Assignment.java
Normal file
@@ -0,0 +1,66 @@
|
||||
package dkohl.bayes.probability;
|
||||
|
||||
/**
|
||||
* An assigned variable
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class Assignment {
|
||||
|
||||
public static final String NOT_ASSIGNED = "NOT_ASSIGNED";
|
||||
|
||||
/**
|
||||
* The variable specifying the domain.
|
||||
*/
|
||||
private Variable variable;
|
||||
|
||||
/**
|
||||
* The assigned value
|
||||
*/
|
||||
private String value;
|
||||
|
||||
public Assignment(Variable variable, String value) {
|
||||
super();
|
||||
this.variable = variable;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this assignment a valid one given my domain?
|
||||
*
|
||||
* @return true if the assignment is valid
|
||||
*/
|
||||
public boolean valid() {
|
||||
for (String outcome : variable.getDomain()) {
|
||||
if (outcome.equals(value)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public Variable getVariable() {
|
||||
return variable;
|
||||
}
|
||||
|
||||
public void setVariable(Variable variable) {
|
||||
this.variable = variable;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public void setValue(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(variable);
|
||||
sb.append(" = ");
|
||||
sb.append(value);
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
49
src/dkohl/bayes/probability/Probability.java
Normal file
49
src/dkohl/bayes/probability/Probability.java
Normal file
@@ -0,0 +1,49 @@
|
||||
package dkohl.bayes.probability;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* A probabilistic event
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class Probability {
|
||||
|
||||
/**
|
||||
* The probability for this event
|
||||
*/
|
||||
private double probability;
|
||||
|
||||
public Probability(double probability) {
|
||||
setProbability(probability);
|
||||
}
|
||||
|
||||
public double getProbability() {
|
||||
return probability;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets this probability to a value p that holds:
|
||||
*
|
||||
* 0 >= p <= 1, p in R
|
||||
*
|
||||
* @param probability
|
||||
*/
|
||||
public void setProbability(double probability) {
|
||||
Preconditions.checkArgument(probability <= 1,
|
||||
"Probability Error: P >= 1: " + probability);
|
||||
Preconditions.checkArgument(probability >= 0,
|
||||
"Probability Error: P <= 0: " + probability);
|
||||
this.probability = probability;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the rest of the probability
|
||||
*
|
||||
* @return 1 - p
|
||||
*/
|
||||
public Probability rest() {
|
||||
return new Probability(1 - probability);
|
||||
}
|
||||
|
||||
}
|
||||
26
src/dkohl/bayes/probability/ProbabilityAssignment.java
Normal file
26
src/dkohl/bayes/probability/ProbabilityAssignment.java
Normal file
@@ -0,0 +1,26 @@
|
||||
package dkohl.bayes.probability;
|
||||
|
||||
public class ProbabilityAssignment extends Assignment {
|
||||
|
||||
private double probability;
|
||||
|
||||
public ProbabilityAssignment(Variable variable, String value,
|
||||
double probability) {
|
||||
super(variable, value);
|
||||
this.probability = probability;
|
||||
}
|
||||
|
||||
public double getProbability() {
|
||||
return probability;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("(");
|
||||
sb.append(super.toString());
|
||||
sb.append(") ");
|
||||
sb.append(probability);
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
50
src/dkohl/bayes/probability/Variable.java
Normal file
50
src/dkohl/bayes/probability/Variable.java
Normal file
@@ -0,0 +1,50 @@
|
||||
package dkohl.bayes.probability;
|
||||
|
||||
/**
 * A named variable together with the domain of values it can take.
 *
 * @author Daniel Kohlsdorf
 */
public class Variable {

    /**
     * The variable's name.
     */
    private String name;

    /**
     * The values the variable can take.
     */
    private String[] domain;

    /**
     * @param name
     *            the name of the variable
     * @param domain
     *            the possible values; the array is stored as is, not copied
     */
    public Variable(String name, String[] domain) {
        this.name = name;
        this.domain = domain;
    }

    /** @return the name of the variable */
    public String getName() {
        return this.name;
    }

    /**
     * @param name
     *            the new name
     */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored domain array itself (not a copy) */
    public String[] getDomain() {
        return this.domain;
    }

    /**
     * @param domain
     *            the new domain; the array is stored as is, not copied
     */
    public void setDomain(String[] domain) {
        this.domain = domain;
    }

    /** @return the number of values in the domain */
    public int domainSize() {
        return this.domain.length;
    }

    /** @return the name of the variable */
    @Override
    public String toString() {
        return this.name;
    }
}
|
||||
@@ -0,0 +1,104 @@
|
||||
package dkohl.bayes.probability.distribution;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.Probability;
|
||||
|
||||
public class ContinousDistribution implements ProbabilityDistribution {
|
||||
|
||||
/**
|
||||
* The involved variables mapping to row
|
||||
*/
|
||||
private HashMap<String, Integer> variable2row;
|
||||
|
||||
private HashMap<String, Gaussian> distribution;
|
||||
|
||||
private String name;
|
||||
|
||||
public ContinousDistribution(String names[], int self) {
|
||||
distribution = new HashMap<String, Gaussian>();
|
||||
this.variable2row = new HashMap<String, Integer>();
|
||||
int count = 0;
|
||||
for (int i = 0; i < names.length; i++) {
|
||||
if (i != self) {
|
||||
variable2row.put(names[i], count);
|
||||
count++;
|
||||
} else {
|
||||
name = names[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a table entry key for an assignment
|
||||
*
|
||||
* @param assignment
|
||||
* the assignment
|
||||
*
|
||||
* @return the key
|
||||
*/
|
||||
public String generateKey(LinkedList<Assignment> assignment) {
|
||||
String[] values = new String[variable2row.size()];
|
||||
for (Assignment col : assignment) {
|
||||
if (variable2row.containsKey(col.getVariable().getName())) {
|
||||
int row = variable2row.get(col.getVariable().getName());
|
||||
values[row] = col.getValue();
|
||||
}
|
||||
}
|
||||
String key = "";
|
||||
for (String entry : values) {
|
||||
key += entry + ";";
|
||||
}
|
||||
return key;
|
||||
}
|
||||
|
||||
public Assignment value(LinkedList<Assignment> assignment) {
|
||||
for (Assignment a : assignment) {
|
||||
if (a.getVariable().getName().equals(name)) {
|
||||
return a;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Probability eval(LinkedList<Assignment> assignment) {
|
||||
String assignment_key = generateKey(assignment);
|
||||
if (distribution.get(assignment_key) == null) {
|
||||
return new Probability(0);
|
||||
}
|
||||
return new Probability(distribution.get(assignment_key).eval(
|
||||
value(assignment)));
|
||||
}
|
||||
|
||||
public void pushAssignment(LinkedList<Assignment> assignment) {
|
||||
String key = generateKey(assignment);
|
||||
Gaussian gaussian = new Gaussian();
|
||||
if (distribution.containsKey(key)) {
|
||||
gaussian = distribution.get(key);
|
||||
}
|
||||
gaussian.push(value(assignment));
|
||||
distribution.put(key, gaussian);
|
||||
}
|
||||
|
||||
public void estimate() {
|
||||
for (String key : distribution.keySet()) {
|
||||
distribution.get(key).estimate();
|
||||
}
|
||||
}
|
||||
|
||||
public String[] getNames() {
|
||||
String[] names = new String[variable2row.size()];
|
||||
for (String name : variable2row.keySet()) {
|
||||
names[variable2row.get(name)] = name;
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
public HashMap<String, Gaussian> getAssignments() {
|
||||
return distribution;
|
||||
}
|
||||
|
||||
}
|
||||
49
src/dkohl/bayes/probability/distribution/Gaussian.java
Normal file
49
src/dkohl/bayes/probability/distribution/Gaussian.java
Normal file
@@ -0,0 +1,49 @@
|
||||
package dkohl.bayes.probability.distribution;
|
||||
|
||||
import java.util.Vector;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
|
||||
public class Gaussian {
|
||||
|
||||
public static final double TWO_PI = 2 * Math.PI;
|
||||
|
||||
private double mean;
|
||||
private double var;
|
||||
private Vector<Double> samples;
|
||||
|
||||
public Gaussian() {
|
||||
samples = new Vector<Double>();
|
||||
}
|
||||
|
||||
public void push(Assignment assignment) {
|
||||
double sample = Double.valueOf(assignment.getValue());
|
||||
samples.add(sample);
|
||||
}
|
||||
|
||||
public void estimate() {
|
||||
mean = 0;
|
||||
for(Double sample : samples) {
|
||||
mean += sample;
|
||||
}
|
||||
mean /= samples.size();
|
||||
|
||||
var = 0;
|
||||
for(Double sample : samples) {
|
||||
var += Math.pow(sample - mean, 2);
|
||||
}
|
||||
var /= samples.size();
|
||||
}
|
||||
|
||||
public double eval(Assignment assignment) {
|
||||
double sample = Double.valueOf(assignment.getValue());
|
||||
double fac = 1 / (Math.sqrt(TWO_PI * var));
|
||||
return fac * Math.exp(-0.5 * (Math.pow(sample - mean, 2) / var));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "< " + mean + ", " + Math.sqrt(var) + " >";
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
package dkohl.bayes.probability.distribution;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.Probability;
|
||||
|
||||
|
||||
/**
|
||||
* A probability distribution.
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public interface ProbabilityDistribution {
|
||||
|
||||
/**
|
||||
* Returns probability given an assignments.
|
||||
*
|
||||
* @param assignment A set of assigned variables
|
||||
* @return probability of that assignment
|
||||
*/
|
||||
public Probability eval(LinkedList<Assignment> assignment);
|
||||
|
||||
}
|
||||
112
src/dkohl/bayes/probability/distribution/ProbabilityTable.java
Normal file
112
src/dkohl/bayes/probability/distribution/ProbabilityTable.java
Normal file
@@ -0,0 +1,112 @@
|
||||
package dkohl.bayes.probability.distribution;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.Probability;
|
||||
|
||||
/**
|
||||
* A probability table defined as a table of all combinations of all possible
|
||||
* values of the varibales involved.
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class ProbabilityTable implements ProbabilityDistribution {
|
||||
|
||||
/**
|
||||
* The involved variables mapping to row
|
||||
*/
|
||||
private HashMap<String, Integer> variable2row;
|
||||
|
||||
/**
|
||||
* An P(assignment key) assignment_key var1_value....varN_value : String
|
||||
*/
|
||||
private HashMap<String, Probability> assignments;
|
||||
|
||||
public ProbabilityTable(String names[]) {
|
||||
assignments = new HashMap<String, Probability>();
|
||||
this.variable2row = new HashMap<String, Integer>();
|
||||
for (int i = 0; i < names.length; i++) {
|
||||
variable2row.put(names[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
public ProbabilityTable(LinkedList<String> names) {
|
||||
assignments = new HashMap<String, Probability>();
|
||||
this.variable2row = new HashMap<String, Integer>();
|
||||
for (int i = 0; i < names.size(); i++) {
|
||||
variable2row.put(names.get(i), i);
|
||||
System.out.println(names.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the probability for a table entry key. You can generate one from
|
||||
* your assignments using generateKey, or just use this same method with
|
||||
* your assignments.
|
||||
*
|
||||
* @param key
|
||||
* the entry key
|
||||
* @param probability
|
||||
* the associated probability
|
||||
*/
|
||||
public void setProbabilityForAssignment(String key, Probability probability) {
|
||||
assignments.put(key, probability);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the probability for the given assignment
|
||||
*
|
||||
* @param assignment
|
||||
* the assignment
|
||||
* @param probability
|
||||
* the associated probability
|
||||
*/
|
||||
public void setProbabilityForAssignment(LinkedList<Assignment> assignment,
|
||||
Probability probability) {
|
||||
String key = generateKey(assignment);
|
||||
assignments.put(key, probability);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Probability eval(LinkedList<Assignment> assignment) {
|
||||
String key = generateKey(assignment);
|
||||
return assignments.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a table entry key for an assignment
|
||||
*
|
||||
* @param assignment
|
||||
* the assignment
|
||||
*
|
||||
* @return the key
|
||||
*/
|
||||
public String generateKey(LinkedList<Assignment> assignment) {
|
||||
String[] values = new String[variable2row.size()];
|
||||
for (Assignment col : assignment) {
|
||||
if (variable2row.containsKey(col.getVariable().getName())) {
|
||||
int row = variable2row.get(col.getVariable().getName());
|
||||
values[row] = col.getValue();
|
||||
}
|
||||
}
|
||||
String key = "";
|
||||
for (String entry : values) {
|
||||
key += entry + ";";
|
||||
}
|
||||
return key;
|
||||
}
|
||||
|
||||
public HashMap<String, Probability> getAssignments() {
|
||||
return assignments;
|
||||
}
|
||||
|
||||
public String[] getNames() {
|
||||
String[] names = new String[variable2row.size()];
|
||||
for (String name : variable2row.keySet()) {
|
||||
names[variable2row.get(name)] = name;
|
||||
}
|
||||
return names;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
package dkohl.bayes.probability.distribution;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.Probability;
|
||||
import dkohl.bayes.probability.distribution.tree.DecisionNode;
|
||||
import dkohl.bayes.probability.distribution.tree.ProbabilityLeaf;
|
||||
|
||||
/**
|
||||
* Probability distribution in descision tree representation
|
||||
*
|
||||
* DANGER: No node ordering during creation yet.
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class ProbabilityTree implements ProbabilityDistribution {
|
||||
|
||||
/**
|
||||
* The trees root node
|
||||
*/
|
||||
private DecisionNode root = null;
|
||||
|
||||
/**
|
||||
* Initialize the tree
|
||||
* by creating one path, along an assignment of
|
||||
* variables and add a probability node at the end.
|
||||
*
|
||||
* @param assignment
|
||||
* @param probability
|
||||
*/
|
||||
private void initTree(LinkedList<Assignment> assignment, Probability probability) {
|
||||
root = new DecisionNode(assignment.get(0).getVariable().getName());
|
||||
|
||||
DecisionNode parent = root;
|
||||
for(int i = 1; i < assignment.size(); i++) {
|
||||
// add the child to the parent with the key of the parents assignment
|
||||
DecisionNode child = new DecisionNode(assignment.get(i).getVariable().getName());
|
||||
parent.put(assignment.get(i - 1).getValue(), child);
|
||||
parent = child;
|
||||
}
|
||||
parent.put(assignment.getLast().getValue(), new ProbabilityLeaf(probability));
|
||||
}
|
||||
|
||||
/**
|
||||
* Follow existing paths until
|
||||
* successors do not contain the current assignment,
|
||||
* then start inserting.
|
||||
*
|
||||
* @param assignment
|
||||
* @param probability
|
||||
*/
|
||||
public void setProbabilityForAssignment(LinkedList<Assignment> assignment, Probability probability) {
|
||||
if(root == null) {
|
||||
initTree(assignment, probability);
|
||||
} else {
|
||||
DecisionNode parent = root;
|
||||
for(int i = 1; i < assignment.size(); i++) {
|
||||
if(parent.getSuccessors().containsKey(assignment.get(i - 1).getValue())) {
|
||||
parent = (DecisionNode) parent.getSuccessors().get(assignment.get(i - 1).getValue());
|
||||
} else {
|
||||
DecisionNode child = new DecisionNode(assignment.get(i).getVariable().getName());
|
||||
parent.put(assignment.get(i - 1).getValue(), child);
|
||||
parent = child;
|
||||
}
|
||||
}
|
||||
parent.put(assignment.getLast().getValue(), new ProbabilityLeaf(probability));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Probability eval(LinkedList<Assignment> assignment) {
|
||||
return root.eval(assignment);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
package dkohl.bayes.probability.distribution.tree;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.Probability;
|
||||
import dkohl.bayes.probability.distribution.ProbabilityDistribution;
|
||||
|
||||
/**
|
||||
* Represents a random variable. Maps each outcome to a successor.
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class DecisionNode implements ProbabilityDistribution {
|
||||
|
||||
/**
|
||||
* The successors for each outcome
|
||||
*/
|
||||
private HashMap<String, ProbabilityDistribution> successors;
|
||||
|
||||
/**
|
||||
* Name of the variable
|
||||
*/
|
||||
private String variable;
|
||||
|
||||
public DecisionNode(String variable) {
|
||||
this.variable = variable;
|
||||
successors = new HashMap<String, ProbabilityDistribution>();
|
||||
}
|
||||
|
||||
public String getVariable() {
|
||||
return variable;
|
||||
}
|
||||
|
||||
public void put(String value, ProbabilityDistribution distribution) {
|
||||
successors.put(value, distribution);
|
||||
}
|
||||
|
||||
public HashMap<String, ProbabilityDistribution> getSuccessors() {
|
||||
return successors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Probability eval(LinkedList<Assignment> assignment) {
|
||||
// Evaluate recursively
|
||||
for (Assignment a : assignment) {
|
||||
if (a.getVariable().getName().equals(variable)) {
|
||||
return successors.get(a.getValue()).eval(assignment);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
throw (new Exception("Domain Violation: " + variable));
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
package dkohl.bayes.probability.distribution.tree;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.Probability;
|
||||
import dkohl.bayes.probability.distribution.ProbabilityDistribution;
|
||||
|
||||
/**
|
||||
* Just a dummy node. Always returns the probability value, ignoring the
|
||||
* assignment.
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class ProbabilityLeaf implements ProbabilityDistribution {
|
||||
|
||||
private Probability probability;
|
||||
|
||||
public ProbabilityLeaf(Probability probability) {
|
||||
super();
|
||||
this.probability = probability;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Probability eval(LinkedList<Assignment> assignment) {
|
||||
return probability;
|
||||
}
|
||||
|
||||
}
|
||||
27
src/dkohl/bayes/statistic/DataPoint.java
Normal file
27
src/dkohl/bayes/statistic/DataPoint.java
Normal file
@@ -0,0 +1,27 @@
|
||||
package dkohl.bayes.statistic;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
|
||||
/**
|
||||
* A data point or feature vector, that keeps observations.
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class DataPoint extends HashMap<String, Assignment> {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
public DataPoint(DataPoint point) {
|
||||
putAll(point);
|
||||
}
|
||||
|
||||
public DataPoint() {
|
||||
}
|
||||
|
||||
public void add(Assignment assignment) {
|
||||
put(assignment.getVariable().getName(), assignment);
|
||||
}
|
||||
|
||||
}
|
||||
99
src/dkohl/bayes/statistic/DataSet.java
Normal file
99
src/dkohl/bayes/statistic/DataSet.java
Normal file
@@ -0,0 +1,99 @@
|
||||
package dkohl.bayes.statistic;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Vector;
|
||||
|
||||
import dkohl.bayes.probability.Assignment;
|
||||
import dkohl.bayes.probability.Probability;
|
||||
|
||||
/**
|
||||
* A data set, defined as a vector of data points
|
||||
*
|
||||
* @author Daniel Kohlsdorf
|
||||
*/
|
||||
public class DataSet extends Vector<DataPoint> {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
public LinkedList<LinkedList<Assignment>> getAssignmentMatchesForQuery(
|
||||
LinkedList<Assignment> given) {
|
||||
LinkedList<LinkedList<Assignment>> assignments = new LinkedList<LinkedList<Assignment>>();
|
||||
for (DataPoint point : this) {
|
||||
boolean insert = true;
|
||||
for (Assignment assignment : given) {
|
||||
if (!match(point, assignment)) {
|
||||
insert = false;
|
||||
}
|
||||
}
|
||||
if (insert) {
|
||||
assignments.add(new LinkedList<Assignment>(point.values()));
|
||||
}
|
||||
}
|
||||
return assignments;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the assignment equal to a data point / observation ?
|
||||
*
|
||||
* @param point
|
||||
* the point / observation
|
||||
* @param query
|
||||
* the assignment
|
||||
* @return
|
||||
*/
|
||||
private boolean match(String queryName, DataPoint point,
|
||||
LinkedList<Assignment> query) {
|
||||
boolean queryFound = false;
|
||||
for (Assignment assignment : query) {
|
||||
if (!match(point, assignment)) {
|
||||
return false;
|
||||
}
|
||||
if (point.containsKey(queryName)) {
|
||||
queryFound = true;
|
||||
}
|
||||
}
|
||||
if (!queryFound) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean match(DataPoint point, Assignment query) {
|
||||
String name = query.getVariable().getName();
|
||||
String value = query.getValue();
|
||||
if (point.containsKey(name)) {
|
||||
if (point.get(name).getValue().equals(value)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimating probability for: P(Query | given_1 .... given_N) = #(Query |
|
||||
* given_1 .... given_N) / #(given_1 .... given_N)
|
||||
*
|
||||
* @param query
|
||||
* @param given
|
||||
* @return
|
||||
*/
|
||||
public Probability prob(Assignment query, LinkedList<Assignment> given) {
|
||||
int matches = 0;
|
||||
int num_query_given = 0;
|
||||
for (DataPoint point : this) {
|
||||
if (match(query.getVariable().getName(), point, given)) {
|
||||
matches += 1;
|
||||
if (match(point, query)) {
|
||||
num_query_given += 1; // point.getWeight();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matches == 0) {
|
||||
return new Probability(0);
|
||||
}
|
||||
|
||||
return new Probability(num_query_given / ((double) matches));
|
||||
}
|
||||
|
||||
}
|
||||
47
src/dkohl/onthology/Ontology.java
Normal file
47
src/dkohl/onthology/Ontology.java
Normal file
@@ -0,0 +1,47 @@
|
||||
package dkohl.onthology;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
public class Ontology {
|
||||
|
||||
/**
|
||||
* Thing <- Class
|
||||
*/
|
||||
private HashMap<String, String> inheritance;
|
||||
|
||||
private HashMap<String, LinkedList<String>> classes2thing;
|
||||
|
||||
public Ontology(HashSet<String> classes) {
|
||||
inheritance = new HashMap<String, String>();
|
||||
this.classes2thing = new HashMap<String, LinkedList<String>>();
|
||||
for (String key : classes) {
|
||||
classes2thing.put(key, new LinkedList<String>());
|
||||
}
|
||||
}
|
||||
|
||||
public void define(String thing, String isA) {
|
||||
Preconditions.checkArgument(classes2thing.containsKey(isA), "Class: "
|
||||
+ isA + "notDefined");
|
||||
LinkedList<String> things = classes2thing.get(isA);
|
||||
things.add(thing);
|
||||
classes2thing.put(isA, things);
|
||||
inheritance.put(thing, isA);
|
||||
}
|
||||
|
||||
public HashMap<String, String> getInheritance() {
|
||||
return inheritance;
|
||||
}
|
||||
|
||||
public HashMap<String, LinkedList<String>> getClasses2thing() {
|
||||
return classes2thing;
|
||||
}
|
||||
|
||||
public Set<String> getClasses() {
|
||||
return classes2thing.keySet();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user