Added Daniel's Bayes Net code. Converted example code to unit tests. Minor code clean-up.

This commit is contained in:
Woody Folsom
2012-03-11 10:33:45 -04:00
parent a021dc2fc0
commit 571d0a1922
27 changed files with 2310 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
package dkohl.bayes.bayesnet;
import java.util.HashMap;
import java.util.LinkedList;
import dkohl.bayes.probability.Variable;
import dkohl.bayes.probability.distribution.ProbabilityDistribution;
/**
 * Represents a Bayes net as a graph with a probability distribution
 * associated with each node.
 *
 * @author Daniel Kohlsdorf
 */
public class BayesNet extends NamedGraph {

    /**
     * The probability distribution of each node, keyed by variable name.
     */
    private HashMap<String, ProbabilityDistribution> nodes;

    /**
     * The registered variables, in the order they were first registered.
     */
    private LinkedList<Variable> variables;

    /**
     * Creates a net with one graph vertex per name and no distributions.
     *
     * @param names
     *            the names of all variables in the net
     */
    public BayesNet(String names[]) {
        super(names);
        this.nodes = new HashMap<String, ProbabilityDistribution>();
        this.variables = new LinkedList<Variable>();
    }

    /**
     * Registers a variable together with its distribution.
     *
     * Registering a name that is already known replaces the stored
     * distribution and the stored variable in place (keeping its position)
     * instead of appending a second entry. A duplicated entry in
     * {@link #getVariables()} would be visited twice by any code iterating
     * that list.
     *
     * @param node
     *            the variable to register
     * @param dist
     *            the distribution of that variable
     */
    public void setDistribution(Variable node, ProbabilityDistribution dist) {
        String name = node.getName();
        nodes.put(name, dist);
        int position = indexOfVariable(name);
        if (position < 0) {
            variables.add(node);
        } else {
            variables.set(position, node);
        }
    }

    /**
     * Replaces the distribution stored for a variable without touching the
     * variable list.
     *
     * @param node
     *            the variable whose distribution is replaced
     * @param dist
     *            the new distribution
     */
    public void updateDistribution(Variable node, ProbabilityDistribution dist) {
        nodes.put(node.getName(), dist);
    }

    /**
     * @return the internal (mutable) map from variable name to distribution
     */
    public HashMap<String, ProbabilityDistribution> getNodes() {
        return nodes;
    }

    /**
     * @return the internal (mutable) list of registered variables
     */
    public LinkedList<Variable> getVariables() {
        return variables;
    }

    /**
     * Finds the position of the registered variable with the given name.
     *
     * @param name
     *            the variable name to look for (may be null)
     * @return the index in the variable list, or -1 if not registered
     */
    private int indexOfVariable(String name) {
        int index = 0;
        for (Variable known : variables) {
            String knownName = known.getName();
            boolean same = (name == null) ? knownName == null : name
                    .equals(knownName);
            if (same) {
                return index;
            }
            index++;
        }
        return -1;
    }
}

View File

@@ -0,0 +1,98 @@
package dkohl.bayes.bayesnet;
import java.util.HashMap;
import java.util.LinkedList;
import com.google.common.base.Preconditions;
/**
 * A graph G(V, E) over named vertices, stored as a |V| x |V| boolean
 * adjacency matrix.
 *
 * There is only a single kind of vertex; vertices are identified by name.
 *
 * @author Daniel Kohlsdorf
 */
public class NamedGraph {

    /**
     * Adjacency matrix: net[i][j] is true when vertex i is connected to
     * vertex j (i is recorded as a parent of j).
     */
    private boolean net[][];

    /**
     * Vertex name -> row/column index in the matrix.
     */
    private HashMap<String, Integer> variable2pos;

    /**
     * Row/column index in the matrix -> vertex name.
     */
    private HashMap<Integer, String> pos2variable;

    /**
     * Builds an edgeless graph with one vertex per given name. The position
     * of a name in the array becomes its index in the matrix.
     *
     * @param variableNames
     *            the names of the vertices
     */
    public NamedGraph(String variableNames[]) {
        variable2pos = new HashMap<String, Integer>();
        pos2variable = new HashMap<Integer, String>();
        int size = variableNames.length;
        // a freshly allocated boolean matrix is all false: no edges yet
        net = new boolean[size][size];
        for (int index = 0; index < size; index++) {
            String vertex = variableNames[index];
            variable2pos.put(vertex, index);
            pos2variable.put(index, vertex);
        }
    }

    /**
     * Adds an edge between two existing vertices, recording y as a parent
     * of x.
     *
     * @param x
     *            the vertex that receives the parent
     * @param y
     *            the parent vertex
     */
    public void connect(String x, String y) {
        Preconditions.checkArgument(variable2pos.containsKey(x),
                "Variable not known: " + x);
        Preconditions.checkArgument(variable2pos.containsKey(y),
                "Variable not known: " + y);
        int child = variable2pos.get(x);
        int parent = variable2pos.get(y);
        net[parent][child] = true;
    }

    /**
     * Collects the names of all vertices recorded as a parent of the given
     * vertex, in matrix index order.
     *
     * @param variable
     *            the vertex whose parents are wanted
     * @return the parent names (empty if there are none)
     */
    public LinkedList<String> getParents(String variable) {
        Preconditions.checkArgument(variable2pos.containsKey(variable),
                "Variable not known: " + variable);
        int column = variable2pos.get(variable);
        LinkedList<String> result = new LinkedList<String>();
        for (int row = 0; row < net.length; row++) {
            if (!net[row][column]) {
                continue;
            }
            result.add(pos2variable.get(row));
        }
        return result;
    }
}

View File

@@ -0,0 +1,119 @@
package dkohl.bayes.estimation;
import java.util.LinkedList;
import com.google.common.base.Preconditions;
import dkohl.bayes.bayesnet.BayesNet;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability;
import dkohl.bayes.probability.Variable;
import dkohl.bayes.probability.distribution.ContinousDistribution;
import dkohl.bayes.probability.distribution.ProbabilityDistribution;
import dkohl.bayes.probability.distribution.ProbabilityTable;
import dkohl.bayes.probability.distribution.ProbabilityTree;
import dkohl.bayes.statistic.DataSet;
/**
 * Maximum likelihood estimation of a node's distribution from data. Works
 * for fully observable bayes nets.
 *
 * @author Daniel Kohlsdorf
 */
public class MaximumLikelihoodEstimation {

    /**
     * Enumerates all possible assignments for a set of variables using depth
     * first + back tracking. Once every variable is assigned, the estimate
     * for that assignment is taken from the data and stored in the
     * distribution.
     *
     * @param target
     *            the assignment of the target variable (null for a
     *            continuous distribution, where it is not used)
     * @param assignments
     *            the assignments made so far, one per layer of the
     *            enumeration
     * @param variables
     *            all variables to assign
     * @param data
     *            the data for estimation
     * @param dist
     *            the distribution that receives the estimates
     */
    private static void enumerate(Assignment target,
            LinkedList<Assignment> assignments, LinkedList<Variable> variables,
            DataSet data, ProbabilityDistribution dist) {
        if (assignments.size() == variables.size()) {
            if (dist instanceof ProbabilityTable) {
                Probability likelihood = likelihood(target, assignments, data);
                // the stored entry is keyed by the givens plus the target
                assignments.add(target);
                ((ProbabilityTable) dist).setProbabilityForAssignment(
                        assignments, likelihood);
            } else if (dist instanceof ProbabilityTree) {
                Probability likelihood = likelihood(target, assignments, data);
                assignments.add(target);
                ((ProbabilityTree) dist).setProbabilityForAssignment(
                        assignments, likelihood);
            } else if (dist instanceof ContinousDistribution) {
                // Only collects the matching samples here.
                // NOTE(review): ContinousDistribution.estimate() is not
                // called from this class - presumably the caller does that;
                // confirm.
                ContinousDistribution pdf = (ContinousDistribution) dist;
                for (LinkedList<Assignment> assignment : data
                        .getAssignmentMatchesForQuery(assignments)) {
                    pdf.pushAssignment(assignment);
                }
            }
            return;
        }
        // branch on every value of the next unassigned variable
        Variable variable = variables.get(assignments.size());
        for (String value : variable.getDomain()) {
            LinkedList<Assignment> extended = new LinkedList<Assignment>(
                    assignments);
            extended.add(new Assignment(variable, value));
            enumerate(target, extended, variables, data, dist);
        }
    }

    /**
     * Estimates the probability of the target assignment given the other
     * assignments from the data.
     *
     * @param target
     *            the assignment of the target variable
     * @param given
     *            the assignments the estimate is conditioned on
     * @param data
     *            the data for estimation
     * @return a new probability holding the estimate
     */
    private static Probability likelihood(Assignment target,
            LinkedList<Assignment> given, DataSet data) {
        return new Probability(data.prob(target, given).getProbability());
    }

    /**
     * Estimates the distribution stored in the net for one variable.
     *
     * For all assignments of this variable, lists all assignments of its
     * parents and estimates the probability for each assignment using data.
     * For a continuous distribution only the parent assignments are
     * enumerated.
     *
     * @param data
     *            the data for estimation
     * @param net
     *            the net holding the variable, its parents and its
     *            distribution
     * @param targetName
     *            the name of the variable to estimate
     * @throws IllegalArgumentException
     *             if the name is not a vertex of the net's graph
     * @throws IllegalStateException
     *             if no variable of that name is registered in the net
     */
    public static void estimate(DataSet data, BayesNet net, String targetName) {
        Variable target = null;

        // Search parent variables
        LinkedList<String> parentNames = net.getParents(targetName);
        LinkedList<Variable> allVariables = net.getVariables();
        LinkedList<Variable> parentVariables = new LinkedList<Variable>();
        for (Variable variable : allVariables) {
            if (parentNames.contains(variable.getName())) {
                parentVariables.add(variable);
            }
            if (variable.getName().equals(targetName)) {
                target = variable;
            }
        }
        Preconditions.checkState(target != null, "MLE: variable " + targetName
                + " not in net");

        ProbabilityDistribution dist = net.getNodes().get(targetName);
        if (dist instanceof ContinousDistribution) {
            enumerate(null, new LinkedList<Assignment>(), parentVariables,
                    data, dist);
        } else {
            for (String value : target.getDomain()) {
                enumerate(new Assignment(target, value),
                        new LinkedList<Assignment>(), parentVariables, data,
                        dist);
            }
        }
        net.updateDistribution(target, dist);
    }
}

View File

@@ -0,0 +1,118 @@
package dkohl.bayes.inference;
import java.util.LinkedList;
import java.util.List;
import dkohl.bayes.bayesnet.BayesNet;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.ProbabilityAssignment;
import dkohl.bayes.probability.Variable;
/**
 * Enumeration algorithm: exact inference in Bayesian networks.
 *
 * @author Daniel Kohlsdorf
 *
 */
public class EnumerateAll {

    /**
     * Evaluates every value of a query variable by enumeration.
     *
     * The value attached to each outcome is the raw result of
     * {@link #enumerateAll}; no normalization is applied here.
     *
     * @param query
     *            the variable
     * @param net
     *            the bayes net defining the independence
     * @param assignments
     *            a set of assignments in this net (not modified)
     * @return one entry per value in the query variable's domain, in domain
     *         order
     */
    public static LinkedList<ProbabilityAssignment> enumerateAsk(
            Variable query, BayesNet net, LinkedList<Assignment> assignments) {
        LinkedList<Variable> ordering = net.getVariables();
        LinkedList<ProbabilityAssignment> distribution = new LinkedList<ProbabilityAssignment>();
        for (String outcome : query.getDomain()) {
            // evidence plus the query fixed to this outcome
            LinkedList<Assignment> extended = new LinkedList<Assignment>(
                    assignments);
            extended.add(new Assignment(query, outcome));
            double mass = enumerateAll(net, ordering, extended);
            distribution.add(new ProbabilityAssignment(query, outcome, mass));
        }
        return distribution;
    }

    /**
     * Decides if a variable is hidden or not. A variable is hidden if no
     * assignment refers to a variable of the same name.
     *
     * @param variable
     *            the variable in question.
     * @param assignments
     *            all the assignments
     * @return true if not assigned
     */
    private static boolean hidden(Variable variable,
            LinkedList<Assignment> assignments) {
        String wanted = variable.getName();
        boolean assigned = false;
        for (Assignment candidate : assignments) {
            if (candidate.getVariable().getName().equals(wanted)) {
                assigned = true;
                break;
            }
        }
        return !assigned;
    }

    /**
     * Recursively evaluates the probability of an assignment.
     *
     * Processes the first variable of the list and recurses on the rest:
     * an assigned variable contributes the value of its distribution, a
     * hidden variable is summed out over all values of its domain.
     *
     * @param net
     *            a bayes net defining independence
     * @param variables
     *            the variables left to evaluate
     * @param assignments
     *            all assignments
     * @return the accumulated product/sum; 1 when no variables are left
     */
    public static double enumerateAll(BayesNet net, List<Variable> variables,
            LinkedList<Assignment> assignments) {
        // leaf of the enumeration: nothing left to evaluate
        if (variables.isEmpty()) {
            return 1;
        }

        // PROLOG: [Variable|Rest].
        Variable head = variables.get(0);
        List<Variable> tail = variables.subList(1, variables.size());

        if (!hidden(head, assignments)) {
            // assigned: evaluate the variable and continue with the rest
            double factor = net.getNodes().get(head.getName())
                    .eval(assignments).getProbability();
            return factor * enumerateAll(net, tail, assignments);
        }

        // hidden: sum out every possible value of the variable
        double total = 0;
        for (String outcome : head.getDomain()) {
            // temporarily assign this value
            LinkedList<Assignment> extended = new LinkedList<Assignment>(
                    assignments);
            extended.add(new Assignment(head, outcome));
            // the variable itself ...
            double factor = net.getNodes().get(head.getName()).eval(extended)
                    .getProbability();
            // ... times everything that follows it
            total += factor * enumerateAll(net, tail, extended);
        }
        return total;
    }
}

View File

@@ -0,0 +1,66 @@
package dkohl.bayes.probability;
/**
 * A variable together with the value assigned to it.
 *
 * @author Daniel Kohlsdorf
 */
public class Assignment {

    public static final String NOT_ASSIGNED = "NOT_ASSIGNED";

    /**
     * The variable, which also specifies the domain.
     */
    private Variable variable;

    /**
     * The value assigned to the variable.
     */
    private String value;

    /**
     * @param variable
     *            the assigned variable
     * @param value
     *            the value it takes
     */
    public Assignment(Variable variable, String value) {
        this.variable = variable;
        this.value = value;
    }

    /**
     * Checks whether the assigned value is one of the values in the
     * variable's domain.
     *
     * @return true if the assignment is valid
     */
    public boolean valid() {
        String[] domain = variable.getDomain();
        for (int i = 0; i < domain.length; i++) {
            if (domain[i].equals(value)) {
                return true;
            }
        }
        return false;
    }

    public Variable getVariable() {
        return variable;
    }

    public void setVariable(Variable variable) {
        this.variable = variable;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    /**
     * @return the variable and its value, separated by " = "
     */
    @Override
    public String toString() {
        return variable + " = " + value;
    }
}

View File

@@ -0,0 +1,49 @@
package dkohl.bayes.probability;
import com.google.common.base.Preconditions;
/**
 * A probabilistic event
 *
 * @author Daniel Kohlsdorf
 */
public class Probability {

    /**
     * The probability for this event, always within [0, 1]
     */
    private double probability;

    /**
     * @param probability
     *            the probability, must be within [0, 1]
     * @throws IllegalArgumentException
     *             if the value is NaN or outside [0, 1]
     */
    public Probability(double probability) {
        setProbability(probability);
    }

    public double getProbability() {
        return probability;
    }

    /**
     * Sets this probability to a value p that holds:
     *
     * 0 <= p <= 1, p in R
     *
     * @param probability
     *            the new value
     * @throws IllegalArgumentException
     *             if the value is NaN or outside [0, 1]
     */
    public void setProbability(double probability) {
        // NaN fails every comparison, so report it explicitly instead of
        // as an out-of-range value
        Preconditions.checkArgument(!Double.isNaN(probability),
                "Probability Error: P is NaN");
        Preconditions.checkArgument(probability <= 1,
                "Probability Error: P > 1: " + probability);
        Preconditions.checkArgument(probability >= 0,
                "Probability Error: P < 0: " + probability);
        this.probability = probability;
    }

    /**
     * Returns the rest of the probability
     *
     * @return a new probability holding 1 - p
     */
    public Probability rest() {
        return new Probability(1 - probability);
    }
}

View File

@@ -0,0 +1,26 @@
package dkohl.bayes.probability;
/**
 * An assignment annotated with a probability value.
 */
public class ProbabilityAssignment extends Assignment {

    /**
     * The value attached to this assignment (stored as given, not validated)
     */
    private double probability;

    /**
     * @param variable
     *            the assigned variable
     * @param value
     *            the value it takes
     * @param probability
     *            the value to attach to the assignment
     */
    public ProbabilityAssignment(Variable variable, String value,
            double probability) {
        super(variable, value);
        this.probability = probability;
    }

    public double getProbability() {
        return probability;
    }

    /**
     * @return the assignment in parentheses followed by the probability
     */
    @Override
    public String toString() {
        return "(" + super.toString() + ") " + probability;
    }
}

View File

@@ -0,0 +1,50 @@
package dkohl.bayes.probability;
/**
 * A named variable together with the domain of values it can take.
 *
 * @author Daniel Kohlsdorf
 */
public class Variable {

    /**
     * The variable's name
     */
    private String name;

    /**
     * The values the variable can take
     */
    private String domain[];

    /**
     * @param name
     *            the name of the variable
     * @param domain
     *            the possible values; the array is stored as is, not copied
     */
    public Variable(String name, String[] domain) {
        this.domain = domain;
        this.name = name;
    }

    /**
     * @return the variable's name
     */
    @Override
    public String toString() {
        return name;
    }

    /**
     * @return the number of values in the domain
     */
    public int domainSize() {
        return domain.length;
    }

    public String getName() {
        return name;
    }

    public String[] getDomain() {
        return domain;
    }

    public void setName(String name) {
        this.name = name;
    }

    public void setDomain(String[] domain) {
        this.domain = domain;
    }
}

View File

@@ -0,0 +1,104 @@
package dkohl.bayes.probability.distribution;
import java.util.HashMap;
import java.util.LinkedList;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability;
/**
 * A distribution of one variable that keeps a separate Gaussian for every
 * combination of values of the other (conditioning) variables.
 */
public class ContinousDistribution implements ProbabilityDistribution {

    /**
     * The conditioning variables, each mapped to its position in the key
     */
    private HashMap<String, Integer> variable2row;

    /**
     * One Gaussian per key built from the conditioning variables' values
     */
    private HashMap<String, Gaussian> distribution;

    /**
     * Name of the variable this distribution describes
     */
    private String name;

    /**
     * @param names
     *            all involved variable names
     * @param self
     *            index in names of the variable this distribution
     *            describes; every other name becomes a conditioning
     *            variable
     */
    public ContinousDistribution(String names[], int self) {
        this.variable2row = new HashMap<String, Integer>();
        this.distribution = new HashMap<String, Gaussian>();
        int row = 0;
        for (int i = 0; i < names.length; i++) {
            if (i == self) {
                name = names[i];
                continue;
            }
            variable2row.put(names[i], row);
            row++;
        }
    }

    /**
     * Generates the key for an assignment: the values of the conditioning
     * variables in row order, each followed by ";". Assignments of other
     * variables are ignored; a conditioning variable without an assignment
     * contributes "null".
     *
     * @param assignment
     *            the assignment
     *
     * @return the key
     */
    public String generateKey(LinkedList<Assignment> assignment) {
        String[] slots = new String[variable2row.size()];
        for (Assignment entry : assignment) {
            String variable = entry.getVariable().getName();
            if (!variable2row.containsKey(variable)) {
                continue;
            }
            int row = variable2row.get(variable);
            slots[row] = entry.getValue();
        }
        StringBuilder key = new StringBuilder();
        for (String slot : slots) {
            key.append(slot).append(";");
        }
        return key.toString();
    }

    /**
     * Finds the assignment of this distribution's own variable.
     *
     * @param assignment
     *            the assignments to search
     * @return the first assignment of the own variable, or null if there
     *         is none
     */
    public Assignment value(LinkedList<Assignment> assignment) {
        Assignment own = null;
        for (Assignment candidate : assignment) {
            if (candidate.getVariable().getName().equals(name)) {
                own = candidate;
                break;
            }
        }
        return own;
    }

    /**
     * Evaluates the Gaussian stored for the assignment's key at the own
     * variable's value; yields 0 when no Gaussian is stored for the key.
     */
    @Override
    public Probability eval(LinkedList<Assignment> assignment) {
        Gaussian gaussian = distribution.get(generateKey(assignment));
        if (gaussian == null) {
            return new Probability(0);
        }
        // NOTE(review): Gaussian.eval yields a density value while
        // Probability rejects values above 1 - confirm this is intended.
        return new Probability(gaussian.eval(value(assignment)));
    }

    /**
     * Adds the own variable's value of the assignment as a sample to the
     * Gaussian of the assignment's key, creating that Gaussian if needed.
     *
     * @param assignment
     *            an observation containing the own variable
     */
    public void pushAssignment(LinkedList<Assignment> assignment) {
        String key = generateKey(assignment);
        Gaussian target;
        if (distribution.containsKey(key)) {
            target = distribution.get(key);
        } else {
            target = new Gaussian();
        }
        target.push(value(assignment));
        distribution.put(key, target);
    }

    /**
     * Re-estimates every stored Gaussian from its samples.
     */
    public void estimate() {
        for (Gaussian gaussian : distribution.values()) {
            gaussian.estimate();
        }
    }

    /**
     * @return the conditioning variable names in row order
     */
    public String[] getNames() {
        String[] ordered = new String[variable2row.size()];
        for (String variable : variable2row.keySet()) {
            int row = variable2row.get(variable);
            ordered[row] = variable;
        }
        return ordered;
    }

    /**
     * @return the internal (mutable) map from key to Gaussian
     */
    public HashMap<String, Gaussian> getAssignments() {
        return distribution;
    }
}

View File

@@ -0,0 +1,49 @@
package dkohl.bayes.probability.distribution;
import java.util.Vector;
import dkohl.bayes.probability.Assignment;
/**
 * A one dimensional Gaussian whose mean and variance are estimated from
 * collected samples.
 */
public class Gaussian {

    public static final double TWO_PI = 2 * Math.PI;

    /**
     * Mean of the samples, valid after {@link #estimate()}
     */
    private double mean;

    /**
     * Variance of the samples, valid after {@link #estimate()}
     */
    private double var;

    /**
     * The samples collected so far
     */
    private Vector<Double> samples;

    public Gaussian() {
        samples = new Vector<Double>();
    }

    /**
     * Adds the assignment's value, parsed as a double, as a sample.
     *
     * @param assignment
     *            the observation to add
     */
    public void push(Assignment assignment) {
        samples.add(Double.parseDouble(assignment.getValue()));
    }

    /**
     * Computes mean and variance from all collected samples. The variance
     * divides by the number of samples (not by n - 1).
     */
    public void estimate() {
        int count = samples.size();

        double sum = 0;
        for (double sample : samples) {
            sum += sample;
        }
        mean = sum / count;

        double squares = 0;
        for (double sample : samples) {
            squares += Math.pow(sample - mean, 2);
        }
        var = squares / count;
    }

    /**
     * Evaluates the Gaussian density at the assignment's value, parsed as
     * a double.
     *
     * @param assignment
     *            the point to evaluate
     * @return the density at that point
     */
    public double eval(Assignment assignment) {
        double x = Double.parseDouble(assignment.getValue());
        double scaled = Math.pow(x - mean, 2) / var;
        double normalizer = 1 / (Math.sqrt(TWO_PI * var));
        return normalizer * Math.exp(-0.5 * scaled);
    }

    /**
     * @return mean and standard deviation as "&lt; mean, std &gt;"
     */
    @Override
    public String toString() {
        double std = Math.sqrt(var);
        return "< " + mean + ", " + std + " >";
    }
}

View File

@@ -0,0 +1,24 @@
package dkohl.bayes.probability.distribution;
import java.util.LinkedList;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability;
/**
 * A probability distribution that can be evaluated for a set of assigned
 * variables.
 *
 * @author Daniel Kohlsdorf
 */
public interface ProbabilityDistribution {

    /**
     * Returns the probability for the given assignments.
     *
     * @param assignment
     *            a set of assigned variables
     * @return the probability of that assignment
     */
    Probability eval(LinkedList<Assignment> assignment);
}

View File

@@ -0,0 +1,112 @@
package dkohl.bayes.probability.distribution;
import java.util.HashMap;
import java.util.LinkedList;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability;
/**
 * A probability table defined as a table of all combinations of all possible
 * values of the variables involved.
 *
 * @author Daniel Kohlsdorf
 */
public class ProbabilityTable implements ProbabilityDistribution {

    /**
     * The involved variables, each mapped to its position in the entry key
     */
    private HashMap<String, Integer> variable2row;

    /**
     * The table: entry key (var1_value;...;varN_value;) -> probability
     */
    private HashMap<String, Probability> assignments;

    /**
     * @param names
     *            the involved variable names; the array position defines
     *            the position of the variable's value in the entry key
     */
    public ProbabilityTable(String names[]) {
        assignments = new HashMap<String, Probability>();
        this.variable2row = new HashMap<String, Integer>();
        for (int i = 0; i < names.length; i++) {
            variable2row.put(names[i], i);
        }
    }

    /**
     * @param names
     *            the involved variable names; the list position defines
     *            the position of the variable's value in the entry key
     */
    public ProbabilityTable(LinkedList<String> names) {
        assignments = new HashMap<String, Probability>();
        this.variable2row = new HashMap<String, Integer>();
        // iterate instead of names.get(i): indexed access on a linked list
        // walks the list on every call
        int row = 0;
        for (String name : names) {
            variable2row.put(name, row);
            row++;
        }
    }

    /**
     * Sets the probability for a table entry key. You can generate one from
     * your assignments using generateKey, or just use this same method with
     * your assignments.
     *
     * @param key
     *            the entry key
     * @param probability
     *            the associated probability
     */
    public void setProbabilityForAssignment(String key, Probability probability) {
        assignments.put(key, probability);
    }

    /**
     * Sets the probability for the given assignment
     *
     * @param assignment
     *            the assignment
     * @param probability
     *            the associated probability
     */
    public void setProbabilityForAssignment(LinkedList<Assignment> assignment,
            Probability probability) {
        assignments.put(generateKey(assignment), probability);
    }

    /**
     * Looks up the table entry for the given assignment.
     *
     * @param assignment
     *            a set of assigned variables
     * @return the stored probability, or null if the table has no entry for
     *         the assignment's key
     */
    @Override
    public Probability eval(LinkedList<Assignment> assignment) {
        return assignments.get(generateKey(assignment));
    }

    /**
     * Generates a table entry key for an assignment: the values of the
     * involved variables in row order, each followed by ";". Assignments of
     * variables not involved in this table are ignored; an involved
     * variable without an assignment contributes "null".
     *
     * @param assignment
     *            the assignment
     *
     * @return the key
     */
    public String generateKey(LinkedList<Assignment> assignment) {
        String[] values = new String[variable2row.size()];
        for (Assignment col : assignment) {
            Integer row = variable2row.get(col.getVariable().getName());
            if (row != null) {
                values[row] = col.getValue();
            }
        }
        StringBuilder key = new StringBuilder();
        for (String entry : values) {
            key.append(entry).append(";");
        }
        return key.toString();
    }

    /**
     * @return the internal (mutable) map from entry key to probability
     */
    public HashMap<String, Probability> getAssignments() {
        return assignments;
    }

    /**
     * @return the involved variable names in row order
     */
    public String[] getNames() {
        String[] names = new String[variable2row.size()];
        for (String name : variable2row.keySet()) {
            names[variable2row.get(name)] = name;
        }
        return names;
    }
}

View File

@@ -0,0 +1,76 @@
package dkohl.bayes.probability.distribution;
import java.util.LinkedList;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability;
import dkohl.bayes.probability.distribution.tree.DecisionNode;
import dkohl.bayes.probability.distribution.tree.ProbabilityLeaf;
/**
 * Probability distribution in decision tree representation
 *
 * DANGER: No node ordering during creation yet.
 *
 * @author Daniel Kohlsdorf
 */
public class ProbabilityTree implements ProbabilityDistribution {

    /**
     * The tree's root node; null until the first probability is stored
     */
    private DecisionNode root = null;

    /**
     * Stores a probability at the end of the path described by the
     * assignment.
     *
     * The first call creates the root for the first assigned variable.
     * Existing paths are followed as long as the current node already has a
     * successor for the assigned value; from there on new decision nodes
     * are inserted. The last assignment's value leads to the probability
     * leaf.
     *
     * @param assignment
     *            the assignments describing the path, in tree order
     * @param probability
     *            the probability to store at the end of the path
     */
    public void setProbabilityForAssignment(LinkedList<Assignment> assignment, Probability probability) {
        if (root == null) {
            root = new DecisionNode(assignment.get(0).getVariable().getName());
        }
        DecisionNode current = root;
        for (int depth = 1; depth < assignment.size(); depth++) {
            // the edge is labelled with the value of the previous variable
            String edge = assignment.get(depth - 1).getValue();
            if (current.getSuccessors().containsKey(edge)) {
                current = (DecisionNode) current.getSuccessors().get(edge);
                continue;
            }
            DecisionNode next = new DecisionNode(assignment.get(depth).getVariable().getName());
            current.put(edge, next);
            current = next;
        }
        current.put(assignment.getLast().getValue(), new ProbabilityLeaf(probability));
    }

    /**
     * Evaluates the assignment by delegating to the root node.
     */
    @Override
    public Probability eval(LinkedList<Assignment> assignment) {
        return root.eval(assignment);
    }
}

View File

@@ -0,0 +1,61 @@
package dkohl.bayes.probability.distribution.tree;
import java.util.HashMap;
import java.util.LinkedList;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability;
import dkohl.bayes.probability.distribution.ProbabilityDistribution;
/**
 * An inner tree node representing a random variable. Maps each outcome of
 * the variable to a successor distribution.
 *
 * @author Daniel Kohlsdorf
 */
public class DecisionNode implements ProbabilityDistribution {

    /**
     * The successor for each outcome
     */
    private HashMap<String, ProbabilityDistribution> successors;

    /**
     * Name of the variable this node decides on
     */
    private String variable;

    /**
     * @param variable
     *            name of the variable this node decides on
     */
    public DecisionNode(String variable) {
        this.successors = new HashMap<String, ProbabilityDistribution>();
        this.variable = variable;
    }

    public String getVariable() {
        return variable;
    }

    /**
     * Registers the successor to follow for one outcome.
     *
     * @param value
     *            the outcome
     * @param distribution
     *            the successor for that outcome
     */
    public void put(String value, ProbabilityDistribution distribution) {
        successors.put(value, distribution);
    }

    /**
     * @return the internal (mutable) map from outcome to successor
     */
    public HashMap<String, ProbabilityDistribution> getSuccessors() {
        return successors;
    }

    /**
     * Evaluates recursively: follows the successor registered for the value
     * that the first assignment of this node's variable carries.
     *
     * @return the successor's result; null (after printing a stack trace)
     *         when the assignment does not contain this node's variable
     */
    @Override
    public Probability eval(LinkedList<Assignment> assignment) {
        for (Assignment candidate : assignment) {
            boolean decidesHere = candidate.getVariable().getName()
                    .equals(variable);
            if (decidesHere) {
                ProbabilityDistribution next = successors.get(candidate
                        .getValue());
                return next.eval(assignment);
            }
        }
        // variable missing from the assignment: report it, then give up
        new Exception("Domain Violation: " + variable).printStackTrace();
        return null;
    }
}

View File

@@ -0,0 +1,29 @@
package dkohl.bayes.probability.distribution.tree;
import java.util.LinkedList;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability;
import dkohl.bayes.probability.distribution.ProbabilityDistribution;
/**
 * A leaf node holding a fixed probability. Evaluation always yields that
 * probability, whatever the assignment is.
 *
 * @author Daniel Kohlsdorf
 */
public class ProbabilityLeaf implements ProbabilityDistribution {

    /**
     * The probability this leaf yields
     */
    private final Probability probability;

    /**
     * @param probability
     *            the probability this leaf yields
     */
    public ProbabilityLeaf(Probability probability) {
        this.probability = probability;
    }

    /**
     * @param assignment
     *            ignored
     * @return the stored probability
     */
    @Override
    public Probability eval(LinkedList<Assignment> assignment) {
        return probability;
    }
}

View File

@@ -0,0 +1,27 @@
package dkohl.bayes.statistic;
import java.util.HashMap;
import dkohl.bayes.probability.Assignment;
/**
 * A data point or feature vector: the observed assignments, keyed by the
 * name of the assigned variable.
 *
 * @author Daniel Kohlsdorf
 */
public class DataPoint extends HashMap<String, Assignment> {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an empty data point.
     */
    public DataPoint() {
        super();
    }

    /**
     * Creates a data point holding the same observations as another one.
     *
     * @param point
     *            the data point to copy the observations from
     */
    public DataPoint(DataPoint point) {
        super();
        putAll(point);
    }

    /**
     * Stores an observation under the name of its variable, replacing any
     * earlier observation of that variable.
     *
     * @param assignment
     *            the observation to store
     */
    public void add(Assignment assignment) {
        String key = assignment.getVariable().getName();
        put(key, assignment);
    }
}

View File

@@ -0,0 +1,99 @@
package dkohl.bayes.statistic;
import java.util.LinkedList;
import java.util.Vector;
import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability;
/**
 * A data set, defined as a vector of data points
 *
 * @author Daniel Kohlsdorf
 */
public class DataSet extends Vector<DataPoint> {

    private static final long serialVersionUID = 1L;

    /**
     * Collects the observations of every data point that agrees with all
     * given assignments.
     *
     * @param given
     *            the assignments a data point has to agree with; when empty
     *            every data point qualifies
     * @return for each qualifying data point, a list of all its assignments
     */
    public LinkedList<LinkedList<Assignment>> getAssignmentMatchesForQuery(
            LinkedList<Assignment> given) {
        LinkedList<LinkedList<Assignment>> assignments = new LinkedList<LinkedList<Assignment>>();
        for (DataPoint point : this) {
            if (matchesAll(point, given)) {
                assignments.add(new LinkedList<Assignment>(point.values()));
            }
        }
        return assignments;
    }

    /**
     * Does the data point agree with every one of the assignments?
     *
     * @param point
     *            the point / observation
     * @param assignments
     *            the assignments to compare with
     * @return true if no assignment disagrees with the point (also true for
     *         no assignments at all)
     */
    private boolean matchesAll(DataPoint point,
            LinkedList<Assignment> assignments) {
        for (Assignment assignment : assignments) {
            if (!match(point, assignment)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Does the data point observe the query variable and agree with every
     * given assignment?
     *
     * The check for the query variable does not depend on the given
     * assignments, so it is made once up front. That way a variable without
     * any given assignments (no parents) is still counted.
     *
     * @param queryName
     *            name of the query variable the point has to observe
     * @param point
     *            the point / observation
     * @param query
     *            the given assignments the point has to agree with
     * @return true if the point qualifies
     */
    private boolean match(String queryName, DataPoint point,
            LinkedList<Assignment> query) {
        if (!point.containsKey(queryName)) {
            return false;
        }
        return matchesAll(point, query);
    }

    /**
     * Does the data point observe the assignment's variable with the
     * assignment's value?
     *
     * @param point
     *            the point / observation
     * @param query
     *            the assignment
     * @return true if variable and value agree
     */
    private boolean match(DataPoint point, Assignment query) {
        String name = query.getVariable().getName();
        String value = query.getValue();
        if (point.containsKey(name)) {
            if (point.get(name).getValue().equals(value)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Estimating probability for: P(Query | given_1 .... given_N) = #(Query,
     * given_1 .... given_N) / #(given_1 .... given_N)
     *
     * Only data points that observe the query variable are counted.
     *
     * @param query
     *            the assignment whose probability is estimated
     * @param given
     *            the assignments the estimate is conditioned on (may be
     *            empty)
     * @return the relative frequency, or 0 if no data point matches the
     *         given assignments
     */
    public Probability prob(Assignment query, LinkedList<Assignment> given) {
        int matches = 0;
        int num_query_given = 0;
        for (DataPoint point : this) {
            if (match(query.getVariable().getName(), point, given)) {
                matches += 1;
                if (match(point, query)) {
                    num_query_given += 1; // point.getWeight();
                }
            }
        }
        if (matches == 0) {
            return new Probability(0);
        }
        return new Probability(num_query_given / ((double) matches));
    }
}

View File

@@ -0,0 +1,47 @@
package dkohl.onthology;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import com.google.common.base.Preconditions;
/**
 * A flat ontology: a fixed set of classes and the things declared to be
 * instances of them.
 */
public class Ontology {

    /**
     * Thing <- Class: maps each defined thing to the class it belongs to
     */
    private HashMap<String, String> inheritance;

    /**
     * Maps each class to the things defined for it, in definition order
     */
    private HashMap<String, LinkedList<String>> classes2thing;

    /**
     * Creates an ontology with the given classes and no things.
     *
     * @param classes
     *            the names of all classes
     */
    public Ontology(HashSet<String> classes) {
        inheritance = new HashMap<String, String>();
        this.classes2thing = new HashMap<String, LinkedList<String>>();
        for (String key : classes) {
            classes2thing.put(key, new LinkedList<String>());
        }
    }

    /**
     * Declares a thing to be an instance of a class.
     *
     * @param thing
     *            the thing to define
     * @param isA
     *            the class it belongs to
     * @throws IllegalArgumentException
     *             if the class is not part of this ontology
     */
    public void define(String thing, String isA) {
        Preconditions.checkArgument(classes2thing.containsKey(isA), "Class: "
                + isA + " not defined");
        // the list is the one held by the map, so adding to it is enough
        classes2thing.get(isA).add(thing);
        inheritance.put(thing, isA);
    }

    /**
     * @return the internal (mutable) map from thing to class
     */
    public HashMap<String, String> getInheritance() {
        return inheritance;
    }

    /**
     * @return the internal (mutable) map from class to its things
     */
    public HashMap<String, LinkedList<String>> getClasses2thing() {
        return classes2thing;
    }

    /**
     * @return the names of all classes (a view of the internal map's keys)
     */
    public Set<String> getClasses() {
        return classes2thing.keySet();
    }
}