Final write-up edits (turned in) and some build/source cleanup to ensure that turned in code can be run from command line.

This commit is contained in:
Woody Folsom
2012-03-13 14:14:14 -04:00
parent b7676b08c2
commit 052da376ce
12 changed files with 455 additions and 231 deletions

View File

@@ -43,12 +43,11 @@
</target> </target>
<target name="copy-resources" depends="init" > <target name="copy-resources" depends="init" >
<copy todir="${build}/data"> <copy todir="${dist}/data">
<fileset dir="${data}"> <fileset dir="${data}">
<include name="**/*" /> <include name="*.xml" />
</fileset> </fileset>
</copy> </copy>
<!--copy file="DetailedInfo.txt" todir="${dist}" /-->
</target> </target>
<target name="dist" depends="copy-resources, compile" <target name="dist" depends="copy-resources, compile"
@@ -56,7 +55,8 @@
<jar jarfile="${dist}/${project.name}.jar"> <jar jarfile="${dist}/${project.name}.jar">
<fileset dir="${build}" excludes="**/*Test.class" /> <fileset dir="${build}" excludes="**/*Test.class" />
<manifest> <manifest>
<attribute name="Main-Class" value="net.woodyfolsom.cs6601.p2.RecipeBookStub" /> <attribute name="Main-Class" value="net.woodyfolsom.cs6601.p2.BayesChef" />
<attribute name="Class-Path" value=". ../lib/commons-codec-1.5.jar ../lib/guava-r09.jar ../lib/xstream-1.4.2.jar" />
</manifest> </manifest>
</jar> </jar>
</target> </target>

View File

@@ -1,4 +1,4 @@
package dkohl.bayes.example.builders; package dkohl.bayes.builders;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;

View File

@@ -10,7 +10,6 @@ import net.woodyfolsom.cs6601.p2.RecipeBook;
import net.woodyfolsom.cs6601.p2.Survey; import net.woodyfolsom.cs6601.p2.Survey;
import dkohl.bayes.bayesnet.BayesNet; import dkohl.bayes.bayesnet.BayesNet;
import dkohl.bayes.estimation.MaximumLikelihoodEstimation; import dkohl.bayes.estimation.MaximumLikelihoodEstimation;
import dkohl.bayes.example.builders.FoodExampleBuilder;
import dkohl.bayes.probability.Assignment; import dkohl.bayes.probability.Assignment;
import dkohl.bayes.probability.Probability; import dkohl.bayes.probability.Probability;
import dkohl.bayes.probability.Variable; import dkohl.bayes.probability.Variable;

View File

@@ -9,7 +9,7 @@ import java.util.List;
import org.junit.Test; import org.junit.Test;
import dkohl.bayes.bayesnet.BayesNet; import dkohl.bayes.bayesnet.BayesNet;
import dkohl.bayes.example.builders.FoodExampleBuilder; import dkohl.bayes.builders.FoodExampleBuilder;
import dkohl.bayes.inference.EnumerateAll; import dkohl.bayes.inference.EnumerateAll;
import dkohl.bayes.probability.ProbabilityAssignment; import dkohl.bayes.probability.ProbabilityAssignment;
import dkohl.bayes.probability.Variable; import dkohl.bayes.probability.Variable;

View File

@@ -3,14 +3,19 @@
\citation{corney} \citation{corney}
\citation{janzenxiang} \citation{janzenxiang}
\citation{truyen} \citation{truyen}
\citation{truyen}
\citation{murphy} \citation{murphy}
\citation{murphy}
\citation{russelnorvig}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Our Bayesian net modeling user preferences. The top layer describes the categories Meat and Vegetable. We have a control variable vegetarian for Meat, such that it will always evaluate to $0$ when there is meat involved in a dish and we have a vegetarian diner. The mid layer describes the preference for different ingredients. The last layer is a Gaussian predicting the users preferences.}}{2}}
\newlabel{img:bayes_net}{{1}{2}}
\bibstyle{plain} \bibstyle{plain}
\bibdata{p2refs} \bibdata{p2refs}
\bibcite{corney}{1} \bibcite{corney}{1}
\bibcite{janzenxiang}{2} \bibcite{janzenxiang}{2}
\bibcite{murphy}{3} \bibcite{murphy}{3}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Our Baysian net modeling user preferences}}{2}} \bibcite{russelnorvig}{4}
\newlabel{img:bayes_net}{{1}{2}} \bibcite{truyen}{5}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Estimated vs. Actual Survey Dish Ratings}}{2}} \bibcite{vanboekel}{6}
\bibcite{truyen}{4} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Estimated vs. surveyed dish ratings}}{3}}
\bibcite{vanboekel}{5} \newlabel{result}{{2}{3}}

View File

@@ -18,6 +18,11 @@ Kevin Murphy.
\newblock http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html, 1998. \newblock http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html, 1998.
\newblock [Online; accessed 22-Feb-2012]. \newblock [Online; accessed 22-Feb-2012].
\bibitem{russelnorvig}
S.~Russel and P.~Norvig.
\newblock {\em Artificial Intelligence; A Modern Approach}.
\newblock Prentice Hall, third edition, 2010.
\bibitem{truyen} \bibitem{truyen}
Tran~The Truyen, Dinh~Q. Phung, and Svetha Venkatesh. Tran~The Truyen, Dinh~Q. Phung, and Svetha Venkatesh.
\newblock Preference {Networks}: {Probabilistic} {Models} for {Recommendation} \newblock Preference {Networks}: {Probabilistic} {Models} for {Recommendation}
@@ -29,6 +34,6 @@ Tran~The Truyen, Dinh~Q. Phung, and Svetha Venkatesh.
Stein~A. van Boekel, M.A.J.S. and A.H.C. van Bruggen. Stein~A. van Boekel, M.A.J.S. and A.H.C. van Bruggen.
\newblock {Bayesian Statistics} and {Quality Modelling} in the {Agro-food \newblock {Bayesian Statistics} and {Quality Modelling} in the {Agro-food
Production Chain}. Production Chain}.
\newblock {\em Proceedings of the Frontis workshop}, 2004. \newblock {\em Proceedings of the Frontis workshop}, 3, 2004.
\end{thebibliography} \end{thebibliography}

View File

@@ -1,28 +1,51 @@
This is BibTeX, Version 0.99dThe top-level auxiliary file: P2 Proposal.aux This is BibTeX, Version 0.99d (TeX Live 2010)
Capacity: max_strings=35307, hash_size=35307, hash_prime=30011
The top-level auxiliary file: P2 Proposal.aux
The style file: plain.bst The style file: plain.bst
Database file #1: p2refs.bib Database file #1: p2refs.bib
I was expecting a `,' or a `}'---line 14 of file p2refs.bib
:
: CITY = {Wageningen}
(Error may have been on previous line)
I'm skipping whatever remains of this entry
I was expecting a `,' or a `}'---line 23 of file p2refs.bib
:
: CITY = {London}
(Error may have been on previous line)
I'm skipping whatever remains of this entry
I was expecting a `,' or a `}'---line 31 of file p2refs.bib
:
: CITY = {London}
(Error may have been on previous line)
I'm skipping whatever remains of this entry
I was expecting a `,' or a `}'---line 40 of file p2refs.bib
:
: CITY = {Guelph}
(Error may have been on previous line)
I'm skipping whatever remains of this entry
Too many commas in name 1 of "van Boekel, M.A.J.S., Stein, A. and van Bruggen, A.H.C." for entry vanboekel Too many commas in name 1 of "van Boekel, M.A.J.S., Stein, A. and van Bruggen, A.H.C." for entry vanboekel
while executing---line 1049 of file plain.bst while executing---line 1049 of file plain.bst
Too many commas in name 1 of "van Boekel, M.A.J.S., Stein, A. and van Bruggen, A.H.C." for entry vanboekel Too many commas in name 1 of "van Boekel, M.A.J.S., Stein, A. and van Bruggen, A.H.C." for entry vanboekel
while executing---line 1090 of file plain.bst while executing---line 1090 of file plain.bst
(There were 6 error messages) You've used 6 entries,
2118 wiz_defined-function locations,
529 strings with 4926 characters,
and the built_in function-call counts, 1457 in all, are:
= -- 138
> -- 71
< -- 0
+ -- 29
- -- 22
* -- 72
:= -- 272
add.period$ -- 19
call.type$ -- 6
change.case$ -- 29
chr.to.int$ -- 0
cite$ -- 6
duplicate$ -- 59
empty$ -- 113
format.name$ -- 22
if$ -- 295
int.to.chr$ -- 0
int.to.str$ -- 6
missing$ -- 6
newline$ -- 34
num.names$ -- 12
pop$ -- 34
preamble$ -- 1
purify$ -- 23
quote$ -- 0
skip$ -- 44
stack$ -- 0
substring$ -- 31
swap$ -- 6
text.length$ -- 0
text.prefix$ -- 0
top$ -- 0
type$ -- 22
warning$ -- 0
while$ -- 12
width$ -- 7
write$ -- 66
(There were 2 error messages)

View File

@@ -1,4 +1,4 @@
This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.11) 12 MAR 2012 22:10 This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.11) 13 MAR 2012 07:08
entering extended mode entering extended mode
**D:/workspace/cs6601p2/writeup/P2*Proposal.tex **D:/workspace/cs6601p2/writeup/P2*Proposal.tex
("D:/workspace/cs6601p2/writeup/P2 Proposal.tex" ("D:/workspace/cs6601p2/writeup/P2 Proposal.tex"
@@ -46,13 +46,41 @@ Package: titlesec 2011/12/15 v2.10.0 Sectioning titles
\titlewidthlast=\dimen106 \titlewidthlast=\dimen106
\titlewidthfirst=\dimen107 \titlewidthfirst=\dimen107
) )
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\graphicx.sty" ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\geometry\geometry.sty"
Package: graphicx 1999/02/16 v1.0f Enhanced LaTeX Graphics (DPC,SPQR) Package: geometry 2010/09/12 v5.6 Page Geometry
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\keyval.sty" ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\keyval.sty"
Package: keyval 1999/03/16 v1.13 key=value parser (DPC) Package: keyval 1999/03/16 v1.13 key=value parser (DPC)
\KV@toks@=\toks15 \KV@toks@=\toks15
) )
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\oberdiek\ifpdf.sty"
Package: ifpdf 2011/01/30 v2.3 Provides the ifpdf switch (HO)
Package ifpdf Info: pdfTeX in PDF mode is detected.
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\oberdiek\ifvtex.sty"
Package: ifvtex 2010/03/01 v1.5 Switches for detecting VTeX and its modes (HO)
Package ifvtex Info: VTeX not detected.
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\ifxetex\ifxetex.sty"
Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional
)
\Gm@cnth=\count87
\Gm@cntv=\count88
\c@Gm@tempcnt=\count89
\Gm@bindingoffset=\dimen108
\Gm@wd@mp=\dimen109
\Gm@odd@mp=\dimen110
\Gm@even@mp=\dimen111
\Gm@layoutwidth=\dimen112
\Gm@layoutheight=\dimen113
\Gm@layouthoffset=\dimen114
\Gm@layoutvoffset=\dimen115
\Gm@dimlist=\toks16
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\geometry\geometry.cfg"))
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\graphicx.sty"
Package: graphicx 1999/02/16 v1.0f Enhanced LaTeX Graphics (DPC,SPQR)
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\graphics.sty" ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\graphics.sty"
Package: graphics 2009/02/05 v1.0o Standard LaTeX Graphics (DPC,SPQR) Package: graphics 2009/02/05 v1.0o Standard LaTeX Graphics (DPC,SPQR)
@@ -73,10 +101,10 @@ Package: infwarerr 2010/04/08 v1.3 Providing info/warning/message (HO)
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\oberdiek\ltxcmds.sty" ("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\oberdiek\ltxcmds.sty"
Package: ltxcmds 2011/04/18 v1.20 LaTeX kernel commands for general use (HO) Package: ltxcmds 2011/04/18 v1.20 LaTeX kernel commands for general use (HO)
) )
\Gread@gobject=\count87 \Gread@gobject=\count90
)) ))
\Gin@req@height=\dimen108 \Gin@req@height=\dimen116
\Gin@req@width=\dimen109 \Gin@req@width=\dimen117
) )
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsmath.sty" ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsmath.sty"
Package: amsmath 2000/07/18 v2.13 AMS math features Package: amsmath 2000/07/18 v2.13 AMS math features
@@ -88,51 +116,51 @@ Package: amstext 2000/06/29 v2.01
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsgen.sty" ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsgen.sty"
File: amsgen.sty 1999/11/30 v2.0 File: amsgen.sty 1999/11/30 v2.0
\@emptytoks=\toks16 \@emptytoks=\toks17
\ex@=\dimen110 \ex@=\dimen118
)) ))
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsbsy.sty" ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsbsy.sty"
Package: amsbsy 1999/11/29 v1.2d Package: amsbsy 1999/11/29 v1.2d
\pmbraise@=\dimen111 \pmbraise@=\dimen119
) )
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsopn.sty" ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsopn.sty"
Package: amsopn 1999/12/14 v2.01 operator names Package: amsopn 1999/12/14 v2.01 operator names
) )
\inf@bad=\count88 \inf@bad=\count91
LaTeX Info: Redefining \frac on input line 211. LaTeX Info: Redefining \frac on input line 211.
\uproot@=\count89 \uproot@=\count92
\leftroot@=\count90 \leftroot@=\count93
LaTeX Info: Redefining \overline on input line 307. LaTeX Info: Redefining \overline on input line 307.
\classnum@=\count91 \classnum@=\count94
\DOTSCASE@=\count92 \DOTSCASE@=\count95
LaTeX Info: Redefining \ldots on input line 379. LaTeX Info: Redefining \ldots on input line 379.
LaTeX Info: Redefining \dots on input line 382. LaTeX Info: Redefining \dots on input line 382.
LaTeX Info: Redefining \cdots on input line 467. LaTeX Info: Redefining \cdots on input line 467.
\Mathstrutbox@=\box27 \Mathstrutbox@=\box27
\strutbox@=\box28 \strutbox@=\box28
\big@size=\dimen112 \big@size=\dimen120
LaTeX Font Info: Redeclaring font encoding OML on input line 567. LaTeX Font Info: Redeclaring font encoding OML on input line 567.
LaTeX Font Info: Redeclaring font encoding OMS on input line 568. LaTeX Font Info: Redeclaring font encoding OMS on input line 568.
\macc@depth=\count93 \macc@depth=\count96
\c@MaxMatrixCols=\count94 \c@MaxMatrixCols=\count97
\dotsspace@=\muskip10 \dotsspace@=\muskip10
\c@parentequation=\count95 \c@parentequation=\count98
\dspbrk@lvl=\count96 \dspbrk@lvl=\count99
\tag@help=\toks17 \tag@help=\toks18
\row@=\count97 \row@=\count100
\column@=\count98 \column@=\count101
\maxfields@=\count99 \maxfields@=\count102
\andhelp@=\toks18 \andhelp@=\toks19
\eqnshift@=\dimen113 \eqnshift@=\dimen121
\alignsep@=\dimen114 \alignsep@=\dimen122
\tagshift@=\dimen115 \tagshift@=\dimen123
\tagwidth@=\dimen116 \tagwidth@=\dimen124
\totwidth@=\dimen117 \totwidth@=\dimen125
\lineht@=\dimen118 \lineht@=\dimen126
\@envbody=\toks19 \@envbody=\toks20
\multlinegap=\skip48 \multlinegap=\skip48
\multlinetaggap=\skip49 \multlinetaggap=\skip49
\mathdisplay@stack=\toks20 \mathdisplay@stack=\toks21
LaTeX Info: Redefining \[ on input line 2666. LaTeX Info: Redefining \[ on input line 2666.
LaTeX Info: Redefining \] on input line 2667. LaTeX Info: Redefining \] on input line 2667.
) )
@@ -154,50 +182,94 @@ LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 11. LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11. LaTeX Font Info: ... okay on input line 11.
*geometry* driver: auto-detecting
*geometry* detected driver: pdftex
*geometry* verbose mode - [ preamble ] result:
* driver: pdftex
* paper: <default>
* layout: <same size as paper>
* layoutoffset:(h,v)=(0.0pt,0.0pt)
* modes:
* h-part:(L,W,R)=(50.58878pt, 513.11745pt, 50.58878pt)
* v-part:(T,H,B)=(50.58878pt, 693.79243pt, 50.58878pt)
* \paperwidth=614.295pt
* \paperheight=794.96999pt
* \textwidth=513.11745pt
* \textheight=693.79243pt
* \oddsidemargin=-21.68121pt
* \evensidemargin=-21.68121pt
* \topmargin=-21.68121pt
* \headheight=0.0pt
* \headsep=0.0pt
* \topskip=10.0pt
* \footskip=30.0pt
* \marginparwidth=4.0pt
* \marginparsep=10.0pt
* \columnsep=22.58437pt
* \skip\footins=9.0pt plus 4.0pt minus 2.0pt
* \hoffset=0.0pt
* \voffset=0.0pt
* \mag=1000
* \@twocolumntrue
* \@twosidefalse
* \@mparswitchfalse
* \@reversemarginfalse
* (1in=72.27pt=25.4mm, 1cm=28.453pt)
("C:\Program Files (x86)\MiKTeX 2.9\tex\context\base\supp-pdf.mkii" ("C:\Program Files (x86)\MiKTeX 2.9\tex\context\base\supp-pdf.mkii"
[Loading MPS to PDF converter (version 2006.09.02).] [Loading MPS to PDF converter (version 2006.09.02).]
\scratchcounter=\count100 \scratchcounter=\count103
\scratchdimen=\dimen119 \scratchdimen=\dimen127
\scratchbox=\box29 \scratchbox=\box29
\nofMPsegments=\count101 \nofMPsegments=\count104
\nofMParguments=\count102 \nofMParguments=\count105
\everyMPshowfont=\toks21 \everyMPshowfont=\toks22
\MPscratchCnt=\count103 \MPscratchCnt=\count106
\MPscratchDim=\dimen120 \MPscratchDim=\dimen128
\MPnumerator=\count104 \MPnumerator=\count107
\makeMPintoPDFobject=\count105 \makeMPintoPDFobject=\count108
\everyMPtoPDFconversion=\toks22 \everyMPtoPDFconversion=\toks23
) )
Underfull \hbox (badness 4254) in paragraph at lines 67--70 Underfull \hbox (badness 8094) in paragraph at lines 89--94
[] \OT1/cmr/bx/n/10 Data ac-cui-si-tion \OT1/cmr/m/n/10 We first ac-cu-mu-lated [] \OT1/cmr/bx/n/10 Data ac-qui-si-tion \OT1/cmr/m/n/10 We first ac-cu-mu-lated
a di- a di-
[] []
<bayes.png, id=1, 987.69pt x 480.79625pt>
File: bayes.png Graphic file (type png) Underfull \hbox (badness 1881) in paragraph at lines 89--94
<use bayes.png> \OT1/cmr/m/n/10 verse col-lec-tion of license-free sam-ple recipes from
Package pdftex.def Info: bayes.png used on input line 99. []
(pdftex.def) Requested size: 237.13594pt x 115.43013pt.
Underfull \hbox (badness 2600) in paragraph at lines 89--94
\OT1/cmr/m/n/10 web sites such as \OT1/cmr/m/it/10 Dark-star's Meal-Master Reci
pes
[]
[1{C:/ProgramData/MiKTeX/2.9/pdftex/config/pdftex.map} [1{C:/ProgramData/MiKTeX/2.9/pdftex/config/pdftex.map}
] ]
<bayes.png, id=12, 969.6225pt x 506.89375pt>
File: bayes.png Graphic file (type png)
<use bayes.png>
Package pdftex.def Info: bayes.png used on input line 117.
(pdftex.def) Requested size: 220.73839pt x 115.39198pt.
Package amsmath Warning: Foreign command \over; Package amsmath Warning: Foreign command \over;
(amsmath) \frac or \genfrac should be used instead (amsmath) \frac or \genfrac should be used instead
(amsmath) on input line 133. (amsmath) on input line 161.
[2 <D:/workspace/cs6601p2/writeup/bayes.png>]
LaTeX Warning: Reference `rms-table' on page 2 undefined on input line 150. <BayesChefChart.png, id=18, 426.99525pt x 313.77225pt>
<BayesChefChart.png, id=14, 426.99525pt x 313.77225pt>
File: BayesChefChart.png Graphic file (type png) File: BayesChefChart.png Graphic file (type png)
<use BayesChefChart.png> <use BayesChefChart.png>
Package pdftex.def Info: BayesChefChart.png used on input line 154. Package pdftex.def Info: BayesChefChart.png used on input line 254.
(pdftex.def) Requested size: 248.42812pt x 182.55777pt. (pdftex.def) Requested size: 256.55872pt x 188.53769pt.
Overfull \hbox (11.29218pt too wide) in paragraph at lines 154--155 Overfull \hbox (11.29218pt too wide) in paragraph at lines 254--255
[][] [][]
[] []
@@ -216,36 +288,35 @@ Underfull \hbox (badness 10000) in paragraph at lines 16--20
\OT1/cmr/m/n/10 works. http://www.cs.ubc.ca/ mur- \OT1/cmr/m/n/10 works. http://www.cs.ubc.ca/ mur-
[] []
[2 <D:/workspace/cs6601p2/writeup/bayes.png> <D:/workspace/cs6601p2/writeup/Bay
esChefChart.png>]) [3
] ("D:\workspace\cs6601p2\writeup\P2 Proposal.aux") Underfull \hbox (badness 1038) in paragraph at lines 34--38
[]\OT1/cmr/m/n/10 Stein A. van Boekel, M.A.J.S. and A.H.C. van
[]
LaTeX Warning: There were undefined references. ) [3 <D:/workspace/cs6601p2/writeup/BayesChefChart.png>]
("D:\workspace\cs6601p2\writeup\P2 Proposal.aux") )
)
Here is how much of TeX's memory you used: Here is how much of TeX's memory you used:
2023 strings out of 494045 2521 strings out of 494045
25481 string characters out of 3145969 32956 string characters out of 3145969
87176 words of memory out of 3000000 104365 words of memory out of 3000000
5325 multiletter control sequences out of 15000+200000 5807 multiletter control sequences out of 15000+200000
11026 words of font info for 36 fonts, out of 3000000 for 9000 11026 words of font info for 36 fonts, out of 3000000 for 9000
715 hyphenation exceptions out of 8191 715 hyphenation exceptions out of 8191
27i,8n,28p,716b,219s stack positions out of 5000i,500n,10000p,200000b,50000s 27i,8n,32p,572b,220s stack positions out of 5000i,500n,10000p,200000b,50000s
{C:/Program Files (x86)/MiKTeX 2.9/fonts/enc/dvips/fontname/8r.enc}<C:/Progra {C:/Program Files (x86)/MiKT
m Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmbx10.pfb><C:/Program eX 2.9/fonts/enc/dvips/fontname/8r.enc}<C:/Program Files (x86)/MiKTeX 2.9/fonts
Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmbx12.pfb><C:/Program Fi /type1/public/amsfonts/cm/cmbx10.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/t
les (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmmi10.pfb><C:/Program File ype1/public/amsfonts/cm/cmbx12.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/typ
s (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmmi7.pfb><C:/Program Files ( e1/public/amsfonts/cm/cmmi10.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1
x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr10.pfb><C:/Program Files (x86 /public/amsfonts/cm/cmr10.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/pu
)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr12.pfb><C:/Program Files (x86)/M blic/amsfonts/cm/cmr12.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/publi
iKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr7.pfb><C:/Program Files (x86)/MiKTe c/amsfonts/cm/cmr7.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/am
X 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><C:/Program Files (x86)/MiKTeX sfonts/cm/cmsy10.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsf
2.9/fonts/type1/public/amsfonts/cm/cmti10.pfb><C:/Program Files (x86)/MiKTeX 2. onts/cm/cmti10.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/urw/helvetic/
9/fonts/type1/urw/helvetic/uhvb8a.pfb> uhvb8a.pfb>
Output written on "P2 Proposal.pdf" (3 pages, 190917 bytes). Output written on "P2 Proposal.pdf" (3 pages, 187866 bytes).
PDF statistics: PDF statistics:
55 PDF objects out of 1000 (max. 8388607) 51 PDF objects out of 1000 (max. 8388607)
0 named destinations out of 1000 (max. 500000) 0 named destinations out of 1000 (max. 500000)
11 words of extra memory for PDF output out of 10000 (max. 10000000) 11 words of extra memory for PDF output out of 10000 (max. 10000000)

Binary file not shown.

View File

@@ -2,7 +2,7 @@
\usepackage{latex8} \usepackage{latex8}
\usepackage{titlesec} \usepackage{titlesec}
% \usepackage[margin=0.5in]{geometry} \usepackage[margin=0.7in]{geometry}
\usepackage{graphicx} \usepackage{graphicx}
\usepackage{amsmath} \usepackage{amsmath}
@@ -27,31 +27,53 @@ allergies and religious beliefs. Many professional catering services
handle this problem by allowing guests to select from a very limited handle this problem by allowing guests to select from a very limited
menu. We introduce a dish recommendation system menu. We introduce a dish recommendation system
based on Bayesian Networks modeling user preferences. based on Bayesian Networks modeling user preferences.
We predict the meals from a data base of recepices that most likely match the varied tastes We predict the meals from a data base of recipes that most likely match the varied tastes
of the customers, using a limited set of ingredients. This type of expert system of the customers, using a limited set of ingredients. This type of expert system
would be of great use to a catering service or restaurant which needs to rapdily decide on would be of great use to a catering service or restaurant which needs to rapidly decide on
a small number of dishes which would be acceptable for a large dinner party, a small number of dishes which would be acceptable for a large dinner party,
given diverse requirements and preferences. given diverse requirements and preferences.
\paragraph*{Bayesian Catering: A use case.}
Imagine that you run a catering service and have to
plan an event with a customer. You can
create a variety of dishes and now you
want to discuss with your clients which one to
serve. In order to get a better idea of which preferences
and needs your clients will have, you let them fill
out a survey in advance, where they rate a small number of
your dishes on a scale from $1 - 10$ and inform you about
hard constraints like allergies, religious constraints
or vegetarians. You then use those results in order to predict
the ratings for the rest of your dishes and present the clients
the top $k$ results. If such a system works, this will save time and lead
to better customer satisfaction, since you can present them dishes
they will most probably like but still surprise them (since you have
not presented them what they already rated). After the dinner,
participants could rate the dishes served at the party which would iteratively improve the process for future customers.
\section*{Related Work} \section*{Related Work}
Boekel and Corney propose using Bayesian Networks to model Boekel and Corney propose using Bayesian Networks to model
consumer needs in food production chains \cite{vanboekel} \cite{corney}. consumer needs in food production chains \cite{vanboekel} \cite{corney}.
Janzen and Xiang propose an intelligent refrigerator capable of Janzen and Xiang propose an intelligent refrigerator capable of
generating meal plans based on inventory generating meal plans based on inventory
and past food choices \cite{janzenxiang}. Baysian networks have also been and past food choices \cite{janzenxiang}.
We suggest that these approaches are limited in that they
only consider the preferences of a single (or supposed 'typical')
user rather than a group. Baysian networks have also been
applied to recommendation systems before in on-line social applied to recommendation systems before in on-line social
networks \cite{truyen} making predictions of the form networks \cite{truyen} making predictions of the form
``if you bought those items what is the probability you would like to ``if you bought those items what is the probability you would like to
buy that''. We suggest that these approaches are limited in that they buy that''. This method also uses Bayesian networks
only consider the preferences of a single (or supposed 'typical') user rather than a group. for prediction, and our approach is similar to, and inspired by,
the work of Truyen et al. \cite{truyen}.
\section*{Approach} \section*{Approach}
The approached problem is to pick a single meal which best meets the requirements The approached problem is to pick a single meal which best meets the requirements
and tastes of different people dining together. We learn a predictive and tastes of different people dining together. We learn a predictive
baysian net from a survey distributed to participants of the meal as Bayesian net from a survey distributed to participants of the meal as
training data in order to capture their preferences. The dishes training data in order to capture their preferences. The dishes
in the questionaire are selected such that all ingrediants in the questionnaire are selected such that all ingredients
are covered. The participants rate each dish on a scale from are covered. The participants rate each dish on a scale from
one to ten and give additional information like vegetarians. one to ten and give additional information like vegetarians.
For new dishes we then predict the maximum likelihood For new dishes we then predict the maximum likelihood
@@ -63,100 +85,199 @@ end how we trained the modeled net from
gathered data and predicted the gathered data and predicted the
value for different recipes. value for different recipes.
\paragraph*{Data accuisition} \paragraph*{Data acquisition}
We first accumulated a diverse collection of license-free sample recipes from web sites such as \emph{Darkstar's Meal-Master Recipes} (http://home.earthlink.net/~darkstar105/). Next, we converted these recipes from flat text files to well-formed XML using the Krecipes application for Debian Linux. Finally, we created a representative data set representing several diners' preference for We first accumulated a diverse collection of license-free sample recipes from web sites such as \emph{Darkstar's Meal-Master Recipes} (http://home.earthlink.net/~darkstar105/). Next, we converted these recipes from flat text files to well-formed XML using the `Krecipes' application for Debian Linux. Finally, we created a representative data set representing several diners' preference for
24 of these recipes, using a simple survey of the type 'rate on a 24 of these recipes, using a simple survey of the type 'rate on a
scale of 1 to 10, 10 being favorite and 1 being least favorite'. Furthermore, users were allowed to specify a vegetarian or nut-free meal preference. scale of 1 to 10, 10 being favorite and 1 being least favorite'.
Furthermore, users were allowed to specify a vegetarian
or nut-free meal preference.
%daniel is here
\paragraph*{Knowledge Engineering} \paragraph*{Knowledge Engineering}
We model the diners' various taste preferences using We model the diners' various taste preferences using
a Bayes net. We model the taste a Bayes net. The net consists of three node types.
We call them ``control nodes'', ``taste nodes''
\begin{description} and ``rating nodes''. A ``taste node''
\item[Layer 1] The first layer models a general preference towards models the probability of a diner's preference towards an
different food categories like vegetables or meat. ingredient ($P(likes tomato)$, $P(likes potato)$) or a category
As one can see, the food categories are dependent ($P(likes meat)$). These variables are discrete. The ingredients
on the general meal preference. For example are conditional independent from each other but conditioned
being vegetarian will exclude meat and will by the food category they belong to (see Figure \ref{img:bayes_net}
support vegetables. the two top layers). A control node can definitely reject a dish,
by evaluating to $0$ in certain conditions. For
\item[Layer 2] Specific flavors and ingredients. Each ingredient is conditioned example if someone is vegetarian and the presented dish contains
by the food category to which it belongs. meat, the control variable for vegetarian will evaluate to $0$ and
\end{description} so the probability for the whole dish will become $0$.
So the vegetarian variable is conditioned by meat.
If we need to model hard constraints, like The third type in the net is a rating node; it is continuous
and models the dish rating given a set of ingredients.
The overall net is shown in Figure \ref{img:bayes_net}. The overall net is shown in Figure \ref{img:bayes_net}.
Given a recipe with a list of ingredients $I = i_1,...,i_n$
and a Bayesian network capturing user preferences
we can calculate the probability of users liking the dish given
the probabilities of liking each ingredient.
\begin{figure} \begin{figure}[ht]
\centering \centering
\includegraphics[width=\linewidth]{bayes} \includegraphics[width=0.9\linewidth]{bayes}
\caption{Our Baysian net modeling user preferences} \caption{Our Bayesian net modeling user preferences. The top layer
describes the categories Meat and Vegetable. We have a control
variable vegetarian for Meat, such that it will always evaluate to
$0$ when there is meat involved in a dish and we have a vegetarian
diner. The mid layer describes the preference for different
ingredients. The last layer is a Gaussian predicting the user's preferences.}
\label{img:bayes_net} \label{img:bayes_net}
\end{figure} \end{figure}
%\subsection*{implementation} %\subsection*{implementation}
\paragraph*{Learning and Predicting} \paragraph*{Learning user preferences}
In order to estimate the model parameters, the In order to estimate the model parameters, the
system will be trained with statistics about taste system will be trained with statistics about taste
and preferences given a set of dishes with ratings and preferences given a set of dishes with ratings
from multiple users. From that information we can directly calculate from multiple users. The training set is generated from
the probabilities for the ingredients using Maximum Likelihood Learning \cite{murphy}. the questionnaires we distributed.
An example for a survey output could look like this (Ingredients, Rating): ``Pork,
Potatoes, 8''. In order to perform normal Maximum Likelihood Learning
\cite{murphy} we have to have information about all variables
(``Pork, Potatoes, Tomatoes, Beef, Meat, Vegetables, Rating'').
We perform several steps in order to transform from the survey input
to a training instance.
First we discretize the values such that all given variables (in our
case Pork and Potatoes) are set to ``true'' if the value is above
a certain threshold (in our experiments $5$) and ``false'' otherwise.
In that way ``liking things rated $>$ 5'' appear more often in the
training set and will be assigned with a higher probability.
We add categories by including the category of each ingredient
from the survey. If the ingredient is liked, the category is too
and if it is not, the category is not liked too. The last step
is to add all values that are not in the recipe as ``false''
to the training instance.
From a set of those preprocessed assignments, we can directly calculate
%\subsection*{Meal Optimization} the probabilities for the ingredients using Maximum Likelihood
In order to model food preferences, we implemented Learning \cite{murphy}. For example for an assignment of a
a custom Baysian net library in java with minimal use of third party libraries (e.g. for XML input). conditional variable $P(X = x \mid Y_1 = y_1, \ldots, Y_n = y_n)$,
We chose to implement our own Library, for maximum flexibility and to ensure that the learning algorithm functions precisely as follows: we count how often we observe the configuration $X = x, Y_1 = y_1,
\ldots, Y_n = y_n$ and how often we count $Y_1 = y_1, \ldots, Y_n = y_n$
The library in our data set. The maximum likelihood is then defined as
uses the sum-product algorithm for
inference and maximum likelihood learning
for parameter estimation. In our implementation
we support discrete as well as continous
probability distributions. Discrete distributions
can be modeled as tables or as trees.
In our implementation only continous distributions with discrete parents
are supported. A continous distribution is then modeled as a mapping
of all possible combination of it' s parents to a gaussian.
Given a data set, the parameters of a discrete variable $X$ are
estimated as
\begin{align} \begin{align}
P(X = x \mid Y_1 = y_1, \ldots, Y_n = y_n) = P(X = x \mid Y_1 = y_1, \ldots, Y_n = y_n) =
\frac{N(X = x, Y_1 = y_1, \ldots, Y_n = y_n)}{N(Y_1 = y_1, \ldots, Y_n = y_n)} \frac{N(X = x, Y_1 = y_1, \ldots, Y_n = y_n)}{N(Y_1 = y_1, \ldots, Y_n = y_n)}
\end{align} \end{align}
where $N(A)$ is the number of times event $A$ occurs in the data set. where $N(A)$ is the number of times event $A$ occurs in the data set.
For a continuous variable like rating, we estimate a Gaussian for
each combination of its parents. For example if the rating variable
is dependent on beef and tomatoes, we would estimate 4 Gaussians,
one for each possible combination of beef and tomatoes. So during
training we would estimate mean and variance for all cases where
$(beef = true, tomato = true)$, $(beef = false, tomato = true)$
and so on.
\paragraph*{Inferring maximum likelihood rating}
Having estimated the probabilities of such a net, we can infer
the maximum likelihood rating of an unseen dish while observing only
a set of ingredients. To do so, we iterate over all possible
ratings ($1 - 10$) and compute the probability of this rating.
The maximum probability is the maximum likelihood rating
for that dish. We use the \emph{enumerateAll} algorithm \cite{russelnorvig}
for the probability calculations.
\paragraph*{Implementation}
In order to model food preferences, we implemented
a custom Bayesian net library in Java with
minimal use of third party libraries (e.g. for XML input).
We chose to implement our own Library,
for maximum flexibility and to ensure that the learning algorithm
functions precisely as follows:
The library uses the sum-product algorithm for
inference and maximum likelihood learning
for parameter estimation. In our implementation
we support discrete as well as continuous
probability distributions. Discrete distributions
can be modeled as tables or as trees.
In our implementation only continuous distributions with discrete parents
are supported. A continuous distribution is then modeled as a mapping
of all possible combinations of its parents to a Gaussian.
\section*{Evaluation} \section*{Evaluation}
The application model will be trained using a sparse subset (50\%) of the survey data and the optimization problem solved for the inferred constraints. As shown below, the calculated preferences for recipes which were not used to train the Bayes net are quite close to the actual survey data, which essentially reflects the following preferences (Sample ratings are on a 1-10 scale): In an experiment we collected $24$ ratings from $4$ persons.
We trained the Bayes net using a sparse subset (50\%) of the survey data. Then we evaluated the rest of the
recipes (which are all unseen) and calculated
the maximum likelihood rating.
As shown below, the calculated preferences for recipes
which were not used to train the Bayes net are quite close to the
actual survey data, which essentially reflects the following
preferences
(Sample ratings are on a 1-10 scale):
\begin{description} \begin{description}
\item[Diner 1] No allergies, prefers all dishes equally (5) \item[Diner 1] Prefers all dishes equally (5)
\item[Diner 2] Vegetarian, meat dishes are (1), remainder are (9) \item[Diner 2] Vegetarian, meat dishes are (1), remainder are (9)
\item[Diner 3] Prefers meat (6) to vegetarian (4) to dessert (3)
\item[Diner 3] Nut Alleregy, prefers meat (6) to vegetarian (4) to desert (3) \item[Diner 4] Prefers Pork and Desserts (9), remainder are (3)
\item[Diner 4] No allergies, prefers Pork and Desserts (9), remainder are (3)
\end{description} \end{description}
Next, we calculate the correlation between the application's ranking of all dishes and the actual ranking as determined by the user surveys. We suggest that a high degree of correlation indicates that the system has the potential to accurately appraise constrained group food preferences for dishes which are not part of the survey, given sufficiently detailed recipe information. As \ref{rms-table} shows, the estimated food preferences are quite close to the actual mean ratings over all diners for the dishes which were not used to train the Bayes net. The root mean-square-error for calculated vs. surveyed meal preferences is approximately 1.0. Next, we calculate the error between the application's ranking of all
dishes and the actual ranking as determined by the user surveys. We
suggest that a low error indicates that the system has the potential
to accurately appraise constrained group food preferences for dishes
which are not part of the survey, given sufficiently detailed recipe
information. As the Table and Figure \ref{result} show, the estimated food preferences are quite close to the actual mean ratings over all diners for the dishes which were not used to train the Bayes net. The root mean-square-error for calculated vs. surveyed meal preferences is approximately 1.92.
\begin{figure}[h!] \begin{table}[ht]
\begin{tabular}{ | l | l | l | }
\hline
Dish & Est. & Actual Avg.\\ \hline
Southwest Smoothie: & &\\
DAIRY & 5 & 5.5 \\ \hline
Bayou Shrimp Creole: & &\\
TOMATO & 9 & 3.75 \\ \hline
Crab Burgers: & &\\
EGGS & 5 & 3.75 \\ \hline
Broiled Flounder: & &\\
GENERIC NUTS, EGGS & 5 & 3.75 \\ \hline
Baked Steak And Lima Beans: & &\\
TOMATO, SUGAR & 2 & 3.75 \\ \hline
Eggplant Lasagna: & &\\
GLUTEN & 5 & 5.25 \\ \hline
Salisbury Steak: & &\\
GLUTEN, DAIRY, BEEF & 6 & 3.75 \\ \hline
Meatless Loaf: & &\\
SPICE & 5 & 5.25 \\ \hline
Lemon Pork Chops: & &\\
PORK, SUGAR & 5 & 5.25 \\ \hline
Fava Bean Burgers: & &\\
EGGS, POTATO & 3 & 5.25 \\ \hline
Angel Hair Pesto Primavera: & &\\
GENERIC NUTS, SPICE & 5 & 5.25 \\ \hline
%\hline
\end{tabular}
\end{table}
\begin{figure}[ht]
\centering \centering
\includegraphics[width=0.5 \textwidth]{BayesChefChart.png} \includegraphics[width=0.5 \textwidth]{BayesChefChart.png}
\caption{Estimated vs. Actual Survey Dish Ratings} \caption{Estimated vs. surveyed dish ratings}
\label{result}
\end{figure} \end{figure}
Note the outlier at Dish \#2 (Bayou Shrimp Creole). The strong preference for this dish is a result of the ingredient list containing primarily shrimp and tomato. Unlike beef and pork, the seafood category was not implemented in the knowledge enginerring of the net. Consequently, this dish is incorrectly deemed to be vegetarian-compatible. The same issue had previously occurred at Dish \#5 (Baked Steak and Lima Beans) until 'steak' was added to the recipe parser as a synonym for beef, and therefore a type of meat. Note the outlier at Dish \#2 (Bayou Shrimp Creole). The strong preference for this dish is a result of the ingredient list containing primarily shrimp and tomato. Unlike beef and pork, the seafood category was not implemented in the knowledge engineering of the net. Consequently, this dish is incorrectly deemed to be vegetarian-compatible. The same issue had previously occurred at Dish \#5 (Baked Steak and Lima Beans) until 'steak' was added to the recipe parser as a synonym for beef, and therefore a type of meat.
\section*{Conclusion}
We proposed, implemented and evaluated a food preference prediction system that
is capable of predicting how much a user would like a new, unseen recipe.
We discussed how to encode user preference towards ingredients and
categories in a Bayes Net and how to add control variables in order
to exclude dishes that users have to avoid, such as meat in the case of
vegetarians. Furthermore, we presented our learning
scheme for such a Bayes net using data from a small survey
and how to predict the user rating for unseen dishes.
In an evaluation we showed that the net can predict
preferences when learned from a sparse data set.
So in a real life setting, where people plan
a dinner with a catering service,
a few participants could rate a small amount of recipes
in an on-line service
and the system could actually predict the scores
on the rest of the caterers data base.
The top $k$ recipes with the highest predicted
rating could then be used to assemble the final
dinner.
\bibliographystyle{plain} \bibliographystyle{plain}
\bibliography{p2refs} \bibliography{p2refs}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

After

Width:  |  Height:  |  Size: 60 KiB

View File

@@ -10,8 +10,8 @@
AUTHOR = {van Boekel, M.A.J.S., Stein, A. and van Bruggen, A.H.C.}, AUTHOR = {van Boekel, M.A.J.S., Stein, A. and van Bruggen, A.H.C.},
TITLE = {{Bayesian Statistics} and {Quality Modelling} in the {Agro-food Production Chain}}, TITLE = {{Bayesian Statistics} and {Quality Modelling} in the {Agro-food Production Chain}},
YEAR = {2004}, YEAR = {2004},
JOURNAL = {Proceedings of the Frontis workshop} JOURNAL = {Proceedings of the Frontis workshop},
CITY = {Wageningen} CITY = {Wageningen},
VOLUME = {3} VOLUME = {3}
} }
@@ -19,7 +19,7 @@
AUTHOR = {David Corney}, AUTHOR = {David Corney},
TITLE = {{Designing Food} with {Bayesian Belief Networks}}, TITLE = {{Designing Food} with {Bayesian Belief Networks}},
YEAR = {2000}, YEAR = {2000},
JOURNAL = {ACDM} JOURNAL = {ACDM},
CITY = {London} CITY = {London}
} }
@@ -27,7 +27,7 @@
AUTHOR = {Tran The Truyen and Dinh Q. Phung and Svetha Venkatesh}, AUTHOR = {Tran The Truyen and Dinh Q. Phung and Svetha Venkatesh},
TITLE = {Preference {Networks}: {Probabilistic} {Models} for {Recommendation} {Systems}}, TITLE = {Preference {Networks}: {Probabilistic} {Models} for {Recommendation} {Systems}},
YEAR = {2007}, YEAR = {2007},
JOURNAL = {Proceedings of the {Sixth Australasian Conference} on {Data Mining}} JOURNAL = {Proceedings of the {Sixth Australasian Conference} on {Data Mining}},
CITY = {London} CITY = {London}
} }
@@ -36,7 +36,7 @@
AUTHOR = {Janzen, M. and Xiang, Y.}, AUTHOR = {Janzen, M. and Xiang, Y.},
TITLE = {{Probabilistic Reasoning} in {Meal Planning} in {Intelligent Fridges}}, TITLE = {{Probabilistic Reasoning} in {Meal Planning} in {Intelligent Fridges}},
YEAR = {2003}, YEAR = {2003},
JOURNAL = {16th Conference of the Canadian Society for Computational Studies of Intelligence} JOURNAL = {16th Conference of the Canadian Society for Computational Studies of Intelligence},
CITY = {Guelph} CITY = {Guelph}
} }