From 14bc76949323434d297caf9487867701905e09fe Mon Sep 17 00:00:00 2001 From: Woody Folsom Date: Wed, 12 Dec 2012 19:34:32 -0500 Subject: [PATCH] Added connect.bat, made minor changes to kgsGtp.ini. ScratchGo can now easily connect to gokgs.com's server. It will look for room whf4cs6999 as whf4human (to avoid creating more accounts) and will connect to or wait for whf4cs6999. --- GoGame.log | 0 build.xml | 11 +- connect.bat | 1 + data/gogame.cfg | 12 +- data/kgsGtp.ini | 11 +- gofree.txt | 3 + kgsGtp.ini | 11 + log4j.xml | 19 ++ pass.net | 42 +++ rrt.bat | 1 + rrt.rootpar-amaf.black.txt | 85 +++++ rrt/rrt.alphabeta.black.txt | 84 +++++ rrt/rrt.amaf.black.txt | 85 +++++ rrt/rrt.random.black.txt | 86 +++++ rrt/rrt.rave.black.txt | 83 +++++ rrt/rrt.rootpar-nn.black.txt | 85 +++++ rrt/rrt.rootpar.black.txt | 85 +++++ rrt/rrt.smaf.black.txt | 85 +++++ rrt/rrt.uct.black | 85 +++++ src/net/woodyfolsom/msproj/GameRecord.java | 8 + src/net/woodyfolsom/msproj/GameState.java | 8 + src/net/woodyfolsom/msproj/GoGame.java | 12 +- src/net/woodyfolsom/msproj/RoundRobin.java | 115 +++++++ .../woodyfolsom/msproj/StandAloneGame.java | 12 +- .../woodyfolsom/msproj/ann/Connection.java | 5 - .../msproj/ann/FusekiFilterTrainer.java | 292 +++++++++++++++++ .../msproj/ann/MultiLayerPerceptron.java | 2 +- .../msproj/ann/PassFilterTrainer.java | 298 ++++++++++++++++++ src/net/woodyfolsom/msproj/ann/TTTFilter.java | 34 -- .../msproj/ann/TTTFilterTrainer.java | 6 +- .../msproj/ann/TemporalDifference.java | 29 +- src/net/woodyfolsom/msproj/ann/XORFilter.java | 2 +- .../woodyfolsom/msproj/policy/AlphaBeta.java | 5 + .../msproj/policy/HumanGuiInput.java | 5 + .../msproj/policy/HumanKeyboardInput.java | 5 + .../woodyfolsom/msproj/policy/Minimax.java | 7 +- .../msproj/policy/MonteCarloAMAF.java | 9 +- .../msproj/policy/MonteCarloSMAF.java | 4 + .../msproj/policy/MonteCarloUCT.java | 9 +- .../msproj/policy/NeuralNetPolicy.java | 144 +++++++++ src/net/woodyfolsom/msproj/policy/Policy.java | 10 +- .../msproj/policy/PolicyFactory.java | 14 + .../msproj/policy/RandomMovePolicy.java | 5 + .../msproj/policy/RootParAMAF.java | 186 +++++++++++ .../msproj/policy/RootParSMAF.java | 186 +++++++++++ .../msproj/policy/RootParallelization.java | 27 +- .../msproj/tictactoe/NNDataSetFactory.java | 133 +++++++- .../woodyfolsom/msproj/ann/TTTFilterTest.java | 100 ------ .../woodyfolsom/msproj/ann/XORFilterTest.java | 6 +- ttt.net | 174 ++++++---- 50 files changed, 2462 insertions(+), 264 deletions(-) create mode 100644 GoGame.log create mode 100644 connect.bat create mode 100644 gofree.txt create mode 100644 kgsGtp.ini create mode 100644 log4j.xml create mode 100644 pass.net create mode 100644 rrt.bat create mode 100644 rrt.rootpar-amaf.black.txt create mode 100644 rrt/rrt.alphabeta.black.txt create mode 100644 rrt/rrt.amaf.black.txt create mode 100644 rrt/rrt.random.black.txt create mode 100644 rrt/rrt.rave.black.txt create mode 100644 rrt/rrt.rootpar-nn.black.txt create mode 100644 rrt/rrt.rootpar.black.txt create mode 100644 rrt/rrt.smaf.black.txt create mode 100644 rrt/rrt.uct.black create mode 100644 src/net/woodyfolsom/msproj/RoundRobin.java create mode 100644 src/net/woodyfolsom/msproj/ann/FusekiFilterTrainer.java create mode 100644 src/net/woodyfolsom/msproj/ann/PassFilterTrainer.java delete mode 100644 src/net/woodyfolsom/msproj/ann/TTTFilter.java create mode 100644 src/net/woodyfolsom/msproj/policy/NeuralNetPolicy.java create mode 100644 src/net/woodyfolsom/msproj/policy/PolicyFactory.java create mode 100644 
src/net/woodyfolsom/msproj/policy/RootParAMAF.java create mode 100644 src/net/woodyfolsom/msproj/policy/RootParSMAF.java delete mode 100644 test/net/woodyfolsom/msproj/ann/TTTFilterTest.java diff --git a/GoGame.log b/GoGame.log new file mode 100644 index 0000000..e69de29 diff --git a/build.xml b/build.xml index 19cde56..85959f7 100644 --- a/build.xml +++ b/build.xml @@ -33,9 +33,16 @@ - + + + + + + + + diff --git a/connect.bat b/connect.bat new file mode 100644 index 0000000..f727a78 --- /dev/null +++ b/connect.bat @@ -0,0 +1 @@ +java -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.GoGame \ No newline at end of file diff --git a/data/gogame.cfg b/data/gogame.cfg index abcbf5c..452337a 100644 --- a/data/gogame.cfg +++ b/data/gogame.cfg @@ -1,10 +1,10 @@ -PlayerOne=SMAF -PlayerTwo=RANDOM +PlayerOne=ROOT_PAR_AMAF //HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM, RAVE, SMAF, ROOT_PAR_AMAF +PlayerTwo=Random GUIDelay=1000 //1 second -BoardSize=9 -Komi=6.5 +BoardSize=13 //9, 13 or 19 +Komi=6.5 //suggested 6.5 NumGames=1 //Games for each color per player -TurnTime=2000 //seconds per player per turn -SpectatorBoardShown=true +TurnTime=6000 //seconds per player per turn +SpectatorBoardShown=true //set to true for modes which otherwise wouldn't show GUI. false for HUMAN_GUI player. WhiteMoveLogged=false BlackMoveLogged=true \ No newline at end of file diff --git a/data/kgsGtp.ini b/data/kgsGtp.ini index ca673d5..3124b69 100644 --- a/data/kgsGtp.ini +++ b/data/kgsGtp.ini @@ -1,12 +1,11 @@ engine=java -cp GoGame.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.GoGame montecarlo -name=whf4cs6999 -password=6id39p +name=whf4human +password=t3snxf room=whf4cs6999 -mode=custom +mode=auto talk=I'm a Monte Carlo tree search bot. -opponent=whf4human reconnect=t -automatch.rank=25k rules=chinese rules.boardSize=9 -rules.time=0 \ No newline at end of file +rules.time=0 +opponent=whf4cs6999 \ No newline at end of file diff --git a/gofree.txt b/gofree.txt new file mode 100644 index 0000000..8440a22 --- /dev/null +++ b/gofree.txt @@ -0,0 +1,3 @@ +UCT-RAVE vs GoFree +level 1 (black) 2/2 +level 2 (black) 1/1 \ No newline at end of file diff --git a/kgsGtp.ini b/kgsGtp.ini new file mode 100644 index 0000000..b9976db --- /dev/null +++ b/kgsGtp.ini @@ -0,0 +1,11 @@ +engine=java -cp GoGame.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.GoGame montecarlo +name=whf4human +password=t3snxf +room=whf4cs6999 +mode=auto +talk=I'm a Monte Carlo tree search bot. 
+reconnect=t +rules=chinese +rules.boardSize=13 +rules.time=0 +opponent=whf4cs6999 \ No newline at end of file diff --git a/log4j.xml b/log4j.xml new file mode 100644 index 0000000..d6c9e0d --- /dev/null +++ b/log4j.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/pass.net b/pass.net new file mode 100644 index 0000000..1f4d702 --- /dev/null +++ b/pass.net @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + 2 + + + 3 + 4 + + + 5 + + diff --git a/rrt.bat b/rrt.bat new file mode 100644 index 0000000..070ea67 --- /dev/null +++ b/rrt.bat @@ -0,0 +1 @@ +java -Xms256m -Xmx4096m -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin \ No newline at end of file diff --git a/rrt.rootpar-amaf.black.txt b/rrt.rootpar-amaf.black.txt new file mode 100644 index 0000000..4a43b94 --- /dev/null +++ b/rrt.rootpar-amaf.black.txt @@ -0,0 +1,85 @@ + +C:\workspace\msproj\dist>java -Xms256m -Xmx4096m -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin +Beginning round-robin tournament. +Initializing policies... +Game over. Result: B+5.5 +RootParallelization (Black) vs Random (White) : B+5.5 +Game over. Result: B+30.5 +RootParallelization (Black) vs Random (White) : B+30.5 +Game over. Result: B+0.5 +RootParallelization (Black) vs Random (White) : B+0.5 +Game over. Result: B+18.5 +RootParallelization (Black) vs Random (White) : B+18.5 +Game over. Result: B+18.5 +RootParallelization (Black) vs Random (White) : B+18.5 +Game over. Result: B+3.5 +RootParallelization (Black) vs Alpha-Beta (White) : B+3.5 +Game over. Result: B+0.5 +RootParallelization (Black) vs Alpha-Beta (White) : B+0.5 +Game over. Result: B+46.5 +RootParallelization (Black) vs Alpha-Beta (White) : B+46.5 +Game over. Result: B+44.5 +RootParallelization (Black) vs Alpha-Beta (White) : B+44.5 +Game over. Result: B+53.5 +RootParallelization (Black) vs Alpha-Beta (White) : B+53.5 +Game over. Result: B+14.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+14.5 +Game over. Result: B+30.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+30.5 +Game over. Result: B+9.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+9.5 +Game over. Result: B+44.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+44.5 +Game over. Result: B+29.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+29.5 +Game over. Result: B+4.5 +RootParallelization (Black) vs UCT-RAVE (White) : B+4.5 +Game over. Result: B+27.5 +RootParallelization (Black) vs UCT-RAVE (White) : B+27.5 +Game over. Result: B+29.5 +RootParallelization (Black) vs UCT-RAVE (White) : B+29.5 +Game over. Result: B+22.5 +RootParallelization (Black) vs UCT-RAVE (White) : B+22.5 +Game over. Result: B+36.5 +RootParallelization (Black) vs UCT-RAVE (White) : B+36.5 +Game over. Result: B+50.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : B+50.5 +Game over. Result: B+42.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : B+42.5 +Game over. Result: B+28.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : B+28.5 +Game over. Result: B+38.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : B+38.5 +Game over. Result: B+23.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : B+23.5 +Game over. Result: B+7.5 +RootParallelization (Black) vs RootParallelization (White) : B+7.5 +Game over. Result: W+16.5 +RootParallelization (Black) vs RootParallelization (White) : W+16.5 +Game over. 
Result: B+0.5 +RootParallelization (Black) vs RootParallelization (White) : B+0.5 +Game over. Result: B+8.5 +RootParallelization (Black) vs RootParallelization (White) : B+8.5 +Game over. Result: W+19.5 +RootParallelization (Black) vs RootParallelization (White) : W+19.5 +Game over. Result: B+13.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : B+13.5 +Game over. Result: B+2.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : B+2.5 +Game over. Result: B+16.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : B+16.5 +Game over. Result: B+32.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : B+32.5 +Game over. Result: B+8.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : B+8.5 + +Tournament Win Rates +==================== +RootParallelization (Black) vs Random (White) : 100% +RootParallelization (Black) vs Alpha-Beta (White) : 100% +RootParallelization (Black) vs MonteCarloUCT (White) : 100% +RootParallelization (Black) vs UCT-RAVE (White) : 100% +RootParallelization (Black) vs MonteCarloSMAF (White) : 100% +RootParallelization (Black) vs RootParallelization (White) : 60% +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : 100% +Tournament lasted 1597.948 seconds. diff --git a/rrt/rrt.alphabeta.black.txt b/rrt/rrt.alphabeta.black.txt new file mode 100644 index 0000000..3ab33ad --- /dev/null +++ b/rrt/rrt.alphabeta.black.txt @@ -0,0 +1,84 @@ + +C:\workspace\msproj\dist>java -Xms256m -Xmx4096m -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin +Beginning round-robin tournament. +Initializing policies... +Game over. Result: B+2.5 +Alpha-Beta (Black) vs Random (White) : B+2.5 +Game over. Result: W+3.5 +Alpha-Beta (Black) vs Random (White) : W+3.5 +Game over. Result: W+4.5 +Alpha-Beta (Black) vs Random (White) : W+4.5 +Game over. Result: W+1.5 +Alpha-Beta (Black) vs Random (White) : W+1.5 +Game over. Result: W+2.5 +Alpha-Beta (Black) vs Random (White) : W+2.5 +Game over. Result: B+40.5 +Alpha-Beta (Black) vs Alpha-Beta (White) : B+40.5 +Game over. Result: B+40.5 +Alpha-Beta (Black) vs Alpha-Beta (White) : B+40.5 +Game over. Result: B+40.5 +Alpha-Beta (Black) vs Alpha-Beta (White) : B+40.5 +Game over. Result: B+40.5 +Alpha-Beta (Black) vs Alpha-Beta (White) : B+40.5 +Game over. Result: B+40.5 +Alpha-Beta (Black) vs Alpha-Beta (White) : B+40.5 +Game over. Result: W+17.5 +Alpha-Beta (Black) vs MonteCarloUCT (White) : W+17.5 +Game over. Result: W+40.5 +Alpha-Beta (Black) vs MonteCarloUCT (White) : W+40.5 +Game over. Result: W+18.5 +Alpha-Beta (Black) vs MonteCarloUCT (White) : W+18.5 +Game over. Result: W+30.5 +Alpha-Beta (Black) vs MonteCarloUCT (White) : W+30.5 +Game over. Result: W+33.5 +Alpha-Beta (Black) vs MonteCarloUCT (White) : W+33.5 +Game over. Result: W+32.5 +Alpha-Beta (Black) vs UCT-RAVE (White) : W+32.5 +Game over. Result: W+41.5 +Alpha-Beta (Black) vs UCT-RAVE (White) : W+41.5 +Game over. Result: W+36.5 +Alpha-Beta (Black) vs UCT-RAVE (White) : W+36.5 +Game over. Result: W+40.5 +Alpha-Beta (Black) vs UCT-RAVE (White) : W+40.5 +Game over. Result: W+34.5 +Alpha-Beta (Black) vs UCT-RAVE (White) : W+34.5 +Game over. Result: W+6.5 +Alpha-Beta (Black) vs MonteCarloSMAF (White) : W+6.5 +Game over. Result: W+23.5 +Alpha-Beta (Black) vs MonteCarloSMAF (White) : W+23.5 +Game over. Result: W+18.5 +Alpha-Beta (Black) vs MonteCarloSMAF (White) : W+18.5 +Game over. 
Result: W+33.5 +Alpha-Beta (Black) vs MonteCarloSMAF (White) : W+33.5 +Game over. Result: W+40.5 +Alpha-Beta (Black) vs MonteCarloSMAF (White) : W+40.5 +Game over. Result: W+1.5 +Alpha-Beta (Black) vs RootParallelization (White) : W+1.5 +Game over. Result: W+4.5 +Alpha-Beta (Black) vs RootParallelization (White) : W+4.5 +Game over. Result: W+0.5 +Alpha-Beta (Black) vs RootParallelization (White) : W+0.5 +Game over. Result: W+0.5 +Alpha-Beta (Black) vs RootParallelization (White) : W+0.5 +Game over. Result: W+35.5 +Alpha-Beta (Black) vs RootParallelization (White) : W+35.5 +Game over. Result: W+3.5 +Alpha-Beta (Black) vs RootParallelization-NeuralNet (White) : W+3.5 +Game over. Result: W+0.5 +Alpha-Beta (Black) vs RootParallelization-NeuralNet (White) : W+0.5 +Game over. Result: W+1.5 +Alpha-Beta (Black) vs RootParallelization-NeuralNet (White) : W+1.5 +Game over. Result: W+4.5 +Alpha-Beta (Black) vs RootParallelization-NeuralNet (White) : W+4.5 +Game over. Result: W+4.5 +Alpha-Beta (Black) vs RootParallelization-NeuralNet (White) : W+4.5 + +Tournament Win Rates +==================== +Alpha-Beta (Black) vs Random (White) : 20% +Alpha-Beta (Black) vs Alpha-Beta (White) : 100% +Alpha-Beta (Black) vs MonteCarloUCT (White) : 00% +Alpha-Beta (Black) vs UCT-RAVE (White) : 00% +Alpha-Beta (Black) vs MonteCarloSMAF (White) : 00% +Alpha-Beta (Black) vs RootParallelization (White) : 00% +Alpha-Beta (Black) vs RootParallelization-NeuralNet (White) : 00% diff --git a/rrt/rrt.amaf.black.txt b/rrt/rrt.amaf.black.txt new file mode 100644 index 0000000..6f9fd0e --- /dev/null +++ b/rrt/rrt.amaf.black.txt @@ -0,0 +1,85 @@ + +C:\workspace\msproj\dist>java -Xms256m -Xmx4096m -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin +Beginning round-robin tournament. +Initializing policies... +Game over. Result: B+40.5 +UCT-RAVE (Black) vs Random (White) : B+40.5 +Game over. Result: B+3.5 +UCT-RAVE (Black) vs Random (White) : B+3.5 +Game over. Result: B+15.5 +UCT-RAVE (Black) vs Random (White) : B+15.5 +Game over. Result: B+54.5 +UCT-RAVE (Black) vs Random (White) : B+54.5 +Game over. Result: B+18.5 +UCT-RAVE (Black) vs Random (White) : B+18.5 +Game over. Result: B+31.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+31.5 +Game over. Result: B+17.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+17.5 +Game over. Result: W+9.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : W+9.5 +Game over. Result: B+34.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+34.5 +Game over. Result: W+9.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : W+9.5 +Game over. Result: B+2.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+2.5 +Game over. Result: B+36.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+36.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+9.5 +Game over. Result: W+2.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : W+2.5 +Game over. Result: B+1.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+1.5 +Game over. Result: B+22.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+22.5 +Game over. Result: B+5.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+5.5 +Game over. Result: B+2.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+2.5 +Game over. Result: B+11.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+11.5 +Game over. Result: W+11.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : W+11.5 +Game over. Result: B+7.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : B+7.5 +Game over. Result: B+39.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : B+39.5 +Game over. 
Result: W+15.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+15.5 +Game over. Result: W+22.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+22.5 +Game over. Result: W+3.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+3.5 +Game over. Result: B+20.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+20.5 +Game over. Result: B+29.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+29.5 +Game over. Result: B+41.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+41.5 +Game over. Result: B+36.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+36.5 +Game over. Result: B+18.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+18.5 +Game over. Result: B+54.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+54.5 +Game over. Result: B+7.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+7.5 +Game over. Result: B+19.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+19.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+9.5 +Game over. Result: B+3.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+3.5 + +Tournament Win Rates +==================== +UCT-RAVE (Black) vs Random (White) : 100% +UCT-RAVE (Black) vs Alpha-Beta (White) : 60% +UCT-RAVE (Black) vs MonteCarloUCT (White) : 80% +UCT-RAVE (Black) vs UCT-RAVE (White) : 80% +UCT-RAVE (Black) vs MonteCarloSMAF (White) : 40% +UCT-RAVE (Black) vs RootParallelization (White) : 100% +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : 100% +Tournament lasted 1476.893 seconds. diff --git a/rrt/rrt.random.black.txt b/rrt/rrt.random.black.txt new file mode 100644 index 0000000..af15bdc --- /dev/null +++ b/rrt/rrt.random.black.txt @@ -0,0 +1,86 @@ + +C:\workspace\msproj\dist>java -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin +Beginning round-robin tournament. +Initializing policies... +Game over. Result: W+6.5 +Random (Black) vs Random (White) : W+6.5 +Game over. Result: B+1.5 +Random (Black) vs Random (White) : B+1.5 +Game over. Result: B+7.5 +Random (Black) vs Random (White) : B+7.5 +Game over. Result: W+0.5 +Random (Black) vs Random (White) : W+0.5 +Game over. Result: B+1.5 +Random (Black) vs Random (White) : B+1.5 +Game over. Result: B+28.5 +Random (Black) vs Alpha-Beta (White) : B+28.5 +Game over. Result: B+1.5 +Random (Black) vs Alpha-Beta (White) : B+1.5 +Game over. Result: B+29.5 +Random (Black) vs Alpha-Beta (White) : B+29.5 +Game over. Result: B+47.5 +Random (Black) vs Alpha-Beta (White) : B+47.5 +Game over. Result: B+22.5 +Random (Black) vs Alpha-Beta (White) : B+22.5 +Game over. Result: W+22.5 +Random (Black) vs MonteCarloUCT (White) : W+22.5 +Game over. Result: W+6.5 +Random (Black) vs MonteCarloUCT (White) : W+6.5 +Game over. Result: W+5.5 +Random (Black) vs MonteCarloUCT (White) : W+5.5 +Game over. Result: W+12.5 +Random (Black) vs MonteCarloUCT (White) : W+12.5 +Game over. Result: W+35.5 +Random (Black) vs MonteCarloUCT (White) : W+35.5 +Game over. Result: W+14.5 +Random (Black) vs UCT-RAVE (White) : W+14.5 +Game over. Result: W+18.5 +Random (Black) vs UCT-RAVE (White) : W+18.5 +Game over. Result: W+3.5 +Random (Black) vs UCT-RAVE (White) : W+3.5 +Game over. Result: W+5.5 +Random (Black) vs UCT-RAVE (White) : W+5.5 +Game over. Result: W+32.5 +Random (Black) vs UCT-RAVE (White) : W+32.5 +Game over. Result: W+19.5 +Random (Black) vs MonteCarloSMAF (White) : W+19.5 +Game over. Result: W+26.5 +Random (Black) vs MonteCarloSMAF (White) : W+26.5 +Game over. 
Result: W+19.5 +Random (Black) vs MonteCarloSMAF (White) : W+19.5 +Game over. Result: W+8.5 +Random (Black) vs MonteCarloSMAF (White) : W+8.5 +Game over. Result: W+13.5 +Random (Black) vs MonteCarloSMAF (White) : W+13.5 +Game over. Result: W+9.5 +Random (Black) vs RootParallelization (White) : W+9.5 +Game over. Result: W+4.5 +Random (Black) vs RootParallelization (White) : W+4.5 +Game over. Result: W+8.5 +Random (Black) vs RootParallelization (White) : W+8.5 +Game over. Result: W+39.5 +Random (Black) vs RootParallelization (White) : W+39.5 +Game over. Result: W+0.5 +Random (Black) vs RootParallelization (White) : W+0.5 +Game over. Result: W+10.5 +Random (Black) vs RootParallelization-NeuralNet (White) : W+10.5 +Game over. Result: W+11.5 +Random (Black) vs RootParallelization-NeuralNet (White) : W+11.5 +Game over. Result: W+1.5 +Random (Black) vs RootParallelization-NeuralNet (White) : W+1.5 +Game over. Result: W+3.5 +Random (Black) vs RootParallelization-NeuralNet (White) : W+3.5 +Game over. Result: W+10.5 +Random (Black) vs RootParallelization-NeuralNet (White) : W+10.5 +Game over. Result: W+40.5 + + +Tournament Win Rates +==================== +Random (Black) vs Random (White) : 40% +Random (Black) vs Alpha-Beta (White) : 100% +Random (Black) vs MonteCarloUCT (White) : 00% +Random (Black) vs UCT-RAVE (White) : 00% +Random (Black) vs MonteCarloSMAF (White) : 00% +Random (Black) vs RootParallelization (White) : 00% +Random (Black) vs RootParallelization-NeuralNet (White) : 00% diff --git a/rrt/rrt.rave.black.txt b/rrt/rrt.rave.black.txt new file mode 100644 index 0000000..8a21c47 --- /dev/null +++ b/rrt/rrt.rave.black.txt @@ -0,0 +1,83 @@ +Beginning round-robin tournament. +Initializing policies... +Game over. Result: B+8.5 +UCT-RAVE (Black) vs Random (White) : B+8.5 +Game over. Result: B+31.5 +UCT-RAVE (Black) vs Random (White) : B+31.5 +Game over. Result: B+16.5 +UCT-RAVE (Black) vs Random (White) : B+16.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs Random (White) : B+9.5 +Game over. Result: B+16.5 +UCT-RAVE (Black) vs Random (White) : B+16.5 +Game over. Result: B+48.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+48.5 +Game over. Result: W+5.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : W+5.5 +Game over. Result: B+13.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+13.5 +Game over. Result: B+34.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+34.5 +Game over. Result: B+1.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+1.5 +Game over. Result: B+2.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+2.5 +Game over. Result: B+7.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+7.5 +Game over. Result: W+4.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : W+4.5 +Game over. Result: B+3.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+3.5 +Game over. Result: B+6.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+6.5 +Game over. Result: B+3.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+3.5 +Game over. Result: B+2.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+2.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+9.5 +Game over. Result: B+0.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+0.5 +Game over. Result: W+13.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : W+13.5 +Game over. Result: W+0.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+0.5 +Game over. Result: B+1.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : B+1.5 +Game over. Result: W+0.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+0.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : B+9.5 +Game over. 
Result: W+20.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+20.5 +Game over. Result: B+13.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+13.5 +Game over. Result: W+16.5 +UCT-RAVE (Black) vs RootParallelization (White) : W+16.5 +Game over. Result: B+28.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+28.5 +Game over. Result: B+25.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+25.5 +Game over. Result: B+25.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+25.5 +Game over. Result: B+48.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+48.5 +Game over. Result: B+6.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+6.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+9.5 +Game over. Result: B+55.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+55.5 +Game over. Result: B+42.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+42.5 + +Tournament Win Rates +==================== +UCT-RAVE (Black) vs Random (White) : 100% +UCT-RAVE (Black) vs Alpha-Beta (White) : 80% +UCT-RAVE (Black) vs MonteCarloUCT (White) : 80% +UCT-RAVE (Black) vs UCT-RAVE (White) : 80% +UCT-RAVE (Black) vs MonteCarloSMAF (White) : 40% +UCT-RAVE (Black) vs RootParallelization (White) : 80% +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : 100% +Tournament lasted 1458.494 seconds. \ No newline at end of file diff --git a/rrt/rrt.rootpar-nn.black.txt b/rrt/rrt.rootpar-nn.black.txt new file mode 100644 index 0000000..c803673 --- /dev/null +++ b/rrt/rrt.rootpar-nn.black.txt @@ -0,0 +1,85 @@ + +C:\workspace\msproj\dist>java -Xms256m -Xmx4096m -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin +Beginning round-robin tournament. +Initializing policies... +Game over. Result: B+6.5 +RootParallelization-NeuralNet (Black) vs Random (White) : B+6.5 +Game over. Result: B+0.5 +RootParallelization-NeuralNet (Black) vs Random (White) : B+0.5 +Game over. Result: B+5.5 +RootParallelization-NeuralNet (Black) vs Random (White) : B+5.5 +Game over. Result: B+19.5 +RootParallelization-NeuralNet (Black) vs Random (White) : B+19.5 +Game over. Result: B+2.5 +RootParallelization-NeuralNet (Black) vs Random (White) : B+2.5 +Game over. Result: B+21.5 +RootParallelization-NeuralNet (Black) vs Alpha-Beta (White) : B+21.5 +Game over. Result: W+12.5 +RootParallelization-NeuralNet (Black) vs Alpha-Beta (White) : W+12.5 +Game over. Result: B+23.5 +RootParallelization-NeuralNet (Black) vs Alpha-Beta (White) : B+23.5 +Game over. Result: B+23.5 +RootParallelization-NeuralNet (Black) vs Alpha-Beta (White) : B+23.5 +Game over. Result: W+9.5 +RootParallelization-NeuralNet (Black) vs Alpha-Beta (White) : W+9.5 +Game over. Result: B+29.5 +RootParallelization-NeuralNet (Black) vs MonteCarloUCT (White) : B+29.5 +Game over. Result: B+9.5 +RootParallelization-NeuralNet (Black) vs MonteCarloUCT (White) : B+9.5 +Game over. Result: W+50.5 +RootParallelization-NeuralNet (Black) vs MonteCarloUCT (White) : W+50.5 +Game over. Result: B+9.5 +RootParallelization-NeuralNet (Black) vs MonteCarloUCT (White) : B+9.5 +Game over. Result: B+7.5 +RootParallelization-NeuralNet (Black) vs MonteCarloUCT (White) : B+7.5 +Game over. Result: W+12.5 +RootParallelization-NeuralNet (Black) vs UCT-RAVE (White) : W+12.5 +Game over. Result: W+9.5 +RootParallelization-NeuralNet (Black) vs UCT-RAVE (White) : W+9.5 +Game over. Result: W+29.5 +RootParallelization-NeuralNet (Black) vs UCT-RAVE (White) : W+29.5 +Game over. 
Result: W+10.5 +RootParallelization-NeuralNet (Black) vs UCT-RAVE (White) : W+10.5 +Game over. Result: W+27.5 +RootParallelization-NeuralNet (Black) vs UCT-RAVE (White) : W+27.5 +Game over. Result: W+2.5 +RootParallelization-NeuralNet (Black) vs MonteCarloSMAF (White) : W+2.5 +Game over. Result: W+22.5 +RootParallelization-NeuralNet (Black) vs MonteCarloSMAF (White) : W+22.5 +Game over. Result: W+10.5 +RootParallelization-NeuralNet (Black) vs MonteCarloSMAF (White) : W+10.5 +Game over. Result: W+41.5 +RootParallelization-NeuralNet (Black) vs MonteCarloSMAF (White) : W+41.5 +Game over. Result: W+18.5 +RootParallelization-NeuralNet (Black) vs MonteCarloSMAF (White) : W+18.5 +Game over. Result: B+3.5 +RootParallelization-NeuralNet (Black) vs RootParallelization (White) : B+3.5 +Game over. Result: W+10.5 +RootParallelization-NeuralNet (Black) vs RootParallelization (White) : W+10.5 +Game over. Result: W+14.5 +RootParallelization-NeuralNet (Black) vs RootParallelization (White) : W+14.5 +Game over. Result: W+5.5 +RootParallelization-NeuralNet (Black) vs RootParallelization (White) : W+5.5 +Game over. Result: W+6.5 +RootParallelization-NeuralNet (Black) vs RootParallelization (White) : W+6.5 +Game over. Result: W+8.5 +RootParallelization-NeuralNet (Black) vs RootParallelization-NeuralNet (White) : W+8.5 +Game over. Result: W+11.5 +RootParallelization-NeuralNet (Black) vs RootParallelization-NeuralNet (White) : W+11.5 +Game over. Result: W+6.5 +RootParallelization-NeuralNet (Black) vs RootParallelization-NeuralNet (White) : W+6.5 +Game over. Result: B+2.5 +RootParallelization-NeuralNet (Black) vs RootParallelization-NeuralNet (White) : B+2.5 +Game over. Result: B+21.5 +RootParallelization-NeuralNet (Black) vs RootParallelization-NeuralNet (White) : B+21.5 + +Tournament Win Rates +==================== +RootParallelization-NeuralNet (Black) vs Random (White) : 100% +RootParallelization-NeuralNet (Black) vs Alpha-Beta (White) : 60% +RootParallelization-NeuralNet (Black) vs MonteCarloUCT (White) : 80% +RootParallelization-NeuralNet (Black) vs UCT-RAVE (White) : 00% +RootParallelization-NeuralNet (Black) vs MonteCarloSMAF (White) : 00% +RootParallelization-NeuralNet (Black) vs RootParallelization (White) : 20% +RootParallelization-NeuralNet (Black) vs RootParallelization-NeuralNet (White) : 40% +Tournament lasted 1400.277 seconds. diff --git a/rrt/rrt.rootpar.black.txt b/rrt/rrt.rootpar.black.txt new file mode 100644 index 0000000..cb74356 --- /dev/null +++ b/rrt/rrt.rootpar.black.txt @@ -0,0 +1,85 @@ + +C:\workspace\msproj\dist>java -Xms256m -Xmx4096m -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin +Beginning round-robin tournament. +Initializing policies... +Game over. Result: B+4.5 +RootParallelization (Black) vs Random (White) : B+4.5 +Game over. Result: B+4.5 +RootParallelization (Black) vs Random (White) : B+4.5 +Game over. Result: B+1.5 +RootParallelization (Black) vs Random (White) : B+1.5 +Game over. Result: B+1.5 +RootParallelization (Black) vs Random (White) : B+1.5 +Game over. Result: B+0.5 +RootParallelization (Black) vs Random (White) : B+0.5 +Game over. Result: B+20.5 +RootParallelization (Black) vs Alpha-Beta (White) : B+20.5 +Game over. Result: B+23.5 +RootParallelization (Black) vs Alpha-Beta (White) : B+23.5 +Game over. Result: W+9.5 +RootParallelization (Black) vs Alpha-Beta (White) : W+9.5 +Game over. Result: W+7.5 +RootParallelization (Black) vs Alpha-Beta (White) : W+7.5 +Game over. 
Result: B+25.5 +RootParallelization (Black) vs Alpha-Beta (White) : B+25.5 +Game over. Result: B+0.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+0.5 +Game over. Result: B+11.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+11.5 +Game over. Result: W+0.5 +RootParallelization (Black) vs MonteCarloUCT (White) : W+0.5 +Game over. Result: B+1.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+1.5 +Game over. Result: B+0.5 +RootParallelization (Black) vs MonteCarloUCT (White) : B+0.5 +Game over. Result: W+22.5 +RootParallelization (Black) vs UCT-RAVE (White) : W+22.5 +Game over. Result: W+63.5 +RootParallelization (Black) vs UCT-RAVE (White) : W+63.5 +Game over. Result: W+29.5 +RootParallelization (Black) vs UCT-RAVE (White) : W+29.5 +Game over. Result: W+58.5 +RootParallelization (Black) vs UCT-RAVE (White) : W+58.5 +Game over. Result: W+30.5 +RootParallelization (Black) vs UCT-RAVE (White) : W+30.5 +Game over. Result: W+15.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : W+15.5 +Game over. Result: W+62.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : W+62.5 +Game over. Result: W+57.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : W+57.5 +Game over. Result: W+57.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : W+57.5 +Game over. Result: W+12.5 +RootParallelization (Black) vs MonteCarloSMAF (White) : W+12.5 +Game over. Result: B+2.5 +RootParallelization (Black) vs RootParallelization (White) : B+2.5 +Game over. Result: W+6.5 +RootParallelization (Black) vs RootParallelization (White) : W+6.5 +Game over. Result: B+2.5 +RootParallelization (Black) vs RootParallelization (White) : B+2.5 +Game over. Result: W+5.5 +RootParallelization (Black) vs RootParallelization (White) : W+5.5 +Game over. Result: B+2.5 +RootParallelization (Black) vs RootParallelization (White) : B+2.5 +Game over. Result: W+8.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : W+8.5 +Game over. Result: W+6.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : W+6.5 +Game over. Result: W+6.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : W+6.5 +Game over. Result: B+3.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : B+3.5 +Game over. Result: W+13.5 +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : W+13.5 + +Tournament Win Rates +==================== +RootParallelization (Black) vs Random (White) : 100% +RootParallelization (Black) vs Alpha-Beta (White) : 60% +RootParallelization (Black) vs MonteCarloUCT (White) : 80% +RootParallelization (Black) vs UCT-RAVE (White) : 00% +RootParallelization (Black) vs MonteCarloSMAF (White) : 00% +RootParallelization (Black) vs RootParallelization (White) : 60% +RootParallelization (Black) vs RootParallelization-NeuralNet (White) : 20% +Tournament lasted 1367.523 seconds. diff --git a/rrt/rrt.smaf.black.txt b/rrt/rrt.smaf.black.txt new file mode 100644 index 0000000..9af9844 --- /dev/null +++ b/rrt/rrt.smaf.black.txt @@ -0,0 +1,85 @@ + +C:\workspace\msproj\dist>java -Xms256m -Xmx4096m -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin +Beginning round-robin tournament. +Initializing policies... +Game over. Result: B+8.5 +UCT-RAVE (Black) vs Random (White) : B+8.5 +Game over. Result: B+31.5 +UCT-RAVE (Black) vs Random (White) : B+31.5 +Game over. Result: B+16.5 +UCT-RAVE (Black) vs Random (White) : B+16.5 +Game over. 
Result: B+9.5 +UCT-RAVE (Black) vs Random (White) : B+9.5 +Game over. Result: B+16.5 +UCT-RAVE (Black) vs Random (White) : B+16.5 +Game over. Result: B+48.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+48.5 +Game over. Result: W+5.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : W+5.5 +Game over. Result: B+13.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+13.5 +Game over. Result: B+34.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+34.5 +Game over. Result: B+1.5 +UCT-RAVE (Black) vs Alpha-Beta (White) : B+1.5 +Game over. Result: B+2.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+2.5 +Game over. Result: B+7.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+7.5 +Game over. Result: W+4.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : W+4.5 +Game over. Result: B+3.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+3.5 +Game over. Result: B+6.5 +UCT-RAVE (Black) vs MonteCarloUCT (White) : B+6.5 +Game over. Result: B+3.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+3.5 +Game over. Result: B+2.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+2.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+9.5 +Game over. Result: B+0.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : B+0.5 +Game over. Result: W+13.5 +UCT-RAVE (Black) vs UCT-RAVE (White) : W+13.5 +Game over. Result: W+0.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+0.5 +Game over. Result: B+1.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : B+1.5 +Game over. Result: W+0.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+0.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : B+9.5 +Game over. Result: W+20.5 +UCT-RAVE (Black) vs MonteCarloSMAF (White) : W+20.5 +Game over. Result: B+13.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+13.5 +Game over. Result: W+16.5 +UCT-RAVE (Black) vs RootParallelization (White) : W+16.5 +Game over. Result: B+28.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+28.5 +Game over. Result: B+25.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+25.5 +Game over. Result: B+25.5 +UCT-RAVE (Black) vs RootParallelization (White) : B+25.5 +Game over. Result: B+48.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+48.5 +Game over. Result: B+6.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+6.5 +Game over. Result: B+9.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+9.5 +Game over. Result: B+55.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+55.5 +Game over. Result: B+42.5 +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : B+42.5 + +Tournament Win Rates +==================== +UCT-RAVE (Black) vs Random (White) : 100% +UCT-RAVE (Black) vs Alpha-Beta (White) : 80% +UCT-RAVE (Black) vs MonteCarloUCT (White) : 80% +UCT-RAVE (Black) vs UCT-RAVE (White) : 80% +UCT-RAVE (Black) vs MonteCarloSMAF (White) : 40% +UCT-RAVE (Black) vs RootParallelization (White) : 80% +UCT-RAVE (Black) vs RootParallelization-NeuralNet (White) : 100% +Tournament lasted 1458.494 seconds. diff --git a/rrt/rrt.uct.black b/rrt/rrt.uct.black new file mode 100644 index 0000000..417fbd6 --- /dev/null +++ b/rrt/rrt.uct.black @@ -0,0 +1,85 @@ + +C:\workspace\msproj\dist>java -Xms256m -Xmx4096m -cp GoGame.jar;antlrworks-1.4.3.jar;kgsGtp.jar;log4j-1.2.16.jar net.woodyfolsom.msproj.RoundRobin +Beginning round-robin tournament. +Initializing policies... +Game over. Result: B+0.5 +MonteCarloUCT (Black) vs Random (White) : B+0.5 +Game over. Result: B+9.5 +MonteCarloUCT (Black) vs Random (White) : B+9.5 +Game over. 
Result: B+24.5 +MonteCarloUCT (Black) vs Random (White) : B+24.5 +Game over. Result: B+10.5 +MonteCarloUCT (Black) vs Random (White) : B+10.5 +Game over. Result: B+4.5 +MonteCarloUCT (Black) vs Random (White) : B+4.5 +Game over. Result: B+15.5 +MonteCarloUCT (Black) vs Alpha-Beta (White) : B+15.5 +Game over. Result: B+22.5 +MonteCarloUCT (Black) vs Alpha-Beta (White) : B+22.5 +Game over. Result: B+32.5 +MonteCarloUCT (Black) vs Alpha-Beta (White) : B+32.5 +Game over. Result: W+12.5 +MonteCarloUCT (Black) vs Alpha-Beta (White) : W+12.5 +Game over. Result: B+23.5 +MonteCarloUCT (Black) vs Alpha-Beta (White) : B+23.5 +Game over. Result: B+0.5 +MonteCarloUCT (Black) vs MonteCarloUCT (White) : B+0.5 +Game over. Result: W+13.5 +MonteCarloUCT (Black) vs MonteCarloUCT (White) : W+13.5 +Game over. Result: W+11.5 +MonteCarloUCT (Black) vs MonteCarloUCT (White) : W+11.5 +Game over. Result: W+8.5 +MonteCarloUCT (Black) vs MonteCarloUCT (White) : W+8.5 +Game over. Result: W+9.5 +MonteCarloUCT (Black) vs MonteCarloUCT (White) : W+9.5 +Game over. Result: W+9.5 +MonteCarloUCT (Black) vs UCT-RAVE (White) : W+9.5 +Game over. Result: W+16.5 +MonteCarloUCT (Black) vs UCT-RAVE (White) : W+16.5 +Game over. Result: W+8.5 +MonteCarloUCT (Black) vs UCT-RAVE (White) : W+8.5 +Game over. Result: W+11.5 +MonteCarloUCT (Black) vs UCT-RAVE (White) : W+11.5 +Game over. Result: W+5.5 +MonteCarloUCT (Black) vs UCT-RAVE (White) : W+5.5 +Game over. Result: W+8.5 +MonteCarloUCT (Black) vs MonteCarloSMAF (White) : W+8.5 +Game over. Result: W+9.5 +MonteCarloUCT (Black) vs MonteCarloSMAF (White) : W+9.5 +Game over. Result: W+15.5 +MonteCarloUCT (Black) vs MonteCarloSMAF (White) : W+15.5 +Game over. Result: W+14.5 +MonteCarloUCT (Black) vs MonteCarloSMAF (White) : W+14.5 +Game over. Result: W+13.5 +MonteCarloUCT (Black) vs MonteCarloSMAF (White) : W+13.5 +Game over. Result: W+15.5 +MonteCarloUCT (Black) vs RootParallelization (White) : W+15.5 +Game over. Result: W+14.5 +MonteCarloUCT (Black) vs RootParallelization (White) : W+14.5 +Game over. Result: W+6.5 +MonteCarloUCT (Black) vs RootParallelization (White) : W+6.5 +Game over. Result: W+6.5 +MonteCarloUCT (Black) vs RootParallelization (White) : W+6.5 +Game over. Result: W+11.5 +MonteCarloUCT (Black) vs RootParallelization (White) : W+11.5 +Game over. Result: W+26.5 +MonteCarloUCT (Black) vs RootParallelization-NeuralNet (White) : W+26.5 +Game over. Result: W+11.5 +MonteCarloUCT (Black) vs RootParallelization-NeuralNet (White) : W+11.5 +Game over. Result: W+47.5 +MonteCarloUCT (Black) vs RootParallelization-NeuralNet (White) : W+47.5 +Game over. Result: W+13.5 +MonteCarloUCT (Black) vs RootParallelization-NeuralNet (White) : W+13.5 +Game over. Result: B+33.5 +MonteCarloUCT (Black) vs RootParallelization-NeuralNet (White) : B+33.5 + +Tournament Win Rates +==================== +MonteCarloUCT (Black) vs Random (White) : 100% +MonteCarloUCT (Black) vs Alpha-Beta (White) : 80% +MonteCarloUCT (Black) vs MonteCarloUCT (White) : 20% +MonteCarloUCT (Black) vs UCT-RAVE (White) : 00% +MonteCarloUCT (Black) vs MonteCarloSMAF (White) : 00% +MonteCarloUCT (Black) vs RootParallelization (White) : 00% +MonteCarloUCT (Black) vs RootParallelization-NeuralNet (White) : 20% +Tournament lasted 1355.668 seconds. 
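
Note on the "00%" entries in the win-rate tables above: RoundRobin.java (added later in this patch) formats each rate with java.text.DecimalFormat("00.#") before appending "%", and that pattern forces at least two integer digits, so a 0.0 win rate prints as "00%" while 100.0 prints as "100%". A minimal standalone sketch of that formatting behavior follows; the class name is illustrative only and not part of the project:

    import java.text.DecimalFormat;

    // Demonstrates how the "00.#" pattern used by RoundRobin renders the
    // win rates seen in the tournament tables above.
    public class WinRateFormatDemo {
        public static void main(String[] args) {
            DecimalFormat df = new DecimalFormat("00.#");
            for (double rate : new double[] { 0.0, 20.0, 60.0, 100.0 }) {
                System.out.println(df.format(rate) + "%");
            }
            // prints: 00%  20%  60%  100%
        }
    }
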
diff --git a/src/net/woodyfolsom/msproj/GameRecord.java b/src/net/woodyfolsom/msproj/GameRecord.java index 8202d5e..ba719c0 100644 --- a/src/net/woodyfolsom/msproj/GameRecord.java +++ b/src/net/woodyfolsom/msproj/GameRecord.java @@ -59,6 +59,14 @@ public class GameRecord { return gameStates.get(0).getGameConfig(); } + /** + * Gets the game state for the most recent ply. + * @return + */ + public GameState getGameState() { + return gameStates.get(getNumTurns()); + } + public GameState getGameState(Integer turn) { return gameStates.get(turn); } diff --git a/src/net/woodyfolsom/msproj/GameState.java b/src/net/woodyfolsom/msproj/GameState.java index cc1107e..7de170a 100644 --- a/src/net/woodyfolsom/msproj/GameState.java +++ b/src/net/woodyfolsom/msproj/GameState.java @@ -119,6 +119,14 @@ public class GameState { return whitePrisoners; } + public boolean isPrevPlyPass() { + if (moveHistory.size() == 0) { + return false; + } else { + return moveHistory.get(moveHistory.size()-1).isPass(); + } + } + public boolean isSelfFill(Action action, Player player) { return gameBoard.isSelfFill(action, player); } diff --git a/src/net/woodyfolsom/msproj/GoGame.java b/src/net/woodyfolsom/msproj/GoGame.java index 1dcffcb..ac10198 100644 --- a/src/net/woodyfolsom/msproj/GoGame.java +++ b/src/net/woodyfolsom/msproj/GoGame.java @@ -16,6 +16,7 @@ import net.woodyfolsom.msproj.policy.Minimax; import net.woodyfolsom.msproj.policy.MonteCarloUCT; import net.woodyfolsom.msproj.policy.Policy; import net.woodyfolsom.msproj.policy.RandomMovePolicy; +import net.woodyfolsom.msproj.policy.RootParAMAF; import org.apache.log4j.Logger; import org.apache.log4j.xml.DOMConfigurator; @@ -80,10 +81,11 @@ public class GoGame implements Runnable { public static void main(String[] args) throws IOException { configureLogging(); if (args.length == 0) { - Policy defaultMoveGenerator = new MonteCarloUCT(new RandomMovePolicy(), 5000L); - LOGGER.info("No MoveGenerator specified. Using default: " + defaultMoveGenerator.toString()); + Policy policy = new RootParAMAF(4, 10000L); + policy.setLogging(true); + LOGGER.info("No MoveGenerator specified. 
Using default: " + policy.getName()); - GoGame goGame = new GoGame(defaultMoveGenerator, PROPS_FILE); + GoGame goGame = new GoGame(policy, PROPS_FILE); new Thread(goGame).start(); System.out.println("Creating GtpClient"); @@ -111,7 +113,9 @@ public class GoGame implements Runnable { } else if ("alphabeta".equals(policyName)) { return new AlphaBeta(); } else if ("montecarlo".equals(policyName)) { - return new MonteCarloUCT(new RandomMovePolicy(), 5000L); + return new MonteCarloUCT(new RandomMovePolicy(), 10000L); + } else if ("root_par_amaf".equals(policyName)) { + return new RootParAMAF(4, 10000L); } else { LOGGER.info("Unable to create Policy for unsupported name: " + policyName); System.exit(INVALID_MOVE_GENERATOR); diff --git a/src/net/woodyfolsom/msproj/RoundRobin.java b/src/net/woodyfolsom/msproj/RoundRobin.java new file mode 100644 index 0000000..3209e4d --- /dev/null +++ b/src/net/woodyfolsom/msproj/RoundRobin.java @@ -0,0 +1,115 @@ +package net.woodyfolsom.msproj; + +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.List; + +import net.woodyfolsom.msproj.policy.AlphaBeta; +import net.woodyfolsom.msproj.policy.MonteCarloAMAF; +import net.woodyfolsom.msproj.policy.MonteCarloSMAF; +import net.woodyfolsom.msproj.policy.MonteCarloUCT; +import net.woodyfolsom.msproj.policy.NeuralNetPolicy; +import net.woodyfolsom.msproj.policy.Policy; +import net.woodyfolsom.msproj.policy.RandomMovePolicy; +import net.woodyfolsom.msproj.policy.RootParAMAF; +import net.woodyfolsom.msproj.policy.RootParallelization; + +public class RoundRobin { + public static final int EXIT_USER_QUIT = 1; + public static final int EXIT_NOMINAL = 0; + public static final int EXIT_IO_EXCEPTION = -1; + + public static void main(String[] args) throws IOException { + long startTime = System.currentTimeMillis(); + + System.out.println("Beginning round-robin tournament."); + System.out.println("Initializing policies..."); + List policies = new ArrayList(); + + policies.add(new RandomMovePolicy()); + //policies.add(new Minimax(1)); + policies.add(new AlphaBeta(1)); + policies.add(new MonteCarloUCT(new RandomMovePolicy(), 500L)); + policies.add(new MonteCarloAMAF(new RandomMovePolicy(), 500L)); + policies.add(new MonteCarloSMAF(new RandomMovePolicy(), 500L, 4)); + policies.add(new RootParAMAF(4, 500L)); + policies.add(new RootParallelization(4, new NeuralNetPolicy(), 500L)); + + RoundRobin rr = new RoundRobin(); + + List> tourneyWinRates = new ArrayList>(); + + int gamesPerMatch = 5; + + for (int i = 0; i < policies.size(); i++) { + List roundWinRates = new ArrayList(); + if (i != 5) { + tourneyWinRates.add(roundWinRates); + continue; + } + for (int j = 0; j < policies.size(); j++) { + Policy policy1 = policies.get(i); + policy1.setLogging(false); + Policy policy2 = policies.get(j); + policy2.setLogging(false); + + List gameResults = rr.playGame(policy1, policy2, 9, 6.5, gamesPerMatch, false, false, false); + + double wins = 0.0; + double games = 0.0; + for(GameResult gr : gameResults) { + wins += gr.isWinner(Player.BLACK) ? 
1.0 : 0.0; + games += 1.0; + } + roundWinRates.add(100.0 * wins / games); + } + tourneyWinRates.add(roundWinRates); + } + + System.out.println(""); + System.out.println("Tournament Win Rates"); + System.out.println("===================="); + + DecimalFormat df = new DecimalFormat("00.#"); + for (int i = 0; i < policies.size(); i++) { + for (int j = 0; j < policies.size(); j++) { + if (i == 5) + System.out.println(policies.get(i).getName() + " (Black) vs " + policies.get(j).getName() + " (White) : " + df.format(tourneyWinRates.get(i).get(j)) + "%"); + } + } + + long endTime = System.currentTimeMillis(); + System.out.println("Tournament lasted " + (endTime-startTime)/1000.0 + " seconds."); + } + + public List playGame(Policy player1Policy, Policy player2Policy, int size, + double komi, int rounds, boolean showSpectatorBoard, + boolean blackMoveLogged, boolean whiteMoveLogged) { + + GameConfig gameConfig = new GameConfig(size); + gameConfig.setKomi(komi); + + Referee referee = new Referee(); + referee.setPolicy(Player.BLACK, player1Policy); + referee.setPolicy(Player.WHITE, player2Policy); + + List roundResults = new ArrayList(); + + boolean logGameRecords = false; + + int gameNo = 1; + + for (int round = 0; round < rounds; round++) { + gameNo++; + GameResult gameResult = referee.play(gameConfig, gameNo, + showSpectatorBoard, logGameRecords); + roundResults.add(gameResult); + + System.out.println(player1Policy.getName() + " (Black) vs " + + player2Policy.getName() + " (White) : " + gameResult); + roundResults.add(gameResult); + } + return roundResults; + } +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/StandAloneGame.java b/src/net/woodyfolsom/msproj/StandAloneGame.java index cba3eb2..0528a39 100644 --- a/src/net/woodyfolsom/msproj/StandAloneGame.java +++ b/src/net/woodyfolsom/msproj/StandAloneGame.java @@ -17,6 +17,7 @@ import net.woodyfolsom.msproj.policy.MonteCarloSMAF; import net.woodyfolsom.msproj.policy.MonteCarloUCT; import net.woodyfolsom.msproj.policy.Policy; import net.woodyfolsom.msproj.policy.RandomMovePolicy; +import net.woodyfolsom.msproj.policy.RootParAMAF; import net.woodyfolsom.msproj.policy.RootParallelization; public class StandAloneGame { @@ -27,13 +28,13 @@ public class StandAloneGame { private int gameNo = 0; enum PLAYER_TYPE { - HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM, RAVE, SMAF + HUMAN, HUMAN_GUI, ROOT_PAR, UCT, RANDOM, RAVE, SMAF, ROOT_PAR_AMAF }; public static void main(String[] args) throws IOException { try { GameSettings gameSettings = GameSettings - .createGameSetings("data/gogame.cfg"); + .createGameSetings("gogame.cfg"); System.out.println("Game Settings: " + gameSettings); System.out.println("Successfully parsed game settings."); new StandAloneGame().playGame( @@ -68,6 +69,8 @@ public class StandAloneGame { return PLAYER_TYPE.RAVE; } else if ("SMAF".equalsIgnoreCase(playerTypeStr)) { return PLAYER_TYPE.SMAF; + } else if ("ROOT_PAR_AMAF".equalsIgnoreCase(playerTypeStr)) { + return PLAYER_TYPE.ROOT_PAR_AMAF; } else { throw new RuntimeException("Unknown player type: " + playerTypeStr); } @@ -186,11 +189,14 @@ public class StandAloneGame { case ROOT_PAR: policy = new RootParallelization(4, turnLength); break; + case ROOT_PAR_AMAF: + policy = new RootParAMAF(4, turnLength); + break; case UCT: policy = new MonteCarloUCT(new RandomMovePolicy(), turnLength); break; case SMAF: - policy = new MonteCarloSMAF(new RandomMovePolicy(), turnLength, 0); + policy = new MonteCarloSMAF(new RandomMovePolicy(), turnLength, 4); break; case RANDOM: policy = 
new RandomMovePolicy(); diff --git a/src/net/woodyfolsom/msproj/ann/Connection.java b/src/net/woodyfolsom/msproj/ann/Connection.java index 0cb4fd0..7951460 100644 --- a/src/net/woodyfolsom/msproj/ann/Connection.java +++ b/src/net/woodyfolsom/msproj/ann/Connection.java @@ -31,11 +31,6 @@ public class Connection { return dest; } - //@XmlTransient - //public double getLastDelta() { - // return lastDelta; - //} - @XmlAttribute public int getSrc() { return src; diff --git a/src/net/woodyfolsom/msproj/ann/FusekiFilterTrainer.java b/src/net/woodyfolsom/msproj/ann/FusekiFilterTrainer.java new file mode 100644 index 0000000..15f0610 --- /dev/null +++ b/src/net/woodyfolsom/msproj/ann/FusekiFilterTrainer.java @@ -0,0 +1,292 @@ +package net.woodyfolsom.msproj.ann; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.GameConfig; +import net.woodyfolsom.msproj.GameRecord; +import net.woodyfolsom.msproj.GameResult; +import net.woodyfolsom.msproj.GameState; +import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.policy.NeuralNetPolicy; +import net.woodyfolsom.msproj.policy.Policy; +import net.woodyfolsom.msproj.policy.RandomMovePolicy; +import net.woodyfolsom.msproj.tictactoe.NNDataSetFactory; + +public class FusekiFilterTrainer { // implements epsilon-greedy trainer? online + // version of NeuralNetFilter + + private boolean training = true; + + public static void main(String[] args) throws IOException { + double alpha = 0.50; + double lambda = 0.90; + int maxGames = 1000; + + new FusekiFilterTrainer().trainNetwork(alpha, lambda, maxGames); + } + + public void trainNetwork(double alpha, double lambda, int maxGames) + throws IOException { + + FeedforwardNetwork neuralNetwork; + + GameConfig gameConfig = new GameConfig(9); + + if (training) { + neuralNetwork = new MultiLayerPerceptron(true, 81, 18, 1); + neuralNetwork.setName("FusekiFilter" + gameConfig.getSize()); + neuralNetwork.initWeights(); + TrainingMethod trainer = new TemporalDifference(alpha, lambda); + + System.out.println("Playing untrained games."); + + for (int i = 0; i < 10; i++) { + GameRecord gameRecord = new GameRecord(gameConfig); + System.out.println("" + (i + 1) + ". " + + playOptimal(neuralNetwork, gameRecord).getResult()); + } + + System.out.println("Learning from " + maxGames + + " games of random self-play"); + + int gamesPlayed = 0; + List results = new ArrayList(); + do { + GameRecord gameRecord = new GameRecord(gameConfig); + playEpsilonGreedy(0.50, neuralNetwork, trainer, gameRecord); + System.out.println("Winner: " + gameRecord.getResult()); + gamesPlayed++; + results.add(gameRecord.getResult()); + } while (gamesPlayed < maxGames); + + System.out.println("Results of every 10th training game:"); + + for (int i = 0; i < results.size(); i++) { + if (i % 10 == 0) { + System.out.println("" + (i + 1) + ". 
" + results.get(i)); + } + } + + System.out.println("Learned network after " + maxGames + + " training games."); + } else { + System.out.println("Loading TicTacToe network from file."); + neuralNetwork = new MultiLayerPerceptron(); + FileInputStream fis = new FileInputStream(new File("pass.net")); + if (!new MultiLayerPerceptron().load(fis)) { + System.out.println("Error loading pass.net from file."); + return; + } + fis.close(); + } + + evalTestCases(gameConfig, neuralNetwork); + + System.out.println("Playing optimal games."); + List gameResults = new ArrayList(); + for (int i = 0; i < 10; i++) { + GameRecord gameRecord = new GameRecord(gameConfig); + gameResults.add(playOptimal(neuralNetwork, gameRecord).getResult()); + } + + boolean suboptimalPlay = false; + System.out.println("Optimal game summary: "); + for (int i = 0; i < gameResults.size(); i++) { + GameResult result = gameResults.get(i); + System.out.println("" + (i + 1) + ". " + result); + } + + File output = new File("pass.net"); + + FileOutputStream fos = new FileOutputStream(output); + + neuralNetwork.save(fos); + + System.out.println("Playing optimal vs random games."); + for (int i = 0; i < 10; i++) { + GameRecord gameRecord = new GameRecord(gameConfig); + System.out.println("" + + (i + 1) + + ". " + + playOptimalVsRandom(neuralNetwork, gameRecord) + .getResult()); + } + + if (suboptimalPlay) { + System.out.println("Suboptimal play detected!"); + } + } + + private double[] createBoard(GameConfig gameConfig, Action... actions) { + GameRecord gameRec = new GameRecord(gameConfig); + for (Action action : actions) { + gameRec.play(gameRec.getPlayerToMove(), action); + } + return NNDataSetFactory.createDataPair(gameRec.getGameState(), FusekiFilterTrainer.class).getInput().getValues(); + } + + private void evalTestCases(GameConfig gameConfig, FeedforwardNetwork neuralNetwork) { + double[][] validationSet = new double[1][]; + + // start state: black has 0, white has 0 + komi, neither has passed + validationSet[0] = createBoard(gameConfig, Action.getInstance("C3")); + + String[] inputNames = NNDataSetFactory.getInputFields(FusekiFilterTrainer.class); + String[] outputNames = NNDataSetFactory.getOutputFields(FusekiFilterTrainer.class); + + System.out.println("Output from eval set (learned network):"); + testNetwork(neuralNetwork, validationSet, inputNames, outputNames); + } + + private GameRecord playOptimalVsRandom(FeedforwardNetwork neuralNetwork, + GameRecord gameRecord) { + NeuralNetPolicy neuralNetPolicy = new NeuralNetPolicy(); + neuralNetPolicy.setMoveFilter(neuralNetwork); + + Policy randomPolicy = new RandomMovePolicy(); + + GameConfig gameConfig = gameRecord.getGameConfig(); + GameState gameState = gameRecord.getGameState(); + + Policy[] policies = new Policy[] { neuralNetPolicy, randomPolicy }; + int turnNo = 0; + do { + Action action; + GameState nextState; + + Player playerToMove = gameState.getPlayerToMove(); + action = policies[turnNo % 2].getAction(gameConfig, gameState, + playerToMove); + + if (!gameRecord.play(playerToMove, action)) { + throw new RuntimeException("Illegal move: " + action); + } + + nextState = gameRecord.getGameState(); + + //System.out.println("Action " + action + " selected by policy " + // + policies[turnNo % 2].getName()); + //System.out.println("Next board state: " + nextState); + gameState = nextState; + turnNo++; + } while (!gameState.isTerminal()); + return gameRecord; + } + + private GameRecord playOptimal(FeedforwardNetwork neuralNetwork, + GameRecord gameRecord) { + + NeuralNetPolicy 
neuralNetPolicy = new NeuralNetPolicy(); + neuralNetPolicy.setMoveFilter(neuralNetwork); + + if (gameRecord.getNumTurns() > 0) { + throw new RuntimeException( + "PlayOptimal requires a new GameRecord with no turns played."); + } + + GameState gameState; + + do { + Action action; + GameState nextState; + + Player playerToMove = gameRecord.getPlayerToMove(); + action = neuralNetPolicy.getAction(gameRecord.getGameConfig(), + gameRecord.getGameState(), playerToMove); + + if (!gameRecord.play(playerToMove, action)) { + throw new RuntimeException("Invalid move played: " + action); + } + nextState = gameRecord.getGameState(); + + //System.out.println("Action " + action + " selected by policy " + // + neuralNetPolicy.getName()); + //System.out.println("Next board state: " + nextState); + gameState = nextState; + } while (!gameState.isTerminal()); + + return gameRecord; + } + + private GameRecord playEpsilonGreedy(double epsilon, + FeedforwardNetwork neuralNetwork, TrainingMethod trainer, + GameRecord gameRecord) { + Policy randomPolicy = new RandomMovePolicy(); + NeuralNetPolicy neuralNetPolicy = new NeuralNetPolicy(); + neuralNetPolicy.setMoveFilter(neuralNetwork); + + if (gameRecord.getNumTurns() > 0) { + throw new RuntimeException( + "PlayOptimal requires a new GameRecord with no turns played."); + } + + GameState gameState = gameRecord.getGameState(); + NNDataPair statePair; + + Policy selectedPolicy; + trainer.zeroTraces(neuralNetwork); + + do { + Action action; + GameState nextState; + + Player playerToMove = gameRecord.getPlayerToMove(); + + if (Math.random() < epsilon) { + selectedPolicy = randomPolicy; + action = selectedPolicy + .getAction(gameRecord.getGameConfig(), + gameRecord.getGameState(), + gameRecord.getPlayerToMove()); + if (!gameRecord.play(playerToMove, action)) { + throw new RuntimeException("Illegal move played: " + action); + } + nextState = gameRecord.getGameState(); + } else { + selectedPolicy = neuralNetPolicy; + action = selectedPolicy + .getAction(gameRecord.getGameConfig(), + gameRecord.getGameState(), + gameRecord.getPlayerToMove()); + + if (!gameRecord.play(playerToMove, action)) { + throw new RuntimeException("Illegal move played: " + action); + } + nextState = gameRecord.getGameState(); + + statePair = NNDataSetFactory.createDataPair(gameState, FusekiFilterTrainer.class); + NNDataPair nextStatePair = NNDataSetFactory + .createDataPair(nextState, FusekiFilterTrainer.class); + + trainer.iteratePattern(neuralNetwork, statePair, + nextStatePair.getIdeal()); + } + + gameState = nextState; + } while (!gameState.isTerminal()); + + // finally, reinforce the actual reward + statePair = NNDataSetFactory.createDataPair(gameState, FusekiFilterTrainer.class); + trainer.iteratePattern(neuralNetwork, statePair, statePair.getIdeal()); + + return gameRecord; + } + + private void testNetwork(FeedforwardNetwork neuralNetwork, + double[][] validationSet, String[] inputNames, String[] outputNames) { + for (int valIndex = 0; valIndex < validationSet.length; valIndex++) { + NNDataPair dp = new NNDataPair(new NNData(inputNames, + validationSet[valIndex]), new NNData(outputNames, + new double[] { 0.0 })); + System.out.println(dp); + System.out.println(" => "); + System.out.println(neuralNetwork.compute(dp)); + } + } +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/ann/MultiLayerPerceptron.java b/src/net/woodyfolsom/msproj/ann/MultiLayerPerceptron.java index 6f9c8f4..1871a2f 100644 --- a/src/net/woodyfolsom/msproj/ann/MultiLayerPerceptron.java +++ 
b/src/net/woodyfolsom/msproj/ann/MultiLayerPerceptron.java @@ -148,7 +148,7 @@ public class MultiLayerPerceptron extends FeedforwardNetwork { Marshaller m = jc.createMarshaller(); m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true); m.marshal(this, os); - m.marshal(this, System.out); + //m.marshal(this, System.out); return true; } catch (JAXBException je) { je.printStackTrace(); diff --git a/src/net/woodyfolsom/msproj/ann/PassFilterTrainer.java b/src/net/woodyfolsom/msproj/ann/PassFilterTrainer.java new file mode 100644 index 0000000..4068a07 --- /dev/null +++ b/src/net/woodyfolsom/msproj/ann/PassFilterTrainer.java @@ -0,0 +1,298 @@ +package net.woodyfolsom.msproj.ann; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.GameConfig; +import net.woodyfolsom.msproj.GameRecord; +import net.woodyfolsom.msproj.GameResult; +import net.woodyfolsom.msproj.GameState; +import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.policy.NeuralNetPolicy; +import net.woodyfolsom.msproj.policy.Policy; +import net.woodyfolsom.msproj.policy.RandomMovePolicy; +import net.woodyfolsom.msproj.tictactoe.NNDataSetFactory; + +public class PassFilterTrainer { // implements epsilon-greedy trainer? online + // version of NeuralNetFilter + + private boolean training = true; + + public static void main(String[] args) throws IOException { + double alpha = 0.50; + double lambda = 0.1; + int maxGames = 1500; + + new PassFilterTrainer().trainNetwork(alpha, lambda, maxGames); + } + + public void trainNetwork(double alpha, double lambda, int maxGames) + throws IOException { + + FeedforwardNetwork neuralNetwork; + + GameConfig gameConfig = new GameConfig(9); + + if (training) { + neuralNetwork = new MultiLayerPerceptron(true, 2, 2, 1); + neuralNetwork.setName("PassFilter" + gameConfig.getSize()); + neuralNetwork.initWeights(); + TrainingMethod trainer = new TemporalDifference(alpha, lambda); + + System.out.println("Playing untrained games."); + + for (int i = 0; i < 10; i++) { + GameRecord gameRecord = new GameRecord(gameConfig); + System.out.println("" + (i + 1) + ". " + + playOptimal(neuralNetwork, gameRecord).getResult()); + } + + System.out.println("Learning from " + maxGames + + " games of random self-play"); + + int gamesPlayed = 0; + List results = new ArrayList(); + do { + GameRecord gameRecord = new GameRecord(gameConfig); + playEpsilonGreedy(0.5, neuralNetwork, trainer, gameRecord); + System.out.println("Winner: " + gameRecord.getResult()); + gamesPlayed++; + results.add(gameRecord.getResult()); + } while (gamesPlayed < maxGames); + + System.out.println("Results of every 10th training game:"); + + for (int i = 0; i < results.size(); i++) { + if (i % 10 == 0) { + System.out.println("" + (i + 1) + ". 
" + results.get(i)); + } + } + + System.out.println("Learned network after " + maxGames + + " training games."); + } else { + System.out.println("Loading TicTacToe network from file."); + neuralNetwork = new MultiLayerPerceptron(); + FileInputStream fis = new FileInputStream(new File("pass.net")); + if (!new MultiLayerPerceptron().load(fis)) { + System.out.println("Error loading pass.net from file."); + return; + } + fis.close(); + } + + evalTestCases(neuralNetwork); + + System.out.println("Playing optimal games."); + List gameResults = new ArrayList(); + for (int i = 0; i < 10; i++) { + GameRecord gameRecord = new GameRecord(gameConfig); + gameResults.add(playOptimal(neuralNetwork, gameRecord).getResult()); + } + + boolean suboptimalPlay = false; + System.out.println("Optimal game summary: "); + for (int i = 0; i < gameResults.size(); i++) { + GameResult result = gameResults.get(i); + System.out.println("" + (i + 1) + ". " + result); + } + + File output = new File("pass.net"); + + FileOutputStream fos = new FileOutputStream(output); + + neuralNetwork.save(fos); + + System.out.println("Playing optimal vs random games."); + for (int i = 0; i < 10; i++) { + GameRecord gameRecord = new GameRecord(gameConfig); + System.out.println("" + + (i + 1) + + ". " + + playOptimalVsRandom(neuralNetwork, gameRecord) + .getResult()); + } + + if (suboptimalPlay) { + System.out.println("Suboptimal play detected!"); + } + } + + private void evalTestCases(FeedforwardNetwork neuralNetwork) { + double[][] validationSet = new double[4][]; + + //losing, opponent did not pass + //don't pass + //(0.0 1.0 0.0) => 0.0 + validationSet[0] = new double[] { -1.0, -1.0 }; + + //winning, opponent did not pass + //maybe pass? + //(1.0 0.0 0.0) => ? + validationSet[1] = new double[] { 1.0, -1.0 }; + + //winning, opponent passed + //pass! + //(1.0 0.0 1.0) => 1.0 + validationSet[2] = new double[] { 1.0, 1.0 }; + + //losing, opponent passed + //don't pass! 
+ //(0.0 1.0 1.0) => 1.0 + validationSet[3] = new double[] { -1.0, 1.0 }; + + String[] inputNames = NNDataSetFactory.getInputFields(PassFilterTrainer.class); + String[] outputNames = NNDataSetFactory.getOutputFields(PassFilterTrainer.class); + + System.out.println("Output from eval set (learned network):"); + testNetwork(neuralNetwork, validationSet, inputNames, outputNames); + } + + private GameRecord playOptimalVsRandom(FeedforwardNetwork neuralNetwork, + GameRecord gameRecord) { + NeuralNetPolicy neuralNetPolicy = new NeuralNetPolicy(); + neuralNetPolicy.setPassFilter(neuralNetwork); + Policy randomPolicy = new RandomMovePolicy(); + + GameConfig gameConfig = gameRecord.getGameConfig(); + GameState gameState = gameRecord.getGameState(); + + Policy[] policies = new Policy[] { neuralNetPolicy, randomPolicy }; + int turnNo = 0; + do { + Action action; + GameState nextState; + + Player playerToMove = gameState.getPlayerToMove(); + action = policies[turnNo % 2].getAction(gameConfig, gameState, + playerToMove); + + if (!gameRecord.play(playerToMove, action)) { + throw new RuntimeException("Illegal move: " + action); + } + + nextState = gameRecord.getGameState(); + + //System.out.println("Action " + action + " selected by policy " + // + policies[turnNo % 2].getName()); + //System.out.println("Next board state: " + nextState); + gameState = nextState; + turnNo++; + } while (!gameState.isTerminal()); + return gameRecord; + } + + private GameRecord playOptimal(FeedforwardNetwork neuralNetwork, + GameRecord gameRecord) { + + NeuralNetPolicy neuralNetPolicy = new NeuralNetPolicy(); + neuralNetPolicy.setPassFilter(neuralNetwork); + + if (gameRecord.getNumTurns() > 0) { + throw new RuntimeException( + "PlayOptimal requires a new GameRecord with no turns played."); + } + + GameState gameState; + + do { + Action action; + GameState nextState; + + Player playerToMove = gameRecord.getPlayerToMove(); + action = neuralNetPolicy.getAction(gameRecord.getGameConfig(), + gameRecord.getGameState(), playerToMove); + + if (!gameRecord.play(playerToMove, action)) { + throw new RuntimeException("Invalid move played: " + action); + } + nextState = gameRecord.getGameState(); + + //System.out.println("Action " + action + " selected by policy " + // + neuralNetPolicy.getName()); + //System.out.println("Next board state: " + nextState); + gameState = nextState; + } while (!gameState.isTerminal()); + + return gameRecord; + } + + private GameRecord playEpsilonGreedy(double epsilon, + FeedforwardNetwork neuralNetwork, TrainingMethod trainer, + GameRecord gameRecord) { + Policy randomPolicy = new RandomMovePolicy(); + NeuralNetPolicy neuralNetPolicy = new NeuralNetPolicy(); + neuralNetPolicy.setPassFilter(neuralNetwork); + + if (gameRecord.getNumTurns() > 0) { + throw new RuntimeException( + "PlayOptimal requires a new GameRecord with no turns played."); + } + + GameState gameState = gameRecord.getGameState(); + NNDataPair statePair; + + Policy selectedPolicy; + trainer.zeroTraces(neuralNetwork); + + do { + Action action; + GameState nextState; + + Player playerToMove = gameRecord.getPlayerToMove(); + + if (Math.random() < epsilon) { + selectedPolicy = randomPolicy; + action = selectedPolicy + .getAction(gameRecord.getGameConfig(), + gameRecord.getGameState(), + gameRecord.getPlayerToMove()); + if (!gameRecord.play(playerToMove, action)) { + throw new RuntimeException("Illegal move played: " + action); + } + nextState = gameRecord.getGameState(); + } else { + selectedPolicy = neuralNetPolicy; + action = selectedPolicy + 
.getAction(gameRecord.getGameConfig(), + gameRecord.getGameState(), + gameRecord.getPlayerToMove()); + + if (!gameRecord.play(playerToMove, action)) { + throw new RuntimeException("Illegal move played: " + action); + } + nextState = gameRecord.getGameState(); + + statePair = NNDataSetFactory.createDataPair(gameState, PassFilterTrainer.class); + NNDataPair nextStatePair = NNDataSetFactory + .createDataPair(nextState, PassFilterTrainer.class); + + trainer.iteratePattern(neuralNetwork, statePair, + nextStatePair.getIdeal()); + } + + gameState = nextState; + } while (!gameState.isTerminal()); + + // finally, reinforce the actual reward + statePair = NNDataSetFactory.createDataPair(gameState, PassFilterTrainer.class); + trainer.iteratePattern(neuralNetwork, statePair, statePair.getIdeal()); + + return gameRecord; + } + + private void testNetwork(FeedforwardNetwork neuralNetwork, + double[][] validationSet, String[] inputNames, String[] outputNames) { + for (int valIndex = 0; valIndex < validationSet.length; valIndex++) { + NNDataPair dp = new NNDataPair(new NNData(inputNames, + validationSet[valIndex]), new NNData(outputNames, + new double[] { 0.0 })); + System.out.println(dp + " => " + neuralNetwork.compute(dp)); + } + } +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/ann/TTTFilter.java b/src/net/woodyfolsom/msproj/ann/TTTFilter.java deleted file mode 100644 index 31f743c..0000000 --- a/src/net/woodyfolsom/msproj/ann/TTTFilter.java +++ /dev/null @@ -1,34 +0,0 @@ -package net.woodyfolsom.msproj.ann; - -/** - * Based on sample code from http://neuroph.sourceforge.net - * - * @author Woody - * - */ -public class TTTFilter extends AbstractNeuralNetFilter implements - NeuralNetFilter { - - private static final int INPUT_SIZE = 9; - private static final int OUTPUT_SIZE = 1; - - public TTTFilter() { - this(0.5,0.0, 1000); - } - - public TTTFilter(double alpha, double lambda, int maxEpochs) { - super( new MultiLayerPerceptron(true, INPUT_SIZE, 5, OUTPUT_SIZE), - new TemporalDifference(0.5,0.0), maxEpochs, 0.05); - super.getNeuralNetwork().setName("XORFilter"); - } - - @Override - public int getInputSize() { - return INPUT_SIZE; - } - - @Override - public int getOutputSize() { - return OUTPUT_SIZE; - } -} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/ann/TTTFilterTrainer.java b/src/net/woodyfolsom/msproj/ann/TTTFilterTrainer.java index 7c39344..b7745ba 100644 --- a/src/net/woodyfolsom/msproj/ann/TTTFilterTrainer.java +++ b/src/net/woodyfolsom/msproj/ann/TTTFilterTrainer.java @@ -22,8 +22,8 @@ public class TTTFilterTrainer { // implements epsilon-greedy trainer? online private boolean training = true; public static void main(String[] args) throws IOException { - double alpha = 0.50; - double lambda = 0.90; + double alpha = 0.025; + double lambda = .10; int maxGames = 100000; new TTTFilterTrainer().trainNetwork(alpha, lambda, maxGames); @@ -51,7 +51,7 @@ public class TTTFilterTrainer { // implements epsilon-greedy trainer? 
online int gamesPlayed = 0; List results = new ArrayList(); do { - GameRecord gameRecord = playEpsilonGreedy(0.50, neuralNetwork, + GameRecord gameRecord = playEpsilonGreedy(0.9, neuralNetwork, trainer); System.out.println("Winner: " + gameRecord.getResult()); gamesPlayed++; diff --git a/src/net/woodyfolsom/msproj/ann/TemporalDifference.java b/src/net/woodyfolsom/msproj/ann/TemporalDifference.java index e0587e7..f3bd2fd 100644 --- a/src/net/woodyfolsom/msproj/ann/TemporalDifference.java +++ b/src/net/woodyfolsom/msproj/ann/TemporalDifference.java @@ -3,12 +3,11 @@ package net.woodyfolsom.msproj.ann; import java.util.List; public class TemporalDifference extends TrainingMethod { - private final double alpha; - // private final double gamma = 1.0; + private final double gamma; private final double lambda; public TemporalDifference(double alpha, double lambda) { - this.alpha = alpha; + this.gamma = alpha; this.lambda = lambda; } @@ -55,6 +54,7 @@ public class TemporalDifference extends TrainingMethod { + derivative * (idealValues[i] - outputNeurons[i].getOutput())); } + // walking down the list of Neurons in reverse order, propagate the // error Neuron[] neurons = neuralNetwork.getNeurons(); @@ -84,24 +84,10 @@ public class TemporalDifference extends TrainingMethod { private void updateWeights(FeedforwardNetwork neuralNetwork, double predictionError) { for (Connection connection : neuralNetwork.getConnections()) { - /* - * Neuron srcNeuron = neuralNetwork.getNeuron(connection.getSrc()); - * Neuron destNeuron = - * neuralNetwork.getNeuron(connection.getDest()); - * - * double delta = alpha * srcNeuron.getOutput() - * destNeuron.getGradient() * predictionError + - * connection.getTrace() * lambda; - * - * // TODO allow for momentum // double lastDelta = - * connection.getLastDelta(); connection.addDelta(delta); - */ Neuron srcNeuron = neuralNetwork.getNeuron(connection.getSrc()); Neuron destNeuron = neuralNetwork.getNeuron(connection.getDest()); - double delta = alpha * srcNeuron.getOutput() + double delta = gamma * srcNeuron.getOutput() * destNeuron.getGradient() + connection.getTrace() * lambda; - // TODO allow for momentum - // double lastDelta = connection.getLastDelta(); connection.addDelta(delta); } } @@ -121,18 +107,11 @@ public class TemporalDifference extends TrainingMethod { @Override protected void iteratePattern(FeedforwardNetwork neuralNetwork, NNDataPair statePair, NNData nextReward) { - // System.out.println("Learningrate: " + alpha); - zeroGradients(neuralNetwork); - // System.out.println("Training with: " + statePair.getInput()); - NNData ideal = nextReward; NNData actual = neuralNetwork.compute(statePair); - // System.out.println("Updating weights. Ideal Output: " + ideal); - // System.out.println("Actual Output: " + actual); - // backpropagate the gradients w.r.t. 
output error backPropagate(neuralNetwork, ideal); diff --git a/src/net/woodyfolsom/msproj/ann/XORFilter.java b/src/net/woodyfolsom/msproj/ann/XORFilter.java index 19e15d4..1ea3b85 100644 --- a/src/net/woodyfolsom/msproj/ann/XORFilter.java +++ b/src/net/woodyfolsom/msproj/ann/XORFilter.java @@ -18,7 +18,7 @@ public class XORFilter extends AbstractNeuralNetFilter implements public XORFilter(double learningRate, double momentum) { super( new MultiLayerPerceptron(true, INPUT_SIZE, 2, OUTPUT_SIZE), - new BackPropagation(learningRate, momentum), 1000, 0.01); + new BackPropagation(learningRate, momentum), 1000, 0.001); super.getNeuralNetwork().setName("XORFilter"); } diff --git a/src/net/woodyfolsom/msproj/policy/AlphaBeta.java b/src/net/woodyfolsom/msproj/policy/AlphaBeta.java index 1d7f542..291d283 100644 --- a/src/net/woodyfolsom/msproj/policy/AlphaBeta.java +++ b/src/net/woodyfolsom/msproj/policy/AlphaBeta.java @@ -191,4 +191,9 @@ public class AlphaBeta implements Policy { // TODO Auto-generated method stub } + + @Override + public String getName() { + return "Alpha-Beta"; + } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/HumanGuiInput.java b/src/net/woodyfolsom/msproj/policy/HumanGuiInput.java index 043cbca..d296aa2 100644 --- a/src/net/woodyfolsom/msproj/policy/HumanGuiInput.java +++ b/src/net/woodyfolsom/msproj/policy/HumanGuiInput.java @@ -61,4 +61,9 @@ public class HumanGuiInput implements Policy { goban.setGameState(gameState); } + @Override + public String getName() { + return "HumanGUI"; + } + } diff --git a/src/net/woodyfolsom/msproj/policy/HumanKeyboardInput.java b/src/net/woodyfolsom/msproj/policy/HumanKeyboardInput.java index f6bb765..81f6b3a 100644 --- a/src/net/woodyfolsom/msproj/policy/HumanKeyboardInput.java +++ b/src/net/woodyfolsom/msproj/policy/HumanKeyboardInput.java @@ -85,4 +85,9 @@ public class HumanKeyboardInput implements Policy { } + @Override + public String getName() { + return "HumanKeyboard"; + } + } diff --git a/src/net/woodyfolsom/msproj/policy/Minimax.java b/src/net/woodyfolsom/msproj/policy/Minimax.java index 0e8fd0b..4e3c25a 100644 --- a/src/net/woodyfolsom/msproj/policy/Minimax.java +++ b/src/net/woodyfolsom/msproj/policy/Minimax.java @@ -161,7 +161,10 @@ public class Minimax implements Policy { @Override public void setState(GameState gameState) { - // TODO Auto-generated method stub - + } + + @Override + public String getName() { + return "Minimax"; } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarloAMAF.java b/src/net/woodyfolsom/msproj/policy/MonteCarloAMAF.java index 9d2246f..3aea712 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarloAMAF.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarloAMAF.java @@ -85,8 +85,9 @@ public class MonteCarloAMAF extends MonteCarloUCT { if (bestAction == Action.NONE) { System.out - .println("MonteCarloUCT failed - no actions were found for the current game state (not even PASS)."); + .println(getName() + " failed - no actions were found for the current game state (not even PASS)."); } else { + if (isLogging()) { System.out.println("Action " + bestAction + " selected for " + node.getGameState().getPlayerToMove() + " with simulated win ratio of " @@ -96,6 +97,7 @@ public class MonteCarloAMAF extends MonteCarloUCT { + node.getProperties().getVisits() + " rollouts among " + node.getNumChildren() + " valid actions from the current state."); + } } return bestAction; } @@ -132,4 +134,9 @@ public class MonteCarloAMAF extends MonteCarloUCT { 
node.addChild(action, newChild); return newChildren; } + + @Override + public String getName() { + return "UCT-RAVE"; + } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarloSMAF.java b/src/net/woodyfolsom/msproj/policy/MonteCarloSMAF.java index dfa8cf9..1a722f5 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarloSMAF.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarloSMAF.java @@ -56,4 +56,8 @@ public class MonteCarloSMAF extends MonteCarloAMAF { currentNode = currentNode.getParent(); } } + + public String getName() { + return "MonteCarloSMAF"; + } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java index b4a622d..eb1c2d0 100644 --- a/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java +++ b/src/net/woodyfolsom/msproj/policy/MonteCarloUCT.java @@ -102,8 +102,9 @@ public class MonteCarloUCT extends MonteCarlo { if (bestAction == Action.NONE) { System.out - .println("MonteCarloUCT failed - no actions were found for the current game state (not even PASS)."); + .println(getName() + " failed - no actions were found for the current game state (not even PASS)."); } else { + if (isLogging()) { System.out.println("Action " + bestAction + " selected for " + node.getGameState().getPlayerToMove() + " with simulated win ratio of " @@ -113,6 +114,7 @@ public class MonteCarloUCT extends MonteCarlo { + node.getProperties().getVisits() + " rollouts among " + node.getNumChildren() + " valid actions from the current state."); + } } return bestAction; } @@ -230,4 +232,9 @@ public class MonteCarloUCT extends MonteCarlo { // TODO Auto-generated method stub } + + @Override + public String getName() { + return "MonteCarloUCT"; + } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/NeuralNetPolicy.java b/src/net/woodyfolsom/msproj/policy/NeuralNetPolicy.java new file mode 100644 index 0000000..44e69e0 --- /dev/null +++ b/src/net/woodyfolsom/msproj/policy/NeuralNetPolicy.java @@ -0,0 +1,144 @@ +package net.woodyfolsom.msproj.policy; + +import java.util.Collection; +import java.util.List; + +import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.GameConfig; +import net.woodyfolsom.msproj.GameState; +import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.ann.FeedforwardNetwork; +import net.woodyfolsom.msproj.ann.NNDataPair; +import net.woodyfolsom.msproj.ann.PassFilterTrainer; +import net.woodyfolsom.msproj.tictactoe.NNDataSetFactory; + +public class NeuralNetPolicy implements Policy { + + private FeedforwardNetwork moveFilter; + private FeedforwardNetwork passFilter; + + private ValidMoveGenerator validMoveGenerator = new ValidMoveGenerator(); + private Policy randomMovePolicy = new RandomMovePolicy(); + + public NeuralNetPolicy() { + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Player player) { + //If passFilter != null, check for a strong PASS signal. 
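+		//The pass filter's two inputs are WINNING and PREV_PLY_PASS (see NNDataSetFactory.PASS_INPUT_FIELDS);
+		//its single output is trained toward 1.0 for a Black win and 0.0 for a White win, so Black passes on a
+		//high output and White passes on a low one.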
+		if (passFilter != null) {
+			GameState stateAfterPass = new GameState(gameState);
+			if (!stateAfterPass.playStone(player, Action.PASS)) {
+				throw new RuntimeException("Pass should always be valid, but playStone(" + player + ", Action.PASS) failed.");
+			}
+
+			NNDataPair passData = NNDataSetFactory.createDataPair(gameState, PassFilterTrainer.class);
+			double estimatedValue = passFilter.compute(passData).getValues()[0];
+
+			//if losing and opponent passed, never pass
+			//if (passData.getInput().getValues()[0] == -1.0 && passData.getInput().getValues()[1] == 1.0) {
+			//	estimatedValue = 0.0;
+			//}
+
+			if (player == Player.BLACK && 0.6 < estimatedValue) {
+				//System.out.println("NeuralNetwork estimates value of PASS at > 0.6 (BLACK) for " + passData.getInput());
+				return Action.PASS;
+			}
+			if (player == Player.WHITE && 0.4 > estimatedValue) {
+				//System.out.println("NeuralNetwork estimates value of PASS at < 0.4 (WHITE) for " + passData.getInput());
+				return Action.PASS;
+			}
+		}
+		//If moveFilter != null, calculate action estimates and return the best one.
+
+		//max # valid moves is 19x19+2 (any coord plus pass, resign).
+		List<Action> validMoves = validMoveGenerator.getActions(gameConfig, gameState, player, 363);
+
+		if (moveFilter != null) {
+			if (player == Player.BLACK) {
+				//Black prefers the move whose resulting state the filter values closest to 1.0 (Black win).
+				double bestValue = Double.NEGATIVE_INFINITY;
+				Action bestAction = Action.NONE;
+				for (Action actionToTry : validMoves) {
+					GameState stateAfterAction = new GameState(gameState);
+					if (!stateAfterAction.playStone(player, actionToTry)) {
+						throw new RuntimeException("Invalid move: " + actionToTry);
+					}
+					NNDataPair moveData = NNDataSetFactory.createDataPair(stateAfterAction,
+							net.woodyfolsom.msproj.ann.FusekiFilterTrainer.class);
+					double estimatedValue = moveFilter.compute(moveData).getValues()[0];
+
+					if (estimatedValue > bestValue) {
+						bestValue = estimatedValue;
+						bestAction = actionToTry;
+					}
+				}
+				if (bestValue > 0.95) {
+					return bestAction;
+				}
+			} else if (player == Player.WHITE) {
+				//White prefers the move whose resulting state the filter values closest to 0.0 (White win).
+				double bestValue = Double.POSITIVE_INFINITY;
+				Action bestAction = Action.NONE;
+				for (Action actionToTry : validMoves) {
+					GameState stateAfterAction = new GameState(gameState);
+					if (!stateAfterAction.playStone(player, actionToTry)) {
+						throw new RuntimeException("Invalid move: " + actionToTry);
+					}
+					NNDataPair moveData = NNDataSetFactory.createDataPair(stateAfterAction,
+							net.woodyfolsom.msproj.ann.FusekiFilterTrainer.class);
+					double estimatedValue = moveFilter.compute(moveData).getValues()[0];
+
+					if (estimatedValue < bestValue) {
+						bestValue = estimatedValue;
+						bestAction = actionToTry;
+					}
+				}
+				if (bestValue < 0.05) {
+					return bestAction;
+				}
+			} else {
+				throw new RuntimeException("Invalid player: " + player);
+			}
+		}
+
+		//If no moves make the cutoff, just return a random move.
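+		//With neither filter set, or when no filtered move clears its threshold, the policy falls back to RandomMovePolicy below.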
+ return randomMovePolicy.getAction(gameConfig, gameState, player); + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Collection prohibitedActions, Player player) { + + throw new UnsupportedOperationException(); + } + + @Override + public int getNumStateEvaluations() { + return randomMovePolicy.getNumStateEvaluations(); + } + + @Override + public void setState(GameState gameState) { + randomMovePolicy.setState(gameState); + } + + @Override + public boolean isLogging() { + return randomMovePolicy.isLogging(); + } + + @Override + public void setLogging(boolean logging) { + randomMovePolicy.setLogging(logging); + } + + public void setMoveFilter(FeedforwardNetwork ffn) { + this.moveFilter = ffn; + } + + public void setPassFilter(FeedforwardNetwork ffn) { + this.passFilter = ffn; + } + + @Override + public String getName() { + return "NeuralNet" + (passFilter != null ? "-" + passFilter.getName() : "") + + (passFilter != null ? "-" + passFilter.getName() : ""); + } +} diff --git a/src/net/woodyfolsom/msproj/policy/Policy.java b/src/net/woodyfolsom/msproj/policy/Policy.java index cc04120..ba4278c 100644 --- a/src/net/woodyfolsom/msproj/policy/Policy.java +++ b/src/net/woodyfolsom/msproj/policy/Policy.java @@ -8,15 +8,17 @@ import net.woodyfolsom.msproj.GameState; import net.woodyfolsom.msproj.Player; public interface Policy { - public Action getAction(GameConfig gameConfig, GameState gameState, + Action getAction(GameConfig gameConfig, GameState gameState, Player player); - public Action getAction(GameConfig gameConfig, GameState gameState, + Action getAction(GameConfig gameConfig, GameState gameState, Collection prohibitedActions, Player player); - public int getNumStateEvaluations(); + String getName(); - public void setState(GameState gameState); + int getNumStateEvaluations(); + + void setState(GameState gameState); boolean isLogging(); diff --git a/src/net/woodyfolsom/msproj/policy/PolicyFactory.java b/src/net/woodyfolsom/msproj/policy/PolicyFactory.java new file mode 100644 index 0000000..4645828 --- /dev/null +++ b/src/net/woodyfolsom/msproj/policy/PolicyFactory.java @@ -0,0 +1,14 @@ +package net.woodyfolsom.msproj.policy; + +public class PolicyFactory { + + public static Policy createNew(Policy policyPrototype) { + if (policyPrototype instanceof RandomMovePolicy) { + return new RandomMovePolicy(); + } else if (policyPrototype instanceof NeuralNetPolicy) { + return new NeuralNetPolicy(); + } else { + throw new UnsupportedOperationException("Can only create new NeuralNetPolicy, not " + policyPrototype.getName()); + } + } +} diff --git a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java index 45209d7..01c586c 100644 --- a/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java +++ b/src/net/woodyfolsom/msproj/policy/RandomMovePolicy.java @@ -123,4 +123,9 @@ public class RandomMovePolicy implements Policy, ActionGenerator { public void setState(GameState gameState) { // TODO Auto-generated method stub } + + @Override + public String getName() { + return "Random"; + } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/RootParAMAF.java b/src/net/woodyfolsom/msproj/policy/RootParAMAF.java new file mode 100644 index 0000000..f59fb24 --- /dev/null +++ b/src/net/woodyfolsom/msproj/policy/RootParAMAF.java @@ -0,0 +1,186 @@ +package net.woodyfolsom.msproj.policy; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; 
+import java.util.Map; + +import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.GameConfig; +import net.woodyfolsom.msproj.GameState; +import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.tree.AMAFProperties; +import net.woodyfolsom.msproj.tree.MonteCarloProperties; + +public class RootParAMAF implements Policy { + private boolean logging = false; + private int numTrees = 1; + private Policy rolloutPolicy; + + public boolean isLogging() { + return logging; + } + + public void setLogging(boolean logging) { + this.logging = logging; + } + + private long timeLimit = 1000L; + + public RootParAMAF(int numTrees, long timeLimit) { + this.numTrees = numTrees; + this.timeLimit = timeLimit; + this.rolloutPolicy = new RandomMovePolicy(); + } + + public RootParAMAF(int numTrees, Policy policyPrototype, long timeLimit) { + this.numTrees = numTrees; + this.timeLimit = timeLimit; + this.rolloutPolicy = policyPrototype; + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Player player) { + Action bestAction = Action.NONE; + + List policyRunners = new ArrayList(); + List simulationThreads = new ArrayList(); + + for (int i = 0; i < numTrees; i++) { + + MonteCarlo policy = new MonteCarloAMAF( + PolicyFactory.createNew(rolloutPolicy), timeLimit); + + //policy.setLogging(true); + + PolicyRunner policyRunner = new PolicyRunner(policy, gameConfig, gameState, + player); + policyRunners.add(policyRunner); + + Thread simThread = new Thread(policyRunner); + simulationThreads.add(simThread); + } + + for (Thread simThread : simulationThreads) { + simThread.start(); + } + + for (Thread simThread : simulationThreads) { + try { + simThread.join(); + } catch (InterruptedException ie) { + System.out + .println("Interrupted while waiting for Monte Carlo simulations to finish."); + } + } + + Map totalReward = new HashMap(); + Map numSims = new HashMap(); + + for (PolicyRunner policyRunner : policyRunners) { + Map qValues = policyRunner.getQvalues(); + for (Action action : qValues.keySet()) { + if (totalReward.containsKey(action)) { + totalReward.put(action, totalReward.get(action) + ((AMAFProperties)qValues.get(action)).getAmafWins()); + } else { + totalReward.put(action, ((AMAFProperties)qValues.get(action)).getAmafWins()); + } + if (numSims.containsKey(action)) { + numSims.put(action, numSims.get(action) + ((AMAFProperties)qValues.get(action)).getAmafVisits()); + } else { + numSims.put(action, ((AMAFProperties)qValues.get(action)).getAmafVisits()); + } + } + } + + double bestValue = 0.0; + int totalRollouts = 0; + int bestWins = 0; + int bestSims = 0; + + for (Action action : totalReward.keySet()) + { + int totalWins = totalReward.get(action); + int totalSims = numSims.get(action); + + totalRollouts += totalSims; + + double value = ((double)totalWins) / ((double)totalSims); + + if (bestAction.isNone() || bestValue < value) { + bestAction = action; + bestValue = value; + bestWins = totalWins; + bestSims = totalSims; + } + + } + + if(isLogging()) { + System.out.println("Action " + bestAction + " selected for " + + player + + " with simulated win ratio of " + + (bestValue * 100.0 + "% among " + numTrees + " parallel simulations.")); + System.out.println("It won " + + bestWins + " out of " + bestSims + + " rollouts among " + totalRollouts + + " total rollouts (" + totalReward.size() + + " possible moves evaluated) from the current state."); + } + return bestAction; + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + 
Collection prohibitedActions, Player player) { + throw new UnsupportedOperationException( + "Prohibited actions not supported by this class."); + } + + @Override + public int getNumStateEvaluations() { + // TODO Auto-generated method stub + return 0; + } + + class PolicyRunner implements Runnable { + Map qValues; + + private GameConfig gameConfig; + private GameState gameState; + private Player player; + private MonteCarlo policy; + + public PolicyRunner(MonteCarlo policy, GameConfig gameConfig, + GameState gameState, Player player) { + this.policy = policy; + this.gameConfig = gameConfig; + this.gameState = gameState; + this.player = player; + } + + public Map getQvalues() { + return qValues; + } + + @Override + public void run() { + qValues = policy.getQvalues(gameConfig, gameState, player); + } + } + + @Override + public void setState(GameState gameState) { + } + + @Override + public String getName() { + if (rolloutPolicy.getName() == "Random") { + return "RootParallelization"; + } else { + return "RootParallelization-" + rolloutPolicy.getName(); + } + } +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/RootParSMAF.java b/src/net/woodyfolsom/msproj/policy/RootParSMAF.java new file mode 100644 index 0000000..5b5b3f3 --- /dev/null +++ b/src/net/woodyfolsom/msproj/policy/RootParSMAF.java @@ -0,0 +1,186 @@ +package net.woodyfolsom.msproj.policy; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import net.woodyfolsom.msproj.Action; +import net.woodyfolsom.msproj.GameConfig; +import net.woodyfolsom.msproj.GameState; +import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.tree.AMAFProperties; +import net.woodyfolsom.msproj.tree.MonteCarloProperties; + +public class RootParSMAF implements Policy { + private boolean logging = false; + private int numTrees = 1; + private Policy rolloutPolicy; + + public boolean isLogging() { + return logging; + } + + public void setLogging(boolean logging) { + this.logging = logging; + } + + private long timeLimit = 1000L; + + public RootParSMAF(int numTrees, long timeLimit) { + this.numTrees = numTrees; + this.timeLimit = timeLimit; + this.rolloutPolicy = new RandomMovePolicy(); + } + + public RootParSMAF(int numTrees, Policy policyPrototype, long timeLimit) { + this.numTrees = numTrees; + this.timeLimit = timeLimit; + this.rolloutPolicy = policyPrototype; + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Player player) { + Action bestAction = Action.NONE; + + List policyRunners = new ArrayList(); + List simulationThreads = new ArrayList(); + + for (int i = 0; i < numTrees; i++) { + + MonteCarlo policy = new MonteCarloSMAF( + PolicyFactory.createNew(rolloutPolicy), timeLimit, 4); + + //policy.setLogging(true); + + PolicyRunner policyRunner = new PolicyRunner(policy, gameConfig, gameState, + player); + policyRunners.add(policyRunner); + + Thread simThread = new Thread(policyRunner); + simulationThreads.add(simThread); + } + + for (Thread simThread : simulationThreads) { + simThread.start(); + } + + for (Thread simThread : simulationThreads) { + try { + simThread.join(); + } catch (InterruptedException ie) { + System.out + .println("Interrupted while waiting for Monte Carlo simulations to finish."); + } + } + + Map totalReward = new HashMap(); + Map numSims = new HashMap(); + + for (PolicyRunner policyRunner : policyRunners) { + Map qValues = policyRunner.getQvalues(); + for (Action action : 
qValues.keySet()) { + if (totalReward.containsKey(action)) { + totalReward.put(action, totalReward.get(action) + ((AMAFProperties)qValues.get(action)).getAmafWins()); + } else { + totalReward.put(action, ((AMAFProperties)qValues.get(action)).getAmafWins()); + } + if (numSims.containsKey(action)) { + numSims.put(action, numSims.get(action) + ((AMAFProperties)qValues.get(action)).getAmafVisits()); + } else { + numSims.put(action, ((AMAFProperties)qValues.get(action)).getAmafVisits()); + } + } + } + + double bestValue = 0.0; + int totalRollouts = 0; + int bestWins = 0; + int bestSims = 0; + + for (Action action : totalReward.keySet()) + { + int totalWins = totalReward.get(action); + int totalSims = numSims.get(action); + + totalRollouts += totalSims; + + double value = ((double)totalWins) / ((double)totalSims); + + if (bestAction.isNone() || bestValue < value) { + bestAction = action; + bestValue = value; + bestWins = totalWins; + bestSims = totalSims; + } + + } + + if(isLogging()) { + System.out.println("Action " + bestAction + " selected for " + + player + + " with simulated win ratio of " + + (bestValue * 100.0 + "% among " + numTrees + " parallel simulations.")); + System.out.println("It won " + + bestWins + " out of " + bestSims + + " rollouts among " + totalRollouts + + " total rollouts (" + totalReward.size() + + " possible moves evaluated) from the current state."); + } + return bestAction; + } + + @Override + public Action getAction(GameConfig gameConfig, GameState gameState, + Collection prohibitedActions, Player player) { + throw new UnsupportedOperationException( + "Prohibited actions not supported by this class."); + } + + @Override + public int getNumStateEvaluations() { + // TODO Auto-generated method stub + return 0; + } + + class PolicyRunner implements Runnable { + Map qValues; + + private GameConfig gameConfig; + private GameState gameState; + private Player player; + private MonteCarlo policy; + + public PolicyRunner(MonteCarlo policy, GameConfig gameConfig, + GameState gameState, Player player) { + this.policy = policy; + this.gameConfig = gameConfig; + this.gameState = gameState; + this.player = player; + } + + public Map getQvalues() { + return qValues; + } + + @Override + public void run() { + qValues = policy.getQvalues(gameConfig, gameState, player); + } + } + + @Override + public void setState(GameState gameState) { + } + + @Override + public String getName() { + if (rolloutPolicy.getName() == "Random") { + return "RootParallelization"; + } else { + return "RootParallelization-" + rolloutPolicy.getName(); + } + } +} \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/policy/RootParallelization.java b/src/net/woodyfolsom/msproj/policy/RootParallelization.java index a05936a..412474b 100644 --- a/src/net/woodyfolsom/msproj/policy/RootParallelization.java +++ b/src/net/woodyfolsom/msproj/policy/RootParallelization.java @@ -15,6 +15,8 @@ import net.woodyfolsom.msproj.tree.MonteCarloProperties; public class RootParallelization implements Policy { private boolean logging = false; private int numTrees = 1; + private Policy rolloutPolicy; + public boolean isLogging() { return logging; } @@ -28,8 +30,15 @@ public class RootParallelization implements Policy { public RootParallelization(int numTrees, long timeLimit) { this.numTrees = numTrees; this.timeLimit = timeLimit; + this.rolloutPolicy = new RandomMovePolicy(); } + public RootParallelization(int numTrees, Policy policyPrototype, long timeLimit) { + this.numTrees = numTrees; + this.timeLimit = timeLimit; + 
this.rolloutPolicy = policyPrototype; + } + @Override public Action getAction(GameConfig gameConfig, GameState gameState, Player player) { @@ -40,7 +49,7 @@ public class RootParallelization implements Policy { for (int i = 0; i < numTrees; i++) { PolicyRunner policyRunner = new PolicyRunner(new MonteCarloUCT( - new RandomMovePolicy(), timeLimit), gameConfig, gameState, + PolicyFactory.createNew(rolloutPolicy), timeLimit), gameConfig, gameState, player); policyRunners.add(policyRunner); @@ -103,6 +112,7 @@ public class RootParallelization implements Policy { } + if(isLogging()) { System.out.println("Action " + bestAction + " selected for " + player + " with simulated win ratio of " @@ -112,7 +122,7 @@ public class RootParallelization implements Policy { + " rollouts among " + totalRollouts + " total rollouts (" + totalReward.size() + " possible moves evaluated) from the current state."); - + } return bestAction; } @@ -156,8 +166,15 @@ public class RootParallelization implements Policy { } @Override - public void setState(GameState gameState) { - // TODO Auto-generated method stub - + public void setState(GameState gameState) { + } + + @Override + public String getName() { + if (rolloutPolicy.getName() == "Random") { + return "RootParallelization"; + } else { + return "RootParallelization-" + rolloutPolicy.getName(); + } } } \ No newline at end of file diff --git a/src/net/woodyfolsom/msproj/tictactoe/NNDataSetFactory.java b/src/net/woodyfolsom/msproj/tictactoe/NNDataSetFactory.java index efedfba..1bb4b15 100644 --- a/src/net/woodyfolsom/msproj/tictactoe/NNDataSetFactory.java +++ b/src/net/woodyfolsom/msproj/tictactoe/NNDataSetFactory.java @@ -3,13 +3,35 @@ package net.woodyfolsom.msproj.tictactoe; import java.util.ArrayList; import java.util.List; +import net.woodyfolsom.msproj.GameBoard; +import net.woodyfolsom.msproj.GameConfig; +import net.woodyfolsom.msproj.GameResult; +import net.woodyfolsom.msproj.GameState; +import net.woodyfolsom.msproj.Player; +import net.woodyfolsom.msproj.ann.FusekiFilterTrainer; import net.woodyfolsom.msproj.ann.NNData; import net.woodyfolsom.msproj.ann.NNDataPair; +import net.woodyfolsom.msproj.ann.PassFilterTrainer; import net.woodyfolsom.msproj.tictactoe.Game.PLAYER; public class NNDataSetFactory { public static final String[] TTT_INPUT_FIELDS = {"00","01","02","10","11","12","20","21","22"}; - public static final String[] TTT_OUTPUT_FIELDS = {"value"}; + public static final String[] TTT_OUTPUT_FIELDS = {"VALUE"}; + + public static final String[] PASS_INPUT_FIELDS = {"WINNING","PREV_PLY_PASS"}; + public static final String[] PASS_OUTPUT_FIELDS = {"SHOULD_PASS"}; + + public static final String[] FUSEKI_INPUT_FIELDS = { + "00","11","22","33","44","55","66","77","88", + "10","11","22","33","44","55","66","77","88", + "20","11","22","33","44","55","66","77","88", + "30","11","22","33","44","55","66","77","88", + "40","11","22","33","44","55","66","77","88", + "50","11","22","33","44","55","66","77","88", + "60","11","22","33","44","55","66","77","88", + "70","11","22","33","44","55","66","77","88", + "70","11","22","33","44","55","66","77","88"}; + public static final String[] FUSEKI_OUTPUT_FIELDS = {"VALUE"}; public static List> createDataSet(List tttGames) { @@ -25,6 +47,26 @@ public class NNDataSetFactory { return nnDataSet; } + public static String[] getInputFields(Object clazz) { + if (clazz == PassFilterTrainer.class) { + return PASS_INPUT_FIELDS; + } else if (clazz == FusekiFilterTrainer.class) { + return FUSEKI_INPUT_FIELDS; + } else { + throw new 
RuntimeException("Don't know how to return inputFields for NeuralNetwork Trainer of type: " + clazz.getClass().getName()); + } + } + + public static String[] getOutputFields(Object clazz) { + if (clazz == PassFilterTrainer.class) { + return PASS_OUTPUT_FIELDS; + } else if (clazz == FusekiFilterTrainer.class) { + return FUSEKI_OUTPUT_FIELDS; + } else { + throw new RuntimeException("Don't know how to return inputFields for NeuralNetwork Trainer of type: " + clazz.getClass().getName()); + } + } + public static List createDataPairList(GameRecord gameRecord) { List gameData = new ArrayList(); @@ -35,6 +77,95 @@ public class NNDataSetFactory { return gameData; } + public static NNDataPair createDataPair(GameState goState, Object clazz) { + if (clazz == PassFilterTrainer.class) { + return createPassFilterDataPair(goState); + } else if (clazz == FusekiFilterTrainer.class) { + return createFusekiFilterDataPair(goState); + } else { + throw new RuntimeException("Don't know how to create DataPair for NeuralNetwork Trainer of type: " + clazz.getClass().getName()); + } + } + + private static NNDataPair createFusekiFilterDataPair(GameState goState) { + double value; + + if (goState.isTerminal()) { + if (goState.getResult().isWinner(Player.BLACK)) { + value = 1.0; // win for black + } else if (goState.getResult().isWinner(Player.WHITE)) { + value = 0.0; // loss for black + //value = -1.0; + } else {// tie + value = 0.5; + //value = 0.0; //tie + } + } else { + value = 0.0; + } + + int size = goState.getGameConfig().getSize(); + double[] inputValues = new double[size * size]; + for (int i = 0; i < size; i++) { + for (int j = 0; j < size; j++) { + //col then row + char symbol = goState.getGameBoard().getSymbolAt(j, i); + switch (symbol) { + case GameBoard.EMPTY_INTERSECTION : inputValues[i*size+j] = 0.0; + break; + case GameBoard.BLACK_STONE : inputValues[i*size+j] = 1.0; + break; + case GameBoard.WHITE_STONE : inputValues[i*size+j] = -1.0; + break; + } + } + } + + return new NNDataPair(new NNData(FUSEKI_INPUT_FIELDS,inputValues),new NNData(FUSEKI_OUTPUT_FIELDS,new double[]{value})); + } + + private static NNDataPair createPassFilterDataPair(GameState goState) { + double value; + + GameResult result = goState.getResult(); + if (goState.isTerminal()) { + if (result.isWinner(Player.BLACK)) { + value = 1.0; // win for black + } else if (result.isWinner(Player.WHITE)) { + value = 0.0; // loss for black + } else {// tie + value = 0.5; + //value = 0.0; //tie + } + } else { + value = 0.0; + } + + double[] inputValues = new double[4]; + inputValues[0] = result.isWinner(goState.getPlayerToMove()) ? 1.0 : -1.0; + //inputValues[1] = result.isWinner(goState.getPlayerToMove()) ? -1.0 : 1.0; + inputValues[1] = goState.isPrevPlyPass() ? 
1.0 : 0.0; + + return new NNDataPair(new NNData(PASS_INPUT_FIELDS,inputValues),new NNData(PASS_OUTPUT_FIELDS,new double[]{value})); + } + + /* + private static double getNormalizedScore(GameState goState, Player player) { + GameResult gameResult = goState.getResult(); + GameConfig gameConfig = goState.getGameConfig(); + + double maxPoints = Math.pow(gameConfig.getSize(),2); + double komi = gameConfig.getKomi(); + + if (player == Player.BLACK) { + return gameResult.getBlackScore() / maxPoints; + } else if (player == Player.WHITE) { + return gameResult.getWhiteScore() / (maxPoints + komi); + } else { + throw new RuntimeException("Invalid player"); + } + }*/ + public static NNDataPair createDataPair(State tttState) { double value; if (tttState.isTerminal()) { diff --git a/test/net/woodyfolsom/msproj/ann/TTTFilterTest.java b/test/net/woodyfolsom/msproj/ann/TTTFilterTest.java deleted file mode 100644 index 60af997..0000000 --- a/test/net/woodyfolsom/msproj/ann/TTTFilterTest.java +++ /dev/null @@ -1,100 +0,0 @@ -package net.woodyfolsom.msproj.ann; - -import java.io.File; -import java.io.IOException; -import java.util.List; - -import net.woodyfolsom.msproj.ann.NNData; -import net.woodyfolsom.msproj.ann.NNDataPair; -import net.woodyfolsom.msproj.ann.NeuralNetFilter; -import net.woodyfolsom.msproj.ann.TTTFilter; -import net.woodyfolsom.msproj.tictactoe.GameRecord; -import net.woodyfolsom.msproj.tictactoe.NNDataSetFactory; -import net.woodyfolsom.msproj.tictactoe.Referee; - -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TTTFilterTest { - private static final String FILENAME = "tttPerceptron.net"; - - @AfterClass - public static void deleteNewNet() { - File file = new File(FILENAME); - if (file.exists()) { - file.delete(); - } - } - - @BeforeClass - public static void deleteSavedNet() { - File file = new File(FILENAME); - if (file.exists()) { - file.delete(); - } - } - - @Test - public void testLearn() throws IOException { - double alpha = 0.5; - double lambda = 0.0; - int maxEpochs = 1000; - - NeuralNetFilter nnLearner = new TTTFilter(alpha, lambda, maxEpochs); - - // Create trainingSet from a tournament of random games. - // Future iterations will use Epsilon-greedy play from a policy based on - // this network to generate additional datasets. 
- List tournament = new Referee().play(1); - List> trainingSet = NNDataSetFactory - .createDataSet(tournament); - - System.out.println("Generated " + trainingSet.size() - + " datasets from random self-play."); - nnLearner.learnSequences(trainingSet); - System.out.println("Learned network after " - + nnLearner.getActualTrainingEpochs() + " training epochs."); - - double[][] validationSet = new double[7][]; - - // empty board - validationSet[0] = new double[] { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0 }; - // center - validationSet[1] = new double[] { 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, - 0.0, 0.0 }; - // top edge - validationSet[2] = new double[] { 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0 }; - // left edge - validationSet[3] = new double[] { 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, - 0.0, 0.0 }; - // corner - validationSet[4] = new double[] { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0 }; - // win - validationSet[5] = new double[] { 1.0, 1.0, 1.0, -1.0, -1.0, 0.0, 0.0, - -1.0, 0.0 }; - // loss - validationSet[6] = new double[] { -1.0, 1.0, 0.0, 1.0, -1.0, 1.0, 0.0, - 0.0, -1.0 }; - - String[] inputNames = new String[] { "00", "01", "02", "10", "11", - "12", "20", "21", "22" }; - String[] outputNames = new String[] { "values" }; - - System.out.println("Output from eval set (learned network):"); - testNetwork(nnLearner, validationSet, inputNames, outputNames); - } - - private void testNetwork(NeuralNetFilter nnLearner, - double[][] validationSet, String[] inputNames, String[] outputNames) { - for (int valIndex = 0; valIndex < validationSet.length; valIndex++) { - NNDataPair dp = new NNDataPair(new NNData(inputNames, - validationSet[valIndex]), new NNData(outputNames, - validationSet[valIndex])); - System.out.println(dp + " => " + nnLearner.compute(dp)); - } - } -} \ No newline at end of file diff --git a/test/net/woodyfolsom/msproj/ann/XORFilterTest.java b/test/net/woodyfolsom/msproj/ann/XORFilterTest.java index 0ac82d8..02216ea 100644 --- a/test/net/woodyfolsom/msproj/ann/XORFilterTest.java +++ b/test/net/woodyfolsom/msproj/ann/XORFilterTest.java @@ -81,10 +81,10 @@ public class XORFilterTest { @Test public void testLearnSaveLoad() throws IOException { - NeuralNetFilter nnLearner = new XORFilter(0.5,0.0); + NeuralNetFilter nnLearner = new XORFilter(0.05,0.0); // create training set (logical XOR function) - int size = 2; + int size = 1; double[][] trainingInput = new double[4 * size][]; double[][] trainingOutput = new double[4 * size][]; for (int i = 0; i < size; i++) { @@ -106,7 +106,7 @@ public class XORFilterTest { trainingSet.add(new NNDataPair(new NNData(inputNames,trainingInput[i]),new NNData(outputNames,trainingOutput[i]))); } - nnLearner.setMaxTrainingEpochs(1); + nnLearner.setMaxTrainingEpochs(10000); nnLearner.learnPatterns(trainingSet); System.out.println("Learned network after " + nnLearner.getActualTrainingEpochs() + " training epochs."); diff --git a/ttt.net b/ttt.net index 7f9550d..9c8f164 100644 --- a/ttt.net +++ b/ttt.net
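A minimal usage sketch for the trained pass filter, assuming MultiLayerPerceptron.load(...) populates the instance it is called on; the class and method names are the ones introduced in the diffs above, while PassFilterDemo itself is a hypothetical driver class and not part of the patch:

import java.io.FileInputStream;
import java.io.IOException;

import net.woodyfolsom.msproj.Action;
import net.woodyfolsom.msproj.GameConfig;
import net.woodyfolsom.msproj.GameRecord;
import net.woodyfolsom.msproj.Player;
import net.woodyfolsom.msproj.ann.MultiLayerPerceptron;
import net.woodyfolsom.msproj.policy.NeuralNetPolicy;

public class PassFilterDemo {
	public static void main(String[] args) throws IOException {
		// Load pass.net into the same instance that will serve as the filter.
		MultiLayerPerceptron passNet = new MultiLayerPerceptron();
		try (FileInputStream fis = new FileInputStream("pass.net")) {
			if (!passNet.load(fis)) {
				System.out.println("Error loading pass.net from file.");
				return;
			}
		}

		// Hand the loaded network to the policy as its pass filter.
		NeuralNetPolicy policy = new NeuralNetPolicy();
		policy.setPassFilter(passNet);

		// Ask the policy for one move from the opening position of a 9x9 game.
		GameConfig gameConfig = new GameConfig(9);
		GameRecord gameRecord = new GameRecord(gameConfig);
		Player toMove = gameRecord.getPlayerToMove();
		Action action = policy.getAction(gameConfig, gameRecord.getGameState(), toMove);
		System.out.println(toMove + " plays " + action);
	}
}

Loading into the same object that is later passed to setPassFilter avoids discarding the result of load, as happens when load is invoked on a temporary MultiLayerPerceptron.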