Removed the dangerous code that populates the database - it must be retrieved from git history in order to repopulate the headlines table.

(This should not need to happen).
PricePoller and ValidationSetCreator generate the 1-, 2-, and 3-grams.txt files and the validation.txt file, respectively.
MySQLHeadlineDaoImplTest reshuffles the training and validation datasets in a 60-40 ratio.
This commit is contained in:
Woody Folsom
2012-04-22 21:24:01 -04:00
parent 6e3680426e
commit 5270359b10
11 changed files with 25 additions and 313 deletions

View File

@@ -1,11 +0,0 @@
package net.woodyfolsom.cs6601.p3;
import org.junit.Test;
/**
 * JUnit test class containing a single placeholder test.
 * The test method has an empty body, so it exercises no code and makes no assertions.
 */
public class HeadlinePullerTest {
// Placeholder: empty body, so this test always passes without verifying anything.
@Test
public void testGetStartDate() {
}
}

View File

@@ -28,17 +28,17 @@ public class MySQLHeadlineDaoImplTest {
}
//Change this back to @Test to run it... but beware, it shuffles the datasets. Best done n times for n-fold cross validation.
@Ignore
@Test
public void testAssignRandomDatasets() {
int trainingPct = 80;
int testPct = 10;
int valPct = 10;
int trainingPct = 60;
//int testPct = 10;
int valPct = 40;
//assignment fails if a character is omitted from valPct (e.g. 80% 10% 1% by accident)
assertFalse(headlineSvc.assignRandomDatasets(trainingPct,testPct,valPct/10));
assertFalse(headlineSvc.assignRandomDatasets(trainingPct/*,testPct*/,valPct/10));
//assignment succeeds if requested ratio is 8-1-1
assertTrue(headlineSvc.assignRandomDatasets(trainingPct,testPct,valPct));
assertTrue(headlineSvc.assignRandomDatasets(trainingPct/*,testPct*/,valPct));
int allCount = headlineSvc.getCount();
int trainingCount = headlineSvc.getCount(1);
@@ -48,7 +48,7 @@ public class MySQLHeadlineDaoImplTest {
assertEquals(trainingCount + testCount + valCount, allCount);
assertEquals((double)trainingCount/allCount,(double)trainingPct / 100.0,0.01);
assertEquals((double)testCount/allCount,(double)testPct / 100.0,0.01);
//assertEquals((double)testCount/allCount,(double)testPct / 100.0,0.01);
assertEquals((double)valCount/allCount,(double)valPct / 100.0,0.01);
}
}