Added script to pull historical stock data and resulting data files (1 per company). Added code to generate average price change per 1, 2 and 3-gram. Added code to output average price change per headline for VALIDATION dataset.

This commit is contained in:
Woody Folsom
2012-04-20 21:22:54 -04:00
parent eec32b19c1
commit 6e3680426e
65 changed files with 360216 additions and 102 deletions

View File

@@ -6,12 +6,14 @@ import java.util.List;
import net.woodyfolsom.cs6601.p3.domain.Headline;
/**
 * Data-access operations for {@link Headline} rows.
 * Behavioural notes on individual methods describe HeadlineDaoImpl as far as it is
 * visible next to this interface; other implementations may differ.
 */
public interface HeadlineDao {
/**
 * Re-assigns headlines to datasets according to the three given shares.
 * HeadlineDaoImpl returns false without issuing any statement when the values do not
 * sum to 100; otherwise it labels rows 1 (training), 2 (test) or 3 (validation) and
 * returns true.
 */
boolean assignRandomDatasets(int training, int test, int validation);
/** Counts the rows of the headlines table. */
int getCount();
/** Counts the headlines whose dataset column equals the given value. */
int getCount(int dataset);
/**
 * Deletes the headline with the given id.
 * NOTE(review): the result is presumably the number of rows removed - confirm.
 */
int deleteById(int id);
/**
 * Stores one headline.
 * NOTE(review): implementation not visible here; the int result is presumably a row
 * count - confirm.
 */
int insert(Headline headline);
/**
 * Stores several headlines in one call.
 * NOTE(review): implementation not visible here; the array presumably holds one
 * result per headline - confirm.
 */
int[] insertBatch(List<Headline> headlines);
/**
 * Fetches the headline with the given id.
 * NOTE(review): behaviour when no row matches is not visible here - confirm.
 */
Headline select(int id);
/**
 * Lists the headlines recorded for one stock on one date.
 * NOTE(review): HeadlineDaoImpl's query text may additionally restrict results to
 * dataset 1 - confirm.
 */
List<Headline> select(String stock, Date date);
/**
 * Lists the headlines for one stock dated between startDate and endDate; the visible
 * query compares with >= and <=, so both bounds are inclusive.
 */
List<Headline> select(String stock, Date startDate, Date endDate);
/**
 * Same date-range lookup as the three-argument variant, restricted to rows whose
 * dataset column equals the given value.
 */
List<Headline> select(String stock, Date startDate, Date endDate, int dataset);
}

View File

@@ -18,16 +18,36 @@ import net.woodyfolsom.cs6601.p3.domain.Headline;
@Repository
public class HeadlineDaoImpl implements HeadlineDao {
// Row counts: whole table, and one dataset only.
private static final String COUNT_ALL_QRY = "SELECT COUNT(1) FROM headlines";
private static final String COUNT_DATASET_QRY = "SELECT COUNT(1) FROM headlines where dataset = ?";
private static final String DELETE_BY_ID_STMT = "DELETE from headlines WHERE id = ?";
private static final String INSERT_STMT = "INSERT INTO headlines (text, date, stock, dataset) values (?, ?, ?, ?)";
private static final String SELECT_BY_ID_QRY = "SELECT * from headlines WHERE id = ?";
// Single-day lookup. The dataset is fixed to 1 inside the SQL text, so this query takes
// exactly two bind parameters: stock, date.
private static final String SELECT_BY_STOCK_QRY = "SELECT * from headlines WHERE stock = ? AND date = ? AND dataset = 1";
// Date-range lookup (both bounds inclusive); four bind parameters: stock, start, end, dataset.
private static final String SELECT_BY_DATE_RANGE_QRY = "SELECT * from headlines WHERE stock = ? AND date >= ? AND date <= ? AND dataset = ?";
// Stamps every row with a temporary bucket number. Assuming RAND() returns a value in
// [0, 1), FLOOR(RAND() * (201 - 101) + 101) yields the integers 101..200 - one bucket per
// percentage point. An upper constant of 200 would stop at 199 and leave the last bucket empty.
private static final String ASSIGN_RANDOM_PCT_QRY = "update headlines set dataset = (select FLOOR(RAND() * (201 - 101) + 101))";
// The three remaps turn bucket numbers into dataset ids 1, 2 and 3. Lower bounds are
// exclusive so the ranges never overlap and do not rely on the order they are run in.
private static final String REMAP_TRAINING_QRY = "update headlines set dataset = 1 where dataset >= 101 and dataset <= (100 + ?)";
private static final String REMAP_TEST_QRY = "update headlines set dataset = 2 where dataset > (100 + ?) and dataset <= (100 + ?)";
private static final String REMAP_VAL_QRY = "update headlines set dataset = 3 where dataset > (100 + ?) and dataset <= 200";
private JdbcTemplate jdbcTemplate;
/**
 * Randomly splits the headlines table into training (1), test (2) and validation (3)
 * datasets.
 *
 * Every row is first stamped with a random bucket number above 100
 * (ASSIGN_RANDOM_PCT_QRY); the three remap statements then convert consecutive bucket
 * ranges into dataset ids. The four UPDATE statements are issued one after another
 * through jdbcTemplate; nothing in this method wraps them in a transaction.
 *
 * @param training   percentage of rows for dataset 1
 * @param test       percentage of rows for dataset 2
 * @param validation percentage of rows for dataset 3
 * @return false, with no statement executed, when any share is negative or the shares
 *         do not sum to 100; true once all four updates have been issued
 */
@Override
public boolean assignRandomDatasets(int training, int test, int validation) {
    // A negative share can still sum to 100 (e.g. 150, -50, 0) but produces bucket
    // ranges that make no sense, so reject it up front.
    if (training < 0 || test < 0 || validation < 0) {
        return false;
    }
    // Sum in long so that very large arguments cannot wrap around to 100.
    long total = (long) training + test + validation;
    if (total != 100L) {
        return false;
    }

    int trainingEnd = training;
    int testEnd = training + test;

    jdbcTemplate.update(ASSIGN_RANDOM_PCT_QRY);
    jdbcTemplate.update(REMAP_TRAINING_QRY, trainingEnd);
    jdbcTemplate.update(REMAP_TEST_QRY, trainingEnd, testEnd);
    jdbcTemplate.update(REMAP_VAL_QRY, testEnd);
    return true;
}
public int deleteById(int headlineId) {
return jdbcTemplate.update(DELETE_BY_ID_STMT,
new RequestMapper(), headlineId);
@@ -64,12 +84,12 @@ public class HeadlineDaoImpl implements HeadlineDao {
public List<Headline> select(String stock, Date date) {
return jdbcTemplate.query(SELECT_BY_STOCK_QRY,
new RequestMapper(), stock, date);
new RequestMapper(), stock, date, 1);
}
public List<Headline> select(String stock, Date startDate, Date endDate) {
public List<Headline> select(String stock, Date startDate, Date endDate, int dataset) {
return jdbcTemplate.query(SELECT_BY_DATE_RANGE_QRY,
new RequestMapper(), stock, startDate, endDate);
new RequestMapper(), stock, startDate, endDate, dataset);
}
@Autowired
@@ -82,6 +102,7 @@ public class HeadlineDaoImpl implements HeadlineDao {
@Override
public Headline mapRow(ResultSet rs, int arg1) throws SQLException {
Headline headline = new Headline();
headline.setId(rs.getInt("id"));
headline.setText(rs.getString("text"));
headline.setStock(rs.getString("stock"));
headline.setDate(rs.getDate("date"));
@@ -90,4 +111,14 @@ public class HeadlineDaoImpl implements HeadlineDao {
}
}
/**
 * Counts the rows of the headlines table.
 *
 * @return the value produced by COUNT_ALL_QRY, or 0 when the query yields no value
 */
@Override
public int getCount() {
    final Integer total = jdbcTemplate.queryForObject(COUNT_ALL_QRY, Integer.class);
    // A missing result is reported as zero.
    return total == null ? 0 : total.intValue();
}
/**
 * Counts the headlines that belong to one dataset.
 *
 * @param dataset value bound to the dataset placeholder of COUNT_DATASET_QRY
 * @return the value produced by the query, or 0 when the query yields no value
 */
@Override
public int getCount(int dataset) {
    final Integer matching = jdbcTemplate.queryForObject(COUNT_DATASET_QRY, Integer.class, dataset);
    // A missing result is reported as zero.
    return matching == null ? 0 : matching.intValue();
}
}