HeadlinePuller can import all news articles for Fortune 50 within a date range from Yahoo Finance historical data, inserting the headline text into the headlines table of database cs6601p3 on woodyfolsom.net:3306.
Limited to 25 headlines per day (with many repetitions) per Yahoo Finance REST functionality.
This commit is contained in:
@@ -1,28 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:aop="http://www.springframework.org/schema/aop"
|
||||
xmlns:context="http://www.springframework.org/schema/context"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
|
||||
http://www.springframework.org/schema/aop
|
||||
http://www.springframework.org/schema/aop/spring-aop-2.5.xsd
|
||||
http://www.springframework.org/schema/context
|
||||
http://www.springframework.org/schema/context/spring-context-2.5.xsd"
|
||||
default-autowire="byName">
|
||||
|
||||
<!--
  JDBC data source for the cs6601p3 MySQL database on woodyfolsom.net:3306.
  NOTE(review): the username and password below are stored in plain text in a
  file that is tracked by version control. Consider moving them to an external,
  untracked properties file and rotating the password.
-->
<bean id="dmdataSource"
|
||||
class="org.springframework.jdbc.datasource.DriverManagerDataSource">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url" value="jdbc:mysql://woodyfolsom.net:3306/cs6601p3" />
|
||||
<property name="username" value="cs6601" />
|
||||
<property name="password" value="n0nst@p" />
|
||||
</bean>
|
||||
|
||||
<bean id="mySQLHeadlineSvc" class="net.woodyfolsom.cs6601.p3.svc.MySQLHeadlineServiceImpl" />
|
||||
<bean id="yahooHeadlineSvc" class="net.woodyfolsom.cs6601.p3.svc.YahooHeadlineServiceImpl" />
|
||||
|
||||
<context:annotation-config />
|
||||
<context:component-scan base-package="net.woodyfolsom.cs6601.p3"/>
|
||||
|
||||
</beans>
|
||||
@@ -6,47 +6,119 @@ import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.Charset;
|
||||
import java.text.DateFormat;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
import org.springframework.context.support.FileSystemXmlApplicationContext;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import net.woodyfolsom.cs6601.p3.domain.Company;
|
||||
import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
import net.woodyfolsom.cs6601.p3.svc.HeadlineService;
|
||||
import net.woodyfolsom.cs6601.p3.svc.YahooHeadlineServiceImpl;
|
||||
|
||||
@Component
|
||||
public class HeadlinePuller {
|
||||
private static final File stockSymbolsCSV = new File("stock_symbols.csv");
|
||||
private static final int IO_EXCEPTION = 1;
|
||||
private static final int STOCK_SYMBOL_CSV_NOT_FOUND = 2;
|
||||
|
||||
private static final int INVALID_END_DATE = 1;
|
||||
private static final int INVALID_MODE = 2;
|
||||
private static final int INVALID_START_DATE = 3;
|
||||
private static final int IO_EXCEPTION = 4;
|
||||
private static final int NO_ARGS = 5;
|
||||
private static final int STOCK_SYMBOL_CSV_NOT_FOUND = 6;
|
||||
|
||||
@Autowired
|
||||
HeadlineService mySQLHeadlineServiceImpl;
|
||||
@Autowired
|
||||
HeadlineService yahooHeadlineServiceImpl;
|
||||
|
||||
|
||||
private static void printUsage() {
|
||||
System.out
|
||||
.println("Usage: java -jar cs6601p3.jar [insert|delete] mm/dd/yyyy-mm/dd/yyyy");
|
||||
}
|
||||
|
||||
private enum MODE {
|
||||
insert, invalid, delete
|
||||
}
|
||||
|
||||
public static void main(String... args) {
|
||||
ApplicationContext context=new ClassPathXmlApplicationContext(new String[]{"/AppContext.xml"});
|
||||
HeadlinePuller headlinePuller = context.getBean(HeadlinePuller.class);
|
||||
try {
|
||||
List<Company> fortune50 = headlinePuller.getFortune50(stockSymbolsCSV);
|
||||
for (Company company : fortune50) {
|
||||
System.out.println("Getting headlines for Fortune 50 company #" + company.getId() + " (" + company.getName() + ")...");
|
||||
Date today = new Date();
|
||||
List<Headline> headlines = headlinePuller.pullHeadlines(company.getStockSymbol(), today);
|
||||
headlinePuller.insertHeadlines(company.getStockSymbol(), today, headlines);
|
||||
System.out.println("Waiting 10 seconds to accommodate Yahoo throttling...");
|
||||
try {
|
||||
Thread.sleep(10000L);
|
||||
} catch (InterruptedException ie) {
|
||||
System.out.println("Interrupted while waiting, exiting");
|
||||
MODE mode = MODE.invalid;
|
||||
if (args.length != 2) {
|
||||
printUsage();
|
||||
System.exit(NO_ARGS);
|
||||
} else {
|
||||
try {
|
||||
mode = MODE.valueOf(args[0]);
|
||||
} catch (Exception ex) {
|
||||
System.out.println("Invalid mode: " + args[0]);
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == MODE.invalid) {
|
||||
System.exit(INVALID_MODE);
|
||||
}
|
||||
|
||||
if (mode == MODE.delete) {
|
||||
System.out.println("Mode = delete. All data will be purged from HEADLINES table. Continue? [y/n]");
|
||||
byte[] buf = new byte[10];
|
||||
try {
|
||||
int read = System.in.read(buf,0,10);
|
||||
String conf = new String(buf,0,read,Charset.defaultCharset());
|
||||
System.out.println("CONF = '" + conf +"'");
|
||||
if (conf.charAt(0) == 'y') {
|
||||
System.out.println("Delete mode confirmed. Continuing...");
|
||||
System.exit(0);
|
||||
} else {
|
||||
System.out.println("Delete mode cancelled.");
|
||||
System.exit(0);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
System.exit(IO_EXCEPTION);
|
||||
}
|
||||
}
|
||||
String[] dateFields = args[1].split("-");
|
||||
DateFormat dateFormat = new SimpleDateFormat("MM/dd/yyyy");
|
||||
Date startDate = null;
|
||||
try {
|
||||
startDate = dateFormat.parse(dateFields[0]);
|
||||
} catch (ParseException pe) {
|
||||
System.out.println("Invalid start date: " + dateFields[0]);
|
||||
System.exit(INVALID_START_DATE);
|
||||
}
|
||||
Date endDate = null;
|
||||
try {
|
||||
endDate = dateFormat.parse(dateFields[1]);
|
||||
} catch (ParseException pe) {
|
||||
System.out.println("Invalid end date: " + dateFields[0]);
|
||||
System.exit(INVALID_END_DATE);
|
||||
}
|
||||
|
||||
ApplicationContext context = new FileSystemXmlApplicationContext(
|
||||
new String[] { "AppContext.xml" });
|
||||
HeadlinePuller headlinePuller = context.getBean(HeadlinePuller.class);
|
||||
Calendar calendar = Calendar.getInstance();
|
||||
try {
|
||||
List<Company> fortune50 = headlinePuller
|
||||
.getFortune50(stockSymbolsCSV);
|
||||
for (Company company : fortune50) {
|
||||
System.out.println("Getting headlines for Fortune 50 company #"
|
||||
+ company.getId() + " (" + company.getName() + ")...");
|
||||
Date today;
|
||||
for (calendar.setTime(startDate); (today = calendar.getTime())
|
||||
.compareTo(endDate) <= 0; calendar
|
||||
.add(Calendar.DATE, 1)) {
|
||||
List<Headline> headlines = headlinePuller.pullHeadlines(
|
||||
company.getStockSymbol(), today);
|
||||
int[] updates = headlinePuller.mySQLHeadlineServiceImpl.insertHeadlines(headlines);
|
||||
System.out.println(updates.length + " rows updated");
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException fnfe) {
|
||||
@@ -60,23 +132,15 @@ public class HeadlinePuller {
|
||||
}
|
||||
}
|
||||
|
||||
private void insertHeadlines(String stockSymbol, Date date, List<Headline> headlines) {
|
||||
for (Headline headline : headlines) {
|
||||
mySQLHeadlineServiceImpl.insertHeadline(headline);
|
||||
}
|
||||
}
|
||||
|
||||
private List<Headline> pullHeadlines(String stockSymbol, Date date) {
|
||||
List<Headline> headlines = yahooHeadlineServiceImpl.getHeadlines(stockSymbol, date);
|
||||
for (Headline headline : headlines) {
|
||||
System.out.println("Got headline: " + headline);
|
||||
}
|
||||
|
||||
List<Headline> headlines = yahooHeadlineServiceImpl.getHeadlines(
|
||||
stockSymbol, date);
|
||||
System.out.println("Pulled " + headlines.size() + " headlines for " + stockSymbol + " on " + date);
|
||||
return headlines;
|
||||
}
|
||||
|
||||
private List<Company> getFortune50(File csvFile) throws FileNotFoundException,
|
||||
IOException {
|
||||
|
||||
private List<Company> getFortune50(File csvFile)
|
||||
throws FileNotFoundException, IOException {
|
||||
List<Company> fortune50 = new ArrayList<Company>();
|
||||
FileInputStream fis = new FileInputStream(csvFile);
|
||||
InputStreamReader reader = new InputStreamReader(fis);
|
||||
@@ -88,10 +152,12 @@ public class HeadlinePuller {
|
||||
}
|
||||
String[] fields = csvline.split(",");
|
||||
if (fields.length != 3) {
|
||||
throw new RuntimeException("Badly formatted csv file name (3 values expected): " + csvline);
|
||||
throw new RuntimeException(
|
||||
"Badly formatted csv file name (3 values expected): "
|
||||
+ csvline);
|
||||
}
|
||||
int id = Integer.valueOf(fields[0]);
|
||||
fortune50.add(new Company(id,fields[1],fields[2]));
|
||||
fortune50.add(new Company(id, fields[1], fields[2]));
|
||||
}
|
||||
return fortune50;
|
||||
}
|
||||
|
||||
@@ -8,7 +8,8 @@ import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
public interface HeadlineDao {
|
||||
|
||||
int deleteById(int id);
|
||||
int insert(Headline player);
|
||||
int insert(Headline headline);
|
||||
int[] insertBatch(List<Headline> headlines);
|
||||
|
||||
Headline select(int id);
|
||||
List<Headline> select(String stock, Date date);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package net.woodyfolsom.cs6601.p3.dao;
|
||||
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Date;
|
||||
@@ -8,6 +9,7 @@ import java.util.List;
|
||||
import javax.sql.DataSource;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.jdbc.core.BatchPreparedStatementSetter;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.jdbc.core.simple.ParameterizedRowMapper;
|
||||
import org.springframework.stereotype.Repository;
|
||||
@@ -34,6 +36,26 @@ public class HeadlineDaoImpl implements HeadlineDao {
|
||||
return jdbcTemplate.update(INSERT_STMT, headline.getText(), headline.getDate(), headline.getStock(), headline.getDataset());
|
||||
}
|
||||
|
||||
public int[] insertBatch(final List<Headline> headlines){
|
||||
|
||||
return jdbcTemplate.batchUpdate(INSERT_STMT, new BatchPreparedStatementSetter() {
|
||||
|
||||
@Override
|
||||
public void setValues(PreparedStatement ps, int i) throws SQLException {
|
||||
Headline headline = headlines.get(i);
|
||||
ps.setString(1, headline.getText());
|
||||
ps.setDate(2, new java.sql.Date(headline.getDate().getTime()));
|
||||
ps.setString(3, headline.getStock() );
|
||||
ps.setInt(4, headline.getDataset() );
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBatchSize() {
|
||||
return headlines.size();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public Headline select(int headlineId) {
|
||||
return jdbcTemplate.queryForObject(SELECT_BY_ID_QRY,
|
||||
new RequestMapper(), headlineId);
|
||||
@@ -54,6 +76,10 @@ public class HeadlineDaoImpl implements HeadlineDao {
|
||||
@Override
|
||||
public Headline mapRow(ResultSet rs, int arg1) throws SQLException {
|
||||
Headline headline = new Headline();
|
||||
headline.setText(rs.getString("text"));
|
||||
headline.setStock(rs.getString("stock"));
|
||||
headline.setDate(rs.getDate("date"));
|
||||
headline.setDataset(rs.getInt("dataset"));
|
||||
return headline;
|
||||
}
|
||||
|
||||
|
||||
@@ -7,5 +7,6 @@ import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
|
||||
public interface HeadlineService {
|
||||
int insertHeadline(Headline headline);
|
||||
int[] insertHeadlines(List<Headline> headline);
|
||||
List<Headline> getHeadlines(String stock, Date date);
|
||||
}
|
||||
@@ -23,6 +23,11 @@ public class MySQLHeadlineServiceImpl implements HeadlineService {
|
||||
return headlineDao.insert(headline);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] insertHeadlines(List<Headline> headlines) {
|
||||
return headlineDao.insertBatch(headlines);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Headline> getHeadlines(String stock, Date date) {
|
||||
return headlineDao.select(stock, date);
|
||||
|
||||
@@ -31,13 +31,18 @@ public class YahooHeadlineServiceImpl implements HeadlineService {
|
||||
private static final String STORY_DATE_FIELD = "STORY_DATE";
|
||||
private static final String STOCK_SYMBOL_FIELD = "STOCK_SYMBOL";
|
||||
|
||||
private static final String QUERY_URL = "http://query.yahooapis.com/v1/public/yql?q=select%20content%20from%20html%20where%20url%3D%22http%3A%2F%2Ffinance.yahoo.com%2Fq%2Fh%3Fs%3DSTOCK_SYMBOL%26t%3DSTORY_DATE%22%20and%20xpath%3D'%2F%2Fdiv%5B%40class%3D%22mod%20yfi_quote_headline%20withsky%22%5D%2Ful%2Fli%2Fa'&diagnostics=true";
|
||||
private static final String QUERY_URL = "http://query.yahooapis.com/v1/public/yql?q=select%20content%20from%20html%20where%20url%3D%22http%3A%2F%2Ffinance.yahoo.com%2Fq%2Fh%3Fs%3DSTOCK_SYMBOL%26t%3DSTORY_DATE%22%20and%20xpath%3D'%2F%2Fdiv%5B%40class%3D%22mod%20yfi_quote_headline%20withsky%22%5D%2Ful%2Fli%2Fa'";
|
||||
|
||||
@Override
|
||||
public int insertHeadline(Headline headline) {
|
||||
throw new UnsupportedOperationException("This implementation does not support inserting headlines.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] insertHeadlines(List<Headline> headline) {
|
||||
throw new UnsupportedOperationException("This implementation does not support inserting headlines.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Headline> getHeadlines(String stock, Date date) {
|
||||
List<Headline> headlineList = new ArrayList<Headline>();
|
||||
@@ -57,7 +62,6 @@ public class YahooHeadlineServiceImpl implements HeadlineService {
|
||||
|
||||
while ((line = buf.readLine()) != null) {
|
||||
sb.append(line);
|
||||
//System.out.println(line);
|
||||
}
|
||||
|
||||
buf.close();
|
||||
@@ -67,7 +71,6 @@ public class YahooHeadlineServiceImpl implements HeadlineService {
|
||||
Pattern pattern = Pattern.compile("<a>.*?</a>");
|
||||
Matcher matcher = pattern.matcher(xmlResults);
|
||||
while (matcher.find()) {
|
||||
System.out.println();
|
||||
String anchorValue = xmlResults.substring(matcher.start()+3,matcher.end()-4);
|
||||
headlineList.add(new Headline(stock,anchorValue,date,1));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user