Functional data import from Yahoo Finance news using YQL (Yahoo Query Language) and XPATH. Data is stuffed into MySQL database cs6601 on woodyfolsom.net.
This commit is contained in:
98
src/net/woodyfolsom/cs6601/p3/HeadlinePuller.java
Normal file
98
src/net/woodyfolsom/cs6601/p3/HeadlinePuller.java
Normal file
@@ -0,0 +1,98 @@
|
||||
package net.woodyfolsom.cs6601.p3;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import net.woodyfolsom.cs6601.p3.domain.Company;
|
||||
import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
import net.woodyfolsom.cs6601.p3.svc.HeadlineService;
|
||||
import net.woodyfolsom.cs6601.p3.svc.YahooHeadlineServiceImpl;
|
||||
|
||||
@Component
|
||||
public class HeadlinePuller {
|
||||
private static final File stockSymbolsCSV = new File("stock_symbols.csv");
|
||||
private static final int IO_EXCEPTION = 1;
|
||||
private static final int STOCK_SYMBOL_CSV_NOT_FOUND = 2;
|
||||
|
||||
@Autowired
|
||||
HeadlineService mySQLHeadlineServiceImpl;
|
||||
@Autowired
|
||||
HeadlineService yahooHeadlineServiceImpl;
|
||||
|
||||
public static void main(String... args) {
|
||||
ApplicationContext context=new ClassPathXmlApplicationContext(new String[]{"/AppContext.xml"});
|
||||
HeadlinePuller headlinePuller = context.getBean(HeadlinePuller.class);
|
||||
try {
|
||||
List<Company> fortune50 = headlinePuller.getFortune50(stockSymbolsCSV);
|
||||
for (Company company : fortune50) {
|
||||
System.out.println("Getting headlines for Fortune 50 company #" + company.getId() + " (" + company.getName() + ")...");
|
||||
Date today = new Date();
|
||||
List<Headline> headlines = headlinePuller.pullHeadlines(company.getStockSymbol(), today);
|
||||
headlinePuller.insertHeadlines(company.getStockSymbol(), today, headlines);
|
||||
System.out.println("Waiting 10 seconds to accommodate Yahoo throttling...");
|
||||
try {
|
||||
Thread.sleep(10000L);
|
||||
} catch (InterruptedException ie) {
|
||||
System.out.println("Interrupted while waiting, exiting");
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException fnfe) {
|
||||
System.out.println("Stock symbol CSV file does not exist: "
|
||||
+ stockSymbolsCSV);
|
||||
System.exit(STOCK_SYMBOL_CSV_NOT_FOUND);
|
||||
} catch (IOException ioe) {
|
||||
System.out.println("Stock symbol CSV file does not exist: "
|
||||
+ stockSymbolsCSV);
|
||||
System.exit(IO_EXCEPTION);
|
||||
}
|
||||
}
|
||||
|
||||
private void insertHeadlines(String stockSymbol, Date date, List<Headline> headlines) {
|
||||
for (Headline headline : headlines) {
|
||||
mySQLHeadlineServiceImpl.insertHeadline(headline);
|
||||
}
|
||||
}
|
||||
|
||||
private List<Headline> pullHeadlines(String stockSymbol, Date date) {
|
||||
List<Headline> headlines = yahooHeadlineServiceImpl.getHeadlines(stockSymbol, date);
|
||||
for (Headline headline : headlines) {
|
||||
System.out.println("Got headline: " + headline);
|
||||
}
|
||||
|
||||
return headlines;
|
||||
}
|
||||
|
||||
private List<Company> getFortune50(File csvFile) throws FileNotFoundException,
|
||||
IOException {
|
||||
List<Company> fortune50 = new ArrayList<Company>();
|
||||
FileInputStream fis = new FileInputStream(csvFile);
|
||||
InputStreamReader reader = new InputStreamReader(fis);
|
||||
BufferedReader buf = new BufferedReader(reader);
|
||||
String csvline = null;
|
||||
while ((csvline = buf.readLine()) != null) {
|
||||
if (csvline.length() == 0) {
|
||||
continue;
|
||||
}
|
||||
String[] fields = csvline.split(",");
|
||||
if (fields.length != 3) {
|
||||
throw new RuntimeException("Badly formatted csv file name (3 values expected): " + csvline);
|
||||
}
|
||||
int id = Integer.valueOf(fields[0]);
|
||||
fortune50.add(new Company(id,fields[1],fields[2]));
|
||||
}
|
||||
return fortune50;
|
||||
}
|
||||
}
|
||||
15
src/net/woodyfolsom/cs6601/p3/dao/HeadlineDao.java
Normal file
15
src/net/woodyfolsom/cs6601/p3/dao/HeadlineDao.java
Normal file
@@ -0,0 +1,15 @@
|
||||
package net.woodyfolsom.cs6601.p3.dao;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
|
||||
public interface HeadlineDao {
|
||||
|
||||
int deleteById(int id);
|
||||
int insert(Headline player);
|
||||
|
||||
Headline select(int id);
|
||||
List<Headline> select(String stock, Date date);
|
||||
}
|
||||
61
src/net/woodyfolsom/cs6601/p3/dao/HeadlineDaoImpl.java
Normal file
61
src/net/woodyfolsom/cs6601/p3/dao/HeadlineDaoImpl.java
Normal file
@@ -0,0 +1,61 @@
|
||||
package net.woodyfolsom.cs6601.p3.dao;
|
||||
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import javax.sql.DataSource;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.jdbc.core.simple.ParameterizedRowMapper;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
|
||||
@Repository
|
||||
public class HeadlineDaoImpl implements HeadlineDao {
|
||||
private static final String DELETE_BY_ID_STMT = "DELETE from headlines WHERE id = ?";
|
||||
|
||||
private static final String INSERT_STMT = "INSERT INTO headlines (text, date, stock, dataset) values (?, ?, ?, ?)";
|
||||
|
||||
private static final String SELECT_BY_ID_QRY = "SELECT * from headlines WHERE id = ?";
|
||||
private static final String SELECT_BY_STOCK_QRY = "SELECT * from headlines WHERE stock = ? AND date = ?";
|
||||
|
||||
private JdbcTemplate jdbcTemplate;
|
||||
|
||||
public int deleteById(int headlineId) {
|
||||
return jdbcTemplate.update(DELETE_BY_ID_STMT,
|
||||
new RequestMapper(), headlineId);
|
||||
}
|
||||
|
||||
public int insert(Headline headline) {
|
||||
return jdbcTemplate.update(INSERT_STMT, headline.getText(), headline.getDate(), headline.getStock(), headline.getDataset());
|
||||
}
|
||||
|
||||
public Headline select(int headlineId) {
|
||||
return jdbcTemplate.queryForObject(SELECT_BY_ID_QRY,
|
||||
new RequestMapper(), headlineId);
|
||||
}
|
||||
|
||||
public List<Headline> select(String stock, Date date) {
|
||||
return jdbcTemplate.query(SELECT_BY_STOCK_QRY,
|
||||
new RequestMapper(), stock, date);
|
||||
}
|
||||
|
||||
@Autowired
|
||||
public void createTemplate(DataSource dataSource) {
|
||||
this.jdbcTemplate = new JdbcTemplate(dataSource);
|
||||
}
|
||||
|
||||
private class RequestMapper implements ParameterizedRowMapper<Headline> {
|
||||
|
||||
@Override
|
||||
public Headline mapRow(ResultSet rs, int arg1) throws SQLException {
|
||||
Headline headline = new Headline();
|
||||
return headline;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
32
src/net/woodyfolsom/cs6601/p3/domain/Company.java
Normal file
32
src/net/woodyfolsom/cs6601/p3/domain/Company.java
Normal file
@@ -0,0 +1,32 @@
|
||||
package net.woodyfolsom.cs6601.p3.domain;
|
||||
|
||||
/**
 * Mutable value holder for one company: numeric id, display name and stock symbol.
 */
public class Company {
    private int id;
    private String name;
    private String stockSymbol;

    /**
     * Creates a company with all three properties set.
     *
     * @param id          numeric identifier
     * @param name        company name
     * @param stockSymbol stock symbol
     */
    public Company(int id, String name, String stockSymbol) {
        this.stockSymbol = stockSymbol;
        this.name = name;
        this.id = id;
    }

    /** @return the numeric identifier */
    public int getId() {
        return this.id;
    }

    /** @return the company name */
    public String getName() {
        return this.name;
    }

    /** @return the stock symbol */
    public String getStockSymbol() {
        return this.stockSymbol;
    }

    /** @param newId replacement identifier */
    public void setId(int newId) {
        id = newId;
    }

    /** @param newName replacement company name */
    public void setName(String newName) {
        name = newName;
    }

    /** @param newStockSymbol replacement stock symbol */
    public void setStockSymbol(String newStockSymbol) {
        stockSymbol = newStockSymbol;
    }
}
|
||||
68
src/net/woodyfolsom/cs6601/p3/domain/Headline.java
Normal file
68
src/net/woodyfolsom/cs6601/p3/domain/Headline.java
Normal file
@@ -0,0 +1,68 @@
|
||||
package net.woodyfolsom.cs6601.p3.domain;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
|
||||
/**
 * Mutable value holder for one news headline: its text, the stock it belongs to,
 * its date, a dataset number and an id.
 */
public class Headline {
    private int id;
    private int dataset;
    private String stock;
    private String text;
    private Date date;

    /** Creates a headline with every property at its Java default value. */
    public Headline() {
    }

    /**
     * Creates a headline with everything but the id set.
     *
     * @param stock   stock symbol
     * @param text    headline text
     * @param date    headline date
     * @param dataset dataset number
     */
    public Headline(String stock, String text, Date date, int dataset) {
        this.dataset = dataset;
        this.date = date;
        this.text = text;
        this.stock = stock;
    }

    /** @return the id */
    public int getId() {
        return this.id;
    }

    /** @param newId replacement id */
    public void setId(int newId) {
        id = newId;
    }

    /** @return the dataset number */
    public int getDataset() {
        return this.dataset;
    }

    /** @param newDataset replacement dataset number */
    public void setDataset(int newDataset) {
        dataset = newDataset;
    }

    /** @return the stock symbol */
    public String getStock() {
        return this.stock;
    }

    /** @param newStock replacement stock symbol */
    public void setStock(String newStock) {
        stock = newStock;
    }

    /** @return the headline text */
    public String getText() {
        return this.text;
    }

    /** @param newText replacement headline text */
    public void setText(String newText) {
        text = newText;
    }

    /** @return the headline date (the stored instance, not a copy) */
    public Date getDate() {
        return this.date;
    }

    /** @param newDate replacement headline date (stored as given, not copied) */
    public void setDate(Date newDate) {
        date = newDate;
    }

    /** @return the headline text, exactly as {@link #getText()} returns it */
    @Override
    public String toString() {
        return this.text;
    }
}
|
||||
11
src/net/woodyfolsom/cs6601/p3/svc/HeadlineService.java
Normal file
11
src/net/woodyfolsom/cs6601/p3/svc/HeadlineService.java
Normal file
@@ -0,0 +1,11 @@
|
||||
package net.woodyfolsom.cs6601.p3.svc;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
|
||||
public interface HeadlineService {
|
||||
int insertHeadline(Headline headline);
|
||||
List<Headline> getHeadlines(String stock, Date date);
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
package net.woodyfolsom.cs6601.p3.svc;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import net.woodyfolsom.cs6601.p3.dao.HeadlineDao;
|
||||
import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
public class MySQLHeadlineServiceImpl implements HeadlineService {
|
||||
private Log log = LogFactory.getLog(MySQLHeadlineServiceImpl.class);
|
||||
|
||||
@Autowired
|
||||
private HeadlineDao headlineDao;
|
||||
|
||||
@Override
|
||||
public int insertHeadline(Headline headline) {
|
||||
return headlineDao.insert(headline);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Headline> getHeadlines(String stock, Date date) {
|
||||
return headlineDao.select(stock, date);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
package net.woodyfolsom.cs6601.p3.svc;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.woodyfolsom.cs6601.p3.dao.HeadlineDao;
|
||||
import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
public class YahooHeadlineServiceImpl implements HeadlineService {
|
||||
private Log log = LogFactory.getLog(YahooHeadlineServiceImpl.class);
|
||||
|
||||
private static final DateFormat DATE_FORMATTER = new SimpleDateFormat("yyyy-MM-dd");
|
||||
private static final String STORY_DATE_FIELD = "STORY_DATE";
|
||||
private static final String STOCK_SYMBOL_FIELD = "STOCK_SYMBOL";
|
||||
|
||||
private static final String QUERY_URL = "http://query.yahooapis.com/v1/public/yql?q=select%20content%20from%20html%20where%20url%3D%22http%3A%2F%2Ffinance.yahoo.com%2Fq%2Fh%3Fs%3DSTOCK_SYMBOL%26t%3DSTORY_DATE%22%20and%20xpath%3D'%2F%2Fdiv%5B%40class%3D%22mod%20yfi_quote_headline%20withsky%22%5D%2Ful%2Fli%2Fa'&diagnostics=true";
|
||||
|
||||
@Override
|
||||
public int insertHeadline(Headline headline) {
|
||||
throw new UnsupportedOperationException("This implementation does not support inserting headlines.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Headline> getHeadlines(String stock, Date date) {
|
||||
List<Headline> headlineList = new ArrayList<Headline>();
|
||||
try {
|
||||
URL url = new URL(populateQueryURL(stock,date));
|
||||
|
||||
HttpURLConnection connection = (HttpURLConnection)url.openConnection();
|
||||
connection.setRequestMethod("GET");
|
||||
connection.setReadTimeout(10000);
|
||||
connection.connect();
|
||||
|
||||
BufferedReader buf = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||
|
||||
String line;
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
while ((line = buf.readLine()) != null) {
|
||||
sb.append(line);
|
||||
//System.out.println(line);
|
||||
}
|
||||
|
||||
buf.close();
|
||||
|
||||
String xmlResults = sb.toString();
|
||||
|
||||
Pattern pattern = Pattern.compile("<a>.*?</a>");
|
||||
Matcher matcher = pattern.matcher(xmlResults);
|
||||
while (matcher.find()) {
|
||||
System.out.println();
|
||||
String anchorValue = xmlResults.substring(matcher.start()+3,matcher.end()-4);
|
||||
headlineList.add(new Headline(stock,anchorValue,date,1));
|
||||
}
|
||||
} catch (MalformedURLException mue) {
|
||||
log.warn("Caught MalformedURLException: " + mue.getMessage() + ", returning empty Headline list.");
|
||||
} catch (IOException ioe) {
|
||||
log.warn("Caught IOException: " + ioe.getMessage() + ", returning empty Headline list.");
|
||||
}
|
||||
return headlineList;
|
||||
}
|
||||
|
||||
private String populateQueryURL(String stock, Date date) {
|
||||
String formattedDate = DATE_FORMATTER.format(date);
|
||||
return QUERY_URL.replaceAll(STOCK_SYMBOL_FIELD, stock).replaceAll(STORY_DATE_FIELD, formattedDate);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user