Functional data import from Yahoo Finance news using YQL (Yahoo Query Language) and XPATH. Data is stuffed into MySQL database cs6601 on woodyfolsom.net.
This commit is contained in:
@@ -0,0 +1,86 @@
|
||||
package net.woodyfolsom.cs6601.p3.svc;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.woodyfolsom.cs6601.p3.dao.HeadlineDao;
|
||||
import net.woodyfolsom.cs6601.p3.domain.Headline;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
public class YahooHeadlineServiceImpl implements HeadlineService {
|
||||
private Log log = LogFactory.getLog(YahooHeadlineServiceImpl.class);
|
||||
|
||||
private static final DateFormat DATE_FORMATTER = new SimpleDateFormat("yyyy-MM-dd");
|
||||
private static final String STORY_DATE_FIELD = "STORY_DATE";
|
||||
private static final String STOCK_SYMBOL_FIELD = "STOCK_SYMBOL";
|
||||
|
||||
private static final String QUERY_URL = "http://query.yahooapis.com/v1/public/yql?q=select%20content%20from%20html%20where%20url%3D%22http%3A%2F%2Ffinance.yahoo.com%2Fq%2Fh%3Fs%3DSTOCK_SYMBOL%26t%3DSTORY_DATE%22%20and%20xpath%3D'%2F%2Fdiv%5B%40class%3D%22mod%20yfi_quote_headline%20withsky%22%5D%2Ful%2Fli%2Fa'&diagnostics=true";
|
||||
|
||||
@Override
|
||||
public int insertHeadline(Headline headline) {
|
||||
throw new UnsupportedOperationException("This implementation does not support inserting headlines.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Headline> getHeadlines(String stock, Date date) {
|
||||
List<Headline> headlineList = new ArrayList<Headline>();
|
||||
try {
|
||||
URL url = new URL(populateQueryURL(stock,date));
|
||||
|
||||
HttpURLConnection connection = (HttpURLConnection)url.openConnection();
|
||||
connection.setRequestMethod("GET");
|
||||
connection.setReadTimeout(10000);
|
||||
connection.connect();
|
||||
|
||||
BufferedReader buf = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||
|
||||
String line;
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
while ((line = buf.readLine()) != null) {
|
||||
sb.append(line);
|
||||
//System.out.println(line);
|
||||
}
|
||||
|
||||
buf.close();
|
||||
|
||||
String xmlResults = sb.toString();
|
||||
|
||||
Pattern pattern = Pattern.compile("<a>.*?</a>");
|
||||
Matcher matcher = pattern.matcher(xmlResults);
|
||||
while (matcher.find()) {
|
||||
System.out.println();
|
||||
String anchorValue = xmlResults.substring(matcher.start()+3,matcher.end()-4);
|
||||
headlineList.add(new Headline(stock,anchorValue,date,1));
|
||||
}
|
||||
} catch (MalformedURLException mue) {
|
||||
log.warn("Caught MalformedURLException: " + mue.getMessage() + ", returning empty Headline list.");
|
||||
} catch (IOException ioe) {
|
||||
log.warn("Caught IOException: " + ioe.getMessage() + ", returning empty Headline list.");
|
||||
}
|
||||
return headlineList;
|
||||
}
|
||||
|
||||
private String populateQueryURL(String stock, Date date) {
|
||||
String formattedDate = DATE_FORMATTER.format(date);
|
||||
return QUERY_URL.replaceAll(STOCK_SYMBOL_FIELD, stock).replaceAll(STORY_DATE_FIELD, formattedDate);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user