Functional data import from Yahoo Finance news using YQL (Yahoo Query Language) and XPath. Headlines are stored in the MySQL database cs6601p3 on woodyfolsom.net.

This commit is contained in:
Woody Folsom
2012-04-07 18:59:39 -04:00
parent a46e790059
commit d700d97124
46 changed files with 610 additions and 482 deletions

25
.classpath Normal file
View File

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse JDT build path: source folders, the JRE container, the jars bundled under lib/, and the compiler output folder. -->
<classpath>
<!-- Source folders compiled by the Java builder. -->
<classpathentry kind="src" path="src"/>
<classpathentry kind="src" path="test"/>
<!-- JRE container provided by the Eclipse workspace. -->
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<!-- Library jars checked in under lib/. Entry order is the order Eclipse searches them, so do not reorder casually. -->
<classpathentry kind="lib" path="lib/junit-4.10.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.core-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/spring-data-jdbc-core-1.0.0.RC1.jar"/>
<classpathentry kind="lib" path="lib/mysql-connector-java-5.1.18-bin.jar"/>
<classpathentry kind="lib" path="lib/aopalliance.jar"/>
<!-- NOTE(review): aspectj is 1.6.12 but aspectjweaver is 1.6.8; confirm the version mismatch is intentional. -->
<classpathentry kind="lib" path="lib/aspectj-1.6.12.jar"/>
<classpathentry kind="lib" path="lib/aspectjweaver-1.6.8.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.aop-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.asm-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.aspects-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.beans-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.context-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.context.support-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.jdbc-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.transaction-3.1.1.RELEASE.jar"/>
<classpathentry kind="lib" path="lib/commons-logging-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/log4j-1.2.16.jar"/>
<classpathentry kind="lib" path="lib/org.springframework.expression-3.1.1.RELEASE.jar"/>
<!-- Compiled classes are written to bin/. -->
<classpathentry kind="output" path="bin"/>
</classpath>

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
bin

17
.project Normal file
View File

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse project description for cs6601p3: a Java-nature project built by the JDT Java builder. -->
<projectDescription>
<name>cs6601p3</name>
<comment></comment>
<!-- No referenced workspace projects are declared. -->
<projects>
</projects>
<buildSpec>
<!-- The only build command is the JDT Java builder, run with no arguments. -->
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>

Binary file not shown.

Binary file not shown.

View File

@@ -1,5 +0,0 @@
\relax
\citation{russelnorvig}
\bibstyle{plain}
\bibdata{p3refs}
\bibcite{russelnorvig}{1}

View File

@@ -1,8 +0,0 @@
\begin{thebibliography}{1}
\bibitem{russelnorvig}
S.~Russell and P.~Norvig.
\newblock {\em Artificial Intelligence: A Modern Approach}.
\newblock Prentice Hall, third edition, 2010.
\end{thebibliography}

View File

@@ -1,3 +0,0 @@
This is BibTeX, Version 0.99dThe top-level auxiliary file: P3 Proposal.aux
The style file: plain.bst
Database file #1: p3refs.bib

View File

@@ -1,264 +0,0 @@
This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.11) 1 APR 2012 20:11
entering extended mode
**C:/Users/Woody/Desktop/GaTechMS/Spring*2012/CS6601*AI/Project*3/P3*Proposal.t
ex
("C:/Users/Woody/Desktop/GaTechMS/Spring 2012/CS6601 AI/Project 3/P3 Proposal.t
ex"
LaTeX2e <2011/06/27>
Babel <v3.8m> and hyphenation patterns for english, afrikaans, ancientgreek, ar
abic, armenian, assamese, basque, bengali, bokmal, bulgarian, catalan, coptic,
croatian, czech, danish, dutch, esperanto, estonian, farsi, finnish, french, ga
lician, german, german-x-2009-06-19, greek, gujarati, hindi, hungarian, iceland
ic, indonesian, interlingua, irish, italian, kannada, kurmanji, lao, latin, lat
vian, lithuanian, malayalam, marathi, mongolian, mongolianlmc, monogreek, ngerm
an, ngerman-x-2009-06-19, nynorsk, oriya, panjabi, pinyin, polish, portuguese,
romanian, russian, sanskrit, serbian, slovak, slovenian, spanish, swedish, swis
sgerman, tamil, telugu, turkish, turkmen, ukenglish, ukrainian, uppersorbian, u
senglishmax, welsh, loaded.
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\base\article.cls"
Document Class: article 2007/10/19 v1.4h Standard LaTeX document class
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\base\size10.clo"
File: size10.clo 2007/10/19 v1.4h Standard LaTeX file (size option)
)
\c@part=\count79
\c@section=\count80
\c@subsection=\count81
\c@subsubsection=\count82
\c@paragraph=\count83
\c@subparagraph=\count84
\c@figure=\count85
\c@table=\count86
\abovecaptionskip=\skip41
\belowcaptionskip=\skip42
\bibindent=\dimen102
)
("C:\Users\Woody\Desktop\GaTechMS\Spring 2012\CS6601 AI\Project 3\latex8.sty"
IEEE 8.5 x 11-Inch Proceedings Style `latex8.sty'.
\@ctmp=\skip43
\@figindent=\skip44
) ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\titlesec\titlesec.sty"
Package: titlesec 2011/12/15 v2.10.0 Sectioning titles
\ttl@box=\box26
\beforetitleunit=\skip45
\aftertitleunit=\skip46
\ttl@plus=\dimen103
\ttl@minus=\dimen104
\ttl@toksa=\toks14
\titlewidth=\dimen105
\titlewidthlast=\dimen106
\titlewidthfirst=\dimen107
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\geometry\geometry.sty"
Package: geometry 2010/09/12 v5.6 Page Geometry
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\keyval.sty"
Package: keyval 1999/03/16 v1.13 key=value parser (DPC)
\KV@toks@=\toks15
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\oberdiek\ifpdf.sty"
Package: ifpdf 2011/01/30 v2.3 Provides the ifpdf switch (HO)
Package ifpdf Info: pdfTeX in PDF mode is detected.
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\oberdiek\ifvtex.sty"
Package: ifvtex 2010/03/01 v1.5 Switches for detecting VTeX and its modes (HO)
Package ifvtex Info: VTeX not detected.
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\ifxetex\ifxetex.sty"
Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional
)
\Gm@cnth=\count87
\Gm@cntv=\count88
\c@Gm@tempcnt=\count89
\Gm@bindingoffset=\dimen108
\Gm@wd@mp=\dimen109
\Gm@odd@mp=\dimen110
\Gm@even@mp=\dimen111
\Gm@layoutwidth=\dimen112
\Gm@layoutheight=\dimen113
\Gm@layouthoffset=\dimen114
\Gm@layoutvoffset=\dimen115
\Gm@dimlist=\toks16
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\geometry\geometry.cfg"))
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\graphicx.sty"
Package: graphicx 1999/02/16 v1.0f Enhanced LaTeX Graphics (DPC,SPQR)
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\graphics.sty"
Package: graphics 2009/02/05 v1.0o Standard LaTeX Graphics (DPC,SPQR)
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\graphics\trig.sty"
Package: trig 1999/03/16 v1.09 sin cos tan (DPC)
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\00miktex\graphics.cfg"
File: graphics.cfg 2007/01/18 v1.5 graphics configuration of teTeX/TeXLive
)
Package graphics Info: Driver file: pdftex.def on input line 91.
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\pdftex-def\pdftex.def"
File: pdftex.def 2011/05/27 v0.06d Graphics/color for pdfTeX
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\oberdiek\infwarerr.sty"
Package: infwarerr 2010/04/08 v1.3 Providing info/warning/message (HO)
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\generic\oberdiek\ltxcmds.sty"
Package: ltxcmds 2011/04/18 v1.20 LaTeX kernel commands for general use (HO)
)
\Gread@gobject=\count90
))
\Gin@req@height=\dimen116
\Gin@req@width=\dimen117
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsmath.sty"
Package: amsmath 2000/07/18 v2.13 AMS math features
\@mathmargin=\skip47
For additional information on amsmath, use the `?' option.
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amstext.sty"
Package: amstext 2000/06/29 v2.01
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsgen.sty"
File: amsgen.sty 1999/11/30 v2.0
\@emptytoks=\toks17
\ex@=\dimen118
))
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsbsy.sty"
Package: amsbsy 1999/11/29 v1.2d
\pmbraise@=\dimen119
)
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ams\math\amsopn.sty"
Package: amsopn 1999/12/14 v2.01 operator names
)
\inf@bad=\count91
LaTeX Info: Redefining \frac on input line 211.
\uproot@=\count92
\leftroot@=\count93
LaTeX Info: Redefining \overline on input line 307.
\classnum@=\count94
\DOTSCASE@=\count95
LaTeX Info: Redefining \ldots on input line 379.
LaTeX Info: Redefining \dots on input line 382.
LaTeX Info: Redefining \cdots on input line 467.
\Mathstrutbox@=\box27
\strutbox@=\box28
\big@size=\dimen120
LaTeX Font Info: Redeclaring font encoding OML on input line 567.
LaTeX Font Info: Redeclaring font encoding OMS on input line 568.
\macc@depth=\count96
\c@MaxMatrixCols=\count97
\dotsspace@=\muskip10
\c@parentequation=\count98
\dspbrk@lvl=\count99
\tag@help=\toks18
\row@=\count100
\column@=\count101
\maxfields@=\count102
\andhelp@=\toks19
\eqnshift@=\dimen121
\alignsep@=\dimen122
\tagshift@=\dimen123
\tagwidth@=\dimen124
\totwidth@=\dimen125
\lineht@=\dimen126
\@envbody=\toks20
\multlinegap=\skip48
\multlinetaggap=\skip49
\mathdisplay@stack=\toks21
LaTeX Info: Redefining \[ on input line 2666.
LaTeX Info: Redefining \] on input line 2667.
)
LaTeX Warning: Unused global option(s):
[times,08pt].
("C:\Users\Woody\Desktop\GaTechMS\Spring 2012\CS6601 AI\Project 3\P3 Proposal.a
ux")
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
*geometry* driver: auto-detecting
*geometry* detected driver: pdftex
*geometry* verbose mode - [ preamble ] result:
* driver: pdftex
* paper: <default>
* layout: <same size as paper>
* layoutoffset:(h,v)=(0.0pt,0.0pt)
* modes:
* h-part:(L,W,R)=(50.58878pt, 513.11745pt, 50.58878pt)
* v-part:(T,H,B)=(50.58878pt, 693.79243pt, 50.58878pt)
* \paperwidth=614.295pt
* \paperheight=794.96999pt
* \textwidth=513.11745pt
* \textheight=693.79243pt
* \oddsidemargin=-21.68121pt
* \evensidemargin=-21.68121pt
* \topmargin=-21.68121pt
* \headheight=0.0pt
* \headsep=0.0pt
* \topskip=10.0pt
* \footskip=30.0pt
* \marginparwidth=4.0pt
* \marginparsep=10.0pt
* \columnsep=22.58437pt
* \skip\footins=9.0pt plus 4.0pt minus 2.0pt
* \hoffset=0.0pt
* \voffset=0.0pt
* \mag=1000
* \@twocolumntrue
* \@twosidefalse
* \@mparswitchfalse
* \@reversemarginfalse
* (1in=72.27pt=25.4mm, 1cm=28.453pt)
("C:\Program Files (x86)\MiKTeX 2.9\tex\context\base\supp-pdf.mkii"
[Loading MPS to PDF converter (version 2006.09.02).]
\scratchcounter=\count103
\scratchdimen=\dimen127
\scratchbox=\box29
\nofMPsegments=\count104
\nofMParguments=\count105
\everyMPshowfont=\toks22
\MPscratchCnt=\count106
\MPscratchDim=\dimen128
\MPnumerator=\count107
\makeMPintoPDFobject=\count108
\everyMPtoPDFconversion=\toks23
)
("C:\Users\Woody\Desktop\GaTechMS\Spring 2012\CS6601 AI\Project 3\P3 Proposal.b
bl") [1{C:/ProgramData/MiKTeX/2.9/pdftex/config/pdftex.map}
]
("C:\Users\Woody\Desktop\GaTechMS\Spring 2012\CS6601 AI\Project 3\P3 Proposal.a
ux") )
Here is how much of TeX's memory you used:
2498 strings out of 494045
33028 string characters out of 3145969
94334 words of memory out of 3000000
5788 multiletter control sequences out of 15000+200000
9475 words of font info for 33 fonts, out of 3000000 for 9000
715 hyphenation exceptions out of 8191
27i,7n,32p,307b,209s stack positions out of 5000i,500n,10000p,200000b,50000s
<C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmbx10.
pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmbx12.pf
b><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr10.pfb><
C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr12.pfb><C:/
Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmti10.pfb>
Output written on "P3 Proposal.pdf" (1 page, 61796 bytes).
PDF statistics:
26 PDF objects out of 1000 (max. 8388607)
0 named destinations out of 1000 (max. 500000)
1 words of extra memory for PDF output out of 10000 (max. 10000000)

Binary file not shown.

View File

@@ -1,38 +0,0 @@
% Project 3 proposal skeleton: two-column article using the bundled
% latex8.sty proceedings style, with placeholder sections and one sample
% citation resolved from p3refs.bib.
%
% The former class options `times' and `08pt' were removed: neither the
% article class nor any loaded package consumed them (LaTeX warned about both
% as unused global options), so the document is typeset at article's default
% 10pt size exactly as before.
\documentclass[twocolumn]{article}
\usepackage{latex8}
\usepackage{titlesec}
\usepackage[margin=0.7in]{geometry}
\usepackage{graphicx}
\usepackage{amsmath}
% Section headings: large bold, label followed by 1em of space.
\titleformat{\section}{\large\bfseries}{\thesection}{1em}{}
\begin{document}
% No page numbers.
\pagestyle{empty}
\title{Working title}
\author{Team X}
\date{April 2, 2012}
\maketitle
\section*{Introduction}
Sample citation from \emph{AI: A Modern Approach} \cite{russelnorvig}.
\section*{Related Work}
\section*{Approach}
\paragraph*{Implementation}
Sample paragraph for format demo purposes.
\section*{Evaluation}
\section*{Conclusion}
\bibliographystyle{plain}
\bibliography{p3refs}
\end{document}

Binary file not shown.

View File

@@ -1,157 +0,0 @@
% ---------------------------------------------------------------
%
% $Id: latex8.sty,v 1.2 1995/09/15 15:31:13 ienne Exp $
%
% by Paolo.Ienne@di.epfl.ch
%
% ---------------------------------------------------------------
%
% no guarantee is given that the format corresponds perfectly to
% IEEE 8.5" x 11" Proceedings, but most features should be ok.
%
% ---------------------------------------------------------------
% with LaTeX2e:
% =============
%
% use as
% \documentclass[times,10pt,twocolumn]{article}
% \usepackage{latex8}
% \usepackage{times}
%
% ---------------------------------------------------------------
% with LaTeX 2.09:
% ================
%
% use as
% \documentstyle[times,art10,twocolumn,latex8]{article}
%
% ---------------------------------------------------------------
% with both versions:
% ===================
%
% specify \pagestyle{empty} to omit page numbers in the final
% version
%
% specify references as
% \bibliographystyle{latex8}
% \bibliography{...your files...}
%
% use Section{} and SubSection{} instead of standard section{}
% and subsection{} to obtain headings in the form
% "1.3. My heading"
%
% ---------------------------------------------------------------
\typeout{IEEE 8.5 x 11-Inch Proceedings Style `latex8.sty'.}
% helvetica bold for captions (the proceedings format asks for ten point; this copy loads it at 8pt below)
% in some sites the name of the helvetica bold font may differ,
% change the name here:
\font\tenhv = phvb at 08pt
% \font\tenhv = phvb7t at 09pt
% eleven point times bold required for second-order headings
\font\elvbf = cmbx10 scaled 1100
%\font\elvbf = ptmb scaled 1100
% set dimensions of columns, gap between columns, and paragraph indent
\setlength{\textheight}{8.8in}
% \setlength{\textheight}{9in}
\setlength{\textwidth}{6.875in}
% \setlength{\textwidth}{7in}
\setlength{\columnsep}{0.3125in}
\setlength{\topmargin}{0in}
\setlength{\headheight}{0in}
\setlength{\headsep}{0in}
\setlength{\parindent}{1pc}
\setlength{\oddsidemargin}{-.304in}
\setlength{\evensidemargin}{-.304in}
% memento from size10.clo
% \normalsize{\@setfontsize\normalsize\@xpt\@xiipt}
% \small{\@setfontsize\small\@ixpt{11}}
% \footnotesize{\@setfontsize\footnotesize\@viiipt{9.5}}
% \scriptsize{\@setfontsize\scriptsize\@viipt\@viiipt}
% \tiny{\@setfontsize\tiny\@vpt\@vipt}
% \large{\@setfontsize\large\@xiipt{14}}
% \Large{\@setfontsize\Large\@xivpt{18}}
% \LARGE{\@setfontsize\LARGE\@xviipt{22}}
% \huge{\@setfontsize\huge\@xxpt{25}}
% \Huge{\@setfontsize\Huge\@xxvpt{30}}
\def\@maketitle
{
\newpage
\null
% \vskip .375in
\begin{center}
{\Large \bf \@title \par}
% additional two empty lines at the end of the title
% \vspace*{24pt}
{
\large
%\lineskip .5em
\begin{tabular}[t]{c}
\@author
\end{tabular}
\par
}
% additional small space at the end of the author name
%\vskip .5em
\vspace*{12pt}
\end{center}
}
\def\abstract
{%
\centerline{\large\bf Abstract}%
\vspace*{12pt}%
\it%
}
\def\endabstract
{
% additional empty line at the end of the abstract
\vspace*{12pt}
}
\def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{}
\def\email#1{\gdef\@email{#1}}
\gdef\@email{}
\newlength{\@ctmp}
\newlength{\@figindent}
\setlength{\@figindent}{1pc}
\long\def\@makecaption#1#2{
% \vskip 10pt
\setbox\@tempboxa\hbox{\tenhv\noindent #1.~#2}
\setlength{\@ctmp}{\hsize}
\addtolength{\@ctmp}{-\@figindent}\addtolength{\@ctmp}{-\@figindent}
% IF longer than one indented paragraph line
\ifdim \wd\@tempboxa >\@ctmp
% THEN set as an indented paragraph
\begin{list}{}{\leftmargin\@figindent \rightmargin\leftmargin}
\item[]\tenhv #1.~#2\par
\end{list}
\else
% ELSE center
\hbox to\hsize{\hfil\box\@tempboxa\hfil}
\fi}
% correct heading spacing and type
\def\section{\@startsection {section}{1}{\z@}
{10pt plus 1pt minus 1pt}{10pt plus 1pt minus 1pt} {\large\bf}}
\def\subsection{\@startsection {subsection}{2}{\z@}
{13pt plus 2pt minus 2pt}{13pt plus 2pt minus 2pt} {\elvbf}}
% add the period after section numbers
\newcommand{\Section}[1]{\section{\hskip -1em.~#1}}
\newcommand{\SubSection}[1]{\subsection{\hskip -1em.~#1}}
% end of file latex8.sty
% ---------------------------------------------------------------

View File

@@ -1,7 +0,0 @@
@BOOK{russelnorvig,
title = {Artificial Intelligence: A Modern Approach},
author = {Russell, S. and Norvig, P.},
publisher = {Prentice Hall},
edition = {Third},
year = {2010}
}

Binary file not shown.

BIN
lib/aopalliance.jar Normal file

Binary file not shown.

BIN
lib/aspectj-1.6.12.jar Normal file

Binary file not shown.

BIN
lib/aspectjweaver-1.6.8.jar Normal file

Binary file not shown.

Binary file not shown.

BIN
lib/junit-4.10.jar Normal file

Binary file not shown.

BIN
lib/log4j-1.2.16.jar Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

50
res/stock_symbols.csv Normal file
View File

@@ -0,0 +1,50 @@
1,Wal-Mart Stores,WMT
2,Exxon Mobil,XOM
3,Chevron,CVX
4,ConocoPhillips,COP
5,Fannie Mae,FNMA
6,General Electric,GE
7,Berkshire Hathaway,BRKA
8,General Motors,GM
9,Bank of America Corp.,BAC
10,Ford Motor,F
11,Hewlett-Packard,HPQ
12,AT&T,T
13,J.P. Morgan Chase & Co.,JPM
14,Citigroup,C
15,McKesson,MCK
16,Verizon Communications,VZ
17,American International Group,AIG
18,International Business Machines,IBM
19,Cardinal Health,CAH
20,Freddie Mac,FMCC
21,CVS Caremark,CVS
22,UnitedHealth Group,UNH
23,Wells Fargo,WFC
24,Valero Energy,VLO
25,Kroger,KR
26,Procter & Gamble,PG
27,AmerisourceBergen,ABC
28,Costco Wholesale,COST
29,Marathon Oil,MRO
30,Home Depot,HD
31,Pfizer,PFE
32,Walgreen,WAG
33,Target,TGT
34,Medco Health Solutions,MHS
35,Apple,AAPL
36,Boeing,BA
37,State Farm Insurance Cos.,SNPAX
38,Microsoft,MSFT
39,Archer Daniels Midland,ADM
40,Johnson & Johnson,JNJ
41,Dell,DELL
42,WellPoint,WLP
43,PepsiCo,PEP
44,United Technologies,UTX
45,Dow Chemical,DOW
46,MetLife,MET
47,Best Buy,BBY
48,United Parcel Service,UPS
49,Kraft Foods,KFT
50,Lowe's,LOW
1 1 Wal-Mart Stores WMT
2 2 Exxon Mobil XOM
3 3 Chevron CVX
4 4 ConocoPhillips COP
5 5 Fannie Mae FNMA
6 6 General Electric GE
7 7 Berkshire Hathaway BRKA
8 8 General Motors GM
9 9 Bank of America Corp. BAC
10 10 Ford Motor F
11 11 Hewlett-Packard HPQ
12 12 AT&T T
13 13 J.P. Morgan Chase & Co. JPM
14 14 Citigroup C
15 15 McKesson MCK
16 16 Verizon Communications VZ
17 17 American International Group AIG
18 18 International Business Machines IBM
19 19 Cardinal Health CAH
20 20 Freddie Mac FMCC
21 21 CVS Caremark CVS
22 22 UnitedHealth Group UNH
23 23 Wells Fargo WFC
24 24 Valero Energy VLO
25 25 Kroger KR
26 26 Procter & Gamble PG
27 27 AmerisourceBergen ABC
28 28 Costco Wholesale COST
29 29 Marathon Oil MRO
30 30 Home Depot HD
31 31 Pfizer PFE
32 32 Walgreen WAG
33 33 Target TGT
34 34 Medco Health Solutions MHS
35 35 Apple AAPL
36 36 Boeing BA
37 37 State Farm Insurance Cos. SNPAX
38 38 Microsoft MSFT
39 39 Archer Daniels Midland ADM
40 40 Johnson & Johnson JNJ
41 41 Dell DELL
42 42 WellPoint WLP
43 43 PepsiCo PEP
44 44 United Technologies UTX
45 45 Dow Chemical DOW
46 46 MetLife MET
47 47 Best Buy BBY
48 48 United Parcel Service UPS
49 49 Kraft Foods KFT
50 50 Lowe's LOW

28
src/AppContext.xml Normal file
View File

@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring application context for the headline importer.

  Application beans (HeadlinePuller, the HeadlineService implementations and
  the DAO) are registered by component scanning of net.woodyfolsom.cs6601.p3;
  only the JDBC DataSource is declared explicitly here.

  Schema locations are versionless so they resolve to the XSDs shipped with
  the Spring jars on the classpath.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans.xsd
                           http://www.springframework.org/schema/context
                           http://www.springframework.org/schema/context/spring-context.xsd"
       default-autowire="byName">

  <!--
    MySQL connection used by the DAO layer.
    NOTE(review): the credentials below are committed in plain text. Move them
    to an external properties file (or environment-specific configuration) and
    rotate the password.
  -->
  <bean id="dmdataSource"
        class="org.springframework.jdbc.datasource.DriverManagerDataSource">
    <property name="driverClassName" value="com.mysql.jdbc.Driver" />
    <property name="url" value="jdbc:mysql://woodyfolsom.net:3306/cs6601p3" />
    <property name="username" value="cs6601" />
    <property name="password" value="n0nst@p" />
  </bean>

  <!--
    Registers every @Component/@Service/@Repository class in the package and
    enables @Autowired processing (a separate context:annotation-config
    element is therefore not needed).
  -->
  <context:component-scan base-package="net.woodyfolsom.cs6601.p3"/>

  <!--
    The two service classes carry @Service and are already registered by the
    scan above. Declaring them again as separate beans created a second
    instance of each, so the former ids are kept as aliases of the scanned
    beans instead.
  -->
  <alias name="mySQLHeadlineServiceImpl" alias="mySQLHeadlineSvc"/>
  <alias name="yahooHeadlineServiceImpl" alias="yahooHeadlineSvc"/>
</beans>

View File

@@ -0,0 +1,98 @@
package net.woodyfolsom.cs6601.p3;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import net.woodyfolsom.cs6601.p3.domain.Company;
import net.woodyfolsom.cs6601.p3.domain.Headline;
import net.woodyfolsom.cs6601.p3.svc.HeadlineService;
import net.woodyfolsom.cs6601.p3.svc.YahooHeadlineServiceImpl;
/**
 * Command-line driver that reads the company list from
 * {@code stock_symbols.csv}, fetches the current headlines for each stock
 * symbol from the Yahoo-backed {@link HeadlineService}, and stores them through
 * the MySQL-backed {@link HeadlineService}.
 *
 * <p>Exit codes: {@code 1} when the CSV cannot be read, {@code 2} when it does
 * not exist, {@code 0} when the run is interrupted while waiting.
 */
@Component
public class HeadlinePuller {
    /** CSV with one {@code id,name,symbol} row per company, resolved against the working directory. */
    private static final File stockSymbolsCSV = new File("stock_symbols.csv");
    private static final int IO_EXCEPTION = 1;
    private static final int STOCK_SYMBOL_CSV_NOT_FOUND = 2;

    // Both fields share the HeadlineService type, so the field names are what
    // select the matching bean; do not rename them without adding a qualifier.
    @Autowired
    HeadlineService mySQLHeadlineServiceImpl;

    @Autowired
    HeadlineService yahooHeadlineServiceImpl;

    /**
     * Loads the Spring context, then pulls and stores headlines for every
     * company in the CSV, pausing ten seconds after each company.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        ApplicationContext context = new ClassPathXmlApplicationContext(new String[]{"/AppContext.xml"});
        HeadlinePuller headlinePuller = context.getBean(HeadlinePuller.class);

        try {
            List<Company> fortune50 = headlinePuller.getFortune50(stockSymbolsCSV);
            for (Company company : fortune50) {
                System.out.println("Getting headlines for Fortune 50 company #" + company.getId() + " (" + company.getName() + ")...");
                Date today = new Date();
                List<Headline> headlines = headlinePuller.pullHeadlines(company.getStockSymbol(), today);
                headlinePuller.insertHeadlines(company.getStockSymbol(), today, headlines);
                System.out.println("Waiting 10 seconds to accommodate Yahoo throttling...");
                try {
                    Thread.sleep(10000L);
                } catch (InterruptedException ie) {
                    System.out.println("Interrupted while waiting, exiting");
                    System.exit(0);
                }
            }
        } catch (FileNotFoundException fnfe) {
            System.out.println("Stock symbol CSV file does not exist: "
                    + stockSymbolsCSV);
            System.exit(STOCK_SYMBOL_CSV_NOT_FOUND);
        } catch (IOException ioe) {
            // The file exists (the missing-file case is handled above) but could not be read.
            System.out.println("Error reading stock symbol CSV file " + stockSymbolsCSV
                    + ": " + ioe.getMessage());
            System.exit(IO_EXCEPTION);
        }
    }

    /**
     * Stores each headline through the MySQL-backed service.
     *
     * @param stockSymbol unused; every headline carries its own stock symbol
     * @param date        unused; every headline carries its own date
     * @param headlines   headlines to insert
     */
    private void insertHeadlines(String stockSymbol, Date date, List<Headline> headlines) {
        for (Headline headline : headlines) {
            mySQLHeadlineServiceImpl.insertHeadline(headline);
        }
    }

    /**
     * Fetches headlines from the Yahoo-backed service and echoes each one to
     * standard output.
     *
     * @param stockSymbol symbol to query
     * @param date        date to query
     * @return the headlines returned by the service
     */
    private List<Headline> pullHeadlines(String stockSymbol, Date date) {
        List<Headline> headlines = yahooHeadlineServiceImpl.getHeadlines(stockSymbol, date);
        for (Headline headline : headlines) {
            System.out.println("Got headline: " + headline);
        }
        return headlines;
    }

    /**
     * Parses the company CSV. Blank lines are skipped; every other line must
     * hold exactly three comma-separated values: numeric id, company name and
     * stock symbol. Surrounding whitespace in each value is ignored.
     *
     * @param csvFile file to read
     * @return the companies in file order
     * @throws FileNotFoundException if {@code csvFile} cannot be opened
     * @throws IOException           if reading fails
     * @throws RuntimeException      if a line does not have three values, or
     *                               its id is not an integer
     */
    private List<Company> getFortune50(File csvFile) throws FileNotFoundException,
            IOException {
        List<Company> fortune50 = new ArrayList<Company>();
        BufferedReader buf = new BufferedReader(new InputStreamReader(new FileInputStream(csvFile)));
        try {
            String csvline;
            while ((csvline = buf.readLine()) != null) {
                if (csvline.trim().length() == 0) {
                    continue;
                }
                String[] fields = csvline.split(",");
                if (fields.length != 3) {
                    throw new RuntimeException("Badly formatted CSV line (3 values expected): " + csvline);
                }
                int id = Integer.parseInt(fields[0].trim());
                fortune50.add(new Company(id, fields[1].trim(), fields[2].trim()));
            }
        } finally {
            // Closing the outer reader also releases the underlying file handle.
            buf.close();
        }
        return fortune50;
    }
}

View File

@@ -0,0 +1,15 @@
package net.woodyfolsom.cs6601.p3.dao;
import java.util.Date;
import java.util.List;
import net.woodyfolsom.cs6601.p3.domain.Headline;
/**
 * Data-access contract for rows of the {@code headlines} table.
 */
public interface HeadlineDao {
    /**
     * Deletes the headline with the given id.
     *
     * @param id primary key of the headline to remove
     * @return the update count reported by the implementation
     */
    int deleteById(int id);

    /**
     * Stores a new headline.
     *
     * @param headline headline to persist
     * @return the update count reported by the implementation
     */
    int insert(Headline headline);

    /**
     * Looks up a single headline by id.
     *
     * @param id primary key of the headline
     * @return the matching headline
     */
    Headline select(int id);

    /**
     * Looks up the headlines recorded for a stock on a given date.
     *
     * @param stock stock symbol to match
     * @param date  date to match
     * @return the matching headlines
     */
    List<Headline> select(String stock, Date date);
}

View File

@@ -0,0 +1,61 @@
package net.woodyfolsom.cs6601.p3.dao;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Date;
import java.util.List;
import javax.sql.DataSource;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.simple.ParameterizedRowMapper;
import org.springframework.stereotype.Repository;
import net.woodyfolsom.cs6601.p3.domain.Headline;
/**
 * {@link HeadlineDao} implementation backed by Spring's {@link JdbcTemplate}
 * and the {@code headlines} table.
 */
@Repository
public class HeadlineDaoImpl implements HeadlineDao {
    private static final String DELETE_BY_ID_STMT = "DELETE from headlines WHERE id = ?";
    private static final String INSERT_STMT = "INSERT INTO headlines (text, date, stock, dataset) values (?, ?, ?, ?)";
    private static final String SELECT_BY_ID_QRY = "SELECT * from headlines WHERE id = ?";
    private static final String SELECT_BY_STOCK_QRY = "SELECT * from headlines WHERE stock = ? AND date = ?";

    private JdbcTemplate jdbcTemplate;

    /**
     * Deletes the headline with the given id.
     *
     * @param headlineId id of the row to delete
     * @return number of rows affected
     */
    public int deleteById(int headlineId) {
        // Only the id is bound: the statement has a single placeholder, and a
        // row mapper is not a bind value for an update.
        return jdbcTemplate.update(DELETE_BY_ID_STMT, headlineId);
    }

    /**
     * Inserts a headline; the id column is not supplied by this statement.
     *
     * @param headline headline whose text, date, stock and dataset are stored
     * @return number of rows affected
     */
    public int insert(Headline headline) {
        return jdbcTemplate.update(INSERT_STMT, headline.getText(), headline.getDate(),
                headline.getStock(), headline.getDataset());
    }

    /**
     * Loads the headline with the given id.
     *
     * @param headlineId id of the row to load
     * @return the mapped headline
     */
    public Headline select(int headlineId) {
        return jdbcTemplate.queryForObject(SELECT_BY_ID_QRY,
                new RequestMapper(), headlineId);
    }

    /**
     * Loads the headlines whose stock and date columns equal the arguments.
     *
     * @param stock stock symbol to match
     * @param date  date to match; NOTE(review): this is compared for equality,
     *              so a value carrying a time of day may not match rows,
     *              depending on the column type; confirm against the schema
     * @return the mapped headlines, possibly empty
     */
    public List<Headline> select(String stock, Date date) {
        return jdbcTemplate.query(SELECT_BY_STOCK_QRY,
                new RequestMapper(), stock, date);
    }

    /**
     * Builds the JDBC template from the injected data source.
     *
     * @param dataSource data source supplied by the Spring container
     */
    @Autowired
    public void createTemplate(DataSource dataSource) {
        this.jdbcTemplate = new JdbcTemplate(dataSource);
    }

    /**
     * Maps one {@code headlines} row onto a {@link Headline}. Static because it
     * needs no state from the enclosing DAO.
     */
    private static class RequestMapper implements ParameterizedRowMapper<Headline> {
        @Override
        public Headline mapRow(ResultSet rs, int arg1) throws SQLException {
            Headline headline = new Headline();
            // Column names follow the INSERT and WHERE clauses above.
            headline.setId(rs.getInt("id"));
            headline.setText(rs.getString("text"));
            // Read as a timestamp so any time-of-day part the column holds is kept.
            headline.setDate(rs.getTimestamp("date"));
            headline.setStock(rs.getString("stock"));
            headline.setDataset(rs.getInt("dataset"));
            return headline;
        }
    }
}

View File

@@ -0,0 +1,32 @@
package net.woodyfolsom.cs6601.p3.domain;
/**
 * Mutable value holder for one row of the company list: a numeric id, the
 * company name and its stock symbol.
 */
public class Company {
    private int id;
    private String name;
    private String stockSymbol;

    /**
     * Creates a company with all three properties set.
     *
     * @param id          numeric id of the company
     * @param name        company name
     * @param stockSymbol stock symbol
     */
    public Company(int id, String name, String stockSymbol) {
        setId(id);
        setName(name);
        setStockSymbol(stockSymbol);
    }

    // --- read accessors ---

    /** @return the numeric id */
    public int getId() {
        return this.id;
    }

    /** @return the company name */
    public String getName() {
        return this.name;
    }

    /** @return the stock symbol */
    public String getStockSymbol() {
        return this.stockSymbol;
    }

    // --- write accessors ---

    /** @param id new numeric id */
    public void setId(int id) {
        this.id = id;
    }

    /** @param name new company name */
    public void setName(String name) {
        this.name = name;
    }

    /** @param stockSymbol new stock symbol */
    public void setStockSymbol(String stockSymbol) {
        this.stockSymbol = stockSymbol;
    }
}

View File

@@ -0,0 +1,68 @@
package net.woodyfolsom.cs6601.p3.domain;
import java.util.Date;
public class Headline {
public Headline() {
}
private int dataset;
private int id;
private Date date;
private String stock;
private String text;
public Headline(String stock, String text, Date date, int dataset) {
this.stock = stock;
this.text = text;
this.date = date;
this.dataset = dataset;
}
public int getDataset() {
return dataset;
}
public void setDataset(int dataset) {
this.dataset = dataset;
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public Date getDate() {
return date;
}
public void setDate(Date date) {
this.date = date;
}
public String getStock() {
return stock;
}
public void setStock(String stock) {
this.stock = stock;
}
public String getText() {
return text;
}
public void setText(String text) {
this.text = text;
}
@Override
public String toString() {
return text;
}
}

View File

@@ -0,0 +1,11 @@
package net.woodyfolsom.cs6601.p3.svc;
import java.util.Date;
import java.util.List;
import net.woodyfolsom.cs6601.p3.domain.Headline;
/**
 * Source and/or sink of {@link Headline}s. An implementation may support only
 * one of the two operations.
 */
public interface HeadlineService {
    /**
     * Returns the headlines available for a stock on a date.
     *
     * @param stock stock symbol
     * @param date  date of interest
     * @return the headlines found
     */
    List<Headline> getHeadlines(String stock, Date date);

    /**
     * Stores a headline.
     *
     * @param headline headline to store
     * @return an implementation-defined count for the insert
     */
    int insertHeadline(Headline headline);
}

View File

@@ -0,0 +1,30 @@
package net.woodyfolsom.cs6601.p3.svc;
import java.util.Date;
import java.util.List;
import net.woodyfolsom.cs6601.p3.dao.HeadlineDao;
import net.woodyfolsom.cs6601.p3.domain.Headline;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
/**
 * {@link HeadlineService} that delegates both operations to the injected
 * {@link HeadlineDao}.
 */
@Service
public class MySQLHeadlineServiceImpl implements HeadlineService {
    private Log log = LogFactory.getLog(MySQLHeadlineServiceImpl.class);

    @Autowired
    private HeadlineDao headlineDao;

    /**
     * Looks up headlines through the DAO.
     *
     * @param stock stock symbol
     * @param date  date to match
     * @return whatever the DAO returns for that stock and date
     */
    @Override
    public List<Headline> getHeadlines(String stock, Date date) {
        List<Headline> stored = this.headlineDao.select(stock, date);
        return stored;
    }

    /**
     * Inserts a headline through the DAO.
     *
     * @param headline headline to store
     * @return the DAO's result for the insert
     */
    @Override
    public int insertHeadline(Headline headline) {
        int inserted = this.headlineDao.insert(headline);
        return inserted;
    }
}

View File

@@ -0,0 +1,86 @@
package net.woodyfolsom.cs6601.p3.svc;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.woodyfolsom.cs6601.p3.dao.HeadlineDao;
import net.woodyfolsom.cs6601.p3.domain.Headline;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
/**
 * Read-only {@link HeadlineService} that fetches headlines for a stock symbol
 * and date by issuing a YQL query over HTTP and extracting the text of every
 * {@code <a>} element from the XML response.
 */
@Service
public class YahooHeadlineServiceImpl implements HeadlineService {
    private Log log = LogFactory.getLog(YahooHeadlineServiceImpl.class);

    /** Date pattern substituted into the query URL. */
    private static final String DATE_PATTERN = "yyyy-MM-dd";
    private static final String STORY_DATE_FIELD = "STORY_DATE";
    private static final String STOCK_SYMBOL_FIELD = "STOCK_SYMBOL";
    /** Query template; the two *_FIELD markers are replaced per request. */
    private static final String QUERY_URL = "http://query.yahooapis.com/v1/public/yql?q=select%20content%20from%20html%20where%20url%3D%22http%3A%2F%2Ffinance.yahoo.com%2Fq%2Fh%3Fs%3DSTOCK_SYMBOL%26t%3DSTORY_DATE%22%20and%20xpath%3D'%2F%2Fdiv%5B%40class%3D%22mod%20yfi_quote_headline%20withsky%22%5D%2Ful%2Fli%2Fa'&diagnostics=true";
    /** Matches one attribute-less anchor; group 1 is its text. Compiled once; Pattern is immutable. */
    private static final Pattern ANCHOR_PATTERN = Pattern.compile("<a>(.*?)</a>");
    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MS = 10000;
    /** Dataset number stamped on every headline this service creates. */
    private static final int HEADLINE_DATASET = 1;

    /**
     * Not supported by this implementation.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public int insertHeadline(Headline headline) {
        throw new UnsupportedOperationException("This implementation does not support inserting headlines.");
    }

    /**
     * Fetches the headlines for {@code stock} on {@code date}.
     *
     * @param stock stock symbol substituted into the query
     * @param date  date substituted into the query as {@code yyyy-MM-dd}
     * @return one headline per anchor found in the response; an empty list if
     *         the URL is malformed or the request fails (the failure is logged
     *         at WARN level, not thrown)
     */
    @Override
    public List<Headline> getHeadlines(String stock, Date date) {
        List<Headline> headlineList = new ArrayList<Headline>();
        HttpURLConnection connection = null;
        try {
            URL url = new URL(populateQueryURL(stock, date));
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(TIMEOUT_MS);
            connection.setReadTimeout(TIMEOUT_MS);
            connection.connect();

            String xmlResults = readBody(connection);
            Matcher matcher = ANCHOR_PATTERN.matcher(xmlResults);
            while (matcher.find()) {
                headlineList.add(new Headline(stock, matcher.group(1), date, HEADLINE_DATASET));
            }
        } catch (MalformedURLException mue) {
            log.warn("Caught MalformedURLException: " + mue.getMessage() + ", returning empty Headline list.", mue);
        } catch (IOException ioe) {
            log.warn("Caught IOException: " + ioe.getMessage() + ", returning empty Headline list.", ioe);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return headlineList;
    }

    /**
     * Reads the whole response body into one string with line breaks removed,
     * so an anchor split across lines still matches as a single unit.
     */
    private String readBody(HttpURLConnection connection) throws IOException {
        // Decode explicitly rather than with the platform default charset.
        // NOTE(review): assumes the service answers in UTF-8; confirm against the response header.
        BufferedReader buf = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = buf.readLine()) != null) {
                sb.append(line);
            }
            return sb.toString();
        } finally {
            buf.close();
        }
    }

    /** Substitutes the stock symbol and formatted date into the query template. */
    private String populateQueryURL(String stock, Date date) {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        DateFormat formatter = new SimpleDateFormat(DATE_PATTERN);
        String formattedDate = formatter.format(date);
        // Literal replacement: '$' or '\' in the inputs must not be read as regex replacement syntax.
        return QUERY_URL.replace(STOCK_SYMBOL_FIELD, stock).replace(STORY_DATE_FIELD, formattedDate);
    }
}

50
stock_symbols.csv Normal file
View File

@@ -0,0 +1,50 @@
1,Wal-Mart Stores,WMT
2,Exxon Mobil,XOM
3,Chevron,CVX
4,ConocoPhillips,COP
5,Fannie Mae,FNMA
6,General Electric,GE
7,Berkshire Hathaway,BRKA
8,General Motors,GM
9,Bank of America Corp.,BAC
10,Ford Motor,F
11,Hewlett-Packard,HPQ
12,AT&T,T
13,J.P. Morgan Chase & Co.,JPM
14,Citigroup,C
15,McKesson,MCK
16,Verizon Communications,VZ
17,American International Group,AIG
18,International Business Machines,IBM
19,Cardinal Health,CAH
20,Freddie Mac,FMCC
21,CVS Caremark,CVS
22,UnitedHealth Group,UNH
23,Wells Fargo,WFC
24,Valero Energy,VLO
25,Kroger,KR
26,Procter & Gamble,PG
27,AmerisourceBergen,ABC
28,Costco Wholesale,COST
29,Marathon Oil,MRO
30,Home Depot,HD
31,Pfizer,PFE
32,Walgreen,WAG
33,Target,TGT
34,Medco Health Solutions,MHS
35,Apple,AAPL
36,Boeing,BA
37,State Farm Insurance Cos.,SNPAX
38,Microsoft,MSFT
39,Archer Daniels Midland,ADM
40,Johnson & Johnson,JNJ
41,Dell,DELL
42,WellPoint,WLP
43,PepsiCo,PEP
44,United Technologies,UTX
45,Dow Chemical,DOW
46,MetLife,MET
47,Best Buy,BBY
48,United Parcel Service,UPS
49,Kraft Foods,KFT
50,Lowe's,LOW
1 1 Wal-Mart Stores WMT
2 2 Exxon Mobil XOM
3 3 Chevron CVX
4 4 ConocoPhillips COP
5 5 Fannie Mae FNMA
6 6 General Electric GE
7 7 Berkshire Hathaway BRKA
8 8 General Motors GM
9 9 Bank of America Corp. BAC
10 10 Ford Motor F
11 11 Hewlett-Packard HPQ
12 12 AT&T T
13 13 J.P. Morgan Chase & Co. JPM
14 14 Citigroup C
15 15 McKesson MCK
16 16 Verizon Communications VZ
17 17 American International Group AIG
18 18 International Business Machines IBM
19 19 Cardinal Health CAH
20 20 Freddie Mac FMCC
21 21 CVS Caremark CVS
22 22 UnitedHealth Group UNH
23 23 Wells Fargo WFC
24 24 Valero Energy VLO
25 25 Kroger KR
26 26 Procter & Gamble PG
27 27 AmerisourceBergen ABC
28 28 Costco Wholesale COST
29 29 Marathon Oil MRO
30 30 Home Depot HD
31 31 Pfizer PFE
32 32 Walgreen WAG
33 33 Target TGT
34 34 Medco Health Solutions MHS
35 35 Apple AAPL
36 36 Boeing BA
37 37 State Farm Insurance Cos. SNPAX
38 38 Microsoft MSFT
39 39 Archer Daniels Midland ADM
40 40 Johnson & Johnson JNJ
41 41 Dell DELL
42 42 WellPoint WLP
43 43 PepsiCo PEP
44 44 United Technologies UTX
45 45 Dow Chemical DOW
46 46 MetLife MET
47 47 Best Buy BBY
48 48 United Parcel Service UPS
49 49 Kraft Foods KFT
50 50 Lowe's LOW

View File

@@ -0,0 +1,11 @@
package net.woodyfolsom.cs6601.p3;
import org.junit.Test;
// JUnit 4 test class that currently holds a single placeholder test.
public class HeadlinePullerTest {
// Placeholder: the body is empty, so this test makes no assertions and always passes.
// TODO(review): presumably intended to cover a start-date computation in the
// class under test - confirm against that class and add real assertions.
@Test
public void testGetStartDate() {
}
}

View File

@@ -0,0 +1,27 @@
package net.woodyfolsom.cs6601.p3.dao;
import static org.junit.Assert.assertNotNull;
import net.woodyfolsom.cs6601.p3.svc.HeadlineService;
import org.junit.BeforeClass;
import org.junit.Test;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
public class MySQLHeadlineDaoImplTest {
private static HeadlineService headlineSvc;
@BeforeClass
public static void setUp() {
ApplicationContext context=new ClassPathXmlApplicationContext(new String[]{"/AppContext.xml"});
headlineSvc = (HeadlineService) context
.getBean("mySQLHeadlineSvc");
}
@Test
public void testSelect() {
assertNotNull(headlineSvc);
}
}