% beamer.tex -- slides for "Meet a New R Package: scrapeR"
% Slide deck: beamer class with the Boadilla theme.
\documentclass{beamer}
\usetheme{Boadilla}

% Hyperlinks are shown as coloured text (URLs in gray) instead of boxes.
% beamer already loads hyperref on its own; the explicit line is kept as is.
\usepackage{hyperref}
\hypersetup{
  colorlinks=true,
  urlcolor=gray,
}

% Column, table and colour helpers.
\usepackage{multicol}
\usepackage{multirow}
\usepackage{array}
\usepackage{color}

% Metadata printed by \titlepage on the first slide.
\title{Meet a New R Package: scrapeR}
\author{Joe Walsh}
\begin{document}
%%%%%%%%%%%%%%%%%%%%
% Title slide, built from the \title and \author set in the preamble.
\begin{frame}
  \titlepage
\end{frame}
\begin{frame}{What scrapeR Does}
  % Two bullet groups: what the package does itself ("Directly") and what
  % that enables ("Indirectly"). The two \pause commands hold back first the
  % dependency bullet and then the whole second group for later overlays.
  \textbf{Directly}
  \begin{itemize}
    \item downloads \& preprocesses webpages
    \item saves as a list
    % The function name is code, so it is set in the typewriter font.
    \item one function: \texttt{scrape()} \pause
    \bigskip
    \item Dependency package: XML \pause
  \end{itemize}
  \bigskip
  \textbf{Indirectly}
  \begin{itemize}
    \item only requires three lines of code
    \item can be automated
    \item dynamic reports
    \item improved replicability
  \end{itemize}
\end{frame}
\begin{frame}{Why I Like It}
  % Four motivations, all shown at once (this slide has no overlays).
  \begin{itemize}
    \item huge amounts of data
    \item new data being released
    \item can stop static reports
    \item eases replication
  \end{itemize}
\end{frame}
\begin{frame}{Example: Cherry Hypothesis}
  % Centred picture; the file name contains a space, hence the quotes.
  % scale=.3 shrinks the image to 30% of its natural size.
  \begin{center}
    \includegraphics[scale=.3]{"Don Cherry"}
  \end{center}
\end{frame}
\begin{frame}{Example: Cherry Hypothesis}
  % Centred image from the file "HockeyFights website", scaled to 45% of
  % its natural size.
  \begin{center}
    \includegraphics[scale=.45]{"HockeyFights website"}
  \end{center}
\end{frame}
\begin{frame}{Example: Cherry Hypothesis}
  % Centred image from the file "NHL website", scaled to 45% of its
  % natural size.
  \begin{center}
    \includegraphics[scale=.45]{"NHL website"}
  \end{center}
\end{frame}
% R walkthrough shown as three verbatim listings: install and load scrapeR,
% scrape two URLs, then extract tables from the parsed pages with
% readHTMLTable. A pause separates consecutive listings, so they are revealed
% one overlay at a time.
% The [fragile] option is used because the frame body contains verbatim text.
% Nothing inside this frame is indented on purpose: verbatim reproduces
% leading spaces literally, and the frame's closing line is left alone at the
% start of its own line.
\begin{frame}[fragile]{Example: Cherry Hypothesis}
\begin{verbatim}
# install and load scrapeR package
install.packages("scrapeR", dependencies=TRUE)
library(scrapeR)
\end{verbatim} \pause
\begin{verbatim}
# scrape data
URLs <- c("http://www.nhl.com/ice/standings.htm?type=lea",
"http://www.hockeyfights.com/leaders/teams/")
pageSource <- scrape(url=URLs, headers=FALSE, parse=TRUE)
\end{verbatim} \pause
\begin{verbatim}
# get two tables with info we need
NHL.tables <- readHTMLTable(pageSource[[1]])
team.records <- NHL.tables[[3]]
HockeyFights.tables <- readHTMLTable(pageSource[[2]])
team.fights <- HockeyFights.tables[[1]]
\end{verbatim}
\end{frame}
\begin{frame}{Example: Cherry Hypothesis}
  % Centred image from the file "cherry hypothesis", scaled to 45% of its
  % natural size.
  \begin{center}
    \includegraphics[scale=.45]{"cherry hypothesis"}
  \end{center}
\end{frame}
\begin{frame}{Questions?}
  % Contact details, one entry per line. TeX joins consecutive source lines
  % into a single paragraph, so the entries are separated by blank lines
  % (paragraph breaks) to keep them from running together.
  % NOTE(review): "[email protected]" looks like an e-mail obfuscation
  % placeholder rather than a real address -- restore the actual address.
  Email: [email protected]

  GitHub: jtwalsh0

  % Centred logo, scaled to 20% of its natural size.
  \begin{center}
    \includegraphics[scale=.20]{"DRW Logo"}
  \end{center}
\end{frame}
\end{document}
% NOTE: everything below comes after \end{document}, so LaTeX never reads it
% and it does not appear on any slide.
% It appears to be a leftover draft of an "issue vs. scrapeR solution"
% comparison table; to show it, move it into a frame above \end{document}.
\begin{center}
\begin{table}
\begin{tabular}{ll}
\textbf{Issue} & \textbf{scrapeR solution} \\ \hline
want web data & downloads \& preprocesses pages \\
changing data & can be automated \\
static reports & dynamic reports \\
less reproducible reports & more reproducible reports \\
other languages more difficult & three lines of code \\
other languages take more time & three lines of code
\end{tabular}
\end{table}
\end{center}