-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathNBER-methods.R
85 lines (69 loc) · 2.04 KB
/
NBER-methods.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#' ---
#' title: "NBER-methods"
#' author: "JJayes"
#' date: "04/09/2021"
#' output: html_document
#' ---
#'
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(rvest)
library(nberwp)
library(glue)
#'
#' ## Planning
#'
#' Try out Ben Davies's `nberwp` package and see if I can access the text of the abstracts.
#'
#' ## NBER papers package
#'
## -----------------------------------------------------------------------------
# Copy the `papers` object exported by the nberwp package into the session.
papers <- nberwp::papers
# Evaluate it at top level so it is displayed when the chunk is run or knitted.
papers
#'
#' Nice! Now we have a list of papers, so we can construct each paper's URL from its paper number.
#'
#' ### Try scraping?
#'
## -----------------------------------------------------------------------------
# url <- "https://www.nber.org/papers/w8001"
#
# text <- read_html(url) %>%
# html_node("p") %>%
# html_text() %>%
# str_squish()
#'
#' It would be really cool to look at the popularity of different topics over time, such as "Kuznets curve". Other interesting terms might be "case studies" or "paradox".
#'
#' Make a function to scrape the abstract:
#'
## -----------------------------------------------------------------------------
# Scrape the first paragraph of an NBER working paper's web page.
#
# Arguments:
#   paper  Identifier appended to "https://www.nber.org/papers/" to form the
#          page URL (e.g. "w8001").
#
# Returns the text of the first <p> node on the page with whitespace squished
# (presumably the abstract -- verify against the current page layout).
# Errors raised by read_html() (bad URL, no network) propagate to the caller.
get_abstract <- function(paper) {
  # Interpolate `paper` as a value. Every unnamed argument to glue() is treated
  # as template text, so passing `paper` positionally would evaluate any
  # `{...}` that happens to appear inside it.
  url <- glue("https://www.nber.org/papers/{paper}")

  # Progress message: the full scrape covers many pages.
  message(glue("Getting abstract from Working Paper {paper}"))

  read_html(url) %>%
    html_node("p") %>%
    html_text() %>%
    str_squish()
}
#'
#' Use the function to scrape the abstract for every paper.
#'
## -----------------------------------------------------------------------------
# Build the results table: number the rows of `papers`, then call
# get_abstract() on every value of `paper`. possibly() replaces any error with
# the string "failed" so one bad page does not abort the whole run, and map()
# stores the results in a list-column named `abstract`.
# For a trial run, restrict the rows before scraping, e.g. with
# filter(between(row_num, 0, 200)).
df <- mutate(
  papers,
  row_num = row_number(),
  abstract = map(paper, possibly(get_abstract, otherwise = "failed"))
)
#'
#'
## -----------------------------------------------------------------------------
# df <- "hello"
# Persist the scraped table. Create the output directory first (a no-op when
# it already exists) so a missing `data/` folder cannot make the write fail
# after the scrape has finished.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write_rds(df, "data/abstracts_df.rds")
#'
#' Purl it to an .R file.
#'
## -----------------------------------------------------------------------------
# knitr::purl("code/NBER-methods.Rmd", documentation = 2)
#'