-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathGet_ReportsLinks.r
45 lines (33 loc) · 891 Bytes
/
Get_ReportsLinks.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
## !/usr/bin/env RStudio 1.1.423
## -*- coding: utf-8 -*-
## Pages_links Acquisition
library("rvest")
library("stringr")
library("Rwordseg")
library("wordcloud2")
library("dplyr")
## Collect the links listed on a report index page.
##
## The page is downloaded a single time; every <a> element found inside
## `div.history_report` contributes one row to the result.
##
## Args:
##   url: Address of the index page to scrape. Defaults to the address that
##        was previously hard-coded, so calling with no arguments behaves as
##        before.
##
## Returns:
##   A data.frame with two columns:
##     Year  - the anchor text passed through as.numeric(); anchor text that
##             is not a plain number becomes NA (with R's usual coercion
##             warning).
##     Links - the anchor's href attribute with surrounding whitespace
##             removed.
Get_Reports_Links <- function(url = "http://www.gov.cn/guowuyuan/baogao.htm") {
  # Fetch and select the anchors once; both columns are derived from the same
  # node set, which keeps Year and Links aligned row by row.
  anchors <- read_html(url) %>%
    html_nodes("div.history_report") %>%
    html_nodes("a")

  Year <- anchors %>%
    html_text(trim = TRUE) %>%
    as.numeric()
  Links <- anchors %>%
    html_attr("href") %>%
    str_trim("both")

  data.frame(
    Year = Year,
    Links = Links,
    stringsAsFactors = FALSE
  )
}
## Scrape the report index and save it as a CSV file under ./data.
Reports_links <- Get_Reports_Links()

## Only the directory creation is conditional. The CSV must be written on
## every run; keeping write.csv() inside this `if` would skip the export
## whenever ./data already exists.
if (!dir.exists("data")) {
  dir.create("data")
}
write.csv(
  Reports_links,
  "./data/Reports_links.csv",
  row.names = FALSE
)