-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmain.R
72 lines (54 loc) · 2.7 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
source("crawler_CIA_CREST.R")
##################################################################################
# Example: 'Kennedy'
# Fetch the CIA files returned for 'Kennedy' across the first 10 result pages.

#--- Step 1: ask for the basic information of the query ---#
basic.info.query.CIA_CREST(query = "Kennedy")

#--- Step 2: parse the result pages of the query ---#
your.query <- 'Kennedy'
page.nums <- 0:9  # first 10 pages; index 0 is the 1st page on the web
# parsing.pages.CIA_CREST() hands back the parse.table
parse.table <- parsing.pages.CIA_CREST(your.query, page.nums)

#--- Step 3: download the files listed in the parse.table ---#
# download.doc.CIA_CREST() takes the parse.table built in step 2
# and hands back a reference.table
reference.table <- download.doc.CIA_CREST(parse.table)
##################################################################################
# Example: 'secret letter'
# Fetch the CIA files returned for 'secret letter' on result pages 0, 2 and 4
# (index 0 is the 1st page on the web).

#--- Step 1: ask for the basic information of the query ---#
basic.info.query.CIA_CREST(query = "secret letter")

#--- Step 2: parse the result pages of the query ---#
your.query <- 'secret letter'
page.nums <- c(0, 2, 4)  # non-contiguous pages are allowed
# parsing.pages.CIA_CREST() hands back the parse.table
parse.table <- parsing.pages.CIA_CREST(your.query, page.nums)

#--- Step 3: download the files listed in the parse.table ---#
# download.doc.CIA_CREST() takes the parse.table built in step 2
# and hands back a reference.table
reference.table <- download.doc.CIA_CREST(parse.table)
##################################################################################
# Example: 'Obama'
# Fetch the CIA files returned for 'Obama' on result pages 0 and 1
# (index 0 is the 1st page on the web).

#--- Step 1: ask for the basic information of the query ---#
basic.info.query.CIA_CREST(query = "Obama")

#--- Step 2: parse the result pages of the query ---#
your.query <- 'Obama'
page.nums <- c(0, 1)  # pages 0~1; index 0 is the 1st page on the web
parse.table <- parsing.pages.CIA_CREST(your.query, page.nums)

#--- Step 3: download the files listed in the parse.table ---#
reference.table <- download.doc.CIA_CREST(parse.table)
##################################################################################
# Example for 'UFO'
# Download at most the first 10 CIA files of 'UFO' listed on page 0
# (note that 0 is the 1st page on the web)

#=== 1. Give a query to get basic information ===#
basic.info.query.CIA_CREST(query = "UFO")

#=== 2. Parse results according to given query and pages ===#
your.query = 'UFO'
page.nums = c(0) # (note that 0 is the 1st page on the web)
parse.table = parsing.pages.CIA_CREST(your.query, page.nums)

#=== 3. Auto-download files according to the parse.table ===#
# head() caps the selection at the rows that actually exist. Indexing with
# parse.table[1:10, ] would pad a shorter data frame with all-NA rows (or fail
# with "subscript out of bounds" on a matrix), and those rows would then be
# passed on to the downloader.
reference.table = download.doc.CIA_CREST(head(parse.table, 10))