-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rakefile
119 lines (92 loc) · 3.03 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
require 'rubygems'
require 'rake'
require 'rake/clean'
require 'bundler'
Bundler.require :default
# Add "data" to the CLEAN file list provided by rake/clean, i.e. the set of
# paths that the `clean` task removes.
CLEAN.include("data")
# Dataset setup: download and unpack the dataset files, and create or delete
# the Elastic Search indexes. Every task is driven by config/datasets.json.
namespace :setup do
  desc "create Elastic Search indexes"
  task :indexes do
    sh "ruby bin/create-indexes.rb config/datasets.json config/index-template.json"
  end

  desc "download datasets"
  task :download do
    sh "ruby bin/cache-datasets.rb config/datasets.json"
  end

  desc "Unpack the downloaded files"
  task :unpack do
    # The config is read when the task runs, not when the Rakefile is loaded.
    datasets = JSON.parse(File.read("config/datasets.json"))

    datasets.each do |dataset, config|
      # Datasets without a "download" entry are skipped.
      next if config["download"].nil?

      # `&&` rather than `;`: if the cd fails, unzip must not run in whatever
      # directory rake happens to be in.
      sh "cd data/#{dataset} && unzip -un #{dataset}.zip"
    end
  end

  desc "Delete all indexes"
  task :delete_indexes do
    sh "ruby bin/delete-indexes.rb config/datasets.json"
  end

  # Full setup: download, then unpack, then create the indexes.
  task :all => [:download, :unpack, :indexes]
end
# Server control tasks.
namespace :server do
  desc "Start elastic search"
  # Runs the elasticsearch launcher located under ./server/bin.
  task(:start) { sh "./server/bin/elasticsearch" }
end
# Tasks that convert datasets to the standard JSON form, load them, and
# collect sample data.
#
# Values supplied by the user (task arguments, ORCID_DATA) are passed to `sh`
# as separate arguments, so no shell is involved and paths containing spaces
# or shell metacharacters reach the script intact.
namespace :prepare do
  # Read while the Rakefile is being loaded: one conversion task is generated
  # for each dataset whose config has a "script" entry.
  datasets = JSON.parse(File.read("config/datasets.json"))

  datasets.each do |dataset, config|
    script = config["script"]
    next unless script

    desc "Convert #{dataset} data to standard JSON"
    task dataset do
      sh %{ruby #{script} config/datasets.json}
    end

    # Each conversion task becomes a prerequisite of prepare:all.
    task :all => dataset
  end

  desc "Convert and load all datasets (will take a while!)"
  task :all => [:load_all]

  desc "load a named dataset"
  task :load, [:dataset] do |_task, args|
    # Array(nil) is empty, so an omitted dataset is simply left off the command.
    sh "ruby", "bin/load-dataset.rb", "config/datasets.json", *Array(args[:dataset])
  end

  desc "Reload a single dataset"
  task :reload, [:dataset] do |_task, args|
    name = Array(args[:dataset])
    sh "ruby", "bin/delete-index.rb", *name
    sh "ruby", "bin/load-dataset.rb", "config/datasets.json", *name
  end

  desc "Load all prepared datasets"
  task :load_all do
    # Re-read the config when the task runs rather than reusing the load-time copy.
    JSON.parse(File.read("config/datasets.json")).each_key do |name|
      sh "ruby", "bin/load-dataset.rb", "config/datasets.json", name
    end
  end

  desc "Collect some sample affiliations from CrossRef to use as test data. Pass sample size as argument"
  task :collect_crossref_samples, [:samples] do |_task, args|
    command = ["ruby", "bin/collect-crossref-samples.rb"]
    # The -s option is only added when a sample size was given.
    command.push("-s", args[:samples]) if args[:samples]
    sh(*command)
  end

  desc "Collect some sample affiliations from ORCID to use as test data"
  task :collect_orcid_samples do
    orcid_data = ENV["ORCID_DATA"]
    # abort exits with status 1, but also says why.
    abort "prepare:collect_orcid_samples requires the ORCID_DATA environment variable to be set" unless orcid_data

    sh "ruby", "bin/collect-orcid-samples.rb", orcid_data
  end

  desc "Extract institution and country from affiliation data, enrich CSV"
  task :enrich_cermine, [:csv] do |_task, args|
    sh "ruby", "bin/enrich-cermine.rb", *Array(args[:csv])
  end
end
# Reporting tasks.
namespace :report do
  desc "Generate coverage report by country"
  task :geo_coverage do
    sh "ruby bin/report-geo-coverage.rb config/datasets.json"
  end

  desc "Attempt to match affiliations from data in provided CSV file"
  task :match, [:file] do |_task, args|
    # The file is passed as its own argument (no shell involved), so a path
    # containing spaces or shell metacharacters reaches the script intact.
    # Array(nil) is empty, so an omitted file is simply left off the command.
    sh "ruby", "bin/report-matching.rb", "config/datasets.json", *Array(args[:file])
  end
end