-
Notifications
You must be signed in to change notification settings - Fork 0
/
WikiPage.rb
118 lines (101 loc) · 3.39 KB
/
WikiPage.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
require 'rubygems'
require 'net/http'
require 'json'
require 'pry-rescue'
require 'pry-stack_explorer'
require 'pry-debugger'
#require 'ruby-prof'
# Endpoint of the MediaWiki Action API that every request is sent to.
# HTTPS is used because Wikimedia answers plain-HTTP requests with a redirect,
# which Net::HTTP.get_response does not follow.
WIKIBASE = 'https://en.wikipedia.org/w/api.php'.freeze

# Names found inside {{...}} that are not treated as real templates
# (magic words, helper templates, markup debris). Frozen so the shared list
# cannot be modified by accident.
EXCLUDED = ["FULLPAGENAME", "SUBPAGENAME", "Lc", "Lx", "NAMESPACE", "PAGENAME", "DEFAULTSORT", "Documentation",
            "TALKSPACE", "FULLROOTPAGENAME", "SUBJECTSPACE", "BASEPAGENAME", "ROOTPAGENAME", "PENDINGCHANGELEVEL",
            "NAMESPACENUMBER", "FULLPAGENAMEE", "TALKPAGENAME", "</nowiki>", "SUBJECTPAGENAME", "Tl", "Tlx", "♥",
            "Template:FlagIOC"].freeze
# Core extension used when normalising template names.
class String
  # Upcases the first character of the receiver IN PLACE and returns the
  # receiver itself (not a copy). An empty string is returned unchanged,
  # since it has no first character to upcase.
  def upfirst
    return self if empty?

    self[0] = self[0].upcase
    self
  end
end
class WikiPages
@@pages = []
def self.add(name, via: :content)
w = WikiPage.new(name, via)
@@pages << w
w
end
def self.pages
class_variable_get :@@pages
end
private
class WikiPage
attr_accessor :name, :templates
def initialize(name, via)
@name = name
@via = via
populate
end
def name=(page)
if page != @name
@name = page
populate
end
end
private
def populate
if @via == :transclusion
populate_templates
else
populate_content
end
end
def populate_content
unless @name
puts "Please set the name first!"
return
end
uri = URI.parse(WIKIBASE)
params = { format: 'json', action: 'query', prop: 'revisions', rvprop: 'content', titles: "#{URI.encode(@name.gsub(' ', '_'))}" }
#puts "params: #{params.inspect}"
uri.query = URI.encode_www_form(params)
#puts "uri: #{uri}"
resp = Net::HTTP.get_response(uri)
data = resp.body
result = JSON.parse(data)
content = result['query']['pages'].first[1]['revisions'][0]['*']
# Clean up content
content = content.gsub(/< *nowiki *>.*?< *\/nowiki *>/m,'')
content = content.gsub(/< *noinclude *>.*?< *\/noinclude *>/m,'')
@templates = content.scan(/{{([^# ][^|}\n]*)/).flatten
binding.pry if @name == 'History_of_Liberia'
# Clean up templates
@templates.reject! {|x| x =~ /:/ }
@templates.reject! {|x| x =~ /{/ }
@templates.reject! {|x| x.end_with? '_' }
binding.pry if @name == 'History_of_Liberia'
@templates.map! { |x| "Template:#{x.strip.upfirst}" }
binding.pry if @name == 'History_of_Liberia'
@templates = @templates.uniq
binding.pry if @name == 'History_of_Liberia'
@templates.reject! {|x| x =~ /Template: *{/ }
binding.pry if @name == 'History_of_Liberia'
@templates.reject! {|x| EXCLUDED.include? x.gsub(/Template:/, '') }
binding.pry if @name == 'History_of_Liberia'
#puts @templates.inspect
end
def populate_templates
unless @name
puts "Please set the name first!"
return
end
uri = URI.parse(WIKIBASE)
params = { format: 'json', action: 'query', prop: 'templates', tllimit: 100, titles: "#{URI.encode(@name.gsub(' ', '_'))}" }
puts "params: #{params.inspect}"
uri.query = URI.encode_www_form(params)
puts "uri: #{uri}"
resp = Net::HTTP.get_response(uri)
data = resp.body
result = JSON.parse(data)
@templates = result['query']['pages'].first[1]['templates'].map {|x| x['title'].upfirst}
end
end
end