This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
atlas.py
executable file
·206 lines (170 loc) · 8.09 KB
/
atlas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python
import subprocess, os, sys, publicsuffix, inspect, json, shutil, re, pystache
from lxml import etree
# Upstream repository to mirror and the two branches whose rulesets are compared.
HTTPS_E = "https://github.com/EFForg/https-everywhere.git"
release_branch = "release"
stable_branch = "master"

# Public suffix list used to reduce ruleset target hosts to their domains.
ps = publicsuffix.PublicSuffixList()


def _read_template(path):
    """Return the full text of the file at `path`, closing it before returning."""
    with open(path) as template_file:
        return template_file.read()


# Mustache templates, read once at start-up (paths are relative to the CWD).
index_template = _read_template("templates/index.mustache")
letter_template = _read_template("templates/letter.mustache")
ruleset_template = _read_template("templates/ruleset.mustache")
redirect_template = _read_template("templates/redirect.mustache")

# Host/domain -> set of ruleset filenames that target it.
domain_rulesets = {}
# Ruleset filename -> [name, default_off, serialized XML, host, host, ...],
# kept separately for each branch.
stable_rulesets = {}
release_rulesets = {}

# Renderer configured with UTF-8 as its byte-string encoding.
renderer = pystache.Renderer(string_encoding='utf-8')
def clone_or_update():
    """Ensure a current https-everywhere checkout and chdir into its rules dir.

    If the ``https-everywhere`` directory already exists, switch to the
    stable branch (via ``stable()``) and pull. Otherwise run the
    ``clone-https-everywhere.sh`` helper script to create it.

    On success the process working directory is
    ``https-everywhere/src/chrome/content/rules``.

    Raises:
        Exception: if the pull or the clone exits with a non-zero status.
    """
    rules_dir = "https-everywhere/src/chrome/content/rules"
    if os.path.isdir("https-everywhere"):
        os.chdir(rules_dir)
        stable()
        if subprocess.call(["git", "pull", "--no-edit", "-q"]) != 0:
            raise Exception("Could not pull updates")
    else:
        result = subprocess.call(["./clone-https-everywhere.sh", HTTPS_E, stable_branch, release_branch])
        # Check the clone status *before* changing directory: after a failed
        # clone the rules directory may not exist, and the resulting chdir
        # error would hide the real cause.
        if result != 0:
            raise Exception("Could not clone {}".format(HTTPS_E))
        os.chdir(rules_dir)
def stable():
    """Check out and update the stable branch, then describe its latest commit.

    Must be called with the working directory inside the git checkout.

    Returns:
        The raw output of ``git log -1 --pretty=format:%h %ai`` (abbreviated
        hash and author date of the newest commit).

    Raises:
        Exception: if the checkout or the pull exits with a non-zero status.
    """
    if subprocess.call(["git", "checkout", "-q", stable_branch]) != 0:
        raise Exception("Could not switch to branch {}".format(stable_branch))
    if subprocess.call(["git", "pull", "--no-edit", "-q", "origin", stable_branch]) != 0:
        raise Exception("Could not pull from origin on branch {}".format(stable_branch))
    log_process = subprocess.Popen(["git", "log", "-1", "--pretty=format:%h %ai"], stdout=subprocess.PIPE, stderr=None)
    # communicate() drains stdout, closes the pipe and waits for the child,
    # so neither a file descriptor nor a zombie process is left behind.
    return log_process.communicate()[0]
def release():
    """Check out and update the release branch, then describe its latest commit.

    Must be called with the working directory inside the git checkout.

    Returns:
        The raw output of ``git log -1 --pretty=format:%h %ai`` (abbreviated
        hash and author date of the newest commit).

    Raises:
        Exception: if the checkout or the pull exits with a non-zero status.
    """
    if subprocess.call(["git", "checkout", "-q", release_branch]) != 0:
        raise Exception("Could not switch to branch {}".format(release_branch))
    if subprocess.call(["git", "pull", "--no-edit", "-q", "origin", release_branch]) != 0:
        raise Exception("Could not pull from origin on branch {}".format(release_branch))
    log_process = subprocess.Popen(["git", "log", "-1", "--pretty=format:%h %ai"], stdout=subprocess.PIPE, stderr=None)
    # communicate() drains stdout, closes the pipe and waits for the child,
    # so neither a file descriptor nor a zombie process is left behind.
    return log_process.communicate()[0]
# Four dot-separated groups of 1-3 digits. Written as a raw string so that
# "\." reaches the regex engine intact instead of being an invalid string escape.
_DOTTED_QUAD = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$")


def public_suffix_wrapper(domain):
    """Reduce `domain` via the public suffix list, passing IP-like hosts through.

    Args:
        domain: a host name taken from a ruleset target.

    Returns:
        `domain` unchanged when it looks like a dotted-quad address (the
        digit groups are not range-checked); otherwise the result of
        ``ps.get_public_suffix(domain)``.
    """
    if _DOTTED_QUAD.match(domain):
        return domain
    return ps.get_public_suffix(domain)
def get_names(branch):
    """Index every ruleset XML file in the current directory for `branch`.

    For each ``*.xml`` file whose root element is ``<ruleset>``, store
    ``[name, default_off, serialized XML, host, host, ...]`` under the file
    name in ``stable_rulesets`` (when `branch` equals ``stable_branch``) or
    ``release_rulesets`` (any other value), and add the file name to
    ``domain_rulesets[host]`` for every host derived from its targets.

    Files that cannot be read or parsed are skipped.

    Args:
        branch: name of the branch currently checked out.
    """
    rulesets = stable_rulesets if branch == stable_branch else release_rulesets
    for fi in sorted(os.listdir(".")):
        if not fi.endswith(".xml"):
            continue
        try:
            tree = etree.parse(fi)
        except Exception:
            # Parsing this ruleset failed for some reason; skip it. Catching
            # Exception (not a bare except) keeps this best-effort while
            # letting KeyboardInterrupt and SystemExit propagate.
            continue
        if not tree.xpath("/ruleset"):
            continue
        # The mere presence of a default_off attribute marks the ruleset as
        # disabled, whatever its value.
        dfo = bool(tree.xpath("/ruleset/@default_off"))
        name = tree.xpath("/ruleset/@name")[0]
        current_ruleset = [name, dfo, etree.tostring(tree, encoding='utf-8')]
        rulesets[fi] = current_ruleset
        # De-duplicate the reduced hosts, then sort so the recorded host order
        # (and therefore the generated pages) is the same on every run.
        for host in sorted(set(map(public_suffix_wrapper, tree.xpath("/ruleset/target/@host")))):
            host = host.encode("idna").decode('utf-8')
            if host == "*":
                # This is a problem about wildcards at the end of
                # target hosts. Currently, we exclude such targets
                # from having their own listings in the atlas.
                continue
            if host[:2] == "*.":
                # A very small minority of rules apply to the entirety
                # of something that the public suffix list considers
                # a top-level domain, like blogspot.de (because every
                # blogspot blog can perhaps be accessed via HTTPS, but
                # individual users constrain the content of each
                # subdomain). In this unusual case, just list the
                # higher level domain, without the *. part.
                host = host[2:]
            domain_rulesets.setdefault(host, set()).add(fi)
            current_ruleset.append(host)
# Fetch or refresh the checkout; on success the working directory is the
# rules directory inside it.
clone_or_update()
# Order matters: release()/stable() check out their branch, and get_names()
# then reads the XML files of whatever branch is currently checked out.
release_as_of = release()
get_names(release_branch)
stable_as_of = stable()
get_names(stable_branch)
# Climb back out of https-everywhere/src/chrome/content/rules (five levels)
# so the relative "output/..." paths used afterwards resolve as before.
os.chdir("../../../../..")
def hosts_to_filenames(host):
    """Return the page name(s) used for `host` in the generated atlas.

    A host covered by exactly one ruleset maps to ``[host]``. Any other
    count ``n`` maps to ``host-1`` ... ``host-n``, one name per ruleset.
    """
    ruleset_count = len(domain_rulesets[host])
    if ruleset_count == 1:
        return [host]
    numbered = []
    for position in range(1, ruleset_count + 1):
        numbered.append(host + '-' + str(position))
    return numbered
# One list of page names per host, in sorted host order, then flattened.
domains_nested = [hosts_to_filenames(host) for host in sorted(domain_rulesets)]
domains = [item for sublist in domains_nested for item in sublist]
# Distinct first characters of the page names, used for the letter navigation.
first_letters_list = sorted(set(n[0] for n in domains))
first_letters = [{'letter': first} for first in first_letters_list]
output = pystache.render(index_template, {'letters': first_letters, 'domains': domains})
# Use a context manager so the page is flushed and closed immediately.
with open("output/index.html", "w") as index_file:
    index_file.write(output)
def letter_domain_pairs(domains):
    """Yield ``(first_character, entries)`` for each run of names sharing one.

    Consecutive names in `domains` that start with the same character are
    grouped together; `entries` is a list of ``{'domain': name}`` dicts in
    input order. Pass the names sorted to get exactly one group per
    character. An empty `domains` yields nothing.

    Args:
        domains: iterable of non-empty strings.
    """
    # Function-scope import: the module's import block does not provide it.
    from itertools import groupby

    for first_character, names in groupby(domains, key=lambda name: name[0]):
        yield first_character, [{'domain': name} for name in names]
# Page that sends visitors of a bare directory URL back up one level.
redirect_output = pystache.render(redirect_template, {'redirect': '../'})

# output/domains is rebuilt from scratch on every run.
if os.path.exists('output/domains'):
    shutil.rmtree("output/domains")
os.mkdir('output/domains')
with open("output/domains/index.html", "w") as redirect_file:
    redirect_file.write(redirect_output)

# output/letters is reused if present; its pages are overwritten in place.
if not os.path.exists('output/letters'):
    os.mkdir('output/letters')
with open("output/letters/index.html", "w") as redirect_file:
    redirect_file.write(redirect_output)

# One page per leading character, listing the names that start with it.
for letter, domains_index in letter_domain_pairs(domains):
    output = pystache.render(letter_template, {'letters': first_letters,
                                               'first_letter': letter,
                                               'domains': domains_index})
    with open("output/letters/%s.html" % letter, "w") as letter_file:
        letter_file.write(output)
# Link every domain page to the page of the ruleset(s) that cover it.
for domain in domain_rulesets:
    # Work on a sorted copy: the "-1", "-2", ... suffixes are then assigned
    # the same way on every run, and domain_rulesets itself is left intact
    # (no destructive pop()).
    ruleset_filenames = sorted(domain_rulesets[domain])
    if len(ruleset_filenames) == 1:
        os.symlink("../rulesets/" + ruleset_filenames[0] + ".html", "output/domains/" + domain + ".html")
    else:
        for num, ruleset_filename in enumerate(ruleset_filenames, 1):
            os.symlink("../rulesets/" + ruleset_filename + ".html", "output/domains/" + domain + "-" + str(num) + ".html")
if not os.path.exists('output/rulesets'):
    os.mkdir('output/rulesets')


def _branch_fields(prefix, branch_rulesets, ruleset):
    """Return the template fields describing `ruleset` on one branch.

    Args:
        prefix: key prefix, ``"stable"`` or ``"release"``.
        branch_rulesets: that branch's mapping of ruleset filename to
            ``[name, default_off, xml, host, host, ...]``.
        ruleset: ruleset filename to describe.

    Returns:
        An empty dict when the branch does not contain `ruleset`. Otherwise
        ``<prefix>_hosts`` (JSON list of hosts), ``<prefix>_enabled`` and
        ``<prefix>_disabled`` (one holds the rule details, the other is
        False) and the matching ``<prefix>_has_enabled`` or
        ``<prefix>_has_disabled`` flag set to True.
    """
    if ruleset not in branch_rulesets:
        return {}
    entry = branch_rulesets[ruleset]
    default_off, xml = entry[1:3]
    state = "disabled" if default_off else "enabled"
    fields = {
        prefix + "_hosts": json.dumps(entry[3:]),
        prefix + "_enabled": False,
        prefix + "_disabled": False,
    }
    fields[prefix + "_" + state] = {"rule_text": xml, "git_link": ruleset}
    fields[prefix + "_has_" + state] = True
    return fields


# One page per ruleset file seen on either branch.
for ruleset in set(stable_rulesets) | set(release_rulesets):
    d = {
        "stable_as_of": stable_as_of,
        "release_as_of": release_as_of,
        "stable_affected": False,
        "release_affected": False,
        # Defaults for a branch that lacks this ruleset; overwritten below
        # with a JSON string when the branch has it.
        "stable_hosts": [],
        "release_hosts": [],
        "stable_branch": stable_branch,
        "release_branch": release_branch,
    }
    d.update(_branch_fields("stable", stable_rulesets, ruleset))
    d.update(_branch_fields("release", release_rulesets, ruleset))
    output = renderer.render(ruleset_template, d)
    # Use a context manager so each page is closed before the next is opened.
    with open("output/rulesets/" + ruleset + ".html", "w") as ruleset_file:
        ruleset_file.write(output)