forked from mlandauer/vote-preferencing
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtransform.rb
executable file
·216 lines (190 loc) · 7.44 KB
/
transform.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/env ruby
# Transform group voting data from belowtheline into a form that can go into the R statistics
# package to do some multidimensional scaling magic as a way to try to visualise the "closeness"
# of different parties
# TODO:
# 1. We're currently ignoring independent candidates. We could include them by lumping candidates together
# by group rather than party. We're currently effectively doing this with the Lib/Nat coalition. We could
# do this with the independents as well
# 2. Combine data from different states
# 3. Don't entirely ignore parties that have not submitted a ticket (because we can still get information
# from how the other parties preference that party)
require "json"
require "pp"
# Maps a party's full name (the "name" field of its party JSON file) to the
# shorter label written into the output files. Names that are not listed here
# are used unchanged by lookup_party_full_name.
# Frozen so the lookup table cannot be modified by accident at runtime.
SHORTER_NAMES = {
  "Animal Justice Party" => "Animal Justice",
  "Australia First" => "Australia First",
  "Australian Christian Party" => "Australian Christian Party",
  "Australian Democrats" => "Democrats",
  "Australian First Nations Political Party" => "First Nations",
  "Australian Independents" => "Australian Independents",
  "Australian Labor Party" => "Labor",
  "Australian Motoring Enthusiast Party" => "Motoring Enthusiast",
  "Australian Protectionist Party" => "Protectionist",
  "Australian Sex Party" => "Sex Party",
  "Australian Sports Party" => "Sports Party",
  "Australian Voice Party" => "Australian Voice",
  "Bank Reform Party" => "Bank Reform",
  "Building Australia Party" => "Building Australia",
  "Bullet Train For Australia" => "Bullet Train For Australia",
  "Carers Alliance" => "Carers Alliance",
  "Christian Democratic Party" => "Christian Democratic",
  "Citizens Electoral Council" => "Citizens Electoral Council",
  "Country Alliance" => "Country Alliance",
  "Country Liberals" => "Country Liberals",
  "Democratic Labour Party" => "Democratic Labour",
  "Drug Law Reform" => "Drug Law Reform",
  "Family First" => "Family First",
  "Fishing and Lifestyle Party" => "Fishing and Lifestyle",
  "Future Party" => "Future",
  "Help End Marijuana Prohibition" => "Help End Marijuana Prohibition",
  "Katter's Australian Party" => "Katter's Australian",
  "Liberal Democratic Party" => "Liberal Democratic",
  "Liberal National Party" => "Liberal National",
  "Nick Xenophon Group" => "Nick Xenophon",
  "No Carbon Tax Climate Sceptics" => "No Carbon Tax Climate Sceptics",
  "Non-Custodial Parents Party (Equal Parenting)" => "Non-Custodial Parents",
  "One Nation" => "One Nation",
  "Outdoor Recreation Party (Stop The Greens)" => "Outdoor Recreation",
  "Palmer United Australia" => "Palmer United",
  "Pirate Party" => "Pirate Party",
  "Republican Party of Australia" => "Republican",
  "Rise Up Australia" => "Rise Up Australia",
  "Secular Party" => "Secular Party",
  "Senator Online" => "Senator Online",
  "Shooters and Fishers Party" => "Shooters and Fishers",
  "Smokers Rights Party" => "Smokers Rights",
  "Socialist Alliance" => "Socialist Alliance",
  "Socialist Equality Party" => "Socialist Equality",
  "Stable Population Party" => "Stable Population",
  "Stop CSG Party" => "Stop CSG",
  "The Greens" => "Greens",
  "Uniting Australia Party" => "Uniting Australia",
  "Voluntary Euthanasia" => "Voluntary Euthanasia",
  "Wikileaks Party" => "Wikileaks"
}.freeze
# Looks up the party code recorded for a candidate.
#
# person_label - basename (without ".json") of the candidate's file under
#                belowtheline/data/people/.
#
# Returns the value of the file's "party" key, or "ind" (the code this script
# uses for independents) when that key is missing or null.
# Raises Errno::ENOENT when the candidate file does not exist.
def party(person_label)
  # Block form of File.open closes the handle as soon as the JSON is parsed;
  # this method runs once per candidate on every ticket, so handles must not
  # be left for the garbage collector to reclaim.
  person = File.open("belowtheline/data/people/#{person_label}.json") { |file| JSON.load(file) }
  person["party"] || "ind"
end
# Reads one group JSON file and reports which party the group belongs to and
# the tickets it lodged.
#
# group_file - path to a group JSON file; its "parties" and "tickets" keys are
#              read.
#
# Returns a Hash:
#   {:party => "ind"}                    when the group lists no party
#   {:party => code}                     when the group lodged no ticket
#   {:party => code, :tickets => [...]}  otherwise
# Raises RuntimeError when the group still has more than one party after the
# coalition codes have been merged.
def lookup_tickets(group_file)
  # Block form of File.open closes the handle once the JSON has been parsed.
  group = File.open(group_file) { |file| JSON.load(file) }

  # Special handling for coalition. We will treat them as one party
  group_parties = group["parties"].map { |code| merge_coalition(code) }.uniq
  raise "Can't currently handle multiple parties in a group" if group_parties.count > 1

  group_party = group_parties.first
  return {:party => "ind"} if group_party.nil?

  tickets = group["tickets"]
  if tickets.empty?
    # No ticket was submitted to the AEC
    puts "INFO: No ticket submitted in #{group_file}"
    return {:party => group_party}
  end

  {:party => group_party, :tickets => tickets}
end
# Collapses the party codes "lib", "nat", "clp" and "lnp" into the single
# made-up code "coa"; every other value is returned unchanged.
def merge_coalition(party)
  case party
  when "lib", "nat", "clp", "lnp" then "coa"
  else party
  end
end
# Turns one ticket into a ranking of parties.
#
# ticket            - ordered list of person labels (each is resolved to a
#                     party code through `party`).
# parties_to_ignore - party codes to drop from the ranking (no tickets and
#                     independents).
#
# Returns a Hash of party code => zero-based position of that party's first
# appearance on the ticket, after coalition codes have been merged and the
# ignored parties removed.
def calculate_distances(ticket, parties_to_ignore)
  # Resolve every candidate to a party code, then apply the coalition substitution.
  codes = ticket.map { |person_label| party(person_label) }
  codes = codes.map { |code| merge_coalition(code) }

  # Drop ignored parties and keep only the first occurrence of each remaining one.
  ranking = codes.reject { |code| parties_to_ignore.include?(code) }.uniq

  ranking.each_with_index.each_with_object({}) do |(code, position), scores|
    scores[code] = position
  end
end
# Arithmetic mean of the numbers in `a`, as a Float.
# An empty array produces NaN (0.0 divided by a size of 0).
def average_array(a)
  total = a.reduce(:+)
  total.to_f / a.size
end
# Where a group lodged several tickets, average the per-ticket scores.
#
# distances - list of Hashes (party code => score), one per ticket. The set of
#             parties is taken from the first Hash.
#
# Returns a Hash of party code => average of that party's score over all tickets.
def average_distances(distances)
  distances.first.keys.each_with_object({}) do |code, averages|
    scores = distances.map { |per_ticket| per_ticket[code] }
    averages[code] = average_array(scores)
  end
end
# Summarises one group file as its party plus that party's averaged scores.
#
# group_file        - path handed to lookup_tickets.
# parties_to_ignore - party codes passed through to calculate_distances.
#
# Returns {:party => code} when lookup_tickets reports no tickets, otherwise
# {:party => code, :distances => Hash of party code => averaged score}.
def group_info(group_file, parties_to_ignore)
  lookup = lookup_tickets(group_file)
  group_party = lookup[:party]
  group_tickets = lookup[:tickets]
  return {:party => group_party} if group_tickets.nil?

  per_ticket = group_tickets.map do |ticket|
    calculate_distances(ticket, parties_to_ignore)
  end
  {:party => group_party, :distances => average_distances(per_ticket)}
end
# Picks the values of `infos` in the order given by `parties`.
#
# Returns an Array with one element per entry of `parties`; each element is
# whatever `infos[party]` yields for that entry.
def party_hash_to_array(infos, parties)
  parties.map { |code| infos[code] }
end
# Resolves a party code to the display name used in the output files.
#
# party_code - party code; "coa" is the script's made-up coalition code and is
#              answered without touching the filesystem. Any other code is the
#              basename of a file under belowtheline/data/parties/.
#
# Returns "Coalition" for "coa"; otherwise the SHORTER_NAMES entry for the
# party's "name" field, falling back to the "name" field itself.
# Raises Errno::ENOENT when the party file does not exist.
def lookup_party_full_name(party_code)
  # Special handling for our "made up" party the coalition
  return "Coalition" if party_code == "coa"

  # Block form of File.open closes the handle once the JSON has been parsed.
  details = File.open("belowtheline/data/parties/#{party_code}.json") { |file| JSON.load(file) }
  SHORTER_NAMES[details["name"]] || details["name"] # HACK for new parties
end
# Writes the matrix to `filename` as comma-separated text.
#
# The first line is "Party" followed by every party name; each later line is
# a party name followed by that row's values. Names are wrapped in double
# quotes and values are written with Array#join, with no further escaping.
#
# filename           - path of the file to (over)write.
# parties_full_names - party names, in the same order as the matrix rows.
# matrix             - list of rows, each a list of values.
def write_distance_matrix_as_csv(filename, parties_full_names, matrix)
  puts "Writing data to files #{filename}..."
  File.open(filename, "w") do |out|
    header = ['"Party"'] + parties_full_names.map { |name| '"' + name + '"' }
    out << header.join(",") << "\n"
    matrix.each_with_index do |row, i|
      out << "\"#{parties_full_names[i]}\",#{row.join(',')}\n"
    end
  end
end
# Writes the matrix to `filename` as space-separated text.
#
# The first line lists every party name; each later line is a party name
# followed by that row's values. Names are wrapped in double quotes and
# values are written with Array#join.
#
# filename           - path of the file to (over)write.
# parties_full_names - party names, in the same order as the matrix rows.
# matrix             - list of rows, each a list of values.
def write_distance_matrix(filename, parties_full_names, matrix)
  puts "Writing data to files #{filename}..."
  File.open(filename, "w") do |out|
    quoted_names = parties_full_names.map { |name| '"' + name + '"' }
    out << quoted_names.join(" ") << "\n"
    matrix.each_with_index do |row, i|
      out << "\"#{parties_full_names[i]}\" #{row.join(' ')}\n"
    end
  end
end
# Builds the party-by-party score matrix for one state and writes it to
# output/distance_<state>.dat and output/distance_<state>.csv.
#
# state - prefix used to select group files matching
#         belowtheline/data/groups/<state>-*.json.
#
# The sorted party codes and their display names are pretty-printed to stdout
# along the way.
def process_state(state)
  group_files = Dir.glob("belowtheline/data/groups/#{state}-*.json")

  # Ignore independents and parties that have not submitted a ticket
  parties_to_ignore = group_files
    .map { |file| lookup_tickets(file) }
    .select { |lookup| lookup[:tickets].nil? }
    .map { |lookup| lookup[:party] }
    .uniq

  # Party code => that party's scores; groups without scores are skipped.
  infos = group_files.each_with_object({}) do |file, by_party|
    info = group_info(file, parties_to_ignore)
    by_party[info[:party]] = info[:distances] if info[:distances]
  end

  parties = infos.keys.sort
  pp parties

  # One row per party, with columns in the same party order.
  matrix = party_hash_to_array(infos, parties).map { |scores| party_hash_to_array(scores, parties) }

  # Convert parties to full names
  parties_full_names = parties.map { |code| lookup_party_full_name(code) }
  pp parties_full_names

  write_distance_matrix("output/distance_#{state}.dat", parties_full_names, matrix)
  write_distance_matrix_as_csv("output/distance_#{state}.csv", parties_full_names, matrix)
end
# Script entry point. Only Western Australia is processed at the moment; the
# commented-out lines are kept for debugging and for running every state.
#pp lookup_party_full_name('acp')
#%w[act nsw nt qld sa tas vic wa].each { |state_code| process_state(state_code) }
%w[wa].each { |state_code| process_state(state_code) }