-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add match bills to OKnesset urls management command
- Loading branch information
Showing
4 changed files
with
376 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,3 +10,4 @@ | |
db.sqlite3 | ||
*.csv | ||
*.json | ||
/committeeVotes/management/commands/bills_urls_resources/bills.html |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
# coding: utf-8 | ||
import requests | ||
import json | ||
import Levenshtein | ||
from collections import namedtuple | ||
from oauth2client.client import SignedJwtAssertionCredentials | ||
import gspread | ||
from jinja2 import Environment, FileSystemLoader | ||
import datetime | ||
import SimpleHTTPServer | ||
import SocketServer | ||
from django.core.management import BaseCommand | ||
from optparse import make_option | ||
import webbrowser | ||
import os | ||
import threading | ||
|
||
# First page of the OKnesset bills API: ordered by stage date (descending),
# 1000 bills per page.
OKNESSET_BILL_API_URL = 'https://oknesset.org/api/v2/bill/?order_by=-stage_date&limit=1000'

# Title of the Google spreadsheet that holds the committee's bills.
OCOMMITTEE_SPREADSHEET_NAME = u"הצעות חוק 2015"

# Value of `cell.col` that identifies the meeting-date column in the sheet.
MEETING_DATE_COLUMN_INDEX = 1

# Date format used both by the spreadsheet cells and by the --date option.
DATE_FORMAT = "%m/%d/%Y"

# Directory (next to this module) holding credentials, template and output.
RESOURCES_PATH = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "bills_urls_resources")

# Credentials JSON used to authorize spreadsheet access.
CREDENTIALS_FILE = os.path.join(RESOURCES_PATH, "credentials.json")
# Historical misspelling, kept because existing code refers to this name.
CREDENTAIALS_FILE = CREDENTIALS_FILE

# JSON with the `client_id` / `script_id` values injected into the template.
EXECUTION_CREDENTIALS_FILE = os.path.join(
    RESOURCES_PATH, "execute_api_credentials.json")

# Substrings removed from bill titles before they are compared.
STOP_WORDS = [u"חוק", u"תיקון", u"הצעת", u"רציפות"]

# Jinja2 template (looked up inside RESOURCES_PATH) for the review page.
TEMPLATE_FILENAME = "bills_template.html"

# Where the rendered review page is written.
RENDERED_OUTPUT_PATH = os.path.join(RESOURCES_PATH, "bills.html")

# One committee bill together with its best OKnesset match.
Bill = namedtuple("Bill", "id name oknesset_url match_ratio")
|
||
|
||
def get_all_oknesset_bills():
    """Collect the bills from every page of the OKnesset bills API.

    Starts at OKNESSET_BILL_API_URL and keeps following the "next" path
    reported by each page until a page reports none.

    Returns:
        A list with the bill objects of all fetched pages, in fetch order.
    """
    bills = []
    page_url = OKNESSET_BILL_API_URL
    while page_url is not None:
        page_bills, next_path = get_next_oknesset_bills(page_url)
        # Append the page that was just fetched (the list used to be
        # extended with itself, which duplicated old entries and dropped
        # every page after the first).
        bills.extend(page_bills)
        if next_path is None:
            page_url = None
        else:
            # The API reports the next page as a path relative to the host.
            page_url = "https://www.oknesset.org{0}".format(next_path)
    return bills
|
||
|
||
def get_next_oknesset_bills(bills_url):
    """Fetch one page of bills from the given API URL.

    Args:
        bills_url: Full URL of the page to download.

    Returns:
        A `(bills, next_path)` tuple taken from the response's "objects" and
        `meta.next` entries, or `([], None)` when the response has no
        "objects" key.
    """
    response = requests.get(bills_url)
    payload = json.loads(response.text)
    if "objects" not in payload:
        return [], None
    return payload["objects"], payload["meta"]["next"]
|
||
|
||
def get_bill_url(oknesset_bills, bill_name):
    """Find the OKnesset bill whose title is most similar to `bill_name`.

    Titles are compared with `Levenshtein.ratio` after stop words have been
    stripped from both sides. On ties the earliest bill wins.

    Args:
        oknesset_bills: Iterable of bill dicts with "full_title" and
            "absolute_url" keys.
        bill_name: Name of the committee bill to look up.

    Returns:
        `(url, ratio)` for the best match, or `(None, None)` when no bill
        scores above 0.0 (including when `oknesset_bills` is empty).
    """
    # The wanted name does not change between iterations; strip it once.
    wanted_title = strip_stop_words(bill_name)
    max_ratio = 0.0
    matched_bill = None
    for bill in oknesset_bills:
        ratio = Levenshtein.ratio(
            strip_stop_words(bill["full_title"]), wanted_title)
        if ratio > max_ratio:
            max_ratio = ratio
            matched_bill = bill
    if matched_bill is None:
        return None, None
    url = "http://www.oknesset.org{0}".format(matched_bill["absolute_url"])
    return url, max_ratio
|
||
|
||
def strip_stop_words(bill_title, stop_words=None):
    """Remove every occurrence of each stop word from a bill title.

    Matching is plain substring replacement, so a stop word is also removed
    when it appears inside a longer word.

    Args:
        bill_title: The title to clean.
        stop_words: Optional iterable of substrings to remove. Defaults to
            the module-level STOP_WORDS list.

    Returns:
        The title with all stop-word occurrences deleted.
    """
    if stop_words is None:
        stop_words = STOP_WORDS
    for word in stop_words:
        bill_title = bill_title.replace(word, u"")
    return bill_title
|
||
|
||
def get_commitee_bills_since(since_date=None):
    """Read the committee bills discussed between `since_date` and today.

    Opens the "bills" worksheet of the committee spreadsheet and scans
    columns A-B from row 2 down to the last row.

    Args:
        since_date: Earliest meeting date (a `datetime.date`) to include.
            Defaults to one week before today.

    Returns:
        A list of `(row, name)` tuples, one per bill whose meeting date lies
        in `[since_date, today]`, ordered from the bottom of the sheet up.

    Raises:
        ValueError: If a non-empty date cell does not match DATE_FORMAT.
    """
    today = datetime.date.today()
    if since_date is None:
        # The --date option documents "a week ago" as its default; without
        # this the comparison below would be made against None.
        since_date = today - datetime.timedelta(days=7)

    with open(CREDENTAIALS_FILE) as credentials_file:
        json_key = json.load(credentials_file)
    scope = ['https://spreadsheets.google.com/feeds']
    credentials = SignedJwtAssertionCredentials(
        json_key['client_email'], json_key['private_key'].encode(), scope)
    spreadsheet = gspread.authorize(
        credentials).open(OCOMMITTEE_SPREADSHEET_NAME)
    worksheet = spreadsheet.worksheet("bills")

    cell_range = "{0}:{1}{2}".format("A2", "B", worksheet.row_count)
    reversed_cells = worksheet.range(cell_range)[::-1]
    matched_cells = []
    for idx, cell in enumerate(reversed_cells):
        # Only non-empty cells of the meeting-date column are of interest.
        if cell.col != MEETING_DATE_COLUMN_INDEX or not cell.value:
            continue
        bill_date = datetime.datetime.strptime(
            cell.value, DATE_FORMAT).date()
        if since_date <= bill_date <= today:
            # In the reversed list the entry just before a date cell is its
            # neighbour from the other column, i.e. the bill name.
            # NOTE(review): assumes worksheet.range() yields cells row by
            # row - confirm against the gspread version in use.
            name_cell = reversed_cells[idx - 1]
            matched_cells.append((name_cell.row, name_cell.value))
    return matched_cells
|
||
|
||
def get_matched_bills(commitee_bills, oknesset_bills):
    """Pair each committee bill with its best OKnesset match.

    Args:
        commitee_bills: Iterable of `(bill_id, bill_name)` pairs.
        oknesset_bills: Bills to search, as accepted by `get_bill_url`.

    Returns:
        A list of `Bill` tuples; committee bills for which no URL was found
        are left out.
    """
    lookups = (
        (row, title, get_bill_url(oknesset_bills, title))
        for row, title in commitee_bills
    )
    return [
        Bill(row, title, url, ratio)
        for row, title, (url, ratio) in lookups
        if url is not None
    ]
|
||
|
||
def get_execution_api_credentials():
    """Load the credentials JSON that is injected into the HTML template.

    Returns:
        The parsed content of EXECUTION_CREDENTIALS_FILE.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(EXECUTION_CREDENTIALS_FILE) as credentials_file:
        return json.load(credentials_file)
|
||
|
||
def render_bills(bills):
    """Render the review page for `bills` and write it to disk.

    The template TEMPLATE_FILENAME is loaded from RESOURCES_PATH, rendered
    with the bills and the execution-API credentials, and written UTF-8
    encoded to RENDERED_OUTPUT_PATH.

    Args:
        bills: The matched bills to expose to the template as `bills`.
    """
    jinja_env = Environment(loader=FileSystemLoader(RESOURCES_PATH))
    template = jinja_env.get_template(TEMPLATE_FILENAME)
    page = template.render(
        bills=bills, credentials=get_execution_api_credentials())
    with open(RENDERED_OUTPUT_PATH, "w") as out:
        out.write(page.encode("utf8"))
|
||
|
||
def serve_bills_html(port):
    """Serve the resources directory over HTTP and open the bills page.

    Changes the working directory to RESOURCES_PATH (the simple HTTP handler
    serves files from the current directory), starts the server on a
    background thread and opens the rendered page in the default browser.

    Args:
        port: TCP port on localhost to serve on.
    """
    # The request handler serves from the current working directory.
    os.chdir(RESOURCES_PATH)
    handler = SimpleHTTPServer.SimpleHTTPRequestHandler
    httpd = SocketServer.TCPServer(("localhost", port), handler)
    threading.Thread(target=httpd.serve_forever).start()
    # Open the page on the port actually being served, not a fixed 8080.
    webbrowser.open("http://127.0.0.1:{0}/bills.html".format(port))
|
||
|
||
class Command(BaseCommand):
    """Match committee bills to OKnesset URLs and serve a review page."""

    help = 'Serve an html for manually matching a bill to its url in oknesset.'

    def parse_date(option, opt_str, value, parser):
        """optparse callback that stores `value` parsed as a date.

        Raises:
            optparse.OptionValueError: If `value` does not match DATE_FORMAT,
                so the user gets a usage error instead of the bad value being
                silently ignored.
        """
        from optparse import OptionValueError
        try:
            parsed = datetime.datetime.strptime(value, DATE_FORMAT).date()
        except ValueError:
            raise OptionValueError(
                "option {0}: invalid date {1!r}, expected format m/d/y".format(
                    opt_str, value))
        setattr(parser.values, option.dest, parsed)

    option_list = BaseCommand.option_list + (
        make_option('-d', '--date', dest="date", action="callback", type=str, callback=parse_date,
                    help="Show bills for commitee meetings made since this date in format m/d/y. Defaults to a week ago."),
        # No `nargs='?'` here: that is an argparse idiom, while optparse
        # expects an integer argument count (the default of 1 is wanted).
        make_option('-p', '--port', default=8080, type=int,
                    help="The port to serve the bills html on. Defaults to 8080"),
    )

    def handle(self, *args, **options):
        """Fetch, match and render the bills, then serve the result page."""
        since_date = options.get('date')
        if since_date is None:
            # Documented default of the --date option.
            since_date = datetime.date.today() - datetime.timedelta(days=7)

        # Single-argument print(...) behaves identically under Python 2's
        # print statement.
        commitee_bills = get_commitee_bills_since(since_date)
        if not commitee_bills:
            print("No commitee bills available.")
            return
        print("There are {0} new commitee bills".format(len(commitee_bills)))

        oknesset_bills = get_all_oknesset_bills()
        if not oknesset_bills:
            print("No oknesset bills available. perhaps the API is down?")
            return
        print("There are {0} OKnesset bills".format(len(oknesset_bills)))

        matched_bills = get_matched_bills(commitee_bills, oknesset_bills)
        render_bills(matched_bills)
        port = options['port']
        print("Serving bills.html on {0}:{1}".format("localhost", port))
        serve_bills_html(port)
203 changes: 203 additions & 0 deletions
203
committeeVotes/management/commands/bills_urls_resources/bills_template.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
<!DOCTYPE html> | ||
<html lang="en" style="height: 100%"> | ||
<head> | ||
<meta http-equiv="content-type" content="text/html; charset=UTF-8" /> | ||
<meta http-equiv="X-UA-Compatible" content="IE=edge"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1"> | ||
|
||
<title> OpenComittee bill upload </title> | ||
|
||
<link href="bootstrap.min.css" rel="stylesheet"> | ||
<script type="text/javascript"> | ||
// Recolour the table row that contains `checkbox`:
//   checked                         -> "success"
//   unchecked, has data-bad-match   -> "danger"
//   unchecked otherwise             -> no class
function highlightRow(checkbox){
    // The checkbox sits in a <td>, which sits in the <tr> to restyle.
    var row = checkbox.parentElement.parentElement;
    var rowClass = "";
    if (checkbox.checked) {
        rowClass = "success";
    } else if (row.hasAttribute("data-bad-match")) {
        rowClass = "danger";
    }
    row.className = rowClass;
}
</script> | ||
|
||
<script type="text/javascript"> | ||
var CLIENT_ID={{ "\"" + credentials['client_id'] + "\""}}; | ||
var SCOPES = ['https://www.googleapis.com/auth/script.storage','https://www.googleapis.com/auth/spreadsheets']; | ||
var loaded = false; | ||
// Named as the `onload` callback of the Google API client script tag.
// Issues a non-interactive (`immediate: true`) authorization request; no
// callback is passed, so the outcome is not inspected here.
function checkAuth() {
    console.log("checkAuth");
    var silentRequest = {
        'client_id': CLIENT_ID,
        'scope': SCOPES.join(' '),
        'immediate': true
    };
    gapi.auth.authorize(silentRequest);
}
|
||
// Authorization callback. On success, collects every row the user ticked
// (rows carrying the "success" class) and sends their spreadsheet row number
// and OKnesset link to updateBills(). On failure, only logs the result.
function handleAuthResult(authResult) {
    if (!authResult || authResult.error) {
        // Authorization failed or was refused; nothing is submitted.
        console.log(authResult);
        return;
    }

    var selectedRows = document.querySelectorAll('tr.success');
    var rowToValueMaps = [];
    for (var i = 0; i < selectedRows.length; i++) {
        var rowElement = selectedRows[i];
        // Look the cells up by class rather than by childNodes position,
        // which silently depends on the whitespace between the tags.
        rowToValueMaps.push({
            row: parseInt(rowElement.querySelector('td.id').textContent, 10),
            value: rowElement.querySelector('td.oknesset_url a').href
        });
    }
    updateBills(rowToValueMaps, "oknesset_url");
}
|
||
// Click handler of the Submit button: starts an interactive
// (`immediate: false`) authorization and hands the result to
// handleAuthResult(). Always returns false.
function handleAuthClick(){
    var interactiveRequest = {
        client_id: CLIENT_ID,
        scope: SCOPES,
        immediate: false
    };
    gapi.auth.authorize(interactiveRequest, handleAuthResult);
    return false;
}
|
||
function updateBills(updatedRows,columnToUpdate) { | ||
var scriptId = {{ "\"" + credentials['script_id'] + "\"" }}; | ||
|
||
// Create an execution request object. | ||
var request = { | ||
'function': 'updateRows', | ||
'parameters':[updatedRows, "oknesset_url"] | ||
}; | ||
|
||
// Make the API request. | ||
var op = gapi.client.request({ | ||
'root': 'https://script.googleapis.com', | ||
'path': 'v1/scripts/' + scriptId + ':run', | ||
'method': 'POST', | ||
'body': request | ||
}); | ||
|
||
op.execute(function(resp) { | ||
if (resp.error && resp.error.status) { | ||
// The API encountered a problem before the script | ||
// started executing. | ||
console.log('Error calling API:'); | ||
console.log(JSON.stringify(resp, null, 2)); | ||
} else if (resp.error) { | ||
// The API executed, but the script returned an error. | ||
|
||
// Extract the first (and only) set of error details. | ||
// The values of this object are the script's 'errorMessage' and | ||
// 'errorType', and an array of stack trace elements. | ||
var error = resp.error.details[0]; | ||
console.log('Script error message: ' + error.errorMessage); | ||
|
||
if (error.scriptStackTraceElements) { | ||
// There may not be a stacktrace if the script didn't start | ||
// executing. | ||
console.log('Script error stacktrace:'); | ||
for (var i = 0; i < error.scriptStackTraceElements.length; i++) { | ||
var trace = error.scriptStackTraceElements[i]; | ||
console.log('\t' + trace.function + ':' + trace.lineNumber); | ||
} | ||
} | ||
} else { | ||
// The structure of the result will depend upon what the Apps | ||
// Script function returns. Here, the function returns an Apps | ||
// Script Object with String keys and values, and so the result | ||
// is treated as a JavaScript object (folderSet). | ||
var succeeded = resp.response; | ||
if (succeeded) { | ||
alert('Change succeeded!'); | ||
} else { | ||
console.log('Change failed'); | ||
} | ||
} | ||
}); | ||
} | ||
</script> | ||
<script src="https://apis.google.com/js/client.js?onload=checkAuth"> | ||
</script> | ||
|
||
<style type="text/css"> | ||
@import url(http://fonts.googleapis.com/earlyaccess/alefhebrew.css); | ||
/* Straight quotes are required: with typographic quotes the quote characters
   become part of the family name and the font never matches. */
body{
    font-family: "Alef Hebrew",
                 "Helvetica Neue",
                 Helvetica,
                 Arial,
                 sans-serif;
}
td{ | ||
direction: rtl; | ||
font-size: 1.2em; | ||
} | ||
</style> | ||
</head> | ||
<body> | ||
|
||
<div class="container-fluid"> | ||
<div class="row"> | ||
<div class="col-md-12"> | ||
<table class="table table-bordered" > | ||
<thead> | ||
<tr> | ||
<th> | ||
# | ||
</th> | ||
<th> | ||
Bill name | ||
</th> | ||
<th> | ||
OpenKnesset url | ||
</th> | ||
<th> | ||
Match ratio | ||
</th> | ||
<th> | ||
Fill in spreadsheet? | ||
</th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
{% for bill in bills %} | ||
<tr {{ 'data-bad-match class="danger"' if bill.match_ratio < 0.5 }}> | ||
<td class="id"> | ||
{{ bill.id }} | ||
</td> | ||
<td class="name"> | ||
{{ bill.name }} | ||
</td> | ||
<td class="oknesset_url"> | ||
<a href="{{ bill.oknesset_url }}">Oknesset</a> | ||
</td> | ||
<td class="ratio"> | ||
{{ "{:0.3f}".format(bill.match_ratio) }} | ||
</td> | ||
<td> | ||
<input type="checkbox" onclick="highlightRow(this)" value="" autocomplete="off"> | ||
</td> | ||
</tr> | ||
{% endfor %} | ||
</tbody> | ||
</table> | ||
</div> | ||
</div> | ||
<div class="row"> | ||
<div class="col-md-12"> | ||
<button type="button" class="btn btn-primary btn-block btn-lg" onclick="handleAuthClick()">Submit</button> | ||
</div> | ||
</div> | ||
</div> | ||
</body> | ||
</html> |
5 changes: 5 additions & 0 deletions
5
committeeVotes/management/commands/bills_urls_resources/bootstrap.min.css
Large diffs are not rendered by default.
Oops, something went wrong.