Skip to content

Commit

Permalink
Add match bills to OKnesset urls management command
Browse files Browse the repository at this point in the history
  • Loading branch information
kobigro committed Dec 7, 2015
1 parent f9d55ab commit 067fb1b
Show file tree
Hide file tree
Showing 4 changed files with 376 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
db.sqlite3
*.csv
*.json
/committeeVotes/management/commands/bills_urls_resources/bills.html
167 changes: 167 additions & 0 deletions committeeVotes/management/commands/bills_urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# coding: utf-8
import requests
import json
import Levenshtein
from collections import namedtuple
from oauth2client.client import SignedJwtAssertionCredentials
import gspread
from jinja2 import Environment, FileSystemLoader
import datetime
import SimpleHTTPServer
import SocketServer
from django.core.management import BaseCommand
from optparse import make_option
import webbrowser
import os
import threading

# First page of the OKnesset bill API, newest stage date first, 1000 bills
# per page. Later pages are reached through the "next" link of each response.
OKNESSET_BILL_API_URL = 'https://oknesset.org/api/v2/bill/?order_by=-stage_date&limit=1000'

# Title of the Google spreadsheet holding the committee bills
# (Hebrew for "Bill proposals 2015").
OCOMMITTEE_SPREADSHEET_NAME = u"הצעות חוק 2015"

# Value of cell.col that identifies a meeting-date cell in the "bills"
# worksheet (presumably column A, if gspread columns are 1-based - TODO confirm).
MEETING_DATE_COLUMN_INDEX = 1

# strptime format shared by the spreadsheet dates and the --date option.
DATE_FORMAT = "%m/%d/%Y"

# Directory next to this module that holds the template, the credentials
# files and the rendered output.
RESOURCES_PATH = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "bills_urls_resources")

# JSON key (client_email / private_key) used to read the spreadsheet.
# NOTE: the constant name is misspelled but is referenced elsewhere as-is.
CREDENTAIALS_FILE = RESOURCES_PATH + "/credentials.json"

# JSON with the client_id / script_id values injected into the HTML template.
EXECUTION_CREDENTIALS_FILE = RESOURCES_PATH + "/execute_api_credentials.json"

# Substrings removed from bill titles before fuzzy comparison
# (Hebrew for "law", "amendment", "proposal of", "continuity").
STOP_WORDS = [u"חוק", u"תיקון", u"הצעת", u"רציפות"]

# Jinja2 template file name, looked up inside RESOURCES_PATH.
TEMPLATE_FILENAME = "bills_template.html"

# Where the rendered page is written; served afterwards as /bills.html.
RENDERED_OUTPUT_PATH = RESOURCES_PATH + "/bills.html"

# One committee bill paired with its best OKnesset match:
#   id           - spreadsheet row of the bill's name cell
#   name         - bill name as written in the spreadsheet
#   oknesset_url - URL of the best-matching OKnesset bill
#   match_ratio  - Levenshtein ratio of that match
Bill = namedtuple("Bill", "id name oknesset_url match_ratio")


def get_all_oknesset_bills():
    """Fetch every bill exposed by the OKnesset API, following pagination.

    Starts at OKNESSET_BILL_API_URL and keeps requesting the page named by
    each response's "next" link until the API reports no further page.

    Returns:
        A list with the bill objects of all pages, in the order received.
    """
    bills = []
    next_url = OKNESSET_BILL_API_URL
    while next_url is not None:
        current_bills, next_path = get_next_oknesset_bills(next_url)
        # Append the page that was just fetched. Extending the list with
        # itself would duplicate the first page and drop every later one.
        bills.extend(current_bills)
        if next_path is None:
            next_url = None
        else:
            # The "next" link is treated as a path on the OKnesset host.
            next_url = "https://www.oknesset.org{0}".format(next_path)
    return bills


def get_next_oknesset_bills(bills_url, timeout=30):
    """Fetch a single page of bills from the OKnesset API.

    Args:
        bills_url: Absolute URL of the API page to request.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive API cannot hang the command forever.

    Returns:
        A ``(bills, next_link)`` tuple: the page's "objects" list and the
        "next" entry of its "meta" section (None when there is none).
        ``([], None)`` when the response has no "objects" key.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(bills_url, timeout=timeout)
    bills_json = json.loads(response.text)
    if "objects" not in bills_json:
        return [], None
    # A page without pagination metadata is treated as the last page.
    return bills_json["objects"], bills_json.get("meta", {}).get("next")


def get_bill_url(oknesset_bills, bill_name):
    """Find the OKnesset bill whose title is most similar to ``bill_name``.

    Titles are compared with ``Levenshtein.ratio`` after the stop words have
    been stripped from both sides. On ties the earliest bill wins.

    Args:
        oknesset_bills: Iterable of bill dicts carrying "full_title" and
            "absolute_url" keys.
        bill_name: The committee bill name to look up.

    Returns:
        A ``(url, ratio)`` tuple for the best match, or ``(None, None)`` when
        no bill scores above zero (including an empty ``oknesset_bills``).
    """
    # The committee-side name does not change per candidate: strip it once.
    stripped_name = strip_stop_words(bill_name)
    max_ratio = 0.0
    matched_bill = None
    for bill in oknesset_bills:
        ratio = Levenshtein.ratio(
            strip_stop_words(bill["full_title"]), stripped_name)
        if ratio > max_ratio:
            max_ratio = ratio
            matched_bill = bill
    if matched_bill is None:
        return None, None
    url = "http://www.oknesset.org{0}".format(matched_bill["absolute_url"])
    return url, max_ratio


def strip_stop_words(bill_title):
    """Return ``bill_title`` with every STOP_WORDS occurrence removed.

    Removal is by plain substring replacement, applied in STOP_WORDS order.
    """
    stripped_title = bill_title
    for stop_word in STOP_WORDS:
        stripped_title = stripped_title.replace(stop_word, u"")
    return stripped_title


def get_commitee_bills_since(since_date=None):
    """Collect committee bills whose meeting date is recent.

    Reads columns A-B of the "bills" worksheet (from row 2 down) of the
    committee spreadsheet and keeps the bills whose meeting date lies between
    ``since_date`` and today, both inclusive.

    Args:
        since_date: Earliest meeting date (``datetime.date``) to include.
            ``None`` means one week before today.

    Returns:
        A list of ``(row, name)`` tuples taken from the name cell of each
        matching bill, ordered from the bottom of the sheet upwards.

    Raises:
        ValueError: If a non-empty date cell does not match DATE_FORMAT.
    """
    today = datetime.date.today()
    if since_date is None:
        # Comparing None with a date fails; apply the documented default.
        since_date = today - datetime.timedelta(days=7)

    # Close the key file deterministically instead of leaking the handle.
    with open(CREDENTAIALS_FILE) as credentials_file:
        json_key = json.load(credentials_file)
    scope = ['https://spreadsheets.google.com/feeds']
    credentials = SignedJwtAssertionCredentials(
        json_key['client_email'], json_key['private_key'].encode(), scope)
    spreadsheet = gspread.authorize(
        credentials).open(OCOMMITTEE_SPREADSHEET_NAME)
    worksheet = spreadsheet.worksheet("bills")
    cell_range = "{0}:{1}{2}".format("A2", "B", worksheet.row_count)
    reversed_cells = worksheet.range(cell_range)[::-1]

    matched_cells = []
    for idx, cell in enumerate(reversed_cells):
        # Only non-empty meeting-date cells are of interest.
        if cell.col != MEETING_DATE_COLUMN_INDEX or not cell.value:
            continue
        bill_date = datetime.datetime.strptime(
            cell.value, DATE_FORMAT).date()
        if since_date <= bill_date <= today:
            # In the reversed list the entry just before a date cell is the
            # bill-name cell (assumes worksheet.range yields cells row by
            # row, so the name cell of the same row precedes it - confirm).
            name_cell = reversed_cells[idx - 1]
            matched_cells.append((name_cell.row, name_cell.value))
    return matched_cells


def get_matched_bills(commitee_bills, oknesset_bills):
    """Pair each committee bill with its best-matching OKnesset bill.

    Args:
        commitee_bills: Iterable of ``(id, name)`` tuples.
        oknesset_bills: Bill dicts to search, as accepted by get_bill_url.

    Returns:
        A list of ``Bill`` tuples; bills without any match are left out.
    """
    pairs = []
    for row_id, name in commitee_bills:
        match_url, match_ratio = get_bill_url(oknesset_bills, name)
        if match_url is None:
            continue
        pairs.append(Bill(id=row_id, name=name, oknesset_url=match_url,
                          match_ratio=match_ratio))
    return pairs


def get_execution_api_credentials():
    """Load the execution-API credentials from EXECUTION_CREDENTIALS_FILE.

    Returns:
        The parsed JSON content of the credentials file.
    """
    # Use a context manager so the file handle is closed right after parsing.
    with open(EXECUTION_CREDENTIALS_FILE) as credentials_file:
        return json.load(credentials_file)


def render_bills(bills):
    """Render the bills template and write it to RENDERED_OUTPUT_PATH.

    The template receives ``bills`` and the execution-API credentials; the
    resulting page is written UTF-8 encoded.
    """
    loader = FileSystemLoader(RESOURCES_PATH)
    template = Environment(loader=loader).get_template(TEMPLATE_FILENAME)
    context = dict(bills=bills, credentials=get_execution_api_credentials())
    page = template.render(**context)
    with open(RENDERED_OUTPUT_PATH, "w") as output_file:
        output_file.write(page.encode("utf8"))


def serve_bills_html(port):
    """Serve the resources directory over HTTP and open bills.html.

    Starts a TCP server on localhost in a background thread and then points
    the default web browser at the rendered page on that same port.

    Args:
        port: TCP port to listen on and to open in the browser.
    """
    # Switch to the resources directory: the simple HTTP handler serves
    # files relative to the current working directory.
    os.chdir(RESOURCES_PATH)
    handler = SimpleHTTPServer.SimpleHTTPRequestHandler
    httpd = SocketServer.TCPServer(("localhost", port), handler)
    threading.Thread(target=httpd.serve_forever).start()
    # Open the page on the port actually being served, not a fixed 8080.
    webbrowser.open("http://127.0.0.1:{0}/bills.html".format(port))


class Command(BaseCommand):
    """Match committee bills to OKnesset bills and serve the result as HTML.

    The command reads recent bills from the committee spreadsheet, finds the
    closest OKnesset bill for each, renders them into bills.html and serves
    that page locally so the matches can be reviewed by hand.
    """
    help = 'Serve an html for manually matching a bill to its url in oknesset.'

    def parse_date(option, opt_str, value, parser):
        """optparse callback: store ``value`` parsed with DATE_FORMAT.

        Raises:
            OptionValueError: If ``value`` is not a valid date, so optparse
                reports a usage error instead of silently ignoring the option.
        """
        from optparse import OptionValueError
        try:
            parsed_date = datetime.datetime.strptime(
                value, DATE_FORMAT).date()
        except ValueError:
            raise OptionValueError(
                "option {0}: invalid date {1!r}, expected format m/d/y".format(
                    opt_str, value))
        setattr(parser.values, option.dest, parsed_date)

    option_list = BaseCommand.option_list + (
        make_option('-d', '--date', dest="date", action="callback", type=str, callback=parse_date,
                    help="Show bills for commitee meetings made since this date in format m/d/y. Defaults to a week ago."),
        # optparse expects an integer nargs; the argparse-style '?' breaks
        # parsing as soon as -p is given, so the default single argument is used.
        make_option('-p', '--port', dest="port", default=8080, type=int,
                    help="The port to serve the bills html on. Defaults to 8080"),
    )

    def handle(self, *args, **options):
        """Run the matching pipeline and serve the rendered page."""
        since_date = options['date']
        if since_date is None:
            # Apply the default promised by the --date help text.
            since_date = datetime.date.today() - datetime.timedelta(days=7)

        commitee_bills = get_commitee_bills_since(since_date)
        if not commitee_bills:
            self.stdout.write("No commitee bills available.")
            return
        self.stdout.write(
            "There are {0} new commitee bills".format(len(commitee_bills)))

        oknesset_bills = get_all_oknesset_bills()
        if not oknesset_bills:
            self.stdout.write(
                "No oknesset bills available. perhaps the API is down?")
            return
        self.stdout.write(
            "There are {0} OKnesset bills".format(len(oknesset_bills)))

        matched_bills = get_matched_bills(commitee_bills, oknesset_bills)
        render_bills(matched_bills)
        port = options['port']
        self.stdout.write(
            "Serving bills.html on {0}:{1}".format("localhost", port))
        serve_bills_html(port)
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
<!DOCTYPE html>
<html lang="en" style="height: 100%">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">

<title> OpenComittee bill upload </title>

<link href="bootstrap.min.css" rel="stylesheet">
<script type="text/javascript">
function highlightRow(checkbox){
    // The checkbox sits inside a cell; the cell's parent is the table row.
    var row = checkbox.parentElement.parentElement;

    // A ticked row is always marked as selected.
    if (checkbox.checked) {
        row.className = "success";
        return;
    }
    // An unticked row falls back to its warning colour if it was flagged
    // as a bad match, otherwise to no highlight at all.
    row.className = row.hasAttribute("data-bad-match") ? "danger" : "";
}
</script>

<script type="text/javascript">
// OAuth client id; the template injects credentials['client_id'] as a quoted string.
var CLIENT_ID={{ "\"" + credentials['client_id'] + "\""}};
// OAuth scopes requested when authorizing.
var SCOPES = ['https://www.googleapis.com/auth/script.storage','https://www.googleapis.com/auth/spreadsheets'];
// NOTE(review): not read anywhere in the visible script - confirm before removing.
var loaded = false;
function checkAuth() {
    console.log("checkAuth");
    // Non-interactive authorization attempt; no callback is registered,
    // so the outcome is not handled here.
    var silentRequest = {
        'client_id': CLIENT_ID,
        'scope': SCOPES.join(' '),
        'immediate': true
    };
    gapi.auth.authorize(silentRequest);
}

/**
 * Authorization callback: on success, collects the selected table rows and
 * sends their spreadsheet row number and OKnesset URL to updateBills.
 * On failure the authorization result is only logged.
 */
function handleAuthResult(authResult) {
    if (!authResult || authResult.error) {
        console.log(authResult);
        return;
    }

    // Rows ticked by the user carry the "success" class (see highlightRow).
    var selectedRows = document.querySelectorAll('tr.success');
    var rowToValueMaps = [];
    for (var i = 0; i < selectedRows.length; i++) {
        // Look the cells up by class instead of by childNodes position,
        // which depends on the whitespace between the <td> elements.
        var idCell = selectedRows[i].querySelector('td.id');
        var urlLink = selectedRows[i].querySelector('td.oknesset_url a');
        if (!idCell || !urlLink) {
            continue;
        }
        rowToValueMaps.push({
            row: parseInt(idCell.textContent, 10),
            value: urlLink.href
        });
    }
    updateBills(rowToValueMaps, "oknesset_url");
}

function handleAuthClick(){
    // Interactive authorization; handleAuthResult receives the outcome.
    var interactiveRequest = {
        client_id: CLIENT_ID,
        scope: SCOPES,
        immediate: false
    };
    gapi.auth.authorize(interactiveRequest, handleAuthResult);
    return false;
}

function updateBills(updatedRows,columnToUpdate) {
var scriptId = {{ "\"" + credentials['script_id'] + "\"" }};

// Create an execution request object.
var request = {
'function': 'updateRows',
'parameters':[updatedRows, "oknesset_url"]
};

// Make the API request.
var op = gapi.client.request({
'root': 'https://script.googleapis.com',
'path': 'v1/scripts/' + scriptId + ':run',
'method': 'POST',
'body': request
});

op.execute(function(resp) {
if (resp.error && resp.error.status) {
// The API encountered a problem before the script
// started executing.
console.log('Error calling API:');
console.log(JSON.stringify(resp, null, 2));
} else if (resp.error) {
// The API executed, but the script returned an error.

// Extract the first (and only) set of error details.
// The values of this object are the script's 'errorMessage' and
// 'errorType', and an array of stack trace elements.
var error = resp.error.details[0];
console.log('Script error message: ' + error.errorMessage);

if (error.scriptStackTraceElements) {
// There may not be a stacktrace if the script didn't start
// executing.
console.log('Script error stacktrace:');
for (var i = 0; i < error.scriptStackTraceElements.length; i++) {
var trace = error.scriptStackTraceElements[i];
console.log('\t' + trace.function + ':' + trace.lineNumber);
}
}
} else {
// The structure of the result will depend upon what the Apps
// Script function returns. Here, the function returns an Apps
// Script Object with String keys and values, and so the result
// is treated as a JavaScript object (folderSet).
var succeeded = resp.response;
if (succeeded) {
alert('Change succeeded!');
} else {
console.log('Change failed');
}
}
});
}
</script>
<script src="https://apis.google.com/js/client.js?onload=checkAuth">
</script>

<style type="text/css">
    @import url(http://fonts.googleapis.com/earlyaccess/alefhebrew.css);
    /* Font names use plain ASCII quotes; typographic quotes are not string
       delimiters in CSS and would make the Helvetica Neue entry unusable. */
    body{
        font-family: "Alef Hebrew",
                     "Helvetica Neue",
                     Helvetica,
                     Arial,
                     sans-serif;
    }
    /* Table cells hold Hebrew text, hence right-to-left. */
    td{
        direction: rtl;
        font-size: 1.2em;
    }
</style>
</head>
<body>

<div class="container-fluid">
<div class="row">
<div class="col-md-12">
<table class="table table-bordered" >
<thead>
<tr>
<th>
#
</th>
<th>
Bill name
</th>
<th>
OpenKnesset url
</th>
<th>
Match ratio
</th>
<th>
Fill in spreadsheet?
</th>
</tr>
</thead>
<tbody>
{# One row per matched bill. Rows whose match ratio is below 0.5 are flagged
   as bad matches. The name and URL originate from external data, so they are
   HTML-escaped explicitly. #}
{% for bill in bills %}
<tr {{ 'data-bad-match class="danger"' if bill.match_ratio < 0.5 }}>
    <td class="id">
        {{ bill.id }}
    </td>
    <td class="name">
        {{ bill.name|e }}
    </td>
    <td class="oknesset_url">
        <a href="{{ bill.oknesset_url|e }}">Oknesset</a>
    </td>
    <td class="ratio">
        {{ "{:0.3f}".format(bill.match_ratio) }}
    </td>
    <td>
        <input type="checkbox" onclick="highlightRow(this)" value="" autocomplete="off">
    </td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<div class="row">
<div class="col-md-12">
<button type="button" class="btn btn-primary btn-block btn-lg" onclick="handleAuthClick()">Submit</button>
</div>
</div>
</div>
</body>
</html>

Large diffs are not rendered by default.

0 comments on commit 067fb1b

Please sign in to comment.