Skip to content

Commit

Permalink
[datasets#29,refactor][l]: rework data schema as per datasets#29.
Browse files Browse the repository at this point in the history
* datapackage.json: new schema with descriptions
* data: update all data in line with new schema (this should be lossless)
* scripts: conversion script
* app: minor update to app and templates for new schema
  • Loading branch information
rufuspollock committed Oct 6, 2013
1 parent 777657d commit a642827
Show file tree
Hide file tree
Showing 14 changed files with 13,417 additions and 13,392 deletions.
508 changes: 254 additions & 254 deletions data/br.csv

Large diffs are not rendered by default.

5,572 changes: 2,786 additions & 2,786 deletions data/ch.csv

Large diffs are not rendered by default.

5,178 changes: 2,589 additions & 2,589 deletions data/de.csv

Large diffs are not rendered by default.

260 changes: 130 additions & 130 deletions data/eu.csv

Large diffs are not rendered by default.

10,726 changes: 5,363 additions & 5,363 deletions data/gb.csv

Large diffs are not rendered by default.

2,359 changes: 1,164 additions & 1,195 deletions data/gr.csv

Large diffs are not rendered by default.

1,016 changes: 508 additions & 508 deletions data/nz.csv

Large diffs are not rendered by default.

920 changes: 460 additions & 460 deletions data/us.csv

Large diffs are not rendered by default.

172 changes: 86 additions & 86 deletions datapackage.json
Original file line number Diff line number Diff line change
@@ -1,104 +1,104 @@
{
"name": "public-bodies",
"name": "public-bodies",
"license": [
{
"url": "http://opendatacommons.org/licenses/pddl/1.0/",
"url": "http://opendatacommons.org/licenses/pddl/1.0/",
"name": "Public Domain Dedication and License"
}
],
],
"resources": [
{
"schema": {
"fields": [
{
"type": "string",
"description": "Full name of the Body",
"id": "title"
},
"id": "id",
"type": "string",
"primarykey": true,
"description": "Unique key/id for the Body. Should be of form {jurisdiction-code}/{unique-id-for-body-within-jurisdiction} where jurisdiction-code is as per the jurisdiction_code field"
},
{
"id": "name",
"type": "string",
"description": "Standard name of the Body"
},
{
"id": "abbreviation",
"type": "string",
"description": "Abbreviation for the body (if any)."
},
{
"id": "other_names",
"type": "string",
"description": "Other, alternate, names for this Body. If there is more than one separate them with semi-colons"
},
{
"id": "description",
"type": "string",
"description": "Description of the Body"
},
{
"id": "classification",
"type": "string",
"description": "Category of Body. There should only be one classification per body."
},
{
"id": "parent_id",
"type": "string",
"description": "If the Body has a parent body this should be the id for that parent body"
},
{
"type": "string",
"description": "Abbreviation for the body (if any).",
"id": "abbr"
},
"id": "founding_date",
"type": "date",
"description": "ISO 8601 Date this body was founded / created"
},
{
"type": "string",
"description": "Unique key/id for the Body. Should be of form {jurisdiction-code}/{unique-id-for-body-within-jurisdiction} where jurisdiction-code is a per jurisdiction_code field",
"id": "key"
},
"id": "dissolution_date",
"type": "date",
"description": "ISO 8601 Date this body was dissolved"
},
{
"type": "string",
"description": "Category of Body",
"id": "category"
},
"id": "image",
"type": "string",
"format": "url",
"description": "URL of an image for this Body"
},
{
"id": "url",
"type": "string",
"format": "url",
"description": "URL for the Body"
},
{
"id": "jurisdiction_code",
"type": "string",
"description": "Short 2-letter code for the jurisdiction of the Body. Use the 2-letter ISO country code where the jurisdiction is a country"
},
{
"id": "email",
"type": "string",
"format": "email",
"description": "Contact email for the Body"
},
{
"id": "address",
"type": "string",
"description": "Official address of the Body"
},
{
"id": "contact",
"type": "string",
"description": "Address for correspondence if different from official address"
},
{
"id": "tags",
"type": "string",
"description": "Free text tags, space separated"
},
{
"id": "source_url",
"type": "string",
"description": "If the Body has a parent body this should be the title for that parent body",
"id": "parent"
},
{
"type": "string",
"description": "If the Body has a parent body this should be the title for that parent body",
"id": "parent_key"
},
{
"type": "string",
"description": "Description of the Body",
"id": "description"
},
{
"type": "string",
"description": "URL for the the Body",
"id": "url"
},
{
"type": "string",
"description": "Full name of the jurisdiction in which this Body exists e.g. Germany or United States",
"id": "jurisdiction"
},
{
"type": "string",
"description": "Short 2-digit code for the Body. Use 2-digit iso-code in case where jurisdiction is a country",
"id": "jurisdiction_code"
},
{
"type": "string",
"description": "Human name for source e.g. Parliament Website",
"id": "source"
},
{
"type": "string",
"description": "Source URL",
"id": "source_url"
},
{
"type": "string",
"description": "Official physical address of the Body",
"id": "address"
},
{
"type": "string",
"description": "Address for correspondence if different from official address",
"id": "contact"
},
{
"type": "string",
"description": "Contact email for the Body",
"id": "email"
},
{
"type": "string",
"description": "Free text tags, space separated",
"id": "tags"
},
{
"type": "string",
"description": "Date this entry created at (NOT date Body created at). Likely to be deprecated",
"id": "created_at"
},
{
"type": "string",
"description": "Date this entry updated at. Likely to be deprecated",
"id": "updated_at"
"format": "url",
"description": "Source URL for this specific record. Please point to a specific webpage or API, not just the base website or API. It is of little value if this attribute is the same API endpoint for every record; in such circumstances it would be better to put something in the README instead."
}
]
}
Expand Down
16 changes: 11 additions & 5 deletions lib/db.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,17 @@ function processFile(file, callback) {
console.log("Processing %s...", file);
csv().from(path.resolve('data', file), {
columns: true
}).on('record', function(record) {
var key = record.key;
tokens = key.split('/');

record.jurisdiction_code = tokens[0];
}).on('record', function(record, idx) {
var key = record.id;
tokens = key.split('/')
jurisdiction = tokens[0];
;

if (!jurisdiction) {
// console.log(file, idx, key);
return;
}
record.jurisdiction_code = jurisdiction;
record.slug = tokens[1];
record.type = 'body';
db.set(key, record);
Expand Down
4 changes: 2 additions & 2 deletions routes/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ exports.body = function(req, res) {

body = db.get(jurisdiction + '/' + key);

if (body.parent_key) {
body.parentBody = db.get(body.parent_key);
if (body.parent_id) {
body.parentBody = db.get(body.parent_id);
}

body.pageTitle = body.title + '/' + body.jurisdiction
Expand Down
60 changes: 55 additions & 5 deletions scripts/process.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,40 @@
import csv
import sys
import os
import json
import datetime

# migrate data as per https://github.com/okfn/publicbodies/issues/29
def migrate29(path, schema_path='datapackage.json'):
    """Rewrite the CSV file at ``path`` in place using the datasets#29 schema.

    The target column list is read from the ``id`` of every field in the
    first resource's schema in the data package descriptor. Legacy columns
    are copied to their new names, any column that is not part of the new
    schema is dropped, and the file is written back with the new header.

    Args:
        path: path of the CSV file to migrate. The file is overwritten.
        schema_path: path of the data package descriptor to read the new
            field list from. Defaults to ``datapackage.json`` relative to
            the current working directory.

    Raises:
        IOError / OSError: if either file cannot be opened.
        KeyError / IndexError: if the descriptor lacks
            ``resources[0].schema.fields`` or a field has no ``id``.
    """
    # Legacy column name -> new column name.
    mapfields = {
        'title': 'name',
        'abbr': 'abbreviation',
        'key': 'id',
        'category': 'classification',
        'parent_key': 'parent_id',
    }

    with open(schema_path) as schemafo:
        schema = json.load(schemafo)['resources'][0]['schema']
    newfields = [f['id'] for f in schema['fields']]

    def migraterow(row):
        for key, outkey in mapfields.items():
            # Only copy legacy columns that are actually present, so running
            # the migration on an already migrated file is a no-op instead
            # of a KeyError.
            if key in row:
                row[outkey] = row[key]
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for key in list(row.keys()):
            if key not in newfields:
                del row[key]
        return row

    # Read every row before reopening the same path for writing.
    with open(path) as fo:
        newrows = [migraterow(row) for row in csv.DictReader(fo)]

    # The context manager guarantees the output is flushed and closed.
    with open(path, 'w') as fo:
        writer = csv.DictWriter(fo, newfields, lineterminator='\n')
        writer.writeheader()
        writer.writerows(newrows)

## Older

def normalize(path):
fo = open(path)
reader = csv.DictReader(fo)
Expand Down Expand Up @@ -68,10 +100,28 @@ def strip_accents(s):
return out.encode('utf8')

if __name__ == '__main__':
if len(sys.argv) >= 2:
path = sys.argv[1]
normalize(path)
usage = 'process.py {action} ...'
if not len(sys.argv) > 1:
print(usage)
sys.exit(0)

jurisdictions = ['br', 'ch', 'de', 'eu', 'gb', 'gr', 'nz', 'us']

action = sys.argv[1]
if action == 'migrate29':
if len(sys.argv) > 2:
migrate29(sys.argv[2])
else:
for j in jurisdictions:
print('Processing %s' % j)
migrate29(os.path.join('data', '%s.csv' % j))
elif action == 'normalize':
if len(sys.argv) > 2:
path = sys.argv[2]
normalize(path)
else:
for c in ['de', 'eu', 'gb']:
normalize(os.path.join('data', c + '.csv'))
else:
for c in ['de', 'eu', 'gb']:
normalize(os.path.join('data', c + '.csv'))
print(usage)

16 changes: 8 additions & 8 deletions views/body.jade
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,29 @@ block content
| #{jurisdiction}
.view
h2
=title
- if (abbr)
=name
- if (abbreviation)
|
small.abbr=abbr
small.abbr=abbreviation
.attributes.dl-horizontal
dt Home page
dd
a(href="#{url}")=title
a(href="#{url}")=name
dt E-Mail
dd=email
dt Address
dd
- if (address)
address
pre=address
dt Category/Type
dd=category
dt Parent category
dt Classification
dd=classification
dt Parent
dd=parent
- if (parentBody)
dt Parent Body
dd
a(href="/#{parentBody.key}")=parentBody.title
a(href="/#{parentBody.id}")=parentBody.name
dt Jurisdiction
dd #{jurisdiction} (#{jurisdiction_code})
dt Tags
Expand Down
2 changes: 1 addition & 1 deletion views/jurisdiction.jade
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ block content
- each body in bodies
tr
td
a(href="/#{body.key}") #{body.title}
a(href="/#{body.id}") #{body.name}

0 comments on commit a642827

Please sign in to comment.