forked from openstate/open-raadsinformatie
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reclassify_events.py
executable file
·103 lines (82 loc) · 3.26 KB
/
reclassify_events.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
import sys
import os
import re
import json
from pprint import pprint
from collections import OrderedDict
from optparse import OptionParser
import redis
import requests
from elasticsearch import helpers as es_helpers
from elasticsearch.exceptions import RequestError
import ocd_backend
from ocd_backend.es import elasticsearch as es
from ocd_backend.utils.misc import reindex
def transform_to_same(h):
    """Identity transformation: report the item being processed, change nothing.

    ``h`` is a hit dictionary carrying at least an ``_id`` key. The very same
    object is returned unmodified.
    """
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Transforming item %s ..." % (h['_id'],))
    return h
def transform_to_old(h):
    """Rename the ``Meetingitem`` event classification to ``Meeting Item``.

    Only hits whose ``_type`` is ``events`` are inspected; every other hit is
    handed back without its ``_source`` being read. The hit is modified in
    place and returned.
    """
    if h['_type'] != u'events':
        return h

    source = h['_source']
    if source['classification'] == u'Meetingitem':
        source['classification'] = u'Meeting Item'
    return h
# English event classifications that have a fixed Dutch replacement.
_CLASSIFICATION_TRANSLATIONS = (
    (u'Meeting Item', u'Agendapunt'),
    (u'Meetingitem', u'Agendapunt'),
    (u'Meeting', u'Agenda'),
    (u'Resolution', u'Besluitenlijst'),
)


def _report_name(source_data):
    """Return the first word of the JSON ``_ReportName``, or None if unusable."""
    if source_data.get('content_type') != u'application/json':
        return None
    # FIXME: this is mainly for iBabs, but what about GemeenteOplossingen?
    try:
        # str.split() without arguments splits on runs of whitespace; passing
        # r'\s+' would be taken as a literal separator, not as a regex.
        words = json.loads(source_data['data'])['_ReportName'].split()
    except (AttributeError, KeyError, TypeError, ValueError):
        # Missing/invalid JSON payload or no string name: nothing to derive.
        return None
    return words[0] if words else None


def transform_to_new(h):
    """Translate the classification of an ``events`` hit to its Dutch name.

    Hits whose ``_type`` is not ``events`` are returned untouched. For events:

    * ``Meeting Item`` / ``Meetingitem`` become ``Agendapunt``, ``Meeting``
      becomes ``Agenda`` and ``Resolution`` becomes ``Besluitenlijst``.
    * When ``_source`` has no ``source_data``, it is fetched from the
      ``ori_<collection>`` index and stored back on the hit (best effort).
    * ``Report`` becomes the first word of ``_ReportName`` found in the JSON
      source data, or ``Verslag`` when no such name is available.

    The hit is modified in place and returned. Raises ``KeyError`` when an
    event has no ``classification``.
    """
    if h['_type'] != u'events':
        return h

    source = h['_source']

    # This needs to be translated
    for english, dutch in _CLASSIFICATION_TRANSLATIONS:
        if source['classification'] == english:
            source['classification'] = dutch
            break

    if 'source_data' in source:
        sd = source['source_data']
    else:
        try:
            doc = es.get(index=u'ori_%s' % (source['meta']['collection'],),
                         doc_type=h['_type'], id=h['_id'],
                         _source_include=['*'])
            sd = doc['_source']['source_data']
            source['source_data'] = sd
        except Exception:
            # Deliberately best effort: a failed lookup must not abort the
            # run, the hit simply continues without source data.
            sd = {}

    if source['classification'] != u'Report':
        return h

    source['classification'] = _report_name(sd) or u'Verslag'
    return h
def run(argv):
    """Reindex one index into another through a ``transform_to_<ACTION>`` hook.

    ``argv`` is a full argument vector with the program name in ``argv[0]``
    (the shape of ``sys.argv``); passing ``None`` falls back to ``sys.argv``.

    Options: ``-a/--action`` selects the module-level function
    ``transform_to_<ACTION>``, ``-i/--index`` names the index to read and
    ``-o/--output`` the index to write.

    Returns 0 after ``reindex`` has been called, 1 when no transformation
    exists for the requested action and 2 when that name is not callable.
    """
    parser = OptionParser()
    parser.add_option("-a", "--action", dest="action", default="same",
                      help="perform ACTION", metavar="ACTION")
    parser.add_option("-i", "--index", dest="index", default='ori_heerde',
                      help="read from INDEX", metavar="INDEX")
    parser.add_option("-o", "--output", dest="output",
                      default='ori_heerde_goed',
                      help="write to INDEX", metavar="INDEX")
    parser.add_option("-q", "--quiet",
                      action="store_false", dest="verbose", default=True,
                      help="don't print status messages to stdout")

    # Honour the caller-supplied vector; optparse expects it without argv[0].
    cli_args = None if argv is None else list(argv[1:])
    options, _ = parser.parse_args(cli_args)

    # Transformations are discovered by name so that adding a new
    # transform_to_* function is enough to expose it as an action.
    func_name = "transform_to_%s" % (options.action,)
    func = globals().get(func_name)
    if func is None:
        sys.stderr.write("Unknown action %r: no %s() defined\n"
                         % (options.action, func_name))
        return 1
    if not callable(func):
        sys.stderr.write("%s is not callable\n" % (func_name,))
        return 2

    reindex(es, options.index, options.output, transformation_callable=func)
    return 0
if __name__ == '__main__':
    # Script entry point: the status returned by run() becomes the process
    # exit code.
    exit_status = run(sys.argv)
    sys.exit(exit_status)