-
Notifications
You must be signed in to change notification settings - Fork 1
/
query.py
160 lines (135 loc) · 6.07 KB
/
query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import json
import re
from hashlib import sha1
from urllib.parse import parse_qs
from schema import Schema
from util import cached_property, tree
from exceptions import ValidationError
def validate(condition, errmsg):
    """Return ``True`` when *condition* is truthy, otherwise raise.

    :param condition: any value; only its truthiness is inspected
    :param errmsg: message passed to ``ValidationError`` on failure
    :raises ValidationError: if *condition* is falsy
    """
    if condition:
        return True
    raise ValidationError(errmsg)
class Argument:
    """Describe and validate one query-string parameter.

    :param name: key of the parameter in the parsed query dict
    :param default: value returned when the parameter is absent, empty, or
        its first occurrence equals this default
    :param choices: literal values that are always accepted
    :param regex: patterns tried with ``re.match``; a value is accepted as
        soon as one of them matches
    :param multi: whether one occurrence may hold several comma-separated
        values
    :param single: whether the parameter may appear only once in the query
        string
    """

    def __init__(self, name, default, choices=None, regex=None, multi=True, single=True):
        self.name = name
        self.default = default
        # ``None`` sentinels instead of ``[]`` defaults, so that instances
        # never share one mutable list object.
        self.choices = [] if choices is None else choices
        self.regex = [] if regex is None else regex
        self.multi = multi
        self.single = single

    def _check_value(self, value):
        """Raise ``ValidationError`` unless *value* passes regex or choices.

        A value is fine if any regex matches it. Otherwise it has to be one
        of ``choices``; if there are no choices to fall back to, a failed
        regex check is final. Without regex and choices nothing is checked.
        """
        if self.regex and any(re.match(pattern, value) for pattern in self.regex):
            return
        if self.choices:
            validate(value in self.choices,
                     '`%s` is not in allowed choices for param `%s`' % (value, self.name))
        elif self.regex:
            raise ValidationError('`%s` is not valid for param `%s`' % (value, self.name))

    def clean(self, data):
        """Validate this parameter inside *data* and return its cleaned value.

        :param data: mapping of parameter name to list of raw string values
            (the shape produced by ``urllib.parse.parse_qs``)
        :returns: ``self.default`` if the parameter is missing, has no
            values, or its first value equals the default; the untouched
            list of occurrences if ``single`` is false; otherwise a single
            string, or a list of strings when several comma-separated
            values were given
        :raises ValidationError: if the parameter is repeated although
            ``single`` is set, contains a comma although ``multi`` is not
            set, or holds a value rejected by ``regex`` / ``choices``
        """
        values = data.get(self.name)
        if not values or values[0] == self.default:
            return self.default

        if self.single:
            validate(len(values) == 1,
                     'param `%s` can only be used once in query string' % self.name)

        # Only the first occurrence is validated; with ``single`` set it is
        # the only one there is.
        first = values[0]
        if not self.multi:
            validate(',' not in first,
                     'param `%s` can not be comma-separated' % self.name)
        parts = first.split(',')
        for value in parts:
            self._check_value(value)

        if not self.single:
            # repeated parameters are handed back as the raw list of occurrences
            return values
        if not self.multi:
            return first
        # collapse a one-element list to the bare string
        return parts[0] if len(parts) == 1 else parts
class DataArgument(Argument):
    """Argument that parses data selectors into a nested, sorted dict.

    Each raw value is expected to look like ``STATISTIC:MEASURE``,
    optionally followed by a parenthesised, comma-separated dimension list
    in which every dimension may carry ``|``-separated values, e.g.
    ``STATISTIC:MEASURE(DIM1:VAL1|VAL2,DIM2)``.
    """

    def clean(self, data):
        """Validate and parse the data selectors found in *data*.

        :param data: mapping of parameter name to list of raw string values
        :returns: ``{statistic: {measure: {dimension: [values, ...]}}}`` with
            keys and value lists sorted, so that equal selections always
            produce an identical structure
        :raises ValidationError: if a selector has no ``:`` separator, has a
            dimension list that cannot be parsed, or a dimension with more
            than one ``:``
        """
        try:
            statistics = super().clean(data)
        except ValidationError as e:
            if 'comma-separated' not in str(e):
                raise
            # Commas are legal inside a dimension list, so the generic comma
            # check does not apply here. Fall back to the raw values; the
            # parameter is known to be present because the check that failed
            # runs after the presence test.
            statistics = data[self.name]
        if isinstance(statistics, str):
            statistics = [statistics]

        # NOTE(review): assumes ``tree()`` returns an auto-vivifying nested
        # mapping, so that merely indexing a missing key creates it.
        paths = tree()
        for selector in statistics:
            statistic, colon, measure = selector.partition(':')
            validate(colon == ':',
                     '`%s` is not valid for param `%s`' % (selector, self.name))

            if '(' not in measure:
                paths[statistic][measure]  # register the measure without dimensions
                continue

            dimensions = re.search(r'\(([A-Z0-9_,:|]+)\)', measure)
            validate(dimensions is not None,
                     '`%s` is not valid for param `%s`' % (selector, self.name))
            measure = measure.split('(')[0]
            for dimension in dimensions.group(1).split(','):
                dimension, colon, values = dimension.partition(':')
                if colon:
                    validate(':' not in values,
                             '`%s` is not valid for param `%s`' % (selector, self.name))
                    paths[statistic][measure][dimension] = values.split('|')
                else:
                    paths[statistic][measure][dimension]  # dimension without value filter

        # sort everything for unique identification
        return {
            skey: {
                mkey: {dkey: sorted(dvalues) for dkey, dvalues in sorted(dims.items())}
                for mkey, dims in sorted(measures.items())
            }
            for skey, measures in sorted(paths.items())
        }
# Purely numeric identifier. Anchored at both ends so that values with
# trailing non-digits (e.g. `12abc`) are rejected instead of matching on
# their numeric prefix.
NUM_RE = r'^\d+$'
class Query:
    """A table query built from a URL query string or an already parsed dict.

    The supported parameters are declared as class-level ``Argument``
    instances; ``arguments`` collects them and ``clean`` runs each of them
    against the raw data.
    """

    # Argument(name, default, choices=<allowed literals>, regex=<validation patterns>,
    #          multi=<comma-separated values allowed>, single=<allowed only once in query string>)
    region = Argument('region', 'all', regex=[NUM_RE], choices=['DG'])
    level = Argument('level', '1', choices=['0', '1', '2', '3', '4', 'all'])
    parent = Argument('parent', None, regex=[NUM_RE])
    time = Argument('time', 'latest', choices=['all'], regex=[
        r'^\d{4}$',         # 2000
        r'^\d{4}:\d{4}$',   # 2000:2010
        r'^:\d{4}$',        # :2010
        r'^\d{4}:$'         # 2010:
    ])
    dformat = Argument('dformat', 'year', choices=['date'])
    labels = Argument('labels', 'id', choices=['name', 'both'])
    layout = Argument('layout', 'long', choices=['region', 'time'])
    format = Argument('format', 'csv', choices=['tsv', 'json'])
    delimiter = Argument('delimiter', ',', choices=[';'])
    sort = Argument('sort', 'time', choices=['region', 'value', 'measure'])  # data sorting
    # not implemented:
    # order = Argument('order', 'time,region,value,keys',
    #                  choices=['time', 'region', 'value', 'keys', 'meta'])  # column order
    data = DataArgument('data', {}, regex=[], multi=False, single=False)  # FIXME regex

    def __init__(self, data):
        """Store the raw query after rejecting unknown parameter names.

        :param data: URL query string, or a mapping of parameter name to
            list of raw values
        :raises ValidationError: if *data* contains a key that is not a
            declared argument
        """
        if isinstance(data, str):  # urlquery
            self.urlquery = data  # FIXME create urlquery from dict when we support query via dict
            data = parse_qs(data)
        known = {key for key, _ in self.arguments}
        invalid = set(data) - known
        if invalid:
            # sorted, so the message is deterministic
            raise ValidationError('unknown attributes: %s' % ', '.join(sorted(invalid)))
        self._data = data

    def __getattr__(self, attr):
        """Resolve otherwise unknown attributes from the cleaned data.

        Python calls this hook only after normal lookup failed, so names
        that exist on the class (including the argument declarations
        themselves) never arrive here.

        :raises AttributeError: if *attr* is private or not a cleaned key
        """
        # Never consult `cleaned_data` for private/dunder names: they are
        # probed by helpers such as `hasattr` or `copy`, and answering them
        # by computing properties can recurse back into this hook.
        if not attr.startswith('_'):
            cleaned = self.cleaned_data
            if attr in cleaned:
                return cleaned[attr]
        raise AttributeError(
            '%r object has no attribute %r' % (type(self).__name__, attr))

    def clean(self):
        """Clean every declared argument and validate the result.

        :returns: dict of argument name to cleaned value
        :raises ValidationError: if an argument is invalid or
            ``Schema.validate`` reports a falsy result
        """
        cleaned_arguments = {key: arg.clean(self._data) for key, arg in self.arguments}
        if not Schema.validate(cleaned_arguments):
            # fail loudly instead of implicitly returning None
            raise ValidationError('query arguments do not match the schema')
        return cleaned_arguments

    @cached_property
    def cleaned_data(self):
        """Cleaned arguments as a dict ordered by argument name."""
        return dict(sorted(self.clean().items()))

    @cached_property
    def data_definition(self):
        """The subset of cleaned arguments hashed by ``data_key``."""
        return {k: v for k, v in self.cleaned_data.items()
                if k in ('region', 'level', 'parent', 'time', 'data', 'dformat')}

    @cached_property
    def key(self):
        """unique identifier for exactly this table with all given specs about format etc"""
        return sha1(json.dumps(self.cleaned_data).encode()).hexdigest()

    @cached_property
    def data_key(self):
        """unique identifier for the exact data used for this table regardless of format/transform options"""
        return sha1(json.dumps(self.data_definition).encode()).hexdigest()

    @cached_property
    def arguments(self):
        """List of ``(name, Argument)`` pairs declared directly on this class."""
        return [(key, arg) for key, arg in self.__class__.__dict__.items() if isinstance(arg, Argument)]

    @cached_property
    def schema(self):
        """Result of ``Schema.get_filtered_for_query`` for the cleaned ``data`` selection."""
        return Schema.get_filtered_for_query(self.cleaned_data['data'])