Skip to content

Commit

Permalink
1.3.2, also another profile
Browse files Browse the repository at this point in the history
  • Loading branch information
Zverik committed Apr 19, 2018
1 parent 284f952 commit 5b7d2c4
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 26 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

## master branch

## 1.3.2

_Released 2018-04-19_

* Fixed bug in categories building.
* Fixed threshold for tags in duplicates check.
* Now the script prints "Done" when finished, to better measure time.

## 1.3.1

_Released 2018-03-20_
Expand Down
57 changes: 32 additions & 25 deletions conflate/conflate.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,15 @@ def construct_overpass_query(self, bboxes=None):
for t in ('node', 'way', 'relation["type"="multipolygon"]'):
query += t + tag_str + bbox_str + ';'
if self.ref is not None:
for t in ('node', 'way', 'relation'):
query += t + '["' + self.ref + '"];'
if not self.profile.get('bounded_update', False):
for t in ('node', 'way', 'relation'):
query += t + '["' + self.ref + '"];'
else:
for bbox in bboxes:
bbox_str = '' if bbox is None else '(' + ','.join(
[str(x) for x in bbox]) + ')'
for t in ('node', 'way', 'relation'):
query += t + '["' + self.ref + '"]' + bbox_str + ';'
query += '); out meta qt center;'
return query

Expand Down Expand Up @@ -368,30 +375,28 @@ def get_categories(self, tags):
def match_query(tags, query):
for tag in query:
if len(tag) == 1:
if tag[0] in tags:
return tag[0] in tags
else:
value = tags.get(tag[0], None)
if tag[1] is None or tag[1] == '':
return value is None
if value is None:
return False
elif tag[1] is None or tag[1] == '':
if tag[0] not in tags:
return False
else:
value = tags.get(tag[0], None)
if value is None:
return False
found = False
for t2 in tag[1:]:
if t2[0] == '~':
m = re.search(t2[1:], value)
if not m:
return False
elif t2[0] == '!':
if t2[1:].lower() in value.lower():
found = True
elif t2 == value:
found = False
for t2 in tag[1:]:
if t2[0] == '~':
m = re.search(t2[1:], value)
if not m:
return False
elif t2[0] == '!':
if t2[1:].lower() in value.lower():
found = True
if found:
break
if not found:
return False
elif t2 == value:
found = True
if found:
break
if not found:
return False
return True

def tags_to_query(tags):
Expand Down Expand Up @@ -1094,7 +1099,7 @@ def check_dataset_for_duplicates(profile, dataset, print_all=False):
for k in diff_tags:
if alt.data.tags.get(k) != d.tags.get(k):
tags_differ += 1
if tags_differ <= max(1, len(diff_tags) / 3):
if tags_differ <= len(diff_tags) / 3:
duplicates.add(alt.data.id)
d.exclusive_group = group
alt.data.exclusive_group = group
Expand Down Expand Up @@ -1255,6 +1260,8 @@ def run(profile=None):
fc = {'type': 'FeatureCollection', 'features': conflator.changes}
json.dump(fc, options.changes, ensure_ascii=False, sort_keys=True, indent=1)

logging.info('Done')


if __name__ == '__main__':
run()
2 changes: 1 addition & 1 deletion conflate/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.3.1'
__version__ = '1.3.2'
131 changes: 131 additions & 0 deletions profiles/moscow_addr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import json
import logging

# Profile settings. These are plain module-level names; how each one is
# interpreted is up to the code that loads this profile (not visible here).
source = 'dit.mos.ru'
no_dataset_id = True
query = [[('addr:housenumber',)], [('building',)]]
max_distance = 50
max_request_boxes = 2
master_tags = ('addr:housenumber', 'addr:street')

# Administrative areas of Moscow, keyed by the number accepted in `param`.
# dataset() compares the chosen name against each record's ADM_AREA field.
ADMS = {
    '1': 'Северо-Западный административный округ',
    '2': 'Северный административный округ',
    '3': 'Северо-Восточный административный округ',
    '4': 'Западный административный округ',
    '5': 'Центральный административный округ',
    '6': 'Восточный административный округ',
    '7': 'Юго-Западный административный округ',
    '8': 'Южный административный округ',
    '9': 'Юго-Восточный административный округ',
    '10': 'Зеленоградский административный округ',
    '11': 'Троицкий административный округ',
    '12': 'Новомосковский административный округ',
}

# Defaults used when no (or an unrecognised) parameter is given.
COMPLEX = False
ADM = ADMS['2']

# NOTE(review): `param` is not defined in this file; presumably the loader
# injects it before executing the profile -- confirm against the caller.
# Format handled below: an optional leading 'c' (selects "complex" house
# numbers in dataset()) followed by a key of ADMS.
if param:
    COMPLEX = param[0] == 'c'
    if COMPLEX:
        param = param[1:]
    # An unknown area key silently keeps the default area.
    ADM = ADMS.get(param, ADM)


def dataset(fileobj):
    """Build the list of source points from a zipped JSON address dump.

    Every member of the archive is decoded as cp1251 JSON and iterated as a
    list of address records. A record is kept only when:

    * its ``ADM_AREA`` equals the module-level ``ADM``;
    * it has both a street (``P7``) and a house number (``L1_VALUE``);
    * a centre point can be derived from its ``geoData``;
    * its house / corpus / building type labels are among the known ones;
    * its "complex" status (house type other than 'дом', or a non-empty
      ``L3_VALUE``) equals the module-level ``COMPLEX``.

    Any exception raised while processing a single record is logged together
    with the record and the record is skipped.

    Args:
        fileobj: Zip archive, passed unchanged to ``zipfile.ZipFile``.

    Returns:
        A list of ``SourcePoint`` objects carrying ``addr:street`` and
        ``addr:housenumber`` tags. ``SourcePoint`` is not defined in this
        file; presumably the profile loader provides it -- confirm.
    """
    def find_center(geodata):
        """Return a centre for a geoData dict, or None if there is none.

        A ``center`` key wins (its first element is returned as is);
        otherwise the plain average of the vertices of a Polygon's first
        ring, of a LineString, or of a single Point is computed.
        """
        if not geodata:
            return None
        if 'center' in geodata:
            # NOTE(review): presumably a list of [lon, lat] pairs -- confirm.
            return geodata['center'][0]
        if 'coordinates' in geodata:
            typ = geodata['type']
            if typ == 'Polygon':
                points = geodata['coordinates'][0]
            elif typ == 'LineString':
                points = geodata['coordinates']
            elif typ == 'Point':
                points = [geodata['coordinates']]
            else:
                points = []
            # Explicit accumulation (not sum()) keeps the float arithmetic
            # identical on every Python version.
            lonlat = [0, 0]
            cnt = 0
            for p in points:
                lonlat[0] += p[0]
                lonlat[1] += p[1]
                cnt += 1
            if cnt > 0:
                return [lonlat[0]/cnt, lonlat[1]/cnt]
        return None

    # COMPLEX and ADM are only read in this function, never assigned.
    global COMPLEX, ADM
    import zipfile
    data = []
    no_geodata = 0
    no_addr = 0
    count = 0
    # The context manager releases the archive when done; ZipFile does not
    # close a file object that was handed to it, so the caller's fileobj
    # stays open.
    with zipfile.ZipFile(fileobj) as zf:
        for zname in zf.namelist():
            records = json.loads(zf.read(zname).decode('cp1251'))
            for el in records:
                gid = el['global_id']
                try:
                    adm_area = el['ADM_AREA']
                    if adm_area != ADM:
                        continue
                    count += 1
                    lonlat = find_center(el.get('geoData'))
                    if not lonlat:
                        # Counted here, skipped later, so that records
                        # lacking both a centre and an address show up in
                        # both statistics.
                        no_geodata += 1
                    street = el.get('P7')
                    house = el.get('L1_VALUE')
                    htype = el.get('L1_TYPE')
                    corpus = el.get('L2_VALUE')
                    ctype = el.get('L2_TYPE')
                    stroenie = el.get('L3_VALUE')
                    stype = el.get('L3_TYPE')
                    if not street or not house:
                        no_addr += 1
                        continue
                    if not lonlat:
                        continue
                    is_complex = False
                    housenumber = house.replace(' ', '')
                    if htype != 'дом':
                        is_complex = True
                        if htype in ('владение', 'домовладение'):
                            housenumber = 'вл' + housenumber
                        else:
                            logging.warning('Unknown house number type: %s', htype)
                            continue
                    if corpus:
                        if ctype == 'корпус':
                            housenumber += ' к{}'.format(corpus)
                        else:
                            logging.warning('Unknown corpus type: %s', ctype)
                            continue
                    if stroenie:
                        is_complex = True
                        if stype == 'строение' or stype == 'сооружение':
                            housenumber += ' с{}'.format(stroenie)
                        else:
                            logging.warning('Unknown stroenie type: %s', stype)
                            continue
                    if is_complex != COMPLEX:
                        continue
                    tags = {
                        'addr:street': street,
                        'addr:housenumber': housenumber,
                    }
                    # Second and first elements of the centre are passed in
                    # that order -- presumably (lat, lon); confirm against
                    # SourcePoint's signature.
                    data.append(SourcePoint(gid, lonlat[1], lonlat[0], tags))
                except Exception as e:
                    # Best effort: one malformed record must not abort the
                    # whole import.
                    logging.warning('PROFILE: Failed to get attributes for address %s: %s', gid, str(e))
                    logging.warning(json.dumps(el, ensure_ascii=False))

    # Both counters are only incremented after `count`, so count > 0 here.
    if no_addr + no_geodata > 0:
        logging.warning('%.2f%% of data have no centers, and %.2f%% have no streets or house numbers',
                        100*no_geodata/count, 100*no_addr/count)
    return data

0 comments on commit 5b7d2c4

Please sign in to comment.