forked from osm-fr/osmose-backend
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Analyser_Merge_Geocode_Addok_CSV.py
69 lines (59 loc) · 3.01 KB
/
Analyser_Merge_Geocode_Addok_CSV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
#-*- coding: utf-8 -*-
###########################################################################
## ##
## Copyrights Frédéric Rodrigo 2019 ##
## ##
## This program is free software: you can redistribute it and/or modify ##
## it under the terms of the GNU General Public License as published by ##
## the Free Software Foundation, either version 3 of the License, or ##
## (at your option) any later version. ##
## ##
## This program is distributed in the hope that it will be useful, ##
## but WITHOUT ANY WARRANTY; without even the implied warranty of ##
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ##
## GNU General Public License for more details. ##
## ##
## You should have received a copy of the GNU General Public License ##
## along with this program. If not, see <http://www.gnu.org/licenses/>. ##
## ##
###########################################################################
from .Analyser_Merge import Source
from modules import downloader
class Geocode_Addok_CSV(Source):
    """Wrap another source and geocode its CSV content through the Addok CSV service.

    The wrapped source's file is read line by line, posted in slices to
    ``https://api-adresse.data.gouv.fr/search/csv/`` and the responses are
    concatenated into a single cached file. Any attribute not defined on this
    wrapper is looked up on the wrapped source.
    """

    def __init__(self, source, columns, logger, citycode = None, delimiter = ',', encoding = 'utf-8'):
        """Store the wrapped source and the settings sent with each request.

        Args:
            source: wrapped source; must provide ``open()`` and ``fileUrl``.
            columns: sent as the ``columns`` form field of each request.
            logger: object whose ``log(message)`` is called once per slice.
            citycode: sent as the ``citycode`` form field of each request.
            delimiter: sent as the ``delimiter`` form field of each request.
            encoding: sent as the ``encoding`` form field of each request.
        """
        self.source = source
        self.columns = columns
        self.citycode = citycode
        self.delimiter = delimiter
        self.encoding = encoding
        self.logger = logger

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped source.

        Raises:
            AttributeError: if ``source`` itself is not set yet, or if the
                wrapped source does not have the attribute.
        """
        # __getattr__ only runs when normal lookup fails. If 'source' is the
        # missing attribute (instance created without __init__, e.g. while
        # being copied or unpickled), evaluating self.source below would call
        # __getattr__ again forever; fail cleanly instead.
        if name == 'source':
            raise AttributeError(name)
        return getattr(self.source, name)

    def open(self):
        """Return a text file object on the geocoded version of the source.

        The path comes from ``downloader.update_cache``, which is handed
        ``self.fetch`` to build the file when needed.
        """
        # NOTE(review): 60 is passed positionally to update_cache; presumably
        # a cache lifetime - confirm against the downloader module.
        path = downloader.update_cache('geocoded://' + self.source.fileUrl, 60, fetch=self.fetch)
        # fetch() writes the file as UTF-8, so read it back with the same
        # encoding instead of relying on the locale default.
        return open(path, encoding='utf-8')

    def fetch(self, url, tmp_file, date_string=None):
        """Geocode the wrapped source's CSV and write the result to ``tmp_file``.

        Args:
            url: unused here; the content is read from ``self.source.open()``.
            tmp_file: path of the UTF-8 text file to write.
            date_string: unused.

        Returns:
            True once every slice has been geocoded and written.

        Raises:
            Whatever ``raise_for_status()`` raises when the service answers
            with an error status.
        """
        service = u'https://api-adresse.data.gouv.fr/search/csv/'
        step = 2000  # number of data rows sent per request

        # Read the whole input up front and release the handle right away.
        source_file = self.source.open()
        try:
            content = source_file.readlines()
        finally:
            source_file.close()

        header = content[0:1]
        rows = max(len(content) - 1, 0)
        # Ceiling division: an exact multiple of `step` must not produce an
        # extra, header-only request. Always send at least one request so a
        # header-only input is still passed through the service.
        slices = max(1, (rows + step - 1) // step)

        # The context manager guarantees the output is flushed and closed,
        # including when a request fails part-way through.
        with open(tmp_file, 'w', encoding='utf-8') as outfile:
            for i in range(slices):
                self.logger.log("Geocode slice {0}/{1}".format(i, slices))
                # Every request carries the header line followed by its rows.
                chunk = ''.join(header + content[1 + step*i : 1 + step*(i+1)])
                r = downloader.requests_retry_session().post(url=service, data={
                    'delimiter': self.delimiter,
                    'encoding': self.encoding,
                    'columns': self.columns,
                    'citycode': self.citycode,
                }, files={
                    'data': chunk,
                })
                r.raise_for_status()
                if i == 0:
                    # Keep the first response whole, header line included.
                    text = r.text
                else:
                    # Drop the first line (repeated header) of later responses.
                    text = r.text.partition('\n')[2]
                outfile.write(text)

        return True