Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes and upgrades from HDX #28

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
4 changes: 4 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
OCHA Fork
+++++++++
Security protocols at the UN require all services to be run on UN servers, so dataproxy had to be modified to run independently of Google App Engine.

Data Proxy
++++++++++

Expand Down
Empty file modified dataproxy/__init__.py
100644 → 100755
Empty file.
50 changes: 45 additions & 5 deletions dataproxy/app.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ def __call__(self, environ, start_response):
flow['app']['config'] = AttributeDict()
flow['app']['config']['proxy'] = AttributeDict(max_length=int(self.max_length))
flow['environ'] = environ
if not 'HTTP_COOKIE' in flow.environ:
flow['environ']['HTTP_COOKIE'] = ''
flow['http_response'] = HTTPResponseMarble()
flow.http_response.header_list = [
dict(name='Content-Type', value='application/javascript'),
Expand Down Expand Up @@ -128,7 +130,12 @@ def __call__(self, environ, start_response):
return error(title=title, message=msg)

def index(self, flow):
if not flow.query.has_key('url'):
if not self.from_ckan(flow.environ['HTTP_COOKIE']):
title = 'ckan only'
msg = 'Dataproxy only accepts requests from CKAN installations'
flow.http_response.status = '200 Error %s'%title
flow.http_response.body = error(title=title, message=msg)
elif not flow.query.has_key('url'):
title = 'url query parameter missing'
msg = 'Please read the dataproxy API format documentation: https://github.com/okfn/dataproxy'
flow.http_response.status = '200 Error %s'%title
Expand All @@ -142,6 +149,14 @@ def index(self, flow):
flow.http_response.status = '200 %s %s' % (e.error, e.title)
flow.http_response.body = error(title=e.title, message=e.message)

def from_ckan(self,cookies):
# Decide whether a request comes from a CKAN installation, given the raw
# Cookie header string (`cookies`).
# NOTE: the cookie inspection below is commented out, so this currently
# returns True for every request and `cookies` is never read.
#Checks to see if there's a ckan cookie present
#for c in cookies.split(';'):
# t = c.split('=')
# if t[0].strip().lower() == 'ckan':
# return True
#return False
return True

def proxy_query(self, flow, url, query):
parts = urlparse.urlparse(url)
Expand Down Expand Up @@ -221,15 +236,15 @@ def transform(type_name, flow, url, query, max_results):
if 'encoding' in query:
encoding = query["encoding"].value
if type_name == 'csv':
stream = urllib2.urlopen(url)
stream = create_stream(flow, url)
records, metadata = dataconverters.commas.parse(stream, encoding=encoding,
window=window, guess_types=guess_types)
elif type_name == 'tsv':
stream = urllib2.urlopen(url)
stream = create_stream(flow, url)
records, metadata = dataconverters.commas.parse(stream, delimiter='\t',
encoding=encoding, window=window, guess_types=guess_types)
elif type_name == 'xls' or type_name == 'xlsx':
stream = urllib2.urlopen(url)
stream = create_stream(flow, url)
length = int(stream.headers.get('content-length', 0))
# max_length = flow.app.config.proxy.max_length
max_length = 5000000 # ~ 5Mb
Expand All @@ -243,12 +258,37 @@ def transform(type_name, flow, url, query, max_results):
else:
sheet_number = 1
records, metadata = dataconverters.xls.parse(stream,
excel_type=type_name, worksheet=sheet_number,
excel_type=type_name,
guess_types=guess_types)
else:
raise Exception("Resource type not supported '%s'" % type_name)
return (records, metadata)

def from_hdx(url):
    """Return True when the host of *url* ends with ``hdx.rwlabs.org``.

    The host is taken from the parsed ``hostname`` rather than by splitting
    ``netloc`` on ``':'``.  ``hostname`` excludes any ``user:password@``
    prefix and the port, and is lower-cased, so a URL such as
    ``http://hdx.rwlabs.org:x@other.host/`` is no longer mistaken for HDX
    and ``HDX.RWLABS.ORG`` is recognised.

    Strings without a network location yield False.
    """
    host = urlparse.urlparse(url).hostname or ''
    # NOTE(review): plain suffix match kept from the original check; it also
    # accepts hosts like "xhdx.rwlabs.org" -- confirm whether a "." boundary
    # should be required.
    return host.endswith('hdx.rwlabs.org')

def create_stream(flow, url):
    """Open *url* and return the response object from ``urllib2.urlopen``.

    URLs that ``from_hdx`` recognises are requested with the caller's
    ``Cookie`` header (``flow.environ['HTTP_COOKIE']``) attached.  Every
    other URL is opened without extra headers and any urllib2 error
    propagates unchanged.

    Raises:
        Exception: when an HDX URL answers with an HTTP error -- a
            permission message for status 403, a filestore message for any
            other status.
    """
    if not from_hdx(url):
        return urllib2.urlopen(url)

    request = urllib2.Request(
        url, headers={"Cookie": flow.environ['HTTP_COOKIE']})
    try:
        return urllib2.urlopen(request)
    except urllib2.HTTPError as err:
        if err.code == 403:
            raise Exception("You do not have permission to access this file.")
        # Only HDX URLs reach this handler, so the former non-HDX message
        # ("File could not be fetched.") was unreachable and is dropped.
        raise Exception("File could not be fetched from filestore.")

if __name__ == '__main__':
from wsgiref.util import setup_testing_defaults
Expand Down
Empty file modified dataproxy/app.yaml
100644 → 100755
Empty file.
Empty file modified dataproxy/bn/__init__.py
100644 → 100755
Empty file.
Empty file modified dataproxy/bn/relimport_test.py
100644 → 100755
Empty file.
31 changes: 28 additions & 3 deletions dataproxy/dataproxy.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
from google.appengine.ext.webapp.util import run_wsgi_app
import os
#from google.appengine.ext.webapp.util import run_wsgi_app
import os, sys
from app import JsonpDataProxy

# Module-level WSGI application object.
# NOTE(review): 3000000 is presumably the proxy's maximum response length in
# bytes -- confirm against JsonpDataProxy.__init__.
application = JsonpDataProxy(3000000)


def _start_response(status, headers, exc_info=None):
# start_response callable for the WSGI application: prints the status line
# and each (name, value) header pair to stdout, then returns
# sys.stdout.write as the body-writing callable.
if exc_info is not None:
# Re-raise the error supplied by the application, keeping its traceback
# (Python 2 three-argument raise).
raise exc_info[0], exc_info[1], exc_info[2]
print "Status: %s" % status
for name, val in headers:
print "%s: %s" % (name, val)
# Empty line separates the headers from the body.
print
return sys.stdout.write

def main():
    """Serve one request by calling ``application`` with a WSGI environ.

    The environ is a copy of ``os.environ`` extended with the ``wsgi.*``
    keys.  The iterable returned by the application is written to stdout
    chunk by chunk, and its ``close()`` method, if present, is always called.
    """
    # Function-scope import: wsgiref is not among the file-level imports
    # visible in this module, yet guess_scheme() is needed below.
    import wsgiref.util

    env = dict(os.environ)
    env["wsgi.input"] = sys.stdin
    env["wsgi.errors"] = sys.stderr
    env["wsgi.version"] = (1, 0)
    env["wsgi.run_once"] = True
    env["wsgi.url_scheme"] = wsgiref.util.guess_scheme(env)
    # Key was misspelled "msgi.multithread", leaving the WSGI-required
    # "wsgi.multithread" entry unset.
    env["wsgi.multithread"] = False
    env["wsgi.multiprocess"] = False

    # The App Engine run_wsgi_app(application) call is intentionally absent:
    # the request is dispatched exactly once, here.
    result = application(env, _start_response)
    try:
        if result is not None:
            for data in result:
                sys.stdout.write(data)
    finally:
        if hasattr(result, 'close'):
            result.close()

# Handle a single request when this file is executed as a script.
if __name__ == "__main__":
main()
Empty file modified dataproxy/static/demo.html
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion dataproxy/vendor/chardet
Submodule chardet updated from 70af46 to c40aa9
2 changes: 1 addition & 1 deletion dataproxy/vendor/dataconverters
Submodule dataconverters updated from 11a7f1 to 3e6c96
2 changes: 1 addition & 1 deletion dataproxy/vendor/messytables
Submodule messytables updated 63 files
+2 −0 .coveragerc
+2 −0 .gitignore
+7 −2 .travis.yml
+56 −0 CHANGELOG.md
+53 −0 CONTRIBUTING.md
+7 −3 README.md
+0 −395 contrib/csv_file.py
+0 −40 contrib/unicsv.py
+94 −50 doc/index.rst
+1 −0 horror/211.csv
+4 −0 horror/README.md
+ horror/annotated.ods
+6 −0 horror/badcolspan.html
+ horror/bian-anal-mca-2005-dols-eng-1011-0312-tab3.xlsm
+865 −0 horror/characters.csv
+17 −0 horror/complex.html
+ horror/encrypted_no_password.xls
+3,715 −0 horror/html.html
+86 −0 horror/invisible_text.html
+ horror/large.ods
+ horror/large.xlsx
+1 −0 horror/mixedGLB.csv
+10 −0 horror/null.csv
+ horror/problematic_first_sheet.xls
+12 −0 horror/rowcolspan.html
+ horror/simple.ods
+ horror/simple.pdf
+29 −0 horror/simple.psv
+0 −0 horror/simple.unknown
+ horror/simple.zip
+3 −0 horror/skip_initials.csv
+1 −0 horror/small.csv
+ horror/unicode_sheet_name.xls
+28 −28 horror/weird_head_padding.csv
+15 −4 messytables/__init__.py
+159 −50 messytables/any.py
+40 −19 messytables/commas.py
+87 −20 messytables/core.py
+10 −10 messytables/dateparser.py
+12 −0 messytables/error.py
+36 −32 messytables/excel.py
+0 −78 messytables/excelx.py
+26 −20 messytables/headers.py
+244 −0 messytables/html.py
+10 −8 messytables/ilines.py
+44 −0 messytables/jts.py
+127 −0 messytables/ods.py
+97 −0 messytables/pdf.py
+134 −52 messytables/types.py
+23 −3 messytables/util.py
+27 −23 messytables/zip.py
+2 −6 requirements-test.txt
+11 −8 setup.py
+6 −0 test/__init__.py
+9 −0 test/shim26.py
+65 −0 test/test_any.py
+134 −0 test/test_guessing.py
+113 −0 test/test_properties.py
+571 −0 test/test_read.py
+14 −312 test/test_rowset.py
+80 −0 test/test_stream.py
+29 −0 test/test_tableset.py
+21 −0 test/test_unit.py
Empty file modified dataproxy/vendor/openpyxl-1.5.7/LICENCE
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/__init__.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/cell.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/chart.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/drawing.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/namedrange.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/__init__.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/excel.py
100644 → 100755
Empty file.
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/strings.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/style.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/workbook.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/worksheet.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/__init__.py
100644 → 100755
Empty file.
Empty file.
Empty file.
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/odict.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/sax.py
100644 → 100755
Empty file.
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/date_time.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/exc.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/ooxml.py
100644 → 100755
Empty file.
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/units.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/xmltools.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/style.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/__init__.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/helper.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_cell.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_chart.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_dump.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_iter.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_meta.py
100644 → 100755
Empty file.
Empty file.
Empty file.
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_props.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_read.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_strings.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_style.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_theme.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_workbook.py
100644 → 100755
Empty file.
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_write.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/workbook.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/worksheet.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/__init__.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/charts.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/drawings.py
100644 → 100755
Empty file.
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/excel.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/strings.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/styles.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/theme.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/workbook.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/worksheet.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/python-dateutil-1.5/LICENSE
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/python-dateutil-1.5/dateutil/__init__.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/python-dateutil-1.5/dateutil/easter.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/python-dateutil-1.5/dateutil/parser.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/python-dateutil-1.5/dateutil/relativedelta.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/python-dateutil-1.5/dateutil/rrule.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/python-dateutil-1.5/dateutil/tz.py
100644 → 100755
Empty file.
Empty file modified dataproxy/vendor/python-dateutil-1.5/dateutil/tzwin.py
100644 → 100755
Empty file.
Empty file.
Empty file.
Empty file modified dataproxy/xlrd/__init__.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/biffh.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/compdoc.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/doc/compdoc.html
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/doc/xlrd.html
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/examples/namesdemo.xls
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/examples/xlrdnameAPIdemo.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/formatting.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/formula.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/licences.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/sheet.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/timemachine.py
100644 → 100755
Empty file.
Empty file modified dataproxy/xlrd/xldate.py
100644 → 100755
Empty file.