diff --git a/README.rst b/README.rst index 8103aeb..269670e 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,7 @@ +OCHA Fork ++++++++++ +Security protocols at the UN require all services be run on UN servers, so dataproxy had to be modified to run independently of Google App Engine. + Data Proxy ++++++++++ diff --git a/dataproxy/__init__.py b/dataproxy/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/app.py b/dataproxy/app.py old mode 100644 new mode 100755 index 14f4b9d..adf57ea --- a/dataproxy/app.py +++ b/dataproxy/app.py @@ -95,6 +95,8 @@ def __call__(self, environ, start_response): flow['app']['config'] = AttributeDict() flow['app']['config']['proxy'] = AttributeDict(max_length=int(self.max_length)) flow['environ'] = environ + if not 'HTTP_COOKIE' in flow.environ: + flow['environ']['HTTP_COOKIE'] = '' flow['http_response'] = HTTPResponseMarble() flow.http_response.header_list = [ dict(name='Content-Type', value='application/javascript'), @@ -128,7 +130,12 @@ def __call__(self, environ, start_response): return error(title=title, message=msg) def index(self, flow): - if not flow.query.has_key('url'): + if not self.from_ckan(flow.environ['HTTP_COOKIE']): + title = 'ckan only' + msg = 'Dataproxy only accepts requests from CKAN installations' + flow.http_response.status = '200 Error %s'%title + flow.http_response.body = error(title=title, message=msg) + elif not flow.query.has_key('url'): title = 'url query parameter missing' msg = 'Please read the dataproxy API format documentation: https://github.com/okfn/dataproxy' flow.http_response.status = '200 Error %s'%title @@ -142,6 +149,14 @@ def index(self, flow): flow.http_response.status = '200 %s %s' % (e.error, e.title) flow.http_response.body = error(title=e.title, message=e.message) + def from_ckan(self,cookies): + #Checks to see if there's a ckan cookie present + #for c in cookies.split(';'): + # t = c.split('=') + # if t[0].strip().lower() == 'ckan': + # return True + #return False + return 
True def proxy_query(self, flow, url, query): parts = urlparse.urlparse(url) @@ -221,15 +236,15 @@ def transform(type_name, flow, url, query, max_results): if 'encoding' in query: encoding = query["encoding"].value if type_name == 'csv': - stream = urllib2.urlopen(url) + stream = create_stream(flow, url) records, metadata = dataconverters.commas.parse(stream, encoding=encoding, window=window, guess_types=guess_types) elif type_name == 'tsv': - stream = urllib2.urlopen(url) + stream = create_stream(flow, url) records, metadata = dataconverters.commas.parse(stream, delimiter='\t', encoding=encoding, window=window, guess_types=guess_types) elif type_name == 'xls' or type_name == 'xlsx': - stream = urllib2.urlopen(url) + stream = create_stream(flow, url) length = int(stream.headers.get('content-length', 0)) # max_length = flow.app.config.proxy.max_length max_length = 5000000 # ~ 5Mb @@ -243,12 +258,35 @@ def transform(type_name, flow, url, query, max_results): else: sheet_number = 1 records, metadata = dataconverters.xls.parse(stream, - excel_type=type_name, worksheet=sheet_number, + excel_type=type_name, guess_types=guess_types) else: raise Exception("Resource type not supported '%s'" % type_name) return (records, metadata) +def from_hdx(url): + """Return True when the URL's host name ends with 'hdx.rwlabs.org'.""" + # .hostname excludes any user:password@ prefix and the port, and is lower-cased. + host = urlparse.urlparse(url).hostname or '' + return host.endswith('hdx.rwlabs.org') + +def create_stream(flow, url): + hdx = from_hdx(url) + if hdx: + request = urllib2.Request(url, headers={"Cookie" : flow.environ['HTTP_COOKIE']}) + try: + stream = urllib2.urlopen(request) + except urllib2.HTTPError, err: + if err.code == 403: + raise Exception("You do not have permission to access this file.") + else: + if hdx: + raise Exception("File could not be fetched from filestore.") + else: + raise Exception("File could not be fetched.") + else: + stream = urllib2.urlopen(url) + return stream if __name__ == '__main__': from wsgiref.util import setup_testing_defaults diff --git 
a/dataproxy/app.yaml b/dataproxy/app.yaml old mode 100644 new mode 100755 diff --git a/dataproxy/bn/__init__.py b/dataproxy/bn/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/bn/relimport_test.py b/dataproxy/bn/relimport_test.py old mode 100644 new mode 100755 diff --git a/dataproxy/dataproxy.py b/dataproxy/dataproxy.py old mode 100644 new mode 100755 index e987928..91eb333 --- a/dataproxy/dataproxy.py +++ b/dataproxy/dataproxy.py @@ -1,11 +1,38 @@ -from google.appengine.ext.webapp.util import run_wsgi_app -import os +#from google.appengine.ext.webapp.util import run_wsgi_app +import os, sys +import wsgiref.util from app import JsonpDataProxy application = JsonpDataProxy(3000000) + +def _start_response(status, headers, exc_info=None): + if exc_info is not None: + raise exc_info[0], exc_info[1], exc_info[2] + print "Status: %s" % status + for name, val in headers: + print "%s: %s" % (name, val) + print + return sys.stdout.write + def main(): - run_wsgi_app(application) + """Serve one request CGI-style: build the WSGI environ from os.environ and write the response to stdout.""" + env = dict(os.environ) + env["wsgi.input"] = sys.stdin + env["wsgi.errors"] = sys.stderr + env["wsgi.version"] = (1,0) + env["wsgi.run_once"] = True + env["wsgi.url_scheme"] = wsgiref.util.guess_scheme(env) + env["wsgi.multithread"] = False + env["wsgi.multiprocess"] = False + result = application(env, _start_response) + try: + if result is not None: + for data in result: + sys.stdout.write(data) + finally: + if hasattr(result, 'close'): + result.close() if __name__ == "__main__": main() diff --git a/dataproxy/static/demo.html b/dataproxy/static/demo.html old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/chardet b/dataproxy/vendor/chardet index 70af46c..c40aa9b 160000 --- a/dataproxy/vendor/chardet +++ b/dataproxy/vendor/chardet @@ -1 +1 @@ -Subproject commit 70af46cb8e146326081fd76360d1c19675ee627c +Subproject commit c40aa9b8f7b47a77f4926d75adecd51fb1ed7ce3 diff --git a/dataproxy/vendor/dataconverters b/dataproxy/vendor/dataconverters index 11a7f11..3e6c963 160000 --- 
a/dataproxy/vendor/dataconverters +++ b/dataproxy/vendor/dataconverters @@ -1 +1 @@ -Subproject commit 11a7f11dd5068e9c884b24c26bb7e3cb65e5ccc1 +Subproject commit 3e6c9630db4325188d2c43cceb98b984269a7a0c diff --git a/dataproxy/vendor/messytables b/dataproxy/vendor/messytables index eaa8ae6..973dece 160000 --- a/dataproxy/vendor/messytables +++ b/dataproxy/vendor/messytables @@ -1 +1 @@ -Subproject commit eaa8ae617220ceea8660c9db1b3a0d13e56588c7 +Subproject commit 973dece22282fcf093e7e12c00b2a69743037d2d diff --git a/dataproxy/vendor/openpyxl-1.5.7/LICENCE b/dataproxy/vendor/openpyxl-1.5.7/LICENCE old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/__init__.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/cell.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/cell.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/chart.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/chart.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/drawing.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/drawing.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/namedrange.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/namedrange.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/__init__.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/excel.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/excel.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/iter_worksheet.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/iter_worksheet.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/strings.py 
b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/strings.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/style.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/style.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/workbook.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/workbook.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/worksheet.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/reader/worksheet.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/__init__.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/__init__.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/allany.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/allany.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/elementtree.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/elementtree.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/odict.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/odict.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/sax.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/sax.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/tempnamedfile.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/compat/tempnamedfile.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/date_time.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/date_time.py old mode 100644 new 
mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/exc.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/exc.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/ooxml.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/ooxml.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/password_hasher.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/password_hasher.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/units.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/units.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/xmltools.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/shared/xmltools.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/style.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/style.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/__init__.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/helper.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/helper.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_cell.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_cell.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_chart.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_chart.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_dump.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_dump.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_iter.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_iter.py old mode 100644 new mode 100755 diff --git 
a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_meta.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_meta.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_named_range.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_named_range.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_number_format.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_number_format.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_password_hash.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_password_hash.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_props.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_props.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_read.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_read.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_strings.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_strings.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_style.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_style.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_theme.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_theme.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_workbook.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_workbook.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_worksheet.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_worksheet.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_write.py 
b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/tests/test_write.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/workbook.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/workbook.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/worksheet.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/worksheet.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/__init__.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/charts.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/charts.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/drawings.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/drawings.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/dump_worksheet.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/dump_worksheet.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/excel.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/excel.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/strings.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/strings.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/styles.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/styles.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/theme.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/theme.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/workbook.py b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/workbook.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/worksheet.py 
b/dataproxy/vendor/openpyxl-1.5.7/openpyxl/writer/worksheet.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/LICENSE b/dataproxy/vendor/python-dateutil-1.5/LICENSE old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/__init__.py b/dataproxy/vendor/python-dateutil-1.5/dateutil/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/easter.py b/dataproxy/vendor/python-dateutil-1.5/dateutil/easter.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/parser.py b/dataproxy/vendor/python-dateutil-1.5/dateutil/parser.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/relativedelta.py b/dataproxy/vendor/python-dateutil-1.5/dateutil/relativedelta.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/rrule.py b/dataproxy/vendor/python-dateutil-1.5/dateutil/rrule.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/tz.py b/dataproxy/vendor/python-dateutil-1.5/dateutil/tz.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/tzwin.py b/dataproxy/vendor/python-dateutil-1.5/dateutil/tzwin.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/zoneinfo/__init__.py b/dataproxy/vendor/python-dateutil-1.5/dateutil/zoneinfo/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/vendor/python-dateutil-1.5/dateutil/zoneinfo/zoneinfo-2010g.tar.gz b/dataproxy/vendor/python-dateutil-1.5/dateutil/zoneinfo/zoneinfo-2010g.tar.gz old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/__init__.py b/dataproxy/xlrd/__init__.py old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/biffh.py b/dataproxy/xlrd/biffh.py old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/compdoc.py b/dataproxy/xlrd/compdoc.py old 
mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/doc/compdoc.html b/dataproxy/xlrd/doc/compdoc.html old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/doc/xlrd.html b/dataproxy/xlrd/doc/xlrd.html old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/examples/namesdemo.xls b/dataproxy/xlrd/examples/namesdemo.xls old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/examples/xlrdnameAPIdemo.py b/dataproxy/xlrd/examples/xlrdnameAPIdemo.py old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/formatting.py b/dataproxy/xlrd/formatting.py old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/formula.py b/dataproxy/xlrd/formula.py old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/licences.py b/dataproxy/xlrd/licences.py old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/sheet.py b/dataproxy/xlrd/sheet.py old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/timemachine.py b/dataproxy/xlrd/timemachine.py old mode 100644 new mode 100755 diff --git a/dataproxy/xlrd/xldate.py b/dataproxy/xlrd/xldate.py old mode 100644 new mode 100755