From ff61dd7d26734638aea01ee85e7bebff01a89aad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E0=B0=A6=E0=B0=BE=E0=B0=AE=E0=B1=8B=E0=B0=A6=E0=B0=B0?= Date: Sat, 22 Aug 2020 10:59:44 +0530 Subject: [PATCH 1/2] Normalize content-type before extention computation in SimpleHTTPResolver fixes #508 --- loris/resolver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loris/resolver.py b/loris/resolver.py index 9c5c838f..522a6e81 100644 --- a/loris/resolver.py +++ b/loris/resolver.py @@ -298,7 +298,8 @@ def cached_file_for_ident(self, ident): def cache_file_extension(self, ident, response): if 'content-type' in response.headers: try: - extension = self.get_format(ident, constants.FORMATS_BY_MEDIA_TYPE[response.headers['content-type']]) + content_type = response.headers['content-type'].split(';')[0] + extension = self.get_format(ident, constants.FORMATS_BY_MEDIA_TYPE[content_type]) except KeyError: logger.warn('Your server may be responding with incorrect content-types. Reported %s for ident %s.', response.headers['content-type'], ident) From 397ebb06b415465691c2200f8996f92af0d77838 Mon Sep 17 00:00:00 2001 From: damooo Date: Thu, 27 Aug 2020 18:31:05 +0530 Subject: [PATCH 2/2] content-type normalization in SimpleHTTPResolver using cgi; updated test-case --- .gitignore | 2 ++ loris/resolver.py | 3 ++- tests/simple_http_resolver_ut.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index a0e5c6b9..608006ce 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,5 @@ todo.md .hypothesis .coverage + +.idea/ diff --git a/loris/resolver.py b/loris/resolver.py index 522a6e81..ad311913 100644 --- a/loris/resolver.py +++ b/loris/resolver.py @@ -2,6 +2,7 @@ `resolver` -- Resolve Identifiers to Image Paths ================================================ """ +import cgi from contextlib import closing import glob import json @@ -298,7 +299,7 @@ def cached_file_for_ident(self, ident): def cache_file_extension(self, ident, response): if 'content-type' in response.headers: try: - content_type = response.headers['content-type'].split(';')[0] + content_type = cgi.parse_header(response.headers['content-type'])[0] extension = self.get_format(ident, constants.FORMATS_BY_MEDIA_TYPE[content_type]) except KeyError: logger.warn('Your server may be responding with incorrect content-types. Reported %s for ident %s.', diff --git a/tests/simple_http_resolver_ut.py b/tests/simple_http_resolver_ut.py index 86ac8837..7319e006 100644 --- a/tests/simple_http_resolver_ut.py +++ b/tests/simple_http_resolver_ut.py @@ -63,14 +63,14 @@ def set_responses(self): responses.HEAD, self.identifier_url, status=200, - content_type='image/tiff' + content_type='image/tiff; charset=UTF-8' ) responses.add( responses.GET, self.identifier_url, body='II*\x00\x0c\x00\x00\x00\x80\x00 \x0e\x00\x00\x01\x03\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x01\x03\x00\x01\x00\x00\x00\x01\x00\x00\x00\x02\x01\x03\x00\x01\x00\x00\x00\x08\x00\x00\x00\x03\x01\x03\x00\x01\x00\x00\x00\x05\x00\x00\x00\x06\x01\x03\x00\x01\x00\x00\x00\x03\x00\x00\x00\x11\x01\x04\x00\x01\x00\x00\x00\x08\x00\x00\x00\x15\x01\x03\x00\x01\x00\x00\x00\x01\x00\x00\x00\x16\x01\x03\x00\x01\x00\x00\x00\x08\x00\x00\x00\x17\x01\x04\x00\x01\x00\x00\x00\x04\x00\x00\x00\x1a\x01\x05\x00\x01\x00\x00\x00\xba\x00\x00\x00\x1b\x01\x05\x00\x01\x00\x00\x00\xc2\x00\x00\x00\x1c\x01\x03\x00\x01\x00\x00\x00\x01\x00\x00\x00(\x01\x03\x00\x01\x00\x00\x00\x02\x00\x00\x00@\x01\x03\x00\x00\x03\x00\x00\xca\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x00\x00\x00\x01\x00\x00\x00H\x00\x00\x00\x01\x00\x00\x00\xff`\xe6q\x19\x08\x00\x00\x80\t\x00\x00\x80\n\x00\x00\x80\x0b\x00\x00\x80\x0c\x00\x00\x80\r', status=200, - content_type='image/tiff' + content_type='image/tiff; charset=UTF-8' ) responses.add( responses.HEAD,