From d5b389dd2e156a54084be21fb6a3fdb5273ac1ae Mon Sep 17 00:00:00 2001 From: James B Date: Wed, 19 Jul 2023 09:08:10 +0100 Subject: [PATCH] utils.py: get_file_type_for_flatten_tool: consider content type too Needed for https://github.com/openownership/cove-bods/issues/101 --- CHANGELOG.md | 5 +++++ libcoveweb2/settings.py | 20 ++++++++++++++------ libcoveweb2/utils.py | 20 ++++++++++++++++++++ 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 633d75d..c572c9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## Added + +- utils.py: get_file_type_for_flatten_tool: consider content type too +- settings.ALLOWED_UNKNOWN_CONTENT_TYPES. + ## Fixed - utils.py: get_file_type_for_flatten_tool: include an error message in raise at end https://github.com/OpenDataServices/lib-cove-web-2/issues/3 diff --git a/libcoveweb2/settings.py b/libcoveweb2/settings.py index 2145264..f985d86 100644 --- a/libcoveweb2/settings.py +++ b/libcoveweb2/settings.py @@ -173,21 +173,28 @@ }, } -ALLOWED_JSON_CONTENT_TYPES = ["application/json", "application/octet-stream"] +# Sometimes uploads happen with a generic content type. +# In this case, we can't rely on content type to detect type. +# But the type is still allowed, so it's added to ALLOWED_*_CONTENT_TYPES later. +ALLOWED_UNKNOWN_CONTENT_TYPES = ["application/octet-stream"] + +# JSON details +ALLOWED_JSON_CONTENT_TYPES = ["application/json"] + ALLOWED_UNKNOWN_CONTENT_TYPES ALLOWED_JSON_EXTENSIONS = [".json"] +# Excel details ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES = [ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "application/octet-stream", -] +] + ALLOWED_UNKNOWN_CONTENT_TYPES ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS = [".xlsx"] +# Open Document details ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES = [ "application/vnd.oasis.opendocument.spreadsheet", - "application/octet-stream", -] +] + ALLOWED_UNKNOWN_CONTENT_TYPES ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS = [".ods"] +# Spreadsheet details (sum of details above) ALLOWED_SPREADSHEET_CONTENT_TYPES = ( ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES + ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES @@ -196,5 +203,6 @@ ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS + ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS ) -ALLOWED_CSV_CONTENT_TYPES = ["text/csv", "application/octet-stream"] +# CSV Details +ALLOWED_CSV_CONTENT_TYPES = ["text/csv"] + ALLOWED_UNKNOWN_CONTENT_TYPES ALLOWED_CSV_EXTENSIONS = [".csv"] diff --git a/libcoveweb2/utils.py b/libcoveweb2/utils.py index 1379b02..efe6e53 100644 --- a/libcoveweb2/utils.py +++ b/libcoveweb2/utils.py @@ -29,6 +29,26 @@ def get_file_type_for_flatten_tool(supplied_data_file: SuppliedDataFile): for extension in settings.ALLOWED_CSV_EXTENSIONS: if supplied_data_file.filename.lower().endswith(extension): return "csv" + # Check the content type + if ( + supplied_data_file.content_type + and supplied_data_file.content_type + not in settings.ALLOWED_UNKNOWN_CONTENT_TYPES + ): + if supplied_data_file.content_type in settings.ALLOWED_JSON_CONTENT_TYPES: + return "json" + if ( + supplied_data_file.content_type + in settings.ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES + ): + return "xlsx" + if ( + supplied_data_file.content_type + in settings.ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES + ): + return "ods" + if supplied_data_file.content_type in settings.ALLOWED_CSV_CONTENT_TYPES: + return "csv" # Try and load the first bit of the file to see if it's JSON? try: with open(supplied_data_file.upload_dir_and_filename(), "rb") as fp: