Skip to content

Commit

Permalink
utils.py: get_file_type_for_flatten_tool: consider content type too
Browse files Browse the repository at this point in the history
  • Loading branch information
odscjames committed Jul 19, 2023
1 parent 5dbbc89 commit d5b389d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]

## Added

- utils.py: get_file_type_for_flatten_tool: consider content type too
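- settings.py: add ALLOWED_UNKNOWN_CONTENT_TYPES, the generic content types (e.g. application/octet-stream) that cannot be used to detect a file's type.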

## Fixed

- utils.py: get_file_type_for_flatten_tool: include an error message in raise at end https://github.com/OpenDataServices/lib-cove-web-2/issues/3
Expand Down
20 changes: 14 additions & 6 deletions libcoveweb2/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,21 +173,28 @@
},
}

ALLOWED_JSON_CONTENT_TYPES = ["application/json", "application/octet-stream"]
# Sometimes files are uploaded with a generic content type.
# In that case the content type can't be used to detect the file type.
# These generic types are still allowed, so they are appended to each ALLOWED_*_CONTENT_TYPES list below.
ALLOWED_UNKNOWN_CONTENT_TYPES = ["application/octet-stream"]

# JSON details
ALLOWED_JSON_CONTENT_TYPES = ["application/json"] + ALLOWED_UNKNOWN_CONTENT_TYPES
ALLOWED_JSON_EXTENSIONS = [".json"]

# Excel details
ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES = [
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/octet-stream",
]
] + ALLOWED_UNKNOWN_CONTENT_TYPES
ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS = [".xlsx"]

# Open Document details
ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES = [
"application/vnd.oasis.opendocument.spreadsheet",
"application/octet-stream",
]
] + ALLOWED_UNKNOWN_CONTENT_TYPES
ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS = [".ods"]

# Spreadsheet details (sum of details above)
ALLOWED_SPREADSHEET_CONTENT_TYPES = (
ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES
+ ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES
Expand All @@ -196,5 +203,6 @@
ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS + ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS
)

ALLOWED_CSV_CONTENT_TYPES = ["text/csv", "application/octet-stream"]
# CSV Details
ALLOWED_CSV_CONTENT_TYPES = ["text/csv"] + ALLOWED_UNKNOWN_CONTENT_TYPES
ALLOWED_CSV_EXTENSIONS = [".csv"]
20 changes: 20 additions & 0 deletions libcoveweb2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,26 @@ def get_file_type_for_flatten_tool(supplied_data_file: SuppliedDataFile):
for extension in settings.ALLOWED_CSV_EXTENSIONS:
if supplied_data_file.filename.lower().endswith(extension):
return "csv"
# Check the content type
if (
supplied_data_file.content_type
and supplied_data_file.content_type
not in settings.ALLOWED_UNKNOWN_CONTENT_TYPES
):
if supplied_data_file.content_type in settings.ALLOWED_JSON_CONTENT_TYPES:
return "json"
if (
supplied_data_file.content_type
in settings.ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES
):
return "xlsx"
if (
supplied_data_file.content_type
in settings.ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES
):
return "ods"
if supplied_data_file.content_type in settings.ALLOWED_CSV_CONTENT_TYPES:
return "csv"
# Try to load the first part of the file to see if it's JSON.
try:
with open(supplied_data_file.upload_dir_and_filename(), "rb") as fp:
Expand Down

0 comments on commit d5b389d

Please sign in to comment.