Skip to content

Commit

Permalink
✨(backend) allow uploading more types of attachments
Browse files Browse the repository at this point in the history
We want to allow users to upload files to a document, not just images.
We try to enforce coherence between the file extension and the real
mime type of its content. If a file is deemed unsafe, it is still accepted
during upload and the information is stored as metadata on the object
for display to readers.
  • Loading branch information
sampaccoud committed Oct 16, 2024
1 parent a9f08df commit e8d95fa
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 49 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ and this project adheres to

## Added

- ✨(backend) allow uploading more types of attachments #309
- ✨(backend) add name fields to the user synchronized with OIDC #301
- ✨(ci) add security scan #291
- ♻️(frontend) Add versions #277
Expand Down
11 changes: 6 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ ENV PYTHONUNBUFFERED=1

# Install required system libs
RUN apk add \
gettext \
cairo \
libffi-dev \
file \
font-noto \
font-noto-emoji \
gettext \
gdk-pixbuf \
pango \
libffi-dev \
pandoc \
font-noto-emoji \
font-noto \
pango \
shared-mime-info

# Copy entrypoint
Expand Down
41 changes: 34 additions & 7 deletions src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from django.db.models import Q
from django.utils.translation import gettext_lazy as _

import magic
from rest_framework import exceptions, serializers

from core import models
Expand Down Expand Up @@ -219,16 +220,42 @@ def validate_file(self, file):
f"File size exceeds the maximum limit of {max_size:d} MB."
)

# Validate file type
mime_type, _ = mimetypes.guess_type(file.name)
if mime_type not in settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES:
mime_types = ", ".join(settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES)
raise serializers.ValidationError(
f"File type '{mime_type:s}' is not allowed. Allowed types are: {mime_types:s}"
)
extension = file.name.rpartition(".")[-1] if "." in file.name else None

# Read the first few bytes to determine the MIME type accurately
mime = magic.Magic(mime=True)
magic_mime_type = mime.from_buffer(file.read(1024))
file.seek(0) # Reset file pointer to the beginning after reading

self.context["is_unsafe"] = (
magic_mime_type in settings.DOCUMENT_UNSAFE_MIME_TYPES
)

extension_mime_type, _ = mimetypes.guess_type(file.name)

# Try guessing a coherent extension from the mimetype
if extension_mime_type != magic_mime_type:
self.context["is_unsafe"] = True

guessed_ext = mimetypes.guess_extension(magic_mime_type)
# Missing extensions or extensions longer than 5 characters (it's as long as an extension
# can be) are replaced by the extension we eventually guessed from mimetype.
if (extension is None or len(extension) > 5) and guessed_ext:
extension = guessed_ext[1:]

if extension is None:
raise serializers.ValidationError("Could not determine file extension.")

self.context["expected_extension"] = extension

return file

def validate(self, attrs):
"""Override validate to add the computed extension to validated_data."""
attrs["expected_extension"] = self.context["expected_extension"]
attrs["is_unsafe"] = self.context["is_unsafe"]
return attrs


class TemplateSerializer(BaseResourceSerializer):
"""Serialize templates."""
Expand Down
18 changes: 12 additions & 6 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""API endpoints"""

import os
import re
import uuid
from urllib.parse import urlparse
Expand Down Expand Up @@ -476,15 +475,22 @@ def attachment_upload(self, request, *args, **kwargs):
return drf_response.Response(
serializer.errors, status=status.HTTP_400_BAD_REQUEST
)
# Extract the file extension from the original filename
file = serializer.validated_data["file"]
extension = os.path.splitext(file.name)[1]

# Generate a generic yet unique filename to store the image in object storage
file_id = uuid.uuid4()
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}{extension:s}"
extension = serializer.validated_data["expected_extension"]
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}.{extension:s}"

# Prepare metadata for storage
metadata = {"Metadata": {"owner": str(request.user.id)}}
if serializer.validated_data["is_unsafe"]:
metadata["Metadata"]["is_unsafe"] = "true"

file = serializer.validated_data["file"]
default_storage.connection.meta.client.upload_fileobj(
file, default_storage.bucket_name, key, ExtraArgs=metadata
)

default_storage.save(key, file)
return drf_response.Response(
{"file": f"{settings.MEDIA_URL:s}{key:s}"}, status=status.HTTP_201_CREATED
)
Expand Down
130 changes: 106 additions & 24 deletions src/backend/core/tests/documents/test_api_documents_attachment_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re
import uuid

from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.core.files.uploadedfile import SimpleUploadedFile

import pytest
Expand All @@ -16,6 +16,12 @@

pytestmark = pytest.mark.django_db

PIXEL = (
b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00"
b"\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\xf8\xff\xff?\x00\x05\xfe\x02\xfe"
b"\xa7V\xbd\xfa\x00\x00\x00\x00IEND\xaeB`\x82"
)


@pytest.mark.parametrize(
"reach, role",
Expand All @@ -33,7 +39,7 @@ def test_api_documents_attachment_upload_anonymous_forbidden(reach, role):
and role don't allow it.
"""
document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = APIClient().post(url, {"file": file}, format="multipart")
Expand All @@ -50,14 +56,14 @@ def test_api_documents_attachment_upload_anonymous_success():
if the link reach and role permit it.
"""
document = factories.DocumentFactory(link_reach="public", link_role="editor")
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = APIClient().post(url, {"file": file}, format="multipart")

assert response.status_code == 201

pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(response.json()["file"])
file_id = match.group(1)

Expand Down Expand Up @@ -85,7 +91,7 @@ def test_api_documents_attachment_upload_authenticated_forbidden(reach, role):
client.force_login(user)

document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")
Expand Down Expand Up @@ -114,14 +120,14 @@ def test_api_documents_attachment_upload_authenticated_success(reach, role):
client.force_login(user)

document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(response.json()["file"])
file_id = match.group(1)

Expand All @@ -148,7 +154,7 @@ def test_api_documents_attachment_upload_reader(via, mock_user_teams):
document=document, team="lasuite", role="reader"
)

file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")
Expand Down Expand Up @@ -179,20 +185,28 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
document=document, team="lasuite", role=role
)

file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
match = pattern.search(response.json()["file"])
file_path = response.json()["file"]
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(file_path)
file_id = match.group(1)

# Validate that file_id is a valid UUID
uuid.UUID(file_id)

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id)}


def test_api_documents_attachment_upload_invalid(client):
"""Attempt to upload without a file should return an explicit error."""
Expand Down Expand Up @@ -222,34 +236,102 @@ def test_api_documents_attachment_upload_size_limit_exceeded(settings):
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

# Create a temporary file larger than the allowed size
content = b"a" * (1048576 + 1)
file = ContentFile(content, name="test.jpg")
file = SimpleUploadedFile(
name="test.txt", content=b"a" * (1048576 + 1), content_type="text/plain"
)

response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 400
assert response.json() == {"file": ["File size exceeds the maximum limit of 1 MB."]}


def test_api_documents_attachment_upload_type_not_allowed(settings):
"""The uploaded file should be of a whitelisted type."""
settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES = ["image/jpeg", "image/png"]

@pytest.mark.parametrize(
"name,content,extension",
[
("test.exe", b"text", "exe"),
("test", b"text", "txt"),
("test.aaaaaa", b"test", "txt"),
("test.txt", PIXEL, "txt"),
("test.py", b"#!/usr/bin/python", "py"),
],
)
def test_api_documents_attachment_upload_fix_extension(name, content, extension):
"""
A file with no extension or a wrong extension is accepted and the extension
is corrected in storage.
"""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)

document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

# Create a temporary file with a not allowed type (e.g., text file)
file = ContentFile(b"a" * 1048576, name="test.txt")
file = SimpleUploadedFile(name=name, content=content)
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

file_path = response.json()["file"]
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.{extension:s}")
match = pattern.search(file_path)
file_id = match.group(1)

# Validate that file_id is a valid UUID
uuid.UUID(file_id)

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}


def test_api_documents_attachment_upload_empty_file():
"""An empty file should be rejected."""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)

document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

file = SimpleUploadedFile(name="test.png", content=b"")
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 400
assert response.json() == {
"file": [
"File type 'text/plain' is not allowed. Allowed types are: image/jpeg, image/png"
]
}
assert response.json() == {"file": ["The submitted file is empty."]}


def test_api_documents_attachment_upload_unsafe():
"""A file with an unsafe mime type should be tagged as such."""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)

document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

file = SimpleUploadedFile(
name="script.exe", content=b"\x4d\x5a\x90\x00\x03\x00\x00\x00"
)
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

file_path = response.json()["file"]
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.exe")
match = pattern.search(file_path)
file_id = match.group(1)

# Validate that file_id is a valid UUID
uuid.UUID(file_id)

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
Loading

0 comments on commit e8d95fa

Please sign in to comment.