Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for serving source files and other attachments from a CDN #1894

Merged
merged 10 commits into from
Jul 6, 2024
8 changes: 8 additions & 0 deletions peachjam/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,14 @@ def save_related(self, request, form, formsets, change):
super().save_related(request, form, formsets, change)
form.instance.save()

if change:
# the source file needs to update its filename to take changes into account
# refresh, because the source file may have been deleted
form.instance.refresh_from_db()
sf = getattr(form.instance, "source_file", None)
if sf:
sf.set_download_filename()

def get_urls(self):
return [
path(
Expand Down
5 changes: 5 additions & 0 deletions peachjam/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ def clean_file(self):
def save(self, commit=True):
# clear these for changed files so they get updated
if "file" in self.changed_data:
# get the old file and make sure it's deleted
if self.instance.pk:
existing = self.instance.__class__.objects.get(pk=self.instance.pk)
if existing.file:
existing.file.delete(False)
self.instance.size = None
self.instance.mimetype = None
self.instance.filename = self.instance.file.name
Expand Down
37 changes: 35 additions & 2 deletions peachjam/models/core_document_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import logging
import os
import re
import shutil
Expand Down Expand Up @@ -39,6 +40,8 @@
from peachjam.storage import DynamicStorageFileField
from peachjam.xmlutils import parse_html_str

log = logging.getLogger(__name__)


class Label(models.Model):
name = models.CharField(
Expand Down Expand Up @@ -700,7 +703,10 @@ def file_location(instance, filename):
pk = instance.document.pk
folder = instance.SAVE_FOLDER
filename = os.path.basename(filename)
return f"media/{doc_type}/{pk}/{folder}/{filename}"
# generate a random nonce so that we never re-use an existing filename, so that we can guarantee that
# we don't overwrite it (which makes it easier to cache files)
nonce = os.urandom(8).hex()
return f"media/{doc_type}/{pk}/{folder}/{nonce}/{filename}"


class AttachmentAbstractModel(models.Model):
Expand All @@ -725,6 +731,14 @@ def save(self, *args, **kwargs):
self.mimetype = magic.from_buffer(self.file.read(), mime=True)
return super().save(*args, **kwargs)

def delete(self, *args, **kwargs):
if self.file:
try:
self.file.delete(False)
except Exception as e:
log.warning(f"Ignoring error while deleting {self.file}", exc_info=e)
return super().delete(*args, **kwargs)


class Image(AttachmentAbstractModel):
SAVE_FOLDER = "images"
Expand Down Expand Up @@ -764,7 +778,6 @@ class SourceFile(AttachmentAbstractModel):
source_url = models.URLField(
_("source URL"), max_length=2048, null=True, blank=True
)

file_as_pdf = models.FileField(
_("file as pdf"),
upload_to=file_location,
Expand Down Expand Up @@ -807,6 +820,26 @@ def filename_for_download(self, ext=None):
title = re.sub(r"[^a-zA-Z0-9() ]", "", self.document.title)
return title + ext

def set_download_filename(self):
"""For S3-backed storages using a custom domain, set the content-disposition header to a filename suitable
for download."""
if not self.source_url and getattr(self.file.storage, "custom_domain", None):
metadata = self.file.storage.get_object_parameters(self.file.name)
metadata[
"ContentDisposition"
] = f'attachment; filename="{self.filename_for_download()}"'
src = {"Bucket": self.file.storage.bucket_name, "Key": self.file.name}
self.file.storage.connection.meta.client.copy_object(
CopySource=src, MetadataDirective="REPLACE", **src, **metadata
)

def save(self, *args, **kwargs):
pk = self.pk
super().save(*args, **kwargs)
if not pk:
# first save, set the download filename
self.set_download_filename()


class AttachedFileNature(models.Model):
name = models.CharField(
Expand Down
38 changes: 29 additions & 9 deletions peachjam/tests/test_storage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import re
import unittest.util
from unittest.mock import ANY, Mock, call

Expand All @@ -15,6 +16,17 @@
unittest.util._MAX_LENGTH = 999999999


class Pattern:
def __init__(self, p):
self.p = p if isinstance(p, re.Match) else re.compile(p)

def __eq__(self, other):
return self.p.match(other)

def __neq__(self, other):
return not self.p.match(other)


class TestableStorage(DynamicS3Boto3Storage):
pass

Expand Down Expand Up @@ -50,20 +62,22 @@ def test_basics(self):
[
call.Bucket("fake-bucket"),
call.Bucket().Object(
f"media/core_document/{self.doc.pk}/source_file/test.txt"
Pattern(
rf"media/core_document/{self.doc.pk}/source_file/[^/]+/test\.txt"
)
),
call.Bucket().Object().upload_fileobj(ANY, ExtraArgs=ANY, Config=ANY),
],
self.mock.mock_calls,
)

self.assertEqual(
[f"s3:fake-bucket:media/core_document/{self.doc.pk}/source_file/test.txt"],
list(SourceFile.objects.filter(pk=sf.pk).values_list("file", flat=True)),
self.assertRegex(
list(SourceFile.objects.filter(pk=sf.pk).values_list("file", flat=True))[0],
rf"s3:fake-bucket:media/core_document/{self.doc.pk}/source_file/[^/]+/test\.txt",
)
self.assertEqual(
f"s3:fake-bucket:media/core_document/{self.doc.pk}/source_file/test.txt",
self.assertRegex(
sf.file.get_raw_value(),
rf"s3:fake-bucket:media/core_document/{self.doc.pk}/source_file/[^/]+/test\.txt",
)

self.mock.reset_mock()
Expand All @@ -73,7 +87,9 @@ def test_basics(self):
[
call.Bucket("fake-bucket"),
call.Bucket().Object(
f"media/core_document/{self.doc.pk}/source_file/test.txt"
Pattern(
rf"media/core_document/{self.doc.pk}/source_file/[^/]+/test\.txt"
)
),
call.Bucket().Object().load(),
call.Bucket().Object().download_fileobj(ANY, ExtraArgs=ANY, Config=ANY),
Expand All @@ -89,7 +105,9 @@ def test_basics(self):
[
call.Bucket("fake-bucket"),
call.Bucket().Object(
f"media/core_document/{self.doc.pk}/source_file/test.txt"
Pattern(
rf"media/core_document/{self.doc.pk}/source_file/[^/]+/test\.txt"
)
),
call.Bucket().Object().load(),
call.Bucket().Object().download_fileobj(ANY, ExtraArgs=ANY, Config=ANY),
Expand All @@ -105,7 +123,9 @@ def test_basics(self):
[
call.Bucket("fake-bucket"),
call.Bucket().Object(
f"media/core_document/{self.doc.pk}/source_file/test.txt"
Pattern(
rf"media/core_document/{self.doc.pk}/source_file/[^/]+/test\.txt"
)
),
call.Bucket().Object().content_length(),
],
Expand Down
15 changes: 14 additions & 1 deletion peachjam/views/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ def render_to_response(self, context, **response_kwargs):
if source_file.source_url:
return redirect(source_file.source_url)

if getattr(source_file.file.storage, "custom_domain", None):
# use the storage's custom domain to serve the file
return redirect(source_file.file.url)

return self.make_response(
source_file.file.open(),
source_file.mimetype,
Expand All @@ -98,7 +102,7 @@ def render_to_response(self, context, **response_kwargs):
def make_response(self, f, content_type, fname):
file_bytes = f.read()
response = HttpResponse(file_bytes, content_type=content_type)
response["Content-Disposition"] = f"inline; filename={fname}"
response["Content-Disposition"] = f"attachment; filename={fname}"
response["Content-Length"] = str(len(file_bytes))
return response

Expand All @@ -112,6 +116,10 @@ def render_to_response(self, context, **response_kwargs):
if source_file.source_url and source_file.mimetype == "application/pdf":
return redirect(source_file.source_url)

if getattr(source_file.file.storage, "custom_domain", None):
# use the storage's custom domain to serve the file
return redirect(source_file.file.url)

pdf = source_file.as_pdf()
if pdf:
return self.make_response(
Expand All @@ -135,6 +143,11 @@ class DocumentMediaView(DetailView):

def render_to_response(self, context, **response_kwargs):
img = get_object_or_404(self.object.images, filename=self.kwargs["filename"])

if getattr(img.file.storage, "custom_domain", None):
# use the storage's custom domain to serve the file
return redirect(img.file.url)

file = img.file.open()
file_bytes = file.read()
response = HttpResponse(file_bytes, content_type=img.mimetype)
Expand Down
7 changes: 7 additions & 0 deletions tanzlii/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,10 @@
("en", _("English")),
("sw", _("Swahili")),
]


if not DEBUG: # noqa
# Tanzlii media files are stored on S3 and served via a Cloudflare CDN (via copying to R2).
# We can therefore set long-lived cache headers and serve them from a custom domain.
AWS_S3_OBJECT_PARAMETERS = {"CacheControl": f"max-age={86400*5}"}
AWS_S3_CUSTOM_DOMAIN = "media.tanzlii.org"
Loading