From 6fb93bdd10fe93e802ecbe3b21113d2b4eaf4427 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 29 Nov 2024 17:07:44 +0000 Subject: [PATCH 01/63] Implement simplified private_media app --- cms/private_media/__init__.py | 0 cms/private_media/apps.py | 11 ++ cms/private_media/bulk_operations.py | 57 +++++++++ cms/private_media/constants.py | 7 ++ cms/private_media/management/__init__.py | 0 .../management/commands/__init__.py | 0 .../retry_file_permission_set_attempts.py | 54 +++++++++ cms/private_media/managers.py | 107 +++++++++++++++++ cms/private_media/models/__init__.py | 3 + cms/private_media/models/documents.py | 26 +++++ cms/private_media/models/images.py | 98 ++++++++++++++++ cms/private_media/models/mixins.py | 108 ++++++++++++++++++ cms/private_media/signal_handlers.py | 88 ++++++++++++++ cms/private_media/storages.py | 51 +++++++++ cms/private_media/utils.py | 11 ++ cms/private_media/views.py | 34 ++++++ cms/private_media/wagtail_hooks.py | 22 ++++ cms/settings/base.py | 4 +- 18 files changed, 680 insertions(+), 1 deletion(-) create mode 100644 cms/private_media/__init__.py create mode 100644 cms/private_media/apps.py create mode 100644 cms/private_media/bulk_operations.py create mode 100644 cms/private_media/constants.py create mode 100644 cms/private_media/management/__init__.py create mode 100644 cms/private_media/management/commands/__init__.py create mode 100644 cms/private_media/management/commands/retry_file_permission_set_attempts.py create mode 100644 cms/private_media/managers.py create mode 100644 cms/private_media/models/__init__.py create mode 100644 cms/private_media/models/documents.py create mode 100644 cms/private_media/models/images.py create mode 100644 cms/private_media/models/mixins.py create mode 100644 cms/private_media/signal_handlers.py create mode 100644 cms/private_media/storages.py create mode 100644 cms/private_media/utils.py create mode 100644 cms/private_media/views.py create mode 100644 cms/private_media/wagtail_hooks.py diff --git a/cms/private_media/__init__.py b/cms/private_media/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cms/private_media/apps.py b/cms/private_media/apps.py new file mode 100644 index 00000000..2dbbdc4a --- /dev/null +++ b/cms/private_media/apps.py @@ -0,0 +1,11 @@ +from django.apps import AppConfig + + +class PrivateMediaConfig(AppConfig): + default_auto_field = "django.db.models.AutoField" + name = "cms.private_media" + + def ready(self) -> None: + from .signal_handlers import register_signal_handlers # pylint: disable=import-outside-toplevel + + register_signal_handlers() diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py new file mode 100644 index 00000000..ad681149 --- /dev/null +++ b/cms/private_media/bulk_operations.py @@ -0,0 +1,57 @@ +import concurrent.futures +import logging +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING + +from django.conf import settings + +from cms.private_media.constants import Privacy + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + +logger = logging.getLogger(__name__) + + +def bulk_set_file_permissions(files: Iterable["FieldFile"], intended_privacy: Privacy) -> dict["FieldFile", bool]: + """Set file permissions for an iterable of FieldFile objects, using the + make_private() or make_public() methods of the storage backend. + + Uses a thread pool to set the file permissions in parallel where possible. + + Args: + files: An iterable of FieldFile objects to set permissions for + intended_privacy: The intended privacy status for the supplied files + + Returns: + dict[FieldFile, bool]: A mapping of files to a boolean indicating whether + the permission setting request was successful + """ + results: dict[FieldFile, bool] = {} + + def set_file_permission_and_report(file: "FieldFile") -> None: + storage = file.storage + handler: Callable[[FieldFile], bool] | None + if intended_privacy is Privacy.PRIVATE: + handler = getattr(storage, "make_private", None) + elif intended_privacy is Privacy.PUBLIC: + handler = getattr(storage, "make_public", None) + + if handler is None: + logger.debug( + "%s does not support setting of individual file permissions to %s, so skipping for: %s.", + storage.__class__.__name__, + intended_privacy, + file.name, + ) + results[file] = True + else: + results[file] = handler(file) + + with concurrent.futures.ThreadPoolExecutor( + max_workers=int(settings.PRIVATE_MEDIA_PERMISSION_SETTING_MAX_WORKERS) + ) as executor: + executor.map(set_file_permission_and_report, files) + + return results diff --git a/cms/private_media/constants.py b/cms/private_media/constants.py new file mode 100644 index 00000000..a108ab74 --- /dev/null +++ b/cms/private_media/constants.py @@ -0,0 +1,7 @@ +from django.db.models import TextChoices +from django.utils.translation import gettext_lazy as _ + + +class Privacy(TextChoices): + PRIVATE = "private", _("Private") + PUBLIC = "public", _("Public") diff --git a/cms/private_media/management/__init__.py b/cms/private_media/management/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cms/private_media/management/commands/__init__.py b/cms/private_media/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cms/private_media/management/commands/retry_file_permission_set_attempts.py b/cms/private_media/management/commands/retry_file_permission_set_attempts.py new file mode 100644 index 00000000..0da730eb --- /dev/null +++ b/cms/private_media/management/commands/retry_file_permission_set_attempts.py @@ -0,0 +1,54 @@ +from typing import TYPE_CHECKING, Any + +from django.core.management.base import BaseCommand +from django.db.models import F + +from cms.private_media.constants import Privacy +from cms.private_media.utils import get_private_media_models + +if TYPE_CHECKING: + from cms.private_media.models import PrivateMediaMixin + + +class Command(BaseCommand): + def add_arguments(self, parser: Any) -> None: + parser.add_argument( + "--dry-run", + action="store_true", + dest="dry_run", + default=False, + help="Dry run -- don't change anything.", + ) + + def handle(self, *args: Any, **options: Any) -> None: + self.dry_run = options["dry_run"] # pylint: disable=attribute-defined-outside-init + if self.dry_run: + self.stdout.write("This is a dry run.") + + for model in get_private_media_models(): + permissions_outdated = list(model.objects.filter(file_permissions_last_set__lt=F("privacy_last_changed"))) + self.stdout.write(f"{len(permissions_outdated)} {model.__name__} instances have outdated file permissions.") + if permissions_outdated: + make_private = [] + make_public = [] + for obj in permissions_outdated: + if obj.privacy is Privacy.PRIVATE: + make_private.append(obj) + elif obj.privacy is Privacy.PUBLIC: + make_public.append(obj) + + self.update_file_permissions(model, make_private, Privacy.PRIVATE) + self.update_file_permissions(model, make_public, Privacy.PUBLIC) + + def update_file_permissions( + self, model_class: type["PrivateMediaMixin"], items: list["PrivateMediaMixin"], privacy: Privacy + ) -> None: + """Update the file permissions for the provided items to reflect the provided privacy status.""" + plural = model_class._meta.verbose_name_plural + if self.dry_run: + self.stdout.write(f"Would update file permissions for {len(items)} {privacy} {plural}.") + else: + result = model_class.objects.bulk_update_file_permissions( # type: ignore[attr-defined] + items, privacy + ) + self.stdout.write(f"File permissions successfully updated for {result} public {plural}.") diff --git a/cms/private_media/managers.py b/cms/private_media/managers.py new file mode 100644 index 00000000..d1cc1db6 --- /dev/null +++ b/cms/private_media/managers.py @@ -0,0 +1,107 @@ +from collections import defaultdict +from collections.abc import Iterable +from typing import TYPE_CHECKING + +from django.db import models +from django.utils import timezone +from wagtail.documents.models import DocumentQuerySet +from wagtail.images.models import ImageQuerySet + +from .bulk_operations import bulk_set_file_permissions +from .constants import Privacy + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + from cms.private_media.models import PrivateMediaMixin + + +class PrivateMediaModelManager(models.Manager): + """A custom model `Manager` to be used by concrete subclasses of + `PrivateMediaCollectionMember`. It includes several methods for + applying privacy-related changes to multiple objects at the same + time. + """ + + def bulk_set_privacy(self, objects: Iterable["PrivateMediaMixin"], intended_privacy: Privacy) -> int: + """Update an iterable of objects of this type to reflect the 'intended_privacy' + as efficiently as possible. Returns the number of objects that were actually + updated in the process. + """ + to_update = [] + for obj in objects: + if obj.privacy != intended_privacy: + obj.privacy = intended_privacy + to_update.append(obj) + if not to_update: + return 0 + + count = self.bulk_update(to_update, fields=["_privacy", "privacy_last_changed"]) # type: ignore[arg-type] + self.bulk_update_file_permissions(to_update, intended_privacy) + return count + + def bulk_make_public(self, objects: Iterable["PrivateMediaMixin"]) -> int: + """Make an iterable of objects of this type 'public' as efficiently as + possible. Returns the number of objects that were actually updated in + the process. + """ + return self.bulk_set_privacy(objects, Privacy.PUBLIC) + + def bulk_make_private(self, objects: Iterable["PrivateMediaMixin"]) -> int: + """Make an iterable of objects of this type 'private' as efficiently as + possible. Returns the number of objects that were actually updated + in the process. + """ + return self.bulk_set_privacy(objects, Privacy.PRIVATE) + + def bulk_update_file_permissions(self, objects: Iterable["PrivateMediaMixin"], intended_privacy: Privacy) -> int: + """For an itrerable of objects of this type, set the file permissions for all + related files to reflect `intended_privacy`. Returns the number of objects + for which all related files were successfully updated (which will also have + their 'file_permissions_last_set' datetime updated). + """ + successfully_updated_objects = [] + files_by_object: dict[PrivateMediaMixin, list[FieldFile]] = defaultdict(list) + all_files = [] + + for obj in objects: + for file in obj.get_privacy_controlled_files(): + all_files.append(file) + files_by_object[obj].append(file) + + results = bulk_set_file_permissions(all_files, intended_privacy) + + for obj, files in files_by_object.items(): + if all(results.get(file) for file in files): + obj.file_permissions_last_set = timezone.now() + successfully_updated_objects.append(obj) + + if successfully_updated_objects: + return self.bulk_update( + successfully_updated_objects, # type: ignore[arg-type] + fields=["file_permissions_last_set"], + ) + + return 0 + + +class PrivateImageManager(PrivateMediaModelManager): + """A subclass of `PrivateMediaModelManager` that returns instances of + Wagtail's custom `ImageQuerySet`, which includes image-specific + filter methods other functionality that Wagtail itself depends on. + """ + + def get_queryset(self) -> ImageQuerySet: + """Return an `ImageQuerySet` instance.""" + return ImageQuerySet(self.model, using=self._db) + + +class PrivateDocumentManager(PrivateMediaModelManager): + """A subclass of `PrivateMediaModelManager` that returns instances of + Wagtail's custom `DocumentQuerySet`, which includes document-specific + filter methods other functionality that Wagtail itself depends on. + """ + + def get_queryset(self) -> DocumentQuerySet: + """Return a `DocumentQuerySet` instance.""" + return DocumentQuerySet(self.model, using=self._db) diff --git a/cms/private_media/models/__init__.py b/cms/private_media/models/__init__.py new file mode 100644 index 00000000..1e95900a --- /dev/null +++ b/cms/private_media/models/__init__.py @@ -0,0 +1,3 @@ +from .documents import PrivateDocumentMixin # noqa: F401 +from .images import AbstractPrivateRendition, PrivateImageMixin # noqa: F401 +from .mixins import PrivateMediaMixin # noqa: F401 diff --git a/cms/private_media/models/documents.py b/cms/private_media/models/documents.py new file mode 100644 index 00000000..13224015 --- /dev/null +++ b/cms/private_media/models/documents.py @@ -0,0 +1,26 @@ +from collections.abc import Iterator +from typing import TYPE_CHECKING, ClassVar + +from cms.private_media.managers import PrivateDocumentManager + +from .mixins import PrivateMediaMixin + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + +class PrivateDocumentMixin(PrivateMediaMixin): + """A mixin class to be applied to a project's custom Document model, + allowing the privacy to be controlled effectively, depending on the + collection the image belongs to. + """ + + objects: ClassVar[PrivateDocumentManager] = PrivateDocumentManager() + + class Meta: + abstract = True + + def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: + file: FieldFile | None = getattr(self, "file", None) + if file: + yield file diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py new file mode 100644 index 00000000..5b2bb45c --- /dev/null +++ b/cms/private_media/models/images.py @@ -0,0 +1,98 @@ +from collections.abc import Iterator +from typing import TYPE_CHECKING, ClassVar + +from wagtail.images.models import AbstractImage, AbstractRendition + +from cms.private_media.bulk_operations import bulk_set_file_permissions +from cms.private_media.managers import PrivateImageManager + +from .mixins import PrivateMediaMixin + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + from wagtail.images.models import Filter + + +class PrivateImageMixin(PrivateMediaMixin): + """A mixin class to be applied to a project's custom Image model, + allowing the file privacy to be controlled effectively. + """ + + objects: ClassVar[PrivateImageManager] = PrivateImageManager() + + class Meta: + abstract = True + + def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: + file: FieldFile | None = getattr(self, "file", None) + if file: + yield file + for rendition in self.renditions.all(): # type: ignore[attr-defined] + rendition_file: FieldFile = rendition.file + yield rendition_file + + def create_renditions(self, *filters: "Filter") -> dict["Filter", AbstractRendition]: + """Create image renditions and set their privacy permissions. + + Args: + *filters: Filter objects defining the renditions to create + + Returns: + dict: Mapping of filters to their corresponding rendition objects + """ + created_renditions: dict[Filter, AbstractRendition] = super().create_renditions(*filters) # type: ignore[misc] + files = [r.file for r in created_renditions.values()] + bulk_set_file_permissions(files, self.privacy) + return created_renditions + + +class AbstractPrivateRendition(AbstractRendition): + """A replacement for Wagtail's built-in `AbstractRendition` model, that should be used as + a base for rendition models for image models subclassing `PrivateImageMixin`. This + is necessary to ensure that only users with relevant permissions can view renditions + for private images. + """ + + class Meta: + abstract = True + + @staticmethod + def construct_cache_key(image: "AbstractImage", filter_cache_key: str, filter_spec: str) -> str: + """Construct a cache key for the rendition that includes privacy status. + + Args: + image: The source image + filter_cache_key: The filter's cache key + filter_spec: The filter specification string + + Returns: + str: A unique cache key for the rendition + """ + return "wagtail-rendition-" + "-".join( + [ + str(image.id), + image.file_hash, + str(image.privacy), + filter_cache_key, + filter_spec, + ] + ) + + @property + def url(self) -> str: + """Get the URL for accessing the rendition. + + Returns a direct file URL for public images with up-to-date permissions, + or a permission-checking view URL for private or unprocessed images. + + Returns: + str: URL for accessing the rendition + """ + from wagtail.images.views.serve import generate_image_url # pylint: disable=import-outside-toplevel + + image: PrivateImageMixin = self.image # pylint: disable=no-member + if image.is_public and not image.file_permissions_are_outdated(): + file_url: str = self.file.url + return file_url + generated_url: str = generate_image_url(image, self.filter_spec) + return generated_url diff --git a/cms/private_media/models/mixins.py b/cms/private_media/models/mixins.py new file mode 100644 index 00000000..afe3d1e0 --- /dev/null +++ b/cms/private_media/models/mixins.py @@ -0,0 +1,108 @@ +import logging +from collections.abc import Iterator +from typing import TYPE_CHECKING, Any, ClassVar + +from django.db import models +from django.utils import timezone + +from cms.private_media.bulk_operations import bulk_set_file_permissions +from cms.private_media.constants import Privacy +from cms.private_media.managers import PrivateMediaModelManager + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + +logger = logging.getLogger(__name__) + + +class PrivateMediaMixin(models.Model): + """A mixin class for models that has files that need to remain private + until the object itself is no longer private. + + Subclasses must implement the `determine_privacy()` method, which should + return a `Privacy` value to determine the correct value for the `is_private` + field (and by extension, whether files should be made private or public). + + Subclasses must implement the `get_privacy_controlled_files()` method, + which should return an iterable of `FieldFile` objects that are managed + by the model instance. + + Where individual objects are updated (usually via the Wagtail UI), changes to + managed field values are written to the database by the overridden + `save()` method. + + Where multiple objects are updated at once (e.g. via a signal handler or + management command running on the server), changes to managed field values + are written to the database by the bulk update methods provided by + `PrivateMediaModelManager`. + """ + + _privacy = models.CharField(max_length=10, choices=Privacy.choices, default=Privacy.PRIVATE) + file_permissions_last_set = models.DateTimeField(editable=False, null=True) + privacy_last_changed = models.DateTimeField(editable=False, default=timezone.now) + + class Meta: + abstract = True + + objects: ClassVar[models.Manager] = PrivateMediaModelManager() + + @property + def privacy(self) -> Privacy: + """Return the privacy status of the object.""" + if isinstance(self._privacy, Privacy): + return self._privacy + for item in Privacy: + if item.value == self._privacy: + return item + raise ValueError(f"Invalid privacy value: {self._privacy}") + + @privacy.setter + def privacy(self, value: Privacy) -> None: + """Set the privacy status of the object and conditionally update the + privacy_last_changed timestamp if the privacy has changed. + """ + if self.privacy is not value: + self._privacy = value.value + self.privacy_last_changed = timezone.now() + + @property + def is_private(self) -> bool: + """Return True if the object is private.""" + return self.privacy is Privacy.PRIVATE + + @property + def is_public(self) -> bool: + """Return True if the object is public.""" + return self.privacy is Privacy.PUBLIC + + def save(self, *args: Any, set_file_permissions: bool = True, **kwargs: Any) -> None: + """Save the model instance and manage file permissions. + + Args: + set_file_permissions: If True, updates file permissions after saving + *args: Additional positional arguments passed to parent save method + **kwargs: Additional keyword arguments passed to parent save method + """ + # Save model field changes at this point + super().save(*args, **kwargs) + + # Trigger file permission updates after-the-fact + if set_file_permissions and self.file_permissions_are_outdated(): + results = bulk_set_file_permissions(self.get_privacy_controlled_files(), self.privacy) + # Only update 'file_permissions_last_set' if all updates were successfull + if set(results.values()) == {True}: + self.file_permissions_last_set = timezone.now() + kwargs.update(force_insert=False, update_fields=["file_permissions_last_set"]) + super().save(*args, **kwargs) + + def file_permissions_are_outdated(self) -> bool: + """Check if the file permissions are outdated relative to privacy changes.""" + return self.file_permissions_last_set is None or self.file_permissions_last_set < self.privacy_last_changed + + def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: + """Return an Iterator of files that are managed by the model instance. + + Returns: + Iterator[FieldFile]: An Iterator of files managed by the instance + """ + raise NotImplementedError diff --git a/cms/private_media/signal_handlers.py b/cms/private_media/signal_handlers.py new file mode 100644 index 00000000..6b0f93d3 --- /dev/null +++ b/cms/private_media/signal_handlers.py @@ -0,0 +1,88 @@ +from typing import TYPE_CHECKING, Any + +from django.contrib.contenttypes.models import ContentType +from django.core.exceptions import ImproperlyConfigured +from django.db.models import Exists, IntegerField, OuterRef +from django.db.models.functions import Cast +from wagtail.models import ReferenceIndex +from wagtail.signals import page_published, page_unpublished, published, unpublished + +from cms.private_media.models import PrivateImageMixin +from cms.private_media.utils import get_private_media_models + +if TYPE_CHECKING: + from django.db.models import Model + + +def publish_media_on_publish(instance: "Model", **kwargs: Any) -> None: + """Signal handler to be connected to the 'page_published' and 'published' + signals for all publishable models. It is responsible for identifying any + privacy-controlled media used by the object, and ensuring that it is also + made public. + """ + for model_class in get_private_media_models(): + model_ct = ContentType.objects.get_for_model(model_class) + referenced_pks = ( + ReferenceIndex.get_references_for_object(instance) + .filter(to_content_type=model_ct) + .annotate(int_object_id=Cast("to_object_id", output_field=IntegerField())) + .values_list("int_object_id", flat=True) + .distinct() + ) + queryset = model_class.objects.filter(pk__in=referenced_pks) + if issubclass(model_class, PrivateImageMixin): + queryset = queryset.prefetch_related("renditions") + + if hasattr(model_class.objects, "bulk_make_public"): + model_class.objects.bulk_make_public(queryset) + else: + raise ImproperlyConfigured( + f"The manager for {model_class.__name__} is missing a bulk_make_public() method implementation. " + "Did you override the manager class and forget to subclass PrivateMediaModelManager?", + ) + + +def unpublish_media_on_unpublish(instance: "Model", **kwargs: Any) -> None: + """Signal handler to be connected to the 'page_unpublished' and 'unpublished' + signals for all publishable models. It is responsible for identifying any + privacy-controlled media used solely by the object, and ensuring that it is + made private again. + """ + for model_class in get_private_media_models(): + model_ct = ContentType.objects.get_for_model(model_class) + referenced_pks = ( + ReferenceIndex.get_references_for_object(instance) + .filter(to_content_type=model_ct) + .annotate(int_object_id=Cast("to_object_id", output_field=IntegerField())) + .values_list("int_object_id", flat=True) + .distinct() + ) + queryset = ( + model_class.objects.filter(pk__in=referenced_pks) + .alias( + referenced_by_other_pages=Exists( + ReferenceIndex.objects.filter(to_content_type=model_ct, to_object_id=OuterRef("pk")).exclude( + content_type=ContentType.objects.get_for_model(instance), object_id=instance.pk + ) + ) + ) + .filter(referenced_by_other_pages=False) + ) + if issubclass(model_class, PrivateImageMixin): + queryset = queryset.prefetch_related("renditions") + + if hasattr(model_class.objects, "bulk_make_private"): + model_class.objects.bulk_make_private(queryset) + else: + raise ImproperlyConfigured( + f"The manager for {model_class.__name__} is missing a bulk_make_private() method implementation. " + "Did you override the manager class and forget to subclass PrivateMediaModelManager?", + ) + + +def register_signal_handlers() -> None: + """Register signal handlers for models using the private media system.""" + page_published.connect(publish_media_on_publish, dispatch_uid="publish_media") + page_unpublished.connect(unpublish_media_on_unpublish, dispatch_uid="unpublish_media") + published.connect(publish_media_on_publish, dispatch_uid="publish_media") + unpublished.connect(unpublish_media_on_unpublish, dispatch_uid="unpublish_media") diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py new file mode 100644 index 00000000..bd525d75 --- /dev/null +++ b/cms/private_media/storages.py @@ -0,0 +1,51 @@ +import logging +from typing import TYPE_CHECKING + +from django.core.files.storage import FileSystemStorage + +from botocore.exceptions import ClientError +from storages.backends.s3 import S3Storage + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + +logger = logging.getLogger(__name__) + + +class PrivacySettingS3Storage(S3Storage): + # pylint: disable=W0223 + private_acl_name = "private" + public_acl_name = "public-read" + + def make_private(self, file: "FieldFile") -> bool: + """Make the provided file private in S3.""" + return self._set_file_acl(file, self.private_acl_name) + + def make_public(self, file: "FieldFile") -> bool: + """Make the provided file publically readable in S3.""" + return self._set_file_acl(file, self.public_acl_name) + + def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: + obj = self.bucket.Object(file.name) + try: + obj_acl = obj.Acl() + except ClientError as e: + logger.debug("Failed to retrieve ACL for %s: %s", file.name, repr(e)) + return False + try: + obj_acl.put(ACL=acl_name) + except ClientError as e: + logger.debug("Failed to set ACL for %s: %s", file.name, repr(e)) + return False + + logger.info("ACL set successfully for %s", file.name) + return True + + +class DummyPrivacySettingFileSystemStorage(FileSystemStorage): + def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + return True + + def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + return True diff --git a/cms/private_media/utils.py b/cms/private_media/utils.py new file mode 100644 index 00000000..0bc6111a --- /dev/null +++ b/cms/private_media/utils.py @@ -0,0 +1,11 @@ +from functools import cache + +from django.apps import apps + +from cms.private_media.models import PrivateMediaMixin + + +@cache +def get_private_media_models() -> list[type[PrivateMediaMixin]]: + """Return all registered models that use the `PrivateMediaMixin` mixin.""" + return [m for m in apps.get_models() if issubclass(m, PrivateMediaMixin)] diff --git a/cms/private_media/views.py b/cms/private_media/views.py new file mode 100644 index 00000000..645aef99 --- /dev/null +++ b/cms/private_media/views.py @@ -0,0 +1,34 @@ +from typing import TYPE_CHECKING + +from django.core.exceptions import PermissionDenied +from django.shortcuts import redirect +from wagtail.images.permissions import permission_policy +from wagtail.images.views.serve import ServeView + +if TYPE_CHECKING: + from django.http import HttpResponse + + from cms.private_media.models import AbstractPrivateRendition, PrivateImageMixin + + +class PrivateImageServeView(ServeView): + def serve(self, rendition: "AbstractPrivateRendition") -> "HttpResponse": + image: PrivateImageMixin = rendition.image + + # If there's no reason (within our control) for the file not to be served by + # media infrastructure, redirect + if image.is_public and not image.file_permissions_are_outdated(): + return redirect(rendition.file.url) + + # Block access to private images if the user has insufficient permissions + user = self.request.user + if image.is_private and ( + not user.is_authenticated + or not permission_policy.user_has_any_permission_for_instance(user, ["choose", "add", "change"], image) + ): + raise PermissionDenied + + # Serve the file until it is no longer private, or file permissions + # have been set successfully + response: HttpResponse = super().serve(rendition) + return response diff --git a/cms/private_media/wagtail_hooks.py b/cms/private_media/wagtail_hooks.py new file mode 100644 index 00000000..4f1842b3 --- /dev/null +++ b/cms/private_media/wagtail_hooks.py @@ -0,0 +1,22 @@ +from typing import TYPE_CHECKING + +from django.core.exceptions import PermissionDenied +from wagtail import hooks +from wagtail.documents.permissions import permission_policy + +if TYPE_CHECKING: + from django.http import HttpRequest + + from cms.private_media.models import PrivateDocumentMixin + + +@hooks.register("before_serve_document") +def protect_private_documents(document: "PrivateDocumentMixin", request: "HttpRequest") -> None: + """Block access to private documents if the user has insufficient permissions.""" + if document.is_private and ( + not request.user.is_authenticated + or not permission_policy.user_has_any_permission_for_instance( + request.user, ["choose", "add", "change"], document + ) + ): + raise PermissionDenied diff --git a/cms/settings/base.py b/cms/settings/base.py index 2eb11493..e3be409d 100644 --- a/cms/settings/base.py +++ b/cms/settings/base.py @@ -62,6 +62,7 @@ "cms.documents", "cms.home", "cms.images", + "cms.private_media", "cms.release_calendar", "cms.themes", "cms.topics", @@ -393,7 +394,7 @@ INSTALLED_APPS += ["storages", "wagtail_storages"] # https://docs.djangoproject.com/en/stable/ref/settings/#std-setting-STORAGES - STORAGES["default"]["BACKEND"] = "storages.backends.s3.S3Storage" + STORAGES["default"]["BACKEND"] = "cms.private_media.storages.PrivacySettingS3Storage" AWS_STORAGE_BUCKET_NAME = env["AWS_STORAGE_BUCKET_NAME"] @@ -431,6 +432,7 @@ # https://github.com/jschneier/django-storages/blob/10d1929de5e0318dbd63d715db4bebc9a42257b5/storages/backends/s3boto3.py#L217 AWS_S3_URL_PROTOCOL = env.get("AWS_S3_URL_PROTOCOL", "https:") +PRIVATE_MEDIA_PERMISSION_SETTING_MAX_WORKERS = env.get("PRIVATE_MEDIA_PERMISSION_SETTING_MAX_WORKERS", 5) # Logging # This logging is configured to be used with Sentry and console logs. Console From 9f912b98b4b5dad25bfb6240dae9f099424f6f42 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 29 Nov 2024 17:08:22 +0000 Subject: [PATCH 02/63] Add to image and document models --- .../0003_customdocument__privacy_and_more.py | 28 +++++++++++++++++++ .../0004_alter_customdocument__privacy.py | 17 +++++++++++ cms/documents/models.py | 12 +++++--- .../0003_customimage__privacy_and_more.py | 28 +++++++++++++++++++ .../0004_alter_customimage__privacy.py | 17 +++++++++++ cms/images/models.py | 17 +++++++---- 6 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 cms/documents/migrations/0003_customdocument__privacy_and_more.py create mode 100644 cms/documents/migrations/0004_alter_customdocument__privacy.py create mode 100644 cms/images/migrations/0003_customimage__privacy_and_more.py create mode 100644 cms/images/migrations/0004_alter_customimage__privacy.py diff --git a/cms/documents/migrations/0003_customdocument__privacy_and_more.py b/cms/documents/migrations/0003_customdocument__privacy_and_more.py new file mode 100644 index 00000000..2f6df745 --- /dev/null +++ b/cms/documents/migrations/0003_customdocument__privacy_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 5.1.3 on 2024-11-29 16:19 + +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("documents", "0002_alter_customdocument_file_size"), + ] + + operations = [ + migrations.AddField( + model_name="customdocument", + name="_privacy", + field=models.CharField(default="public", max_length=10), + ), + migrations.AddField( + model_name="customdocument", + name="file_permissions_last_set", + field=models.DateTimeField(editable=False, null=True), + ), + migrations.AddField( + model_name="customdocument", + name="privacy_last_changed", + field=models.DateTimeField(default=django.utils.timezone.now, editable=False), + ), + ] diff --git a/cms/documents/migrations/0004_alter_customdocument__privacy.py b/cms/documents/migrations/0004_alter_customdocument__privacy.py new file mode 100644 index 00000000..2e5c1cb2 --- /dev/null +++ b/cms/documents/migrations/0004_alter_customdocument__privacy.py @@ -0,0 +1,17 @@ +# Generated by Django 5.1.3 on 2024-11-29 16:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("documents", "0003_customdocument__privacy_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="customdocument", + name="_privacy", + field=models.CharField(default="private", max_length=10), + ), + ] diff --git a/cms/documents/models.py b/cms/documents/models.py index 870a6d10..d966590f 100644 --- a/cms/documents/models.py +++ b/cms/documents/models.py @@ -1,12 +1,16 @@ -from wagtail.documents.models import AbstractDocument -from wagtail.documents.models import Document as WagtailDocument +from collections.abc import Sequence +from typing import ClassVar +from wagtail.documents.models import AbstractDocument, Document -class CustomDocument(AbstractDocument): +from cms.private_media.models import PrivateDocumentMixin + + +class CustomDocument(PrivateDocumentMixin, AbstractDocument): """Custom Wagtail document class. Using a custom class from the beginning allows us to add any customisations we may need. """ - admin_form_fields = WagtailDocument.admin_form_fields + admin_form_fields: ClassVar[Sequence[str]] = Document.admin_form_fields diff --git a/cms/images/migrations/0003_customimage__privacy_and_more.py b/cms/images/migrations/0003_customimage__privacy_and_more.py new file mode 100644 index 00000000..a638727e --- /dev/null +++ b/cms/images/migrations/0003_customimage__privacy_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 5.1.3 on 2024-11-29 16:19 + +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("images", "0002_customimage_description"), + ] + + operations = [ + migrations.AddField( + model_name="customimage", + name="_privacy", + field=models.CharField(default="public", max_length=10), + ), + migrations.AddField( + model_name="customimage", + name="file_permissions_last_set", + field=models.DateTimeField(editable=False, null=True), + ), + migrations.AddField( + model_name="customimage", + name="privacy_last_changed", + field=models.DateTimeField(default=django.utils.timezone.now, editable=False), + ), + ] diff --git a/cms/images/migrations/0004_alter_customimage__privacy.py b/cms/images/migrations/0004_alter_customimage__privacy.py new file mode 100644 index 00000000..9f9503c9 --- /dev/null +++ b/cms/images/migrations/0004_alter_customimage__privacy.py @@ -0,0 +1,17 @@ +# Generated by Django 5.1.3 on 2024-11-29 16:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("images", "0003_customimage__privacy_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="customimage", + name="_privacy", + field=models.CharField(default="private", max_length=10), + ), + ] diff --git a/cms/images/models.py b/cms/images/models.py index 0cd5ecf1..79039406 100644 --- a/cms/images/models.py +++ b/cms/images/models.py @@ -1,21 +1,28 @@ +from collections.abc import Sequence +from typing import ClassVar + from django.db import models -from wagtail.images.models import AbstractImage, AbstractRendition, Image +from wagtail.images.models import AbstractImage, Image + +from cms.private_media.models import AbstractPrivateRendition, PrivateImageMixin -class CustomImage(AbstractImage): +class CustomImage(PrivateImageMixin, AbstractImage): # type: ignore[django-manager-missing] """Custom Wagtail image class. Using a custom class from the beginning allows us to add any customisations we may need. """ - admin_form_fields = Image.admin_form_fields + admin_form_fields: ClassVar[Sequence[str]] = Image.admin_form_fields -class Rendition(AbstractRendition): +class Rendition(AbstractPrivateRendition): """Our custom rendition class.""" - image = models.ForeignKey["CustomImage"]("CustomImage", related_name="renditions", on_delete=models.CASCADE) + image = models.ForeignKey( # type: ignore[var-annotated] + "CustomImage", related_name="renditions", on_delete=models.CASCADE + ) class Meta: unique_together = (("image", "filter_spec", "focal_point_key"),) From 8aad84014f97a19c5830d08288cab2d2502e9002 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 29 Nov 2024 17:09:09 +0000 Subject: [PATCH 03/63] Enable custom image serve view --- cms/urls.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cms/urls.py b/cms/urls.py index 73dd109a..e2bfe623 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -2,7 +2,7 @@ from django.apps import apps from django.conf import settings -from django.urls import include, path +from django.urls import include, path, re_path from django.views.decorators.cache import never_cache from django.views.decorators.vary import vary_on_headers from django.views.generic import TemplateView @@ -13,6 +13,7 @@ from cms.core import views as core_views from cms.core.cache import get_default_cache_control_decorator +from cms.private_media.views import PrivateImageServeView if TYPE_CHECKING: from django.urls import URLPattern, URLResolver @@ -80,6 +81,7 @@ # Public URLs that are meant to be cached. urlpatterns = [ path("sitemap.xml", sitemap), + re_path(r"^images/([^/]*)/(\d*)/([^/]*)/[^/]*$", PrivateImageServeView.as_view(), name="wagtailimages_serve"), ] # Set public URLs to use the "default" cache settings. urlpatterns = decorate_urlpatterns(urlpatterns, get_default_cache_control_decorator()) From b30bd3b2ede56febe1c6c1cb525b4581e60a30e5 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 29 Nov 2024 17:13:39 +0000 Subject: [PATCH 04/63] Fix unrelated linter complain on user model --- cms/users/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/users/models.py b/cms/users/models.py index 40ebe675..2da2ce5d 100644 --- a/cms/users/models.py +++ b/cms/users/models.py @@ -1,5 +1,5 @@ from django.contrib.auth.models import AbstractUser -class User(AbstractUser): # type: ignore[django-manager-missing] +class User(AbstractUser): """Barebones custom user model.""" From 4dd76f3a3dd792654cd38b6929966b57a64753c7 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Sun, 1 Dec 2024 11:27:52 +0000 Subject: [PATCH 05/63] Purge serve and file URLs when the privacy of an object changes --- cms/private_media/managers.py | 17 ++++++++++++++++- cms/private_media/models/documents.py | 12 +++++++++++- cms/private_media/models/images.py | 26 ++++++++++++++++++++++---- cms/private_media/models/mixins.py | 19 ++++++++++++++++++- cms/private_media/utils.py | 14 ++++++++++++-- 5 files changed, 79 insertions(+), 9 deletions(-) diff --git a/cms/private_media/managers.py b/cms/private_media/managers.py index d1cc1db6..fe612fc2 100644 --- a/cms/private_media/managers.py +++ b/cms/private_media/managers.py @@ -4,11 +4,14 @@ from django.db import models from django.utils import timezone +from wagtail.contrib.frontend_cache.utils import PurgeBatch from wagtail.documents.models import DocumentQuerySet from wagtail.images.models import ImageQuerySet +from wagtail.models import Site from .bulk_operations import bulk_set_file_permissions from .constants import Privacy +from .utils import get_frontend_cache_configuration if TYPE_CHECKING: from django.db.models.fields.files import FieldFile @@ -38,6 +41,16 @@ def bulk_set_privacy(self, objects: Iterable["PrivateMediaMixin"], intended_priv count = self.bulk_update(to_update, fields=["_privacy", "privacy_last_changed"]) # type: ignore[arg-type] self.bulk_update_file_permissions(to_update, intended_privacy) + + if get_frontend_cache_configuration(): + sites = Site.objects.all() + serve_url_batch = PurgeBatch() + file_url_batch = PurgeBatch() + for obj in to_update: + serve_url_batch.add_urls(obj.get_privacy_controlled_serve_urls(sites)) + file_url_batch.add_urls(obj.get_privacy_controlled_file_urls()) + serve_url_batch.purge() + file_url_batch.purge() return count def bulk_make_public(self, objects: Iterable["PrivateMediaMixin"]) -> int: @@ -54,7 +67,9 @@ def bulk_make_private(self, objects: Iterable["PrivateMediaMixin"]) -> int: """ return self.bulk_set_privacy(objects, Privacy.PRIVATE) - def bulk_update_file_permissions(self, objects: Iterable["PrivateMediaMixin"], intended_privacy: Privacy) -> int: + def bulk_set_file_permissions( + self, objects: Iterable["PrivateMediaMixin"], intended_privacy: Privacy + ) -> list["PrivateMediaMixin"]: """For an itrerable of objects of this type, set the file permissions for all related files to reflect `intended_privacy`. Returns the number of objects for which all related files were successfully updated (which will also have diff --git a/cms/private_media/models/documents.py b/cms/private_media/models/documents.py index 13224015..76471b30 100644 --- a/cms/private_media/models/documents.py +++ b/cms/private_media/models/documents.py @@ -1,4 +1,4 @@ -from collections.abc import Iterator +from collections.abc import Iterable, Iterator from typing import TYPE_CHECKING, ClassVar from cms.private_media.managers import PrivateDocumentManager @@ -7,6 +7,7 @@ if TYPE_CHECKING: from django.db.models.fields.files import FieldFile + from wagtail.models import Site class PrivateDocumentMixin(PrivateMediaMixin): @@ -24,3 +25,12 @@ def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: file: FieldFile | None = getattr(self, "file", None) if file: yield file + + def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator[str]: + """Return an iterator of fully-fledged serve URLs for this document, covering the domains for all + provided sites. + """ + if not sites: + return + for site in sites: + yield site.root_url + self.url diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py index 5b2bb45c..14e9c07c 100644 --- a/cms/private_media/models/images.py +++ b/cms/private_media/models/images.py @@ -1,6 +1,7 @@ -from collections.abc import Iterator +from collections.abc import Iterable, Iterator from typing import TYPE_CHECKING, ClassVar +from django.utils.functional import cached_property from wagtail.images.models import AbstractImage, AbstractRendition from cms.private_media.bulk_operations import bulk_set_file_permissions @@ -11,6 +12,7 @@ if TYPE_CHECKING: from django.db.models.fields.files import FieldFile from wagtail.images.models import Filter + from wagtail.models import Site class PrivateImageMixin(PrivateMediaMixin): @@ -45,6 +47,16 @@ def create_renditions(self, *filters: "Filter") -> dict["Filter", AbstractRendit bulk_set_file_permissions(files, self.privacy) return created_renditions + def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator[str]: + """Return an iterator of fully-fledged serve URLs for this image, covering the domains for all + provided sites. + """ + if not sites: + return + for rendition in self.renditions.all(): + for site in sites: + yield site.root_url + rendition.serve_url + class AbstractPrivateRendition(AbstractRendition): """A replacement for Wagtail's built-in `AbstractRendition` model, that should be used as @@ -83,16 +95,22 @@ def url(self) -> str: """Get the URL for accessing the rendition. Returns a direct file URL for public images with up-to-date permissions, - or a permission-checking view URL for private or unprocessed images. + or a permission-checking 'ServeView' URL for private or unprocessed images. Returns: str: URL for accessing the rendition """ - from wagtail.images.views.serve import generate_image_url # pylint: disable=import-outside-toplevel - image: PrivateImageMixin = self.image # pylint: disable=no-member if image.is_public and not image.file_permissions_are_outdated(): file_url: str = self.file.url return file_url + return self.serve_url + + @cached_property + def serve_url(self) -> str: + """Return a permission-checking 'ServeView' URL for this rendition.""" + from wagtail.images.views.serve import generate_image_url # pylint: disable=import-outside-toplevel + + image: PrivateImageMixin = self.image # pylint: disable=no-member generated_url: str = generate_image_url(image, self.filter_spec) return generated_url diff --git a/cms/private_media/models/mixins.py b/cms/private_media/models/mixins.py index afe3d1e0..cc81e844 100644 --- a/cms/private_media/models/mixins.py +++ b/cms/private_media/models/mixins.py @@ -1,5 +1,5 @@ import logging -from collections.abc import Iterator +from collections.abc import Iterable, Iterator from typing import TYPE_CHECKING, Any, ClassVar from django.db import models @@ -11,6 +11,7 @@ if TYPE_CHECKING: from django.db.models.fields.files import FieldFile + from wagtail.models import Site logger = logging.getLogger(__name__) @@ -106,3 +107,19 @@ def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: Iterator[FieldFile]: An Iterator of files managed by the instance """ raise NotImplementedError + + def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator[str]: + """Return an iterator of fully-fledged serve URLs for this object, covering the domains for all provided + sites. It is the responsibility of the subclass to implement this method. + """ + raise NotImplementedError + + def get_privacy_controlled_file_urls(self) -> Iterator[str]: + """Return an iterator of fully-fledged file URLs for this object. + + NOTE: Will only return URLs that include a domain prefix (where the files are externally hosted). + """ + for file in self.get_privacy_controlled_files(): + url = file.url + if not url.startswith("/"): + yield url diff --git a/cms/private_media/utils.py b/cms/private_media/utils.py index 0bc6111a..b4cc6794 100644 --- a/cms/private_media/utils.py +++ b/cms/private_media/utils.py @@ -1,11 +1,21 @@ from functools import cache +from typing import TYPE_CHECKING, Any from django.apps import apps +from django.conf import settings -from cms.private_media.models import PrivateMediaMixin +if TYPE_CHECKING: + from cms.private_media.models import PrivateMediaMixin @cache -def get_private_media_models() -> list[type[PrivateMediaMixin]]: +def get_private_media_models() -> list[type["PrivateMediaMixin"]]: """Return all registered models that use the `PrivateMediaMixin` mixin.""" + from cms.private_media.models import PrivateMediaMixin # pylint: disable=import-outside-toplevel + return [m for m in apps.get_models() if issubclass(m, PrivateMediaMixin)] + + +def get_frontend_cache_configuration() -> dict[str, Any]: + """Return the frontend cache configuration for this project.""" + return getattr(settings, "WAGTAILFRONTENDCACHE", {}) From 12a148065187bfd94e0cb89cd66635e4479ad80f Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Mon, 2 Dec 2024 09:47:59 +0000 Subject: [PATCH 06/63] Rejig manager methods to avoid a separate update query for 'file_permissions_last_set' --- .../retry_file_permission_set_attempts.py | 18 +++++++--- cms/private_media/managers.py | 36 ++++++++++--------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/cms/private_media/management/commands/retry_file_permission_set_attempts.py b/cms/private_media/management/commands/retry_file_permission_set_attempts.py index 0da730eb..e2479775 100644 --- a/cms/private_media/management/commands/retry_file_permission_set_attempts.py +++ b/cms/private_media/management/commands/retry_file_permission_set_attempts.py @@ -4,6 +4,7 @@ from django.db.models import F from cms.private_media.constants import Privacy +from cms.private_media.models.images import PrivateImageMixin from cms.private_media.utils import get_private_media_models if TYPE_CHECKING: @@ -26,7 +27,10 @@ def handle(self, *args: Any, **options: Any) -> None: self.stdout.write("This is a dry run.") for model in get_private_media_models(): - permissions_outdated = list(model.objects.filter(file_permissions_last_set__lt=F("privacy_last_changed"))) + queryset = model.objects.filter(file_permissions_last_set__lt=F("privacy_last_changed")) + if not self.dry_run and issubclass(model, PrivateImageMixin): + queryset = queryset.prefetch_related("renditions") + permissions_outdated = list(queryset) self.stdout.write(f"{len(permissions_outdated)} {model.__name__} instances have outdated file permissions.") if permissions_outdated: make_private = [] @@ -48,7 +52,11 @@ def update_file_permissions( if self.dry_run: self.stdout.write(f"Would update file permissions for {len(items)} {privacy} {plural}.") else: - result = model_class.objects.bulk_update_file_permissions( # type: ignore[attr-defined] - items, privacy - ) - self.stdout.write(f"File permissions successfully updated for {result} public {plural}.") + updated_count = 0 + for item in model_class.objects.bulk_set_file_permissions( # type: ignore[attr-defined] + items, privacy, save_changes=True + ): + if not item.file_permissions_are_outdated(): + updated_count += 1 + + self.stdout.write(f"File permissions successfully updated for {updated_count} {privacy} {plural}.") diff --git a/cms/private_media/managers.py b/cms/private_media/managers.py index fe612fc2..cd7c6dd1 100644 --- a/cms/private_media/managers.py +++ b/cms/private_media/managers.py @@ -39,10 +39,14 @@ def bulk_set_privacy(self, objects: Iterable["PrivateMediaMixin"], intended_priv if not to_update: return 0 - count = self.bulk_update(to_update, fields=["_privacy", "privacy_last_changed"]) # type: ignore[arg-type] - self.bulk_update_file_permissions(to_update, intended_privacy) + to_update = self.bulk_set_file_permissions(to_update, intended_privacy, save_changes=False) - if get_frontend_cache_configuration(): + count = self.bulk_update( + to_update, + fields=["_privacy", "privacy_last_changed", "file_permissions_last_set"], + ) + + if count and get_frontend_cache_configuration(): sites = Site.objects.all() serve_url_batch = PurgeBatch() file_url_batch = PurgeBatch() @@ -68,16 +72,16 @@ def bulk_make_private(self, objects: Iterable["PrivateMediaMixin"]) -> int: return self.bulk_set_privacy(objects, Privacy.PRIVATE) def bulk_set_file_permissions( - self, objects: Iterable["PrivateMediaMixin"], intended_privacy: Privacy + self, objects: Iterable["PrivateMediaMixin"], intended_privacy: Privacy, *, save_changes: bool = False ) -> list["PrivateMediaMixin"]: """For an itrerable of objects of this type, set the file permissions for all - related files to reflect `intended_privacy`. Returns the number of objects - for which all related files were successfully updated (which will also have - their 'file_permissions_last_set' datetime updated). + related files to reflect `intended_privacy`. Returns a list of the provided objects, + with their `file_permissions_last_set` datetime updated if all related files + were successfully updated. """ - successfully_updated_objects = [] files_by_object: dict[PrivateMediaMixin, list[FieldFile]] = defaultdict(list) all_files = [] + objects = list(objects) for obj in objects: for file in obj.get_privacy_controlled_files(): @@ -86,18 +90,16 @@ def bulk_set_file_permissions( results = bulk_set_file_permissions(all_files, intended_privacy) - for obj, files in files_by_object.items(): + now = timezone.now() + for obj in objects: + files = files_by_object[obj] if all(results.get(file) for file in files): - obj.file_permissions_last_set = timezone.now() - successfully_updated_objects.append(obj) + obj.file_permissions_last_set = now - if successfully_updated_objects: - return self.bulk_update( - successfully_updated_objects, # type: ignore[arg-type] - fields=["file_permissions_last_set"], - ) + if save_changes: + self.bulk_update(objects, ["file_permissions_last_set"]) - return 0 + return objects class PrivateImageManager(PrivateMediaModelManager): From 97215de1dd12f68cb2a5190cb9ff23780de5414c Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Mon, 2 Dec 2024 16:54:16 +0000 Subject: [PATCH 07/63] Revise signal handlers to ignore references from draft pages when deciding which media to make private --- cms/private_media/signal_handlers.py | 30 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/cms/private_media/signal_handlers.py b/cms/private_media/signal_handlers.py index 6b0f93d3..1e11f262 100644 --- a/cms/private_media/signal_handlers.py +++ b/cms/private_media/signal_handlers.py @@ -2,9 +2,9 @@ from django.contrib.contenttypes.models import ContentType from django.core.exceptions import ImproperlyConfigured -from django.db.models import Exists, IntegerField, OuterRef +from django.db.models import IntegerField from django.db.models.functions import Cast -from wagtail.models import ReferenceIndex +from wagtail.models import Page, ReferenceIndex from wagtail.signals import page_published, page_unpublished, published, unpublished from cms.private_media.models import PrivateImageMixin @@ -48,25 +48,25 @@ def unpublish_media_on_unpublish(instance: "Model", **kwargs: Any) -> None: privacy-controlled media used solely by the object, and ensuring that it is made private again. """ + page_ct = ContentType.objects.get_for_model(Page) + draft_page_ids = {str(pk) for pk in Page.objects.filter(live=False).values_list("id", flat=True)} for model_class in get_private_media_models(): model_ct = ContentType.objects.get_for_model(model_class) - referenced_pks = ( - ReferenceIndex.get_references_for_object(instance) - .filter(to_content_type=model_ct) + references = ReferenceIndex.get_references_for_object(instance).filter(to_content_type=model_ct) + referenced_pks = set(references.values_list("to_object_id", flat=True).distinct()) + if not referenced_pks: + continue + referenced_by_other_live_object_pks = set( + ReferenceIndex.objects.filter(to_content_type=model_ct, to_object_id__in=referenced_pks) + .exclude(pk__in=references.values_list("pk", flat=True)) + .exclude(base_content_type=page_ct, object_id__in=draft_page_ids) .annotate(int_object_id=Cast("to_object_id", output_field=IntegerField())) .values_list("int_object_id", flat=True) - .distinct() ) queryset = ( - model_class.objects.filter(pk__in=referenced_pks) - .alias( - referenced_by_other_pages=Exists( - ReferenceIndex.objects.filter(to_content_type=model_ct, to_object_id=OuterRef("pk")).exclude( - content_type=ContentType.objects.get_for_model(instance), object_id=instance.pk - ) - ) - ) - .filter(referenced_by_other_pages=False) + model_class.objects.all() + .filter(pk__in=[int(pk) for pk in referenced_pks]) + .exclude(pk__in=referenced_by_other_live_object_pks) ) if issubclass(model_class, PrivateImageMixin): queryset = queryset.prefetch_related("renditions") From ea7466c597cf5c971093c425c13cd81051752eec Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Mon, 2 Dec 2024 12:17:07 +0000 Subject: [PATCH 08/63] Placate linters --- cms/private_media/managers.py | 4 ++-- cms/private_media/models/documents.py | 2 +- cms/private_media/models/images.py | 2 +- cms/private_media/storages.py | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cms/private_media/managers.py b/cms/private_media/managers.py index cd7c6dd1..de3b2076 100644 --- a/cms/private_media/managers.py +++ b/cms/private_media/managers.py @@ -42,7 +42,7 @@ def bulk_set_privacy(self, objects: Iterable["PrivateMediaMixin"], intended_priv to_update = self.bulk_set_file_permissions(to_update, intended_privacy, save_changes=False) count = self.bulk_update( - to_update, + to_update, # type: ignore[arg-type] fields=["_privacy", "privacy_last_changed", "file_permissions_last_set"], ) @@ -97,7 +97,7 @@ def bulk_set_file_permissions( obj.file_permissions_last_set = now if save_changes: - self.bulk_update(objects, ["file_permissions_last_set"]) + self.bulk_update(objects, ["file_permissions_last_set"]) # type: ignore[arg-type] return objects diff --git a/cms/private_media/models/documents.py b/cms/private_media/models/documents.py index 76471b30..c43fe4b5 100644 --- a/cms/private_media/models/documents.py +++ b/cms/private_media/models/documents.py @@ -33,4 +33,4 @@ def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator if not sites: return for site in sites: - yield site.root_url + self.url + yield site.root_url + self.url # type: ignore[attr-defined] diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py index 14e9c07c..c3d7502d 100644 --- a/cms/private_media/models/images.py +++ b/cms/private_media/models/images.py @@ -53,7 +53,7 @@ def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator """ if not sites: return - for rendition in self.renditions.all(): + for rendition in self.renditions.all(): # type: ignore[attr-defined] for site in sites: yield site.root_url + rendition.serve_url diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index bd525d75..b2525145 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -1,9 +1,8 @@ import logging from typing import TYPE_CHECKING -from django.core.files.storage import FileSystemStorage - from botocore.exceptions import ClientError +from django.core.files.storage import FileSystemStorage from storages.backends.s3 import S3Storage if TYPE_CHECKING: @@ -45,7 +44,9 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: class DummyPrivacySettingFileSystemStorage(FileSystemStorage): def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Pretend to make the provided file private.""" return True def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Pretend to make the provided file public.""" return True From 592807ff4f45dfdd40a564b6bb638926c6b68a6d Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Mon, 2 Dec 2024 12:16:55 +0000 Subject: [PATCH 09/63] Add tests --- cms/private_media/tests/__init__.py | 0 cms/private_media/tests/test_documents.py | 311 +++++++++++++++ cms/private_media/tests/test_images.py | 374 ++++++++++++++++++ .../tests/test_management_commands.py | 72 ++++ .../tests/test_signal_handlers.py | 133 +++++++ cms/private_media/tests/utils.py | 13 + 6 files changed, 903 insertions(+) create mode 100644 cms/private_media/tests/__init__.py create mode 100644 cms/private_media/tests/test_documents.py create mode 100644 cms/private_media/tests/test_images.py create mode 100644 cms/private_media/tests/test_management_commands.py create mode 100644 cms/private_media/tests/test_signal_handlers.py create mode 100644 cms/private_media/tests/utils.py diff --git a/cms/private_media/tests/__init__.py b/cms/private_media/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py new file mode 100644 index 00000000..e854b8e1 --- /dev/null +++ b/cms/private_media/tests/test_documents.py @@ -0,0 +1,311 @@ +from unittest import mock + +from django.contrib.auth import get_user_model +from django.test import SimpleTestCase, TestCase, override_settings +from wagtail.documents import get_document_model +from wagtail.models import Collection, Site +from wagtail_factories import DocumentFactory, SiteFactory + +from cms.private_media.constants import Privacy +from cms.private_media.managers import PrivateDocumentManager +from cms.private_media.models import PrivateDocumentMixin + +from .utils import PURGED_URLS + + +class TestModelConfiguration(SimpleTestCase): + def test_document_model_using_private_document_mixin(self): + """Verify that the document model correctly inherits from PrivateDocumentMixin. + And that the manager is a subclass of PrivateDocumentManager. + """ + document_model = get_document_model() + self.assertTrue(issubclass(document_model, PrivateDocumentMixin)) + self.assertIsInstance(document_model.objects, PrivateDocumentManager) + + +class TestDocumentModel(TestCase): + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + + def test_private_document(self): + """Test the behaviour of private documents: + - Verify default privacy settings. + - Test privacy change tracking. + - Ensure file permission updates are handled correctly. + - Check file permission outdated status. + """ + with self.assertLogs("cms.private_media.bulk_operations", level="DEBUG") as logs: + document = DocumentFactory(collection=self.root_collection) + + # Documents should be 'private' by default + self.assertIs(document.privacy, Privacy.PRIVATE) + self.assertTrue(document.is_private) + self.assertFalse(document.is_public) + + # Attempts to set file permissions on save should have failed gracefully, + # since the default file backend doesn't support it + self.assertEqual( + logs.output, + [ + ( + "DEBUG:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + f"file permissions to private, so skipping for: {document.file.name}." + ) + ], + ) + + # File permissions should be considered up-to-date + self.assertFalse(document.file_permissions_are_outdated()) + + # Setting privacy to the same value should not trigger an update to 'privacy_last_changed + value_before = document.privacy_last_changed + document.privacy = Privacy.PRIVATE + self.assertEqual(document.privacy_last_changed, value_before) + + # Setting privacy to a different value should triggered an update to 'privacy_last_changed' + document.privacy = Privacy.PUBLIC + self.assertGreater(document.privacy_last_changed, value_before) + + # File permissions should now be considered outdated + self.assertTrue(document.file_permissions_are_outdated()) + + # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' + # timestamp, resolving the issue + document.save() + self.assertFalse(document.file_permissions_are_outdated()) + + def test_public_document(self): + """Test the behaviour of public documents: + - Verify public privacy settings. + - Test privacy change tracking. + - Ensure file permission updates are handled correctly. + - Check file permission outdated status. + """ + with self.assertLogs("cms.private_media.bulk_operations", level="DEBUG") as logs: + document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + # This document should be 'public' + self.assertIs(document.privacy, Privacy.PUBLIC) + self.assertTrue(document.is_public) + self.assertFalse(document.is_private) + + # Attempts to set file permissions on save should have failed gracefully, + # since the default file backend doesn't support it + self.assertEqual( + logs.output, + [ + ( + "DEBUG:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + f"file permissions to public, so skipping for: {document.file.name}." + ) + ], + ) + + # File permissions should be considered up-to-date + self.assertFalse(document.file_permissions_are_outdated()) + + # Setting privacy to the same value should not trigger an update to 'privacy_last_changed + value_before = document.privacy_last_changed + document.privacy = Privacy.PUBLIC + self.assertEqual(document.privacy_last_changed, value_before) + + # Setting privacy to a different value should triggered an update to 'privacy_last_changed' + document.privacy = Privacy.PRIVATE + self.assertGreater(document.privacy_last_changed, value_before) + + # File permissions should now be considered outdated + self.assertTrue(document.file_permissions_are_outdated()) + + # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' + # timestamp, resolving the issue + document.save() + self.assertFalse(document.file_permissions_are_outdated()) + + def test_invalid_privacy_value_raises_value_error(self): + """Verify that attempting to create a document with an invalid privacy value raises ValueError.""" + with self.assertRaises(ValueError): + DocumentFactory(_privacy="invalid", collection=self.root_collection) + + def test_get_privacy_controlled_serve_urls(self): + """Test the behaviour of PrivateImageMixin.get_privacy_controlled_serve_urls.""" + default_site = Site.objects.get(is_default_site=True) + site_2 = SiteFactory(hostname="foo.com", port=443) + sites = [default_site, site_2] + + document = DocumentFactory(collection=self.root_collection) + self.assertEqual( + list(document.get_privacy_controlled_serve_urls(sites)), + [ + f"http://localhost{document.url}", + f"https://foo.com{document.url}", + ], + ) + + # If no sites are provided, the result should be empty + with self.assertNumQueries(0): + self.assertFalse(list(document.get_privacy_controlled_serve_urls([]))) + + def test_get_privacy_controlled_file_urls(self): + """Test the behaviour of PrivateImageMixin.get_privacy_controlled_file_urls.""" + document = DocumentFactory(collection=self.root_collection) + + # FileSystemStorage returns relative URLs for files only, so the return value should be empty + self.assertFalse(list(document.get_privacy_controlled_file_urls())) + + # However, if the storage backend returns fully-fledged URLs, those values should be + # included in the return value + def domain_prefixed_url(name: str) -> str: + """Replacement for FileSystemStorage.url() that returns fully-fledged URLs.""" + return f"https://media.example.com/{name}" + + with mock.patch("django.core.files.storage.FileSystemStorage.url", side_effect=domain_prefixed_url): + self.assertEqual( + list(document.get_privacy_controlled_file_urls()), + [ + f"https://media.example.com/{document.file.name}", + ], + ) + + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} + ) + def test_file_permission_setting_success(self): + """Test successful file permission setting using DummyPrivacySettingFileSystemStorage: + - Verify permissions are set correctly for both public and private documents. + - Ensure no debug logs are generated during successful operation. + """ + with self.assertNoLogs("cms.private_media.bulk_operations", level="DEBUG"): + private_document = DocumentFactory(collection=self.root_collection) + public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + self.assertFalse(private_document.file_permissions_are_outdated()) + self.assertFalse(public_document.file_permissions_are_outdated()) + + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} + ) + @mock.patch("cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_private", return_value=False) + @mock.patch("cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_public", return_value=False) + def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): + """Test handling of file permission setting failures: + - Mock failed permission settings for both public and private documents. + - Verify documents are marked as having outdated permissions. + - Ensure correct storage methods are called. + """ + with self.assertNoLogs("cms.private_media.bulk_operations", level="DEBUG"): + private_document = DocumentFactory(collection=self.root_collection) + public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + mock_make_private.assert_called_once_with(private_document.file) + self.assertTrue(private_document.file_permissions_are_outdated()) + mock_make_public.assert_called_once_with(public_document.file) + self.assertTrue(public_document.file_permissions_are_outdated()) + + +@override_settings( + WAGTAILFRONTENDCACHE={ + "default": { + "BACKEND": "cms.private_media.tests.utils.MockFrontEndCacheBackend", + }, + } +) +class TestPrivateDocumentManager(TestCase): + model = get_document_model() + + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + + def setUp(self): + # Create six documents (a mix of private and public) + self.private_documents = [] + self.public_documents = [] + for _ in range(3): + self.private_documents.append(DocumentFactory(collection=self.root_collection)) + self.public_documents.append(DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection)) + PURGED_URLS.clear() + + def test_bulk_make_public(self): + """Test the behaviour of PrivateDocumentManager.bulk_make_public().""" + # Three documents are already public, so only three should be updated + with self.assertNumQueries(3): + # Query summary: + # 1. to fetch the documents + # 2. to save updates + # 3. to fetch sites to facilitate cache purging + self.assertEqual(self.model.objects.bulk_make_public(self.model.objects.all()), 3) + + # Serve URLs for private documents should have been purged as part of the update + for obj in self.private_documents: + self.assertIn("http://localhost" + obj.url, PURGED_URLS) + + # Verify all images are now public + for obj in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): + self.assertIs(obj.privacy, Privacy.PUBLIC) + self.assertFalse(obj.file_permissions_are_outdated()) + + # Another attempt should result in no updates + with self.assertNumQueries(1): + self.assertEqual(self.model.objects.bulk_make_public(self.model.objects.all()), 0) + + def test_bulk_make_private(self): + """Test the behaviour of PrivateDocumentManager.bulk_make_private().""" + # Three images are already private, so only three should be updated + with self.assertNumQueries(3): + # Query summary: + # 1. to fetch the documents + # 2. to save updates + # 3. to fetch sites to facilitate cache purging + self.assertEqual(self.model.objects.bulk_make_private(self.model.objects.all()), 3) + + # Serve URLs for public documents should have been purged as part of the update + for obj in self.public_documents: + self.assertIn("http://localhost" + obj.url, PURGED_URLS) + + # Verify all images are now private + for image in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): + self.assertIs(image.privacy, Privacy.PRIVATE) + self.assertFalse(image.file_permissions_are_outdated()) + + # Another attempt should result in no updates + with self.assertNumQueries(1): + # Query summary: + # 1. to fetch the documents + self.assertEqual(self.model.objects.bulk_make_private(self.model.objects.all()), 0) + + +class TestDocumentServeView(TestCase): + model = get_document_model() + + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + cls.private_document = DocumentFactory(collection=cls.root_collection) + cls.public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=cls.root_collection) + cls.superuser = get_user_model().objects.create(username="superuser", is_superuser=True) + + def test_private_document(self): + """Test the serve view behaviour for private documents.""" + # If not authenticated, permission checks should fail and a Forbidden response returned + response = self.client.get(self.private_document.url) + self.assertEqual(response.status_code, 403) + + # If authenticated as a superuser, the view should serve the files + self.client.force_login(self.superuser) + response = self.client.get(self.private_document.url) + self.assertEqual(response.status_code, 200) + + def test_public_document(self): + """Test the serve view behaviour for public documents.""" + # For public documents, the serve view should redirect to the file URL. + response = self.client.get(self.public_document.url) + self.assertEqual(response.status_code, 200) + + # This remains the same for public documents, regardless of whether + # document.file_permissions_are_outdated() returns True + self.model.objects.filter(id=self.public_document.id).update(file_permissions_last_set=None) + self.public_document.refresh_from_db() + self.assertTrue(self.public_document.file_permissions_are_outdated()) + response = self.client.get(self.public_document.url) + self.assertEqual(response.status_code, 200) diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py new file mode 100644 index 00000000..306ef9c8 --- /dev/null +++ b/cms/private_media/tests/test_images.py @@ -0,0 +1,374 @@ +from unittest import mock + +from django.contrib.auth import get_user_model +from django.test import SimpleTestCase, TestCase, override_settings +from wagtail.images import get_image_model +from wagtail.images.models import Filter +from wagtail.models import Collection, Site +from wagtail_factories import ImageFactory, SiteFactory + +from cms.private_media.constants import Privacy +from cms.private_media.managers import PrivateImageManager +from cms.private_media.models import AbstractPrivateRendition, PrivateImageMixin + +from .utils import PURGED_URLS + + +class TestModelConfiguration(SimpleTestCase): + def test_image_model_using_private_image_mixin(self): + """Verify that the configured image model inherits from PrivateImageMixin.""" + image_model = get_image_model() + self.assertTrue(issubclass(image_model, PrivateImageMixin)) + self.assertIsInstance(image_model.objects, PrivateImageManager) + + def test_rendition_model_using_abstract_private_rendition(self): + """Verify that the configured rendition model inherits from AbstractPrivateRendition.""" + rendition_model = get_image_model().get_rendition_model() + self.assertTrue(issubclass(rendition_model, AbstractPrivateRendition)) + + +class TestImageModel(TestCase): + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + + def test_private_image(self): + """Test the behaviour of private images: + - Verify that images are created as PRIVATE by default. + - Verify that file permission states are updated on save to reflect image privacy. + - Verify that privacy change tracking works correctly. + - Graceful handling of unsupported storage backends. + """ + # Attempts to set file permissions on save should have failed gracefully, + # since the default file backend doesn't support it + with self.assertLogs("cms.private_media.bulk_operations", level="DEBUG") as logs: + image = ImageFactory(collection=self.root_collection) + + # Images should be 'private' by default + self.assertIs(image.privacy, Privacy.PRIVATE) + self.assertTrue(image.is_private) + self.assertFalse(image.is_public) + + # Attempts to set file permissions on save should have failed gracefully + self.assertEqual( + logs.output, + [ + ( + "DEBUG:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + f"file permissions to private, so skipping for: {image.file.name}." + ) + ], + ) + + # File permissions should be considered up-to-date + self.assertFalse(image.file_permissions_are_outdated()) + + # Setting privacy to the same value should not trigger an update to 'privacy_last_changed + value_before = image.privacy_last_changed + image.privacy = Privacy.PRIVATE + self.assertEqual(image.privacy_last_changed, value_before) + + # Setting privacy to a different value should triggered an update to 'privacy_last_changed' + image.privacy = Privacy.PUBLIC + self.assertGreater(image.privacy_last_changed, value_before) + + # File permissions should now be considered outdated + self.assertTrue(image.file_permissions_are_outdated()) + + # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' + # timestamp, resolving the issue + image.save() + self.assertFalse(image.file_permissions_are_outdated()) + + def test_private_image_renditions(self): + """Test that private image renditions use serve URLs instead of direct file URLs.""" + image = ImageFactory(collection=self.root_collection) + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + self.assertEqual(len(renditions), 2) + + # For private images, rendition.url should return a serve URL + for rendition in renditions.values(): + self.assertEqual(rendition.url, rendition.serve_url) + # And cache keys should include 'private' + self.assertIn("-private-", rendition.get_cache_key()) + + def test_public_image(self): + """Test the behaviour of public images. + - Verify that images can be created as PUBLIC. + - Verify that file permission states are updated on save to reflect image privacy. + - Verify that privacy change tracking works correctly. + - Graceful handling of unsupported storage backends. + """ + # Attempts to set file permissions on save should have failed gracefully, + # since the default file backend doesn't support it + with self.assertLogs("cms.private_media.bulk_operations", level="DEBUG") as logs: + image = ImageFactory(_privacy=Privacy.PUBLIC) + + # This image should be 'public' + self.assertIs(image.privacy, Privacy.PUBLIC) + self.assertTrue(image.is_public) + self.assertFalse(image.is_private) + + # Attempts to set file permissions on save should have failed gracefully + self.assertEqual( + logs.output, + [ + ( + "DEBUG:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + f"file permissions to public, so skipping for: {image.file.name}." + ) + ], + ) + + # File permissions should be considered up-to-date + self.assertFalse(image.file_permissions_are_outdated()) + + # Setting privacy to the same value should not trigger an update to 'privacy_last_changed + value_before = image.privacy_last_changed + image.privacy = Privacy.PUBLIC + self.assertEqual(image.privacy_last_changed, value_before) + + # Setting privacy to a different value should triggered an update to 'privacy_last_changed' + image.privacy = Privacy.PRIVATE + self.assertGreater(image.privacy_last_changed, value_before) + + # File permissions should now be considered outdated + self.assertTrue(image.file_permissions_are_outdated()) + + # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' + # timestamp, resolving the issue + image.save() + self.assertFalse(image.file_permissions_are_outdated()) + + def test_public_image_renditions(self): + """Test rendition.url behaviour for public image renditions: + - Public images with up-to-date permissions use direct file URLs. + - Falls back to serve URLs when permissions are outdated. + """ + image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + self.assertEqual(len(renditions), 2) + + # For public images with up-to-date file permissions, rendition.url should return the file url + for rendition in renditions.values(): + self.assertEqual(rendition.url, rendition.file.url) + # And cache keys should include 'public' + self.assertIn("-public-", rendition.get_cache_key()) + + # However, if the file permissions are outdated, rendition.url should return a serve URL + with mock.patch("cms.private_media.models.PrivateMediaMixin.file_permissions_are_outdated", return_value=True): + for rendition in renditions.values(): + self.assertEqual(rendition.url, rendition.serve_url) + + def test_get_privacy_controlled_serve_urls(self): + """Test the behaviour of PrivateImageMixin.get_privacy_controlled_serve_urls.""" + default_site = Site.objects.get(is_default_site=True) + site_2 = SiteFactory(hostname="foo.com", port=443) + sites = [default_site, site_2] + + # If an image has no renditions, the result should be empty + image = ImageFactory(collection=self.root_collection) + self.assertFalse(list(image.get_privacy_controlled_serve_urls(sites))) + + # If an image has renditions, the result should be a list of serve URLs, prefixed + # with the root url for each supplied site + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + expected_result = [] + for rendition in renditions.values(): + expected_result.extend( + [ + f"http://localhost{rendition.url}", + f"https://foo.com{rendition.url}", + ] + ) + with self.assertNumQueries(1): + self.assertEqual(list(image.get_privacy_controlled_serve_urls(sites)), expected_result) + + # If no sites are provided, the result should be empty + with self.assertNumQueries(0): + self.assertFalse(list(image.get_privacy_controlled_serve_urls([]))) + + def test_get_privacy_controlled_file_urls(self): + """Test the behaviour of PrivateImageMixin.get_privacy_controlled_file_urls.""" + image = ImageFactory(collection=self.root_collection) + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + + # FileSystemStorage returns relative URLs for files only, so the return value should be empty, + # even if renditions exist + self.assertFalse(list(image.get_privacy_controlled_file_urls())) + + # However, if the storage backend returns fully-fledged URLs, those values should be + # included in the return value + + def domain_prefixed_url(name: str) -> str: + """Replacement for FileSystemStorage.url() that returns fully-fledged URLs.""" + return f"https://media.example.com/{name}" + + with mock.patch("django.core.files.storage.FileSystemStorage.url", side_effect=domain_prefixed_url): + expected_result = [f"https://media.example.com/{image.file.name}"] + for rendition in renditions.values(): + expected_result.append(f"https://media.example.com/{rendition.file.name}") + self.assertEqual(list(image.get_privacy_controlled_file_urls()), expected_result) + + def test_invalid_privacy_value_raises_value_error(self): + """Verify that invalid privacy values raise a ValueError.""" + with self.assertRaises(ValueError): + ImageFactory(_privacy="invalid", collection=self.root_collection) + + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} + ) + def test_file_permission_setting_success(self): + """Test successful file permission setting using a storage backend that supports it.""" + with self.assertNoLogs("cms.private_media.bulk_operations", level="DEBUG"): + private_image = ImageFactory(collection=self.root_collection) + public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + self.assertFalse(private_image.file_permissions_are_outdated()) + self.assertFalse(public_image.file_permissions_are_outdated()) + + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} + ) + @mock.patch("cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_private", return_value=False) + @mock.patch("cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_public", return_value=False) + def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): + """Test graceful handling of file permission setting failures. + + Verifies that the system correctly tracks failed permission updates and + maintains the outdated state when storage operations fail. + """ + with self.assertNoLogs("cms.private_media.bulk_operations", level="DEBUG"): + private_image = ImageFactory(collection=self.root_collection) + public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + mock_make_private.assert_called_once_with(private_image.file) + self.assertTrue(private_image.file_permissions_are_outdated()) + mock_make_public.assert_called_once_with(public_image.file) + self.assertTrue(public_image.file_permissions_are_outdated()) + + +@override_settings( + WAGTAILFRONTENDCACHE={ + "default": { + "BACKEND": "cms.private_media.tests.utils.MockFrontEndCacheBackend", + }, + } +) +class TestPrivateImageManager(TestCase): + model = get_image_model() + + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + + def setUp(self): + # Create six images with renditions (a mix of private and public) + self.private_images = [] + self.public_images = [] + for _ in range(3): + private_image = ImageFactory(collection=self.root_collection) + public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + private_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + public_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + self.private_images.append(private_image) + self.public_images.append(public_image) + PURGED_URLS.clear() + + def test_bulk_make_public(self): + """Test the behaviour of PrivateImageManager.bulk_make_public().""" + # Three image are already public, so only three should be updated + qs = self.model.objects.all().prefetch_related("renditions") + with self.assertNumQueries(4): + # Query summary: + # 1. to fetch the images + # 2. to prefetch renditions + # 3. to save updates + # 4. to fetch sites to facilitate cache purging + self.assertEqual(self.model.objects.bulk_make_public(qs), 3) + + # Serve URLs for private image renditions should have been purged as part of the update + for obj in self.private_images: + for rendition in obj.renditions.all(): + self.assertIn("http://localhost" + rendition.url, PURGED_URLS) + + # Verify all images are now public + for obj in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): + self.assertIs(obj.privacy, Privacy.PUBLIC) + self.assertFalse(obj.file_permissions_are_outdated()) + + # Another attempt should result in no updates + with self.assertNumQueries(1): + # Query summary: + # 1. to fetch the images + self.assertEqual(self.model.objects.bulk_make_public(self.model.objects.all()), 0) + + def test_bulk_make_private(self): + """Test the behaviour of PrivateImageManager.bulk_make_private().""" + # Three images are already private, so only three should be updated + qs = self.model.objects.all().prefetch_related("renditions") + with self.assertNumQueries(4): + # Query summary: + # 1. to fetch the images + # 2. to prefetch renditions + # 3. to save updates + # 4. to fetch sites to facilitate cache purging + self.assertEqual(self.model.objects.bulk_make_private(qs), 3) + + # Serve URLs for public image renditions should have been purged as part of the update + for obj in self.public_images: + for rendition in obj.renditions.all(): + self.assertIn("http://localhost" + rendition.serve_url, PURGED_URLS) + + # Verify all images are now private + for image in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): + self.assertIs(image.privacy, Privacy.PRIVATE) + self.assertFalse(image.file_permissions_are_outdated()) + + # Another attempt should result in no updates + with self.assertNumQueries(1): + # Query summary: + # 1. to fetch the images + self.assertEqual(self.model.objects.bulk_make_private(self.model.objects.all()), 0) + + +class TestImageServeView(TestCase): + model = get_image_model() + + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + cls.private_image = ImageFactory(collection=cls.root_collection) + cls.public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=cls.root_collection) + cls.private_image_renditions = cls.private_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + cls.public_image_renditions = cls.public_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + cls.superuser = get_user_model().objects.create(username="superuser", is_superuser=True) + + def test_private_image(self): + """Test the serve view behaviour for private image renditions.""" + # If not authenticated, permission checks should fail and a Forbidden response returned + for rendition in self.private_image_renditions.values(): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 403) + + # If authenticated as a superuser, the view should serve the files + self.client.force_login(self.superuser) + for rendition in self.private_image_renditions.values(): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 200) + + def test_public_image(self): + """Test the serve view behaviour for public image renditions.""" + # For public image renditions, the serve view should redirect to the file URL. + for rendition in self.public_image_renditions.values(): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 302) + + # That is, unless the image.file_permissions_are_outdated() returns True, + # In which case, the view will continue to serve the file directly + self.model.objects.filter(id=self.public_image.id).update(file_permissions_last_set=None) + self.public_image.refresh_from_db() + self.assertTrue(self.public_image.file_permissions_are_outdated()) + for rendition in self.public_image_renditions.values(): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 200) diff --git a/cms/private_media/tests/test_management_commands.py b/cms/private_media/tests/test_management_commands.py new file mode 100644 index 00000000..8751e200 --- /dev/null +++ b/cms/private_media/tests/test_management_commands.py @@ -0,0 +1,72 @@ +from datetime import timedelta +from io import StringIO + +from django.core.management import call_command +from django.test import TestCase +from django.utils import timezone +from wagtail.documents import get_document_model +from wagtail.images import get_image_model +from wagtail_factories import DocumentFactory, ImageFactory + +from cms.private_media.constants import Privacy + + +class RetryFilePermissionSetAttemptsCommandTests(TestCase): + command_name = "retry_file_permission_set_attempts" + + def setUp(self): + ImageFactory() + ImageFactory(_privacy=Privacy.PUBLIC) + DocumentFactory() + DocumentFactory(_privacy=Privacy.PUBLIC) + up_to_date_image = ImageFactory() + up_to_date_document = DocumentFactory() + + # Adjust `file_permissions_last_set` for all items but the 'up-to-date' ones + one_day_ago = timezone.now() - timedelta(days=1) + get_image_model().objects.exclude(id=up_to_date_image.id).update(file_permissions_last_set=one_day_ago) + get_document_model().objects.exclude(id=up_to_date_document.id).update(file_permissions_last_set=one_day_ago) + + def call_command(self, command_name: str, *args, **kwargs): + out = StringIO() + call_command( + command_name, + *args, + stdout=out, + stderr=StringIO(), + **kwargs, + ) + return out.getvalue() + + def test_default(self): + with self.assertNumQueries(7): + # Query summary: + # 1. Fetch outdated documents + # 2. Update 'file_permissions_last_set' for outdated private documents + # 3. Update 'file_permissions_last_set' for outdated public documents + # 4. Fetch outdated images + # 5. Fetch renditions for those images + # 6. Update 'file_permissions_last_set' for outdated private images + # 7. Update 'file_permissions_last_set' for outdated public images + output = self.call_command(self.command_name) + + self.assertIn("2 CustomDocument instances have outdated file permissions", output) + self.assertIn("File permissions successfully updated for 1 private custom documents.", output) + self.assertIn("File permissions successfully updated for 1 public custom documents.", output) + self.assertIn("2 CustomImage instances have outdated file permissions", output) + self.assertIn("File permissions successfully updated for 1 private custom images.", output) + self.assertIn("File permissions successfully updated for 1 public custom images.", output) + + def test_dry_run(self): + with self.assertNumQueries(2): + # Query summary: + # 1. Fetch outdated documents + # 2. Fetch outdated images + output = self.call_command(self.command_name, "--dry-run") + self.assertIn("This is a dry run.", output) + self.assertIn("2 CustomDocument instances have outdated file permissions", output) + self.assertIn("Would update file permissions for 1 private custom documents.", output) + self.assertIn("Would update file permissions for 1 public custom documents.", output) + self.assertIn("2 CustomImage instances have outdated file permissions", output) + self.assertIn("Would update file permissions for 1 private custom images.", output) + self.assertIn("Would update file permissions for 1 public custom images.", output) diff --git a/cms/private_media/tests/test_signal_handlers.py b/cms/private_media/tests/test_signal_handlers.py new file mode 100644 index 00000000..b82aa359 --- /dev/null +++ b/cms/private_media/tests/test_signal_handlers.py @@ -0,0 +1,133 @@ +from typing import Any + +from django.test import TestCase +from wagtail.models import Site +from wagtail_factories import DocumentFactory, ImageFactory + +from cms.private_media.constants import Privacy +from cms.standard_pages.models import InformationPage + + +class SignalHandlersTestCase(TestCase): + @classmethod + def setUpTestData(cls): + cls.site = Site.objects.select_related("root_page").get() + cls.home_page = cls.site.root_page + cls.test_page = cls.make_information_page([]) + + @classmethod + def make_information_page(cls, content: list[dict[str, Any]]) -> InformationPage: + """Creates an information page with the provided content value and returns it.""" + instance = InformationPage(title="Test Information Page", summary="Test", content=content) + cls.home_page.add_child(instance=instance) + return instance + + def setUp(self): + self.private_image = ImageFactory() + self.public_image = ImageFactory(_privacy=Privacy.PUBLIC) + self.private_document = DocumentFactory() + self.public_document = DocumentFactory(_privacy=Privacy.PUBLIC) + + def generate_non_media_referencing_content(self) -> list[dict[str, Any]]: + """Returns a raw StreamField value that includes a heading.""" + return [{"type": "heading", "value": "Test heading"}] + + def generate_media_referencing_content(self) -> list[dict[str, Any]]: + """Returns a raw StreamField value that includes image and document blocks that + reference all media created in setUp(). + """ + return [ + {"type": "heading", "value": "Test heading"}, + {"type": "image", "value": self.private_image.id}, + {"type": "image", "value": self.public_image.id}, + { + "type": "documents", + "value": [ + { + "type": "document", + "value": {"document": self.private_document.id, "title": "Private document", "description": ""}, + }, + { + "type": "document", + "value": {"document": self.public_document.id, "title": "Public document", "description": ""}, + }, + ], + }, + ] + + def assertMediaPrivacy(self, expected_privacy: Privacy): # pylint: disable=invalid-name + """Checks that all media created in setup() has the expected privacy, after reloading field + data from the database. + """ + self.private_image.refresh_from_db() + self.assertEqual(self.private_image.privacy, expected_privacy) + self.public_image.refresh_from_db() + self.assertEqual(self.public_image.privacy, expected_privacy) + self.private_document.refresh_from_db() + self.assertEqual(self.private_document.privacy, expected_privacy) + self.public_document.refresh_from_db() + self.assertEqual(self.public_document.privacy, expected_privacy) + + def test_publishing_a_page_makes_referenced_media_public(self): + """Tests the impact of publishing and unpublishing a page has on the privacy of referenced media.""" + self.test_page.content = self.generate_media_referencing_content() + + # Publishing the revision should result in all referenced media becoming public + self.test_page.save_revision().publish() + self.assertMediaPrivacy(Privacy.PUBLIC) + + def test_unpublishing_a_page_makes_referenced_media_private(self): + """Tests the impact of unpublishing a page has on the privacy of referenced media.""" + self.test_page.content = self.generate_media_referencing_content() + self.test_page.live = True + self.test_page.save(update_fields=["content", "live"]) + + # Unpublishing the page should result in all referenced media becoming private, + # as it isn't referenced by anything else + self.test_page.unpublish() + self.assertMediaPrivacy(Privacy.PRIVATE) + + def test_unpublishing_a_page_without_references_to_media(self): + """Tests the impact of unpublishing a page that doesn't reference any media.""" + self.test_page.content = self.generate_non_media_referencing_content() + self.test_page.live = True + self.test_page.save(update_fields=["content", "live"]) + + self.test_page.unpublish() + + def test_unpublishing_a_page_when_media_is_referenced_by_a_draft_page(self): + """Tests the impact of unpublishing a page that references media that is referenced by another draft page.""" + # Publish the first page, so that the reference index is populated, then unpublish it again + self.test_page.content = self.generate_media_referencing_content() + self.test_page.save_revision().publish() + self.test_page.unpublish() + + # The media should be private, since it has no other references + self.assertMediaPrivacy(Privacy.PRIVATE) + + # Create another page that references the media and publish it + new_page = self.make_information_page(content=self.test_page.content) + new_page.save_revision().publish() + + # The media should be public again, since it is referenced by the new page + self.assertMediaPrivacy(Privacy.PUBLIC) + + # Unpublishing the new page SHOULD result in referenced media becoming private, + # because it is only referenced by draft pages + new_page.unpublish() + self.assertMediaPrivacy(Privacy.PRIVATE) + + def test_unpublishing_a_page_when_media_is_referenced_by_a_live_page(self): + """Tests the impact of unpublishing a page that references media that is referenced by another live page.""" + # Publish the first page, with references to media + self.test_page.content = self.generate_media_referencing_content() + self.test_page.save_revision().publish() + + # Create another page that references the media and publish it + new_page = self.make_information_page(content=self.test_page.content) + new_page.save_revision().publish() + + # Unpublishing the new page SHOULD NOT result in referenced media becoming private, + # because it is still referenced by the original live page + new_page.unpublish() + self.assertMediaPrivacy(Privacy.PUBLIC) diff --git a/cms/private_media/tests/utils.py b/cms/private_media/tests/utils.py new file mode 100644 index 00000000..01aa3044 --- /dev/null +++ b/cms/private_media/tests/utils.py @@ -0,0 +1,13 @@ +from collections.abc import Iterable + +from wagtail.contrib.frontend_cache.backends import BaseBackend + +PURGED_URLS = [] + + +class MockFrontEndCacheBackend(BaseBackend): + def purge(self, url: str) -> None: + PURGED_URLS.append(url) + + def purge_batch(self, urls: Iterable[str]) -> None: + PURGED_URLS.extend(urls) From 1c34a508a0a35850a652c55a75879c3e92eed67a Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 10:14:00 +0000 Subject: [PATCH 10/63] Use the warning log level for ACL setting issues --- cms/private_media/storages.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index b2525145..cc4074b5 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -30,12 +30,12 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: try: obj_acl = obj.Acl() except ClientError as e: - logger.debug("Failed to retrieve ACL for %s: %s", file.name, repr(e)) + logger.warning("Failed to retrieve ACL for %s: %s", file.name, repr(e)) return False try: obj_acl.put(ACL=acl_name) except ClientError as e: - logger.debug("Failed to set ACL for %s: %s", file.name, repr(e)) + logger.warning("Failed to set ACL for %s: %s", file.name, repr(e)) return False logger.info("ACL set successfully for %s", file.name) From fb73cb3873d31233281b46f353738173f61271d1 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 09:55:23 +0000 Subject: [PATCH 11/63] Add docs --- docs/custom-features/media_privacy.md | 34 +++++++++++++++++++++++++++ docs/index.md | 1 + 2 files changed, 35 insertions(+) create mode 100644 docs/custom-features/media_privacy.md diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md new file mode 100644 index 00000000..600ad032 --- /dev/null +++ b/docs/custom-features/media_privacy.md @@ -0,0 +1,34 @@ +# Media privacy + +This project uses customised versions of Wagtail's `Image` and `Document` models to store and manage the +privacy of uploaded media files. + +1. The privacy of an image or document cannot be changed manually via the Wagtail UI. +2. Images and documents are private by default, and will only be served to authenticated users with some level of permission on the media item whilst they have this privacy status (so previewing from Wagtail should still work as expected). +3. Where the storage backend supports it, files for private images and documents (and any renditions created from the original image) will have their access permissions updated at the storage level, preventing direct file access. +4. Images and documents only become public when they are referenced by blocks in page content, and that page is published. +5. When a page is unpublished, any public images or documents referenced solely by that page are made private again (references from other draft pages are ignored, as the media can easily be made public again when the draft page is published). + +## Use of Wagtail's reference index + +Wagtail's reference index is used to track when images and documents (and other objects) are referenced by pages and other objects. + +Population of the reference index is automatically triggered by the `post_save` signal for all registered models. + +When publishing or unpublishing a page (or other type of 'publishable' object), object-level changes are saved before any `published`, `page_published`, `unpublished` or `page_unpublished` signals are emitted. So, as long as we use these signals to review the privacy +of referenced media, we can be sure that the reference index will be up-to-date. + +## Handling of file permission setting errors + +Whilst file-permission setting requests are quite reliable in S3, they can fail occasionally (as with any web service). + +Failed attempts are tracked using a `file_permissions_last_set` timestamp for each media item, which is only updated if all update requests for the media item were successful. + +The `retry_file_permission_set_attempts` management command seeks out any media items with an outdated timestamp and attempts to update the files again. + +The custom `PrivacySettingS3Storage` class logs failed file-setting attempts using the 'WARNING' log level, so should be available to review in most environments should issues occur. + +## Compatibility with upstream caching + +Because serve view responses and direct file urls can be cached, it's important that when the privacy of a media item changes, the cache +is invalidated for any affected URLs. In environments where Wagtail's front-end-cache app is configured, all relevant URLs should be collected into a batch and sent to the relevant backend to initiate purging. diff --git a/docs/index.md b/docs/index.md index 26f92252..e7b97ace 100644 --- a/docs/index.md +++ b/docs/index.md @@ -13,6 +13,7 @@ To work on this project, see the [README](../README.md). Any unique/custom features of note are listed here. Please keep this list updated as more features are added. - [Migration-friendly StreamFields](custom-features/migration_friendly_streamfields.md) +- [Media privacy](custom-features/media_privacy.md) ## External integrations From 92613b3f700d89c9f13d31e3081ac4cf0b379dff Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 10:38:16 +0000 Subject: [PATCH 12/63] Revise model docstrings to account for changes --- cms/private_media/models/documents.py | 3 +-- cms/private_media/models/images.py | 2 +- cms/private_media/models/mixins.py | 6 +----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/cms/private_media/models/documents.py b/cms/private_media/models/documents.py index c43fe4b5..3210cca7 100644 --- a/cms/private_media/models/documents.py +++ b/cms/private_media/models/documents.py @@ -12,8 +12,7 @@ class PrivateDocumentMixin(PrivateMediaMixin): """A mixin class to be applied to a project's custom Document model, - allowing the privacy to be controlled effectively, depending on the - collection the image belongs to. + allowing the privacy of related files to be controlled effectively. """ objects: ClassVar[PrivateDocumentManager] = PrivateDocumentManager() diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py index c3d7502d..5ae0f5b5 100644 --- a/cms/private_media/models/images.py +++ b/cms/private_media/models/images.py @@ -17,7 +17,7 @@ class PrivateImageMixin(PrivateMediaMixin): """A mixin class to be applied to a project's custom Image model, - allowing the file privacy to be controlled effectively. + allowing the privacy of related files to be controlled effectively. """ objects: ClassVar[PrivateImageManager] = PrivateImageManager() diff --git a/cms/private_media/models/mixins.py b/cms/private_media/models/mixins.py index cc81e844..37c20107 100644 --- a/cms/private_media/models/mixins.py +++ b/cms/private_media/models/mixins.py @@ -17,13 +17,9 @@ class PrivateMediaMixin(models.Model): - """A mixin class for models that has files that need to remain private + """A mixin class for models that have files that need to remain private until the object itself is no longer private. - Subclasses must implement the `determine_privacy()` method, which should - return a `Privacy` value to determine the correct value for the `is_private` - field (and by extension, whether files should be made private or public). - Subclasses must implement the `get_privacy_controlled_files()` method, which should return an iterable of `FieldFile` objects that are managed by the model instance. From c73cd07df17c9b33e2cf8013266647df531c2e96 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 10:52:07 +0000 Subject: [PATCH 13/63] Revise test docstrings in test_signal_handlers.py --- cms/private_media/tests/test_signal_handlers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cms/private_media/tests/test_signal_handlers.py b/cms/private_media/tests/test_signal_handlers.py index b82aa359..abe9655b 100644 --- a/cms/private_media/tests/test_signal_handlers.py +++ b/cms/private_media/tests/test_signal_handlers.py @@ -69,7 +69,7 @@ def assertMediaPrivacy(self, expected_privacy: Privacy): # pylint: disable=inva self.assertEqual(self.public_document.privacy, expected_privacy) def test_publishing_a_page_makes_referenced_media_public(self): - """Tests the impact of publishing and unpublishing a page has on the privacy of referenced media.""" + """Tests the impact of publishing a media-referencing page has on the privacy of referenced media.""" self.test_page.content = self.generate_media_referencing_content() # Publishing the revision should result in all referenced media becoming public @@ -77,7 +77,7 @@ def test_publishing_a_page_makes_referenced_media_public(self): self.assertMediaPrivacy(Privacy.PUBLIC) def test_unpublishing_a_page_makes_referenced_media_private(self): - """Tests the impact of unpublishing a page has on the privacy of referenced media.""" + """Tests the impact of unpublishing a media-referencing page has on the privacy of referenced media.""" self.test_page.content = self.generate_media_referencing_content() self.test_page.live = True self.test_page.save(update_fields=["content", "live"]) @@ -87,8 +87,8 @@ def test_unpublishing_a_page_makes_referenced_media_private(self): self.test_page.unpublish() self.assertMediaPrivacy(Privacy.PRIVATE) - def test_unpublishing_a_page_without_references_to_media(self): - """Tests the impact of unpublishing a page that doesn't reference any media.""" + def test_page_without_references_to_media(self): + """Tests that pages without any referenced to media can still be published and unpublished without issue.""" self.test_page.content = self.generate_non_media_referencing_content() self.test_page.live = True self.test_page.save(update_fields=["content", "live"]) From 04e31c2223dbc6004a1397f19ddaadee04101b93 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 12:08:37 +0000 Subject: [PATCH 14/63] Tweak docs based on feedback --- docs/custom-features/media_privacy.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 600ad032..d01c16e3 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -11,9 +11,9 @@ privacy of uploaded media files. ## Use of Wagtail's reference index -Wagtail's reference index is used to track when images and documents (and other objects) are referenced by pages and other objects. +Wagtail's [reference index](https://docs.wagtail.org/en/stable/advanced_topics/reference_index.html) is used to track when images and documents (and other objects) are referenced by pages and other objects. -Population of the reference index is automatically triggered by the `post_save` signal for all registered models. +The reference index is automatically populated via handlers connected to the `post_save` signal for all registered models. When publishing or unpublishing a page (or other type of 'publishable' object), object-level changes are saved before any `published`, `page_published`, `unpublished` or `page_unpublished` signals are emitted. So, as long as we use these signals to review the privacy of referenced media, we can be sure that the reference index will be up-to-date. From ab6a27dc13b51e1360124de6c8ea07d86dc40576 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 12:11:49 +0000 Subject: [PATCH 15/63] Use more recognisable issue name in pylint: disable statement --- cms/private_media/storages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index cc4074b5..a68ad0d7 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -13,7 +13,7 @@ class PrivacySettingS3Storage(S3Storage): - # pylint: disable=W0223 + # pylint: disable=abstract-method private_acl_name = "private" public_acl_name = "public-read" From 7f7cb4f469bb2e0ae0e2bdf00ae33c99107d9b6b Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 12:13:09 +0000 Subject: [PATCH 16/63] Add a docstring to DummyPrivacySettingFileSystemStorage --- cms/private_media/storages.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index a68ad0d7..451ae6c5 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -43,6 +43,8 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: class DummyPrivacySettingFileSystemStorage(FileSystemStorage): + """Dummy storage class for use in tests.""" + def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument """Pretend to make the provided file private.""" return True From 19dde02607d9cae65100c8ceac05407b035ffc65 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 12:44:25 +0000 Subject: [PATCH 17/63] Don't cache ServeImage view responses, and update PrivateImageMixin.get_privacy_controlled_serve_urls() to return an empty iterable --- cms/private_media/models/images.py | 9 +++---- cms/private_media/tests/test_images.py | 37 +++++++------------------- cms/urls.py | 2 +- 3 files changed, 15 insertions(+), 33 deletions(-) diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py index 5ae0f5b5..c1850c58 100644 --- a/cms/private_media/models/images.py +++ b/cms/private_media/models/images.py @@ -50,12 +50,11 @@ def create_renditions(self, *filters: "Filter") -> dict["Filter", AbstractRendit def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator[str]: """Return an iterator of fully-fledged serve URLs for this image, covering the domains for all provided sites. + + NOTE: Since image serve URLS in this project are exempt from caching, and this result is only + used for cache purging, it doesn't need to return anything. """ - if not sites: - return - for rendition in self.renditions.all(): # type: ignore[attr-defined] - for site in sites: - yield site.root_url + rendition.serve_url + return iter([]) class AbstractPrivateRendition(AbstractRendition): diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index 306ef9c8..a36a0f5b 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -170,23 +170,12 @@ def test_get_privacy_controlled_serve_urls(self): image = ImageFactory(collection=self.root_collection) self.assertFalse(list(image.get_privacy_controlled_serve_urls(sites))) - # If an image has renditions, the result should be a list of serve URLs, prefixed - # with the root url for each supplied site - renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + # Even when an image has renditions, the result should be empty, because + # image serve URLs are exempt from caching, so don't need to be purged + image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) expected_result = [] - for rendition in renditions.values(): - expected_result.extend( - [ - f"http://localhost{rendition.url}", - f"https://foo.com{rendition.url}", - ] - ) - with self.assertNumQueries(1): - self.assertEqual(list(image.get_privacy_controlled_serve_urls(sites)), expected_result) - - # If no sites are provided, the result should be empty with self.assertNumQueries(0): - self.assertFalse(list(image.get_privacy_controlled_serve_urls([]))) + self.assertEqual(list(image.get_privacy_controlled_serve_urls(sites)), expected_result) def test_get_privacy_controlled_file_urls(self): """Test the behaviour of PrivateImageMixin.get_privacy_controlled_file_urls.""" @@ -279,18 +268,15 @@ def test_bulk_make_public(self): """Test the behaviour of PrivateImageManager.bulk_make_public().""" # Three image are already public, so only three should be updated qs = self.model.objects.all().prefetch_related("renditions") - with self.assertNumQueries(4): + with self.assertNumQueries(3): # Query summary: # 1. to fetch the images # 2. to prefetch renditions # 3. to save updates - # 4. to fetch sites to facilitate cache purging self.assertEqual(self.model.objects.bulk_make_public(qs), 3) - # Serve URLs for private image renditions should have been purged as part of the update - for obj in self.private_images: - for rendition in obj.renditions.all(): - self.assertIn("http://localhost" + rendition.url, PURGED_URLS) + # No urls should have been purged as part of the update + self.assertEqual(PURGED_URLS, []) # Verify all images are now public for obj in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): @@ -307,18 +293,15 @@ def test_bulk_make_private(self): """Test the behaviour of PrivateImageManager.bulk_make_private().""" # Three images are already private, so only three should be updated qs = self.model.objects.all().prefetch_related("renditions") - with self.assertNumQueries(4): + with self.assertNumQueries(3): # Query summary: # 1. to fetch the images # 2. to prefetch renditions # 3. to save updates - # 4. to fetch sites to facilitate cache purging self.assertEqual(self.model.objects.bulk_make_private(qs), 3) - # Serve URLs for public image renditions should have been purged as part of the update - for obj in self.public_images: - for rendition in obj.renditions.all(): - self.assertIn("http://localhost" + rendition.serve_url, PURGED_URLS) + # No urls should have been purged as part of the update + self.assertEqual(PURGED_URLS, []) # Verify all images are now private for image in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): diff --git a/cms/urls.py b/cms/urls.py index e2bfe623..ccb092dd 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -25,6 +25,7 @@ private_urlpatterns = [ path("documents/", include(wagtaildocs_urls)), path("-/", include((internal_urlpatterns, "internal"))), + re_path(r"^images/([^/]*)/(\d*)/([^/]*)/[^/]*$", PrivateImageServeView.as_view(), name="wagtailimages_serve"), ] # `wagtail.admin` must always be installed, @@ -81,7 +82,6 @@ # Public URLs that are meant to be cached. urlpatterns = [ path("sitemap.xml", sitemap), - re_path(r"^images/([^/]*)/(\d*)/([^/]*)/[^/]*$", PrivateImageServeView.as_view(), name="wagtailimages_serve"), ] # Set public URLs to use the "default" cache settings. urlpatterns = decorate_urlpatterns(urlpatterns, get_default_cache_control_decorator()) From f7c46e0ac723804ca31151133198afaf9757b467 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 12:48:33 +0000 Subject: [PATCH 18/63] Update signal handlers to only fetch objects that are public or private where relevant --- cms/private_media/signal_handlers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cms/private_media/signal_handlers.py b/cms/private_media/signal_handlers.py index 1e11f262..b8b41091 100644 --- a/cms/private_media/signal_handlers.py +++ b/cms/private_media/signal_handlers.py @@ -7,6 +7,7 @@ from wagtail.models import Page, ReferenceIndex from wagtail.signals import page_published, page_unpublished, published, unpublished +from cms.private_media.constants import Privacy from cms.private_media.models import PrivateImageMixin from cms.private_media.utils import get_private_media_models @@ -29,7 +30,7 @@ def publish_media_on_publish(instance: "Model", **kwargs: Any) -> None: .values_list("int_object_id", flat=True) .distinct() ) - queryset = model_class.objects.filter(pk__in=referenced_pks) + queryset = model_class.objects.filter(pk__in=referenced_pks, _privacy=Privacy.PRIVATE) if issubclass(model_class, PrivateImageMixin): queryset = queryset.prefetch_related("renditions") @@ -65,7 +66,7 @@ def unpublish_media_on_unpublish(instance: "Model", **kwargs: Any) -> None: ) queryset = ( model_class.objects.all() - .filter(pk__in=[int(pk) for pk in referenced_pks]) + .filter(pk__in=[int(pk) for pk in referenced_pks], _privacy=Privacy.PUBLIC) .exclude(pk__in=referenced_by_other_live_object_pks) ) if issubclass(model_class, PrivateImageMixin): From 6fc3c9884d160d1dd312351d186db36fa0e3150b Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 15:34:55 +0000 Subject: [PATCH 19/63] Add tests for PrivacySettingS3Storage --- cms/private_media/tests/test_storages.py | 50 ++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 cms/private_media/tests/test_storages.py diff --git a/cms/private_media/tests/test_storages.py b/cms/private_media/tests/test_storages.py new file mode 100644 index 00000000..673dcc03 --- /dev/null +++ b/cms/private_media/tests/test_storages.py @@ -0,0 +1,50 @@ +from unittest import mock + +from botocore.exceptions import ClientError +from django.test import TestCase +from wagtail_factories import DocumentFactory + +from cms.private_media.storages import PrivacySettingS3Storage + + +class PrivacySettingS3StorageTests(TestCase): + @classmethod + def setUpTestData(cls): + cls.document = DocumentFactory() + + def setUp(self): + self.storage = PrivacySettingS3Storage() + + bucket_mock = mock.MagicMock() + bucket_mock.Object = mock.MagicMock() + object_mock = mock.MagicMock() + bucket_mock.Object.return_value = object_mock + acl_mock = mock.MagicMock() + object_mock.Acl.return_value = acl_mock + + # This should make self.storage.bucket() return our customised MagicMock instance + self.storage._bucket = bucket_mock # pylint: disable=protected-access + + # Make the mock objects available for use in tests + self.bucket_mock = bucket_mock + self.object_mock = object_mock + self.acl_mock = acl_mock + + def test_make_private(self): + self.storage.make_private(self.document.file) + self.bucket_mock.Object.assert_called_once_with(self.document.file.name) + self.object_mock.Acl.assert_called_once() + self.acl_mock.put.assert_called_once_with(ACL=self.storage.private_acl_name) + + def test_make_public(self): + self.storage.make_public(self.document.file) + self.object_mock.Acl.assert_called_once() + self.acl_mock.put.assert_called_once_with(ACL=self.storage.public_acl_name) + + def test_client_error_on_acl_get(self): + self.object_mock.Acl.side_effect = ClientError({"Error": {"Code": "AccessDenied"}}, "GetObjectAcl") + self.assertFalse(self.storage.make_private(self.document.file)) + + def test_client_error_on_acl_set(self): + self.acl_mock.put.side_effect = ClientError({"Error": {"Code": "AccessDenied"}}, "PutObjectAcl") + self.assertFalse(self.storage.make_private(self.document.file)) From 5919d8303947a932d006c8b459ea63ab12f16ca0 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 15:46:30 +0000 Subject: [PATCH 20/63] Be consistant in checking mocks across test_make_public and test_make_private --- cms/private_media/tests/test_storages.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cms/private_media/tests/test_storages.py b/cms/private_media/tests/test_storages.py index 673dcc03..ca56d13d 100644 --- a/cms/private_media/tests/test_storages.py +++ b/cms/private_media/tests/test_storages.py @@ -38,6 +38,7 @@ def test_make_private(self): def test_make_public(self): self.storage.make_public(self.document.file) + self.bucket_mock.Object.assert_called_once_with(self.document.file.name) self.object_mock.Acl.assert_called_once() self.acl_mock.put.assert_called_once_with(ACL=self.storage.public_acl_name) From 22007bb8e792158d26ac80db340411781cd91e95 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 3 Dec 2024 20:02:57 +0000 Subject: [PATCH 21/63] Use mock_aws instead --- cms/private_media/tests/test_storages.py | 75 +++++++++++++----------- poetry.lock | 2 +- pyproject.toml | 2 +- 3 files changed, 44 insertions(+), 35 deletions(-) diff --git a/cms/private_media/tests/test_storages.py b/cms/private_media/tests/test_storages.py index ca56d13d..6225f1ab 100644 --- a/cms/private_media/tests/test_storages.py +++ b/cms/private_media/tests/test_storages.py @@ -1,51 +1,60 @@ from unittest import mock from botocore.exceptions import ClientError -from django.test import TestCase +from django.conf import settings +from django.test import TestCase, override_settings +from moto import mock_aws from wagtail_factories import DocumentFactory from cms.private_media.storages import PrivacySettingS3Storage +@mock_aws +@override_settings(AWS_STORAGE_BUCKET_NAME="test-bucket") class PrivacySettingS3StorageTests(TestCase): @classmethod def setUpTestData(cls): cls.document = DocumentFactory() def setUp(self): + super().setUp() self.storage = PrivacySettingS3Storage() - - bucket_mock = mock.MagicMock() - bucket_mock.Object = mock.MagicMock() + # Create a bucket to allow '_set_file_acl' to at least reach the 'put' stage + bucket = self.storage.connection.Bucket(settings.AWS_STORAGE_BUCKET_NAME) + bucket.create() + + def test_make_private_client_error_on_acl_set(self): + with self.assertLogs("cms.private_media.storages", level="WARNING") as logs: + self.assertFalse(self.storage.make_private(self.document.file)) + self.assertIn( + f"WARNING:cms.private_media.storages:Failed to set ACL for {self.document.file.name}: ", + logs.output[0], + ) + + def test_make_private_client_error_on_acl_get(self): object_mock = mock.MagicMock() - bucket_mock.Object.return_value = object_mock + object_mock.Acl.side_effect = ClientError({"Error": {"Code": "AccessDenied"}}, "GetObjectAcl") + # initialize the bucket to allow it to be patched + self.storage.bucket # noqa: B018 # pylint: disable=pointless-statement + with mock.patch.object(self.storage._bucket, "Object", return_value=object_mock): # pylint: disable=protected-access # noqa: SIM117 + with self.assertLogs("cms.private_media.storages", level="WARNING") as logs: + self.assertFalse(self.storage.make_private(self.document.file)) + self.assertIn( + f"WARNING:cms.private_media.storages:Failed to retrieve ACL for {self.document.file.name}: ", + logs.output[0], + ) + + def test_make_public_success(self): acl_mock = mock.MagicMock() + acl_mock.put.return_value = True + object_mock = mock.MagicMock() object_mock.Acl.return_value = acl_mock - - # This should make self.storage.bucket() return our customised MagicMock instance - self.storage._bucket = bucket_mock # pylint: disable=protected-access - - # Make the mock objects available for use in tests - self.bucket_mock = bucket_mock - self.object_mock = object_mock - self.acl_mock = acl_mock - - def test_make_private(self): - self.storage.make_private(self.document.file) - self.bucket_mock.Object.assert_called_once_with(self.document.file.name) - self.object_mock.Acl.assert_called_once() - self.acl_mock.put.assert_called_once_with(ACL=self.storage.private_acl_name) - - def test_make_public(self): - self.storage.make_public(self.document.file) - self.bucket_mock.Object.assert_called_once_with(self.document.file.name) - self.object_mock.Acl.assert_called_once() - self.acl_mock.put.assert_called_once_with(ACL=self.storage.public_acl_name) - - def test_client_error_on_acl_get(self): - self.object_mock.Acl.side_effect = ClientError({"Error": {"Code": "AccessDenied"}}, "GetObjectAcl") - self.assertFalse(self.storage.make_private(self.document.file)) - - def test_client_error_on_acl_set(self): - self.acl_mock.put.side_effect = ClientError({"Error": {"Code": "AccessDenied"}}, "PutObjectAcl") - self.assertFalse(self.storage.make_private(self.document.file)) + # initialize the bucket to allow it to be patched + self.storage.bucket # noqa: B018 # pylint: disable=pointless-statement + with mock.patch.object(self.storage._bucket, "Object", return_value=object_mock): # pylint: disable=protected-access # noqa: SIM117 + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: + self.assertTrue(self.storage.make_public(self.document.file)) + self.assertIn( + f"INFO:cms.private_media.storages:ACL set successfully for {self.document.file}", + logs.output, + ) diff --git a/poetry.lock b/poetry.lock index 03da1f8b..df0c5eca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2368,4 +2368,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "d6f3cd9d5d16ec644f0e0abfe25470f9d0baa2897b861f71ddcafa7987095dd3" +content-hash = "cf787ab3dc2de3ad1365016a4e4537bdd8cdc18d3c2d60f677ca586d995f02eb" diff --git a/pyproject.toml b/pyproject.toml index a7808c98..18e0ddc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ Werkzeug = "~3.0.6" setuptools = "^75.2.0" honcho = "^2.0.0" detect-secrets = "^1.5.0" -moto = {extras = ["iam"], version = "^5.0.21"} +moto = {version = "^5.0.22", extras = ["s3,iam"]} [tool.ruff] target-version = "py312" From b3fa22e7c739ee62491d9038046a1c7c1085d1bf Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Wed, 4 Dec 2024 09:20:08 +0000 Subject: [PATCH 22/63] Add note to docs about image serve view responses not being cached --- docs/custom-features/media_privacy.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index d01c16e3..643014b0 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -32,3 +32,5 @@ The custom `PrivacySettingS3Storage` class logs failed file-setting attempts usi Because serve view responses and direct file urls can be cached, it's important that when the privacy of a media item changes, the cache is invalidated for any affected URLs. In environments where Wagtail's front-end-cache app is configured, all relevant URLs should be collected into a batch and sent to the relevant backend to initiate purging. + +NOTE: Image serve view responses are not cached, so don't need to be invalidated. From 8b19623e83e94c36d782935a0ceea7e7b8cf6433 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Thu, 5 Dec 2024 10:12:58 +0000 Subject: [PATCH 23/63] Use 'info' to log unsupported file permission setting attempts --- cms/private_media/bulk_operations.py | 2 +- cms/private_media/tests/test_documents.py | 27 ++++++++++++------- cms/private_media/tests/test_images.py | 32 ++++++++++++++++------- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index ad681149..6ffb47e8 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -39,7 +39,7 @@ def set_file_permission_and_report(file: "FieldFile") -> None: handler = getattr(storage, "make_public", None) if handler is None: - logger.debug( + logger.info( "%s does not support setting of individual file permissions to %s, so skipping for: %s.", storage.__class__.__name__, intended_privacy, diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index e854b8e1..e3257257 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -35,7 +35,7 @@ def test_private_document(self): - Ensure file permission updates are handled correctly. - Check file permission outdated status. """ - with self.assertLogs("cms.private_media.bulk_operations", level="DEBUG") as logs: + with self.assertLogs("cms.private_media.bulk_operations", level="INFO") as logs: document = DocumentFactory(collection=self.root_collection) # Documents should be 'private' by default @@ -49,7 +49,7 @@ def test_private_document(self): logs.output, [ ( - "DEBUG:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + "INFO:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " f"file permissions to private, so skipping for: {document.file.name}." ) ], @@ -82,7 +82,7 @@ def test_public_document(self): - Ensure file permission updates are handled correctly. - Check file permission outdated status. """ - with self.assertLogs("cms.private_media.bulk_operations", level="DEBUG") as logs: + with self.assertLogs("cms.private_media.bulk_operations", level="INFO") as logs: document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) # This document should be 'public' @@ -96,7 +96,7 @@ def test_public_document(self): logs.output, [ ( - "DEBUG:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + "INFO:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " f"file permissions to public, so skipping for: {document.file.name}." ) ], @@ -159,7 +159,10 @@ def domain_prefixed_url(name: str) -> str: """Replacement for FileSystemStorage.url() that returns fully-fledged URLs.""" return f"https://media.example.com/{name}" - with mock.patch("django.core.files.storage.FileSystemStorage.url", side_effect=domain_prefixed_url): + with mock.patch( + "django.core.files.storage.FileSystemStorage.url", + side_effect=domain_prefixed_url, + ): self.assertEqual( list(document.get_privacy_controlled_file_urls()), [ @@ -175,7 +178,7 @@ def test_file_permission_setting_success(self): - Verify permissions are set correctly for both public and private documents. - Ensure no debug logs are generated during successful operation. """ - with self.assertNoLogs("cms.private_media.bulk_operations", level="DEBUG"): + with self.assertNoLogs("cms.private_media.bulk_operations"): private_document = DocumentFactory(collection=self.root_collection) public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) @@ -185,15 +188,21 @@ def test_file_permission_setting_success(self): @override_settings( STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} ) - @mock.patch("cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_private", return_value=False) - @mock.patch("cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_public", return_value=False) + @mock.patch( + "cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_private", + return_value=False, + ) + @mock.patch( + "cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_public", + return_value=False, + ) def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): """Test handling of file permission setting failures: - Mock failed permission settings for both public and private documents. - Verify documents are marked as having outdated permissions. - Ensure correct storage methods are called. """ - with self.assertNoLogs("cms.private_media.bulk_operations", level="DEBUG"): + with self.assertNoLogs("cms.private_media.bulk_operations"): private_document = DocumentFactory(collection=self.root_collection) public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index a36a0f5b..656428b3 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -41,7 +41,7 @@ def test_private_image(self): """ # Attempts to set file permissions on save should have failed gracefully, # since the default file backend doesn't support it - with self.assertLogs("cms.private_media.bulk_operations", level="DEBUG") as logs: + with self.assertLogs("cms.private_media.bulk_operations", level="INFO") as logs: image = ImageFactory(collection=self.root_collection) # Images should be 'private' by default @@ -54,7 +54,7 @@ def test_private_image(self): logs.output, [ ( - "DEBUG:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + "INFO:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " f"file permissions to private, so skipping for: {image.file.name}." ) ], @@ -101,7 +101,7 @@ def test_public_image(self): """ # Attempts to set file permissions on save should have failed gracefully, # since the default file backend doesn't support it - with self.assertLogs("cms.private_media.bulk_operations", level="DEBUG") as logs: + with self.assertLogs("cms.private_media.bulk_operations", level="INFO") as logs: image = ImageFactory(_privacy=Privacy.PUBLIC) # This image should be 'public' @@ -114,7 +114,7 @@ def test_public_image(self): logs.output, [ ( - "DEBUG:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + "INFO:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " f"file permissions to public, so skipping for: {image.file.name}." ) ], @@ -156,7 +156,10 @@ def test_public_image_renditions(self): self.assertIn("-public-", rendition.get_cache_key()) # However, if the file permissions are outdated, rendition.url should return a serve URL - with mock.patch("cms.private_media.models.PrivateMediaMixin.file_permissions_are_outdated", return_value=True): + with mock.patch( + "cms.private_media.models.PrivateMediaMixin.file_permissions_are_outdated", + return_value=True, + ): for rendition in renditions.values(): self.assertEqual(rendition.url, rendition.serve_url) @@ -193,7 +196,10 @@ def domain_prefixed_url(name: str) -> str: """Replacement for FileSystemStorage.url() that returns fully-fledged URLs.""" return f"https://media.example.com/{name}" - with mock.patch("django.core.files.storage.FileSystemStorage.url", side_effect=domain_prefixed_url): + with mock.patch( + "django.core.files.storage.FileSystemStorage.url", + side_effect=domain_prefixed_url, + ): expected_result = [f"https://media.example.com/{image.file.name}"] for rendition in renditions.values(): expected_result.append(f"https://media.example.com/{rendition.file.name}") @@ -209,7 +215,7 @@ def test_invalid_privacy_value_raises_value_error(self): ) def test_file_permission_setting_success(self): """Test successful file permission setting using a storage backend that supports it.""" - with self.assertNoLogs("cms.private_media.bulk_operations", level="DEBUG"): + with self.assertNoLogs("cms.private_media.bulk_operations"): private_image = ImageFactory(collection=self.root_collection) public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) @@ -219,15 +225,21 @@ def test_file_permission_setting_success(self): @override_settings( STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} ) - @mock.patch("cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_private", return_value=False) - @mock.patch("cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_public", return_value=False) + @mock.patch( + "cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_private", + return_value=False, + ) + @mock.patch( + "cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_public", + return_value=False, + ) def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): """Test graceful handling of file permission setting failures. Verifies that the system correctly tracks failed permission updates and maintains the outdated state when storage operations fail. """ - with self.assertNoLogs("cms.private_media.bulk_operations", level="DEBUG"): + with self.assertNoLogs("cms.private_media.bulk_operations"): private_image = ImageFactory(collection=self.root_collection) public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) From e1ecdb3ebf071725c4e1e98515bbdf27d20fadca Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 09:52:51 +0000 Subject: [PATCH 24/63] Fix moto extras in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 18e0ddc9..ec913546 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ Werkzeug = "~3.0.6" setuptools = "^75.2.0" honcho = "^2.0.0" detect-secrets = "^1.5.0" -moto = {version = "^5.0.22", extras = ["s3,iam"]} +moto = {version = "^5.0.22", extras = ["s3","iam"]} [tool.ruff] target-version = "py312" From 0621789d601a2ddc68f8c640ac62bb49f5654b28 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 10:01:18 +0000 Subject: [PATCH 25/63] Regenerate lock --- poetry.lock | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index df0c5eca..4538ec3a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1234,7 +1234,9 @@ boto3 = ">=1.9.201" botocore = ">=1.14.0,<1.35.45 || >1.35.45,<1.35.46 || >1.35.46" cryptography = ">=3.3.1" Jinja2 = ">=2.10.1" +py-partiql-parser = {version = "0.5.6", optional = true, markers = "extra == \"s3\""} python-dateutil = ">=2.1,<3.0.0" +PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"s3\""} requests = ">=2.5" responses = ">=0.15.0" werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1" @@ -1583,6 +1585,20 @@ urwid-readline = "*" [package.extras] completion = ["shtab"] +[[package]] +name = "py-partiql-parser" +version = "0.5.6" +description = "Pure Python PartiQL Parser" +optional = false +python-versions = "*" +files = [ + {file = "py_partiql_parser-0.5.6-py2.py3-none-any.whl", hash = "sha256:622d7b0444becd08c1f4e9e73b31690f4b1c309ab6e5ed45bf607fe71319309f"}, + {file = "py_partiql_parser-0.5.6.tar.gz", hash = "sha256:6339f6bf85573a35686529fc3f491302e71dd091711dfe8df3be89a93767f97b"}, +] + +[package.extras] +dev = ["black (==22.6.0)", "flake8", "mypy", "pytest"] + [[package]] name = "pycparser" version = "2.22" @@ -2368,4 +2384,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "cf787ab3dc2de3ad1365016a4e4537bdd8cdc18d3c2d60f677ca586d995f02eb" +content-hash = "4eacd351052bb5b75075c16fd4ebb1c05e0545ca8061e5ded46598ae58309106" From 585f90f1690a2730e3bc4dddfdc6baad6d9f7022 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 10:22:52 +0000 Subject: [PATCH 26/63] Don't double up on publish/unpublish signals for pages --- cms/private_media/signal_handlers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cms/private_media/signal_handlers.py b/cms/private_media/signal_handlers.py index b8b41091..913c5a0e 100644 --- a/cms/private_media/signal_handlers.py +++ b/cms/private_media/signal_handlers.py @@ -5,7 +5,7 @@ from django.db.models import IntegerField from django.db.models.functions import Cast from wagtail.models import Page, ReferenceIndex -from wagtail.signals import page_published, page_unpublished, published, unpublished +from wagtail.signals import published, unpublished from cms.private_media.constants import Privacy from cms.private_media.models import PrivateImageMixin @@ -83,7 +83,5 @@ def unpublish_media_on_unpublish(instance: "Model", **kwargs: Any) -> None: def register_signal_handlers() -> None: """Register signal handlers for models using the private media system.""" - page_published.connect(publish_media_on_publish, dispatch_uid="publish_media") - page_unpublished.connect(unpublish_media_on_unpublish, dispatch_uid="unpublish_media") published.connect(publish_media_on_publish, dispatch_uid="publish_media") unpublished.connect(unpublish_media_on_unpublish, dispatch_uid="unpublish_media") From 52267f27ca61c2f9de0ff8ef4cdb8e45cdbdf8b9 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 10:48:39 +0000 Subject: [PATCH 27/63] Utilize assertIsSubclass in tests --- cms/private_media/tests/test_documents.py | 2 +- cms/private_media/tests/test_images.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index e3257257..110695f8 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -19,7 +19,7 @@ def test_document_model_using_private_document_mixin(self): And that the manager is a subclass of PrivateDocumentManager. """ document_model = get_document_model() - self.assertTrue(issubclass(document_model, PrivateDocumentMixin)) + self.assertIsSubclass(document_model, PrivateDocumentMixin) self.assertIsInstance(document_model.objects, PrivateDocumentManager) diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index 656428b3..d24bfb9c 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -18,13 +18,13 @@ class TestModelConfiguration(SimpleTestCase): def test_image_model_using_private_image_mixin(self): """Verify that the configured image model inherits from PrivateImageMixin.""" image_model = get_image_model() - self.assertTrue(issubclass(image_model, PrivateImageMixin)) + self.assertIsSubclass(image_model, PrivateImageMixin) self.assertIsInstance(image_model.objects, PrivateImageManager) def test_rendition_model_using_abstract_private_rendition(self): """Verify that the configured rendition model inherits from AbstractPrivateRendition.""" rendition_model = get_image_model().get_rendition_model() - self.assertTrue(issubclass(rendition_model, AbstractPrivateRendition)) + self.assertIsSubclass(rendition_model, AbstractPrivateRendition) class TestImageModel(TestCase): From 10fdba70c715d173719e8eca174c315fe4cef73e Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 11:01:02 +0000 Subject: [PATCH 28/63] Log ACL set/put errors as exceptions --- cms/private_media/storages.py | 4 ++-- cms/private_media/tests/test_storages.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index 451ae6c5..508f1600 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -30,12 +30,12 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: try: obj_acl = obj.Acl() except ClientError as e: - logger.warning("Failed to retrieve ACL for %s: %s", file.name, repr(e)) + logger.exception("Failed to retrieve ACL for %s", file.name, e) return False try: obj_acl.put(ACL=acl_name) except ClientError as e: - logger.warning("Failed to set ACL for %s: %s", file.name, repr(e)) + logger.exception("Failed to set ACL for %s", file.name, e) return False logger.info("ACL set successfully for %s", file.name) diff --git a/cms/private_media/tests/test_storages.py b/cms/private_media/tests/test_storages.py index 6225f1ab..ffd681d7 100644 --- a/cms/private_media/tests/test_storages.py +++ b/cms/private_media/tests/test_storages.py @@ -24,10 +24,10 @@ def setUp(self): bucket.create() def test_make_private_client_error_on_acl_set(self): - with self.assertLogs("cms.private_media.storages", level="WARNING") as logs: + with self.assertLogs("cms.private_media.storages", level="EXCEPTION") as logs: self.assertFalse(self.storage.make_private(self.document.file)) self.assertIn( - f"WARNING:cms.private_media.storages:Failed to set ACL for {self.document.file.name}: ", + f"EXCEPTION:cms.private_media.storages:Failed to set ACL for {self.document.file.name}", logs.output[0], ) @@ -37,10 +37,10 @@ def test_make_private_client_error_on_acl_get(self): # initialize the bucket to allow it to be patched self.storage.bucket # noqa: B018 # pylint: disable=pointless-statement with mock.patch.object(self.storage._bucket, "Object", return_value=object_mock): # pylint: disable=protected-access # noqa: SIM117 - with self.assertLogs("cms.private_media.storages", level="WARNING") as logs: + with self.assertLogs("cms.private_media.storages", level="EXCEPTION") as logs: self.assertFalse(self.storage.make_private(self.document.file)) self.assertIn( - f"WARNING:cms.private_media.storages:Failed to retrieve ACL for {self.document.file.name}: ", + f"EXCEPTION:cms.private_media.storages:Failed to retrieve ACL for {self.document.file.name}", logs.output[0], ) From f67ac65c035666c2a343dfbb5222dfc577d296ad Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 11:06:27 +0000 Subject: [PATCH 29/63] Test serve views with IS_EXTERNAL_ENV=True --- cms/private_media/tests/test_documents.py | 5 +++++ cms/private_media/tests/test_images.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index 110695f8..4a0e6cef 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -318,3 +318,8 @@ def test_public_document(self): self.assertTrue(self.public_document.file_permissions_are_outdated()) response = self.client.get(self.public_document.url) self.assertEqual(response.status_code, 200) + + +@override_settings(IS_EXTERNAL_ENV=True) +class TestExternalEnvDocumentServeView(TestDocumentServeView): + pass diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index d24bfb9c..d3929567 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -367,3 +367,8 @@ def test_public_image(self): for rendition in self.public_image_renditions.values(): response = self.client.get(rendition.serve_url) self.assertEqual(response.status_code, 200) + + +@override_settings(IS_EXTERNAL_ENV=True) +class TestExternalEnvImageServeView(TestImageServeView): + pass From 8037f3d883e83ef07bf029d00572430706776643 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 11:10:35 +0000 Subject: [PATCH 30/63] Fixup for 10fdba7 --- cms/private_media/storages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index 508f1600..a9774d31 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -29,13 +29,13 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: obj = self.bucket.Object(file.name) try: obj_acl = obj.Acl() - except ClientError as e: - logger.exception("Failed to retrieve ACL for %s", file.name, e) + except ClientError: + logger.exception("Failed to retrieve ACL for %s", file.name) return False try: obj_acl.put(ACL=acl_name) - except ClientError as e: - logger.exception("Failed to set ACL for %s", file.name, e) + except ClientError: + logger.exception("Failed to set ACL for %s", file.name) return False logger.info("ACL set successfully for %s", file.name) From f0c5d04e0b8466533ff694982a9616a2bafc79ba Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 11:17:21 +0000 Subject: [PATCH 31/63] Revert "Utilize assertIsSubclass in tests" This reverts commit 52267f27ca61c2f9de0ff8ef4cdb8e45cdbdf8b9. --- cms/private_media/tests/test_documents.py | 2 +- cms/private_media/tests/test_images.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index 4a0e6cef..b17e66e0 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -19,7 +19,7 @@ def test_document_model_using_private_document_mixin(self): And that the manager is a subclass of PrivateDocumentManager. """ document_model = get_document_model() - self.assertIsSubclass(document_model, PrivateDocumentMixin) + self.assertTrue(issubclass(document_model, PrivateDocumentMixin)) self.assertIsInstance(document_model.objects, PrivateDocumentManager) diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index d3929567..945bb70d 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -18,13 +18,13 @@ class TestModelConfiguration(SimpleTestCase): def test_image_model_using_private_image_mixin(self): """Verify that the configured image model inherits from PrivateImageMixin.""" image_model = get_image_model() - self.assertIsSubclass(image_model, PrivateImageMixin) + self.assertTrue(issubclass(image_model, PrivateImageMixin)) self.assertIsInstance(image_model.objects, PrivateImageManager) def test_rendition_model_using_abstract_private_rendition(self): """Verify that the configured rendition model inherits from AbstractPrivateRendition.""" rendition_model = get_image_model().get_rendition_model() - self.assertIsSubclass(rendition_model, AbstractPrivateRendition) + self.assertTrue(issubclass(rendition_model, AbstractPrivateRendition)) class TestImageModel(TestCase): From aa86f2da29cf8af318d1e70e6aecd65650022434 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 11:22:11 +0000 Subject: [PATCH 32/63] Fix storage tests --- cms/private_media/tests/test_storages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cms/private_media/tests/test_storages.py b/cms/private_media/tests/test_storages.py index ffd681d7..2dee8a41 100644 --- a/cms/private_media/tests/test_storages.py +++ b/cms/private_media/tests/test_storages.py @@ -24,10 +24,10 @@ def setUp(self): bucket.create() def test_make_private_client_error_on_acl_set(self): - with self.assertLogs("cms.private_media.storages", level="EXCEPTION") as logs: + with self.assertLogs("cms.private_media.storages", level="ERROR") as logs: self.assertFalse(self.storage.make_private(self.document.file)) self.assertIn( - f"EXCEPTION:cms.private_media.storages:Failed to set ACL for {self.document.file.name}", + f"ERROR:cms.private_media.storages:Failed to set ACL for {self.document.file.name}", logs.output[0], ) @@ -37,10 +37,10 @@ def test_make_private_client_error_on_acl_get(self): # initialize the bucket to allow it to be patched self.storage.bucket # noqa: B018 # pylint: disable=pointless-statement with mock.patch.object(self.storage._bucket, "Object", return_value=object_mock): # pylint: disable=protected-access # noqa: SIM117 - with self.assertLogs("cms.private_media.storages", level="EXCEPTION") as logs: + with self.assertLogs("cms.private_media.storages", level="ERROR") as logs: self.assertFalse(self.storage.make_private(self.document.file)) self.assertIn( - f"EXCEPTION:cms.private_media.storages:Failed to retrieve ACL for {self.document.file.name}", + f"ERROR:cms.private_media.storages:Failed to retrieve ACL for {self.document.file.name}", logs.output[0], ) From 439d5a1b04bbe433a3426e1c77db37da5cdc8c1b Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 11:23:15 +0000 Subject: [PATCH 33/63] Use InMemoryStorage for DummyPrivacySettingFileSystemStorage --- cms/private_media/storages.py | 4 ++-- cms/private_media/tests/test_documents.py | 14 +++++--------- cms/private_media/tests/test_images.py | 12 ++++-------- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index a9774d31..bf871142 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING from botocore.exceptions import ClientError -from django.core.files.storage import FileSystemStorage +from django.core.files.storage import InMemoryStorage from storages.backends.s3 import S3Storage if TYPE_CHECKING: @@ -42,7 +42,7 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: return True -class DummyPrivacySettingFileSystemStorage(FileSystemStorage): +class DummyPrivacySettingStorage(InMemoryStorage): """Dummy storage class for use in tests.""" def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index b17e66e0..0e60cd6b 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -170,11 +170,9 @@ def domain_prefixed_url(name: str) -> str: ], ) - @override_settings( - STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} - ) + @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingStorage"}}) def test_file_permission_setting_success(self): - """Test successful file permission setting using DummyPrivacySettingFileSystemStorage: + """Test successful file permission setting using DummyPrivacySettingStorage: - Verify permissions are set correctly for both public and private documents. - Ensure no debug logs are generated during successful operation. """ @@ -185,15 +183,13 @@ def test_file_permission_setting_success(self): self.assertFalse(private_document.file_permissions_are_outdated()) self.assertFalse(public_document.file_permissions_are_outdated()) - @override_settings( - STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} - ) + @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingStorage"}}) @mock.patch( - "cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_private", + "cms.private_media.storages.DummyPrivacySettingStorage.make_private", return_value=False, ) @mock.patch( - "cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_public", + "cms.private_media.storages.DummyPrivacySettingStorage.make_public", return_value=False, ) def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index 945bb70d..d2e41299 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -210,9 +210,7 @@ def test_invalid_privacy_value_raises_value_error(self): with self.assertRaises(ValueError): ImageFactory(_privacy="invalid", collection=self.root_collection) - @override_settings( - STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} - ) + @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingStorage"}}) def test_file_permission_setting_success(self): """Test successful file permission setting using a storage backend that supports it.""" with self.assertNoLogs("cms.private_media.bulk_operations"): @@ -222,15 +220,13 @@ def test_file_permission_setting_success(self): self.assertFalse(private_image.file_permissions_are_outdated()) self.assertFalse(public_image.file_permissions_are_outdated()) - @override_settings( - STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingFileSystemStorage"}} - ) + @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingStorage"}}) @mock.patch( - "cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_private", + "cms.private_media.storages.DummyPrivacySettingStorage.make_private", return_value=False, ) @mock.patch( - "cms.private_media.storages.DummyPrivacySettingFileSystemStorage.make_public", + "cms.private_media.storages.DummyPrivacySettingStorage.make_public", return_value=False, ) def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): From 0a31a10878418b3535d8f7db3b061b159f5c47e9 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 11:50:11 +0000 Subject: [PATCH 34/63] Re-run only specific (read-only) tests with IS_EXTERNAL_ENV=True --- cms/private_media/tests/test_documents.py | 29 ++++++++++-------- cms/private_media/tests/test_images.py | 36 ++++++++++++++--------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index 0e60cd6b..a8ce9cec 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -290,32 +290,35 @@ def setUpTestData(cls): cls.public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=cls.root_collection) cls.superuser = get_user_model().objects.create(username="superuser", is_superuser=True) - def test_private_document(self): + def test_serve_private_document(self): """Test the serve view behaviour for private documents.""" # If not authenticated, permission checks should fail and a Forbidden response returned - response = self.client.get(self.private_document.url) - self.assertEqual(response.status_code, 403) + for is_external_env in [True, False]: + with self.subTest(is_external_env=is_external_env) and override_settings(IS_EXTERNAL_ENV=is_external_env): + response = self.client.get(self.private_document.url) + self.assertEqual(response.status_code, 403) # If authenticated as a superuser, the view should serve the files self.client.force_login(self.superuser) - response = self.client.get(self.private_document.url) - self.assertEqual(response.status_code, 200) + for is_external_env in [True, False]: + with self.subTest(is_external_env=is_external_env) and override_settings(IS_EXTERNAL_ENV=is_external_env): + response = self.client.get(self.private_document.url) + self.assertEqual(response.status_code, 200) - def test_public_document(self): + def test_serve_public_document(self): """Test the serve view behaviour for public documents.""" # For public documents, the serve view should redirect to the file URL. response = self.client.get(self.public_document.url) self.assertEqual(response.status_code, 200) - # This remains the same for public documents, regardless of whether - # document.file_permissions_are_outdated() returns True + @override_settings(IS_EXTERNAL_ENV=True) + def test_serve_public_document_external_env(self): + self.test_serve_public_document() + + def test_serve_public_document_with_outdated_file_permissions(self): + """Test the serve view behaviour for public documents with outdated file permissions.""" self.model.objects.filter(id=self.public_document.id).update(file_permissions_last_set=None) self.public_document.refresh_from_db() self.assertTrue(self.public_document.file_permissions_are_outdated()) response = self.client.get(self.public_document.url) self.assertEqual(response.status_code, 200) - - -@override_settings(IS_EXTERNAL_ENV=True) -class TestExternalEnvDocumentServeView(TestDocumentServeView): - pass diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index d2e41299..b1297340 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -335,36 +335,44 @@ def setUpTestData(cls): cls.public_image_renditions = cls.public_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) cls.superuser = get_user_model().objects.create(username="superuser", is_superuser=True) - def test_private_image(self): + def test_serve_private_image(self): """Test the serve view behaviour for private image renditions.""" # If not authenticated, permission checks should fail and a Forbidden response returned for rendition in self.private_image_renditions.values(): - response = self.client.get(rendition.serve_url) - self.assertEqual(response.status_code, 403) + for is_external_env in [True, False]: + with self.subTest(is_external_env=is_external_env) and override_settings( + IS_EXTERNAL_ENV=is_external_env + ): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 403) # If authenticated as a superuser, the view should serve the files self.client.force_login(self.superuser) for rendition in self.private_image_renditions.values(): - response = self.client.get(rendition.serve_url) - self.assertEqual(response.status_code, 200) - - def test_public_image(self): + for is_external_env in [True, False]: + with self.subTest(is_external_env=is_external_env) and override_settings( + IS_EXTERNAL_ENV=is_external_env + ): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 200) + + def test_serve_public_image(self): """Test the serve view behaviour for public image renditions.""" # For public image renditions, the serve view should redirect to the file URL. for rendition in self.public_image_renditions.values(): response = self.client.get(rendition.serve_url) self.assertEqual(response.status_code, 302) - # That is, unless the image.file_permissions_are_outdated() returns True, - # In which case, the view will continue to serve the file directly + @override_settings(IS_EXTERNAL_ENV=True) + def test_serve_public_image_external_env(self): + """Test the serve view behaviour for public image renditions in an external environment.""" + self.test_serve_public_image() + + def test_serve_public_image_with_outdated_file_permissions(self): + """Test the serve view behaviour for public image renditions with outdated file permissions.""" self.model.objects.filter(id=self.public_image.id).update(file_permissions_last_set=None) self.public_image.refresh_from_db() self.assertTrue(self.public_image.file_permissions_are_outdated()) for rendition in self.public_image_renditions.values(): response = self.client.get(rendition.serve_url) self.assertEqual(response.status_code, 200) - - -@override_settings(IS_EXTERNAL_ENV=True) -class TestExternalEnvImageServeView(TestImageServeView): - pass From a2748a6721f1fd8f418be13ba972a798af82e6bd Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 12:46:22 +0000 Subject: [PATCH 35/63] Update unpublish_media_on_unpublish() to use a subset of 'referencing page ids' to figure out which pages are live --- cms/private_media/signal_handlers.py | 37 ++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/cms/private_media/signal_handlers.py b/cms/private_media/signal_handlers.py index 913c5a0e..362cc3cc 100644 --- a/cms/private_media/signal_handlers.py +++ b/cms/private_media/signal_handlers.py @@ -2,7 +2,7 @@ from django.contrib.contenttypes.models import ContentType from django.core.exceptions import ImproperlyConfigured -from django.db.models import IntegerField +from django.db.models import CharField, IntegerField from django.db.models.functions import Cast from wagtail.models import Page, ReferenceIndex from wagtail.signals import published, unpublished @@ -50,24 +50,47 @@ def unpublish_media_on_unpublish(instance: "Model", **kwargs: Any) -> None: made private again. """ page_ct = ContentType.objects.get_for_model(Page) - draft_page_ids = {str(pk) for pk in Page.objects.filter(live=False).values_list("id", flat=True)} for model_class in get_private_media_models(): model_ct = ContentType.objects.get_for_model(model_class) references = ReferenceIndex.get_references_for_object(instance).filter(to_content_type=model_ct) referenced_pks = set(references.values_list("to_object_id", flat=True).distinct()) if not referenced_pks: continue - referenced_by_other_live_object_pks = set( + + # Identify references from other pages and extract their IDs, so that we can + # figure out which of those pages is live + referencing_page_ids = ( ReferenceIndex.objects.filter(to_content_type=model_ct, to_object_id__in=referenced_pks) .exclude(pk__in=references.values_list("pk", flat=True)) - .exclude(base_content_type=page_ct, object_id__in=draft_page_ids) - .annotate(int_object_id=Cast("to_object_id", output_field=IntegerField())) - .values_list("int_object_id", flat=True) + .filter(base_content_type=page_ct) + .annotate(page_id=Cast("object_id", output_field=IntegerField())) + .values_list("page_id", flat=True) + .distinct() + ) + + # Out of the pages that are referencing the media, identify the ids of those + # that are currently live + live_page_ids = ( + Page.objects.filter(pk__in=referencing_page_ids) + .live() + .annotate(str_id=Cast("id", output_field=CharField())) + .values_list("str_id", flat=True) ) + + # Now we can identify references from live pages only, and + # generate a list of media item ids that should not be made private + live_page_referenced_media_pks = ( + ReferenceIndex.objects.filter(to_content_type=model_ct, to_object_id__in=referenced_pks) + .filter(base_content_type=page_ct, object_id__in=live_page_ids) + .annotate(int_id=Cast("to_object_id", output_field=IntegerField())) + .values_list("int_id", flat=True) + .distinct() + ) + queryset = ( model_class.objects.all() .filter(pk__in=[int(pk) for pk in referenced_pks], _privacy=Privacy.PUBLIC) - .exclude(pk__in=referenced_by_other_live_object_pks) + .exclude(pk__in=live_page_referenced_media_pks) ) if issubclass(model_class, PrivateImageMixin): queryset = queryset.prefetch_related("renditions") From f13bb20415a98d8a4d0b716f132a51ec0279781c Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 16:13:58 +0000 Subject: [PATCH 36/63] Revise the docs --- docs/custom-features/media_privacy.md | 87 ++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 16 deletions(-) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 643014b0..9ba41f13 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -1,36 +1,91 @@ # Media privacy This project uses customised versions of Wagtail's `Image` and `Document` models to store and manage the -privacy of uploaded media files. +privacy of uploaded media files, so that potentially sensitive media isn't leaked to the public before it's ready. -1. The privacy of an image or document cannot be changed manually via the Wagtail UI. -2. Images and documents are private by default, and will only be served to authenticated users with some level of permission on the media item whilst they have this privacy status (so previewing from Wagtail should still work as expected). -3. Where the storage backend supports it, files for private images and documents (and any renditions created from the original image) will have their access permissions updated at the storage level, preventing direct file access. -4. Images and documents only become public when they are referenced by blocks in page content, and that page is published. -5. When a page is unpublished, any public images or documents referenced solely by that page are made private again (references from other draft pages are ignored, as the media can easily be made public again when the draft page is published). +## The basic premise -## Use of Wagtail's reference index +1. Images and documents are 'private' by default, and will only be served to authenticated users with some level of permission on the media item. +2. They only become 'public' when a page referencing them is published. +3. When a live page unpublished, any images or documents referenced soley by that page (i.e. not used on any other live pages) are made private again. -Wagtail's [reference index](https://docs.wagtail.org/en/stable/advanced_topics/reference_index.html) is used to track when images and documents (and other objects) are referenced by pages and other objects. +## Setting of file-level permissions -The reference index is automatically populated via handlers connected to the `post_save` signal for all registered models. +In environments where storage service supports it (e.g. hosted environments using a S3 for media storage), attempts are made to set file-level permissions to reflect the privacy of the media item. This happens automatically when an image, rendition, or document is saved for the first time, and also when its privacy is altered as the result of another action (e.g. a referencing page being published or unpublished). -When publishing or unpublishing a page (or other type of 'publishable' object), object-level changes are saved before any `published`, `page_published`, `unpublished` or `page_unpublished` signals are emitted. So, as long as we use these signals to review the privacy -of referenced media, we can be sure that the reference index will be up-to-date. +The responsibility of setting file-level permissions in hosted environments falls to the `cms.private_media.storages.PrivacySettingS3Storage` class, which implements the `make_public()` and `make_private()` methods. + +In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would have happen in a hosted environment, for example: + +``` +INFO:cms.private_media.bulk_operations:Skipping file permission setting for: /media/images/2024/12/09/image.jpg +``` + +Whenever the privacy of a media item is altered, it's `privacy_last_changed` timestamp field is updated and saved to the database alongside other changes. +If no errors occurred during the file-updating process, the media items' `file_permissions_last_set` timestamp will also be updated and saved. Because the file-permission setting happens later in the update process, the `file_permissions_last_set` timestamp should always be greater than the `privacy_last_changed` value when no errors occurred. + +### Performance considerations -## Handling of file permission setting errors +Because media item privacy is often updated in-bulk (e.g. making all images referenced by a page public when it's published), and each media item can have multiple files associated with it (e.g. multiple renditions of an image, each with it's own file), file-level permission setting has to be as performant as possible, and not choke if an errors occurs. + +Since media-hosting services don't provide a way to set file-level permissions in bulk, the best we can do is to make sure the individual requests don't block each other, so we use a `ThreadPoolExecutor` to run them in parallel. + +If an error occurred whilst attempting to get or set file-level permissions, the error is logged, but processing continues with the next item in the sequence. + +### Tracking of unsuccesful permission-setting attempts Whilst file-permission setting requests are quite reliable in S3, they can fail occasionally (as with any web service). -Failed attempts are tracked using a `file_permissions_last_set` timestamp for each media item, which is only updated if all update requests for the media item were successful. +Because a media item's `file_permissions_last_set` timestamp is only updated when all file-permission setting attempts were successful, for media items with outdated file permissions, the `file_permissions_last_set` timestamp will trail behind the `privacy_last_changed` value. + +For an individual object, the `file_permissions_are_outdated()` method will return `True` if the `file_permissions_last_set` timestamp is less than the `privacy_last_changed` value. This is used in a few places to vary the behaviour. Specifically, the `href` value for images will continue to point to the media serve view, so that it can still be served to users, and the serve view checks it before redirecting users to the direct file URL. -The `retry_file_permission_set_attempts` management command seeks out any media items with an outdated timestamp and attempts to update the files again. +The timestamp values can also be used to identify affected media items in bulk using the Django ORM, and reattempt the permission-setting process. This is the approach taken by the `retry_file_permission_set_attempts` management command, which runs regularly (every 5 minutes) in hosted environments to help keep file permissions up-to-date. -The custom `PrivacySettingS3Storage` class logs failed file-setting attempts using the 'WARNING' log level, so should be available to review in most environments should issues occur. +## Use of Wagtail's reference index + +Wagtail's [reference index](https://docs.wagtail.org/en/stable/advanced_topics/reference_index.html) is used to track when images and documents (and other objects) are referenced by pages and other objects. + +The reference index is automatically populated via handlers connected to the `post_save` signal for all registered models. + +When publishing or unpublishing a page (or other type of 'publishable' object), object-level changes are saved before any `published`, or `unpublished` or signals are emitted. So, as long as we use these signals to review the privacy +of referenced media, we can be sure that the reference index will be up-to-date. ## Compatibility with upstream caching Because serve view responses and direct file urls can be cached, it's important that when the privacy of a media item changes, the cache is invalidated for any affected URLs. In environments where Wagtail's front-end-cache app is configured, all relevant URLs should be collected into a batch and sent to the relevant backend to initiate purging. -NOTE: Image serve view responses are not cached, so don't need to be invalidated. +## Adding private media support to a new model + +To add private media support to a new model, simply include the `PrivateMediaMixin` mixin in the model's parent classes, and then implement a `get_privacy_controlled_files()` method which returns the values of all of the file field values you wish to protect. + +For example, if you had a `Chart` snippet model with `thumbnail_image` and `data_csv` fields that you wanted to protect, the final class definition might look like this: + +```python + +from typing import TYPE_CHECKING, Iterator +from django.db import models +from wagtail.snippets.models import register_snippet + +from cms.private_media.models.images import PrivateMediaMixin + + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + +@register_snippet +class Chart(PrivateMediaMixin, models.Model): + ... + thumbnail_image = models.ImageField() + data_csv = models.FileField() + + def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: + """Return an iterator of the files that should be protected.""" + yield self.thumbnail_image, yield self.data_csv +``` + +Depending on the type of model, it might also be necessary to [register the model with Wagtail's reference index](https://docs.wagtail.org/en/v6.3.1/advanced_topics/reference_index.html) (Though, this shouldn't be neccessary for snippets). + +The default model manager inherited from `PrivateMediaMixin` implements all of the methods required for keeping privacy up-to-date, and the existing signal handlers that listen for content changes will start looking for references to your model instances, and trigger privacy updates accordingly. From 22e3f8aa0f35f84ecb28fb092dd4468dea3c13b6 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 16:21:00 +0000 Subject: [PATCH 37/63] Reference integration tests in docs --- docs/custom-features/media_privacy.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 9ba41f13..d8eecf87 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -9,6 +9,8 @@ privacy of uploaded media files, so that potentially sensitive media isn't leake 2. They only become 'public' when a page referencing them is published. 3. When a live page unpublished, any images or documents referenced soley by that page (i.e. not used on any other live pages) are made private again. +These cases (and more) are covered by integration tests in [cms/private_media/tests/test_signal_handlers.py](https://github.com/ONSdigital/dis-wagtail/tree/main/cms/private_media/tests/test_signal_handlers.py). + ## Setting of file-level permissions In environments where storage service supports it (e.g. hosted environments using a S3 for media storage), attempts are made to set file-level permissions to reflect the privacy of the media item. This happens automatically when an image, rendition, or document is saved for the first time, and also when its privacy is altered as the result of another action (e.g. a referencing page being published or unpublished). From bef76b40fb8ecfb15cecc3c59bfaf46f98610758 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 16:21:50 +0000 Subject: [PATCH 38/63] Fix typos --- docs/custom-features/media_privacy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index d8eecf87..e2c97bbe 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -13,7 +13,7 @@ These cases (and more) are covered by integration tests in [cms/private_media/te ## Setting of file-level permissions -In environments where storage service supports it (e.g. hosted environments using a S3 for media storage), attempts are made to set file-level permissions to reflect the privacy of the media item. This happens automatically when an image, rendition, or document is saved for the first time, and also when its privacy is altered as the result of another action (e.g. a referencing page being published or unpublished). +In environments where the storage service supports it (e.g. hosted environments using S3 for media storage), attempts are made to set file-level permissions to reflect the privacy of the media item. This happens automatically when an image, rendition, or document is saved for the first time, and also when its privacy is altered as the result of another action (e.g. a referencing page being published or unpublished). The responsibility of setting file-level permissions in hosted environments falls to the `cms.private_media.storages.PrivacySettingS3Storage` class, which implements the `make_public()` and `make_private()` methods. From a42a9c55281459c697f435dd5644ff616513ad6f Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 16:26:03 +0000 Subject: [PATCH 39/63] Fix another docs typo --- docs/custom-features/media_privacy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index e2c97bbe..8ac5642f 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -17,7 +17,7 @@ In environments where the storage service supports it (e.g. hosted environments The responsibility of setting file-level permissions in hosted environments falls to the `cms.private_media.storages.PrivacySettingS3Storage` class, which implements the `make_public()` and `make_private()` methods. -In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would have happen in a hosted environment, for example: +In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would happen in a hosted environment, for example: ``` INFO:cms.private_media.bulk_operations:Skipping file permission setting for: /media/images/2024/12/09/image.jpg From c16b886caf8858fbe67a5dd859467a5be190ca1e Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 16:33:59 +0000 Subject: [PATCH 40/63] One last round of typo fixes --- docs/custom-features/media_privacy.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 8ac5642f..442dfa9b 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -24,15 +24,15 @@ INFO:cms.private_media.bulk_operations:Skipping file permission setting for: /me ``` Whenever the privacy of a media item is altered, it's `privacy_last_changed` timestamp field is updated and saved to the database alongside other changes. -If no errors occurred during the file-updating process, the media items' `file_permissions_last_set` timestamp will also be updated and saved. Because the file-permission setting happens later in the update process, the `file_permissions_last_set` timestamp should always be greater than the `privacy_last_changed` value when no errors occurred. +If no errors occurred during the file-updating process, the media item's `file_permissions_last_set` timestamp will also be updated and saved. Because the file-permission setting happens later in the update process, the `file_permissions_last_set` timestamp should always be greater than the `privacy_last_changed` value when no errors occurred. ### Performance considerations -Because media item privacy is often updated in-bulk (e.g. making all images referenced by a page public when it's published), and each media item can have multiple files associated with it (e.g. multiple renditions of an image, each with it's own file), file-level permission setting has to be as performant as possible, and not choke if an errors occurs. +Because media item privacy is often updated in-bulk (e.g. making all images referenced by a page public when it's published), and each media item can have multiple files associated with it (e.g. multiple renditions of an image, each with it's own file), file-level permission setting has to be as performant as possible, and not choke if errors occur. -Since media-hosting services don't provide a way to set file-level permissions in bulk, the best we can do is to make sure the individual requests don't block each other, so we use a `ThreadPoolExecutor` to run them in parallel. +Since media-hosting services don't provide a way to set file-level permissions in bulk, the best we can do is to make sure the individual requests block the process as little as possible, so we use a `ThreadPoolExecutor` to run them in parallel. -If an error occurred whilst attempting to get or set file-level permissions, the error is logged, but processing continues with the next item in the sequence. +If an error occurred whilst attempting to get or set the permissions for a particular file, the error is logged, but processing continues with the next item in the sequence. ### Tracking of unsuccesful permission-setting attempts @@ -40,13 +40,13 @@ Whilst file-permission setting requests are quite reliable in S3, they can fail Because a media item's `file_permissions_last_set` timestamp is only updated when all file-permission setting attempts were successful, for media items with outdated file permissions, the `file_permissions_last_set` timestamp will trail behind the `privacy_last_changed` value. -For an individual object, the `file_permissions_are_outdated()` method will return `True` if the `file_permissions_last_set` timestamp is less than the `privacy_last_changed` value. This is used in a few places to vary the behaviour. Specifically, the `href` value for images will continue to point to the media serve view, so that it can still be served to users, and the serve view checks it before redirecting users to the direct file URL. +For an individual object, the `file_permissions_are_outdated()` method will return `True` if the `file_permissions_last_set` timestamp is eariler than `privacy_last_changed`. This is used in a few places to vary the behaviour. Specifically, the `href` value for images will continue to point to the media serve view, so that it can still be served to users. The image serve view also checks it before redirecting users to the direct file URL. -The timestamp values can also be used to identify affected media items in bulk using the Django ORM, and reattempt the permission-setting process. This is the approach taken by the `retry_file_permission_set_attempts` management command, which runs regularly (every 5 minutes) in hosted environments to help keep file permissions up-to-date. +The timestamp values can also be used to identify affected media items in bulk. This is the approach taken by the `retry_file_permission_set_attempts` management command, which runs regularly (every 5 minutes) in hosted environments to help keep file permissions up-to-date. It uses the same bulk-processing logic as the signal handlers, so will retry failed requests, mark any items that are successfully updated, and log any errors that occured. ## Use of Wagtail's reference index -Wagtail's [reference index](https://docs.wagtail.org/en/stable/advanced_topics/reference_index.html) is used to track when images and documents (and other objects) are referenced by pages and other objects. +Wagtail's [reference index](https://docs.wagtail.org/en/stable/advanced_topics/reference_index.html) is used to track when images and documents (and other objects) are referenced by pages (and other objects). The reference index is automatically populated via handlers connected to the `post_save` signal for all registered models. From fe260b895771d712be3428f0b649037520160e6c Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 16:47:27 +0000 Subject: [PATCH 41/63] Add notes on how private and public media items behave --- docs/custom-features/media_privacy.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 442dfa9b..3f2139c0 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -9,6 +9,10 @@ privacy of uploaded media files, so that potentially sensitive media isn't leake 2. They only become 'public' when a page referencing them is published. 3. When a live page unpublished, any images or documents referenced soley by that page (i.e. not used on any other live pages) are made private again. +When a media item is 'private', file requests are handled by views that check the permissions of the requesting user, and only serve the file if the user has some level of permission on the media item (to support previews and reviews from other editors). + +When a media item is 'public', permission-checking in serve views is skipped, and the user is either redirected to the media storage URL (in the case of images), or served the file directly (in the case of documents). + These cases (and more) are covered by integration tests in [cms/private_media/tests/test_signal_handlers.py](https://github.com/ONSdigital/dis-wagtail/tree/main/cms/private_media/tests/test_signal_handlers.py). ## Setting of file-level permissions From 593af114e102734976ac4f69ff78ae026616a3e1 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Tue, 10 Dec 2024 23:04:57 +0000 Subject: [PATCH 42/63] Remove excess whitespace --- docs/custom-features/media_privacy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 3f2139c0..3f623f17 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -21,7 +21,7 @@ In environments where the storage service supports it (e.g. hosted environments The responsibility of setting file-level permissions in hosted environments falls to the `cms.private_media.storages.PrivacySettingS3Storage` class, which implements the `make_public()` and `make_private()` methods. -In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would happen in a hosted environment, for example: +In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would happen in a hosted environment, for example: ``` INFO:cms.private_media.bulk_operations:Skipping file permission setting for: /media/images/2024/12/09/image.jpg From ce09c50e132ff6f6c7be39ca3af93326ee60db9d Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 12:26:13 +0000 Subject: [PATCH 43/63] PRIVATE_MEDIA_PERMISSION_SETTING_MAX_WORKERS -> PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS --- cms/private_media/bulk_operations.py | 2 +- cms/settings/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index 6ffb47e8..09204a46 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -50,7 +50,7 @@ def set_file_permission_and_report(file: "FieldFile") -> None: results[file] = handler(file) with concurrent.futures.ThreadPoolExecutor( - max_workers=int(settings.PRIVATE_MEDIA_PERMISSION_SETTING_MAX_WORKERS) + max_workers=int(settings.PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS) ) as executor: executor.map(set_file_permission_and_report, files) diff --git a/cms/settings/base.py b/cms/settings/base.py index 73bc6d5c..a0041f37 100644 --- a/cms/settings/base.py +++ b/cms/settings/base.py @@ -431,7 +431,7 @@ # https://github.com/jschneier/django-storages/blob/10d1929de5e0318dbd63d715db4bebc9a42257b5/storages/backends/s3boto3.py#L217 AWS_S3_URL_PROTOCOL = env.get("AWS_S3_URL_PROTOCOL", "https:") -PRIVATE_MEDIA_PERMISSION_SETTING_MAX_WORKERS = env.get("PRIVATE_MEDIA_PERMISSION_SETTING_MAX_WORKERS", 5) +PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS = env.get("PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS", 5) # Logging # This logging is configured to be used with Sentry and console logs. Console From 16a05fb0c4f38f0b846f84dbf8de1284e6da094e Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 12:32:51 +0000 Subject: [PATCH 44/63] Raise an exception when the active storage backends does not implement the necessary methods --- cms/private_media/bulk_operations.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index 09204a46..eb783b7f 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING from django.conf import settings +from django.core.exceptions import ImproperlyConfigured from cms.private_media.constants import Privacy @@ -39,13 +40,11 @@ def set_file_permission_and_report(file: "FieldFile") -> None: handler = getattr(storage, "make_public", None) if handler is None: - logger.info( - "%s does not support setting of individual file permissions to %s, so skipping for: %s.", + raise ImproperlyConfigured( + "%s does not implement the make_private() or make_public() methods, which is a requirement " + "for bulk-setting file permissions.", storage.__class__.__name__, - intended_privacy, - file.name, ) - results[file] = True else: results[file] = handler(file) From 60fdb1593874fa6f8711694c777e52aec9557d24 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 12:35:16 +0000 Subject: [PATCH 45/63] Rename PrivacySettingS3Storage to AccessControlledS3Storage and implement a version of FileSystemStorage to use in local dev (and by extension, tests) --- cms/private_media/storages.py | 26 +++++++++++++++++++++----- cms/settings/base.py | 4 ++-- docs/custom-features/media_privacy.md | 4 ++-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index bf871142..e496fbe4 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING from botocore.exceptions import ClientError -from django.core.files.storage import InMemoryStorage +from django.core.files.storage import FileSystemStorage, InMemoryStorage from storages.backends.s3 import S3Storage if TYPE_CHECKING: @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -class PrivacySettingS3Storage(S3Storage): +class AccessControlledS3Storage(S3Storage): # pylint: disable=abstract-method private_acl_name = "private" public_acl_name = "public-read" @@ -42,13 +42,29 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: return True -class DummyPrivacySettingStorage(InMemoryStorage): - """Dummy storage class for use in tests.""" - +class AccessControlLoggingMixin: + """ + A mixin for storage backends that do not support setting of individual file permissions. + """ def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument """Pretend to make the provided file private.""" + logger.info( + "%s does not support setting of individual file permissions to private, so skipping for: %s.", + self.__class__.__name__, + file.name, + ) return True def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument """Pretend to make the provided file public.""" + logger.info( + "%s does not support setting of individual file permissions to public, so skipping for: %s.", + self.__class__.__name__, + file.name, + ) return True + + +class AccessControlLoggingFileSystemStorage(AccessControlLoggingMixin, FileSystemStorage): + """A version of Django's `FileSystemStorage` backend for local development and tests, which logs + file-permission-setting requests, and always reports success.""" diff --git a/cms/settings/base.py b/cms/settings/base.py index a0041f37..a506da4f 100644 --- a/cms/settings/base.py +++ b/cms/settings/base.py @@ -331,7 +331,7 @@ # http://whitenoise.evans.io/en/stable/#quickstart-for-django-apps # https://docs.djangoproject.com/en/stable/ref/settings/#std-setting-STORAGES STORAGES = { - "default": {"BACKEND": "django.core.files.storage.FileSystemStorage"}, + "default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}, "staticfiles": {"BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage"}, } @@ -393,7 +393,7 @@ INSTALLED_APPS += ["storages", "wagtail_storages"] # https://docs.djangoproject.com/en/stable/ref/settings/#std-setting-STORAGES - STORAGES["default"]["BACKEND"] = "cms.private_media.storages.PrivacySettingS3Storage" + STORAGES["default"]["BACKEND"] = "cms.private_media.storages.AccessControlledS3Storage" AWS_STORAGE_BUCKET_NAME = env["AWS_STORAGE_BUCKET_NAME"] diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 3f623f17..a83aa00b 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -19,12 +19,12 @@ These cases (and more) are covered by integration tests in [cms/private_media/te In environments where the storage service supports it (e.g. hosted environments using S3 for media storage), attempts are made to set file-level permissions to reflect the privacy of the media item. This happens automatically when an image, rendition, or document is saved for the first time, and also when its privacy is altered as the result of another action (e.g. a referencing page being published or unpublished). -The responsibility of setting file-level permissions in hosted environments falls to the `cms.private_media.storages.PrivacySettingS3Storage` class, which implements the `make_public()` and `make_private()` methods. +The responsibility of setting file-level permissions in hosted environments falls to the `cms.private_media.storages.AccessControlledS3Storage` class, which implements the `make_public()` and `make_private()` methods. In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would happen in a hosted environment, for example: ``` -INFO:cms.private_media.bulk_operations:Skipping file permission setting for: /media/images/2024/12/09/image.jpg +INFO:cms.private_media.storages:Skipping file permission setting for: /media/images/2024/12/09/image.jpg ``` Whenever the privacy of a media item is altered, it's `privacy_last_changed` timestamp field is updated and saved to the database alongside other changes. From 2f5dd307dca760eddd8415c0f60d0a041d51b2e9 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 12:35:42 +0000 Subject: [PATCH 46/63] Update tests to account for logging location and storage class name changes --- cms/private_media/tests/test_documents.py | 18 +++++++++--------- cms/private_media/tests/test_images.py | 16 ++++++++-------- cms/private_media/tests/test_storages.py | 6 +++--- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index a8ce9cec..2864cf9f 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -35,7 +35,7 @@ def test_private_document(self): - Ensure file permission updates are handled correctly. - Check file permission outdated status. """ - with self.assertLogs("cms.private_media.bulk_operations", level="INFO") as logs: + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: document = DocumentFactory(collection=self.root_collection) # Documents should be 'private' by default @@ -49,7 +49,7 @@ def test_private_document(self): logs.output, [ ( - "INFO:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting of individual " f"file permissions to private, so skipping for: {document.file.name}." ) ], @@ -82,7 +82,7 @@ def test_public_document(self): - Ensure file permission updates are handled correctly. - Check file permission outdated status. """ - with self.assertLogs("cms.private_media.bulk_operations", level="INFO") as logs: + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) # This document should be 'public' @@ -96,7 +96,7 @@ def test_public_document(self): logs.output, [ ( - "INFO:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting of individual " f"file permissions to public, so skipping for: {document.file.name}." ) ], @@ -170,9 +170,9 @@ def domain_prefixed_url(name: str) -> str: ], ) - @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingStorage"}}) + @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}}) def test_file_permission_setting_success(self): - """Test successful file permission setting using DummyPrivacySettingStorage: + """Test successful file permission setting using AccessControlLoggingFileSystemStorage: - Verify permissions are set correctly for both public and private documents. - Ensure no debug logs are generated during successful operation. """ @@ -183,13 +183,13 @@ def test_file_permission_setting_success(self): self.assertFalse(private_document.file_permissions_are_outdated()) self.assertFalse(public_document.file_permissions_are_outdated()) - @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingStorage"}}) + @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}}) @mock.patch( - "cms.private_media.storages.DummyPrivacySettingStorage.make_private", + "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_private", return_value=False, ) @mock.patch( - "cms.private_media.storages.DummyPrivacySettingStorage.make_public", + "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_public", return_value=False, ) def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index b1297340..483e3a5d 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -41,7 +41,7 @@ def test_private_image(self): """ # Attempts to set file permissions on save should have failed gracefully, # since the default file backend doesn't support it - with self.assertLogs("cms.private_media.bulk_operations", level="INFO") as logs: + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: image = ImageFactory(collection=self.root_collection) # Images should be 'private' by default @@ -54,7 +54,7 @@ def test_private_image(self): logs.output, [ ( - "INFO:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting of individual " f"file permissions to private, so skipping for: {image.file.name}." ) ], @@ -101,7 +101,7 @@ def test_public_image(self): """ # Attempts to set file permissions on save should have failed gracefully, # since the default file backend doesn't support it - with self.assertLogs("cms.private_media.bulk_operations", level="INFO") as logs: + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: image = ImageFactory(_privacy=Privacy.PUBLIC) # This image should be 'public' @@ -114,7 +114,7 @@ def test_public_image(self): logs.output, [ ( - "INFO:cms.private_media.bulk_operations:FileSystemStorage does not support setting of individual " + "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting of individual " f"file permissions to public, so skipping for: {image.file.name}." ) ], @@ -210,7 +210,7 @@ def test_invalid_privacy_value_raises_value_error(self): with self.assertRaises(ValueError): ImageFactory(_privacy="invalid", collection=self.root_collection) - @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingStorage"}}) + @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}}) def test_file_permission_setting_success(self): """Test successful file permission setting using a storage backend that supports it.""" with self.assertNoLogs("cms.private_media.bulk_operations"): @@ -220,13 +220,13 @@ def test_file_permission_setting_success(self): self.assertFalse(private_image.file_permissions_are_outdated()) self.assertFalse(public_image.file_permissions_are_outdated()) - @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.DummyPrivacySettingStorage"}}) + @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}}) @mock.patch( - "cms.private_media.storages.DummyPrivacySettingStorage.make_private", + "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_private", return_value=False, ) @mock.patch( - "cms.private_media.storages.DummyPrivacySettingStorage.make_public", + "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_public", return_value=False, ) def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): diff --git a/cms/private_media/tests/test_storages.py b/cms/private_media/tests/test_storages.py index 2dee8a41..08ed9084 100644 --- a/cms/private_media/tests/test_storages.py +++ b/cms/private_media/tests/test_storages.py @@ -6,19 +6,19 @@ from moto import mock_aws from wagtail_factories import DocumentFactory -from cms.private_media.storages import PrivacySettingS3Storage +from cms.private_media.storages import AccessControlledS3Storage @mock_aws @override_settings(AWS_STORAGE_BUCKET_NAME="test-bucket") -class PrivacySettingS3StorageTests(TestCase): +class AccessControlledS3StorageTests(TestCase): @classmethod def setUpTestData(cls): cls.document = DocumentFactory() def setUp(self): super().setUp() - self.storage = PrivacySettingS3Storage() + self.storage = AccessControlledS3Storage() # Create a bucket to allow '_set_file_acl' to at least reach the 'put' stage bucket = self.storage.connection.Bucket(settings.AWS_STORAGE_BUCKET_NAME) bucket.create() From dee11931d65f9207e1cc9d3b17b0e7a842db7df3 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 12:46:12 +0000 Subject: [PATCH 47/63] intended_privacy -> privacy --- cms/private_media/bulk_operations.py | 8 ++++---- cms/private_media/managers.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index eb783b7f..599e9c3b 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -def bulk_set_file_permissions(files: Iterable["FieldFile"], intended_privacy: Privacy) -> dict["FieldFile", bool]: +def bulk_set_file_permissions(files: Iterable["FieldFile"], privacy: Privacy) -> dict["FieldFile", bool]: """Set file permissions for an iterable of FieldFile objects, using the make_private() or make_public() methods of the storage backend. @@ -23,7 +23,7 @@ def bulk_set_file_permissions(files: Iterable["FieldFile"], intended_privacy: Pr Args: files: An iterable of FieldFile objects to set permissions for - intended_privacy: The intended privacy status for the supplied files + privacy: The intended privacy status for the supplied files Returns: dict[FieldFile, bool]: A mapping of files to a boolean indicating whether @@ -34,9 +34,9 @@ def bulk_set_file_permissions(files: Iterable["FieldFile"], intended_privacy: Pr def set_file_permission_and_report(file: "FieldFile") -> None: storage = file.storage handler: Callable[[FieldFile], bool] | None - if intended_privacy is Privacy.PRIVATE: + if privacy is Privacy.PRIVATE: handler = getattr(storage, "make_private", None) - elif intended_privacy is Privacy.PUBLIC: + elif privacy is Privacy.PUBLIC: handler = getattr(storage, "make_public", None) if handler is None: diff --git a/cms/private_media/managers.py b/cms/private_media/managers.py index de3b2076..cf000ef1 100644 --- a/cms/private_media/managers.py +++ b/cms/private_media/managers.py @@ -26,20 +26,20 @@ class PrivateMediaModelManager(models.Manager): time. """ - def bulk_set_privacy(self, objects: Iterable["PrivateMediaMixin"], intended_privacy: Privacy) -> int: - """Update an iterable of objects of this type to reflect the 'intended_privacy' + def bulk_set_privacy(self, objects: Iterable["PrivateMediaMixin"], privacy: Privacy) -> int: + """Update an iterable of objects of this type to reflect the 'privacy' as efficiently as possible. Returns the number of objects that were actually updated in the process. """ to_update = [] for obj in objects: - if obj.privacy != intended_privacy: - obj.privacy = intended_privacy + if obj.privacy != privacy: + obj.privacy = privacy to_update.append(obj) if not to_update: return 0 - to_update = self.bulk_set_file_permissions(to_update, intended_privacy, save_changes=False) + to_update = self.bulk_set_file_permissions(to_update, privacy, save_changes=False) count = self.bulk_update( to_update, # type: ignore[arg-type] @@ -72,10 +72,10 @@ def bulk_make_private(self, objects: Iterable["PrivateMediaMixin"]) -> int: return self.bulk_set_privacy(objects, Privacy.PRIVATE) def bulk_set_file_permissions( - self, objects: Iterable["PrivateMediaMixin"], intended_privacy: Privacy, *, save_changes: bool = False + self, objects: Iterable["PrivateMediaMixin"], privacy: Privacy, *, save_changes: bool = False ) -> list["PrivateMediaMixin"]: - """For an itrerable of objects of this type, set the file permissions for all - related files to reflect `intended_privacy`. Returns a list of the provided objects, + """For an iterable of objects of this type, set the file permissions for all + related files to reflect `privacy`. Returns a list of the provided objects, with their `file_permissions_last_set` datetime updated if all related files were successfully updated. """ @@ -88,7 +88,7 @@ def bulk_set_file_permissions( all_files.append(file) files_by_object[obj].append(file) - results = bulk_set_file_permissions(all_files, intended_privacy) + results = bulk_set_file_permissions(all_files, privacy) now = timezone.now() for obj in objects: From 50f131d6d0b5a3c8ff9d19961e543bcc2c917b2b Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 12:49:48 +0000 Subject: [PATCH 48/63] Improve readability in PrivateMediaMixin.save() --- cms/private_media/models/mixins.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cms/private_media/models/mixins.py b/cms/private_media/models/mixins.py index 37c20107..adf61a0b 100644 --- a/cms/private_media/models/mixins.py +++ b/cms/private_media/models/mixins.py @@ -87,7 +87,8 @@ def save(self, *args: Any, set_file_permissions: bool = True, **kwargs: Any) -> if set_file_permissions and self.file_permissions_are_outdated(): results = bulk_set_file_permissions(self.get_privacy_controlled_files(), self.privacy) # Only update 'file_permissions_last_set' if all updates were successfull - if set(results.values()) == {True}: + all_updates_successful = all(results.values()) + if all_updates_successful: self.file_permissions_last_set = timezone.now() kwargs.update(force_insert=False, update_fields=["file_permissions_last_set"]) super().save(*args, **kwargs) From f3af123d24642b833926c80148c13671b994e929 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 13:10:58 +0000 Subject: [PATCH 49/63] Raise storage exceptions outside of threadpool excecution --- cms/private_media/bulk_operations.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index 599e9c3b..2b10be9b 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -31,6 +31,18 @@ def bulk_set_file_permissions(files: Iterable["FieldFile"], privacy: Privacy) -> """ results: dict[FieldFile, bool] = {} + for file in files: + if privacy is Privacy.PUBLIC and not hasattr(file.storage, "make_public"): + raise ImproperlyConfigured( + f"{file.storage.__class__.__name__} does not implement make_public(), " + "which is a requirement for bulk-setting of file permissions." + ) + if privacy is Privacy.PRIVATE and not hasattr(file.storage, "make_private"): + raise ImproperlyConfigured( + f"{file.storage.__class__.__name__} does not implement make_private(), " + "which is a requirement for bulk-setting of file permissions." + ) + def set_file_permission_and_report(file: "FieldFile") -> None: storage = file.storage handler: Callable[[FieldFile], bool] | None @@ -38,15 +50,7 @@ def set_file_permission_and_report(file: "FieldFile") -> None: handler = getattr(storage, "make_private", None) elif privacy is Privacy.PUBLIC: handler = getattr(storage, "make_public", None) - - if handler is None: - raise ImproperlyConfigured( - "%s does not implement the make_private() or make_public() methods, which is a requirement " - "for bulk-setting file permissions.", - storage.__class__.__name__, - ) - else: - results[file] = handler(file) + results[file] = handler(file) with concurrent.futures.ThreadPoolExecutor( max_workers=int(settings.PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS) From b5818237c912cb051d21ff811ae73e96e42f0cea Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 13:12:45 +0000 Subject: [PATCH 50/63] Placate linters --- cms/private_media/bulk_operations.py | 6 +-- .../retry_file_permission_set_attempts.py | 6 ++- cms/private_media/models/images.py | 2 +- cms/private_media/models/mixins.py | 6 +-- cms/private_media/storages.py | 14 +++--- cms/private_media/tests/test_documents.py | 42 ++++++++++-------- cms/private_media/tests/test_images.py | 44 ++++++++++--------- cms/private_media/views.py | 2 +- docs/custom-features/media_privacy.md | 2 +- 9 files changed, 66 insertions(+), 58 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index 2b10be9b..add4a8cb 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -45,12 +45,10 @@ def bulk_set_file_permissions(files: Iterable["FieldFile"], privacy: Privacy) -> def set_file_permission_and_report(file: "FieldFile") -> None: storage = file.storage - handler: Callable[[FieldFile], bool] | None if privacy is Privacy.PRIVATE: - handler = getattr(storage, "make_private", None) + results[file] = storage.make_private(file) elif privacy is Privacy.PUBLIC: - handler = getattr(storage, "make_public", None) - results[file] = handler(file) + results[file] = storage.make_public(file) with concurrent.futures.ThreadPoolExecutor( max_workers=int(settings.PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS) diff --git a/cms/private_media/management/commands/retry_file_permission_set_attempts.py b/cms/private_media/management/commands/retry_file_permission_set_attempts.py index e2479775..3734e354 100644 --- a/cms/private_media/management/commands/retry_file_permission_set_attempts.py +++ b/cms/private_media/management/commands/retry_file_permission_set_attempts.py @@ -12,6 +12,8 @@ class Command(BaseCommand): + dry_run: bool + def add_arguments(self, parser: Any) -> None: parser.add_argument( "--dry-run", @@ -22,7 +24,7 @@ def add_arguments(self, parser: Any) -> None: ) def handle(self, *args: Any, **options: Any) -> None: - self.dry_run = options["dry_run"] # pylint: disable=attribute-defined-outside-init + self.dry_run = options["dry_run"] if self.dry_run: self.stdout.write("This is a dry run.") @@ -56,7 +58,7 @@ def update_file_permissions( for item in model_class.objects.bulk_set_file_permissions( # type: ignore[attr-defined] items, privacy, save_changes=True ): - if not item.file_permissions_are_outdated(): + if not item.has_outdated_file_permissions(): updated_count += 1 self.stdout.write(f"File permissions successfully updated for {updated_count} {privacy} {plural}.") diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py index c1850c58..2ce011c7 100644 --- a/cms/private_media/models/images.py +++ b/cms/private_media/models/images.py @@ -100,7 +100,7 @@ def url(self) -> str: str: URL for accessing the rendition """ image: PrivateImageMixin = self.image # pylint: disable=no-member - if image.is_public and not image.file_permissions_are_outdated(): + if image.is_public and not image.has_outdated_file_permissions(): file_url: str = self.file.url return file_url return self.serve_url diff --git a/cms/private_media/models/mixins.py b/cms/private_media/models/mixins.py index adf61a0b..6ec00fe4 100644 --- a/cms/private_media/models/mixins.py +++ b/cms/private_media/models/mixins.py @@ -84,16 +84,16 @@ def save(self, *args: Any, set_file_permissions: bool = True, **kwargs: Any) -> super().save(*args, **kwargs) # Trigger file permission updates after-the-fact - if set_file_permissions and self.file_permissions_are_outdated(): + if set_file_permissions and self.has_outdated_file_permissions(): results = bulk_set_file_permissions(self.get_privacy_controlled_files(), self.privacy) # Only update 'file_permissions_last_set' if all updates were successfull - all_updates_successful = all(results.values()) + all_updates_successful = all(results.values()) if all_updates_successful: self.file_permissions_last_set = timezone.now() kwargs.update(force_insert=False, update_fields=["file_permissions_last_set"]) super().save(*args, **kwargs) - def file_permissions_are_outdated(self) -> bool: + def has_outdated_file_permissions(self) -> bool: """Check if the file permissions are outdated relative to privacy changes.""" return self.file_permissions_last_set is None or self.file_permissions_last_set < self.privacy_last_changed diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index e496fbe4..a6653404 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING from botocore.exceptions import ClientError -from django.core.files.storage import FileSystemStorage, InMemoryStorage +from django.core.files.storage import FileSystemStorage from storages.backends.s3 import S3Storage if TYPE_CHECKING: @@ -43,10 +43,9 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: class AccessControlLoggingMixin: - """ - A mixin for storage backends that do not support setting of individual file permissions. - """ - def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """A mixin for storage backends that do not support setting of individual file permissions.""" + + def make_private(self, file: "FieldFile") -> bool: """Pretend to make the provided file private.""" logger.info( "%s does not support setting of individual file permissions to private, so skipping for: %s.", @@ -55,7 +54,7 @@ def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-arg ) return True - def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + def make_public(self, file: "FieldFile") -> bool: """Pretend to make the provided file public.""" logger.info( "%s does not support setting of individual file permissions to public, so skipping for: %s.", @@ -67,4 +66,5 @@ def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argu class AccessControlLoggingFileSystemStorage(AccessControlLoggingMixin, FileSystemStorage): """A version of Django's `FileSystemStorage` backend for local development and tests, which logs - file-permission-setting requests, and always reports success.""" + file-permission-setting requests, and always reports success. + """ diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index 2864cf9f..45390c27 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -49,14 +49,14 @@ def test_private_document(self): logs.output, [ ( - "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting of individual " - f"file permissions to private, so skipping for: {document.file.name}." + "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting " + f"of individual file permissions to private, so skipping for: {document.file.name}." ) ], ) # File permissions should be considered up-to-date - self.assertFalse(document.file_permissions_are_outdated()) + self.assertFalse(document.has_outdated_file_permissions()) # Setting privacy to the same value should not trigger an update to 'privacy_last_changed value_before = document.privacy_last_changed @@ -68,12 +68,12 @@ def test_private_document(self): self.assertGreater(document.privacy_last_changed, value_before) # File permissions should now be considered outdated - self.assertTrue(document.file_permissions_are_outdated()) + self.assertTrue(document.has_outdated_file_permissions()) # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' # timestamp, resolving the issue document.save() - self.assertFalse(document.file_permissions_are_outdated()) + self.assertFalse(document.has_outdated_file_permissions()) def test_public_document(self): """Test the behaviour of public documents: @@ -96,14 +96,14 @@ def test_public_document(self): logs.output, [ ( - "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting of individual " - f"file permissions to public, so skipping for: {document.file.name}." + "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting " + f"of individual file permissions to public, so skipping for: {document.file.name}." ) ], ) # File permissions should be considered up-to-date - self.assertFalse(document.file_permissions_are_outdated()) + self.assertFalse(document.has_outdated_file_permissions()) # Setting privacy to the same value should not trigger an update to 'privacy_last_changed value_before = document.privacy_last_changed @@ -115,12 +115,12 @@ def test_public_document(self): self.assertGreater(document.privacy_last_changed, value_before) # File permissions should now be considered outdated - self.assertTrue(document.file_permissions_are_outdated()) + self.assertTrue(document.has_outdated_file_permissions()) # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' # timestamp, resolving the issue document.save() - self.assertFalse(document.file_permissions_are_outdated()) + self.assertFalse(document.has_outdated_file_permissions()) def test_invalid_privacy_value_raises_value_error(self): """Verify that attempting to create a document with an invalid privacy value raises ValueError.""" @@ -170,7 +170,9 @@ def domain_prefixed_url(name: str) -> str: ], ) - @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}}) + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}} + ) def test_file_permission_setting_success(self): """Test successful file permission setting using AccessControlLoggingFileSystemStorage: - Verify permissions are set correctly for both public and private documents. @@ -180,10 +182,12 @@ def test_file_permission_setting_success(self): private_document = DocumentFactory(collection=self.root_collection) public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) - self.assertFalse(private_document.file_permissions_are_outdated()) - self.assertFalse(public_document.file_permissions_are_outdated()) + self.assertFalse(private_document.has_outdated_file_permissions()) + self.assertFalse(public_document.has_outdated_file_permissions()) - @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}}) + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}} + ) @mock.patch( "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_private", return_value=False, @@ -203,9 +207,9 @@ def test_file_permission_setting_failure(self, mock_make_public, mock_make_priva public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) mock_make_private.assert_called_once_with(private_document.file) - self.assertTrue(private_document.file_permissions_are_outdated()) + self.assertTrue(private_document.has_outdated_file_permissions()) mock_make_public.assert_called_once_with(public_document.file) - self.assertTrue(public_document.file_permissions_are_outdated()) + self.assertTrue(public_document.has_outdated_file_permissions()) @override_settings( @@ -248,7 +252,7 @@ def test_bulk_make_public(self): # Verify all images are now public for obj in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): self.assertIs(obj.privacy, Privacy.PUBLIC) - self.assertFalse(obj.file_permissions_are_outdated()) + self.assertFalse(obj.has_outdated_file_permissions()) # Another attempt should result in no updates with self.assertNumQueries(1): @@ -271,7 +275,7 @@ def test_bulk_make_private(self): # Verify all images are now private for image in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): self.assertIs(image.privacy, Privacy.PRIVATE) - self.assertFalse(image.file_permissions_are_outdated()) + self.assertFalse(image.has_outdated_file_permissions()) # Another attempt should result in no updates with self.assertNumQueries(1): @@ -319,6 +323,6 @@ def test_serve_public_document_with_outdated_file_permissions(self): """Test the serve view behaviour for public documents with outdated file permissions.""" self.model.objects.filter(id=self.public_document.id).update(file_permissions_last_set=None) self.public_document.refresh_from_db() - self.assertTrue(self.public_document.file_permissions_are_outdated()) + self.assertTrue(self.public_document.has_outdated_file_permissions()) response = self.client.get(self.public_document.url) self.assertEqual(response.status_code, 200) diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index 483e3a5d..be99ea7f 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -54,14 +54,14 @@ def test_private_image(self): logs.output, [ ( - "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting of individual " - f"file permissions to private, so skipping for: {image.file.name}." + "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting " + f"of individual file permissions to private, so skipping for: {image.file.name}." ) ], ) # File permissions should be considered up-to-date - self.assertFalse(image.file_permissions_are_outdated()) + self.assertFalse(image.has_outdated_file_permissions()) # Setting privacy to the same value should not trigger an update to 'privacy_last_changed value_before = image.privacy_last_changed @@ -73,12 +73,12 @@ def test_private_image(self): self.assertGreater(image.privacy_last_changed, value_before) # File permissions should now be considered outdated - self.assertTrue(image.file_permissions_are_outdated()) + self.assertTrue(image.has_outdated_file_permissions()) # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' # timestamp, resolving the issue image.save() - self.assertFalse(image.file_permissions_are_outdated()) + self.assertFalse(image.has_outdated_file_permissions()) def test_private_image_renditions(self): """Test that private image renditions use serve URLs instead of direct file URLs.""" @@ -114,14 +114,14 @@ def test_public_image(self): logs.output, [ ( - "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting of individual " - f"file permissions to public, so skipping for: {image.file.name}." + "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting " + f"of individual file permissions to public, so skipping for: {image.file.name}." ) ], ) # File permissions should be considered up-to-date - self.assertFalse(image.file_permissions_are_outdated()) + self.assertFalse(image.has_outdated_file_permissions()) # Setting privacy to the same value should not trigger an update to 'privacy_last_changed value_before = image.privacy_last_changed @@ -133,12 +133,12 @@ def test_public_image(self): self.assertGreater(image.privacy_last_changed, value_before) # File permissions should now be considered outdated - self.assertTrue(image.file_permissions_are_outdated()) + self.assertTrue(image.has_outdated_file_permissions()) # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' # timestamp, resolving the issue image.save() - self.assertFalse(image.file_permissions_are_outdated()) + self.assertFalse(image.has_outdated_file_permissions()) def test_public_image_renditions(self): """Test rendition.url behaviour for public image renditions: @@ -157,7 +157,7 @@ def test_public_image_renditions(self): # However, if the file permissions are outdated, rendition.url should return a serve URL with mock.patch( - "cms.private_media.models.PrivateMediaMixin.file_permissions_are_outdated", + "cms.private_media.models.PrivateMediaMixin.has_outdated_file_permissions", return_value=True, ): for rendition in renditions.values(): @@ -210,17 +210,21 @@ def test_invalid_privacy_value_raises_value_error(self): with self.assertRaises(ValueError): ImageFactory(_privacy="invalid", collection=self.root_collection) - @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}}) + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}} + ) def test_file_permission_setting_success(self): """Test successful file permission setting using a storage backend that supports it.""" with self.assertNoLogs("cms.private_media.bulk_operations"): private_image = ImageFactory(collection=self.root_collection) public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) - self.assertFalse(private_image.file_permissions_are_outdated()) - self.assertFalse(public_image.file_permissions_are_outdated()) + self.assertFalse(private_image.has_outdated_file_permissions()) + self.assertFalse(public_image.has_outdated_file_permissions()) - @override_settings(STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}}) + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}} + ) @mock.patch( "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_private", return_value=False, @@ -240,9 +244,9 @@ def test_file_permission_setting_failure(self, mock_make_public, mock_make_priva public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) mock_make_private.assert_called_once_with(private_image.file) - self.assertTrue(private_image.file_permissions_are_outdated()) + self.assertTrue(private_image.has_outdated_file_permissions()) mock_make_public.assert_called_once_with(public_image.file) - self.assertTrue(public_image.file_permissions_are_outdated()) + self.assertTrue(public_image.has_outdated_file_permissions()) @override_settings( @@ -289,7 +293,7 @@ def test_bulk_make_public(self): # Verify all images are now public for obj in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): self.assertIs(obj.privacy, Privacy.PUBLIC) - self.assertFalse(obj.file_permissions_are_outdated()) + self.assertFalse(obj.has_outdated_file_permissions()) # Another attempt should result in no updates with self.assertNumQueries(1): @@ -314,7 +318,7 @@ def test_bulk_make_private(self): # Verify all images are now private for image in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): self.assertIs(image.privacy, Privacy.PRIVATE) - self.assertFalse(image.file_permissions_are_outdated()) + self.assertFalse(image.has_outdated_file_permissions()) # Another attempt should result in no updates with self.assertNumQueries(1): @@ -372,7 +376,7 @@ def test_serve_public_image_with_outdated_file_permissions(self): """Test the serve view behaviour for public image renditions with outdated file permissions.""" self.model.objects.filter(id=self.public_image.id).update(file_permissions_last_set=None) self.public_image.refresh_from_db() - self.assertTrue(self.public_image.file_permissions_are_outdated()) + self.assertTrue(self.public_image.has_outdated_file_permissions()) for rendition in self.public_image_renditions.values(): response = self.client.get(rendition.serve_url) self.assertEqual(response.status_code, 200) diff --git a/cms/private_media/views.py b/cms/private_media/views.py index 645aef99..9776212a 100644 --- a/cms/private_media/views.py +++ b/cms/private_media/views.py @@ -17,7 +17,7 @@ def serve(self, rendition: "AbstractPrivateRendition") -> "HttpResponse": # If there's no reason (within our control) for the file not to be served by # media infrastructure, redirect - if image.is_public and not image.file_permissions_are_outdated(): + if image.is_public and not image.has_outdated_file_permissions(): return redirect(rendition.file.url) # Block access to private images if the user has insufficient permissions diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index a83aa00b..6d8cdf27 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -44,7 +44,7 @@ Whilst file-permission setting requests are quite reliable in S3, they can fail Because a media item's `file_permissions_last_set` timestamp is only updated when all file-permission setting attempts were successful, for media items with outdated file permissions, the `file_permissions_last_set` timestamp will trail behind the `privacy_last_changed` value. -For an individual object, the `file_permissions_are_outdated()` method will return `True` if the `file_permissions_last_set` timestamp is eariler than `privacy_last_changed`. This is used in a few places to vary the behaviour. Specifically, the `href` value for images will continue to point to the media serve view, so that it can still be served to users. The image serve view also checks it before redirecting users to the direct file URL. +For an individual object, the `has_outdated_file_permissions()` method will return `True` if the `file_permissions_last_set` timestamp is eariler than `privacy_last_changed`. This is used in a few places to vary the behaviour. Specifically, the `href` value for images will continue to point to the media serve view, so that it can still be served to users. The image serve view also checks it before redirecting users to the direct file URL. The timestamp values can also be used to identify affected media items in bulk. This is the approach taken by the `retry_file_permission_set_attempts` management command, which runs regularly (every 5 minutes) in hosted environments to help keep file permissions up-to-date. It uses the same bulk-processing logic as the signal handlers, so will retry failed requests, mark any items that are successfully updated, and log any errors that occured. From 7f0de81f3fe70d3fbaf9115c7f2a212bf337b56e Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 13:18:47 +0000 Subject: [PATCH 51/63] Return early to reduce indentation in retry_file_permission_set_attempts command --- cms/private_media/bulk_operations.py | 2 +- .../retry_file_permission_set_attempts.py | 22 ++++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index add4a8cb..7456139f 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -1,6 +1,6 @@ import concurrent.futures import logging -from collections.abc import Callable, Iterable +from collections.abc import Iterable from typing import TYPE_CHECKING from django.conf import settings diff --git a/cms/private_media/management/commands/retry_file_permission_set_attempts.py b/cms/private_media/management/commands/retry_file_permission_set_attempts.py index 3734e354..1267876d 100644 --- a/cms/private_media/management/commands/retry_file_permission_set_attempts.py +++ b/cms/private_media/management/commands/retry_file_permission_set_attempts.py @@ -34,17 +34,19 @@ def handle(self, *args: Any, **options: Any) -> None: queryset = queryset.prefetch_related("renditions") permissions_outdated = list(queryset) self.stdout.write(f"{len(permissions_outdated)} {model.__name__} instances have outdated file permissions.") - if permissions_outdated: - make_private = [] - make_public = [] - for obj in permissions_outdated: - if obj.privacy is Privacy.PRIVATE: - make_private.append(obj) - elif obj.privacy is Privacy.PUBLIC: - make_public.append(obj) + if not permissions_outdated: + continue - self.update_file_permissions(model, make_private, Privacy.PRIVATE) - self.update_file_permissions(model, make_public, Privacy.PUBLIC) + make_private = [] + make_public = [] + for obj in permissions_outdated: + if obj.privacy is Privacy.PRIVATE: + make_private.append(obj) + elif obj.privacy is Privacy.PUBLIC: + make_public.append(obj) + + self.update_file_permissions(model, make_private, Privacy.PRIVATE) + self.update_file_permissions(model, make_public, Privacy.PUBLIC) def update_file_permissions( self, model_class: type["PrivateMediaMixin"], items: list["PrivateMediaMixin"], privacy: Privacy From 51989435c30917f3233920e1d77f2b5b72826519 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 13:21:39 +0000 Subject: [PATCH 52/63] Use abstract Collection type in update_file_permissions() annotation --- .../management/commands/retry_file_permission_set_attempts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cms/private_media/management/commands/retry_file_permission_set_attempts.py b/cms/private_media/management/commands/retry_file_permission_set_attempts.py index 1267876d..52bc9338 100644 --- a/cms/private_media/management/commands/retry_file_permission_set_attempts.py +++ b/cms/private_media/management/commands/retry_file_permission_set_attempts.py @@ -1,3 +1,4 @@ +from collections.abc import Collection from typing import TYPE_CHECKING, Any from django.core.management.base import BaseCommand @@ -49,7 +50,7 @@ def handle(self, *args: Any, **options: Any) -> None: self.update_file_permissions(model, make_public, Privacy.PUBLIC) def update_file_permissions( - self, model_class: type["PrivateMediaMixin"], items: list["PrivateMediaMixin"], privacy: Privacy + self, model_class: type["PrivateMediaMixin"], items: Collection["PrivateMediaMixin"], privacy: Privacy ) -> None: """Update the file permissions for the provided items to reflect the provided privacy status.""" plural = model_class._meta.verbose_name_plural From 89a670ff771b9bf7fc82adc47a839eb17761d210 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 13:31:01 +0000 Subject: [PATCH 53/63] Placate mypy --- cms/private_media/bulk_operations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index 7456139f..3a5d1aa8 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -46,9 +46,9 @@ def bulk_set_file_permissions(files: Iterable["FieldFile"], privacy: Privacy) -> def set_file_permission_and_report(file: "FieldFile") -> None: storage = file.storage if privacy is Privacy.PRIVATE: - results[file] = storage.make_private(file) + results[file] = storage.make_private(file) # type: ignore[attr-defined] elif privacy is Privacy.PUBLIC: - results[file] = storage.make_public(file) + results[file] = storage.make_public(file) # type: ignore[attr-defined] with concurrent.futures.ThreadPoolExecutor( max_workers=int(settings.PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS) From d5f2eccec7896eb52e4baf4ce604b6f6772cbf38 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 15:24:31 +0000 Subject: [PATCH 54/63] Fix bulk_operations not setting file permissions when a generator is provided --- cms/private_media/bulk_operations.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py index 3a5d1aa8..8df7b622 100644 --- a/cms/private_media/bulk_operations.py +++ b/cms/private_media/bulk_operations.py @@ -31,6 +31,9 @@ def bulk_set_file_permissions(files: Iterable["FieldFile"], privacy: Privacy) -> """ results: dict[FieldFile, bool] = {} + # Convert generator values to a tuple to allow for repeat access + files = tuple(files) + for file in files: if privacy is Privacy.PUBLIC and not hasattr(file.storage, "make_public"): raise ImproperlyConfigured( @@ -44,11 +47,10 @@ def bulk_set_file_permissions(files: Iterable["FieldFile"], privacy: Privacy) -> ) def set_file_permission_and_report(file: "FieldFile") -> None: - storage = file.storage - if privacy is Privacy.PRIVATE: - results[file] = storage.make_private(file) # type: ignore[attr-defined] - elif privacy is Privacy.PUBLIC: - results[file] = storage.make_public(file) # type: ignore[attr-defined] + if privacy == Privacy.PRIVATE: + results[file] = file.storage.make_private(file) # type: ignore[attr-defined] + elif privacy == Privacy.PUBLIC: + results[file] = file.storage.make_public(file) # type: ignore[attr-defined] with concurrent.futures.ThreadPoolExecutor( max_workers=int(settings.PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS) From faefcac80eadabf132fc877fc96267a441837344 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 15:37:15 +0000 Subject: [PATCH 55/63] Simplify logging in AccessControlLoggingFileSystemStorage and use alternate in-memory storages to reduce need for mocking in tests --- cms/private_media/storages.py | 48 +++++++++++++++-------- cms/private_media/tests/test_documents.py | 14 +------ cms/private_media/tests/test_images.py | 37 ++++------------- 3 files changed, 41 insertions(+), 58 deletions(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index a6653404..c152220a 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING from botocore.exceptions import ClientError -from django.core.files.storage import FileSystemStorage +from django.core.files.storage import FileSystemStorage, InMemoryStorage from storages.backends.s3 import S3Storage if TYPE_CHECKING: @@ -42,29 +42,45 @@ def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: return True -class AccessControlLoggingMixin: - """A mixin for storage backends that do not support setting of individual file permissions.""" +class AccessControlLoggingFileSystemStorage(FileSystemStorage): + """A version of Django's `FileSystemStorage` backend for local development and tests, which logs + file-permission-setting requests, and always reports success. + """ def make_private(self, file: "FieldFile") -> bool: """Pretend to make the provided file private.""" - logger.info( - "%s does not support setting of individual file permissions to private, so skipping for: %s.", - self.__class__.__name__, - file.name, - ) + logger.info("Skipping private file permission setting for '%s'.", file.name) return True def make_public(self, file: "FieldFile") -> bool: """Pretend to make the provided file public.""" - logger.info( - "%s does not support setting of individual file permissions to public, so skipping for: %s.", - self.__class__.__name__, - file.name, - ) + logger.info("Skipping public file permission setting for '%s'.", file.name) return True -class AccessControlLoggingFileSystemStorage(AccessControlLoggingMixin, FileSystemStorage): - """A version of Django's `FileSystemStorage` backend for local development and tests, which logs - file-permission-setting requests, and always reports success. +class ReliableAccessControlInMemoryStorage(InMemoryStorage): + """A version of Django's `InMemoryStorage` backend for unit tests, that always reports success + for file-permission-setting requests. + """ + + def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Report success in making the provided file private.""" + return True + + def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Report success in making the provided file public.""" + return True + + +class FlakyAccessControlInMemoryStorage(InMemoryStorage): + """A version of Django's `InMemoryStorage` backend for unit tests, that always reports failure + for file-permission-setting requests. """ + + def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Report failure in making the provided file private.""" + return False + + def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Report failure in making the provided file public.""" + return False diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index 45390c27..4e677928 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -47,12 +47,7 @@ def test_private_document(self): # since the default file backend doesn't support it self.assertEqual( logs.output, - [ - ( - "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting " - f"of individual file permissions to private, so skipping for: {document.file.name}." - ) - ], + [(f"INFO:cms.private_media.storages:Skipping private file permission setting for '{document.file.name}'.")], ) # File permissions should be considered up-to-date @@ -94,12 +89,7 @@ def test_public_document(self): # since the default file backend doesn't support it self.assertEqual( logs.output, - [ - ( - "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting " - f"of individual file permissions to public, so skipping for: {document.file.name}." - ) - ], + [(f"INFO:cms.private_media.storages:Skipping public file permission setting for '{document.file.name}'.")], ) # File permissions should be considered up-to-date diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index be99ea7f..d6b90d41 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -37,7 +37,6 @@ def test_private_image(self): - Verify that images are created as PRIVATE by default. - Verify that file permission states are updated on save to reflect image privacy. - Verify that privacy change tracking works correctly. - - Graceful handling of unsupported storage backends. """ # Attempts to set file permissions on save should have failed gracefully, # since the default file backend doesn't support it @@ -52,12 +51,7 @@ def test_private_image(self): # Attempts to set file permissions on save should have failed gracefully self.assertEqual( logs.output, - [ - ( - "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting " - f"of individual file permissions to private, so skipping for: {image.file.name}." - ) - ], + [(f"INFO:cms.private_media.storages:Skipping private file permission setting for '{image.file.name}'.")], ) # File permissions should be considered up-to-date @@ -112,12 +106,7 @@ def test_public_image(self): # Attempts to set file permissions on save should have failed gracefully self.assertEqual( logs.output, - [ - ( - "INFO:cms.private_media.storages:AccessControlLoggingFileSystemStorage does not support setting " - f"of individual file permissions to public, so skipping for: {image.file.name}." - ) - ], + [(f"INFO:cms.private_media.storages:Skipping public file permission setting for '{image.file.name}'.")], ) # File permissions should be considered up-to-date @@ -211,7 +200,7 @@ def test_invalid_privacy_value_raises_value_error(self): ImageFactory(_privacy="invalid", collection=self.root_collection) @override_settings( - STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}} + STORAGES={"default": {"BACKEND": "cms.private_media.storages.ReliableAccessControlInMemoryStorage"}} ) def test_file_permission_setting_success(self): """Test successful file permission setting using a storage backend that supports it.""" @@ -223,29 +212,17 @@ def test_file_permission_setting_success(self): self.assertFalse(public_image.has_outdated_file_permissions()) @override_settings( - STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}} - ) - @mock.patch( - "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_private", - return_value=False, + STORAGES={"default": {"BACKEND": "cms.private_media.storages.FlakyAccessControlInMemoryStorage"}} ) - @mock.patch( - "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_public", - return_value=False, - ) - def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): + def test_file_permission_setting_failure(self): """Test graceful handling of file permission setting failures. Verifies that the system correctly tracks failed permission updates and maintains the outdated state when storage operations fail. """ - with self.assertNoLogs("cms.private_media.bulk_operations"): - private_image = ImageFactory(collection=self.root_collection) - public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) - - mock_make_private.assert_called_once_with(private_image.file) + private_image = ImageFactory(collection=self.root_collection) + public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) self.assertTrue(private_image.has_outdated_file_permissions()) - mock_make_public.assert_called_once_with(public_image.file) self.assertTrue(public_image.has_outdated_file_permissions()) From 0bf2898f4d648354fcdc4cba21cf1c431d8cb783 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 15:40:20 +0000 Subject: [PATCH 56/63] Update docs to reflect logging format changes --- docs/custom-features/media_privacy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 6d8cdf27..438d468a 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -24,7 +24,7 @@ The responsibility of setting file-level permissions in hosted environments fall In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would happen in a hosted environment, for example: ``` -INFO:cms.private_media.storages:Skipping file permission setting for: /media/images/2024/12/09/image.jpg +INFO:cms.private_media.storages:Skipping file permission setting for '/media/images/2024/12/09/image.jpg'. ``` Whenever the privacy of a media item is altered, it's `privacy_last_changed` timestamp field is updated and saved to the database alongside other changes. From c196deed4ab6ae23fe1e6f2612db06a5f5dc4c9a Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 15:47:42 +0000 Subject: [PATCH 57/63] Revert: Don't cache ServeImage view responses, and update PrivateImageMixin.get_privacy_controlled_serve_urls() to return an empty iterable --- cms/private_media/models/images.py | 9 ++++--- cms/private_media/tests/test_images.py | 37 +++++++++++++++++++------- cms/urls.py | 2 +- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py index 2ce011c7..5d786b75 100644 --- a/cms/private_media/models/images.py +++ b/cms/private_media/models/images.py @@ -50,11 +50,12 @@ def create_renditions(self, *filters: "Filter") -> dict["Filter", AbstractRendit def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator[str]: """Return an iterator of fully-fledged serve URLs for this image, covering the domains for all provided sites. - - NOTE: Since image serve URLS in this project are exempt from caching, and this result is only - used for cache purging, it doesn't need to return anything. """ - return iter([]) + if not sites: + return + for rendition in self.renditions.all(): # type: ignore[attr-defined] + for site in sites: + yield site.root_url + rendition.serve_url class AbstractPrivateRendition(AbstractRendition): diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index d6b90d41..dbf11533 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -162,13 +162,24 @@ def test_get_privacy_controlled_serve_urls(self): image = ImageFactory(collection=self.root_collection) self.assertFalse(list(image.get_privacy_controlled_serve_urls(sites))) - # Even when an image has renditions, the result should be empty, because - # image serve URLs are exempt from caching, so don't need to be purged - image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + # If an image has renditions, the result should be a list of serve URLs, prefixed + # with the root url for each supplied site + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) expected_result = [] - with self.assertNumQueries(0): + for rendition in renditions.values(): + expected_result.extend( + [ + f"http://localhost{rendition.url}", + f"https://foo.com{rendition.url}", + ] + ) + with self.assertNumQueries(1): self.assertEqual(list(image.get_privacy_controlled_serve_urls(sites)), expected_result) + # If no sites are provided, the result should be empty + with self.assertNumQueries(0): + self.assertFalse(list(image.get_privacy_controlled_serve_urls([]))) + def test_get_privacy_controlled_file_urls(self): """Test the behaviour of PrivateImageMixin.get_privacy_controlled_file_urls.""" image = ImageFactory(collection=self.root_collection) @@ -257,15 +268,18 @@ def test_bulk_make_public(self): """Test the behaviour of PrivateImageManager.bulk_make_public().""" # Three image are already public, so only three should be updated qs = self.model.objects.all().prefetch_related("renditions") - with self.assertNumQueries(3): + with self.assertNumQueries(4): # Query summary: # 1. to fetch the images # 2. to prefetch renditions # 3. to save updates + # 4. to fetch sites to facilitate cache purging self.assertEqual(self.model.objects.bulk_make_public(qs), 3) - # No urls should have been purged as part of the update - self.assertEqual(PURGED_URLS, []) + # Serve URLs for private image renditions should have been purged as part of the update + for obj in self.private_images: + for rendition in obj.renditions.all(): + self.assertIn("http://localhost" + rendition.url, PURGED_URLS) # Verify all images are now public for obj in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): @@ -282,15 +296,18 @@ def test_bulk_make_private(self): """Test the behaviour of PrivateImageManager.bulk_make_private().""" # Three images are already private, so only three should be updated qs = self.model.objects.all().prefetch_related("renditions") - with self.assertNumQueries(3): + with self.assertNumQueries(4): # Query summary: # 1. to fetch the images # 2. to prefetch renditions # 3. to save updates + # 4. to fetch sites to facilitate cache purging self.assertEqual(self.model.objects.bulk_make_private(qs), 3) - # No urls should have been purged as part of the update - self.assertEqual(PURGED_URLS, []) + # Serve URLs for public image renditions should have been purged as part of the update + for obj in self.public_images: + for rendition in obj.renditions.all(): + self.assertIn("http://localhost" + rendition.serve_url, PURGED_URLS) # Verify all images are now private for image in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): diff --git a/cms/urls.py b/cms/urls.py index ccb092dd..b8f3c1af 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -25,7 +25,6 @@ private_urlpatterns = [ path("documents/", include(wagtaildocs_urls)), path("-/", include((internal_urlpatterns, "internal"))), - re_path(r"^images/([^/]*)/(\d*)/([^/]*)/[^/]*$", PrivateImageServeView.as_view(), name="wagtailimages_serve"), ] # `wagtail.admin` must always be installed, @@ -103,6 +102,7 @@ + debug_urlpatterns + urlpatterns + [ + re_path(r"^images/([^/]*)/(\d*)/([^/]*)/[^/]*$", PrivateImageServeView.as_view(), name="wagtailimages_serve"), # Add Wagtail URLs at the end. # Wagtail cache-control is set on the page models' serve methods path("", include(wagtail_urls)), From e164276714854c1391e274e888145a98bf818c9d Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 15:49:14 +0000 Subject: [PATCH 58/63] Simplify PrivateImageMixin.get_privacy_controlled_serve_urls() --- cms/private_media/models/images.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py index 5d786b75..5613a374 100644 --- a/cms/private_media/models/images.py +++ b/cms/private_media/models/images.py @@ -51,10 +51,9 @@ def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator """Return an iterator of fully-fledged serve URLs for this image, covering the domains for all provided sites. """ - if not sites: - return - for rendition in self.renditions.all(): # type: ignore[attr-defined] - for site in sites: + renditions = self.renditions.all() # type: ignore[attr-defined] + for site in sites: + for rendition in renditions: yield site.root_url + rendition.serve_url From 62847414506e22847d1fa85f9ffa66a52df9c93c Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Fri, 13 Dec 2024 17:03:24 +0000 Subject: [PATCH 59/63] Fix rendition purge urls test --- cms/private_media/tests/test_images.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index dbf11533..9cb1b8ff 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -166,13 +166,9 @@ def test_get_privacy_controlled_serve_urls(self): # with the root url for each supplied site renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) expected_result = [] - for rendition in renditions.values(): - expected_result.extend( - [ - f"http://localhost{rendition.url}", - f"https://foo.com{rendition.url}", - ] - ) + for base_url in [site.root_url for site in sites]: + for rendition in renditions.values(): + expected_result.append(f"{base_url}{rendition.url}") with self.assertNumQueries(1): self.assertEqual(list(image.get_privacy_controlled_serve_urls(sites)), expected_result) From da9be199134b321e4d90b8fe23e4251c36f84615 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Thu, 16 Jan 2025 12:23:17 +0000 Subject: [PATCH 60/63] Extend error handling in storage backend to cover Boto3Error exceptions as well as instances of botocore.ClientError --- cms/private_media/storages.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index c152220a..56c922b6 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -1,6 +1,7 @@ import logging from typing import TYPE_CHECKING +from boto3.exceptions import Boto3Error from botocore.exceptions import ClientError from django.core.files.storage import FileSystemStorage, InMemoryStorage from storages.backends.s3 import S3Storage @@ -26,15 +27,19 @@ def make_public(self, file: "FieldFile") -> bool: return self._set_file_acl(file, self.public_acl_name) def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: - obj = self.bucket.Object(file.name) + try: + obj = self.bucket.Object(file.name) + except (ClientError, Boto3Error): + logger.exception("Failed to retrieve object for %s", file.name) + return False try: obj_acl = obj.Acl() - except ClientError: + except (ClientError, Boto3Error): logger.exception("Failed to retrieve ACL for %s", file.name) return False try: obj_acl.put(ACL=acl_name) - except ClientError: + except (ClientError, Boto3Error): logger.exception("Failed to set ACL for %s", file.name) return False From ce5bb63054f7518d5b63d5480c45d18aee6725eb Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Thu, 16 Jan 2025 13:04:49 +0000 Subject: [PATCH 61/63] Placate django-migration-linter (Add null=False to privacy_last_changed fields) --- .../migrations/0003_customdocument__privacy_and_more.py | 2 +- cms/images/migrations/0003_customimage__privacy_and_more.py | 2 +- cms/private_media/models/mixins.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cms/documents/migrations/0003_customdocument__privacy_and_more.py b/cms/documents/migrations/0003_customdocument__privacy_and_more.py index 2f6df745..b6bfb2e8 100644 --- a/cms/documents/migrations/0003_customdocument__privacy_and_more.py +++ b/cms/documents/migrations/0003_customdocument__privacy_and_more.py @@ -23,6 +23,6 @@ class Migration(migrations.Migration): migrations.AddField( model_name="customdocument", name="privacy_last_changed", - field=models.DateTimeField(default=django.utils.timezone.now, editable=False), + field=models.DateTimeField(default=django.utils.timezone.now, editable=False, null=False), ), ] diff --git a/cms/images/migrations/0003_customimage__privacy_and_more.py b/cms/images/migrations/0003_customimage__privacy_and_more.py index a638727e..8d1e3f9c 100644 --- a/cms/images/migrations/0003_customimage__privacy_and_more.py +++ b/cms/images/migrations/0003_customimage__privacy_and_more.py @@ -23,6 +23,6 @@ class Migration(migrations.Migration): migrations.AddField( model_name="customimage", name="privacy_last_changed", - field=models.DateTimeField(default=django.utils.timezone.now, editable=False), + field=models.DateTimeField(default=django.utils.timezone.now, editable=False, null=False), ), ] diff --git a/cms/private_media/models/mixins.py b/cms/private_media/models/mixins.py index 6ec00fe4..5cf76cd9 100644 --- a/cms/private_media/models/mixins.py +++ b/cms/private_media/models/mixins.py @@ -36,7 +36,7 @@ class PrivateMediaMixin(models.Model): _privacy = models.CharField(max_length=10, choices=Privacy.choices, default=Privacy.PRIVATE) file_permissions_last_set = models.DateTimeField(editable=False, null=True) - privacy_last_changed = models.DateTimeField(editable=False, default=timezone.now) + privacy_last_changed = models.DateTimeField(editable=False, default=timezone.now, null=False) class Meta: abstract = True From c258a7c9a9b2ed86581ac29d1346ecdadeaa6477 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Thu, 16 Jan 2025 13:17:09 +0000 Subject: [PATCH 62/63] Add problematic migrations to ignore list --- cms/settings/dev.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cms/settings/dev.py b/cms/settings/dev.py index 24fa6eb9..79816b0e 100644 --- a/cms/settings/dev.py +++ b/cms/settings/dev.py @@ -75,5 +75,7 @@ "0004_contactdetails_core_contactdetails_name_unique", "0003_delete_tracking", "0002_customimage_description", + "0003_customimage__privacy_and_more", + "0003_customdocument__privacy_and_more", ], } From 84b6dc5b0b9855abd3c2b7c99bb16a24f6f5e445 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Thu, 16 Jan 2025 13:43:35 +0000 Subject: [PATCH 63/63] Update log messages in AccessControlLoggingFileSystemStorage to better communicate what would be happening in production --- cms/private_media/storages.py | 10 ++++++++-- cms/private_media/tests/test_documents.py | 14 ++++++++++++-- cms/private_media/tests/test_images.py | 14 ++++++++++++-- docs/custom-features/media_privacy.md | 2 +- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py index 56c922b6..537451e3 100644 --- a/cms/private_media/storages.py +++ b/cms/private_media/storages.py @@ -54,12 +54,18 @@ class AccessControlLoggingFileSystemStorage(FileSystemStorage): def make_private(self, file: "FieldFile") -> bool: """Pretend to make the provided file private.""" - logger.info("Skipping private file permission setting for '%s'.", file.name) + logger.info( + "Simulating private permission setting for file '%s'. No actual changes applied.", + file.name, + ) return True def make_public(self, file: "FieldFile") -> bool: """Pretend to make the provided file public.""" - logger.info("Skipping public file permission setting for '%s'.", file.name) + logger.info( + "Simulating public permission setting for file '%s'. No actual changes applied.", + file.name, + ) return True diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py index 4e677928..3679603d 100644 --- a/cms/private_media/tests/test_documents.py +++ b/cms/private_media/tests/test_documents.py @@ -47,7 +47,12 @@ def test_private_document(self): # since the default file backend doesn't support it self.assertEqual( logs.output, - [(f"INFO:cms.private_media.storages:Skipping private file permission setting for '{document.file.name}'.")], + [ + ( + "INFO:cms.private_media.storages:Simulating private permission setting for file " + f"'{document.file.name}'. No actual changes applied." + ) + ], ) # File permissions should be considered up-to-date @@ -89,7 +94,12 @@ def test_public_document(self): # since the default file backend doesn't support it self.assertEqual( logs.output, - [(f"INFO:cms.private_media.storages:Skipping public file permission setting for '{document.file.name}'.")], + [ + ( + "INFO:cms.private_media.storages:Simulating public permission setting for file " + f"'{document.file.name}'. No actual changes applied." + ) + ], ) # File permissions should be considered up-to-date diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py index 9cb1b8ff..82fabd71 100644 --- a/cms/private_media/tests/test_images.py +++ b/cms/private_media/tests/test_images.py @@ -51,7 +51,12 @@ def test_private_image(self): # Attempts to set file permissions on save should have failed gracefully self.assertEqual( logs.output, - [(f"INFO:cms.private_media.storages:Skipping private file permission setting for '{image.file.name}'.")], + [ + ( + "INFO:cms.private_media.storages:Simulating private permission setting for file " + f"'{image.file.name}'. No actual changes applied." + ) + ], ) # File permissions should be considered up-to-date @@ -106,7 +111,12 @@ def test_public_image(self): # Attempts to set file permissions on save should have failed gracefully self.assertEqual( logs.output, - [(f"INFO:cms.private_media.storages:Skipping public file permission setting for '{image.file.name}'.")], + [ + ( + "INFO:cms.private_media.storages:Simulating public permission setting for file " + f"'{image.file.name}'. No actual changes applied." + ) + ], ) # File permissions should be considered up-to-date diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md index 438d468a..4cf0cece 100644 --- a/docs/custom-features/media_privacy.md +++ b/docs/custom-features/media_privacy.md @@ -24,7 +24,7 @@ The responsibility of setting file-level permissions in hosted environments fall In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would happen in a hosted environment, for example: ``` -INFO:cms.private_media.storages:Skipping file permission setting for '/media/images/2024/12/09/image.jpg'. +INFO:cms.private_media.storages:Simulating private permission setting for file '/media/images/2024/12/09/image.jpg'. No actual changes applied. ``` Whenever the privacy of a media item is altered, it's `privacy_last_changed` timestamp field is updated and saved to the database alongside other changes.