diff --git a/cms/documents/migrations/0003_customdocument__privacy_and_more.py b/cms/documents/migrations/0003_customdocument__privacy_and_more.py new file mode 100644 index 00000000..2f6df745 --- /dev/null +++ b/cms/documents/migrations/0003_customdocument__privacy_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 5.1.3 on 2024-11-29 16:19 + +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("documents", "0002_alter_customdocument_file_size"), + ] + + operations = [ + migrations.AddField( + model_name="customdocument", + name="_privacy", + field=models.CharField(default="public", max_length=10), + ), + migrations.AddField( + model_name="customdocument", + name="file_permissions_last_set", + field=models.DateTimeField(editable=False, null=True), + ), + migrations.AddField( + model_name="customdocument", + name="privacy_last_changed", + field=models.DateTimeField(default=django.utils.timezone.now, editable=False), + ), + ] diff --git a/cms/documents/migrations/0004_alter_customdocument__privacy.py b/cms/documents/migrations/0004_alter_customdocument__privacy.py new file mode 100644 index 00000000..2e5c1cb2 --- /dev/null +++ b/cms/documents/migrations/0004_alter_customdocument__privacy.py @@ -0,0 +1,17 @@ +# Generated by Django 5.1.3 on 2024-11-29 16:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("documents", "0003_customdocument__privacy_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="customdocument", + name="_privacy", + field=models.CharField(default="private", max_length=10), + ), + ] diff --git a/cms/documents/models.py b/cms/documents/models.py index 870a6d10..d966590f 100644 --- a/cms/documents/models.py +++ b/cms/documents/models.py @@ -1,12 +1,16 @@ -from wagtail.documents.models import AbstractDocument -from wagtail.documents.models import Document as WagtailDocument +from collections.abc import Sequence +from typing import ClassVar +from wagtail.documents.models import AbstractDocument, Document -class CustomDocument(AbstractDocument): +from cms.private_media.models import PrivateDocumentMixin + + +class CustomDocument(PrivateDocumentMixin, AbstractDocument): """Custom Wagtail document class. Using a custom class from the beginning allows us to add any customisations we may need. """ - admin_form_fields = WagtailDocument.admin_form_fields + admin_form_fields: ClassVar[Sequence[str]] = Document.admin_form_fields diff --git a/cms/images/migrations/0003_customimage__privacy_and_more.py b/cms/images/migrations/0003_customimage__privacy_and_more.py new file mode 100644 index 00000000..a638727e --- /dev/null +++ b/cms/images/migrations/0003_customimage__privacy_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 5.1.3 on 2024-11-29 16:19 + +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("images", "0002_customimage_description"), + ] + + operations = [ + migrations.AddField( + model_name="customimage", + name="_privacy", + field=models.CharField(default="public", max_length=10), + ), + migrations.AddField( + model_name="customimage", + name="file_permissions_last_set", + field=models.DateTimeField(editable=False, null=True), + ), + migrations.AddField( + model_name="customimage", + name="privacy_last_changed", + field=models.DateTimeField(default=django.utils.timezone.now, editable=False), + ), + ] diff --git a/cms/images/migrations/0004_alter_customimage__privacy.py b/cms/images/migrations/0004_alter_customimage__privacy.py new file mode 100644 index 00000000..9f9503c9 --- /dev/null +++ b/cms/images/migrations/0004_alter_customimage__privacy.py @@ -0,0 +1,17 @@ +# Generated by Django 5.1.3 on 2024-11-29 16:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("images", "0003_customimage__privacy_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="customimage", + name="_privacy", + field=models.CharField(default="private", max_length=10), + ), + ] diff --git a/cms/images/models.py b/cms/images/models.py index 0cd5ecf1..79039406 100644 --- a/cms/images/models.py +++ b/cms/images/models.py @@ -1,21 +1,28 @@ +from collections.abc import Sequence +from typing import ClassVar + from django.db import models -from wagtail.images.models import AbstractImage, AbstractRendition, Image +from wagtail.images.models import AbstractImage, Image + +from cms.private_media.models import AbstractPrivateRendition, PrivateImageMixin -class CustomImage(AbstractImage): +class CustomImage(PrivateImageMixin, AbstractImage): # type: ignore[django-manager-missing] """Custom Wagtail image class. Using a custom class from the beginning allows us to add any customisations we may need. """ - admin_form_fields = Image.admin_form_fields + admin_form_fields: ClassVar[Sequence[str]] = Image.admin_form_fields -class Rendition(AbstractRendition): +class Rendition(AbstractPrivateRendition): """Our custom rendition class.""" - image = models.ForeignKey["CustomImage"]("CustomImage", related_name="renditions", on_delete=models.CASCADE) + image = models.ForeignKey( # type: ignore[var-annotated] + "CustomImage", related_name="renditions", on_delete=models.CASCADE + ) class Meta: unique_together = (("image", "filter_spec", "focal_point_key"),) diff --git a/cms/private_media/__init__.py b/cms/private_media/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cms/private_media/apps.py b/cms/private_media/apps.py new file mode 100644 index 00000000..2dbbdc4a --- /dev/null +++ b/cms/private_media/apps.py @@ -0,0 +1,11 @@ +from django.apps import AppConfig + + +class PrivateMediaConfig(AppConfig): + default_auto_field = "django.db.models.AutoField" + name = "cms.private_media" + + def ready(self) -> None: + from .signal_handlers import register_signal_handlers # pylint: disable=import-outside-toplevel + + register_signal_handlers() diff --git a/cms/private_media/bulk_operations.py b/cms/private_media/bulk_operations.py new file mode 100644 index 00000000..8df7b622 --- /dev/null +++ b/cms/private_media/bulk_operations.py @@ -0,0 +1,60 @@ +import concurrent.futures +import logging +from collections.abc import Iterable +from typing import TYPE_CHECKING + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured + +from cms.private_media.constants import Privacy + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + +logger = logging.getLogger(__name__) + + +def bulk_set_file_permissions(files: Iterable["FieldFile"], privacy: Privacy) -> dict["FieldFile", bool]: + """Set file permissions for an iterable of FieldFile objects, using the + make_private() or make_public() methods of the storage backend. + + Uses a thread pool to set the file permissions in parallel where possible. + + Args: + files: An iterable of FieldFile objects to set permissions for + privacy: The intended privacy status for the supplied files + + Returns: + dict[FieldFile, bool]: A mapping of files to a boolean indicating whether + the permission setting request was successful + """ + results: dict[FieldFile, bool] = {} + + # Convert generator values to a tuple to allow for repeat access + files = tuple(files) + + for file in files: + if privacy is Privacy.PUBLIC and not hasattr(file.storage, "make_public"): + raise ImproperlyConfigured( + f"{file.storage.__class__.__name__} does not implement make_public(), " + "which is a requirement for bulk-setting of file permissions." + ) + if privacy is Privacy.PRIVATE and not hasattr(file.storage, "make_private"): + raise ImproperlyConfigured( + f"{file.storage.__class__.__name__} does not implement make_private(), " + "which is a requirement for bulk-setting of file permissions." + ) + + def set_file_permission_and_report(file: "FieldFile") -> None: + if privacy == Privacy.PRIVATE: + results[file] = file.storage.make_private(file) # type: ignore[attr-defined] + elif privacy == Privacy.PUBLIC: + results[file] = file.storage.make_public(file) # type: ignore[attr-defined] + + with concurrent.futures.ThreadPoolExecutor( + max_workers=int(settings.PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS) + ) as executor: + executor.map(set_file_permission_and_report, files) + + return results diff --git a/cms/private_media/constants.py b/cms/private_media/constants.py new file mode 100644 index 00000000..a108ab74 --- /dev/null +++ b/cms/private_media/constants.py @@ -0,0 +1,7 @@ +from django.db.models import TextChoices +from django.utils.translation import gettext_lazy as _ + + +class Privacy(TextChoices): + PRIVATE = "private", _("Private") + PUBLIC = "public", _("Public") diff --git a/cms/private_media/management/__init__.py b/cms/private_media/management/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cms/private_media/management/commands/__init__.py b/cms/private_media/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cms/private_media/management/commands/retry_file_permission_set_attempts.py b/cms/private_media/management/commands/retry_file_permission_set_attempts.py new file mode 100644 index 00000000..52bc9338 --- /dev/null +++ b/cms/private_media/management/commands/retry_file_permission_set_attempts.py @@ -0,0 +1,67 @@ +from collections.abc import Collection +from typing import TYPE_CHECKING, Any + +from django.core.management.base import BaseCommand +from django.db.models import F + +from cms.private_media.constants import Privacy +from cms.private_media.models.images import PrivateImageMixin +from cms.private_media.utils import get_private_media_models + +if TYPE_CHECKING: + from cms.private_media.models import PrivateMediaMixin + + +class Command(BaseCommand): + dry_run: bool + + def add_arguments(self, parser: Any) -> None: + parser.add_argument( + "--dry-run", + action="store_true", + dest="dry_run", + default=False, + help="Dry run -- don't change anything.", + ) + + def handle(self, *args: Any, **options: Any) -> None: + self.dry_run = options["dry_run"] + if self.dry_run: + self.stdout.write("This is a dry run.") + + for model in get_private_media_models(): + queryset = model.objects.filter(file_permissions_last_set__lt=F("privacy_last_changed")) + if not self.dry_run and issubclass(model, PrivateImageMixin): + queryset = queryset.prefetch_related("renditions") + permissions_outdated = list(queryset) + self.stdout.write(f"{len(permissions_outdated)} {model.__name__} instances have outdated file permissions.") + if not permissions_outdated: + continue + + make_private = [] + make_public = [] + for obj in permissions_outdated: + if obj.privacy is Privacy.PRIVATE: + make_private.append(obj) + elif obj.privacy is Privacy.PUBLIC: + make_public.append(obj) + + self.update_file_permissions(model, make_private, Privacy.PRIVATE) + self.update_file_permissions(model, make_public, Privacy.PUBLIC) + + def update_file_permissions( + self, model_class: type["PrivateMediaMixin"], items: Collection["PrivateMediaMixin"], privacy: Privacy + ) -> None: + """Update the file permissions for the provided items to reflect the provided privacy status.""" + plural = model_class._meta.verbose_name_plural + if self.dry_run: + self.stdout.write(f"Would update file permissions for {len(items)} {privacy} {plural}.") + else: + updated_count = 0 + for item in model_class.objects.bulk_set_file_permissions( # type: ignore[attr-defined] + items, privacy, save_changes=True + ): + if not item.has_outdated_file_permissions(): + updated_count += 1 + + self.stdout.write(f"File permissions successfully updated for {updated_count} {privacy} {plural}.") diff --git a/cms/private_media/managers.py b/cms/private_media/managers.py new file mode 100644 index 00000000..cf000ef1 --- /dev/null +++ b/cms/private_media/managers.py @@ -0,0 +1,124 @@ +from collections import defaultdict +from collections.abc import Iterable +from typing import TYPE_CHECKING + +from django.db import models +from django.utils import timezone +from wagtail.contrib.frontend_cache.utils import PurgeBatch +from wagtail.documents.models import DocumentQuerySet +from wagtail.images.models import ImageQuerySet +from wagtail.models import Site + +from .bulk_operations import bulk_set_file_permissions +from .constants import Privacy +from .utils import get_frontend_cache_configuration + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + from cms.private_media.models import PrivateMediaMixin + + +class PrivateMediaModelManager(models.Manager): + """A custom model `Manager` to be used by concrete subclasses of + `PrivateMediaCollectionMember`. It includes several methods for + applying privacy-related changes to multiple objects at the same + time. + """ + + def bulk_set_privacy(self, objects: Iterable["PrivateMediaMixin"], privacy: Privacy) -> int: + """Update an iterable of objects of this type to reflect the 'privacy' + as efficiently as possible. Returns the number of objects that were actually + updated in the process. + """ + to_update = [] + for obj in objects: + if obj.privacy != privacy: + obj.privacy = privacy + to_update.append(obj) + if not to_update: + return 0 + + to_update = self.bulk_set_file_permissions(to_update, privacy, save_changes=False) + + count = self.bulk_update( + to_update, # type: ignore[arg-type] + fields=["_privacy", "privacy_last_changed", "file_permissions_last_set"], + ) + + if count and get_frontend_cache_configuration(): + sites = Site.objects.all() + serve_url_batch = PurgeBatch() + file_url_batch = PurgeBatch() + for obj in to_update: + serve_url_batch.add_urls(obj.get_privacy_controlled_serve_urls(sites)) + file_url_batch.add_urls(obj.get_privacy_controlled_file_urls()) + serve_url_batch.purge() + file_url_batch.purge() + return count + + def bulk_make_public(self, objects: Iterable["PrivateMediaMixin"]) -> int: + """Make an iterable of objects of this type 'public' as efficiently as + possible. Returns the number of objects that were actually updated in + the process. + """ + return self.bulk_set_privacy(objects, Privacy.PUBLIC) + + def bulk_make_private(self, objects: Iterable["PrivateMediaMixin"]) -> int: + """Make an iterable of objects of this type 'private' as efficiently as + possible. Returns the number of objects that were actually updated + in the process. + """ + return self.bulk_set_privacy(objects, Privacy.PRIVATE) + + def bulk_set_file_permissions( + self, objects: Iterable["PrivateMediaMixin"], privacy: Privacy, *, save_changes: bool = False + ) -> list["PrivateMediaMixin"]: + """For an iterable of objects of this type, set the file permissions for all + related files to reflect `privacy`. Returns a list of the provided objects, + with their `file_permissions_last_set` datetime updated if all related files + were successfully updated. + """ + files_by_object: dict[PrivateMediaMixin, list[FieldFile]] = defaultdict(list) + all_files = [] + objects = list(objects) + + for obj in objects: + for file in obj.get_privacy_controlled_files(): + all_files.append(file) + files_by_object[obj].append(file) + + results = bulk_set_file_permissions(all_files, privacy) + + now = timezone.now() + for obj in objects: + files = files_by_object[obj] + if all(results.get(file) for file in files): + obj.file_permissions_last_set = now + + if save_changes: + self.bulk_update(objects, ["file_permissions_last_set"]) # type: ignore[arg-type] + + return objects + + +class PrivateImageManager(PrivateMediaModelManager): + """A subclass of `PrivateMediaModelManager` that returns instances of + Wagtail's custom `ImageQuerySet`, which includes image-specific + filter methods other functionality that Wagtail itself depends on. + """ + + def get_queryset(self) -> ImageQuerySet: + """Return an `ImageQuerySet` instance.""" + return ImageQuerySet(self.model, using=self._db) + + +class PrivateDocumentManager(PrivateMediaModelManager): + """A subclass of `PrivateMediaModelManager` that returns instances of + Wagtail's custom `DocumentQuerySet`, which includes document-specific + filter methods other functionality that Wagtail itself depends on. + """ + + def get_queryset(self) -> DocumentQuerySet: + """Return a `DocumentQuerySet` instance.""" + return DocumentQuerySet(self.model, using=self._db) diff --git a/cms/private_media/models/__init__.py b/cms/private_media/models/__init__.py new file mode 100644 index 00000000..1e95900a --- /dev/null +++ b/cms/private_media/models/__init__.py @@ -0,0 +1,3 @@ +from .documents import PrivateDocumentMixin # noqa: F401 +from .images import AbstractPrivateRendition, PrivateImageMixin # noqa: F401 +from .mixins import PrivateMediaMixin # noqa: F401 diff --git a/cms/private_media/models/documents.py b/cms/private_media/models/documents.py new file mode 100644 index 00000000..3210cca7 --- /dev/null +++ b/cms/private_media/models/documents.py @@ -0,0 +1,35 @@ +from collections.abc import Iterable, Iterator +from typing import TYPE_CHECKING, ClassVar + +from cms.private_media.managers import PrivateDocumentManager + +from .mixins import PrivateMediaMixin + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + from wagtail.models import Site + + +class PrivateDocumentMixin(PrivateMediaMixin): + """A mixin class to be applied to a project's custom Document model, + allowing the privacy of related files to be controlled effectively. + """ + + objects: ClassVar[PrivateDocumentManager] = PrivateDocumentManager() + + class Meta: + abstract = True + + def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: + file: FieldFile | None = getattr(self, "file", None) + if file: + yield file + + def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator[str]: + """Return an iterator of fully-fledged serve URLs for this document, covering the domains for all + provided sites. + """ + if not sites: + return + for site in sites: + yield site.root_url + self.url # type: ignore[attr-defined] diff --git a/cms/private_media/models/images.py b/cms/private_media/models/images.py new file mode 100644 index 00000000..5613a374 --- /dev/null +++ b/cms/private_media/models/images.py @@ -0,0 +1,115 @@ +from collections.abc import Iterable, Iterator +from typing import TYPE_CHECKING, ClassVar + +from django.utils.functional import cached_property +from wagtail.images.models import AbstractImage, AbstractRendition + +from cms.private_media.bulk_operations import bulk_set_file_permissions +from cms.private_media.managers import PrivateImageManager + +from .mixins import PrivateMediaMixin + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + from wagtail.images.models import Filter + from wagtail.models import Site + + +class PrivateImageMixin(PrivateMediaMixin): + """A mixin class to be applied to a project's custom Image model, + allowing the privacy of related files to be controlled effectively. + """ + + objects: ClassVar[PrivateImageManager] = PrivateImageManager() + + class Meta: + abstract = True + + def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: + file: FieldFile | None = getattr(self, "file", None) + if file: + yield file + for rendition in self.renditions.all(): # type: ignore[attr-defined] + rendition_file: FieldFile = rendition.file + yield rendition_file + + def create_renditions(self, *filters: "Filter") -> dict["Filter", AbstractRendition]: + """Create image renditions and set their privacy permissions. + + Args: + *filters: Filter objects defining the renditions to create + + Returns: + dict: Mapping of filters to their corresponding rendition objects + """ + created_renditions: dict[Filter, AbstractRendition] = super().create_renditions(*filters) # type: ignore[misc] + files = [r.file for r in created_renditions.values()] + bulk_set_file_permissions(files, self.privacy) + return created_renditions + + def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator[str]: + """Return an iterator of fully-fledged serve URLs for this image, covering the domains for all + provided sites. + """ + renditions = self.renditions.all() # type: ignore[attr-defined] + for site in sites: + for rendition in renditions: + yield site.root_url + rendition.serve_url + + +class AbstractPrivateRendition(AbstractRendition): + """A replacement for Wagtail's built-in `AbstractRendition` model, that should be used as + a base for rendition models for image models subclassing `PrivateImageMixin`. This + is necessary to ensure that only users with relevant permissions can view renditions + for private images. + """ + + class Meta: + abstract = True + + @staticmethod + def construct_cache_key(image: "AbstractImage", filter_cache_key: str, filter_spec: str) -> str: + """Construct a cache key for the rendition that includes privacy status. + + Args: + image: The source image + filter_cache_key: The filter's cache key + filter_spec: The filter specification string + + Returns: + str: A unique cache key for the rendition + """ + return "wagtail-rendition-" + "-".join( + [ + str(image.id), + image.file_hash, + str(image.privacy), + filter_cache_key, + filter_spec, + ] + ) + + @property + def url(self) -> str: + """Get the URL for accessing the rendition. + + Returns a direct file URL for public images with up-to-date permissions, + or a permission-checking 'ServeView' URL for private or unprocessed images. + + Returns: + str: URL for accessing the rendition + """ + image: PrivateImageMixin = self.image # pylint: disable=no-member + if image.is_public and not image.has_outdated_file_permissions(): + file_url: str = self.file.url + return file_url + return self.serve_url + + @cached_property + def serve_url(self) -> str: + """Return a permission-checking 'ServeView' URL for this rendition.""" + from wagtail.images.views.serve import generate_image_url # pylint: disable=import-outside-toplevel + + image: PrivateImageMixin = self.image # pylint: disable=no-member + generated_url: str = generate_image_url(image, self.filter_spec) + return generated_url diff --git a/cms/private_media/models/mixins.py b/cms/private_media/models/mixins.py new file mode 100644 index 00000000..6ec00fe4 --- /dev/null +++ b/cms/private_media/models/mixins.py @@ -0,0 +1,122 @@ +import logging +from collections.abc import Iterable, Iterator +from typing import TYPE_CHECKING, Any, ClassVar + +from django.db import models +from django.utils import timezone + +from cms.private_media.bulk_operations import bulk_set_file_permissions +from cms.private_media.constants import Privacy +from cms.private_media.managers import PrivateMediaModelManager + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + from wagtail.models import Site + +logger = logging.getLogger(__name__) + + +class PrivateMediaMixin(models.Model): + """A mixin class for models that have files that need to remain private + until the object itself is no longer private. + + Subclasses must implement the `get_privacy_controlled_files()` method, + which should return an iterable of `FieldFile` objects that are managed + by the model instance. + + Where individual objects are updated (usually via the Wagtail UI), changes to + managed field values are written to the database by the overridden + `save()` method. + + Where multiple objects are updated at once (e.g. via a signal handler or + management command running on the server), changes to managed field values + are written to the database by the bulk update methods provided by + `PrivateMediaModelManager`. + """ + + _privacy = models.CharField(max_length=10, choices=Privacy.choices, default=Privacy.PRIVATE) + file_permissions_last_set = models.DateTimeField(editable=False, null=True) + privacy_last_changed = models.DateTimeField(editable=False, default=timezone.now) + + class Meta: + abstract = True + + objects: ClassVar[models.Manager] = PrivateMediaModelManager() + + @property + def privacy(self) -> Privacy: + """Return the privacy status of the object.""" + if isinstance(self._privacy, Privacy): + return self._privacy + for item in Privacy: + if item.value == self._privacy: + return item + raise ValueError(f"Invalid privacy value: {self._privacy}") + + @privacy.setter + def privacy(self, value: Privacy) -> None: + """Set the privacy status of the object and conditionally update the + privacy_last_changed timestamp if the privacy has changed. + """ + if self.privacy is not value: + self._privacy = value.value + self.privacy_last_changed = timezone.now() + + @property + def is_private(self) -> bool: + """Return True if the object is private.""" + return self.privacy is Privacy.PRIVATE + + @property + def is_public(self) -> bool: + """Return True if the object is public.""" + return self.privacy is Privacy.PUBLIC + + def save(self, *args: Any, set_file_permissions: bool = True, **kwargs: Any) -> None: + """Save the model instance and manage file permissions. + + Args: + set_file_permissions: If True, updates file permissions after saving + *args: Additional positional arguments passed to parent save method + **kwargs: Additional keyword arguments passed to parent save method + """ + # Save model field changes at this point + super().save(*args, **kwargs) + + # Trigger file permission updates after-the-fact + if set_file_permissions and self.has_outdated_file_permissions(): + results = bulk_set_file_permissions(self.get_privacy_controlled_files(), self.privacy) + # Only update 'file_permissions_last_set' if all updates were successfull + all_updates_successful = all(results.values()) + if all_updates_successful: + self.file_permissions_last_set = timezone.now() + kwargs.update(force_insert=False, update_fields=["file_permissions_last_set"]) + super().save(*args, **kwargs) + + def has_outdated_file_permissions(self) -> bool: + """Check if the file permissions are outdated relative to privacy changes.""" + return self.file_permissions_last_set is None or self.file_permissions_last_set < self.privacy_last_changed + + def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: + """Return an Iterator of files that are managed by the model instance. + + Returns: + Iterator[FieldFile]: An Iterator of files managed by the instance + """ + raise NotImplementedError + + def get_privacy_controlled_serve_urls(self, sites: Iterable["Site"]) -> Iterator[str]: + """Return an iterator of fully-fledged serve URLs for this object, covering the domains for all provided + sites. It is the responsibility of the subclass to implement this method. + """ + raise NotImplementedError + + def get_privacy_controlled_file_urls(self) -> Iterator[str]: + """Return an iterator of fully-fledged file URLs for this object. + + NOTE: Will only return URLs that include a domain prefix (where the files are externally hosted). + """ + for file in self.get_privacy_controlled_files(): + url = file.url + if not url.startswith("/"): + yield url diff --git a/cms/private_media/signal_handlers.py b/cms/private_media/signal_handlers.py new file mode 100644 index 00000000..362cc3cc --- /dev/null +++ b/cms/private_media/signal_handlers.py @@ -0,0 +1,110 @@ +from typing import TYPE_CHECKING, Any + +from django.contrib.contenttypes.models import ContentType +from django.core.exceptions import ImproperlyConfigured +from django.db.models import CharField, IntegerField +from django.db.models.functions import Cast +from wagtail.models import Page, ReferenceIndex +from wagtail.signals import published, unpublished + +from cms.private_media.constants import Privacy +from cms.private_media.models import PrivateImageMixin +from cms.private_media.utils import get_private_media_models + +if TYPE_CHECKING: + from django.db.models import Model + + +def publish_media_on_publish(instance: "Model", **kwargs: Any) -> None: + """Signal handler to be connected to the 'page_published' and 'published' + signals for all publishable models. It is responsible for identifying any + privacy-controlled media used by the object, and ensuring that it is also + made public. + """ + for model_class in get_private_media_models(): + model_ct = ContentType.objects.get_for_model(model_class) + referenced_pks = ( + ReferenceIndex.get_references_for_object(instance) + .filter(to_content_type=model_ct) + .annotate(int_object_id=Cast("to_object_id", output_field=IntegerField())) + .values_list("int_object_id", flat=True) + .distinct() + ) + queryset = model_class.objects.filter(pk__in=referenced_pks, _privacy=Privacy.PRIVATE) + if issubclass(model_class, PrivateImageMixin): + queryset = queryset.prefetch_related("renditions") + + if hasattr(model_class.objects, "bulk_make_public"): + model_class.objects.bulk_make_public(queryset) + else: + raise ImproperlyConfigured( + f"The manager for {model_class.__name__} is missing a bulk_make_public() method implementation. " + "Did you override the manager class and forget to subclass PrivateMediaModelManager?", + ) + + +def unpublish_media_on_unpublish(instance: "Model", **kwargs: Any) -> None: + """Signal handler to be connected to the 'page_unpublished' and 'unpublished' + signals for all publishable models. It is responsible for identifying any + privacy-controlled media used solely by the object, and ensuring that it is + made private again. + """ + page_ct = ContentType.objects.get_for_model(Page) + for model_class in get_private_media_models(): + model_ct = ContentType.objects.get_for_model(model_class) + references = ReferenceIndex.get_references_for_object(instance).filter(to_content_type=model_ct) + referenced_pks = set(references.values_list("to_object_id", flat=True).distinct()) + if not referenced_pks: + continue + + # Identify references from other pages and extract their IDs, so that we can + # figure out which of those pages is live + referencing_page_ids = ( + ReferenceIndex.objects.filter(to_content_type=model_ct, to_object_id__in=referenced_pks) + .exclude(pk__in=references.values_list("pk", flat=True)) + .filter(base_content_type=page_ct) + .annotate(page_id=Cast("object_id", output_field=IntegerField())) + .values_list("page_id", flat=True) + .distinct() + ) + + # Out of the pages that are referencing the media, identify the ids of those + # that are currently live + live_page_ids = ( + Page.objects.filter(pk__in=referencing_page_ids) + .live() + .annotate(str_id=Cast("id", output_field=CharField())) + .values_list("str_id", flat=True) + ) + + # Now we can identify references from live pages only, and + # generate a list of media item ids that should not be made private + live_page_referenced_media_pks = ( + ReferenceIndex.objects.filter(to_content_type=model_ct, to_object_id__in=referenced_pks) + .filter(base_content_type=page_ct, object_id__in=live_page_ids) + .annotate(int_id=Cast("to_object_id", output_field=IntegerField())) + .values_list("int_id", flat=True) + .distinct() + ) + + queryset = ( + model_class.objects.all() + .filter(pk__in=[int(pk) for pk in referenced_pks], _privacy=Privacy.PUBLIC) + .exclude(pk__in=live_page_referenced_media_pks) + ) + if issubclass(model_class, PrivateImageMixin): + queryset = queryset.prefetch_related("renditions") + + if hasattr(model_class.objects, "bulk_make_private"): + model_class.objects.bulk_make_private(queryset) + else: + raise ImproperlyConfigured( + f"The manager for {model_class.__name__} is missing a bulk_make_private() method implementation. " + "Did you override the manager class and forget to subclass PrivateMediaModelManager?", + ) + + +def register_signal_handlers() -> None: + """Register signal handlers for models using the private media system.""" + published.connect(publish_media_on_publish, dispatch_uid="publish_media") + unpublished.connect(unpublish_media_on_unpublish, dispatch_uid="unpublish_media") diff --git a/cms/private_media/storages.py b/cms/private_media/storages.py new file mode 100644 index 00000000..c152220a --- /dev/null +++ b/cms/private_media/storages.py @@ -0,0 +1,86 @@ +import logging +from typing import TYPE_CHECKING + +from botocore.exceptions import ClientError +from django.core.files.storage import FileSystemStorage, InMemoryStorage +from storages.backends.s3 import S3Storage + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + +logger = logging.getLogger(__name__) + + +class AccessControlledS3Storage(S3Storage): + # pylint: disable=abstract-method + private_acl_name = "private" + public_acl_name = "public-read" + + def make_private(self, file: "FieldFile") -> bool: + """Make the provided file private in S3.""" + return self._set_file_acl(file, self.private_acl_name) + + def make_public(self, file: "FieldFile") -> bool: + """Make the provided file publically readable in S3.""" + return self._set_file_acl(file, self.public_acl_name) + + def _set_file_acl(self, file: "FieldFile", acl_name: str) -> bool: + obj = self.bucket.Object(file.name) + try: + obj_acl = obj.Acl() + except ClientError: + logger.exception("Failed to retrieve ACL for %s", file.name) + return False + try: + obj_acl.put(ACL=acl_name) + except ClientError: + logger.exception("Failed to set ACL for %s", file.name) + return False + + logger.info("ACL set successfully for %s", file.name) + return True + + +class AccessControlLoggingFileSystemStorage(FileSystemStorage): + """A version of Django's `FileSystemStorage` backend for local development and tests, which logs + file-permission-setting requests, and always reports success. + """ + + def make_private(self, file: "FieldFile") -> bool: + """Pretend to make the provided file private.""" + logger.info("Skipping private file permission setting for '%s'.", file.name) + return True + + def make_public(self, file: "FieldFile") -> bool: + """Pretend to make the provided file public.""" + logger.info("Skipping public file permission setting for '%s'.", file.name) + return True + + +class ReliableAccessControlInMemoryStorage(InMemoryStorage): + """A version of Django's `InMemoryStorage` backend for unit tests, that always reports success + for file-permission-setting requests. + """ + + def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Report success in making the provided file private.""" + return True + + def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Report success in making the provided file public.""" + return True + + +class FlakyAccessControlInMemoryStorage(InMemoryStorage): + """A version of Django's `InMemoryStorage` backend for unit tests, that always reports failure + for file-permission-setting requests. + """ + + def make_private(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Report failure in making the provided file private.""" + return False + + def make_public(self, file: "FieldFile") -> bool: # pylint: disable=unused-argument + """Report failure in making the provided file public.""" + return False diff --git a/cms/private_media/tests/__init__.py b/cms/private_media/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cms/private_media/tests/test_documents.py b/cms/private_media/tests/test_documents.py new file mode 100644 index 00000000..4e677928 --- /dev/null +++ b/cms/private_media/tests/test_documents.py @@ -0,0 +1,318 @@ +from unittest import mock + +from django.contrib.auth import get_user_model +from django.test import SimpleTestCase, TestCase, override_settings +from wagtail.documents import get_document_model +from wagtail.models import Collection, Site +from wagtail_factories import DocumentFactory, SiteFactory + +from cms.private_media.constants import Privacy +from cms.private_media.managers import PrivateDocumentManager +from cms.private_media.models import PrivateDocumentMixin + +from .utils import PURGED_URLS + + +class TestModelConfiguration(SimpleTestCase): + def test_document_model_using_private_document_mixin(self): + """Verify that the document model correctly inherits from PrivateDocumentMixin. + And that the manager is a subclass of PrivateDocumentManager. + """ + document_model = get_document_model() + self.assertTrue(issubclass(document_model, PrivateDocumentMixin)) + self.assertIsInstance(document_model.objects, PrivateDocumentManager) + + +class TestDocumentModel(TestCase): + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + + def test_private_document(self): + """Test the behaviour of private documents: + - Verify default privacy settings. + - Test privacy change tracking. + - Ensure file permission updates are handled correctly. + - Check file permission outdated status. + """ + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: + document = DocumentFactory(collection=self.root_collection) + + # Documents should be 'private' by default + self.assertIs(document.privacy, Privacy.PRIVATE) + self.assertTrue(document.is_private) + self.assertFalse(document.is_public) + + # Attempts to set file permissions on save should have failed gracefully, + # since the default file backend doesn't support it + self.assertEqual( + logs.output, + [(f"INFO:cms.private_media.storages:Skipping private file permission setting for '{document.file.name}'.")], + ) + + # File permissions should be considered up-to-date + self.assertFalse(document.has_outdated_file_permissions()) + + # Setting privacy to the same value should not trigger an update to 'privacy_last_changed + value_before = document.privacy_last_changed + document.privacy = Privacy.PRIVATE + self.assertEqual(document.privacy_last_changed, value_before) + + # Setting privacy to a different value should triggered an update to 'privacy_last_changed' + document.privacy = Privacy.PUBLIC + self.assertGreater(document.privacy_last_changed, value_before) + + # File permissions should now be considered outdated + self.assertTrue(document.has_outdated_file_permissions()) + + # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' + # timestamp, resolving the issue + document.save() + self.assertFalse(document.has_outdated_file_permissions()) + + def test_public_document(self): + """Test the behaviour of public documents: + - Verify public privacy settings. + - Test privacy change tracking. + - Ensure file permission updates are handled correctly. + - Check file permission outdated status. + """ + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: + document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + # This document should be 'public' + self.assertIs(document.privacy, Privacy.PUBLIC) + self.assertTrue(document.is_public) + self.assertFalse(document.is_private) + + # Attempts to set file permissions on save should have failed gracefully, + # since the default file backend doesn't support it + self.assertEqual( + logs.output, + [(f"INFO:cms.private_media.storages:Skipping public file permission setting for '{document.file.name}'.")], + ) + + # File permissions should be considered up-to-date + self.assertFalse(document.has_outdated_file_permissions()) + + # Setting privacy to the same value should not trigger an update to 'privacy_last_changed + value_before = document.privacy_last_changed + document.privacy = Privacy.PUBLIC + self.assertEqual(document.privacy_last_changed, value_before) + + # Setting privacy to a different value should triggered an update to 'privacy_last_changed' + document.privacy = Privacy.PRIVATE + self.assertGreater(document.privacy_last_changed, value_before) + + # File permissions should now be considered outdated + self.assertTrue(document.has_outdated_file_permissions()) + + # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' + # timestamp, resolving the issue + document.save() + self.assertFalse(document.has_outdated_file_permissions()) + + def test_invalid_privacy_value_raises_value_error(self): + """Verify that attempting to create a document with an invalid privacy value raises ValueError.""" + with self.assertRaises(ValueError): + DocumentFactory(_privacy="invalid", collection=self.root_collection) + + def test_get_privacy_controlled_serve_urls(self): + """Test the behaviour of PrivateImageMixin.get_privacy_controlled_serve_urls.""" + default_site = Site.objects.get(is_default_site=True) + site_2 = SiteFactory(hostname="foo.com", port=443) + sites = [default_site, site_2] + + document = DocumentFactory(collection=self.root_collection) + self.assertEqual( + list(document.get_privacy_controlled_serve_urls(sites)), + [ + f"http://localhost{document.url}", + f"https://foo.com{document.url}", + ], + ) + + # If no sites are provided, the result should be empty + with self.assertNumQueries(0): + self.assertFalse(list(document.get_privacy_controlled_serve_urls([]))) + + def test_get_privacy_controlled_file_urls(self): + """Test the behaviour of PrivateImageMixin.get_privacy_controlled_file_urls.""" + document = DocumentFactory(collection=self.root_collection) + + # FileSystemStorage returns relative URLs for files only, so the return value should be empty + self.assertFalse(list(document.get_privacy_controlled_file_urls())) + + # However, if the storage backend returns fully-fledged URLs, those values should be + # included in the return value + def domain_prefixed_url(name: str) -> str: + """Replacement for FileSystemStorage.url() that returns fully-fledged URLs.""" + return f"https://media.example.com/{name}" + + with mock.patch( + "django.core.files.storage.FileSystemStorage.url", + side_effect=domain_prefixed_url, + ): + self.assertEqual( + list(document.get_privacy_controlled_file_urls()), + [ + f"https://media.example.com/{document.file.name}", + ], + ) + + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}} + ) + def test_file_permission_setting_success(self): + """Test successful file permission setting using AccessControlLoggingFileSystemStorage: + - Verify permissions are set correctly for both public and private documents. + - Ensure no debug logs are generated during successful operation. + """ + with self.assertNoLogs("cms.private_media.bulk_operations"): + private_document = DocumentFactory(collection=self.root_collection) + public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + self.assertFalse(private_document.has_outdated_file_permissions()) + self.assertFalse(public_document.has_outdated_file_permissions()) + + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}} + ) + @mock.patch( + "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_private", + return_value=False, + ) + @mock.patch( + "cms.private_media.storages.AccessControlLoggingFileSystemStorage.make_public", + return_value=False, + ) + def test_file_permission_setting_failure(self, mock_make_public, mock_make_private): + """Test handling of file permission setting failures: + - Mock failed permission settings for both public and private documents. + - Verify documents are marked as having outdated permissions. + - Ensure correct storage methods are called. + """ + with self.assertNoLogs("cms.private_media.bulk_operations"): + private_document = DocumentFactory(collection=self.root_collection) + public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + mock_make_private.assert_called_once_with(private_document.file) + self.assertTrue(private_document.has_outdated_file_permissions()) + mock_make_public.assert_called_once_with(public_document.file) + self.assertTrue(public_document.has_outdated_file_permissions()) + + +@override_settings( + WAGTAILFRONTENDCACHE={ + "default": { + "BACKEND": "cms.private_media.tests.utils.MockFrontEndCacheBackend", + }, + } +) +class TestPrivateDocumentManager(TestCase): + model = get_document_model() + + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + + def setUp(self): + # Create six documents (a mix of private and public) + self.private_documents = [] + self.public_documents = [] + for _ in range(3): + self.private_documents.append(DocumentFactory(collection=self.root_collection)) + self.public_documents.append(DocumentFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection)) + PURGED_URLS.clear() + + def test_bulk_make_public(self): + """Test the behaviour of PrivateDocumentManager.bulk_make_public().""" + # Three documents are already public, so only three should be updated + with self.assertNumQueries(3): + # Query summary: + # 1. to fetch the documents + # 2. to save updates + # 3. to fetch sites to facilitate cache purging + self.assertEqual(self.model.objects.bulk_make_public(self.model.objects.all()), 3) + + # Serve URLs for private documents should have been purged as part of the update + for obj in self.private_documents: + self.assertIn("http://localhost" + obj.url, PURGED_URLS) + + # Verify all images are now public + for obj in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): + self.assertIs(obj.privacy, Privacy.PUBLIC) + self.assertFalse(obj.has_outdated_file_permissions()) + + # Another attempt should result in no updates + with self.assertNumQueries(1): + self.assertEqual(self.model.objects.bulk_make_public(self.model.objects.all()), 0) + + def test_bulk_make_private(self): + """Test the behaviour of PrivateDocumentManager.bulk_make_private().""" + # Three images are already private, so only three should be updated + with self.assertNumQueries(3): + # Query summary: + # 1. to fetch the documents + # 2. to save updates + # 3. to fetch sites to facilitate cache purging + self.assertEqual(self.model.objects.bulk_make_private(self.model.objects.all()), 3) + + # Serve URLs for public documents should have been purged as part of the update + for obj in self.public_documents: + self.assertIn("http://localhost" + obj.url, PURGED_URLS) + + # Verify all images are now private + for image in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): + self.assertIs(image.privacy, Privacy.PRIVATE) + self.assertFalse(image.has_outdated_file_permissions()) + + # Another attempt should result in no updates + with self.assertNumQueries(1): + # Query summary: + # 1. to fetch the documents + self.assertEqual(self.model.objects.bulk_make_private(self.model.objects.all()), 0) + + +class TestDocumentServeView(TestCase): + model = get_document_model() + + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + cls.private_document = DocumentFactory(collection=cls.root_collection) + cls.public_document = DocumentFactory(_privacy=Privacy.PUBLIC, collection=cls.root_collection) + cls.superuser = get_user_model().objects.create(username="superuser", is_superuser=True) + + def test_serve_private_document(self): + """Test the serve view behaviour for private documents.""" + # If not authenticated, permission checks should fail and a Forbidden response returned + for is_external_env in [True, False]: + with self.subTest(is_external_env=is_external_env) and override_settings(IS_EXTERNAL_ENV=is_external_env): + response = self.client.get(self.private_document.url) + self.assertEqual(response.status_code, 403) + + # If authenticated as a superuser, the view should serve the files + self.client.force_login(self.superuser) + for is_external_env in [True, False]: + with self.subTest(is_external_env=is_external_env) and override_settings(IS_EXTERNAL_ENV=is_external_env): + response = self.client.get(self.private_document.url) + self.assertEqual(response.status_code, 200) + + def test_serve_public_document(self): + """Test the serve view behaviour for public documents.""" + # For public documents, the serve view should redirect to the file URL. + response = self.client.get(self.public_document.url) + self.assertEqual(response.status_code, 200) + + @override_settings(IS_EXTERNAL_ENV=True) + def test_serve_public_document_external_env(self): + self.test_serve_public_document() + + def test_serve_public_document_with_outdated_file_permissions(self): + """Test the serve view behaviour for public documents with outdated file permissions.""" + self.model.objects.filter(id=self.public_document.id).update(file_permissions_last_set=None) + self.public_document.refresh_from_db() + self.assertTrue(self.public_document.has_outdated_file_permissions()) + response = self.client.get(self.public_document.url) + self.assertEqual(response.status_code, 200) diff --git a/cms/private_media/tests/test_images.py b/cms/private_media/tests/test_images.py new file mode 100644 index 00000000..9cb1b8ff --- /dev/null +++ b/cms/private_media/tests/test_images.py @@ -0,0 +1,372 @@ +from unittest import mock + +from django.contrib.auth import get_user_model +from django.test import SimpleTestCase, TestCase, override_settings +from wagtail.images import get_image_model +from wagtail.images.models import Filter +from wagtail.models import Collection, Site +from wagtail_factories import ImageFactory, SiteFactory + +from cms.private_media.constants import Privacy +from cms.private_media.managers import PrivateImageManager +from cms.private_media.models import AbstractPrivateRendition, PrivateImageMixin + +from .utils import PURGED_URLS + + +class TestModelConfiguration(SimpleTestCase): + def test_image_model_using_private_image_mixin(self): + """Verify that the configured image model inherits from PrivateImageMixin.""" + image_model = get_image_model() + self.assertTrue(issubclass(image_model, PrivateImageMixin)) + self.assertIsInstance(image_model.objects, PrivateImageManager) + + def test_rendition_model_using_abstract_private_rendition(self): + """Verify that the configured rendition model inherits from AbstractPrivateRendition.""" + rendition_model = get_image_model().get_rendition_model() + self.assertTrue(issubclass(rendition_model, AbstractPrivateRendition)) + + +class TestImageModel(TestCase): + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + + def test_private_image(self): + """Test the behaviour of private images: + - Verify that images are created as PRIVATE by default. + - Verify that file permission states are updated on save to reflect image privacy. + - Verify that privacy change tracking works correctly. + """ + # Attempts to set file permissions on save should have failed gracefully, + # since the default file backend doesn't support it + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: + image = ImageFactory(collection=self.root_collection) + + # Images should be 'private' by default + self.assertIs(image.privacy, Privacy.PRIVATE) + self.assertTrue(image.is_private) + self.assertFalse(image.is_public) + + # Attempts to set file permissions on save should have failed gracefully + self.assertEqual( + logs.output, + [(f"INFO:cms.private_media.storages:Skipping private file permission setting for '{image.file.name}'.")], + ) + + # File permissions should be considered up-to-date + self.assertFalse(image.has_outdated_file_permissions()) + + # Setting privacy to the same value should not trigger an update to 'privacy_last_changed + value_before = image.privacy_last_changed + image.privacy = Privacy.PRIVATE + self.assertEqual(image.privacy_last_changed, value_before) + + # Setting privacy to a different value should triggered an update to 'privacy_last_changed' + image.privacy = Privacy.PUBLIC + self.assertGreater(image.privacy_last_changed, value_before) + + # File permissions should now be considered outdated + self.assertTrue(image.has_outdated_file_permissions()) + + # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' + # timestamp, resolving the issue + image.save() + self.assertFalse(image.has_outdated_file_permissions()) + + def test_private_image_renditions(self): + """Test that private image renditions use serve URLs instead of direct file URLs.""" + image = ImageFactory(collection=self.root_collection) + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + self.assertEqual(len(renditions), 2) + + # For private images, rendition.url should return a serve URL + for rendition in renditions.values(): + self.assertEqual(rendition.url, rendition.serve_url) + # And cache keys should include 'private' + self.assertIn("-private-", rendition.get_cache_key()) + + def test_public_image(self): + """Test the behaviour of public images. + - Verify that images can be created as PUBLIC. + - Verify that file permission states are updated on save to reflect image privacy. + - Verify that privacy change tracking works correctly. + - Graceful handling of unsupported storage backends. + """ + # Attempts to set file permissions on save should have failed gracefully, + # since the default file backend doesn't support it + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: + image = ImageFactory(_privacy=Privacy.PUBLIC) + + # This image should be 'public' + self.assertIs(image.privacy, Privacy.PUBLIC) + self.assertTrue(image.is_public) + self.assertFalse(image.is_private) + + # Attempts to set file permissions on save should have failed gracefully + self.assertEqual( + logs.output, + [(f"INFO:cms.private_media.storages:Skipping public file permission setting for '{image.file.name}'.")], + ) + + # File permissions should be considered up-to-date + self.assertFalse(image.has_outdated_file_permissions()) + + # Setting privacy to the same value should not trigger an update to 'privacy_last_changed + value_before = image.privacy_last_changed + image.privacy = Privacy.PUBLIC + self.assertEqual(image.privacy_last_changed, value_before) + + # Setting privacy to a different value should triggered an update to 'privacy_last_changed' + image.privacy = Privacy.PRIVATE + self.assertGreater(image.privacy_last_changed, value_before) + + # File permissions should now be considered outdated + self.assertTrue(image.has_outdated_file_permissions()) + + # Resaving should trigger an update to file permissions and the 'file_permissions_last_set' + # timestamp, resolving the issue + image.save() + self.assertFalse(image.has_outdated_file_permissions()) + + def test_public_image_renditions(self): + """Test rendition.url behaviour for public image renditions: + - Public images with up-to-date permissions use direct file URLs. + - Falls back to serve URLs when permissions are outdated. + """ + image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + self.assertEqual(len(renditions), 2) + + # For public images with up-to-date file permissions, rendition.url should return the file url + for rendition in renditions.values(): + self.assertEqual(rendition.url, rendition.file.url) + # And cache keys should include 'public' + self.assertIn("-public-", rendition.get_cache_key()) + + # However, if the file permissions are outdated, rendition.url should return a serve URL + with mock.patch( + "cms.private_media.models.PrivateMediaMixin.has_outdated_file_permissions", + return_value=True, + ): + for rendition in renditions.values(): + self.assertEqual(rendition.url, rendition.serve_url) + + def test_get_privacy_controlled_serve_urls(self): + """Test the behaviour of PrivateImageMixin.get_privacy_controlled_serve_urls.""" + default_site = Site.objects.get(is_default_site=True) + site_2 = SiteFactory(hostname="foo.com", port=443) + sites = [default_site, site_2] + + # If an image has no renditions, the result should be empty + image = ImageFactory(collection=self.root_collection) + self.assertFalse(list(image.get_privacy_controlled_serve_urls(sites))) + + # If an image has renditions, the result should be a list of serve URLs, prefixed + # with the root url for each supplied site + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + expected_result = [] + for base_url in [site.root_url for site in sites]: + for rendition in renditions.values(): + expected_result.append(f"{base_url}{rendition.url}") + with self.assertNumQueries(1): + self.assertEqual(list(image.get_privacy_controlled_serve_urls(sites)), expected_result) + + # If no sites are provided, the result should be empty + with self.assertNumQueries(0): + self.assertFalse(list(image.get_privacy_controlled_serve_urls([]))) + + def test_get_privacy_controlled_file_urls(self): + """Test the behaviour of PrivateImageMixin.get_privacy_controlled_file_urls.""" + image = ImageFactory(collection=self.root_collection) + renditions = image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + + # FileSystemStorage returns relative URLs for files only, so the return value should be empty, + # even if renditions exist + self.assertFalse(list(image.get_privacy_controlled_file_urls())) + + # However, if the storage backend returns fully-fledged URLs, those values should be + # included in the return value + + def domain_prefixed_url(name: str) -> str: + """Replacement for FileSystemStorage.url() that returns fully-fledged URLs.""" + return f"https://media.example.com/{name}" + + with mock.patch( + "django.core.files.storage.FileSystemStorage.url", + side_effect=domain_prefixed_url, + ): + expected_result = [f"https://media.example.com/{image.file.name}"] + for rendition in renditions.values(): + expected_result.append(f"https://media.example.com/{rendition.file.name}") + self.assertEqual(list(image.get_privacy_controlled_file_urls()), expected_result) + + def test_invalid_privacy_value_raises_value_error(self): + """Verify that invalid privacy values raise a ValueError.""" + with self.assertRaises(ValueError): + ImageFactory(_privacy="invalid", collection=self.root_collection) + + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.ReliableAccessControlInMemoryStorage"}} + ) + def test_file_permission_setting_success(self): + """Test successful file permission setting using a storage backend that supports it.""" + with self.assertNoLogs("cms.private_media.bulk_operations"): + private_image = ImageFactory(collection=self.root_collection) + public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + + self.assertFalse(private_image.has_outdated_file_permissions()) + self.assertFalse(public_image.has_outdated_file_permissions()) + + @override_settings( + STORAGES={"default": {"BACKEND": "cms.private_media.storages.FlakyAccessControlInMemoryStorage"}} + ) + def test_file_permission_setting_failure(self): + """Test graceful handling of file permission setting failures. + + Verifies that the system correctly tracks failed permission updates and + maintains the outdated state when storage operations fail. + """ + private_image = ImageFactory(collection=self.root_collection) + public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + self.assertTrue(private_image.has_outdated_file_permissions()) + self.assertTrue(public_image.has_outdated_file_permissions()) + + +@override_settings( + WAGTAILFRONTENDCACHE={ + "default": { + "BACKEND": "cms.private_media.tests.utils.MockFrontEndCacheBackend", + }, + } +) +class TestPrivateImageManager(TestCase): + model = get_image_model() + + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + + def setUp(self): + # Create six images with renditions (a mix of private and public) + self.private_images = [] + self.public_images = [] + for _ in range(3): + private_image = ImageFactory(collection=self.root_collection) + public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=self.root_collection) + private_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + public_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + self.private_images.append(private_image) + self.public_images.append(public_image) + PURGED_URLS.clear() + + def test_bulk_make_public(self): + """Test the behaviour of PrivateImageManager.bulk_make_public().""" + # Three image are already public, so only three should be updated + qs = self.model.objects.all().prefetch_related("renditions") + with self.assertNumQueries(4): + # Query summary: + # 1. to fetch the images + # 2. to prefetch renditions + # 3. to save updates + # 4. to fetch sites to facilitate cache purging + self.assertEqual(self.model.objects.bulk_make_public(qs), 3) + + # Serve URLs for private image renditions should have been purged as part of the update + for obj in self.private_images: + for rendition in obj.renditions.all(): + self.assertIn("http://localhost" + rendition.url, PURGED_URLS) + + # Verify all images are now public + for obj in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): + self.assertIs(obj.privacy, Privacy.PUBLIC) + self.assertFalse(obj.has_outdated_file_permissions()) + + # Another attempt should result in no updates + with self.assertNumQueries(1): + # Query summary: + # 1. to fetch the images + self.assertEqual(self.model.objects.bulk_make_public(self.model.objects.all()), 0) + + def test_bulk_make_private(self): + """Test the behaviour of PrivateImageManager.bulk_make_private().""" + # Three images are already private, so only three should be updated + qs = self.model.objects.all().prefetch_related("renditions") + with self.assertNumQueries(4): + # Query summary: + # 1. to fetch the images + # 2. to prefetch renditions + # 3. to save updates + # 4. to fetch sites to facilitate cache purging + self.assertEqual(self.model.objects.bulk_make_private(qs), 3) + + # Serve URLs for public image renditions should have been purged as part of the update + for obj in self.public_images: + for rendition in obj.renditions.all(): + self.assertIn("http://localhost" + rendition.serve_url, PURGED_URLS) + + # Verify all images are now private + for image in self.model.objects.only("_privacy", "file_permissions_last_set", "privacy_last_changed"): + self.assertIs(image.privacy, Privacy.PRIVATE) + self.assertFalse(image.has_outdated_file_permissions()) + + # Another attempt should result in no updates + with self.assertNumQueries(1): + # Query summary: + # 1. to fetch the images + self.assertEqual(self.model.objects.bulk_make_private(self.model.objects.all()), 0) + + +class TestImageServeView(TestCase): + model = get_image_model() + + @classmethod + def setUpTestData(cls): + cls.root_collection = Collection.objects.get(depth=1) + cls.private_image = ImageFactory(collection=cls.root_collection) + cls.public_image = ImageFactory(_privacy=Privacy.PUBLIC, collection=cls.root_collection) + cls.private_image_renditions = cls.private_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + cls.public_image_renditions = cls.public_image.create_renditions(Filter("fill-10x10"), Filter("fill-20x20")) + cls.superuser = get_user_model().objects.create(username="superuser", is_superuser=True) + + def test_serve_private_image(self): + """Test the serve view behaviour for private image renditions.""" + # If not authenticated, permission checks should fail and a Forbidden response returned + for rendition in self.private_image_renditions.values(): + for is_external_env in [True, False]: + with self.subTest(is_external_env=is_external_env) and override_settings( + IS_EXTERNAL_ENV=is_external_env + ): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 403) + + # If authenticated as a superuser, the view should serve the files + self.client.force_login(self.superuser) + for rendition in self.private_image_renditions.values(): + for is_external_env in [True, False]: + with self.subTest(is_external_env=is_external_env) and override_settings( + IS_EXTERNAL_ENV=is_external_env + ): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 200) + + def test_serve_public_image(self): + """Test the serve view behaviour for public image renditions.""" + # For public image renditions, the serve view should redirect to the file URL. + for rendition in self.public_image_renditions.values(): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 302) + + @override_settings(IS_EXTERNAL_ENV=True) + def test_serve_public_image_external_env(self): + """Test the serve view behaviour for public image renditions in an external environment.""" + self.test_serve_public_image() + + def test_serve_public_image_with_outdated_file_permissions(self): + """Test the serve view behaviour for public image renditions with outdated file permissions.""" + self.model.objects.filter(id=self.public_image.id).update(file_permissions_last_set=None) + self.public_image.refresh_from_db() + self.assertTrue(self.public_image.has_outdated_file_permissions()) + for rendition in self.public_image_renditions.values(): + response = self.client.get(rendition.serve_url) + self.assertEqual(response.status_code, 200) diff --git a/cms/private_media/tests/test_management_commands.py b/cms/private_media/tests/test_management_commands.py new file mode 100644 index 00000000..8751e200 --- /dev/null +++ b/cms/private_media/tests/test_management_commands.py @@ -0,0 +1,72 @@ +from datetime import timedelta +from io import StringIO + +from django.core.management import call_command +from django.test import TestCase +from django.utils import timezone +from wagtail.documents import get_document_model +from wagtail.images import get_image_model +from wagtail_factories import DocumentFactory, ImageFactory + +from cms.private_media.constants import Privacy + + +class RetryFilePermissionSetAttemptsCommandTests(TestCase): + command_name = "retry_file_permission_set_attempts" + + def setUp(self): + ImageFactory() + ImageFactory(_privacy=Privacy.PUBLIC) + DocumentFactory() + DocumentFactory(_privacy=Privacy.PUBLIC) + up_to_date_image = ImageFactory() + up_to_date_document = DocumentFactory() + + # Adjust `file_permissions_last_set` for all items but the 'up-to-date' ones + one_day_ago = timezone.now() - timedelta(days=1) + get_image_model().objects.exclude(id=up_to_date_image.id).update(file_permissions_last_set=one_day_ago) + get_document_model().objects.exclude(id=up_to_date_document.id).update(file_permissions_last_set=one_day_ago) + + def call_command(self, command_name: str, *args, **kwargs): + out = StringIO() + call_command( + command_name, + *args, + stdout=out, + stderr=StringIO(), + **kwargs, + ) + return out.getvalue() + + def test_default(self): + with self.assertNumQueries(7): + # Query summary: + # 1. Fetch outdated documents + # 2. Update 'file_permissions_last_set' for outdated private documents + # 3. Update 'file_permissions_last_set' for outdated public documents + # 4. Fetch outdated images + # 5. Fetch renditions for those images + # 6. Update 'file_permissions_last_set' for outdated private images + # 7. Update 'file_permissions_last_set' for outdated public images + output = self.call_command(self.command_name) + + self.assertIn("2 CustomDocument instances have outdated file permissions", output) + self.assertIn("File permissions successfully updated for 1 private custom documents.", output) + self.assertIn("File permissions successfully updated for 1 public custom documents.", output) + self.assertIn("2 CustomImage instances have outdated file permissions", output) + self.assertIn("File permissions successfully updated for 1 private custom images.", output) + self.assertIn("File permissions successfully updated for 1 public custom images.", output) + + def test_dry_run(self): + with self.assertNumQueries(2): + # Query summary: + # 1. Fetch outdated documents + # 2. Fetch outdated images + output = self.call_command(self.command_name, "--dry-run") + self.assertIn("This is a dry run.", output) + self.assertIn("2 CustomDocument instances have outdated file permissions", output) + self.assertIn("Would update file permissions for 1 private custom documents.", output) + self.assertIn("Would update file permissions for 1 public custom documents.", output) + self.assertIn("2 CustomImage instances have outdated file permissions", output) + self.assertIn("Would update file permissions for 1 private custom images.", output) + self.assertIn("Would update file permissions for 1 public custom images.", output) diff --git a/cms/private_media/tests/test_signal_handlers.py b/cms/private_media/tests/test_signal_handlers.py new file mode 100644 index 00000000..abe9655b --- /dev/null +++ b/cms/private_media/tests/test_signal_handlers.py @@ -0,0 +1,133 @@ +from typing import Any + +from django.test import TestCase +from wagtail.models import Site +from wagtail_factories import DocumentFactory, ImageFactory + +from cms.private_media.constants import Privacy +from cms.standard_pages.models import InformationPage + + +class SignalHandlersTestCase(TestCase): + @classmethod + def setUpTestData(cls): + cls.site = Site.objects.select_related("root_page").get() + cls.home_page = cls.site.root_page + cls.test_page = cls.make_information_page([]) + + @classmethod + def make_information_page(cls, content: list[dict[str, Any]]) -> InformationPage: + """Creates an information page with the provided content value and returns it.""" + instance = InformationPage(title="Test Information Page", summary="Test", content=content) + cls.home_page.add_child(instance=instance) + return instance + + def setUp(self): + self.private_image = ImageFactory() + self.public_image = ImageFactory(_privacy=Privacy.PUBLIC) + self.private_document = DocumentFactory() + self.public_document = DocumentFactory(_privacy=Privacy.PUBLIC) + + def generate_non_media_referencing_content(self) -> list[dict[str, Any]]: + """Returns a raw StreamField value that includes a heading.""" + return [{"type": "heading", "value": "Test heading"}] + + def generate_media_referencing_content(self) -> list[dict[str, Any]]: + """Returns a raw StreamField value that includes image and document blocks that + reference all media created in setUp(). + """ + return [ + {"type": "heading", "value": "Test heading"}, + {"type": "image", "value": self.private_image.id}, + {"type": "image", "value": self.public_image.id}, + { + "type": "documents", + "value": [ + { + "type": "document", + "value": {"document": self.private_document.id, "title": "Private document", "description": ""}, + }, + { + "type": "document", + "value": {"document": self.public_document.id, "title": "Public document", "description": ""}, + }, + ], + }, + ] + + def assertMediaPrivacy(self, expected_privacy: Privacy): # pylint: disable=invalid-name + """Checks that all media created in setup() has the expected privacy, after reloading field + data from the database. + """ + self.private_image.refresh_from_db() + self.assertEqual(self.private_image.privacy, expected_privacy) + self.public_image.refresh_from_db() + self.assertEqual(self.public_image.privacy, expected_privacy) + self.private_document.refresh_from_db() + self.assertEqual(self.private_document.privacy, expected_privacy) + self.public_document.refresh_from_db() + self.assertEqual(self.public_document.privacy, expected_privacy) + + def test_publishing_a_page_makes_referenced_media_public(self): + """Tests the impact of publishing a media-referencing page has on the privacy of referenced media.""" + self.test_page.content = self.generate_media_referencing_content() + + # Publishing the revision should result in all referenced media becoming public + self.test_page.save_revision().publish() + self.assertMediaPrivacy(Privacy.PUBLIC) + + def test_unpublishing_a_page_makes_referenced_media_private(self): + """Tests the impact of unpublishing a media-referencing page has on the privacy of referenced media.""" + self.test_page.content = self.generate_media_referencing_content() + self.test_page.live = True + self.test_page.save(update_fields=["content", "live"]) + + # Unpublishing the page should result in all referenced media becoming private, + # as it isn't referenced by anything else + self.test_page.unpublish() + self.assertMediaPrivacy(Privacy.PRIVATE) + + def test_page_without_references_to_media(self): + """Tests that pages without any referenced to media can still be published and unpublished without issue.""" + self.test_page.content = self.generate_non_media_referencing_content() + self.test_page.live = True + self.test_page.save(update_fields=["content", "live"]) + + self.test_page.unpublish() + + def test_unpublishing_a_page_when_media_is_referenced_by_a_draft_page(self): + """Tests the impact of unpublishing a page that references media that is referenced by another draft page.""" + # Publish the first page, so that the reference index is populated, then unpublish it again + self.test_page.content = self.generate_media_referencing_content() + self.test_page.save_revision().publish() + self.test_page.unpublish() + + # The media should be private, since it has no other references + self.assertMediaPrivacy(Privacy.PRIVATE) + + # Create another page that references the media and publish it + new_page = self.make_information_page(content=self.test_page.content) + new_page.save_revision().publish() + + # The media should be public again, since it is referenced by the new page + self.assertMediaPrivacy(Privacy.PUBLIC) + + # Unpublishing the new page SHOULD result in referenced media becoming private, + # because it is only referenced by draft pages + new_page.unpublish() + self.assertMediaPrivacy(Privacy.PRIVATE) + + def test_unpublishing_a_page_when_media_is_referenced_by_a_live_page(self): + """Tests the impact of unpublishing a page that references media that is referenced by another live page.""" + # Publish the first page, with references to media + self.test_page.content = self.generate_media_referencing_content() + self.test_page.save_revision().publish() + + # Create another page that references the media and publish it + new_page = self.make_information_page(content=self.test_page.content) + new_page.save_revision().publish() + + # Unpublishing the new page SHOULD NOT result in referenced media becoming private, + # because it is still referenced by the original live page + new_page.unpublish() + self.assertMediaPrivacy(Privacy.PUBLIC) diff --git a/cms/private_media/tests/test_storages.py b/cms/private_media/tests/test_storages.py new file mode 100644 index 00000000..08ed9084 --- /dev/null +++ b/cms/private_media/tests/test_storages.py @@ -0,0 +1,60 @@ +from unittest import mock + +from botocore.exceptions import ClientError +from django.conf import settings +from django.test import TestCase, override_settings +from moto import mock_aws +from wagtail_factories import DocumentFactory + +from cms.private_media.storages import AccessControlledS3Storage + + +@mock_aws +@override_settings(AWS_STORAGE_BUCKET_NAME="test-bucket") +class AccessControlledS3StorageTests(TestCase): + @classmethod + def setUpTestData(cls): + cls.document = DocumentFactory() + + def setUp(self): + super().setUp() + self.storage = AccessControlledS3Storage() + # Create a bucket to allow '_set_file_acl' to at least reach the 'put' stage + bucket = self.storage.connection.Bucket(settings.AWS_STORAGE_BUCKET_NAME) + bucket.create() + + def test_make_private_client_error_on_acl_set(self): + with self.assertLogs("cms.private_media.storages", level="ERROR") as logs: + self.assertFalse(self.storage.make_private(self.document.file)) + self.assertIn( + f"ERROR:cms.private_media.storages:Failed to set ACL for {self.document.file.name}", + logs.output[0], + ) + + def test_make_private_client_error_on_acl_get(self): + object_mock = mock.MagicMock() + object_mock.Acl.side_effect = ClientError({"Error": {"Code": "AccessDenied"}}, "GetObjectAcl") + # initialize the bucket to allow it to be patched + self.storage.bucket # noqa: B018 # pylint: disable=pointless-statement + with mock.patch.object(self.storage._bucket, "Object", return_value=object_mock): # pylint: disable=protected-access # noqa: SIM117 + with self.assertLogs("cms.private_media.storages", level="ERROR") as logs: + self.assertFalse(self.storage.make_private(self.document.file)) + self.assertIn( + f"ERROR:cms.private_media.storages:Failed to retrieve ACL for {self.document.file.name}", + logs.output[0], + ) + + def test_make_public_success(self): + acl_mock = mock.MagicMock() + acl_mock.put.return_value = True + object_mock = mock.MagicMock() + object_mock.Acl.return_value = acl_mock + # initialize the bucket to allow it to be patched + self.storage.bucket # noqa: B018 # pylint: disable=pointless-statement + with mock.patch.object(self.storage._bucket, "Object", return_value=object_mock): # pylint: disable=protected-access # noqa: SIM117 + with self.assertLogs("cms.private_media.storages", level="INFO") as logs: + self.assertTrue(self.storage.make_public(self.document.file)) + self.assertIn( + f"INFO:cms.private_media.storages:ACL set successfully for {self.document.file}", + logs.output, + ) diff --git a/cms/private_media/tests/utils.py b/cms/private_media/tests/utils.py new file mode 100644 index 00000000..01aa3044 --- /dev/null +++ b/cms/private_media/tests/utils.py @@ -0,0 +1,13 @@ +from collections.abc import Iterable + +from wagtail.contrib.frontend_cache.backends import BaseBackend + +PURGED_URLS = [] + + +class MockFrontEndCacheBackend(BaseBackend): + def purge(self, url: str) -> None: + PURGED_URLS.append(url) + + def purge_batch(self, urls: Iterable[str]) -> None: + PURGED_URLS.extend(urls) diff --git a/cms/private_media/utils.py b/cms/private_media/utils.py new file mode 100644 index 00000000..b4cc6794 --- /dev/null +++ b/cms/private_media/utils.py @@ -0,0 +1,21 @@ +from functools import cache +from typing import TYPE_CHECKING, Any + +from django.apps import apps +from django.conf import settings + +if TYPE_CHECKING: + from cms.private_media.models import PrivateMediaMixin + + +@cache +def get_private_media_models() -> list[type["PrivateMediaMixin"]]: + """Return all registered models that use the `PrivateMediaMixin` mixin.""" + from cms.private_media.models import PrivateMediaMixin # pylint: disable=import-outside-toplevel + + return [m for m in apps.get_models() if issubclass(m, PrivateMediaMixin)] + + +def get_frontend_cache_configuration() -> dict[str, Any]: + """Return the frontend cache configuration for this project.""" + return getattr(settings, "WAGTAILFRONTENDCACHE", {}) diff --git a/cms/private_media/views.py b/cms/private_media/views.py new file mode 100644 index 00000000..9776212a --- /dev/null +++ b/cms/private_media/views.py @@ -0,0 +1,34 @@ +from typing import TYPE_CHECKING + +from django.core.exceptions import PermissionDenied +from django.shortcuts import redirect +from wagtail.images.permissions import permission_policy +from wagtail.images.views.serve import ServeView + +if TYPE_CHECKING: + from django.http import HttpResponse + + from cms.private_media.models import AbstractPrivateRendition, PrivateImageMixin + + +class PrivateImageServeView(ServeView): + def serve(self, rendition: "AbstractPrivateRendition") -> "HttpResponse": + image: PrivateImageMixin = rendition.image + + # If there's no reason (within our control) for the file not to be served by + # media infrastructure, redirect + if image.is_public and not image.has_outdated_file_permissions(): + return redirect(rendition.file.url) + + # Block access to private images if the user has insufficient permissions + user = self.request.user + if image.is_private and ( + not user.is_authenticated + or not permission_policy.user_has_any_permission_for_instance(user, ["choose", "add", "change"], image) + ): + raise PermissionDenied + + # Serve the file until it is no longer private, or file permissions + # have been set successfully + response: HttpResponse = super().serve(rendition) + return response diff --git a/cms/private_media/wagtail_hooks.py b/cms/private_media/wagtail_hooks.py new file mode 100644 index 00000000..4f1842b3 --- /dev/null +++ b/cms/private_media/wagtail_hooks.py @@ -0,0 +1,22 @@ +from typing import TYPE_CHECKING + +from django.core.exceptions import PermissionDenied +from wagtail import hooks +from wagtail.documents.permissions import permission_policy + +if TYPE_CHECKING: + from django.http import HttpRequest + + from cms.private_media.models import PrivateDocumentMixin + + +@hooks.register("before_serve_document") +def protect_private_documents(document: "PrivateDocumentMixin", request: "HttpRequest") -> None: + """Block access to private documents if the user has insufficient permissions.""" + if document.is_private and ( + not request.user.is_authenticated + or not permission_policy.user_has_any_permission_for_instance( + request.user, ["choose", "add", "change"], document + ) + ): + raise PermissionDenied diff --git a/cms/settings/base.py b/cms/settings/base.py index 321c5280..a506da4f 100644 --- a/cms/settings/base.py +++ b/cms/settings/base.py @@ -62,6 +62,7 @@ "cms.documents", "cms.home", "cms.images", + "cms.private_media", "cms.release_calendar", "cms.themes", "cms.topics", @@ -330,7 +331,7 @@ # http://whitenoise.evans.io/en/stable/#quickstart-for-django-apps # https://docs.djangoproject.com/en/stable/ref/settings/#std-setting-STORAGES STORAGES = { - "default": {"BACKEND": "django.core.files.storage.FileSystemStorage"}, + "default": {"BACKEND": "cms.private_media.storages.AccessControlLoggingFileSystemStorage"}, "staticfiles": {"BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage"}, } @@ -392,7 +393,7 @@ INSTALLED_APPS += ["storages", "wagtail_storages"] # https://docs.djangoproject.com/en/stable/ref/settings/#std-setting-STORAGES - STORAGES["default"]["BACKEND"] = "storages.backends.s3.S3Storage" + STORAGES["default"]["BACKEND"] = "cms.private_media.storages.AccessControlledS3Storage" AWS_STORAGE_BUCKET_NAME = env["AWS_STORAGE_BUCKET_NAME"] @@ -430,6 +431,7 @@ # https://github.com/jschneier/django-storages/blob/10d1929de5e0318dbd63d715db4bebc9a42257b5/storages/backends/s3boto3.py#L217 AWS_S3_URL_PROTOCOL = env.get("AWS_S3_URL_PROTOCOL", "https:") +PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS = env.get("PRIVATE_MEDIA_BULK_UPDATE_MAX_WORKERS", 5) # Logging # This logging is configured to be used with Sentry and console logs. Console diff --git a/cms/urls.py b/cms/urls.py index 73dd109a..b8f3c1af 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -2,7 +2,7 @@ from django.apps import apps from django.conf import settings -from django.urls import include, path +from django.urls import include, path, re_path from django.views.decorators.cache import never_cache from django.views.decorators.vary import vary_on_headers from django.views.generic import TemplateView @@ -13,6 +13,7 @@ from cms.core import views as core_views from cms.core.cache import get_default_cache_control_decorator +from cms.private_media.views import PrivateImageServeView if TYPE_CHECKING: from django.urls import URLPattern, URLResolver @@ -101,6 +102,7 @@ + debug_urlpatterns + urlpatterns + [ + re_path(r"^images/([^/]*)/(\d*)/([^/]*)/[^/]*$", PrivateImageServeView.as_view(), name="wagtailimages_serve"), # Add Wagtail URLs at the end. # Wagtail cache-control is set on the page models' serve methods path("", include(wagtail_urls)), diff --git a/cms/users/models.py b/cms/users/models.py index 40ebe675..2da2ce5d 100644 --- a/cms/users/models.py +++ b/cms/users/models.py @@ -1,5 +1,5 @@ from django.contrib.auth.models import AbstractUser -class User(AbstractUser): # type: ignore[django-manager-missing] +class User(AbstractUser): """Barebones custom user model.""" diff --git a/docs/custom-features/media_privacy.md b/docs/custom-features/media_privacy.md new file mode 100644 index 00000000..438d468a --- /dev/null +++ b/docs/custom-features/media_privacy.md @@ -0,0 +1,97 @@ +# Media privacy + +This project uses customised versions of Wagtail's `Image` and `Document` models to store and manage the +privacy of uploaded media files, so that potentially sensitive media isn't leaked to the public before it's ready. + +## The basic premise + +1. Images and documents are 'private' by default, and will only be served to authenticated users with some level of permission on the media item. +2. They only become 'public' when a page referencing them is published. +3. When a live page unpublished, any images or documents referenced soley by that page (i.e. not used on any other live pages) are made private again. + +When a media item is 'private', file requests are handled by views that check the permissions of the requesting user, and only serve the file if the user has some level of permission on the media item (to support previews and reviews from other editors). + +When a media item is 'public', permission-checking in serve views is skipped, and the user is either redirected to the media storage URL (in the case of images), or served the file directly (in the case of documents). + +These cases (and more) are covered by integration tests in [cms/private_media/tests/test_signal_handlers.py](https://github.com/ONSdigital/dis-wagtail/tree/main/cms/private_media/tests/test_signal_handlers.py). + +## Setting of file-level permissions + +In environments where the storage service supports it (e.g. hosted environments using S3 for media storage), attempts are made to set file-level permissions to reflect the privacy of the media item. This happens automatically when an image, rendition, or document is saved for the first time, and also when its privacy is altered as the result of another action (e.g. a referencing page being published or unpublished). + +The responsibility of setting file-level permissions in hosted environments falls to the `cms.private_media.storages.AccessControlledS3Storage` class, which implements the `make_public()` and `make_private()` methods. + +In local dev environments, the permission-setting attempts themselves are skipped, but log entries are generated with the level 'INFO' in place of each attempt, so you can get a sense of what would happen in a hosted environment, for example: + +``` +INFO:cms.private_media.storages:Skipping file permission setting for '/media/images/2024/12/09/image.jpg'. +``` + +Whenever the privacy of a media item is altered, it's `privacy_last_changed` timestamp field is updated and saved to the database alongside other changes. +If no errors occurred during the file-updating process, the media item's `file_permissions_last_set` timestamp will also be updated and saved. Because the file-permission setting happens later in the update process, the `file_permissions_last_set` timestamp should always be greater than the `privacy_last_changed` value when no errors occurred. + +### Performance considerations + +Because media item privacy is often updated in-bulk (e.g. making all images referenced by a page public when it's published), and each media item can have multiple files associated with it (e.g. multiple renditions of an image, each with it's own file), file-level permission setting has to be as performant as possible, and not choke if errors occur. + +Since media-hosting services don't provide a way to set file-level permissions in bulk, the best we can do is to make sure the individual requests block the process as little as possible, so we use a `ThreadPoolExecutor` to run them in parallel. + +If an error occurred whilst attempting to get or set the permissions for a particular file, the error is logged, but processing continues with the next item in the sequence. + +### Tracking of unsuccesful permission-setting attempts + +Whilst file-permission setting requests are quite reliable in S3, they can fail occasionally (as with any web service). + +Because a media item's `file_permissions_last_set` timestamp is only updated when all file-permission setting attempts were successful, for media items with outdated file permissions, the `file_permissions_last_set` timestamp will trail behind the `privacy_last_changed` value. + +For an individual object, the `has_outdated_file_permissions()` method will return `True` if the `file_permissions_last_set` timestamp is eariler than `privacy_last_changed`. This is used in a few places to vary the behaviour. Specifically, the `href` value for images will continue to point to the media serve view, so that it can still be served to users. The image serve view also checks it before redirecting users to the direct file URL. + +The timestamp values can also be used to identify affected media items in bulk. This is the approach taken by the `retry_file_permission_set_attempts` management command, which runs regularly (every 5 minutes) in hosted environments to help keep file permissions up-to-date. It uses the same bulk-processing logic as the signal handlers, so will retry failed requests, mark any items that are successfully updated, and log any errors that occured. + +## Use of Wagtail's reference index + +Wagtail's [reference index](https://docs.wagtail.org/en/stable/advanced_topics/reference_index.html) is used to track when images and documents (and other objects) are referenced by pages (and other objects). + +The reference index is automatically populated via handlers connected to the `post_save` signal for all registered models. + +When publishing or unpublishing a page (or other type of 'publishable' object), object-level changes are saved before any `published`, or `unpublished` or signals are emitted. So, as long as we use these signals to review the privacy +of referenced media, we can be sure that the reference index will be up-to-date. + +## Compatibility with upstream caching + +Because serve view responses and direct file urls can be cached, it's important that when the privacy of a media item changes, the cache +is invalidated for any affected URLs. In environments where Wagtail's front-end-cache app is configured, all relevant URLs should be collected into a batch and sent to the relevant backend to initiate purging. + +## Adding private media support to a new model + +To add private media support to a new model, simply include the `PrivateMediaMixin` mixin in the model's parent classes, and then implement a `get_privacy_controlled_files()` method which returns the values of all of the file field values you wish to protect. + +For example, if you had a `Chart` snippet model with `thumbnail_image` and `data_csv` fields that you wanted to protect, the final class definition might look like this: + +```python + +from typing import TYPE_CHECKING, Iterator +from django.db import models +from wagtail.snippets.models import register_snippet + +from cms.private_media.models.images import PrivateMediaMixin + + +if TYPE_CHECKING: + from django.db.models.fields.files import FieldFile + + +@register_snippet +class Chart(PrivateMediaMixin, models.Model): + ... + thumbnail_image = models.ImageField() + data_csv = models.FileField() + + def get_privacy_controlled_files(self) -> Iterator["FieldFile"]: + """Return an iterator of the files that should be protected.""" + yield self.thumbnail_image, yield self.data_csv +``` + +Depending on the type of model, it might also be necessary to [register the model with Wagtail's reference index](https://docs.wagtail.org/en/v6.3.1/advanced_topics/reference_index.html) (Though, this shouldn't be neccessary for snippets). + +The default model manager inherited from `PrivateMediaMixin` implements all of the methods required for keeping privacy up-to-date, and the existing signal handlers that listen for content changes will start looking for references to your model instances, and trigger privacy updates accordingly. diff --git a/docs/index.md b/docs/index.md index 19b484fa..fe19c980 100644 --- a/docs/index.md +++ b/docs/index.md @@ -13,6 +13,7 @@ To work on this project, see the [README](../README.md). Any unique/custom features of note are listed here. Please keep this list updated as more features are added. - [Migration-friendly StreamFields](custom-features/migration_friendly_streamfields.md) +- [Media privacy](custom-features/media_privacy.md) ## External integrations diff --git a/poetry.lock b/poetry.lock index a9f63fa4..babf3673 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "anyascii" @@ -1234,7 +1234,9 @@ boto3 = ">=1.9.201" botocore = ">=1.14.0,<1.35.45 || >1.35.45,<1.35.46 || >1.35.46" cryptography = ">=3.3.1" Jinja2 = ">=2.10.1" +py-partiql-parser = {version = "0.5.6", optional = true, markers = "extra == \"s3\""} python-dateutil = ">=2.1,<3.0.0" +PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"s3\""} requests = ">=2.5" responses = ">=0.15.0" werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1" @@ -1583,6 +1585,20 @@ urwid-readline = "*" [package.extras] completion = ["shtab"] +[[package]] +name = "py-partiql-parser" +version = "0.5.6" +description = "Pure Python PartiQL Parser" +optional = false +python-versions = "*" +files = [ + {file = "py_partiql_parser-0.5.6-py2.py3-none-any.whl", hash = "sha256:622d7b0444becd08c1f4e9e73b31690f4b1c309ab6e5ed45bf607fe71319309f"}, + {file = "py_partiql_parser-0.5.6.tar.gz", hash = "sha256:6339f6bf85573a35686529fc3f491302e71dd091711dfe8df3be89a93767f97b"}, +] + +[package.extras] +dev = ["black (==22.6.0)", "flake8", "mypy", "pytest"] + [[package]] name = "pycparser" version = "2.22" @@ -2368,4 +2384,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "45b7aa48a3120408add8c87d4b8cf3b97d669ec131b863319f741b0c719d4c12" +content-hash = "16ce0aabd64d44ee8239045a03843a46ab6713782186e3dd7add2b921be5c985" diff --git a/pyproject.toml b/pyproject.toml index 59a767bf..93f304f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ Werkzeug = "~3.0.6" setuptools = "^75.2.0" honcho = "^2.0.0" detect-secrets = "^1.5.0" -moto = {extras = ["iam"], version = "^5.0.21"} +moto = {version = "^5.0.22", extras = ["s3","iam"]} [tool.ruff] target-version = "py312"