Skip to content

Commit

Permalink
✨(backend) add url to download media attachments with access rights
Browse files Browse the repository at this point in the history
We make use of nginx subrequests to block media file downloads while
we check for access rights. The request is then proxied to the object
storage engine and authorization is added via the "Authorization"
header. This way the media urls are static and can be stored in the
document's json content without compromising on security: access
control is done on all requests based on the user cookie session.
  • Loading branch information
sampaccoud committed Aug 27, 2024
1 parent c9f1356 commit 67a20f2
Show file tree
Hide file tree
Showing 12 changed files with 514 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0),
and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).

# Added

- ✨Add image attachments with access control

## [Unreleased]


Expand Down
33 changes: 33 additions & 0 deletions src/backend/core/api/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Util to generate S3 authorization headers for object storage access control"""

from django.core.files.storage import default_storage

import botocore


def generate_s3_authorization_headers(key):
"""
Generate authorization headers for an s3 object.
These headers can be used as an alternative to signed urls with many benefits:
- the urls of our files never expire and can be stored in our documents' content
- we don't leak authorized urls that could be shared (file access can only be done
with cookies)
- access control is truly realtime
- the object storage service does not need to be exposed on internet
"""
url = default_storage.unsigned_connection.meta.client.generate_presigned_url(
"get_object",
ExpiresIn=0,
Params={"Bucket": default_storage.bucket_name, "Key": key},
)
request = botocore.awsrequest.AWSRequest(method="get", url=url)

s3_client = default_storage.connection.meta.client
# pylint: disable=protected-access
credentials = s3_client._request_signer._credentials # noqa: SLF001
frozen_credentials = credentials.get_frozen_credentials()
region = s3_client.meta.region_name
auth = botocore.auth.S3SigV4Auth(frozen_credentials, "s3", region)
auth.add_auth(request)

return request
56 changes: 55 additions & 1 deletion src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""API endpoints"""

import os
import re
import uuid
from urllib.parse import urlparse

from django.conf import settings
from django.contrib.postgres.aggregates import ArrayAgg
Expand Down Expand Up @@ -30,7 +32,17 @@
from core import models
from core.utils import email_invitation

from . import permissions, serializers
from . import permissions, serializers, utils

ATTACHMENTS_FOLDER = "attachments"
UUID_REGEX = (
r"[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}"
)
FILE_EXT_REGEX = r"\.[a-zA-Z]{3,4}"
MEDIA_URL_PATTERN = re.compile(
f"{settings.MEDIA_URL:s}({UUID_REGEX:s})/"
f"({ATTACHMENTS_FOLDER:s}/{UUID_REGEX:s}{FILE_EXT_REGEX:s})$"
)

# pylint: disable=too-many-ancestors

Expand Down Expand Up @@ -422,6 +434,48 @@ def attachment_upload(self, request, *args, **kwargs):
{"file": f"{settings.MEDIA_URL:s}{key:s}"}, status=status.HTTP_201_CREATED
)

@decorators.action(detail=False, methods=["get"], url_path="retrieve-auth")
def retrieve_auth(self, request, *args, **kwargs):
"""
This view is used by an Nginx subrequest to control access to a document's
attachment file.
The original url is passed by nginx in the "HTTP_X_ORIGINAL_URL" header.
See corresponding ingress configuration in Helm chart and read about the
nginx.ingress.kubernetes.io/auth-url annotation to understand how the Nginx ingress
is configured to do this.
Based on the original url and the logged in user, we must decide if we authorize Nginx
to let this request go through (by returning a 200 code) or if we block it (by returning
a 403 error). Note that we return 403 errors without any further details for security
reasons.
When we let the request go through, we compute authorization headers that will be added to
the request going through thanks to the nginx.ingress.kubernetes.io/auth-response-headers
annotation. The request will then be proxied to the object storage backend who will
respond with the file after checking the signature included in headers.
"""
original_url = urlparse(request.META.get("HTTP_X_ORIGINAL_URL"))
match = MEDIA_URL_PATTERN.search(original_url.path)

try:
pk, attachment_key = match.groups()
except AttributeError as excpt:
raise exceptions.PermissionDenied() from excpt

# Check permission
try:
document = models.Document.objects.get(pk=pk)
except models.Document.DoesNotExist as excpt:
raise exceptions.PermissionDenied() from excpt

if not document.get_abilities(request.user).get("retrieve", False):
raise exceptions.PermissionDenied()

# Generate authorization headers and return an authorization to proceed with the request
request = utils.generate_s3_authorization_headers(f"{pk:s}/{attachment_key:s}")
return drf_response.Response("authorized", headers=request.headers, status=200)


class DocumentAccessViewSet(
ResourceAccessViewsetMixin,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,7 @@ def test_api_documents_attachment_upload_reader(via, mock_user_get_teams):

@pytest.mark.parametrize("role", ["editor", "administrator", "owner"])
@pytest.mark.parametrize("via", VIA)
def test_api_documents_attachment_upload_success(
via, role, mock_user_get_teams, settings
):
def test_api_documents_attachment_upload_success(via, role, mock_user_get_teams):
"""
Editors, administrators and owners of a document should be able to upload an attachment.
"""
Expand All @@ -130,7 +128,7 @@ def test_api_documents_attachment_upload_success(

assert response.status_code == 201

pattern = re.compile(rf"^{settings.MEDIA_URL}{document.id!s}/attachments/(.*)\.jpg")
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
match = pattern.search(response.json()["file"])
file_id = match.group(1)

Expand Down
213 changes: 213 additions & 0 deletions src/backend/core/tests/documents/test_api_documents_retrieve_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
"""
Test file uploads API endpoint for users in impress's core app.
"""

import uuid
from io import BytesIO
from urllib.parse import urlparse

from django.conf import settings
from django.core.files.storage import default_storage
from django.utils import timezone

import pytest
import requests
from rest_framework.test import APIClient

from core import factories
from core.tests.conftest import TEAM, USER, VIA

pytestmark = pytest.mark.django_db


def test_api_documents_retrieve_auth_anonymous_public():
"""Anonymous users should be able to retrieve attachments linked to a public document"""
document = factories.DocumentFactory(is_public=True)

filename = f"{uuid.uuid4()!s}.jpg"
key = f"{document.pk!s}/attachments/{filename:s}"

default_storage.connection.meta.client.put_object(
Bucket=default_storage.bucket_name,
Key=key,
Body=BytesIO(b"my prose"),
ContentType="text/plain",
)

original_url = f"http://localhost/media/{key:s}"
response = APIClient().get(
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=original_url
)

assert response.status_code == 200

authorization = response["Authorization"]
assert "AWS4-HMAC-SHA256 Credential=" in authorization
assert (
"SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature="
in authorization
)
assert response["X-Amz-Date"] == timezone.now().strftime("%Y%m%dT%H%M%SZ")

s3_url = urlparse(settings.AWS_S3_ENDPOINT_URL)
file_url = f"{settings.AWS_S3_ENDPOINT_URL:s}/impress-media-storage/{key:s}"
response = requests.get(
file_url,
headers={
"authorization": authorization,
"x-amz-date": response["x-amz-date"],
"x-amz-content-sha256": response["x-amz-content-sha256"],
"Host": f"{s3_url.hostname:s}:{s3_url.port:d}",
},
timeout=1,
)
assert response.content.decode("utf-8") == "my prose"


def test_api_documents_retrieve_auth_anonymous_not_public():
"""
Anonymous users should not be allowed to retrieve attachments linked to a document
that is not public.
"""
document = factories.DocumentFactory(is_public=False)

filename = f"{uuid.uuid4()!s}.jpg"
media_url = f"http://localhost/media/{document.pk!s}/attachments/{filename:s}"

response = APIClient().get(
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=media_url
)

assert response.status_code == 403
assert "Authorization" not in response


def test_api_documents_retrieve_auth_authenticated_public():
"""
Authenticated users who are not related to a document should be able to
retrieve attachments linked to a public document.
"""
document = factories.DocumentFactory(is_public=True)

user = factories.UserFactory()
client = APIClient()
client.force_login(user)

filename = f"{uuid.uuid4()!s}.jpg"
key = f"{document.pk!s}/attachments/{filename:s}"

default_storage.connection.meta.client.put_object(
Bucket=default_storage.bucket_name,
Key=key,
Body=BytesIO(b"my prose"),
ContentType="text/plain",
)

original_url = f"http://localhost/media/{key:s}"
response = APIClient().get(
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=original_url
)

assert response.status_code == 200

authorization = response["Authorization"]
assert "AWS4-HMAC-SHA256 Credential=" in authorization
assert (
"SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature="
in authorization
)
assert response["X-Amz-Date"] == timezone.now().strftime("%Y%m%dT%H%M%SZ")

s3_url = urlparse(settings.AWS_S3_ENDPOINT_URL)
file_url = f"{settings.AWS_S3_ENDPOINT_URL:s}/impress-media-storage/{key:s}"
response = requests.get(
file_url,
headers={
"authorization": authorization,
"x-amz-date": response["x-amz-date"],
"x-amz-content-sha256": response["x-amz-content-sha256"],
"Host": f"{s3_url.hostname:s}:{s3_url.port:d}",
},
timeout=1,
)
assert response.content.decode("utf-8") == "my prose"


def test_api_documents_retrieve_auth_authenticated_not_public():
"""
Authenticated users who are not related to a document should not be allowed to
retrieve attachments linked to a document that is not public.
"""
document = factories.DocumentFactory(is_public=False)

user = factories.UserFactory()
client = APIClient()
client.force_login(user)

filename = f"{uuid.uuid4()!s}.jpg"
media_url = f"http://localhost/media/{document.pk!s}/attachments/{filename:s}"

response = APIClient().get(
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=media_url
)

assert response.status_code == 403
assert "Authorization" not in response


@pytest.mark.parametrize("is_public", [True, False])
@pytest.mark.parametrize("via", VIA)
def test_api_documents_retrieve_auth_related(via, is_public, mock_user_get_teams):
"""
Users who have a role on a document, whatever the role, should be able to
retrieve related attachments.
"""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)

document = factories.DocumentFactory(is_public=is_public)
if via == USER:
factories.UserDocumentAccessFactory(document=document, user=user)
elif via == TEAM:
mock_user_get_teams.return_value = ["lasuite", "unknown"]
factories.TeamDocumentAccessFactory(document=document, team="lasuite")

filename = f"{uuid.uuid4()!s}.jpg"
key = f"{document.pk!s}/attachments/{filename:s}"

default_storage.connection.meta.client.put_object(
Bucket=default_storage.bucket_name,
Key=key,
Body=BytesIO(b"my prose"),
ContentType="text/plain",
)

original_url = f"http://localhost/media/{key:s}"
response = client.get(
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=original_url
)

assert response.status_code == 200

authorization = response["Authorization"]
assert "AWS4-HMAC-SHA256 Credential=" in authorization
assert (
"SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature="
in authorization
)
assert response["X-Amz-Date"] == timezone.now().strftime("%Y%m%dT%H%M%SZ")

s3_url = urlparse(settings.AWS_S3_ENDPOINT_URL)
file_url = f"{settings.AWS_S3_ENDPOINT_URL:s}/impress-media-storage/{key:s}"
response = requests.get(
file_url,
headers={
"authorization": authorization,
"x-amz-date": response["x-amz-date"],
"x-amz-content-sha256": response["x-amz-content-sha256"],
"Host": f"{s3_url.hostname:s}:{s3_url.port:d}",
},
timeout=1,
)
assert response.content.decode("utf-8") == "my prose"
14 changes: 14 additions & 0 deletions src/helm/env.d/dev/values.impress.yaml.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,17 @@ ingressWS:
ingressAdmin:
enabled: true
host: impress.127.0.0.1.nip.io

ingressMedia:
enabled: true
host: impress.127.0.0.1.nip.io

annotations:
nginx.ingress.kubernetes.io/auth-url: https://impress.127.0.0.1.nip.io/api/v1.0/documents/retrieve-auth/
nginx.ingress.kubernetes.io/auth-response-headers: "Authorization, X-Amz-Date, X-Amz-Content-SHA256"
nginx.ingress.kubernetes.io/upstream-vhost: minio.impress.svc.cluster.local:9000
nginx.ingress.kubernetes.io/rewrite-target: /impress-media-storage/$1

serviceMedia:
host: minio.impress.svc.cluster.local
port: 9000
21 changes: 21 additions & 0 deletions src/helm/env.d/preprod/values.impress.yaml.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,24 @@ ingressAdmin:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/auth-signin: https://oauth2-proxy-preprod.beta.numerique.gouv.fr/oauth2/start
nginx.ingress.kubernetes.io/auth-url: https://oauth2-proxy-preprod.beta.numerique.gouv.fr/oauth2/auth

ingressMedia:
enabled: true
host: impress-preprod.beta.numerique.gouv.fr

annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/auth-url: https://impress-preprod.beta.numerique.gouv.fr/api/v1.0/documents/retrieve-auth/
nginx.ingress.kubernetes.io/auth-response-headers: "Authorization, X-Amz-Date, X-Amz-Content-SHA256"
nginx.ingress.kubernetes.io/upstream-vhost:
secretKeyRef:
name: impress-media-storage.bucket.libre.sh
key: url
nginx.ingress.kubernetes.io/rewrite-target: /impress-media-storage/$1

serviceMedia:
host:
secretKeyRef:
name: impress-media-storage.bucket.libre.sh
key: url
port: 9000
Loading

0 comments on commit 67a20f2

Please sign in to comment.