Skip to content

Improvements to path handling #95

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions gateway/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"django-celery-beat>=2.7.0",
"django-cors-headers>=4.5.0",
"django-crispy-forms>=2.3",
"django-extensions>=3.2.3",
"django-model-utils>=5.0.0",
"django-redis>=5.4.0",
"django-storages[boto3]>=1.14.4",
Expand All @@ -19,8 +20,11 @@
"djangorestframework-api-key>=3.0.0",
"djangorestframework>=3.15.2",
"drf-spectacular>=0.27.2",
"environs[django]>=14.1.1",
"factory-boy>=3.3.1",
"fido2>=1.1.3",
"flower>=2.0.1",
"h5py>=3.12.1",
"hiredis>=3.0.0",
"loguru>=0.7.2",
"minio>=7.2.9",
Expand All @@ -31,13 +35,10 @@
"python-slugify>=8.0.4",
"redis>=5.2.0",
"rich>=13.9.3",
"sentry-sdk[django]>=2.25.1",
"uvicorn-worker>=0.2.0",
"uvicorn>=0.32.0",
"whitenoise>=6.7.0",
"h5py>=3.12.1",
"django-extensions>=3.2.3",
"sentry-sdk[django]>=2.25.1",
"environs[django]>=14.1.1",
]
description = "Gateway for SpectrumX Data System"
name = "sds_gateway"
Expand All @@ -54,7 +55,6 @@
"django-types>=0.19.1",
"djangorestframework-stubs[compatible-mypy]>=3.15.1",
"djlint>=1.35.2",
"factory-boy>=3.3.1",
"ipdb>=0.13",
"mypy>=1.11",
"pre-commit>=4.0",
Expand Down
1 change: 1 addition & 0 deletions gateway/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pytest-django==4.8.0 # https://github.com/pytest-dev/pytest-django
django-coverage-plugin>=3.1.0
django-stubs[compatible-mypy]==5.0.2 # https://github.com/typeddjango/django-stubs
djangorestframework-stubs[compatible-mypy]==3.15.0 # https://github.com/typeddjango/djangorestframework-stubs
factory-boy==3.3.0 # https://github.com/FactoryBoy/factory_boy
mypy==1.10.0 # https://github.com/python/mypy
pytest-cov>=5.0.0
pytest-sugar==1.0.0 # https://github.com/Frozenball/pytest-sugar
Expand Down
1 change: 0 additions & 1 deletion gateway/requirements/local.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ pre-commit==3.7.1 # https://github.com/pre-commit/pre-commit

# Django
# ------------------------------------------------------------------------------
factory-boy==3.3.0 # https://github.com/FactoryBoy/factory_boy
django-debug-toolbar==4.4.6 # https://github.com/jazzband/django-debug-toolbar
django-extensions==3.2.3 # https://github.com/django-extensions/django-extensions
django-coverage-plugin==3.1.0 # https://github.com/nedbat/django_coverage_plugin
25 changes: 23 additions & 2 deletions gateway/sds_gateway/api_methods/serializers/capture_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,27 @@ def is_valid(self, *, raise_exception: bool = True) -> bool:
return super().is_valid(raise_exception=raise_exception)

def create(self, validated_data: dict[str, Any]) -> Capture:
# Set the owner to the request user
# set the owner to the request user
validated_data["owner"] = self.context["request_user"]
return super().create(validated_data)
validated_data["top_level_dir"] = normalize_top_level_dir(
validated_data["top_level_dir"],
)
return super().create(validated_data=validated_data)

def update(self, instance: Capture, validated_data: dict[str, Any]) -> Capture:
validated_data["top_level_dir"] = normalize_top_level_dir(
validated_data["top_level_dir"],
)
return super().update(instance=instance, validated_data=validated_data)


def normalize_top_level_dir(unknown_path: str) -> str:
"""Normalize the top level directory path."""
valid_path = str(unknown_path)

# top level dir must start with '/'
if not valid_path.startswith("/"):
valid_path = "/" + valid_path

# top level dir must not end with '/'
return valid_path.rstrip("/")
27 changes: 5 additions & 22 deletions gateway/sds_gateway/api_methods/serializers/file_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import uuid
from pathlib import Path
from pathlib import PurePosixPath
from typing import Any

from django.http import QueryDict
Expand All @@ -13,6 +12,7 @@
from sds_gateway.api_methods.serializers.capture_serializers import CaptureGetSerializer
from sds_gateway.api_methods.serializers.dataset_serializers import DatasetGetSerializer
from sds_gateway.api_methods.serializers.user_serializer import UserGetSerializer
from sds_gateway.api_methods.utils.sds_files import sanitize_path_rel_to_user
from sds_gateway.users.models import User

BAD_REQUEST = 400
Expand Down Expand Up @@ -105,28 +105,11 @@ def is_valid(self, *, raise_exception: bool = True) -> bool:
def create(self, validated_data: dict[str, Any]) -> File:
"""Creates a new file instance validating and saving the data."""

# set the owner to the request user
validated_data["owner"] = self.context["request_user"]
user_files_dir = f"/files/{validated_data['owner'].email}"

# ensure directory is not a relative path
if not PurePosixPath(validated_data["directory"]).is_absolute():
raise serializers.ValidationError(
{"detail": "Relative paths are not allowed."},
code=str(BAD_REQUEST),
)

# ensure the resolved path is within the user's files directory
validated_data["directory"] = f"{user_files_dir}{validated_data['directory']}/"
resolved_dir = Path(validated_data["directory"]).resolve(strict=False)
resolved_user_files_dir = Path(user_files_dir).resolve(strict=False)
if not resolved_dir.is_relative_to(resolved_user_files_dir):
raise serializers.ValidationError(
{
"detail": f"The provided directory must be in the user's files directory: {user_files_dir}", # noqa: E501
},
code=str(BAD_REQUEST),
)
validated_data["directory"] = sanitize_path_rel_to_user(
unsafe_path=validated_data["directory"],
user=validated_data["owner"],
)

if "media_type" not in validated_data:
validated_data["media_type"] = ""
Expand Down
45 changes: 45 additions & 0 deletions gateway/sds_gateway/api_methods/utils/sds_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from pathlib import Path

from loguru import logger as log
from rest_framework.request import Request

from sds_gateway.users.models import User


def sanitize_path_rel_to_user(
unsafe_path: str,
request: Request | None = None,
user: User | None = None,
) -> Path | None:
"""Ensures a path is safe by making it relative to the user's root in SDS.
Args:
unsafe_path: The unsafe path.
request: The request object with `.user.email`. OR
user: The user that should own the path.
Returns:
A Path object if the path is safe,
or None if it is not safe to continue.
"""
files_dir = Path("/files/")
if request is not None:
user_root_path = files_dir / request.user.email
elif user is not None:
user_root_path = files_dir / user.email
else:
msg = "Either user or request must be provided."
raise ValueError(msg)
if not user_root_path.is_relative_to(files_dir):
msg = (
"INTERNAL ERROR: User root path is not a subdirectory "
f"of '{files_dir}': '{user_root_path}'"
)
log.warning(msg)
return None
unsafe_concat_path = Path(
f"{user_root_path}/{unsafe_path}",
# needs to be a concatenation, as we want to remain under the user's files dir
)
user_rel_path = unsafe_concat_path.resolve(strict=False)
if not user_rel_path.is_relative_to(user_root_path):
return None
return user_rel_path
6 changes: 6 additions & 0 deletions gateway/sds_gateway/api_methods/views/capture_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ def ingest_capture(
drf_channel=drf_channel,
)

# disconnect files that are no longer in the capture
for cur_file in capture.files.all():
if cur_file not in files_to_connect:
cur_file.capture = None
cur_file.save()

# connect the files to the capture
for cur_file in files_to_connect:
cur_file.capture = capture
Expand Down
43 changes: 3 additions & 40 deletions gateway/sds_gateway/api_methods/views/file_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@
from sds_gateway.api_methods.serializers.file_serializers import FileGetSerializer
from sds_gateway.api_methods.serializers.file_serializers import FilePostSerializer
from sds_gateway.api_methods.utils.minio_client import get_minio_client
from sds_gateway.users.models import User
from sds_gateway.api_methods.utils.sds_files import sanitize_path_rel_to_user

if TYPE_CHECKING:
from django.http.request import QueryDict

from sds_gateway.users.models import User


class FilePagination(PageNumberPagination):
page_size = 30
Expand Down Expand Up @@ -589,43 +591,4 @@ def post(self, request: Request) -> Response:
return Response(conditions, status=status.HTTP_200_OK)


def sanitize_path_rel_to_user(
unsafe_path: str,
request: Request | None = None,
user: User | None = None,
) -> Path | None:
"""Ensures a path is safe by making it relative to the user's root in SDS.
Args:
unsafe_path: The unsafe path.
request: The request object with `.user.email`. OR
user: The user that should own the path.
Returns:
A Path object if the path is safe,
or None if it is not safe to continue.
"""
files_dir = Path("/files/")
if request is not None:
user_root_path = files_dir / request.user.email
elif user is not None:
user_root_path = files_dir / user.email
else:
msg = "Either user or request must be provided."
raise ValueError(msg)
if not user_root_path.is_relative_to(files_dir):
msg = (
"INTERNAL ERROR: User root path is not a subdirectory "
f"of '{files_dir}': '{user_root_path}'"
)
log.warning(msg)
return None
unsafe_concat_path = Path(
f"{user_root_path}/{unsafe_path}",
# needs to be a concatenation, as we want to remain under the user's files dir
)
user_rel_path = unsafe_concat_path.resolve(strict=False)
if not user_rel_path.is_relative_to(user_root_path):
return None
return user_rel_path


check_contents_exist = CheckFileContentsExistView.as_view()
Loading