From d603a3f47cdb857c0fd808a28c79596d7f6d162e Mon Sep 17 00:00:00 2001
From: Jacob Nesbitt
Date: Wed, 4 Dec 2024 12:18:10 -0500
Subject: [PATCH] Include zarr asset sizes when validating version metadata

Co-authored-by: Aaron Kanzer
---
Notes (text between the three-dash marker and the first "diff --git" line is
not part of the commit message and is not applied as a change):

* dandiapi/api/services/metadata/__init__.py
  In _build_validatable_version_metadata, the placeholder
  assetsSummary.numberOfBytes was 1 only when some asset of the version
  matched blob__size__gt=0. It is now 1 when some asset matches
  blob__size__gt=0 OR zarr__size__gt=0 (two Q objects combined with "|"),
  and 0 otherwise. numberOfFiles is unchanged.

* dandiapi/api/tests/test_tasks.py
  - test_validate_version_metadata_empty_zarr_asset: a version whose only
    asset is a zarr with size=0 must end up INVALID with the single
    assetsSummary error "A Dandiset containing no files or zero bytes is
    not publishable".
  - test_validate_version_metadata_only_zarr_assets: a version whose only
    asset is a zarr with size=100 must end up VALID with no validation
    errors.

 dandiapi/api/services/metadata/__init__.py |  5 +-
 dandiapi/api/tests/test_tasks.py           | 57 ++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/dandiapi/api/services/metadata/__init__.py b/dandiapi/api/services/metadata/__init__.py
index 0656916a1..b7c7792ba 100644
--- a/dandiapi/api/services/metadata/__init__.py
+++ b/dandiapi/api/services/metadata/__init__.py
@@ -7,6 +7,7 @@
 from dandischema.metadata import aggregate_assets_summary, validate
 from django.conf import settings
 from django.db import transaction
+from django.db.models.query_utils import Q
 from django.utils import timezone
 
 from dandiapi.api.models import Asset, Version
@@ -124,7 +125,9 @@ def _build_validatable_version_metadata(version: Version) -> dict:
     metadata_for_validation['doi'] = '10.80507/dandi.123456/0.123456.1234'
     metadata_for_validation['assetsSummary'] = {
         'schemaKey': 'AssetsSummary',
-        'numberOfBytes': 1 if version.assets.filter(blob__size__gt=0).exists() else 0,
+        'numberOfBytes': 1
+        if version.assets.filter(Q(blob__size__gt=0) | Q(zarr__size__gt=0)).exists()
+        else 0,
         'numberOfFiles': 1 if version.assets.exists() else 0,
     }
     return metadata_for_validation
diff --git a/dandiapi/api/tests/test_tasks.py b/dandiapi/api/tests/test_tasks.py
index 3cb632bb9..85020359b 100644
--- a/dandiapi/api/tests/test_tasks.py
+++ b/dandiapi/api/tests/test_tasks.py
@@ -2,6 +2,7 @@
 
 import datetime
 import hashlib
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 from django.conf import settings
@@ -10,9 +11,13 @@
 from guardian.shortcuts import assign_perm
 import pytest
 from rest_framework.test import APIClient
+from zarr_checksum import compute_zarr_checksum
+from zarr_checksum.checksum import EMPTY_CHECKSUM
+from zarr_checksum.generators import ZarrArchiveFile
 
 from dandiapi.api import tasks
 from dandiapi.api.models import Asset, AssetBlob, Version
+from dandiapi.zarr.models import ZarrArchiveStatus
 
 from .fuzzy import URN_RE, UTC_ISO_TIMESTAMP_RE
 
@@ -321,6 +326,58 @@ def test_validate_version_metadata_no_assets(
     ]
 
 
+@pytest.mark.django_db
+def test_validate_version_metadata_empty_zarr_asset(
+    draft_version: Version, zarr_archive_factory, draft_asset_factory
+):
+    asset = draft_asset_factory(
+        blob=None,
+        zarr=zarr_archive_factory(
+            status=ZarrArchiveStatus.COMPLETE, checksum=EMPTY_CHECKSUM, size=0, file_count=0
+        ),
+        status=Asset.Status.VALID,
+    )
+    assert asset.size == 0
+    draft_version.assets.add(asset)
+
+    # Since the zarr asset has zero size, a validation error should be produced
+    tasks.validate_version_metadata_task(draft_version.id)
+    draft_version.refresh_from_db()
+    assert draft_version.status == Version.Status.INVALID
+    assert draft_version.validation_errors == [
+        {
+            'field': 'assetsSummary',
+            'message': 'Value error, '
+            'A Dandiset containing no files or zero bytes is not publishable',
+        }
+    ]
+
+
+@pytest.mark.django_db
+def test_validate_version_metadata_only_zarr_assets(
+    draft_version: Version, zarr_archive_factory, draft_asset_factory
+):
+    asset = draft_asset_factory(
+        blob=None,
+        zarr=zarr_archive_factory(
+            status=ZarrArchiveStatus.COMPLETE,
+            checksum=compute_zarr_checksum(
+                [ZarrArchiveFile(path=Path('foo/bar'), size=100, digest=hashlib.md5().hexdigest())]
+            ),
+            size=100,
+            file_count=1,
+        ),
+        status=Asset.Status.VALID,
+    )
+    assert asset.size > 0
+    draft_version.assets.add(asset)
+
+    tasks.validate_version_metadata_task(draft_version.id)
+    draft_version.refresh_from_db()
+    assert draft_version.status == Version.Status.VALID
+    assert not draft_version.validation_errors
+
+
 @pytest.mark.django_db
 def test_publish_task(
     api_client: APIClient,