Skip to content

Commit

Permalink
Include zarr asset sizes when validating version metadata
Browse files (browse the repository at this point in the history)
Co-authored-by: Aaron Kanzer <[email protected]>
  • Loading branch information
jjnesbitt and aaronkanzer committed Dec 4, 2024
1 parent 8e09a37 commit d603a3f
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 1 deletion.
5 changes: 4 additions & 1 deletion dandiapi/api/services/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from dandischema.metadata import aggregate_assets_summary, validate
from django.conf import settings
from django.db import transaction
from django.db.models.query_utils import Q
from django.utils import timezone

from dandiapi.api.models import Asset, Version
Expand Down Expand Up @@ -124,7 +125,9 @@ def _build_validatable_version_metadata(version: Version) -> dict:
metadata_for_validation['doi'] = '10.80507/dandi.123456/0.123456.1234'
metadata_for_validation['assetsSummary'] = {
'schemaKey': 'AssetsSummary',
'numberOfBytes': 1 if version.assets.filter(blob__size__gt=0).exists() else 0,
'numberOfBytes': 1
if version.assets.filter(Q(blob__size__gt=0) | Q(zarr__size__gt=0)).exists()
else 0,
'numberOfFiles': 1 if version.assets.exists() else 0,
}
return metadata_for_validation
Expand Down
57 changes: 57 additions & 0 deletions dandiapi/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime
import hashlib
from pathlib import Path
from typing import TYPE_CHECKING

from django.conf import settings
Expand All @@ -10,9 +11,13 @@
from guardian.shortcuts import assign_perm
import pytest
from rest_framework.test import APIClient
from zarr_checksum import compute_zarr_checksum
from zarr_checksum.checksum import EMPTY_CHECKSUM
from zarr_checksum.generators import ZarrArchiveFile

from dandiapi.api import tasks
from dandiapi.api.models import Asset, AssetBlob, Version
from dandiapi.zarr.models import ZarrArchiveStatus

from .fuzzy import URN_RE, UTC_ISO_TIMESTAMP_RE

Expand Down Expand Up @@ -321,6 +326,58 @@ def test_validate_version_metadata_no_assets(
]


@pytest.mark.django_db
def test_validate_version_metadata_empty_zarr_asset(
    draft_version: Version, zarr_archive_factory, draft_asset_factory
):
    """A draft version whose only asset is a zero-size zarr must validate as INVALID."""
    # Build a completed zarr archive that contains no files and no bytes.
    empty_zarr = zarr_archive_factory(
        status=ZarrArchiveStatus.COMPLETE,
        checksum=EMPTY_CHECKSUM,
        size=0,
        file_count=0,
    )
    asset = draft_asset_factory(blob=None, zarr=empty_zarr, status=Asset.Status.VALID)
    assert asset.size == 0
    draft_version.assets.add(asset)

    # The version holds a file but zero bytes, so validation is expected to
    # report an assetsSummary error.
    expected_errors = [
        {
            'field': 'assetsSummary',
            'message': 'Value error, '
            'A Dandiset containing no files or zero bytes is not publishable',
        }
    ]

    tasks.validate_version_metadata_task(draft_version.id)
    draft_version.refresh_from_db()

    assert draft_version.status == Version.Status.INVALID
    assert draft_version.validation_errors == expected_errors


@pytest.mark.django_db
def test_validate_version_metadata_only_zarr_assets(
    draft_version: Version, zarr_archive_factory, draft_asset_factory
):
    """A draft version whose only asset is a non-empty zarr must validate as VALID."""
    # Checksum for a zarr holding a single 100-byte file at foo/bar.
    zarr_checksum = compute_zarr_checksum(
        [ZarrArchiveFile(path=Path('foo/bar'), size=100, digest=hashlib.md5().hexdigest())]
    )
    populated_zarr = zarr_archive_factory(
        status=ZarrArchiveStatus.COMPLETE,
        checksum=zarr_checksum,
        size=100,
        file_count=1,
    )
    asset = draft_asset_factory(blob=None, zarr=populated_zarr, status=Asset.Status.VALID)
    assert asset.size > 0
    draft_version.assets.add(asset)

    tasks.validate_version_metadata_task(draft_version.id)
    draft_version.refresh_from_db()

    # With a non-zero zarr size and no blob assets, no validation errors are expected.
    assert draft_version.status == Version.Status.VALID
    assert not draft_version.validation_errors


@pytest.mark.django_db
def test_publish_task(
api_client: APIClient,
Expand Down

0 comments on commit d603a3f

Please sign in to comment.