From 62695d6145943488099d076d9f7153d39c20ae11 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 18 Oct 2024 14:05:14 -0400 Subject: [PATCH] feat: extend and rename validation result properties BREAKING CHANGE: we renamed .bids_version to more generic .standard + .standard_version --- dandi/files/bids.py | 2 +- dandi/files/zarr.py | 32 +++++++++++++++++++------------- dandi/validate.py | 3 ++- dandi/validate_types.py | 28 ++++++++++++++++++++++------ 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 8541b5030..907a14c59 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -111,7 +111,7 @@ def _validate(self) -> None: self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) - self._bids_version = result.origin.bids_version + self._bids_version = result.origin.standard_version def get_asset_errors(self, asset: BIDSAsset) -> list[ValidationResult]: """:meta private:""" diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index 1c5140c08..8dee86cd8 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -16,7 +16,7 @@ import requests from zarr_checksum.tree import ZarrChecksumTree -from dandi import get_logger +from dandi import __version__, get_logger from dandi.consts import ( MAX_ZARR_DEPTH, ZARR_DELETE_BATCH_SIZE, @@ -209,6 +209,12 @@ def get_validation_errors( import zarr errors: list[ValidationResult] = [] + origin: ValidationOrigin = ValidationOrigin( + name="zarr", + version=zarr.__version__, + standard="zarr", + ) + try: data = zarr.open(str(self.filepath)) except Exception: @@ -216,10 +222,7 @@ def get_validation_errors( raise errors.append( ValidationResult( - origin=ValidationOrigin( - name="zarr", - version=zarr.version.version, - ), + origin=origin, severity=Severity.ERROR, id="zarr.cannot_open", scope=Scope.FILE, @@ -228,13 +231,19 @@ def get_validation_errors( ) ) data = None + + origin = ValidationOrigin( + name="dandi.zarr", + version=__version__, + standard="zarr", + ) + # if data: + # TODO: figure out how to assign standard_version + # origin.standard_version = data.??? if isinstance(data, zarr.Group) and not data: errors.append( ValidationResult( - origin=ValidationOrigin( - name="zarr", - version=zarr.version.version, - ), + origin=origin, severity=Severity.ERROR, id="zarr.empty_group", scope=Scope.FILE, @@ -248,10 +257,7 @@ def get_validation_errors( raise ValueError(msg) errors.append( ValidationResult( - origin=ValidationOrigin( - name="zarr", - version=zarr.version.version, - ), + origin=origin, severity=Severity.ERROR, id="zarr.tree_depth_exceeded", scope=Scope.FILE, diff --git a/dandi/validate.py b/dandi/validate.py index 3746434f8..d6ab74c00 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -49,7 +49,8 @@ def validate_bids( origin = ValidationOrigin( name="bidsschematools", version=bidsschematools.__version__, - bids_version=validation_result["bids_version"], + standard="bids", + standard_version=validation_result["bids_version"], ) # Storing variable to not re-compute set paths for each individual file. diff --git a/dandi/validate_types.py b/dandi/validate_types.py index c09740d39..a379a9059 100644 --- a/dandi/validate_types.py +++ b/dandi/validate_types.py @@ -1,35 +1,51 @@ from __future__ import annotations from dataclasses import dataclass -from enum import Enum +from enum import Enum, IntEnum from pathlib import Path +from typing import Any @dataclass class ValidationOrigin: name: str version: str - bids_version: str | None = None + standard: str | None = None # TODO: Enum for the standards?? + standard_version: str | None = None -class Severity(Enum): +# TODO: decide on the naming consistency -- either prepend all with Validation or not +class Severity(IntEnum): HINT = 1 - WARNING = 2 - ERROR = 3 + INFO = 2 # new/unused, available in linkml + WARNING = 3 + ERROR = 4 + CRITICAL = 5 # new/unused, linkml has FATAL class Scope(Enum): FILE = "file" FOLDER = "folder" + # Isaac: make it/add "dandiset-metadata" to signal specific relation to metadata DANDISET = "dandiset" DATASET = "dataset" +# new/unused, may be should be gone +class ValidationObject(Enum): + METADATA = "metadata" + DATA = "data" # e.g. actual data contained in files, not metadata (e.g. as in + # nwb or nifti header) + FILE = "file" # e.g. file itself, e.g. truncated file or file not matching checksum + + @dataclass class ValidationResult: id: str - origin: ValidationOrigin + origin: ValidationOrigin # metadata about underlying validator and standard scope: Scope + origin_result: Any | None = None # original validation result from "origin" + object: ValidationObject | None = None severity: Severity | None = None # asset_paths, if not populated, assumes [.path], but could be smth like # {"path": "task-broken_bold.json",