From e32f32d5619860b5daa27eb866ec60e8f69b8e55 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Fri, 14 Feb 2025 12:05:04 -0800 Subject: [PATCH] rf: refactor code to allow caching of jsonschema validators Refactor code to allow caching of jsonschema validators. Specifically, `_get_jsonschema_validator()` caches validators for schemas downloaded from the `dandi/schema` repo, and `_get_jsonschema_validator_local()` caches validator for schemas generated from locally defined Pydantic models. In this refactoring, additional appropriate validations of arguments to involved funcs are added. --- dandischema/metadata.py | 119 ++++++++++--- dandischema/tests/test_metadata.py | 187 ++++++++++++++++++-- dandischema/tests/test_utils.py | 274 +++++++++++++++++------------ dandischema/utils.py | 92 +++++----- 4 files changed, 475 insertions(+), 197 deletions(-) diff --git a/dandischema/metadata.py b/dandischema/metadata.py index 3fbe1973..5be9294b 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -1,11 +1,12 @@ from copy import deepcopy from enum import Enum -from functools import lru_cache +from functools import cache from inspect import isclass import json from pathlib import Path from typing import Any, Dict, Iterable, Optional, TypeVar, Union, cast, get_args +from jsonschema.protocols import Validator as JsonschemaValidator import pydantic import requests @@ -20,13 +21,16 @@ from .utils import ( TransitionalGenerateJsonSchema, _ensure_newline, + dandi_jsonschema_validator, + json_object_adapter, sanitize_value, strip_top_level_optional, validate_json, version2tuple, ) -schema_map = { +# A mapping of the schema keys of DANDI models to the names of their JSON schema files +SCHEMA_MAP = { "Dandiset": "dandiset.json", "PublishedDandiset": "published-dandiset.json", "Asset": "asset.json", @@ -130,7 +134,7 @@ def publish_model_schemata(releasedir: Union[str, Path]) -> Path: version = models.get_schema_version() vdir = Path(releasedir, version) 
vdir.mkdir(exist_ok=True, parents=True) - for class_, filename in schema_map.items(): + for class_, filename in SCHEMA_MAP.items(): (vdir / filename).write_text( _ensure_newline( json.dumps( @@ -148,14 +152,14 @@ def publish_model_schemata(releasedir: Union[str, Path]) -> Path: def _validate_obj_json( - instance: Any, schema: dict[str, Any], *, missing_ok: bool = False + instance: Any, validator: JsonschemaValidator, *, missing_ok: bool = False ) -> None: """ - Validate a metadata instance of a **DANDI model** against the JSON schema of the - model with an option to filter out errors related to missing required properties + Validate a data instance using a jsonschema validator with an option to filter out + errors related to missing required properties - :param instance: The metadata instance to validate - :param schema: The JSON schema of the model + :param instance: The data instance to validate + :param validator: The JSON schema validator to use :param missing_ok: Indicates whether to filter out errors related to missing required properties :raises JsonschemaValidationError: If the metadata instance is invalid, and there @@ -165,7 +169,7 @@ def _validate_obj_json( (remaining) errors detected in the validation """ try: - validate_json(instance, schema) + validate_json(instance, validator) except JsonschemaValidationError as e: if missing_ok: remaining_errs = [ @@ -181,23 +185,88 @@ def _validate_obj_json( def _validate_dandiset_json(data: dict, schema_dir: Union[str, Path]) -> None: with Path(schema_dir, "dandiset.json").open() as fp: schema = json.load(fp) - _validate_obj_json(data, schema) + _validate_obj_json(data, dandi_jsonschema_validator(schema)) def _validate_asset_json(data: dict, schema_dir: Union[str, Path]) -> None: with Path(schema_dir, "asset.json").open() as fp: schema = json.load(fp) - _validate_obj_json(data, schema) + _validate_obj_json(data, dandi_jsonschema_validator(schema)) -@lru_cache -def _get_schema(schema_version: str, schema_name: str) 
-> Any:
-    r = requests.get(
-        "https://raw.githubusercontent.com/dandi/schema/"
-        f"master/releases/{schema_version}/{schema_name}"
+@cache
+def _get_jsonschema_validator(
+    schema_version: str, schema_key: str
+) -> JsonschemaValidator:
+    """
+    Get jsonschema validator for validating instances against a specific DANDI schema
+
+    :param schema_version: The version of the specific DANDI schema
+    :param schema_key: The schema key that identifies the specific DANDI schema
+    :return: The jsonschema validator appropriate for validating instances against the
+        specific DANDI schema
+    :raises ValueError: If the provided schema version is not among the allowed versions,
+        `ALLOWED_VALIDATION_SCHEMAS`
+    :raises ValueError: If the provided schema key is not among the keys in `SCHEMA_MAP`
+    :raises requests.HTTPError: If the schema cannot be fetched from the `dandi/schema`
+        repository
+    :raises RuntimeError: If the fetched schema is not a valid JSON object
+    """
+    if schema_version not in ALLOWED_VALIDATION_SCHEMAS:
+        raise ValueError(
+            f"DANDI schema version {schema_version} is not allowed. "
+            f"Allowed are: {', '.join(ALLOWED_VALIDATION_SCHEMAS)}."
+        )
+    if schema_key not in SCHEMA_MAP:
+        raise ValueError(
+            f"Schema key must be one of {', '.join(map(repr, SCHEMA_MAP.keys()))}"
+        )
+
+    # Fetch the schema from the `dandi/schema` repository
+    schema_url = (
+        f"https://raw.githubusercontent.com/dandi/schema/"
+        f"master/releases/{schema_version}/{SCHEMA_MAP[schema_key]}"
     )
+    r = requests.get(schema_url)
     r.raise_for_status()
-    return r.json()
+    schema = r.json()
+
+    # Validate that the retrieved schema is a valid JSON object, i.e., a dictionary
+    # This step is needed because the `jsonschema` package requires the schema to be a
+    # `Mapping[str, Any]` object
+    try:
+        json_object_adapter.validate_python(schema)
+    except pydantic.ValidationError as e:
+        msg = (
+            f"The JSON schema at {schema_url} is not a valid JSON object. 
" + f"Received: {schema}" + ) + raise RuntimeError(msg) from e + + # Create a jsonschema validator for the schema + return dandi_jsonschema_validator(schema) + + +@cache +def _get_jsonschema_validator_local(schema_key: str) -> JsonschemaValidator: + """ + Get jsonschema validator for validating instances against a specific DANDI schema + generated from the corresponding locally defined Pydantic model + + :param schema_key: The schema key that identifies the specific DANDI schema + :raises ValueError: If the provided schema key is not among the keys in `SCHEMA_MAP` + """ + if schema_key not in SCHEMA_MAP: + raise ValueError( + f"Schema key must be one of {', '.join(map(repr, SCHEMA_MAP.keys()))}" + ) + + # The pydantic model with the specified schema key + m: type[pydantic.BaseModel] = getattr(models, schema_key) + + return dandi_jsonschema_validator( + m.model_json_schema(schema_generator=TransitionalGenerateJsonSchema) + ) def validate( @@ -240,25 +309,22 @@ def validate( if schema_key is None: raise ValueError("Provided object has no known schemaKey") schema_version = schema_version or obj.get("schemaVersion") - if schema_version not in ALLOWED_VALIDATION_SCHEMAS and schema_key in schema_map: + if schema_version not in ALLOWED_VALIDATION_SCHEMAS and schema_key in SCHEMA_MAP: raise ValueError( f"Metadata version {schema_version} is not allowed. " f"Allowed are: {', '.join(ALLOWED_VALIDATION_SCHEMAS)}." 
) if json_validation: if schema_version == DANDI_SCHEMA_VERSION: - klass = getattr(models, schema_key) - schema = klass.model_json_schema( - schema_generator=TransitionalGenerateJsonSchema - ) + jvalidator = _get_jsonschema_validator_local(schema_key) else: - if schema_key not in schema_map: + if schema_key not in SCHEMA_MAP: raise ValueError( "Only dandisets and assets can be validated " "using json schema for older versions" ) - schema = _get_schema(schema_version, schema_map[schema_key]) - _validate_obj_json(obj, schema, missing_ok=missing_ok) + jvalidator = _get_jsonschema_validator(schema_version, schema_key) + _validate_obj_json(obj, jvalidator, missing_ok=missing_ok) klass = getattr(models, schema_key) try: klass(**obj) @@ -366,8 +432,7 @@ def migrate( # Optionally validate the instance against the DANDI schema it specifies # before migration if not skip_validation: - schema = _get_schema(obj_ver, "dandiset.json") - _validate_obj_json(obj, schema) + _validate_obj_json(obj, _get_jsonschema_validator(obj_ver, "Dandiset")) obj_migrated = deepcopy(obj) diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index 5a8034d6..b85b3ef1 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -3,14 +3,21 @@ import json from pathlib import Path from typing import Any, Dict, Optional, Sequence, Set -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch +from jsonschema.protocols import Validator as JsonschemaValidator +from pydantic import BaseModel import pytest +from dandischema.models import Asset, Dandiset, PublishedAsset, PublishedDandiset +from dandischema.utils import TransitionalGenerateJsonSchema, jsonschema_validator + from .utils import skipif_no_network from ..consts import DANDI_SCHEMA_VERSION from ..exceptions import JsonschemaValidationError, PydanticValidationError from ..metadata import ( + _get_jsonschema_validator, + _get_jsonschema_validator_local, 
_validate_asset_json, _validate_dandiset_json, _validate_obj_json, @@ -677,13 +684,16 @@ class TestValidateObjJson: """ @pytest.fixture - def dummy_schema(self) -> dict: - """Returns a dummy JSON schema.""" - return { - "type": "object", - "properties": {"name": {"type": "string"}}, - "required": ["name"], - } + def dummy_jvalidator(self) -> JsonschemaValidator: + """Returns a dummy jsonschema validator initialized with a dummy schema.""" + return jsonschema_validator( + { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + }, + check_format=True, + ) @pytest.fixture def dummy_instance(self) -> dict: @@ -691,7 +701,10 @@ def dummy_instance(self) -> dict: return {"name": "Example"} def test_valid_obj_no_errors( - self, monkeypatch: pytest.MonkeyPatch, dummy_schema: dict, dummy_instance: dict + self, + monkeypatch: pytest.MonkeyPatch, + dummy_jvalidator: JsonschemaValidator, + dummy_instance: dict, ) -> None: """ Test that `_validate_obj_json` does not raise when `validate_json` has no errors @@ -707,10 +720,13 @@ def mock_validate_json(_instance: dict, _schema: dict) -> None: monkeypatch.setattr(metadata, "validate_json", mock_validate_json) # `_validate_obj_json` should succeed without raising an exception - _validate_obj_json(dummy_instance, dummy_schema) + _validate_obj_json(dummy_instance, dummy_jvalidator) def test_raises_error_without_missing_ok( - self, monkeypatch: pytest.MonkeyPatch, dummy_schema: dict, dummy_instance: dict + self, + monkeypatch: pytest.MonkeyPatch, + dummy_jvalidator: JsonschemaValidator, + dummy_instance: dict, ) -> None: """ Test that `_validate_obj_json` forwards JsonschemaValidationError @@ -730,7 +746,7 @@ def mock_validate_json(_instance: dict, _schema: dict) -> None: # Since `missing_ok=False`, any error should be re-raised. 
with pytest.raises(JsonschemaValidationError) as excinfo: - _validate_obj_json(dummy_instance, dummy_schema, missing_ok=False) + _validate_obj_json(dummy_instance, dummy_jvalidator, missing_ok=False) assert "`name` is a required property" == excinfo.value.errors[0].message @pytest.mark.parametrize( @@ -759,7 +775,7 @@ def mock_validate_json(_instance: dict, _schema: dict) -> None: def test_raises_only_nonmissing_errors_with_missing_ok( self, monkeypatch: pytest.MonkeyPatch, - dummy_schema: dict, + dummy_jvalidator: JsonschemaValidator, dummy_instance: dict, validation_errs: list[MagicMock], expect_raises: bool, @@ -789,7 +805,7 @@ def mock_validate_json(_instance: dict, _schema: dict) -> None: ) with ctx as excinfo: - _validate_obj_json(dummy_instance, dummy_schema, missing_ok=True) + _validate_obj_json(dummy_instance, dummy_jvalidator, missing_ok=True) if excinfo is not None: filtered_errors = excinfo.value.errors @@ -797,3 +813,146 @@ def mock_validate_json(_instance: dict, _schema: dict) -> None: # We expect the "required property" error to be filtered out, # so we should only see the "Some other validation error". assert len(filtered_errors) == expected_remaining_errs_count + + +class TestGetJsonschemaValidator: + @pytest.mark.parametrize( + "schema_version, schema_key, expected_error_msg", + [ + pytest.param( + "0.5.8", + "Dandiset", + "DANDI schema version 0.5.8 is not allowed", + id="invalid-schema-version", + ), + pytest.param( + "0.6.0", + "Nonexistent", + "Schema key must be one of", + id="invalid-schema-key", + ), + ], + ) + def test_invalid_parameters( + self, schema_version: str, schema_key: str, expected_error_msg: str + ) -> None: + """ + Test that providing an invalid schema version or key raises ValueError. 
+ """ + # Clear the cache to avoid interference from previous calls + _get_jsonschema_validator.cache_clear() + with pytest.raises(ValueError, match=expected_error_msg): + _get_jsonschema_validator(schema_version, schema_key) + + def test_valid_schema(self) -> None: + """ + Test the valid case: + - requests.get() is patched directly using patch("requests.get") + - The returned JSON is a valid dict + - The resulting validator is produced via dandi_jsonschema_validator + """ + valid_version = "0.6.0" + valid_key = "Dandiset" + expected_url = ( + f"https://raw.githubusercontent.com/dandi/schema/master/releases/" + f"{valid_version}/dandiset.json" + ) + dummy_validator = MagicMock(spec=JsonschemaValidator) + valid_schema = {"type": "object"} + + with patch("requests.get") as mock_get, patch( + "dandischema.metadata.dandi_jsonschema_validator", + return_value=dummy_validator, + ) as mock_validator: + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = valid_schema + mock_get.return_value = mock_response + + # Clear the cache to avoid interference from previous calls + _get_jsonschema_validator.cache_clear() + result = _get_jsonschema_validator(valid_version, valid_key) + + mock_get.assert_called_once_with(expected_url) + mock_response.raise_for_status.assert_called_once() + mock_response.json.assert_called_once() + mock_validator.assert_called_once_with(valid_schema) + assert result is dummy_validator + + def test_invalid_json_schema_raises_runtime_error(self) -> None: + """ + Test that if the fetched schema is not a valid JSON object, + then _get_jsonschema_validator() raises a RuntimeError. 
+ """ + valid_version = "0.6.0" + valid_key = "Dandiset" + expected_url = ( + f"https://raw.githubusercontent.com/dandi/schema/master/releases/" + f"{valid_version}/dandiset.json" + ) + # Return a list (instead of a dict) to trigger a ValidationError in json_object_adapter + invalid_schema = {4: 2} + + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = invalid_schema + mock_get.return_value = mock_response + + # Clear the cache to avoid interference from previous calls + _get_jsonschema_validator.cache_clear() + with pytest.raises(RuntimeError, match="not a valid JSON object"): + _get_jsonschema_validator(valid_version, valid_key) + + mock_get.assert_called_once_with(expected_url) + mock_response.raise_for_status.assert_called_once() + mock_response.json.assert_called_once() + + +class TestGetJsonschemaValidatorLocal: + @pytest.mark.parametrize( + ("schema_key", "pydantic_model"), + [ + pytest.param("Dandiset", Dandiset, id="valid-Dandiset"), + pytest.param( + "PublishedDandiset", PublishedDandiset, id="valid-PublishedDandiset" + ), + pytest.param("Asset", Asset, id="valid-Asset"), + pytest.param("PublishedAsset", PublishedAsset, id="valid-PublishedAsset"), + ], + ) + def test_valid_schema_keys( + self, schema_key: str, pydantic_model: type[BaseModel] + ) -> None: + # Get the expected schema from the corresponding model. + expected_schema = pydantic_model.model_json_schema( + schema_generator=TransitionalGenerateJsonSchema + ) + + # Clear the cache to avoid interference from previous calls + _get_jsonschema_validator_local.cache_clear() + + # Call the function under test. + validator = _get_jsonschema_validator_local(schema_key) + + # Assert that the returned validator has a 'schema' attribute + # equal to the expected schema. 
+ assert validator.schema == expected_schema, ( + f"For schema key {schema_key!r}, expected schema:\n{expected_schema}\n" + f"but got:\n{validator.schema}" + ) + + @pytest.mark.parametrize( + "invalid_schema_key", + [ + pytest.param("Nonexistent", id="invalid-Nonexistent"), + pytest.param("", id="invalid-empty-string"), + pytest.param("InvalidKey", id="invalid-Key"), + ], + ) + def test_invalid_schema_keys(self, invalid_schema_key: str) -> None: + # Clear the cache to avoid interference from previous calls + _get_jsonschema_validator_local.cache_clear() + + with pytest.raises(ValueError, match="Schema key must be one of"): + _get_jsonschema_validator_local(invalid_schema_key) diff --git a/dandischema/tests/test_utils.py b/dandischema/tests/test_utils.py index 2cba8b30..bff96744 100644 --- a/dandischema/tests/test_utils.py +++ b/dandischema/tests/test_utils.py @@ -1,6 +1,6 @@ from contextlib import nullcontext -from copy import deepcopy from typing import Any, Dict, List, Optional, Union, cast +from unittest.mock import patch from jsonschema.exceptions import SchemaError, ValidationError from jsonschema.protocols import Validator as JsonschemaValidator @@ -10,6 +10,7 @@ from dandischema.exceptions import JsonschemaValidationError from dandischema.utils import ( _ensure_newline, + dandi_jsonschema_validator, jsonschema_validator, name2title, sanitize_value, @@ -259,178 +260,219 @@ def test_invalid_schema_raises_schema_error(self) -> None: jsonschema_validator(invalid_schema, check_format=False) -# -------------------------------------------------------------------- -# FIXTURES that build on top of existing schema fixtures -# -------------------------------------------------------------------- +# --------------------------- +# Example validator fixtures +# --------------------------- @pytest.fixture -def draft7_schema_with_version(draft7_schema: Dict[str, Any]) -> Dict[str, Any]: +def draft7_validator() -> JsonschemaValidator: """ - Copies the existing Draft 7 fixture 
and adds a "schemaVersion" property. + A Draft 7 validator that requires a 'name' (type string). """ - schema = deepcopy(draft7_schema) - schema["properties"]["schemaVersion"] = { - "type": "string", - "default": "0.6.1", + from jsonschema.validators import Draft7Validator + + schema = { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], } - return schema + return cast(JsonschemaValidator, Draft7Validator(schema)) @pytest.fixture -def draft202012_schema_with_version( - draft202012_schema: Dict[str, Any] -) -> Dict[str, Any]: +def draft202012_validator() -> JsonschemaValidator: """ - Copies the existing Draft 2020-12 fixture and adds a "schemaVersion" property. + A Draft 2020-12 validator that requires a 'title' (type string). """ - schema = deepcopy(draft202012_schema) - schema["properties"]["schemaVersion"] = { - "type": "string", - "default": "0.6.9", - } - return schema - - -# -------------------------------------------------------------------- -# TEST CLASS for validate_json -# -------------------------------------------------------------------- -class TestValidateJson: - @pytest.mark.parametrize( - "schema_fixture", - [ - # Fixtures without the 'schemaVersion' property, - "draft7_schema", - "draft202012_schema", - "draft202012_format_schema", - "schema_no_dollar_schema", - ], - ids=[ - "draft7_schema_no_schemaVersion", - "draft202012_schema_no_schemaVersion", - "draft202012_format_schema_no_schemaVersion", - "schema_no_dollar_schema_no_schemaVersion", - ], - ) - def test_missing_schema_version( - self, request: pytest.FixtureRequest, schema_fixture: str - ) -> None: - """ - Test that a ValueError is raised if the schema lacks a "schemaVersion" property - that specifies the version of DANDI JSON schema - """ - # Retrieve the fixture by name - fixture_schema = request.getfixturevalue(schema_fixture) + from jsonschema.validators import Draft202012Validator - schema = fixture_schema - with pytest.raises(ValueError): - 
validate_json({}, schema) + schema = { + "type": "object", + "properties": {"title": {"type": "string"}}, + "required": ["title"], + } + return cast(JsonschemaValidator, Draft202012Validator(schema)) - # Test schema without 'properties' key - schema = deepcopy(fixture_schema).pop("properties") - with pytest.raises(ValueError): - validate_json({}, schema) - # Test schema without a "default" field in the "schemaVersion" property - schema = deepcopy(fixture_schema) - schema["properties"]["schemaVersion"] = {"type": "string"} # Missing 'default' - with pytest.raises(ValueError): - validate_json({}, schema) +@pytest.fixture +def multiple_required_validator() -> JsonschemaValidator: + """ + A Draft 7 validator that requires *two* string properties: 'name' and 'title'. + This enables multiple errors in a single validation if both are missing. + """ + from jsonschema.validators import Draft7Validator - @pytest.mark.parametrize( - "schema_fixture", - [ - # We pass a fixture with a valid "schemaVersion" property, - # but we break the schema definition - pytest.param("draft7_schema_with_version", id="draft7_schema_invalid"), - pytest.param( - "draft202012_schema_with_version", id="draft202012_schema_invalid" - ), - ], - ) - def test_invalid_schema( - self, request: pytest.FixtureRequest, schema_fixture: str - ) -> None: - """ - Test that a SchemaError is raised if the schema is invalid. - We intentionally corrupt the "type" field to a non-string value. 
- """ - schema = request.getfixturevalue(schema_fixture) - schema["type"] = 123 # Invalid: 'type' must be a string or array in JSON Schema + schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "title": {"type": "string"}, + }, + "required": ["name", "title"], + } + return cast(JsonschemaValidator, Draft7Validator(schema)) - with pytest.raises(SchemaError): - validate_json({}, schema) +class TestValidateJson: @pytest.mark.parametrize( - ("schema_fixture", "instance"), + "validator_fixture, instance", [ pytest.param( - "draft7_schema_with_version", + "draft7_validator", {"name": "Alice"}, - id="draft7_schema_valid_instance", + id="draft7_valid_instance", ), pytest.param( - "draft202012_schema_with_version", + "draft202012_validator", {"title": "My Title"}, - id="draft202012_schema_valid_instance", + id="draft202012_valid_instance", + ), + pytest.param( + "multiple_required_validator", + {"name": "Bob", "title": "Something"}, + id="multiple_required_valid_instance", ), ], ) def test_valid_instance( self, request: pytest.FixtureRequest, - schema_fixture: str, + validator_fixture: str, instance: Dict[str, Any], ) -> None: """ Test that a valid instance does not raise any exceptions. 
""" - schema = request.getfixturevalue(schema_fixture) - validate_json(instance, schema) # No exception expected + # Load the correct validator using `request.getfixturevalue` + validator: JsonschemaValidator = request.getfixturevalue(validator_fixture) + validate_json(instance, validator) # Should not raise @pytest.mark.parametrize( - ("schema_fixture", "instance"), + "validator_fixture, instance, expected_error_count", [ + # Single error: missing "name" pytest.param( - "draft7_schema_with_version", - {}, # Missing required "name" - id="draft7_schema_missing_name", + "draft7_validator", + {}, + 1, + id="draft7_missing_name", ), + # Single error: "name" has wrong type pytest.param( - "draft7_schema_with_version", - {"name": 123}, # Wrong type - id="draft7_schema_wrong_type_for_name", + "draft7_validator", + {"name": 123}, + 1, + id="draft7_wrong_type_name", ), + # Single error: missing "title" pytest.param( - "draft202012_schema_with_version", - {}, # Missing required "title" - id="draft202012_schema_missing_title", + "draft202012_validator", + {}, + 1, + id="draft202012_missing_title", ), + # Single error: "title" has wrong type pytest.param( - "draft202012_schema_with_version", - {"title": 999}, # Wrong type - id="draft202012_schema_wrong_type_for_title", + "draft202012_validator", + {"title": 999}, + 1, + id="draft202012_wrong_type_title", + ), + # Multiple errors: missing both "name" and "title" + pytest.param( + "multiple_required_validator", + {}, + 2, + id="multiple_required_missing_both", + ), + # Another multiple error scenario: 'name' wrong type, 'title' missing + pytest.param( + "multiple_required_validator", + {"name": 123}, + 2, + id="multiple_required_wrong_type_and_missing", ), ], ) def test_invalid_instance( self, request: pytest.FixtureRequest, - schema_fixture: str, + validator_fixture: str, instance: Dict[str, Any], + expected_error_count: int, ) -> None: """ - Test that an invalid instance raises a JsonschemaValidationError. 
- Also assert that the raised error contains a non-empty list of - `jsonschema.exceptions.ValidationError` objects. + Tests that an invalid instance raises a JsonschemaValidationError. + Verifies that the number of validation errors matches `expected_error_count`. """ - schema = request.getfixturevalue(schema_fixture) + validator: JsonschemaValidator = request.getfixturevalue(validator_fixture) with pytest.raises(JsonschemaValidationError) as exc_info: - validate_json(instance, schema) + validate_json(instance, validator) - # Ensure that the exception has a non-empty list of validation errors errs = exc_info.value.errors - assert type(errs) is list, "Expected a list" - assert len(errs) > 0, "Expected at least one error" + assert isinstance(errs, list), "Expected a list" + assert ( + len(errs) == expected_error_count + ), f"Expected {expected_error_count} error(s), got {len(errs)}" assert all( isinstance(err, ValidationError) for err in errs ), "All errors must be `jsonschema.exceptions.ValidationError`" + + +class TestDandiJsonschemaValidator: + @pytest.mark.parametrize( + "version, expected_validator_cls", + [ + pytest.param("0.6.5", Draft202012Validator, id="version-0.6.5"), + pytest.param("0.7.0", Draft202012Validator, id="version-0.7.0"), + pytest.param("0.6.0", Draft7Validator, id="version-0.6.0"), + ], + ) + def test_dandi_jsonschema_validator_versions( + self, version: str, expected_validator_cls: JsonschemaValidator + ) -> None: + """ + Test that dandi_jsonschema_validator() selects the correct default validator + class based on the version specified in the schema's "schemaVersion" default. + """ + schema = {"properties": {"schemaVersion": {"default": version}}} + # Patch jsonschema_validator so we can intercept the call and + # verify the parameters. 
+ with patch( + "dandischema.utils.jsonschema_validator", autospec=True + ) as mock_validator: + mock_validator.return_value = "dummy_validator_result" + result = cast(str, dandi_jsonschema_validator(schema)) + # Verify that the dummy return value is propagated. + assert result == "dummy_validator_result" + # Assert that jsonschema_validator was called with the expected parameters. + mock_validator.assert_called_once_with( + schema, + check_format=True, + default_cls=expected_validator_cls, + ) + + @pytest.mark.parametrize( + "schema", + [ + pytest.param({}, id="missing-properties"), + pytest.param( + {"properties": {}}, + id="missing-schemaVersion", + ), + pytest.param( + {"properties": {"schemaVersion": {}}}, + id="missing-default", + ), + ], + ) + def test_dandi_jsonschema_validator_missing_keys(self, schema: dict) -> None: + """ + Test that dandi_jsonschema_validator() raises a `ValueError` when the schema + does not have a 'schemaVersion' property that specifies the schema version with + a 'default' field. 
+ """ + with pytest.raises( + ValueError, match="schema must has a 'schemaVersion' property" + ): + dandi_jsonschema_validator(schema) diff --git a/dandischema/utils.py b/dandischema/utils.py index 07a5e5d1..076b34cd 100644 --- a/dandischema/utils.py +++ b/dandischema/utils.py @@ -6,6 +6,7 @@ from jsonschema import Draft7Validator, Draft202012Validator from jsonschema.protocols import Validator as JsonschemaValidator from jsonschema.validators import validator_for +from pydantic import ConfigDict, TypeAdapter from pydantic.json_schema import GenerateJsonSchema, JsonSchemaMode, JsonSchemaValue from pydantic_core import CoreSchema, core_schema @@ -143,6 +144,47 @@ def sanitize_value(value: str, field: str = "non-extension", sub: str = "-") -> return value +def dandi_jsonschema_validator(schema: dict[str, Any]) -> JsonschemaValidator: + """ + Create a JSON Schema validator appropriate for validating instances against the + JSON schema of a DANDI model + + :param schema: The JSON schema of the DANDI model to validate against + :return: The JSON schema validator + :raises ValueError: If the schema does not have a 'schemaVersion' property that + specifies the schema version with a 'default' field. + :raises jsonschema.exceptions.SchemaError: If the JSON schema is invalid + """ + if ( + "properties" not in schema + or "schemaVersion" not in schema["properties"] + or "default" not in schema["properties"]["schemaVersion"] + ): + msg = ( + "The schema must has a 'schemaVersion' property that specifies the schema " + "version with a 'default' field." 
+ ) + raise ValueError(msg) + + default_validator_cls = cast( + type[JsonschemaValidator], + ( + Draft202012Validator + # `"schemaVersion"` 0.6.5 and above is produced with Pydantic V2 + # which is compliant with JSON Schema Draft 2020-12 + if ( + version2tuple(schema["properties"]["schemaVersion"]["default"]) + >= version2tuple("0.6.5") + ) + else Draft7Validator + ), + ) + + return jsonschema_validator( + schema, check_format=True, default_cls=default_validator_cls + ) + + def jsonschema_validator( schema: dict[str, Any], *, @@ -150,8 +192,8 @@ def jsonschema_validator( default_cls: type[JsonschemaValidator] | None = None, ) -> JsonschemaValidator: """ - Create a JSON schema validator appropriate for validating instances against a given - schema + Create a jsonschema validator appropriate for validating instances against a given + JSON schema :param schema: The JSON schema to validate against :param check_format: Indicates whether to check the format against format @@ -181,51 +223,21 @@ def jsonschema_validator( return validator_cls(schema) -def validate_json(instance: Any, schema: dict[str, Any]) -> None: +def validate_json(instance: Any, validator: JsonschemaValidator) -> None: """ - Validate a metadata instance of a **DANDI model** against the JSON schema of the - model - - :param instance: The metadata instance to validate - :param schema: The JSON schema of the model + Validate a data instance using a jsonschema validator + :param instance: The data instance to validate + :param validator: The JSON schema validator to use :raises JsonschemaValidationError: If the metadata instance is invalid, an instance of this exception containing a list of `jsonschema.exceptions.ValidationError` instances representing all the errors detected in the validation is raised - :raises jsonschema.exceptions.SchemaError: If the JSON schema is invalid - :raises ValueError: If the schema does not have a 'schemaVersion' property that - specifies the schema version with a 'default' 
field. """ - if ( - "properties" not in schema - or "schemaVersion" not in schema["properties"] - or "default" not in schema["properties"]["schemaVersion"] - ): - msg = ( - "The schema must has a 'schemaVersion' property that specifies the schema " - "version with a 'default' field." - ) - raise ValueError(msg) - - default_validator_cls = cast( - type[JsonschemaValidator], - ( - Draft202012Validator - # `"schemaVersion"` 0.6.5 and above is produced with Pydantic V2 - # which is compliant with JSON Schema Draft 2020-12 - if ( - version2tuple(schema["properties"]["schemaVersion"]["default"]) - >= version2tuple("0.6.5") - ) - else Draft7Validator - ), - ) - - validator = jsonschema_validator( - schema, check_format=True, default_cls=default_validator_cls - ) - errs = sorted(validator.iter_errors(instance), key=str) if errs: raise JsonschemaValidationError(errs) + + +# Pydantic type adapter for a JSON object, which is of type `dict[str, Any]` +json_object_adapter = TypeAdapter(dict[str, Any], config=ConfigDict(strict=True))