From ab68c3537278140607b7c290fb5fc3cf81a45cd9 Mon Sep 17 00:00:00 2001 From: Brett Graham Date: Fri, 22 Nov 2024 13:43:38 -0500 Subject: [PATCH] Remove assignment validation (#417) --- changes/417.removal.rst | 1 + .../datamodels/general_structure.rst | 3 +- docs/roman_datamodels/datamodels/stnode.rst | 44 +------- .../datamodels/using_datamodels.rst | 32 ++++-- src/roman_datamodels/datamodels/_core.py | 2 +- src/roman_datamodels/stnode/_node.py | 65 ----------- src/roman_datamodels/validate.py | 102 ------------------ tests/test_models.py | 5 +- tests/test_stnode.py | 88 --------------- 9 files changed, 32 insertions(+), 310 deletions(-) create mode 100644 changes/417.removal.rst diff --git a/changes/417.removal.rst b/changes/417.removal.rst new file mode 100644 index 00000000..b6be71f4 --- /dev/null +++ b/changes/417.removal.rst @@ -0,0 +1 @@ +Remove validation on assignment. diff --git a/docs/roman_datamodels/datamodels/general_structure.rst b/docs/roman_datamodels/datamodels/general_structure.rst index c8b8d5b2..fdc678c1 100644 --- a/docs/roman_datamodels/datamodels/general_structure.rst +++ b/docs/roman_datamodels/datamodels/general_structure.rst @@ -259,7 +259,8 @@ ref_optical_element-1.0.0:: ... If one tries to modify the datamodel contents with a value inconsistent with -what a schema requires, validation will raise an error. +what a schema requires, validation will raise an error when the datamodel is +validated. Level 1 Example ............... diff --git a/docs/roman_datamodels/datamodels/stnode.rst b/docs/roman_datamodels/datamodels/stnode.rst index 64e8b3dc..3fdde82b 100644 --- a/docs/roman_datamodels/datamodels/stnode.rst +++ b/docs/roman_datamodels/datamodels/stnode.rst @@ -78,8 +78,8 @@ The specific stnode objects will be subclasses of the `~roman_datamodels.stnode.TaggedObjectNode` or `~roman_datamodels.stnode.TaggedListNode` classes. These classes are extensions of the `~roman_datamodels.stnode.DNode` and `~roman_datamodels.stnode.LNode` -classes which have extensions to handle looking up the schema information for on -the fly data validation. In particular, they will track the ``tag`` information +classes which have extensions to handle looking up the schema information. +In particular, they will track the ``tag`` information contained within the manifest from RAD. These "tagged-nodes" are then turned into specific stnode objects via the @@ -126,46 +126,6 @@ is reorganized, then scalar node concept can be removed from the codebase. dictionary. -Validation ----------- - -The stnode objects are designed to attempt to validate the data they contain or -have set in them against the schema information in RAD. This is typically done -on the fly when the data is set via the ``.`` interface; however, ASDF by -default will validate the data stored in the node against its schema during both -serialization and de-serialization. - -In order to avoid the overhead of re-validating all of the data in a node when -one thing is updated (this can induce a lot of overhead), the stnode objects -will attempt to parse a given "tagged-node's" schema down so that it is only -validating the field being updated. It performs the validation by attempting to -construct in-memory an ASDF-schema representing just the portion of the schema -it needs to validate just that single field against. It then passes that schema -into the ASDF validation routines to check the data. Unfortunately, this is not -a perfect process nor is it particularly robust. It is possible for a schema to -have fields that the parse down process cannot handle, or use JSON-schema -constructs which the parser is unaware of. In these cases, validation might -raise an error or pass invalid data. - -.. warning:: - - The only validation process that is guaranteed to validate the data - correctly is the full ASDF validation process. This is because ASDF will be - using the full schema and be checking everything against it. The on-the-fly - validation's parsing may create unreliable validation scenarios. - -.. note:: - - In order to avoid the "on-the-fly" validation process, one can set values in - a node/datamodel via the dictionary, ``[]``, interface instead of the ``.`` - interface. This is because the ``[]`` purposely bypasses the on-the-fly - validation process. Thus in general it is recommend that one uses the ``.`` - interface for setting values in a node/datamodel, and only using the ``[]`` - when one needs to store temporary invalid data in a node/datamodel. The use - ``[]`` runs the risk of placing the node/datamodel in a state where it - cannot be serialized to ASDF. - - ASDF ---- diff --git a/docs/roman_datamodels/datamodels/using_datamodels.rst b/docs/roman_datamodels/datamodels/using_datamodels.rst index a5ed45b4..f91a8809 100644 --- a/docs/roman_datamodels/datamodels/using_datamodels.rst +++ b/docs/roman_datamodels/datamodels/using_datamodels.rst @@ -41,9 +41,10 @@ page:: print(dm.meta.exposure.start_time_mjd) 60000.0 - # Try to assign invalid type + # Assign invalid type >>> d.meta.exposure.start_time_mjd = "hello" + >>> d.validate() # Last part of resulting traceback @@ -72,13 +73,28 @@ page:: # Try to assign wrong kind of node - >>> dm.meta.observation = dm.meta.exposure - Failed validating 'tag' in schema: - {'$schema': 'http://stsci.edu/schemas/asdf-schema/0.1.0/asdf-schema', - 'tag': 'asdf://stsci.edu/datamodels/roman/tags/observation-1.0.0'} - - On instance: - {'groupgap': 0, 'ma_table_name': 'High Latitude Spec. Survey', 'ma_table_number': 1, 'nframes': 8, 'ngroups': 6, 'p_exptype': 'WFI_IMAGE|', 'type': 'WFI_IMAGE'} + >>> dm.meta.exposure = dm.meta.observation + >>> dm.validate() + + ValidationError: mismatched tags, wanted 'asdf://stsci.edu/datamodels/roman/tags/exposure-1.0.0', got 'asdf://stsci.edu/datamodels/roman/tags/observation-1.0.0' + + Failed validating 'tag' in schema['properties']['meta']['allOf'][0]['allOf'][1]['properties']['exposure']: + {'tag': 'asdf://stsci.edu/datamodels/roman/tags/exposure-1.0.0', + 'title': 'Exposure Information'} + + On instance['meta']['exposure']: + {'execution_plan': 1, + 'exposure': 1, + 'observation': 1, + 'observation_id': '?', + 'pass': 1, + 'program': 1, + 'segment': 1, + 'visit': 1, + 'visit_file_activity': '01', + 'visit_file_group': 1, + 'visit_file_sequence': 1, + 'visit_id': '?'} # Show and then change pixel value in data diff --git a/src/roman_datamodels/datamodels/_core.py b/src/roman_datamodels/datamodels/_core.py index c265d5a3..a414d66a 100644 --- a/src/roman_datamodels/datamodels/_core.py +++ b/src/roman_datamodels/datamodels/_core.py @@ -344,7 +344,7 @@ def validate(self): """ Re-validate the model instance against the tags """ - validate.value_change(self._instance, pass_invalid_values=False, strict_validation=True) + self._asdf.validate() @_set_default_asdf def info(self, *args, **kwargs): diff --git a/src/roman_datamodels/stnode/_node.py b/src/roman_datamodels/stnode/_node.py index c1e1ccee..6f6646a8 100644 --- a/src/roman_datamodels/stnode/_node.py +++ b/src/roman_datamodels/stnode/_node.py @@ -5,78 +5,19 @@ import datetime import re -import warnings from collections import UserList from collections.abc import MutableMapping import asdf -import asdf.schema as asdfschema -import asdf.yamlutil as yamlutil import numpy as np -from asdf.exceptions import ValidationError from asdf.lazy_nodes import AsdfDictNode, AsdfListNode from asdf.tags.core import ndarray -from asdf.util import HashableDict from astropy.time import Time -from roman_datamodels.validate import ValidationWarning, _check_type, _error_message, will_strict_validate, will_validate - from ._registry import SCALAR_NODE_CLASSES_BY_KEY __all__ = ["DNode", "LNode"] -validator_callbacks = HashableDict(asdfschema.YAML_VALIDATORS) -validator_callbacks.update({"type": _check_type}) - - -def _value_change(path, value, schema, pass_invalid_values, strict_validation, ctx): - """ - Validate a change in value against a schema. - Trap error and return a flag. - """ - try: - _check_value(value, schema, ctx) - update = True - - except ValidationError as error: - update = False - errmsg = _error_message(path, error) - if pass_invalid_values: - update = True - if strict_validation: - raise ValidationError(errmsg) - else: - warnings.warn(errmsg, ValidationWarning) - return update - - -def _check_value(value, schema, validator_context): - """ - Perform the actual validation. - """ - - temp_schema = {"$schema": "http://stsci.edu/schemas/asdf-schema/0.1.0/asdf-schema"} - temp_schema.update(schema) - validator = asdfschema.get_validator(temp_schema, validator_context, validator_callbacks) - validator.validate(value, _schema=temp_schema) - validator_context.close() - - -def _validate(attr, instance, schema, ctx): - """ - Validate the attribute against the schema. - """ - # Note that the following checks cannot use isinstance since the TaggedObjectNode - # and TaggedListNode subclasses will break as a result. And currently there is no - # non-tagged subclasses of these classes that exist, nor are any envisioned yet. - if type(instance) == DNode: # noqa: E721 - instance = instance._data - elif type(instance) == LNode: # noqa: E721 - instance = instance.data - - tagged_tree = yamlutil.custom_tree_to_tagged_tree(instance, ctx) - return _value_change(attr, tagged_tree, schema, False, will_strict_validate(), ctx) - def _get_schema_for_property(schema, attr): """ @@ -244,12 +185,6 @@ def __setattr__(self, key, value): value = self._convert_to_scalar(key, value, self._data.get(key)) if key in self._data or key in self._schema_attributes: - # Perform validation if enabled - if will_validate(): - schema = _get_schema_for_property(self._schema(), key) - if schema: - _validate(key, value, schema, self.ctx) - # Finally set the value self._data[key] = value else: diff --git a/src/roman_datamodels/validate.py b/src/roman_datamodels/validate.py index 1ecfff5d..52912d3c 100644 --- a/src/roman_datamodels/validate.py +++ b/src/roman_datamodels/validate.py @@ -7,15 +7,10 @@ from contextlib import contextmanager from textwrap import dedent -from asdf import AsdfFile from asdf import schema as asdf_schema -from asdf import yamlutil -from asdf.exceptions import ValidationError -from asdf.util import HashableDict __all__ = [ "ValidationWarning", - "value_change", ] ROMAN_VALIDATE = "ROMAN_VALIDATE" @@ -55,107 +50,10 @@ def will_validate(): return validate -def strict_validation_is_disabled(): - MESSAGE = dedent( - """\ - Strict validation has been disabled. This may result - in validation errors arising when writing data. - - To turn strict validation back on, unset the - environment variable ROMAN_STRICT_VALIDATION or set - it to "true". - """ - ) - - warnings.warn(MESSAGE, ValidationWarning) - - -def will_strict_validate(): - """ - Determine if strict validation is enabled. - """ - var = os.getenv(ROMAN_STRICT_VALIDATION) - - if not (validate := var is None or var.lower() in ["true", "yes", "1"]): - strict_validation_is_disabled() - - return validate - - class ValidationWarning(Warning): pass -def value_change(value, pass_invalid_values, strict_validation): - """ - Validate a change in value against a schema. - Trap error and return a flag. - """ - try: - _check_value(value) - update = True - except ValidationError as errmsg: - update = False - if pass_invalid_values: - update = True - if strict_validation: - raise errmsg - else: - warnings.warn(errmsg, ValidationWarning) - return update - - -def _check_type(validator, types, instance, schema): - """ - Callback to check data type. Skips over null values. - """ - if instance is None: - errors = [] - else: - errors = asdf_schema.validate_type(validator, types, instance, schema) - return errors - - -validator_callbacks = HashableDict(asdf_schema.YAML_VALIDATORS) -validator_callbacks.update({"type": _check_type}) - - -def _check_value(value): - """ - Perform the actual validation. - """ - - validator_context = AsdfFile() - - if hasattr(value, "_schema"): - temp_schema = value._schema() - else: - temp_schema = {"$schema": "http://stsci.edu/schemas/asdf-schema/0.1.0/asdf-schema"} - validator = asdf_schema.get_validator(temp_schema, validator_context, validators=validator_callbacks) - - value = yamlutil.custom_tree_to_tagged_tree(value, validator_context) - validator.validate(value, _schema=temp_schema) - validator_context.close() - - -def _error_message(path, error): - """ - Add the path to the attribute as context for a validation error - """ - if isinstance(path, list): - spath = [str(p) for p in path] - name = ".".join(spath) - else: - name = str(path) - - error = str(error) - if len(error) > 2000: - error = error[0:1996] + " ..." - errfmt = "While validating {} the following error occurred:\n{}" - errmsg = errfmt.format(name, error) - return errmsg - - @contextmanager def nuke_validation(): """ diff --git a/tests/test_models.py b/tests/test_models.py index bcd6147d..34f4058d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -552,8 +552,9 @@ def test_add_model_attribute(tmp_path): assert readnoise2.new_attribute == 77 readnoise2.new_attribute = 88 assert readnoise2.new_attribute == 88 + readnoise2.data = "bad_data_value" with pytest.raises(ValidationError): - readnoise.data = "bad_data_value" + readnoise2.validate() def test_model_subscribable(tmp_path): @@ -679,8 +680,6 @@ def test_node_assignment(): assert isinstance(exposure, stnode.DNode) wfi_image.meta.exposure = exposure assert isinstance(wfi_image.meta.exposure, stnode.DNode) - with pytest.raises(ValidationError): - wfi_image.meta.exposure = utils.mk_program() # The following tests that supplying a LNode passes validation. rampmodel = datamodels.RampModel(utils.mk_ramp(shape=(9, 9, 2))) assert isinstance(rampmodel.meta.exposure.read_pattern[1:], stnode.LNode) diff --git a/tests/test_stnode.py b/tests/test_stnode.py index fce096a5..4919237b 100644 --- a/tests/test_stnode.py +++ b/tests/test_stnode.py @@ -2,9 +2,7 @@ from contextlib import nullcontext import asdf -import astropy.units as u import pytest -from asdf.exceptions import ValidationError from roman_datamodels import datamodels from roman_datamodels import maker_utils @@ -163,55 +161,10 @@ def test_schema_info(): } -def test_set_pattern_properties(): - """ - Regression test for patternProperties not being validated - """ - - # This model uses includes a patternProperty - mdl = maker_utils.mk_wfi_img_photom() - - # This should be invalid because it is not a quantity - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.photmjsr = 3.14 - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.uncertainty = 3.14 - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.pixelareasr = 3.14 - - # This is invalid because it is not a scalar - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.photmjsr = [37.0] * (u.MJy / u.sr) - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.uncertainty = [37.0] * (u.MJy / u.sr) - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.pixelareasr = [37.0] * u.sr - - # This should be invalid because it has the wrong unit - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.photmjsr = 3.14 * u.m - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.uncertainty = 3.14 * u.m - with pytest.raises(asdf.ValidationError): - mdl.phot_table.F062.pixelareasr = 3.14 * u.m - - # Test some valid values (including the rest of the patternProperties) - mdl.phot_table.F062.photmjsr = 3.14 * (u.MJy / u.sr) - mdl.phot_table.F062.uncertainty = 0.1 * (u.MJy / u.sr) - mdl.phot_table.F062.pixelareasr = 37.0 * u.sr - - # Test it can be None (including the rest of the patternProperties) - mdl.phot_table.F062.photmjsr = None - mdl.phot_table.F062.uncertainty = None - mdl.phot_table.F062.pixelareasr = None - - # Test that a currently undefined attribute can be assigned using dot notation # so long as the attribute is defined in the corresponding schema. def test_node_new_attribute_assignment(): exp = stnode.Exposure() - with pytest.raises(AttributeError): - exp.bozo = 0 exp.nresultants = 5 assert exp.nresultants == 5 # Test patternProperties attribute case @@ -222,8 +175,6 @@ def test_node_new_attribute_assignment(): photmod.phot_table.F213 = phottab["F213"] with pytest.raises(AttributeError): photmod.phot_table.F214 = phottab["F213"] - with pytest.raises(ValidationError): - photmod.phot_table.F106 = 0 VALIDATION_CASES = ("true", "yes", "1", "True", "Yes", "TrUe", "YeS", "foo", "Bar", "BaZ") @@ -262,17 +213,6 @@ def test_will_validate(nuke_env_var): def test_nuke_validation(nuke_env_var, tmp_path): context = pytest.raises(asdf.ValidationError) if nuke_env_var[1] else pytest.warns(validate.ValidationWarning) - # Create a broken DNode object - mdl = maker_utils.mk_wfi_img_photom() - mdl["phot_table"] = "THIS IS NOT VALID" - with context: - datamodels.WfiImgPhotomRefModel(mdl) - - # __setattr__ a broken value - mdl = maker_utils.mk_wfi_img_photom() - with context: - mdl.phot_table = "THIS IS NOT VALID" - # Break model without outside validation with nullcontext() if nuke_env_var[1] else pytest.warns(validate.ValidationWarning): mdl = datamodels.WfiImgPhotomRefModel(maker_utils.mk_wfi_img_photom()) @@ -312,34 +252,6 @@ def test_nuke_validation(nuke_env_var, tmp_path): pass -@pytest.mark.parametrize("nuke_env_strict_var", VALIDATION_CASES, indirect=True) -def test_will_strict_validate(nuke_env_strict_var): - # Test the fixture passed the value of the environment variable - assert os.getenv(validate.ROMAN_STRICT_VALIDATION) == nuke_env_strict_var - - # Test the validate property - truth = nuke_env_strict_var.lower() in ["true", "yes", "1"] - context = nullcontext() if truth else pytest.warns(validate.ValidationWarning) - - with context: - assert validate.will_strict_validate() is truth - - # Try all uppercase - os.environ[validate.ROMAN_STRICT_VALIDATION] = nuke_env_strict_var.upper() - with context: - assert validate.will_strict_validate() is truth - - # Try all lowercase - os.environ[validate.ROMAN_STRICT_VALIDATION] = nuke_env_strict_var.lower() - with context: - assert validate.will_strict_validate() is truth - - # Remove the environment variable to test the default value - del os.environ[validate.ROMAN_STRICT_VALIDATION] - assert os.getenv(validate.ROMAN_STRICT_VALIDATION) is None - assert validate.will_strict_validate() is True - - @pytest.mark.parametrize("model", [mdl for mdl in datamodels.MODEL_REGISTRY.values() if "Ref" not in mdl.__name__]) def test_node_representation(model): """