def __validate_optional_fields_in_metadata(metadata: dict, schema: dict) -> None:
    """
    Validate optional fields in metadata dictionary based on schema.

    Walks the ``properties`` of the schema alongside the metadata and raises a
    warning for every property that is not listed under ``required`` and is
    missing from the metadata. Required fields are never reported here.
    Metadata values that are not dictionaries (e.g. ``null``, strings, lists)
    are not descended into.

    Parameters
    ----------
    metadata: dict
        Metadata as dictionary to check optional fields
    schema: dict
        JSONSchema for checking optional fields

    Returns
    -------
    None
    """

    def check_properties(sub_meta: object, sub_schema: object, current_path: str) -> None:
        """Check optional fields in metadata dictionary recursively."""
        # Boolean schemas and non-object metadata values (None, str, list, ...)
        # have no named properties to compare, so there is nothing to report.
        if not isinstance(sub_schema, dict) or not isinstance(sub_meta, dict):
            return
        properties = sub_schema.get("properties")
        if not properties:
            return
        required = sub_schema.get("required", ())
        # Use a separate display name so the real path handed down to nested
        # levels is never overwritten by the "top level" placeholder.
        location = current_path or "top level"
        for field, field_schema in properties.items():
            if field in sub_meta:
                new_path = f"{current_path}.{field}" if current_path else field
                check_properties(sub_meta[field], field_schema, new_path)
            elif field not in required:
                warnings.warn(f"Optional field '{field}' not found in metadata at {location}.", stacklevel=2)

    check_properties(metadata, schema, "")