From fba5a5f004bf15dbd06cea4c35616acf027ccdfe Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Fri, 26 Jan 2024 13:08:00 +0100 Subject: [PATCH 1/6] update changelog #95 --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ad5dc31..2a0f8a6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,7 @@ Changelog current (2024-XX-XX) -------------------- -* +* Reorder metadata fields after the json input was compiled & prevent removing context fields if they are Null (`#96 <https://github.com/OpenEnergyPlatform/omi/pull/96>`_) 0.2.0 (2024-01-25) -------------------- From 2920999b5bc998a4b9aebf9195c8a19e888efec6 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Fri, 26 Jan 2024 13:08:55 +0100 Subject: [PATCH 2/6] reorder compiled fields and also apply some formatting and logging #95 --- src/omi/dialects/oep/compiler.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/omi/dialects/oep/compiler.py b/src/omi/dialects/oep/compiler.py index 50e2742..49dc431 100644 --- a/src/omi/dialects/oep/compiler.py +++ b/src/omi/dialects/oep/compiler.py @@ -1,4 +1,5 @@ import datetime +import logging from omi import structure from omi.dialects.base.compiler import Compiler @@ -51,22 +52,33 @@ def visit_context(self, context: structure.Context, *args, **kwargs): ("grantNo", context.grant_number), ) if context.funding_agency is not None: - if context.funding_agency.name is not None: - result["fundingAgency"] = context.funding_agency.name - if context.funding_agency.logo is not None: - result["fundingAgencyLogo"] = context.funding_agency.logo + result["fundingAgency"] = context.funding_agency.name + result["fundingAgencyLogo"] = context.funding_agency.logo if context.publisher is not None: result["publisherLogo"] = context.publisher.logo + logging.info( + f"The context is parsed from file with the following values: {context}" + ) + logging.info( + f"The context class is 
compiled to the following python dict: {result}" + ) return result def visit_contribution(self, contribution: structure.Contribution, *args, **kwargs): - return self._construct_dict( + result = self._construct_dict( ("title", contribution.contributor.name), ("email", contribution.contributor.email), ("object", contribution.object), ("comment", contribution.comment), ("date", compile_date_or_none(contribution.date, "%Y-%m-%d")), ) + logging.info( + f"The contributions are parsed from file with the following values: {contribution}" + ) + logging.info( + f"The contributions class is compiled to the following python dict: {result}" + ) + return result def visit_language(self, language: structure.Language, *args, **kwargs): return str(language) @@ -133,7 +145,7 @@ def visit_terms_of_use(self, terms_of_use: structure.TermsOfUse): return self._construct_dict( ("instruction", terms_of_use.instruction), ("attribution", terms_of_use.attribution), - **license_kwargs + **license_kwargs, ) def visit_resource(self, resource: structure.Resource, *args, **kwargs): @@ -367,20 +379,20 @@ def visit_metadata(self, metadata: oem_v15.OEPMetadata, *args, **kwargs): ("title", metadata.title), ("id", metadata.identifier), ("description", metadata.description), - ("language", metadata.languages), ("subject", metadata.subject), + ("language", metadata.languages), ("keywords", metadata.keywords), ("publicationDate", publication_date), ("context", metadata.context), ("spatial", metadata.spatial), ("temporal", metadata.temporal), - ("review", metadata.review), ("sources", metadata.sources), ("licenses", metadata.license), ("contributors", metadata.contributions), ("resources", metadata.resources), ("@id", metadata.databus_identifier), ("@context", metadata.databus_context), + ("review", metadata.review), metaMetadata=self._construct_dict( ("metadataVersion", self.__METADATA_VERSION), metadataLicense=self._construct_dict( @@ -399,5 +411,5 @@ def visit_metadata(self, metadata: 
oem_v15.OEPMetadata, *args, **kwargs): null="If not applicable use: null", todo="If a value is not yet available, use: todo", ), - **kwargs + **kwargs, ) From 84ee218cc81f53a48ca133074c4851db64f4a040 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Fri, 26 Jan 2024 13:09:40 +0100 Subject: [PATCH 3/6] formatting #95 --- src/omi/dialects/oep/parser.py | 76 ++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 8378fa4..9980da8 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -9,6 +9,7 @@ import dateutil import jsonschema from jsonschema import ValidationError + # oemetadata from metadata.latest.schema import OEMETADATA_LATEST_SCHEMA from metadata.v130.schema import OEMETADATA_V130_SCHEMA @@ -32,7 +33,7 @@ ] -def parse_date_or_none(x): +def parse_date_or_none(x, fieldname=None, element=None): if x is None: pass elif type(x) == int: @@ -53,16 +54,22 @@ def parse_date_or_none(x): try: date_time = dateutil.parser.parse(x) except Exception: - raise ParserException(f"invalid value for date: {x}") + raise ParserException( + f"In fields {fieldname} {element} element is a invalid value for date: {x}" + ) if re.match("^[123][0-9]{3}-[0-9]{1,2}-[0-9]{1,2}$", x): # date only x = date_time.date() else: x = date_time else: - raise ParserException(f"invalid value for date: {x}") + raise ParserException( + f"In fields {fieldname} {element} element is a invalid value for date: {x}" + ) else: - raise ParserException(f"invalid type for date: {type(x)}") + raise ParserException( + f"In fields {fieldname} {element} element is a invalid type for date: {type(x)}" + ) return x @@ -203,11 +210,10 @@ def validate(self, metadata: dict, schema: dict = None, save_report=True): return report def is_valid(self, inp: dict, schema): - # 1 - valid JSON? 
if isinstance(inp, str): try: - jsn = json.loads(inp, encode="utf-8") + jsn = json.loads(inp) except ValueError: return False else: @@ -224,7 +230,6 @@ def is_valid(self, inp: dict, schema): class JSONParser_1_3(JSONParser): def is_valid(self, inp: dict, schema=OEMETADATA_V130_SCHEMA): - # 1 - valid JSON? if isinstance(inp, str): try: @@ -327,7 +332,9 @@ def parse(self, json_old, *args, **kwargs): else: resources = [] if len(old_resources) == 0: - raise ParserException("Resource field doesn't have any child entity") + raise ParserException( + "The field Resource field is empty! Please provide a description of your data resources e.g. a table schema." + ) for resource in old_resources: old_fields = resource.get("fields") if old_fields is None: @@ -374,7 +381,6 @@ def parse(self, json_old, *args, **kwargs): class JSONParser_1_4(JSONParser): def is_valid(self, inp: dict, schema=OEMETADATA_V141_SCHEMA): - # 1 - valid JSON? if isinstance(inp, str): try: @@ -406,13 +412,14 @@ def parse_term_of_use(self, old_license: dict): def parse(self, json_old: dict, *args, **kwargs): # context section if "id" not in json_old: - raise ParserException("metadata string does not contain an id") + raise ParserException( + "The metadata string does not contain an id. This field is required." 
+ ) inp_context = json_old.get("context") if inp_context is None: context = None else: - funding_agency = None if "fundingAgency" in inp_context: funding_agency = structure.Agency( @@ -506,11 +513,13 @@ def parse(self, json_old: dict, *args, **kwargs): name=old_contributor.get("title"), email=old_contributor.get("email"), ), - date=parse_date_or_none(old_contributor.get("date")), + date=parse_date_or_none( + old_contributor.get("date"), f"{cont_element}contributors.data" + ), obj=old_contributor.get("object"), comment=old_contributor.get("comment"), ) - for old_contributor in old_contributors + for cont_element, old_contributor in enumerate(old_contributors) ] # extending with script-user information @@ -777,7 +786,6 @@ def get_table_name(self, metadata_file): class JSONParser_1_5(JSONParser): def is_valid(self, inp: dict, schema=OEMETADATA_LATEST_SCHEMA): - # 1 - valid JSON? if isinstance(inp, str): try: @@ -828,12 +836,12 @@ def get_any_value_not_none( ): """ Get the value for a key in a dict - but try multiple key names, in - case they have changed in eralryer oemetadata versions. + case they have changed in earlier oemetadata versions. Args: element (dict): dict element of the input metadata keys (list[str]): list of key name options - get_return_default (_type_, optional): A default return vlaue if key is not present. Defaults to None. + get_return_default (_type_, optional): A default return value if key is not present. Defaults to None. Returns: any: By default it is the value at the key or None - but can be any as the value is not strict. 
@@ -900,7 +908,6 @@ def parse(self, json_old: dict, *args, **kwargs): if inp_context is None: context = None else: - funding_agency = None if "fundingAgency" in inp_context: funding_agency = oem_v15.Agency( @@ -1024,25 +1031,25 @@ def parse_old_licenses_including_former_key_names(element: dict): element, key_name_options.get("licenses_equal") ) - def parse_licence_including_former_structure(licences_element): + def parse_license_including_former_structure(licenses_element): """ The lincences key was got a structural differnece in former oemetada versions. In Version 1.3 the key was called lincense and was a singe object/dict, in the current version this key is calles licences and is a list of objects/dicts. Also the key names in the dicht are deviating. """ - if isinstance(licences_element, list): + if isinstance(licenses_element, list): _result = [ self.parse_term_of_use(old_license) for old_license in old_licenses ] - if isinstance(licences_element, dict): + if isinstance(licenses_element, dict): _mapping_former_keys = { - "name": licences_element.get("id"), - "title": licences_element.get("name"), - "path": licences_element.get("url"), - "instruction": licences_element.get("instruction"), - "attribution": licences_element.get("copyright"), + "name": licenses_element.get("id"), + "title": licenses_element.get("name"), + "path": licenses_element.get("url"), + "instruction": licenses_element.get("instruction"), + "attribution": licenses_element.get("copyright"), } _result = [self.parse_term_of_use(old_license=_mapping_former_keys)] @@ -1054,8 +1061,8 @@ def parse_licence_including_former_structure(licences_element): if old_licenses is None: licenses = None else: - licenses = parse_licence_including_former_structure( - licences_element=old_licenses + licenses = parse_license_including_former_structure( + licenses_element=old_licenses ) # filling the contributers section @@ -1071,11 +1078,13 @@ def parse_licence_including_former_structure(licences_element): ), 
email=old_contributor.get("email"), ), - date=parse_date_or_none(old_contributor.get("date")), + date=parse_date_or_none( + old_contributor.get("date"), "contributors.data", cont_element + ), obj=old_contributor.get("object"), comment=old_contributor.get("comment"), ) - for old_contributor in old_contributors + for cont_element, old_contributor in enumerate(old_contributors) ] # extending with script-user information @@ -1085,7 +1094,9 @@ def parse_licence_including_former_structure(licences_element): # Code added to raise exception when resource is empty else: if len(old_resources) == 0: - raise ParserException("Resources field is empty!") + raise ParserException( + "The resources field is empty! Please provide a description of you data resources e.g. a table schema. See https://github.com/OpenEnergyPlatform/oemetadata/blob/master/metadata/latest/metadata_key_description.md#resource-keys." + ) resources = [] for resource in old_resources: old_schema = resource.get("schema") @@ -1145,7 +1156,10 @@ def parse_licence_including_former_structure(licences_element): for fk in old_foreign_keys: old_reference = fk.get("reference") if old_reference is None: - raise ParserException("Foreign key without reference:", fk) + raise ParserException( + "The Foreign key you provided is missing reference information:", + fk, + ) source_fields = [ field_dict[field_name] for field_name in fk.get("fields", []) From dcc9402d43ed1a60f5ebd2f6b5b50861a3df0bec Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Fri, 26 Jan 2024 14:07:14 +0100 Subject: [PATCH 4/6] attempt to fix isort issue --- src/omi/dialects/oep/compiler.py | 2 +- src/omi/dialects/oep/parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omi/dialects/oep/compiler.py b/src/omi/dialects/oep/compiler.py index 49dc431..4ab02fb 100644 --- a/src/omi/dialects/oep/compiler.py +++ b/src/omi/dialects/oep/compiler.py @@ -1,5 +1,5 @@ -import datetime import logging +import datetime from omi import structure 
from omi.dialects.base.compiler import Compiler diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 9980da8..3ef7db9 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -1,8 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import logging import json -import logging import pathlib import re From 435e1c95289dfc461c781c0b1017a90cbd9daf4e Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Fri, 26 Jan 2024 14:12:22 +0100 Subject: [PATCH 5/6] fix imports --- src/omi/dialects/oep/compiler.py | 2 +- src/omi/dialects/oep/parser.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/omi/dialects/oep/compiler.py b/src/omi/dialects/oep/compiler.py index 4ab02fb..49dc431 100644 --- a/src/omi/dialects/oep/compiler.py +++ b/src/omi/dialects/oep/compiler.py @@ -1,5 +1,5 @@ -import logging import datetime +import logging from omi import structure from omi.dialects.base.compiler import Compiler diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 3ef7db9..10a85b3 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -1,15 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import json import logging -import json import pathlib import re import dateutil import jsonschema from jsonschema import ValidationError - # oemetadata from metadata.latest.schema import OEMETADATA_LATEST_SCHEMA from metadata.v130.schema import OEMETADATA_V130_SCHEMA From 0602861c767736f709680ceb21adb106674272f7 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Fri, 26 Jan 2024 14:13:16 +0100 Subject: [PATCH 6/6] fix more imports --- src/omi/dialects/oep/parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 10a85b3..e670480 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import json import logging - import pathlib 
import re