From ad863d2d8e68a53827b58d6dca6ef94dd0bdff04 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sat, 12 Nov 2022 13:52:19 +0100 Subject: [PATCH 01/28] update omis parser to handle former key names - oem-v1.3 got deviations in licences key names, the dialect for 1.5.parser is now able to handle former key names --- src/omi/dialects/oep/parser.py | 107 ++++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 14 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index e227f2a..34ce732 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -614,6 +614,16 @@ def parse_from_string( **(parse_kwargs or {}) ) + def get_value_or_none( + self, element: dict, keys: list[str], get_return_default=None + ): + for key_name in keys: + _element = element.get(key_name, get_return_default) + if _element is None: + continue + + return _element + def parse_term_of_use(self, old_license: dict): return oem_v15.TermsOfUse( lic=oem_v15.License( @@ -723,32 +733,101 @@ def parse(self, json_old: dict, *args, **kwargs): timeseries_collection=timeseries, ) + def try_parse_sources_lincese_including_former_key_names(element: dict): + licenses_new = "licenses" + licenses_old = "license" + + if isinstance(element.get(licenses_new), list): + return [self.parse_term_of_use(l) for l in element.get(licenses_new)] + + if isinstance(element.get(licenses_old), str): + name = element.get(licenses_old) + + # avoide empty structures like [{}] + if name is None: + _result = [] + else: + _result = [oem_v15.License(identifier=name)] + + return _result + + def try_parse_source_including_former_key_names(key: dict): + # sources key name options - including key names pre oem v1.4 + key_name_options = { + "title_equal": ["title", "name"], + "path_equal": ["path", "url"], + "licenses_equal": ["licenses", "license"], + } + + source = oem_v15.Source( + title=self.get_value_or_none( + element=key, keys=key_name_options.get("title_equal") + ), + 
description=key.get("description"), + path=self.get_value_or_none( + element=key, keys=key_name_options.get("path_equal") + ), + licenses=try_parse_sources_lincese_including_former_key_names( + element=key + ), + ) + + return source + # filling the source section - old_sources = json_old.get("sources") + # expected to be a list but can also be a dict in old versions + old_sources: list = json_old.get("sources") if old_sources is None: sources = None else: sources = [ - oem_v15.Source( - title=old_source.get("title"), - description=old_source.get("description"), - path=old_source.get("path"), - licenses=[ - self.parse_term_of_use(l) - for l in old_source.get("licenses", []) - ], - ) + try_parse_source_including_former_key_names(key=old_source) for old_source in old_sources ] + def parse_old_licenses_including_former_key_names(element: dict): + key_name_options = { + "licenses_equal": ["licenses", "license"], + } + + return self.get_value_or_none( + element, key_name_options.get("licenses_equal") + ) + + def iterate_licence_influcding_former_structure(licences_element): + """ + The lincences key was got a structural differnece in former oemetada versions. + In Version 1.3 the key was called lincense and was a singe object/dict, in the + current version this key is calles licences and is a list of objects/dicts. + Also the key names in the dicht are deviating. 
+ """ + if isinstance(licences_element, list): + _result = [ + self.parse_term_of_use(old_license) for old_license in old_licenses + ] + + if isinstance(licences_element, dict): + _mapping_dromers_keys = { + "name": licences_element.get("id"), + "title": licences_element.get("name"), + "path": licences_element.get("url"), + "instruction": licences_element.get("instruction"), + "attribution": licences_element.get("copyright"), + } + + _result = [self.parse_term_of_use(old_license=_mapping_dromers_keys)] + + return _result + # filling the license section - old_licenses = json_old.get("licenses") + old_licenses = parse_old_licenses_including_former_key_names(element=json_old) + print(old_licenses) if old_licenses is None: licenses = None else: - licenses = [ - self.parse_term_of_use(old_license) for old_license in old_licenses - ] + licenses = iterate_licence_influcding_former_structure( + licences_element=old_licenses + ) # filling the contributers section old_contributors = json_old.get("contributors") From ffb8275998b843d83ee07c3e6b6ff22098be117c Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 13 Nov 2022 17:26:34 +0100 Subject: [PATCH 02/28] fix function name --- src/omi/dialects/oep/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 34ce732..1487168 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -794,7 +794,7 @@ def parse_old_licenses_including_former_key_names(element: dict): element, key_name_options.get("licenses_equal") ) - def iterate_licence_influcding_former_structure(licences_element): + def iterate_licence_including_former_structure(licences_element): """ The lincences key was got a structural differnece in former oemetada versions. 
In Version 1.3 the key was called lincense and was a singe object/dict, in the @@ -825,7 +825,7 @@ def iterate_licence_influcding_former_structure(licences_element): if old_licenses is None: licenses = None else: - licenses = iterate_licence_influcding_former_structure( + licenses = iterate_licence_including_former_structure( licences_element=old_licenses ) From 788848791380da828f7326d9b55886662e098711 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 13 Nov 2022 22:18:37 +0100 Subject: [PATCH 03/28] fix variable name --- src/omi/dialects/oep/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 1487168..4efb3c4 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -807,7 +807,7 @@ def iterate_licence_including_former_structure(licences_element): ] if isinstance(licences_element, dict): - _mapping_dromers_keys = { + _mapping_former_keys = { "name": licences_element.get("id"), "title": licences_element.get("name"), "path": licences_element.get("url"), @@ -815,7 +815,7 @@ def iterate_licence_including_former_structure(licences_element): "attribution": licences_element.get("copyright"), } - _result = [self.parse_term_of_use(old_license=_mapping_dromers_keys)] + _result = [self.parse_term_of_use(old_license=_mapping_former_keys)] return _result From 307ebc445a98508fbbe8a8ca969868a0d3224f3c Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 13 Nov 2022 22:19:54 +0100 Subject: [PATCH 04/28] rename parser-helper function --- src/omi/dialects/oep/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 4efb3c4..6fa1c8d 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -794,7 +794,7 @@ def parse_old_licenses_including_former_key_names(element: dict): element, key_name_options.get("licenses_equal") ) - def 
iterate_licence_including_former_structure(licences_element): + def parse_licence_including_former_structure(licences_element): """ The lincences key was got a structural differnece in former oemetada versions. In Version 1.3 the key was called lincense and was a singe object/dict, in the @@ -825,7 +825,7 @@ def iterate_licence_including_former_structure(licences_element): if old_licenses is None: licenses = None else: - licenses = iterate_licence_including_former_structure( + licenses = parse_licence_including_former_structure( licences_element=old_licenses ) From 24187fb3580957d8e9b3948fde2e2b761fb0b656 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Tue, 22 Nov 2022 23:07:43 +0100 Subject: [PATCH 05/28] add docstrings and update function names --- src/omi/dialects/oep/parser.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 6fa1c8d..fb26920 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -617,6 +617,19 @@ def parse_from_string( def get_value_or_none( self, element: dict, keys: list[str], get_return_default=None ): + """ + Get the value for a key in a dict - but try multiple key names, in + case they have changed in eralryer oemetadata versions. + + Args: + element (dict): dict element of the input metadata + keys (list[str]): list of key name options + get_return_default (_type_, optional): A default return vlaue if key is not present. Defaults to None. + + Returns: + any: By default it is the value at the key or None - but can be any as the value is not strict. 
+ """ + for key_name in keys: _element = element.get(key_name, get_return_default) if _element is None: @@ -635,8 +648,13 @@ def parse_term_of_use(self, old_license: dict): attribution=old_license.get("attribution"), ) - def parse_timeseries(self, old_timeseries: dict): - pass + def ensure_json_keys_lowercase(json_old: dict): + element = json_old # element must be part of json_old not hole json_old + if isinstance(element, dict): + pass + + if isinstance(element, list): + pass def parse(self, json_old: dict, *args, **kwargs): """_summary_ @@ -751,7 +769,7 @@ def try_parse_sources_lincese_including_former_key_names(element: dict): return _result - def try_parse_source_including_former_key_names(key: dict): + def parse_source_including_former_key_names(key: dict): # sources key name options - including key names pre oem v1.4 key_name_options = { "title_equal": ["title", "name"], @@ -781,11 +799,15 @@ def try_parse_source_including_former_key_names(key: dict): sources = None else: sources = [ - try_parse_source_including_former_key_names(key=old_source) + parse_source_including_former_key_names(key=old_source) for old_source in old_sources ] def parse_old_licenses_including_former_key_names(element: dict): + """ + Parse license from imput data - also handle key name variations from + early oemetadata versions. 
+ """ key_name_options = { "licenses_equal": ["licenses", "license"], } @@ -837,7 +859,9 @@ def parse_licence_including_former_structure(licences_element): contributors = [ oem_v15.Contribution( contributor=oem_v15.Person( - name=old_contributor.get("title"), + name=self.get_value_or_none( + element=old_contributor, keys=["title", "name"] + ), email=old_contributor.get("email"), ), date=parse_date_or_none(old_contributor.get("date")), From 5b5b5adf2639b8c590aac7146db8018e5b2719fd Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Tue, 22 Nov 2022 23:56:52 +0100 Subject: [PATCH 06/28] update changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2ef7568..c00ab07 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,7 @@ Changelog current (2022-XX-XX) -------------------- +* update parser for v15 to handle former v13 key names (PR#77) 0.1.0 (2022-11-18) -------------------- From ea7269bd54ccf21419cdaff0745b78f4ca9f38e3 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Thu, 24 Nov 2022 00:56:29 +0100 Subject: [PATCH 07/28] update licence attributes to oemv15 --- src/omi/oem_structures/oem_v15.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/omi/oem_structures/oem_v15.py b/src/omi/oem_structures/oem_v15.py index 9a1916e..07045a4 100644 --- a/src/omi/oem_structures/oem_v15.py +++ b/src/omi/oem_structures/oem_v15.py @@ -96,18 +96,18 @@ class License(Compilable): def __init__( self, name: str = None, - identifier: str = None, - text: str = None, + title: str = None, + instruction: str = None, path: str = None, - other_references: Iterable[str] = None, - comment: str = None, + attribution: str = None, + # other_references: Iterable[str] = None, ): self.name = name + self.title = title self.path = path - self.identifier = identifier - self.other_references = other_references - self.text = text - self.comment = comment + self.instruction = instruction + self.attribution = attribution + 
# self.other_references = other_references @staticmethod def instance_name_from_id(identifier: str): From 0742041590aa33990a5d8af6dfe757662b112e31 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Fri, 25 Nov 2022 18:29:29 +0100 Subject: [PATCH 08/28] fix license missing / wrong license keys --- src/omi/oem_structures/oem_v15.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/omi/oem_structures/oem_v15.py b/src/omi/oem_structures/oem_v15.py index 07045a4..866aa48 100644 --- a/src/omi/oem_structures/oem_v15.py +++ b/src/omi/oem_structures/oem_v15.py @@ -97,16 +97,16 @@ def __init__( self, name: str = None, title: str = None, - instruction: str = None, path: str = None, - attribution: str = None, + # instruction: str = None, + # attribution: str = None, # other_references: Iterable[str] = None, ): self.name = name self.title = title self.path = path - self.instruction = instruction - self.attribution = attribution + # self.instruction = instruction + # self.attribution = attribution # self.other_references = other_references @staticmethod @@ -122,9 +122,10 @@ class TermsOfUse(Compilable): def __init__( self, instruction: str = None, attribution: str = None, lic: License = None ): + self.license = lic self.instruction = instruction self.attribution = attribution - self.license = lic + class Source(Compilable): From 92cab3a4c94a97c5d5cd71f9a0d930e3de79e97d Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Fri, 25 Nov 2022 18:30:15 +0100 Subject: [PATCH 09/28] update version to latest oemetadata release --- src/omi/dialects/oep/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/omi/dialects/oep/compiler.py b/src/omi/dialects/oep/compiler.py index 23ec510..fc8be78 100644 --- a/src/omi/dialects/oep/compiler.py +++ b/src/omi/dialects/oep/compiler.py @@ -241,7 +241,7 @@ class JSONCompilerOEM15(JSONCompiler): the metadata structure. 
""" - __METADATA_VERSION = "OEP-1.5.1" + __METADATA_VERSION = "OEP-1.5.2" def visit(self, obj, *args, **kwargs): """ From 099ce6e8e1411d7f0063f929e79720ef819c6ba4 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Fri, 25 Nov 2022 18:30:44 +0100 Subject: [PATCH 10/28] update test metadata v15 to latest version --- tests/data/metadata_v15.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/metadata_v15.json b/tests/data/metadata_v15.json index f902875..81893a8 100644 --- a/tests/data/metadata_v15.json +++ b/tests/data/metadata_v15.json @@ -248,7 +248,7 @@ "badge": "Platinum" }, "metaMetadata": { - "metadataVersion": "OEP-1.5.1", + "metadataVersion": "OEP-1.5.2", "metadataLicense": { "name": "CC0-1.0", "title": "Creative Commons Zero v1.0 Universal", From c6f2eaa1ef9c59314d797427433ca27407eebd7c Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Fri, 25 Nov 2022 18:33:57 +0100 Subject: [PATCH 11/28] fix outdated key names are parsed when reading licences from metadata --- src/omi/dialects/oep/parser.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 4bbf0ff..260b217 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -753,7 +753,7 @@ def parse_from_string( **(parse_kwargs or {}), ) - def get_value_or_none( + def get_any_value_not_none( self, element: dict, keys: list[str], get_return_default=None ): """ @@ -779,8 +779,8 @@ def get_value_or_none( def parse_term_of_use(self, old_license: dict): return oem_v15.TermsOfUse( lic=oem_v15.License( - identifier=old_license.get("name"), - name=old_license.get("title"), + name=old_license.get("name"), + title=old_license.get("title"), path=old_license.get("path"), ), instruction=old_license.get("instruction"), @@ -890,7 +890,7 @@ def parse(self, json_old: dict, *args, **kwargs): timeseries_collection=timeseries, ) - def 
try_parse_sources_lincese_including_former_key_names(element: dict): + def parse_sources_lincese_including_former_key_names(element: dict): licenses_new = "licenses" licenses_old = "license" @@ -917,14 +917,14 @@ def parse_source_including_former_key_names(key: dict): } source = oem_v15.Source( - title=self.get_value_or_none( + title=self.get_any_value_not_none( element=key, keys=key_name_options.get("title_equal") ), description=key.get("description"), - path=self.get_value_or_none( + path=self.get_any_value_not_none( element=key, keys=key_name_options.get("path_equal") ), - licenses=try_parse_sources_lincese_including_former_key_names( + licenses=parse_sources_lincese_including_former_key_names( element=key ), ) @@ -951,7 +951,7 @@ def parse_old_licenses_including_former_key_names(element: dict): "licenses_equal": ["licenses", "license"], } - return self.get_value_or_none( + return self.get_any_value_not_none( element, key_name_options.get("licenses_equal") ) @@ -998,7 +998,7 @@ def parse_licence_including_former_structure(licences_element): contributors = [ oem_v15.Contribution( contributor=oem_v15.Person( - name=self.get_value_or_none( + name=self.get_any_value_not_none( element=old_contributor, keys=["title", "name"] ), email=old_contributor.get("email"), @@ -1017,7 +1017,7 @@ def parse_licence_including_former_structure(licences_element): # Code added to raise exception when resource is empty else: if len(old_resources) == 0: - raise ParserException("Resource field doesn't have any child entity") + raise ParserException("Resources field is empty!") resources = [] for resource in old_resources: old_schema = resource.get("schema") @@ -1028,6 +1028,7 @@ def parse_licence_including_former_structure(licences_element): old_fields = old_schema.get("fields") if old_fields is None: fields = None + logging.info(f"Parse fields from: {old_fields}") else: fields = [] From 6adf4d855c2dbf66de6f6f2ebef37d3de84a3636 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Fri, 25 Nov 2022 
20:39:48 +0100 Subject: [PATCH 12/28] update interal testing oemetadata structure --- tests/test_dialects/internal_structures.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/test_dialects/internal_structures.py b/tests/test_dialects/internal_structures.py index e05dc8c..97d9f81 100644 --- a/tests/test_dialects/internal_structures.py +++ b/tests/test_dialects/internal_structures.py @@ -359,6 +359,19 @@ ) ############################################### oem v151 ######################################################### +cc010_v15 = oem_v15.License( + name="CC0-1.0", + title="Creative Commons Zero v1.0 Universal", + path="https://creativecommons.org/publicdomain/zero/1.0/legalcode", +) + +odbl10_v15 = oem_v15.License( + name="ODbL-1.0", + title="Open Data Commons Open Database License 1.0", + path="https://opendatacommons.org/licenses/odbl/1.0/", +) + + metadata_v_1_5 = oem_v15.OEPMetadata( name="oep_metadata_table_example_v151", title="Example title for metadata example - Version 1.5.1", @@ -446,7 +459,7 @@ path="https://github.com/OpenEnergyPlatform", licenses=[ oem_v15.TermsOfUse( - lic=cc010, + lic=cc010_v15, instruction="You are free: To Share, To Create, To Adapt", attribution="© Reiner Lemoine Institut", ) @@ -458,7 +471,7 @@ path="https://www.openstreetmap.org/", licenses=[ oem_v15.TermsOfUse( - lic=odbl10, + lic=odbl10_v15, instruction="You are free: To Share, To Create, To Adapt; As long as you: Attribute, Share-Alike, Keep open!", attribution="© OpenStreetMap contributors", ) @@ -467,7 +480,7 @@ ], terms_of_use=[ oem_v15.TermsOfUse( - lic=odbl10, + lic=odbl10_v15, instruction="You are free: To Share, To Create, To Adapt; As long as you: Attribute, Share-Alike, Keep open!", attribution="© Reiner Lemoine Institut © OpenStreetMap contributors", ) From 4706347e29c716db522f4e61b33a6f7aa260cb84 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Fri, 25 Nov 2022 20:41:17 +0100 Subject: [PATCH 13/28] format document and 
remove deprecated code --- src/omi/dialects/oep/parser.py | 104 ++------------------------------- 1 file changed, 5 insertions(+), 99 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 260b217..1c148dd 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -8,6 +8,7 @@ import jsonschema from dateutil.parser import parse as parse_date from jsonschema import ValidationError + # oemetadata from metadata.latest.schema import OEMETADATA_LATEST_SCHEMA from metadata.v130.schema import OEMETADATA_V130_SCHEMA @@ -904,7 +905,8 @@ def parse_sources_lincese_including_former_key_names(element: dict): if name is None: _result = [] else: - _result = [oem_v15.License(identifier=name)] + from_13_license = oem_v15.License(name=name) + _result = [oem_v15.TermsOfUse(lic=from_13_license)] return _result @@ -924,9 +926,7 @@ def parse_source_including_former_key_names(key: dict): path=self.get_any_value_not_none( element=key, keys=key_name_options.get("path_equal") ), - licenses=parse_sources_lincese_including_former_key_names( - element=key - ), + licenses=parse_sources_lincese_including_former_key_names(element=key), ) return source @@ -1106,6 +1106,7 @@ def parse_licence_including_former_structure(licences_element): primary_key=resource["schema"].get("primaryKey"), foreign_keys=foreign_keys, ) + old_dialect = resource.get("dialect") if old_dialect is None: dialect = None @@ -1172,101 +1173,6 @@ def parse_licence_including_former_structure(licences_element): ) return metadata - def assert_1_5_metastring(self, json_string: str): - """Checks string conformity to OEP Metadata Standard Version 1.5 - - Parameters - ---------- - json_string: str - The JSON string to be checked. - - Returns - ------- - bool - True if valid, Raises Exception otherwise. 
- """ - - keys = [ - "title", - "description", - "language", - "spatial", - "temporal", - "sources", - "license", - "contributions", - "resources", - "metadata_version", - ] - subkeys_spatial = ["location", "extent", "resolution"] - subkeys_timeseries = [ - "start", - "end", - "resolution", - "alignment", - "aggregationType", - ] - subkeys_temporal = ["reference_date", "timeseries"] - subkeys_license = ["id", "name", "version", "url", "instruction", "copyright"] - object_subkeys = { - "spatial": subkeys_spatial, - "temporal": subkeys_temporal, - "license": subkeys_license, - } - subkeys_sources = [ - "name", - "description", - "url", - "license", - "copyright", - ] # in list of objects - subkeys_contributors = [ - "name", - "email", - "date", - "comment", - ] # in list of objects - subkeys_resources = ["name", "format", "fields"] # in list of objects - list_subkeys = { - "sources": subkeys_sources, - "contributions": subkeys_contributors, - "resources": subkeys_resources, - } - subkeys_resources_fields = ["name", "description", "unit"] # in list of objects - - json_dict = json.loads(json_string) - try: - # check if all top level keys are present - for i in keys: - if not i in json_dict.keys(): - raise Exception( - 'The String did not contain the key "{0}"'.format(i) - ) - # check for all keys in second level objects - for key in object_subkeys: - for subkey in object_subkeys[key]: - if not subkey in json_dict[key]: - raise Exception( - 'The "{0}" object did not contain a "{1}" key'.format( - key, subkey - ) - ) - # check for all objects in lists if they contain all required keys - for key in list_subkeys: - for list_element in json_dict[key]: - for subkey in list_subkeys[key]: - if not subkey in list_element: - raise Exception( - 'An object in "{0}" is missing a "{1}" key'.format( - key, subkey - ) - ) - except Exception as error: - print( - "The input String does not conform to metadatastring version 1.3 standard" - ) - print(error) - # TODO make function check 
all subkeys as well def has_rogue_keys(self, json_string): """Checks all keys if they are part of the metadata specification. Gives warnings if not. From 3d7397e431b1a7978d182438abc1e1768e15d97d Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Fri, 25 Nov 2022 20:44:18 +0100 Subject: [PATCH 14/28] add missing vistor for licences objects in oemv15 structure --- src/omi/dialects/oep/compiler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/omi/dialects/oep/compiler.py b/src/omi/dialects/oep/compiler.py index fc8be78..d79ee4c 100644 --- a/src/omi/dialects/oep/compiler.py +++ b/src/omi/dialects/oep/compiler.py @@ -302,6 +302,13 @@ def visit_temporal(self, temporal: oem_v15.Temporal, *args, **kwargs): ("referenceDate", self._compile_date(temporal.reference_date, "%Y-%m-%d")), ("timeseries", temporal.timeseries_collection), ) + + def visit_license(self, lic: oem_v15.License, *args, **kwargs): + return self._construct_dict( + ("name", lic.name), + ("title", lic.title), + ("path", lic.path), + ) def visit_isAbout(self, isAbout: oem_v15.IsAbout, *args, **kwargs): return self._construct_dict(("name", isAbout.name), ("path", isAbout.path)) @@ -378,4 +385,5 @@ def visit_metadata(self, metadata: oem_v15.OEPMetadata, *args, **kwargs): null="If not applicable use: null", todo="If a value is not yet available, use: todo", ), + **kwargs ) From 58e1de1e5b04c31988b2cc463f5846ba8471acd8 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 27 Nov 2022 15:22:59 +0100 Subject: [PATCH 15/28] run isort --- src/omi/dialects/oep/parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 1c148dd..51657d6 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -8,7 +8,6 @@ import jsonschema from dateutil.parser import parse as parse_date from jsonschema import ValidationError - # oemetadata from metadata.latest.schema import OEMETADATA_LATEST_SCHEMA from 
metadata.v130.schema import OEMETADATA_V130_SCHEMA @@ -54,7 +53,9 @@ def create_report_json( class JSONParser(Parser): - # one_schema_was_valid = False + + def normalize_key_names_of_input(iput: dict): + pass def load_string(self, string: str, *args, **kwargs): return json.loads(string) From cce28370fe8978a39562f74f3a4d5900304f727a Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 27 Nov 2022 15:24:46 +0100 Subject: [PATCH 16/28] remove print --- src/omi/dialects/oep/parser.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 51657d6..2bf5465 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -132,9 +132,6 @@ def get_schema_by_metadata_version(self, metadata: dict): "Metadata does not contain the expected 'metaMetadata' or 'metadata_version' key. Fallback to latest schema." ) schema = OEMETADATA_LATEST_SCHEMA - - print(schema.get("$id")) - return schema def validate(self, metadata: dict, schema: dict = None): From 1b6f98fb24e6b9f33a45c1a2b856603e041d4b3c Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 27 Nov 2022 17:42:02 +0100 Subject: [PATCH 17/28] add info on how to add additional key-value pairs to the oemetadata object: the additional keys are not included in the oemetadata specification --- README.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.rst b/README.rst index 8bcd339..962e101 100644 --- a/README.rst +++ b/README.rst @@ -147,6 +147,27 @@ Module usage:: schema = ... get a schema or import form oemetadata module parser.is_valid(metadata, schema) +**Additional Fields - not related to the OEMetadata speification** + +Sometimes it is necessary to store additional key-value pairs along with the keys included in the OEMetadata specification. +OMI's compiler methods are capable of handling additional arguments or key-value arguments, but this must be +be explicitly specified. 
To add additional key-value pairs, you must: + +1 Parse the oemetadata json file into omis internal structure:: + + from omi.dialects.oep.dialect import OEP_V_1_5_Dialect + + min_inp = '{"id":"unique_id"} # or read from json file + minimal_oemetadata15 = OEP_V_1_5_Dialect.parse(min_inp) + +2 Now you can get(from json file)/define the additional key-value data:: + + data = "test" + +3 And add it to the OEMetadata object that was parsed in step 1:: + + compiled = OEP_V_1_5_Dialect.compile(minimal_oemetadata15, _additionalField=data) + rendered = OEP_V_1_5_Dialect.render(compiled) Development =========== From 299dce8dff8dafe1bc766f99f65c6cc85f026f16 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 27 Nov 2022 17:48:44 +0100 Subject: [PATCH 18/28] improve text --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 962e101..80c32a3 100644 --- a/README.rst +++ b/README.rst @@ -153,18 +153,18 @@ Sometimes it is necessary to store additional key-value pairs along with the key OMI's compiler methods are capable of handling additional arguments or key-value arguments, but this must be be explicitly specified. 
To add additional key-value pairs, you must: -1 Parse the oemetadata json file into omis internal structure:: +1 Parse the oemetadata from json file / variable into omis internal structure:: from omi.dialects.oep.dialect import OEP_V_1_5_Dialect min_inp = '{"id":"unique_id"} # or read from json file minimal_oemetadata15 = OEP_V_1_5_Dialect.parse(min_inp) -2 Now you can get(from json file)/define the additional key-value data:: +2 Now you can get(from json file)/define the additional data:: data = "test" -3 And add it to the OEMetadata object that was parsed in step 1:: +3 And add it to the OEMetadata object that was parsed in step 1 by ading a key-value argument:: compiled = OEP_V_1_5_Dialect.compile(minimal_oemetadata15, _additionalField=data) rendered = OEP_V_1_5_Dialect.render(compiled) From 3e69e644149554c94d8fca7a0ec7a3c1c6193711 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 27 Nov 2022 17:49:21 +0100 Subject: [PATCH 19/28] improve text --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 80c32a3..769bc5a 100644 --- a/README.rst +++ b/README.rst @@ -147,7 +147,7 @@ Module usage:: schema = ... get a schema or import form oemetadata module parser.is_valid(metadata, schema) -**Additional Fields - not related to the OEMetadata speification** +**Additional Fields - not related to the OEMetadata specification** Sometimes it is necessary to store additional key-value pairs along with the keys included in the OEMetadata specification. 
OMI's compiler methods are capable of handling additional arguments or key-value arguments, but this must be From 13d96faf364efc8dd75a18253ce0d79cfbd00a55 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 27 Nov 2022 20:05:01 +0100 Subject: [PATCH 20/28] update changelog --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c00ab07..e6cedd5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,7 @@ Changelog current (2022-XX-XX) -------------------- -* update parser for v15 to handle former v13 key names (PR#77) +* update parser for v15 to handle former v13 key names, also update outdated License (data-)class in oem_v15 structure. (PR#77) 0.1.0 (2022-11-18) -------------------- From e11a29f2df500e12878a20e5a88ae3b5e099e963 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 27 Nov 2022 20:24:56 +0100 Subject: [PATCH 21/28] add note to the developer how to handle additional key value data that is not parsed by omi's parser method --- README.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 769bc5a..37c37a8 100644 --- a/README.rst +++ b/README.rst @@ -151,7 +151,12 @@ Module usage:: Sometimes it is necessary to store additional key-value pairs along with the keys included in the OEMetadata specification. OMI's compiler methods are capable of handling additional arguments or key-value arguments, but this must be -be explicitly specified. To add additional key-value pairs, you must: +be explicitly specified. + +To add additional key-value pairs, you must: + + NOTE: If you save the renderer return value in a json file and try to parse the file, the extra field is not included. + You must read the json file using Python and then add the extra field back oemetadata object as shown below. 
1 Parse the oemetadata from json file / variable into omis internal structure:: From ef0008bff037f567737061e005c3099e9530c6a9 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Sun, 27 Nov 2022 22:54:24 +0100 Subject: [PATCH 22/28] make save report activate/deactivateable by function interface, - update available schema list as new oemetadata patch version was released - update missing is_valid method in version specific parser classes - return the report after validate() was called --- src/omi/dialects/oep/parser.py | 74 ++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 2bf5465..94546ad 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -23,6 +23,7 @@ ALL_OEM_SCHEMAS = [ OEMETADATA_LATEST_SCHEMA, + OEMETADATA_V151_SCHEMA, OEMETADATA_V150_SCHEMA, OEMETADATA_V141_SCHEMA, OEMETADATA_V140_SCHEMA, @@ -134,7 +135,7 @@ def get_schema_by_metadata_version(self, metadata: dict): schema = OEMETADATA_LATEST_SCHEMA return schema - def validate(self, metadata: dict, schema: dict = None): + def validate(self, metadata: dict, schema: dict = None, save_report=True): """ Check whether the given dictionary adheres to the the json-schema and oemetadata specification. If errors are found a jsonschema error @@ -169,7 +170,10 @@ def validate(self, metadata: dict, schema: dict = None): } report.append(error_dict) - create_report_json(report) + if save_report: + create_report_json(report) + + return report def is_valid(self, inp: dict, schema): @@ -192,15 +196,25 @@ def is_valid(self, inp: dict, schema): class JSONParser_1_3(JSONParser): - def is_valid(self, inp: str): - if not super(self, JSONParser_1_3).is_valid(inp): - return False - try: - self.assert_1_3_metastring(inp) - except: - return False + def is_valid(self, inp: dict, schema=OEMETADATA_V130_SCHEMA): + + # 1 - valid JSON? 
+ if isinstance(inp, str): + try: + jsn = json.loads(inp, encode="utf-8") + except ValueError: + return False else: + jsn = inp + + # 2 - valid OEMETADATA + try: + validator = self.get_json_validator(schema) + validator.validate(jsn) return True + except ValidationError: + return False + def parse(self, json_old, *args, **kwargs): # context section @@ -333,15 +347,24 @@ def parse(self, json_old, *args, **kwargs): class JSONParser_1_4(JSONParser): - def is_valid(self, inp: str): - if not super(self, JSONParser_1_4).is_valid(inp): - return False - try: - self.assert_1_3_metastring(inp) - except: - return False + def is_valid(self, inp: dict, schema=OEMETADATA_V141_SCHEMA): + + # 1 - valid JSON? + if isinstance(inp, str): + try: + jsn = json.loads(inp, encode="utf-8") + except ValueError: + return False else: + jsn = inp + + # 2 - valid OEMETADATA + try: + validator = self.get_json_validator(schema) + validator.validate(jsn) return True + except ValidationError: + return False def parse_term_of_use(self, old_license: dict): return structure.TermsOfUse( @@ -727,6 +750,25 @@ def get_table_name(self, metadata_file): class JSONParser_1_5(JSONParser): + def is_valid(self, inp: dict, schema=OEMETADATA_LATEST_SCHEMA): + + # 1 - valid JSON? 
+ if isinstance(inp, str): + try: + jsn = json.loads(inp, encode="utf-8") + except ValueError: + return False + else: + jsn = inp + + # 2 - valid OEMETADATA + try: + validator = self.get_json_validator(schema) + validator.validate(jsn) + return True + except ValidationError: + return False + def parse_from_string( self, string: str, From 3843a5aa06841f35350dcce8740b3c7bf8234ab2 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Tue, 29 Nov 2022 11:54:23 +0100 Subject: [PATCH 23/28] udpate changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e6cedd5..026a0ad 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,7 @@ Changelog current (2022-XX-XX) -------------------- * update parser for v15 to handle former v13 key names, also update outdated License (data-)class in oem_v15 structure. (PR#77) +* change the validation to return a report and enable report file creation option to the arguments of validation method. (PR#81) 0.1.0 (2022-11-18) -------------------- From aabc0a3401539b542bd2f4a5a63a9fa6849f330b Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Tue, 29 Nov 2022 14:12:19 +0100 Subject: [PATCH 24/28] remove print --- src/omi/dialects/oep/parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 94546ad..ca3a69b 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -1022,7 +1022,6 @@ def parse_licence_including_former_structure(licences_element): # filling the license section old_licenses = parse_old_licenses_including_former_key_names(element=json_old) - print(old_licenses) if old_licenses is None: licenses = None else: From f737453f42bf9c9494a4ef12eebc66f2d27f2f2d Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Tue, 29 Nov 2022 15:28:45 +0100 Subject: [PATCH 25/28] deactivate error prone code --- src/omi/dialects/oep/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index ca3a69b..da6555a 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -162,7 +162,7 @@ def validate(self, metadata: dict, schema: dict = None, save_report=True): # https://python-jsonschema.readthedocs.io/en/stable/errors/#handling-validation-errors error_dict = { "oemetadata schema version": schema.get("$id"), - "json path": error.absolute_path, + # "json path": error.absolute_path, "instance path": [i for i in error.absolute_path], "value that raised the error": error.instance, "error message": error.message, From db1fffb17fc516c38aac9ad56db264e3673d4d8f Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Tue, 29 Nov 2022 15:33:24 +0100 Subject: [PATCH 26/28] update changelog for release v0.1.1 --- CHANGELOG.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 026a0ad..bca3685 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,9 @@ Changelog current (2022-XX-XX) -------------------- + +0.1.1 (2022-11-29) +-------------------- * update parser for v15 to handle former v13 key names, also update outdated License (data-)class in oem_v15 structure. (PR#77) * change the validation to return a report and enable report file creation option to the arguments of validation method. 
(PR#81) From b819ca1418662e31ffc05883022759e5b6f818ea Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Tue, 29 Nov 2022 15:36:47 +0100 Subject: [PATCH 27/28] raise omi version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 79ef8ea..ea05d3b 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def read(*names, **kwargs): setup( name="omi", - version="0.1.0", + version="0.1.1", license="AGPL-3.0", description="A library to process and translate open energy metadata.", long_description="%s\n%s" From 12465bd8f6e1a5be65baec176a2005f5bb1dd752 Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Tue, 29 Nov 2022 16:24:38 +0100 Subject: [PATCH 28/28] remove error prone code - full type hints not supported by python 3.8 and this triggers errors in the OEP CI --- src/omi/dialects/oep/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index da6555a..7e701ea 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -39,7 +39,7 @@ def parse_date_or_none(x, *args, **kwargs): def create_report_json( - error_data: list[dict], + error_data, # type list[dict] save_at: pathlib.Path = "reports/", filename: str = "report.json", ): @@ -795,7 +795,7 @@ def parse_from_string( ) def get_any_value_not_none( - self, element: dict, keys: list[str], get_return_default=None + self, element: dict, keys, get_return_default=None # keys: list[str] - removed as not supported by py3.8 ): """ Get the value for a key in a dict - but try multiple key names, in