diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2ef7568..bca3685 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,11 @@ Changelog current (2022-XX-XX) -------------------- +0.1.1 (2022-11-29) +-------------------- +* Update the parser for v15 to handle former v13 key names; also update the outdated License (data-)class in the oem_v15 structure. (PR#77) +* Change the validation to return a report and add an option to the validation method's arguments to enable report file creation. (PR#81) + 0.1.0 (2022-11-18) -------------------- * Add validation and helper functionality - validation based on json schema and the oemetadata schema files that are published for each release (PR#63) diff --git a/README.rst b/README.rst index 8bcd339..37c37a8 100644 --- a/README.rst +++ b/README.rst @@ -147,6 +147,32 @@ Module usage:: schema = ... get a schema or import form oemetadata module parser.is_valid(metadata, schema) +**Additional Fields - not related to the OEMetadata specification** + +Sometimes it is necessary to store additional key-value pairs along with the keys included in the OEMetadata specification. +OMI's compiler methods are capable of handling additional arguments or key-value arguments, but this must be +explicitly specified. + +To add additional key-value pairs, you must: + + NOTE: If you save the renderer return value in a json file and try to parse the file, the extra field is not included. + You must read the json file using Python and then add the extra field back to the oemetadata object as shown below. 
+ +1 Parse the oemetadata from a json file / variable into OMI's internal structure:: + + from omi.dialects.oep.dialect import OEP_V_1_5_Dialect + + min_inp = '{"id":"unique_id"}' # or read from json file + minimal_oemetadata15 = OEP_V_1_5_Dialect.parse(min_inp) + +2 Now you can get (from a json file) or define the additional data:: + + data = "test" + +3 And add it to the OEMetadata object that was parsed in step 1 by adding a key-value argument:: + + compiled = OEP_V_1_5_Dialect.compile(minimal_oemetadata15, _additionalField=data) + rendered = OEP_V_1_5_Dialect.render(compiled) Development =========== diff --git a/setup.py b/setup.py index 79ef8ea..ea05d3b 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def read(*names, **kwargs): setup( name="omi", - version="0.1.0", + version="0.1.1", license="AGPL-3.0", description="A library to process and translate open energy metadata.", long_description="%s\n%s" diff --git a/src/omi/dialects/oep/compiler.py b/src/omi/dialects/oep/compiler.py index 23ec510..d79ee4c 100644 --- a/src/omi/dialects/oep/compiler.py +++ b/src/omi/dialects/oep/compiler.py @@ -241,7 +241,7 @@ class JSONCompilerOEM15(JSONCompiler): the metadata structure. 
""" - __METADATA_VERSION = "OEP-1.5.1" + __METADATA_VERSION = "OEP-1.5.2" def visit(self, obj, *args, **kwargs): """ @@ -302,6 +302,13 @@ def visit_temporal(self, temporal: oem_v15.Temporal, *args, **kwargs): ("referenceDate", self._compile_date(temporal.reference_date, "%Y-%m-%d")), ("timeseries", temporal.timeseries_collection), ) + + def visit_license(self, lic: oem_v15.License, *args, **kwargs): + return self._construct_dict( + ("name", lic.name), + ("title", lic.title), + ("path", lic.path), + ) def visit_isAbout(self, isAbout: oem_v15.IsAbout, *args, **kwargs): return self._construct_dict(("name", isAbout.name), ("path", isAbout.path)) @@ -378,4 +385,5 @@ def visit_metadata(self, metadata: oem_v15.OEPMetadata, *args, **kwargs): null="If not applicable use: null", todo="If a value is not yet available, use: todo", ), + **kwargs ) diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 78251b0..7e701ea 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -23,6 +23,7 @@ ALL_OEM_SCHEMAS = [ OEMETADATA_LATEST_SCHEMA, + OEMETADATA_V151_SCHEMA, OEMETADATA_V150_SCHEMA, OEMETADATA_V141_SCHEMA, OEMETADATA_V140_SCHEMA, @@ -38,7 +39,7 @@ def parse_date_or_none(x, *args, **kwargs): def create_report_json( - error_data: list[dict], + error_data, # type list[dict] save_at: pathlib.Path = "reports/", filename: str = "report.json", ): @@ -53,7 +54,9 @@ def create_report_json( class JSONParser(Parser): - # one_schema_was_valid = False + + def normalize_key_names_of_input(iput: dict): + pass def load_string(self, string: str, *args, **kwargs): return json.loads(string) @@ -130,12 +133,9 @@ def get_schema_by_metadata_version(self, metadata: dict): "Metadata does not contain the expected 'metaMetadata' or 'metadata_version' key. Fallback to latest schema." 
) schema = OEMETADATA_LATEST_SCHEMA - - print(schema.get("$id")) - return schema - def validate(self, metadata: dict, schema: dict = None): + def validate(self, metadata: dict, schema: dict = None, save_report=True): """ Check whether the given dictionary adheres to the the json-schema and oemetadata specification. If errors are found a jsonschema error @@ -162,7 +162,7 @@ def validate(self, metadata: dict, schema: dict = None): # https://python-jsonschema.readthedocs.io/en/stable/errors/#handling-validation-errors error_dict = { "oemetadata schema version": schema.get("$id"), - "json path": error.absolute_path, + # "json path": error.absolute_path, "instance path": [i for i in error.absolute_path], "value that raised the error": error.instance, "error message": error.message, @@ -170,7 +170,10 @@ def validate(self, metadata: dict, schema: dict = None): } report.append(error_dict) - create_report_json(report) + if save_report: + create_report_json(report) + + return report def is_valid(self, inp: dict, schema): @@ -193,15 +196,25 @@ def is_valid(self, inp: dict, schema): class JSONParser_1_3(JSONParser): - def is_valid(self, inp: str): - if not super(self, JSONParser_1_3).is_valid(inp): - return False - try: - self.assert_1_3_metastring(inp) - except: - return False + def is_valid(self, inp: dict, schema=OEMETADATA_V130_SCHEMA): + + # 1 - valid JSON? 
+ if isinstance(inp, str): + try: + jsn = json.loads(inp, encode="utf-8") + except ValueError: + return False else: + jsn = inp + + # 2 - valid OEMETADATA + try: + validator = self.get_json_validator(schema) + validator.validate(jsn) return True + except ValidationError: + return False + def parse(self, json_old, *args, **kwargs): # context section @@ -334,15 +347,24 @@ def parse(self, json_old, *args, **kwargs): class JSONParser_1_4(JSONParser): - def is_valid(self, inp: str): - if not super(self, JSONParser_1_4).is_valid(inp): - return False - try: - self.assert_1_3_metastring(inp) - except: - return False + def is_valid(self, inp: dict, schema=OEMETADATA_V141_SCHEMA): + + # 1 - valid JSON? + if isinstance(inp, str): + try: + jsn = json.loads(inp, encode="utf-8") + except ValueError: + return False else: + jsn = inp + + # 2 - valid OEMETADATA + try: + validator = self.get_json_validator(schema) + validator.validate(jsn) return True + except ValidationError: + return False def parse_term_of_use(self, old_license: dict): return structure.TermsOfUse( @@ -728,6 +750,25 @@ def get_table_name(self, metadata_file): class JSONParser_1_5(JSONParser): + def is_valid(self, inp: dict, schema=OEMETADATA_LATEST_SCHEMA): + + # 1 - valid JSON? + if isinstance(inp, str): + try: + jsn = json.loads(inp, encode="utf-8") + except ValueError: + return False + else: + jsn = inp + + # 2 - valid OEMETADATA + try: + validator = self.get_json_validator(schema) + validator.validate(jsn) + return True + except ValidationError: + return False + def parse_from_string( self, string: str, @@ -753,19 +794,47 @@ def parse_from_string( **(parse_kwargs or {}), ) + def get_any_value_not_none( + self, element: dict, keys, get_return_default=None #keys: list[str] - reove as not support by py3.8 + ): + """ + Get the value for a key in a dict - but try multiple key names, in + case they have changed in eralryer oemetadata versions. 
+ + Args: + element (dict): dict element of the input metadata + keys (list[str]): list of key name options + get_return_default (_type_, optional): A default return vlaue if key is not present. Defaults to None. + + Returns: + any: By default it is the value at the key or None - but can be any as the value is not strict. + """ + + for key_name in keys: + _element = element.get(key_name, get_return_default) + if _element is None: + continue + + return _element + def parse_term_of_use(self, old_license: dict): return oem_v15.TermsOfUse( lic=oem_v15.License( - identifier=old_license.get("name"), - name=old_license.get("title"), + name=old_license.get("name"), + title=old_license.get("title"), path=old_license.get("path"), ), instruction=old_license.get("instruction"), attribution=old_license.get("attribution"), ) - def parse_timeseries(self, old_timeseries: dict): - pass + def ensure_json_keys_lowercase(json_old: dict): + element = json_old # element must be part of json_old not hole json_old + if isinstance(element, dict): + pass + + if isinstance(element, list): + pass def parse(self, json_old: dict, *args, **kwargs): """_summary_ @@ -862,32 +931,103 @@ def parse(self, json_old: dict, *args, **kwargs): timeseries_collection=timeseries, ) + def parse_sources_lincese_including_former_key_names(element: dict): + licenses_new = "licenses" + licenses_old = "license" + + if isinstance(element.get(licenses_new), list): + return [self.parse_term_of_use(l) for l in element.get(licenses_new)] + + if isinstance(element.get(licenses_old), str): + name = element.get(licenses_old) + + # avoide empty structures like [{}] + if name is None: + _result = [] + else: + from_13_license = oem_v15.License(name=name) + _result = [oem_v15.TermsOfUse(lic=from_13_license)] + + return _result + + def parse_source_including_former_key_names(key: dict): + # sources key name options - including key names pre oem v1.4 + key_name_options = { + "title_equal": ["title", "name"], + "path_equal": 
["path", "url"], + "licenses_equal": ["licenses", "license"], + } + + source = oem_v15.Source( + title=self.get_any_value_not_none( + element=key, keys=key_name_options.get("title_equal") + ), + description=key.get("description"), + path=self.get_any_value_not_none( + element=key, keys=key_name_options.get("path_equal") + ), + licenses=parse_sources_lincese_including_former_key_names(element=key), + ) + + return source + # filling the source section - old_sources = json_old.get("sources") + # expected to be a list but can also be a dict in old versions + old_sources: list = json_old.get("sources") if old_sources is None: sources = None else: sources = [ - oem_v15.Source( - title=old_source.get("title"), - description=old_source.get("description"), - path=old_source.get("path"), - licenses=[ - self.parse_term_of_use(l) - for l in old_source.get("licenses", []) - ], - ) + parse_source_including_former_key_names(key=old_source) for old_source in old_sources ] + def parse_old_licenses_including_former_key_names(element: dict): + """ + Parse license from imput data - also handle key name variations from + early oemetadata versions. + """ + key_name_options = { + "licenses_equal": ["licenses", "license"], + } + + return self.get_any_value_not_none( + element, key_name_options.get("licenses_equal") + ) + + def parse_licence_including_former_structure(licences_element): + """ + The lincences key was got a structural differnece in former oemetada versions. + In Version 1.3 the key was called lincense and was a singe object/dict, in the + current version this key is calles licences and is a list of objects/dicts. + Also the key names in the dicht are deviating. 
+ """ + if isinstance(licences_element, list): + _result = [ + self.parse_term_of_use(old_license) for old_license in old_licenses + ] + + if isinstance(licences_element, dict): + _mapping_former_keys = { + "name": licences_element.get("id"), + "title": licences_element.get("name"), + "path": licences_element.get("url"), + "instruction": licences_element.get("instruction"), + "attribution": licences_element.get("copyright"), + } + + _result = [self.parse_term_of_use(old_license=_mapping_former_keys)] + + return _result + # filling the license section - old_licenses = json_old.get("licenses") + old_licenses = parse_old_licenses_including_former_key_names(element=json_old) if old_licenses is None: licenses = None else: - licenses = [ - self.parse_term_of_use(old_license) for old_license in old_licenses - ] + licenses = parse_licence_including_former_structure( + licences_element=old_licenses + ) # filling the contributers section old_contributors = json_old.get("contributors") @@ -897,7 +1037,9 @@ def parse(self, json_old: dict, *args, **kwargs): contributors = [ oem_v15.Contribution( contributor=oem_v15.Person( - name=old_contributor.get("title"), + name=self.get_any_value_not_none( + element=old_contributor, keys=["title", "name"] + ), email=old_contributor.get("email"), ), date=parse_date_or_none(old_contributor.get("date")), @@ -914,7 +1056,7 @@ def parse(self, json_old: dict, *args, **kwargs): # Code added to raise exception when resource is empty else: if len(old_resources) == 0: - raise ParserException("Resource field doesn't have any child entity") + raise ParserException("Resources field is empty!") resources = [] for resource in old_resources: old_schema = resource.get("schema") @@ -925,6 +1067,7 @@ def parse(self, json_old: dict, *args, **kwargs): old_fields = old_schema.get("fields") if old_fields is None: fields = None + logging.info(f"Parse fields from: {old_fields}") else: fields = [] @@ -1002,6 +1145,7 @@ def parse(self, json_old: dict, *args, 
**kwargs): primary_key=resource["schema"].get("primaryKey"), foreign_keys=foreign_keys, ) + old_dialect = resource.get("dialect") if old_dialect is None: dialect = None @@ -1068,101 +1212,6 @@ def parse(self, json_old: dict, *args, **kwargs): ) return metadata - def assert_1_5_metastring(self, json_string: str): - """Checks string conformity to OEP Metadata Standard Version 1.5 - - Parameters - ---------- - json_string: str - The JSON string to be checked. - - Returns - ------- - bool - True if valid, Raises Exception otherwise. - """ - - keys = [ - "title", - "description", - "language", - "spatial", - "temporal", - "sources", - "license", - "contributions", - "resources", - "metadata_version", - ] - subkeys_spatial = ["location", "extent", "resolution"] - subkeys_timeseries = [ - "start", - "end", - "resolution", - "alignment", - "aggregationType", - ] - subkeys_temporal = ["reference_date", "timeseries"] - subkeys_license = ["id", "name", "version", "url", "instruction", "copyright"] - object_subkeys = { - "spatial": subkeys_spatial, - "temporal": subkeys_temporal, - "license": subkeys_license, - } - subkeys_sources = [ - "name", - "description", - "url", - "license", - "copyright", - ] # in list of objects - subkeys_contributors = [ - "name", - "email", - "date", - "comment", - ] # in list of objects - subkeys_resources = ["name", "format", "fields"] # in list of objects - list_subkeys = { - "sources": subkeys_sources, - "contributions": subkeys_contributors, - "resources": subkeys_resources, - } - subkeys_resources_fields = ["name", "description", "unit"] # in list of objects - - json_dict = json.loads(json_string) - try: - # check if all top level keys are present - for i in keys: - if not i in json_dict.keys(): - raise Exception( - 'The String did not contain the key "{0}"'.format(i) - ) - # check for all keys in second level objects - for key in object_subkeys: - for subkey in object_subkeys[key]: - if not subkey in json_dict[key]: - raise Exception( - 'The 
"{0}" object did not contain a "{1}" key'.format( - key, subkey - ) - ) - # check for all objects in lists if they contain all required keys - for key in list_subkeys: - for list_element in json_dict[key]: - for subkey in list_subkeys[key]: - if not subkey in list_element: - raise Exception( - 'An object in "{0}" is missing a "{1}" key'.format( - key, subkey - ) - ) - except Exception as error: - print( - "The input String does not conform to metadatastring version 1.3 standard" - ) - print(error) - # TODO make function check all subkeys as well def has_rogue_keys(self, json_string): """Checks all keys if they are part of the metadata specification. Gives warnings if not. diff --git a/src/omi/oem_structures/oem_v15.py b/src/omi/oem_structures/oem_v15.py index 9a1916e..866aa48 100644 --- a/src/omi/oem_structures/oem_v15.py +++ b/src/omi/oem_structures/oem_v15.py @@ -96,18 +96,18 @@ class License(Compilable): def __init__( self, name: str = None, - identifier: str = None, - text: str = None, + title: str = None, path: str = None, - other_references: Iterable[str] = None, - comment: str = None, + # instruction: str = None, + # attribution: str = None, + # other_references: Iterable[str] = None, ): self.name = name + self.title = title self.path = path - self.identifier = identifier - self.other_references = other_references - self.text = text - self.comment = comment + # self.instruction = instruction + # self.attribution = attribution + # self.other_references = other_references @staticmethod def instance_name_from_id(identifier: str): @@ -122,9 +122,10 @@ class TermsOfUse(Compilable): def __init__( self, instruction: str = None, attribution: str = None, lic: License = None ): + self.license = lic self.instruction = instruction self.attribution = attribution - self.license = lic + class Source(Compilable): diff --git a/tests/data/metadata_v15.json b/tests/data/metadata_v15.json index f902875..81893a8 100644 --- a/tests/data/metadata_v15.json +++ 
b/tests/data/metadata_v15.json @@ -248,7 +248,7 @@ "badge": "Platinum" }, "metaMetadata": { - "metadataVersion": "OEP-1.5.1", + "metadataVersion": "OEP-1.5.2", "metadataLicense": { "name": "CC0-1.0", "title": "Creative Commons Zero v1.0 Universal", diff --git a/tests/test_dialects/internal_structures.py b/tests/test_dialects/internal_structures.py index e05dc8c..97d9f81 100644 --- a/tests/test_dialects/internal_structures.py +++ b/tests/test_dialects/internal_structures.py @@ -359,6 +359,19 @@ ) ############################################### oem v151 ######################################################### +cc010_v15 = oem_v15.License( + name="CC0-1.0", + title="Creative Commons Zero v1.0 Universal", + path="https://creativecommons.org/publicdomain/zero/1.0/legalcode", +) + +odbl10_v15 = oem_v15.License( + name="ODbL-1.0", + title="Open Data Commons Open Database License 1.0", + path="https://opendatacommons.org/licenses/odbl/1.0/", +) + + metadata_v_1_5 = oem_v15.OEPMetadata( name="oep_metadata_table_example_v151", title="Example title for metadata example - Version 1.5.1", @@ -446,7 +459,7 @@ path="https://github.com/OpenEnergyPlatform", licenses=[ oem_v15.TermsOfUse( - lic=cc010, + lic=cc010_v15, instruction="You are free: To Share, To Create, To Adapt", attribution="© Reiner Lemoine Institut", ) @@ -458,7 +471,7 @@ path="https://www.openstreetmap.org/", licenses=[ oem_v15.TermsOfUse( - lic=odbl10, + lic=odbl10_v15, instruction="You are free: To Share, To Create, To Adapt; As long as you: Attribute, Share-Alike, Keep open!", attribution="© OpenStreetMap contributors", ) @@ -467,7 +480,7 @@ ], terms_of_use=[ oem_v15.TermsOfUse( - lic=odbl10, + lic=odbl10_v15, instruction="You are free: To Share, To Create, To Adapt; As long as you: Attribute, Share-Alike, Keep open!", attribution="© Reiner Lemoine Institut © OpenStreetMap contributors", )