Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reorder compiled to make results more consistent with the oemetadata schema #96

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Changelog

current (2024-XX-XX)
--------------------
*
* Reorder metadata fields after the JSON input is compiled and prevent context fields from being removed when they are null (`#96 <https://github.com/OpenEnergyPlatform/omi/pull/96>`_)

0.2.0 (2024-01-25)
--------------------
Expand Down
30 changes: 21 additions & 9 deletions src/omi/dialects/oep/compiler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import logging

from omi import structure
from omi.dialects.base.compiler import Compiler
Expand Down Expand Up @@ -51,22 +52,33 @@ def visit_context(self, context: structure.Context, *args, **kwargs):
("grantNo", context.grant_number),
)
if context.funding_agency is not None:
if context.funding_agency.name is not None:
result["fundingAgency"] = context.funding_agency.name
if context.funding_agency.logo is not None:
result["fundingAgencyLogo"] = context.funding_agency.logo
result["fundingAgency"] = context.funding_agency.name
result["fundingAgencyLogo"] = context.funding_agency.logo
if context.publisher is not None:
result["publisherLogo"] = context.publisher.logo
logging.info(
f"The context is parsed from file with the following values: {context}"
)
logging.info(
f"The context class is compiled to the following python dict: {result}"
)
return result

def visit_contribution(self, contribution: structure.Contribution, *args, **kwargs):
return self._construct_dict(
result = self._construct_dict(
("title", contribution.contributor.name),
("email", contribution.contributor.email),
("object", contribution.object),
("comment", contribution.comment),
("date", compile_date_or_none(contribution.date, "%Y-%m-%d")),
)
logging.info(
f"The contributions are parsed from file with the following values: {contribution}"
)
logging.info(
f"The contributions class is compiled to the following python dict: {result}"
)
return result

def visit_language(self, language: structure.Language, *args, **kwargs):
return str(language)
Expand Down Expand Up @@ -133,7 +145,7 @@ def visit_terms_of_use(self, terms_of_use: structure.TermsOfUse):
return self._construct_dict(
("instruction", terms_of_use.instruction),
("attribution", terms_of_use.attribution),
**license_kwargs
**license_kwargs,
)

def visit_resource(self, resource: structure.Resource, *args, **kwargs):
Expand Down Expand Up @@ -367,20 +379,20 @@ def visit_metadata(self, metadata: oem_v15.OEPMetadata, *args, **kwargs):
("title", metadata.title),
("id", metadata.identifier),
("description", metadata.description),
("language", metadata.languages),
("subject", metadata.subject),
("language", metadata.languages),
("keywords", metadata.keywords),
("publicationDate", publication_date),
("context", metadata.context),
("spatial", metadata.spatial),
("temporal", metadata.temporal),
("review", metadata.review),
("sources", metadata.sources),
("licenses", metadata.license),
("contributors", metadata.contributions),
("resources", metadata.resources),
("@id", metadata.databus_identifier),
("@context", metadata.databus_context),
("review", metadata.review),
metaMetadata=self._construct_dict(
("metadataVersion", self.__METADATA_VERSION),
metadataLicense=self._construct_dict(
Expand All @@ -399,5 +411,5 @@ def visit_metadata(self, metadata: oem_v15.OEPMetadata, *args, **kwargs):
null="If not applicable use: null",
todo="If a value is not yet available, use: todo",
),
**kwargs
**kwargs,
)
76 changes: 44 additions & 32 deletions src/omi/dialects/oep/parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import json
import logging
import pathlib
Expand Down Expand Up @@ -32,7 +31,7 @@
]


def parse_date_or_none(x):
def parse_date_or_none(x, fieldname=None, element=None):
if x is None:
pass
elif type(x) == int:
Expand All @@ -53,16 +52,22 @@ def parse_date_or_none(x):
try:
date_time = dateutil.parser.parse(x)
except Exception:
raise ParserException(f"invalid value for date: {x}")
raise ParserException(
f"In fields {fieldname} {element} element is a invalid value for date: {x}"
)
if re.match("^[123][0-9]{3}-[0-9]{1,2}-[0-9]{1,2}$", x):
# date only
x = date_time.date()
else:
x = date_time
else:
raise ParserException(f"invalid value for date: {x}")
raise ParserException(
f"In fields {fieldname} {element} element is a invalid value for date: {x}"
)
else:
raise ParserException(f"invalid type for date: {type(x)}")
raise ParserException(
f"In fields {fieldname} {element} element is a invalid type for date: {type(x)}"
)
return x


Expand Down Expand Up @@ -203,11 +208,10 @@ def validate(self, metadata: dict, schema: dict = None, save_report=True):
return report

def is_valid(self, inp: dict, schema):

# 1 - valid JSON?
if isinstance(inp, str):
try:
jsn = json.loads(inp, encode="utf-8")
jsn = json.loads(inp)
except ValueError:
return False
else:
Expand All @@ -224,7 +228,6 @@ def is_valid(self, inp: dict, schema):

class JSONParser_1_3(JSONParser):
def is_valid(self, inp: dict, schema=OEMETADATA_V130_SCHEMA):

# 1 - valid JSON?
if isinstance(inp, str):
try:
Expand Down Expand Up @@ -327,7 +330,9 @@ def parse(self, json_old, *args, **kwargs):
else:
resources = []
if len(old_resources) == 0:
raise ParserException("Resource field doesn't have any child entity")
raise ParserException(
"The field Resource field is empty! Please provide a description of your data resources e.g. a table schema."
)
for resource in old_resources:
old_fields = resource.get("fields")
if old_fields is None:
Expand Down Expand Up @@ -374,7 +379,6 @@ def parse(self, json_old, *args, **kwargs):

class JSONParser_1_4(JSONParser):
def is_valid(self, inp: dict, schema=OEMETADATA_V141_SCHEMA):

# 1 - valid JSON?
if isinstance(inp, str):
try:
Expand Down Expand Up @@ -406,13 +410,14 @@ def parse_term_of_use(self, old_license: dict):
def parse(self, json_old: dict, *args, **kwargs):
# context section
if "id" not in json_old:
raise ParserException("metadata string does not contain an id")
raise ParserException(
"The metadata string does not contain an id. This field is required."
)

inp_context = json_old.get("context")
if inp_context is None:
context = None
else:

funding_agency = None
if "fundingAgency" in inp_context:
funding_agency = structure.Agency(
Expand Down Expand Up @@ -506,11 +511,13 @@ def parse(self, json_old: dict, *args, **kwargs):
name=old_contributor.get("title"),
email=old_contributor.get("email"),
),
date=parse_date_or_none(old_contributor.get("date")),
date=parse_date_or_none(
old_contributor.get("date"), f"{cont_element}contributors.data"
),
obj=old_contributor.get("object"),
comment=old_contributor.get("comment"),
)
for old_contributor in old_contributors
for cont_element, old_contributor in enumerate(old_contributors)
]

# extending with script-user information
Expand Down Expand Up @@ -777,7 +784,6 @@ def get_table_name(self, metadata_file):

class JSONParser_1_5(JSONParser):
def is_valid(self, inp: dict, schema=OEMETADATA_LATEST_SCHEMA):

# 1 - valid JSON?
if isinstance(inp, str):
try:
Expand Down Expand Up @@ -828,12 +834,12 @@ def get_any_value_not_none(
):
"""
Get the value for a key in a dict - but try multiple key names, in
case they have changed in eralryer oemetadata versions.
case they have changed in earlier oemetadata versions.

Args:
element (dict): dict element of the input metadata
keys (list[str]): list of key name options
get_return_default (_type_, optional): A default return vlaue if key is not present. Defaults to None.
get_return_default (_type_, optional): A default return value if key is not present. Defaults to None.

Returns:
any: By default it is the value at the key or None - but can be any as the value is not strict.
Expand Down Expand Up @@ -900,7 +906,6 @@ def parse(self, json_old: dict, *args, **kwargs):
if inp_context is None:
context = None
else:

funding_agency = None
if "fundingAgency" in inp_context:
funding_agency = oem_v15.Agency(
Expand Down Expand Up @@ -1024,25 +1029,25 @@ def parse_old_licenses_including_former_key_names(element: dict):
element, key_name_options.get("licenses_equal")
)

def parse_licence_including_former_structure(licences_element):
def parse_license_including_former_structure(licenses_element):
"""
The licenses key had a structural difference in former oemetadata versions.
In version 1.3 the key was called license and was a single object/dict; in the
current version this key is called licenses and is a list of objects/dicts.
The key names inside the dict also differ between versions.
"""
if isinstance(licences_element, list):
if isinstance(licenses_element, list):
_result = [
self.parse_term_of_use(old_license) for old_license in old_licenses
]

if isinstance(licences_element, dict):
if isinstance(licenses_element, dict):
_mapping_former_keys = {
"name": licences_element.get("id"),
"title": licences_element.get("name"),
"path": licences_element.get("url"),
"instruction": licences_element.get("instruction"),
"attribution": licences_element.get("copyright"),
"name": licenses_element.get("id"),
"title": licenses_element.get("name"),
"path": licenses_element.get("url"),
"instruction": licenses_element.get("instruction"),
"attribution": licenses_element.get("copyright"),
}

_result = [self.parse_term_of_use(old_license=_mapping_former_keys)]
Expand All @@ -1054,8 +1059,8 @@ def parse_licence_including_former_structure(licences_element):
if old_licenses is None:
licenses = None
else:
licenses = parse_licence_including_former_structure(
licences_element=old_licenses
licenses = parse_license_including_former_structure(
licenses_element=old_licenses
)

# filling the contributors section
Expand All @@ -1071,11 +1076,13 @@ def parse_licence_including_former_structure(licences_element):
),
email=old_contributor.get("email"),
),
date=parse_date_or_none(old_contributor.get("date")),
date=parse_date_or_none(
old_contributor.get("date"), "contributors.data", cont_element
),
obj=old_contributor.get("object"),
comment=old_contributor.get("comment"),
)
for old_contributor in old_contributors
for cont_element, old_contributor in enumerate(old_contributors)
]

# extending with script-user information
Expand All @@ -1085,7 +1092,9 @@ def parse_licence_including_former_structure(licences_element):
# Code added to raise exception when resource is empty
else:
if len(old_resources) == 0:
raise ParserException("Resources field is empty!")
raise ParserException(
"The resources field is empty! Please provide a description of you data resources e.g. a table schema. See https://github.com/OpenEnergyPlatform/oemetadata/blob/master/metadata/latest/metadata_key_description.md#resource-keys."
)
resources = []
for resource in old_resources:
old_schema = resource.get("schema")
Expand Down Expand Up @@ -1145,7 +1154,10 @@ def parse_licence_including_former_structure(licences_element):
for fk in old_foreign_keys:
old_reference = fk.get("reference")
if old_reference is None:
raise ParserException("Foreign key without reference:", fk)
raise ParserException(
"The Foreign key you provided is missing reference information:",
fk,
)
source_fields = [
field_dict[field_name]
for field_name in fk.get("fields", [])
Expand Down
Loading