Skip to content

Commit

Permalink
Merge pull request #78 from OpenEnergyPlatform/release/v0.1.0
Browse files Browse the repository at this point in the history
Release/v0.1.0
  • Loading branch information
jh-RLI authored Nov 18, 2022
2 parents 14570fa + 44759b9 commit e5b855b
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 24 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ htmlcov
.idea
*.iml
*.komodoproject
/0_local_test
/reports
.vscode

# Complexity
Expand All @@ -70,6 +72,7 @@ docs/_build
.bootstrap
.appveyor.token
*.bak
/1_env

# Mypy Cache
.mypy_cache/
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ Changelog
current (2022-XX-XX)
--------------------

0.1.0 (2022-11-18)
--------------------
* Add validation and helper functionality - validation based on json schema and the oemetadata schema files that are published for each release (PR#63)

0.0.9 (2022-10-31)
--------------------
Expand Down
25 changes: 25 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,31 @@ CLI - oemetadata conversion from v1.4 to v1.5::

omi convert -i {input/path} -o {output/path}

**Validation**

The validation is based on `jsonschema`. We release a schema with each `oemetadata` release; that schema
can be used to validate the user metadata. The dialect currently does not support direct access to the
validation. This will be updated soon.
Calling parser.validate() creates a report.json containing information to debug possible errors. It takes
two arguments: the first one is the metadata and the second, optional one is the schema. By default (if no schema is passed)
the validation will try to get the matching schema for the current metadata.

Module usage::

# You can import the JSONParser directly like this:
import json
from omi.dialects.oep.parser import JSONParser

with open("tests/data/metadata_v15.json", "r", encoding="utf-8") as f:
metadata = json.load(f)

parser = JSONParser()
parser.validate(metadata)
# check if your metadata is valid for the given schema
schema = ... get a schema or import one from the oemetadata module
parser.is_valid(metadata, schema)


Development
===========
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def read(*names, **kwargs):

setup(
name="omi",
version="0.0.9",
version="0.1.0",
license="AGPL-3.0",
description="A library to process and translate open energy metadata.",
long_description="%s\n%s"
Expand Down Expand Up @@ -70,7 +70,7 @@ def read(*names, **kwargs):
# eg: 'keyword1', 'keyword2', 'keyword3',
],
python_requires=">=3.5",
install_requires=["click", "rdfLib", "python-dateutil"],
install_requires=["click", "rdfLib", "python-dateutil", "jsonschema", "oemetadata>=1.5.2"],
tests_require=["tox", "pytest"],
extras_require={
"dev": ["black", "isort", "pre-commit"]
Expand Down
183 changes: 161 additions & 22 deletions src/omi/dialects/oep/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,33 @@
# -*- coding: utf-8 -*-

import json
import logging
import pathlib

import jsonschema
from dateutil.parser import parse as parse_date
from jsonschema import ValidationError
# oemetadata
from metadata.latest.schema import OEMETADATA_LATEST_SCHEMA
from metadata.v130.schema import OEMETADATA_V130_SCHEMA
from metadata.v140.schema import OEMETADATA_V140_SCHEMA
from metadata.v141.schema import OEMETADATA_V141_SCHEMA
from metadata.v150.schema import OEMETADATA_V150_SCHEMA
from metadata.v151.schema import OEMETADATA_V151_SCHEMA

from omi import structure
from omi.dialects.base.parser import Parser
from omi.dialects.base.parser import ParserException
from omi.oem_structures import oem_v15

# Default value of the ``schemas`` argument of
# ``JSONParser.get_any_schema_valid``; that method iterates the list in the
# order given here.
# NOTE(review): OEMETADATA_V151_SCHEMA is imported above but not listed here -
# presumably OEMETADATA_LATEST_SCHEMA already covers it; confirm.
ALL_OEM_SCHEMAS = [
OEMETADATA_LATEST_SCHEMA,
OEMETADATA_V150_SCHEMA,
OEMETADATA_V141_SCHEMA,
OEMETADATA_V140_SCHEMA,
OEMETADATA_V130_SCHEMA,
]


def parse_date_or_none(x, *args, **kwargs):
if x is None:
Expand All @@ -18,29 +37,159 @@ def parse_date_or_none(x, *args, **kwargs):
return parse_date(x, *args, **kwargs)


def create_report_json(
    error_data: list,
    save_at="reports/",
    filename: str = "report.json",
):
    """Write the collected validation errors to a JSON report file.

    The target directory is created (including missing parents) if it does
    not exist yet. The report is written even when ``error_data`` is empty.

    Parameters
    ----------
    error_data
        List of JSON-serializable dictionaries, one per validation error.
    save_at
        Directory the report is written to, given as ``str`` or
        ``pathlib.Path``. A trailing slash is not required.
    filename
        Name of the report file inside ``save_at``.

    Returns
    -------
    Nothing
    """
    report_dir = pathlib.Path(save_at)
    report_dir.mkdir(parents=True, exist_ok=True)

    # Join via pathlib instead of string concatenation so that a directory
    # passed without a trailing separator still yields "<dir>/<filename>".
    report_file = report_dir / filename
    with open(report_file, "w", encoding="utf-8") as fp:
        json.dump(error_data, fp, indent=4, sort_keys=False)

    print(
        f"Created error report containing {len(error_data)} errors at: {report_file}"
    )


class JSONParser(Parser):
# one_schema_was_valid = False

def load_string(self, string: str, *args, **kwargs):
    """Deserialize a JSON document given as text.

    Additional positional and keyword arguments are accepted but not used.
    """
    decoded = json.loads(string)
    return decoded

def is_valid(self, inp: str):
"""Checks the validity of a JSON string
def get_json_validator(self, schema: dict):
    """
    Get the jsonschema validator that matches the schema.
    Also checks if the schema itself is valid.

    Args:
        schema (dict): The JSON schema to build the validator for,
            e.g. one of the oemetadata schemas.

    Returns:
        validator: jsonschema.Draft202012Validator
    """
    # check_schema raises if the schema is malformed, so a validator is
    # only ever returned for a usable schema.
    jsonschema.Draft202012Validator.check_schema(schema)
    return jsonschema.Draft202012Validator(schema=schema)

def get_any_schema_valid(
    self,
    metadata: dict,
    schemas: list = ALL_OEM_SCHEMAS,
):
    """
    Additional helper function - get any schema that is valid for the metadata.

    The schemas are tried in the given order and the search stops at the
    first schema the metadata validates against.

    Args:
        metadata (dict): The metadata to find a matching schema for.
        schemas (list): Candidate JSON schemas, tried in order.

    Returns:
        dict or None: The first valid schema, or None if the metadata is
        not valid for any of the given schemas.
    """
    for schema in schemas:
        if self.is_valid(inp=metadata, schema=schema):
            return schema
    return None

def get_schema_by_metadata_version(self, metadata: dict):
    """
    Select the oemetadata schema that matches the version stated in the metadata.

    The version is read from the top-level ``metadata_version`` key
    (values "1.3" / "OEP-1.3") and from ``metaMetadata["metadataVersion"]``
    (values "OEP-1.4.0" to "OEP-1.5.1"). A match on ``metaMetadata`` takes
    precedence. If no known version string is found, the latest schema is
    returned.

    Args:
        metadata (dict): The metadata to inspect.

    Returns:
        dict: The matching oemetadata JSON schema.
    """
    oem_13 = ("1.3", "OEP-1.3")
    schema_by_version = {
        "OEP-1.4.0": OEMETADATA_V140_SCHEMA,
        "OEP-1.4.1": OEMETADATA_V141_SCHEMA,
        "OEP-1.5.0": OEMETADATA_V150_SCHEMA,
        "OEP-1.5.1": OEMETADATA_V151_SCHEMA,
    }

    schema = None

    if metadata.get("metadata_version") in oem_13:
        schema = OEMETADATA_V130_SCHEMA

    # A missing "metadataVersion" key or a non-dict "metaMetadata" value
    # must not raise here; such metadata simply falls through to the
    # fallback below.
    meta_metadata = metadata.get("metaMetadata")
    if isinstance(meta_metadata, dict):
        version = meta_metadata.get("metadataVersion")
        if isinstance(version, str):
            schema = schema_by_version.get(version, schema)

    # fallback to latest schema if metadata does not contain the expected metadata version string
    if schema is None:
        logging.info(
            "Metadata does not contain the expected 'metaMetadata' or 'metadata_version' key. Fallback to latest schema."
        )
        schema = OEMETADATA_LATEST_SCHEMA

    # Report the selected schema through logging rather than stdout.
    logging.debug("Selected oemetadata schema: %s", schema.get("$id"))

    return schema

def validate(self, metadata: dict, schema: dict = None):
    """
    Check whether the given dictionary adheres to the json-schema
    and oemetadata specification. A jsonschema error report is written to
    the directory 'reports/'; it contains one entry per validation error
    and is an empty list if no errors are found.

    Parameters
    ----------
    metadata
        The dictionary to validate
    schema: optional
        The jsonschema used for validation.
        Default is None, in which case the schema is selected based on
        the metadata version stated in the metadata.

    Returns
    -------
    Nothing
    """
    if not schema:
        schema = self.get_schema_by_metadata_version(metadata=metadata)
    validator = self.get_json_validator(schema)

    schema_id = schema.get("$id")
    # https://python-jsonschema.readthedocs.io/en/stable/errors/#handling-validation-errors
    # error.absolute_path / error.schema_path are deques, which json cannot
    # serialize - store the JSON path string and plain lists instead.
    report = [
        {
            "oemetadata schema version": schema_id,
            "json path": error.json_path,
            "instance path": list(error.absolute_path),
            "value that raised the error": error.instance,
            "error message": error.message,
            "schema_path": list(error.schema_path),
        }
        for error in sorted(validator.iter_errors(instance=metadata), key=str)
    ]

    create_report_json(report)

def is_valid(self, inp: dict, schema=None):
    """
    Check whether the input is valid JSON and valid oemetadata.

    Parameters
    ----------
    inp
        The metadata, either as a JSON string or as an already parsed
        object (usually a dictionary).
    schema: optional
        The jsonschema used for validation. If None, the schema is
        selected based on the metadata version stated in the metadata.

    Returns
    -------
    bool
        True if the input can be parsed and adheres to the schema,
        False otherwise.
    """
    # 1 - valid JSON?
    if isinstance(inp, str):
        try:
            jsn = json.loads(inp)
        except ValueError:
            return False
    else:
        jsn = inp

    if schema is None:
        # The version lookup needs a mapping; anything else cannot be
        # oemetadata anyway.
        if not isinstance(jsn, dict):
            return False
        schema = self.get_schema_by_metadata_version(metadata=jsn)

    # 2 - valid OEMETADATA
    validator = self.get_json_validator(schema)
    try:
        validator.validate(jsn)
    except ValidationError:
        return False
    return True


class JSONParser_1_3(JSONParser):
Expand Down Expand Up @@ -268,7 +417,7 @@ def parse(self, json_old: dict, *args, **kwargs):
)
temporal = structure.Temporal(
reference_date=parse_date_or_none(inp_temporal.get("referenceDate")),
**timeseries
**timeseries,
)

# filling the source section
Expand Down Expand Up @@ -579,16 +728,6 @@ def get_table_name(self, metadata_file):


class JSONParser_1_5(JSONParser):
def is_valid(self, inp: str):
if not super(self, JSONParser_1_5).is_valid(inp):
return False
try:
self.assert_1_5_metastring(inp)
except:
return False
else:
return True

def parse_from_string(
self,
string: str,
Expand All @@ -611,7 +750,7 @@ def parse_from_string(
return self.parse(
self.load_string(string, *(load_args or []), **(load_kwargs or {})),
*(parse_args or []),
**(parse_kwargs or {})
**(parse_kwargs or {}),
)

def parse_term_of_use(self, old_license: dict):
Expand Down
15 changes: 15 additions & 0 deletions tests/test_dialects/test_oep/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from email import parser
import unittest
from omi.dialects.oep.parser import JSONParser_1_3
from omi.dialects.oep.parser import JSONParser_1_4
Expand All @@ -12,6 +13,10 @@
from ..internal_structures import metadata_v_1_5_minimal
from omi.dialects.base.parser import ParserException

from metadata.latest.schema import OEMETADATA_LATEST_SCHEMA

import json


class ParserTest(unittest.TestCase):
def test_parser_v1_3(self):
Expand Down Expand Up @@ -50,3 +55,13 @@ def test_parser_v1_5(self):
_input_file = "tests/data/metadata_v15.json"
expected_result = metadata_v_1_5
_test_generic_parsing(parser, _input_file, expected_result)

def test_parser_v1_5_is_valid(self):
    """Run the schema validation on the v1.5 example metadata file.

    The test passes as long as ``validate`` does not raise.
    """
    metadata_path = "tests/data/metadata_v15.json"
    validating_parser = JSONParser_1_5()

    with open(metadata_path, "r", encoding="utf-8") as handle:
        metadata = json.load(handle)

    validating_parser.validate(metadata)

0 comments on commit e5b855b

Please sign in to comment.