Skip to content

Commit

Permalink
hxl-processing-specs (#14): now is possible to specify inline data ('…
Browse files Browse the repository at this point in the history
…input_data', 'output_data') as part of hrecipe.exemplum; the underlying implementation is still not ready, but the idea is to be able to specify self-contained examples when creating recipes with YAML; hrecipe.exemplum[N].objectivum.datum can be used for self-contained testing!
  • Loading branch information
fititnt committed Mar 14, 2021
1 parent 57d2cca commit 2550222
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 49 deletions.
99 changes: 66 additions & 33 deletions hxlm/core/model/hdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

from typing import (
Union,
Tuple
List,
Tuple,
)

import json
Expand Down Expand Up @@ -181,6 +182,68 @@ def _prepare_from_string(self,

raise RuntimeError('json_string or yml_string are required')

def _prepare_hrecipe_item(self, recipe: List) -> List:
# result = []
# recipeitem = {}

# recipeitem['recipe'] = recipe['_recipe']

# Note: for convention, the first recipe always will not use any
# example input (this can make easyer for reuse). But old
# examples will need to be updated
recipe_without_examplum = {
'recipe': recipe['_recipe']
}
result = [recipe_without_examplum]

# result.append(recipeitem)
# print('ooooi', result)
# return result

# print('eita123', recipe['exemplum'])
# print('eita123', recipe['exemplum'][0])

# TODO: this have the [0] hardoded. Should be generalized
# if 'exemplum' in recipe and 'fontem' in recipe['exemplum'][0]: # noqa
# # print('oioioi', hrecipeitem['exemplum'])
# if 'iri' in recipe['exemplum'][0]['fontem']:
# recipeitem['input'] = recipe['exemplum'][0]['fontem']['iri'] # noqa
# if '_sheet_index' in recipe['exemplum'][0]['fontem']:
# recipeitem['sheet_index'] = recipe['exemplum'][0]['fontem']['_sheet_index'] # noqa

# result.append(recipeitem)

if 'exemplum' in recipe:
loop = 0
# print('exemplum loop', loop)
# print('exemplum loop 2', recipe['exemplum'])
# print('exemplum loop 2 loop', recipe['exemplum'][loop])
# print('exemplum loop 2 loop fontem', recipe['exemplum'][loop]['fontem']) # noqa
# print('exemplum loop 2 loop fontem b', 'fontem' in recipe['exemplum'][loop]) # noqa
# print('exemplum loop 2 loop fontem b', loop in recipe['exemplum']) # noqa
# while loop in recipe['exemplum'] and 'fontem' in recipe['exemplum'][loop]: # noqa
# while 'fontem' in recipe['exemplum'][loop]:
while True:
# print('exemplum loop, inside', loop, recipe['exemplum'][loop]['fontem']) # noqa
recipeitem = {
'recipe': recipe['_recipe']
}
if 'iri' in recipe['exemplum'][loop]['fontem']:
recipeitem['input'] = recipe['exemplum'][loop]['fontem']['iri'] # noqa
if '_sheet_index' in recipe['exemplum'][loop]['fontem']:
recipeitem['sheet_index'] = recipe['exemplum'][loop]['fontem']['_sheet_index'] # noqa
if 'datum' in recipe['exemplum'][loop]['fontem']:
recipeitem['input_data'] = recipe['exemplum'][loop]['fontem']['datum'] # noqa
if 'objectivum' in recipe['exemplum'][loop] and 'datum' in recipe['exemplum'][loop]['objectivum']: # noqa
recipeitem['output_data'] = recipe['exemplum'][loop]['objectivum']['datum'] # noqa
result.append(recipeitem)
loop = loop + 1
if loop >= len(recipe['exemplum']):
break

# return recipeitem
return result

# def _prepare_from_yml_string(self, hdp_yml_string):
# self._hdp_raw = hdp_yml_string
# self._hdp = yaml.safe_load(hdp_yml_string)
Expand Down Expand Up @@ -230,38 +293,8 @@ def export_json_processing_specs(self, options=None) -> str:
for hsilo in self._hdp:
if 'hrecipe' in hsilo:
for hrecipeitem in hsilo['hrecipe']:
recipeitem = {}
# Code pré v0.7.4
# if 'iri_example' in hrecipeitem:
# # Note: here is an list, but we're taking the first
# recipeitem['input'] = \
# hrecipeitem['iri_example'][0]['iri']
# if 'sheet_index' in hrecipeitem['iri_example'][0]:
# recipeitem['sheet_index'] = \
# hrecipeitem['iri_example'][0]['sheet_index']
# recipeitem['recipe'] = hrecipeitem['recipe']

recipeitem['recipe'] = hrecipeitem['_recipe']

# print('eita123', hrecipeitem['exemplum'])
# print('eita123', hrecipeitem['exemplum'][0])

# TODO: this have the [0] hardoded. Should be generalized
if 'exemplum' in hrecipeitem and 'fontem' in hrecipeitem['exemplum'][0]: # noqa
# print('oioioi', hrecipeitem['exemplum'])
if 'iri' in hrecipeitem['exemplum'][0]['fontem']:
recipeitem['input'] = hrecipeitem['exemplum'][0]['fontem']['iri'] # noqa
if 'sheet_index' in hrecipeitem['exemplum'][0]:
recipeitem['sheet_index'] = \
hrecipeitem['iri_example'][0]['sheet_index']
# recipeitem['recipe'] = hrecipeitem['recipe']

result.append(recipeitem)

# If the result already is exact one item, return just one
# So the output can be chained
if len(result) == 1:
result = result[0]
# result.append(self._prepare_hrecipe_item(hrecipeitem))
result.extend(self._prepare_hrecipe_item(hrecipeitem))

return json.dumps(result, indent=4, sort_keys=True)

Expand Down
30 changes: 18 additions & 12 deletions tests/hrecipe/hello-world.hrecipe.hdp.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

# cd tests/hrecipe
# hdpcli --export-to-hxl-json-processing-specs hello-world.hrecipe.hdp.yml
# hdpcli --export-to-hxl-json-processing-specs hello-world.hrecipe.hdp.yml | jq '.[1]' | hxlspec
---
- hsilo:
nomen: hello-world.hrecipe.hdp.yml
Expand All @@ -19,24 +19,30 @@
# - iri: https://data.humdata.org/dataset/yemen-humanitarian-needs-overview
# sheet_index: 1
exemplum:

# Example one
- fontem:
iri: https://data.humdata.org/dataset/yemen-humanitarian-needs-overview
_sheet_index: 1

# Example two includes both inline input data and inline expected output data
- fontem:
# Note: fontem.datum not implemented
# Note: fontem.datum is not fully implemented. The idea here is
# to be able to create an ad-hoc table instead of using an
# external input, so it can serve as a quick example or...
# as some sort of unit test for an HXL data processing
# spec!
datum:
- ['header 1', 'header 2', 'header 3']
- ['#item +id', '#item +name', '#item +value']
- ['ACME1', 'ACME Inc.', '123']
- ['XPTO1', 'XPTO org', '456']
- ["header 1", "header 2", "header 3"]
- ["#item +id", "#item +name", "#item +value"]
- ["ACME1", "ACME Inc.", "123"]
- ["XPTO1", "XPTO org", "456"]
objectivum:
# Note: fontem.objectivum not implemented
# Note: objectivum.datum is not fully implemented. The idea here
# is (like fontem.datum) to work as an ad-hoc table, but its
# real purpose is to allow creating some sort of unit test for
# an HXL data processing spec!
datum:
- ['header 1', 'header 2', 'header 3']
- ['#item +id', '#item +name', '#item +value']
- ['ACME1', 'ACME Inc.', '123']
- ['XPTO1', 'XPTO org', '456']
- ["header 1", "header 2", "header 3"]
- ["#item +id", "#item +name", "#item +value"]
- ["ACME1", "ACME Inc.", "123"]
- ["XPTO1", "XPTO org", "456"]
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

# To inspect the result (pretty print)
# hdpcli --export-to-hxl-json-processing-specs tests/hxl-processing-specs/hxl-processing-specs-test-01.hdp.yml
# To pipe the result direct to hxlspec (first item of array, use jq '.[0]')
# hdpcli --export-to-hxl-json-processing-specs tests/hxl-processing-specs/hxl-processing-specs-test-01.hdp.yml | jq '.[0]' | hxlspec
# To pipe the result direct to hxlspec (first item of array, use jq '.[1]')
# To pipe the result direct to hxlspec (second item of array, use jq '.[1]')
# hdpcli --export-to-hxl-json-processing-specs tests/hxl-processing-specs/hxl-processing-specs-test-01.hdp.yml | jq '.[1]' | hxlspec
# To pipe the result direct to hxlspec (fourth item of array, use jq '.[3]')
# hdpcli --export-to-hxl-json-processing-specs tests/hxl-processing-specs/hxl-processing-specs-test-01.hdp.yml | jq '.[3]' | hxlspec

---

Expand Down Expand Up @@ -47,4 +47,3 @@
- fontem:
iri: https://data.humdata.org/dataset/yemen-humanitarian-needs-overview
_sheet_index: 1

0 comments on commit 2550222

Please sign in to comment.