Skip to content

Commit

Permalink
hxl-processing-specs (#14): HDP.export_json_processing_specs() works
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Mar 13, 2021
1 parent 57de6da commit de9467a
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 15 deletions.
12 changes: 7 additions & 5 deletions hxlm/core/bin/hdpcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,12 @@ def _exec_export_to_hxl_json_processing_specs(self,
online_unrestricted_init=True,
debug=debug)

if debug:
print('hdpcli ... hdp', hdp)
print('hdpcli ... hdp', hdp.export_yml())
# if debug:
# print('hdpcli ... hdp', hdp)
# print('hdpcli ... hdp', hdp.export_yml())

string_result = hdp.export_yml()
# string_result = hdp.export_yml()
string_result = hdp.export_json_processing_specs()

return string_result

Expand Down Expand Up @@ -547,7 +548,8 @@ def execute_cli(self, args,
)
# print('export_to_hxl_json_processing_specs', hdp)
# return str(hdp)
return hdp_result
print(hdp_result)
return self.EXIT_OK

# TODO: 'Is AI just a bunch of if and else statements?'
if (args.hdp_init and (args.hdp_init_home or args.hdp_init_data)) or \
Expand Down
52 changes: 51 additions & 1 deletion hxlm/core/model/hdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,57 @@ def export_json(self) -> str:
# in a place outside HDP internal metadata?
# (Emerson Rocha, 2021-03-13 01:00 UTC)

return json.dumps(self._hdp)
return json.dumps(self._hdp, indent=4, sort_keys=True)

def export_json_processing_specs(self, options=None) -> str:
    """Export the JSON processing specs for HXL data of every recipe.

    Every item of the 'hrecipe' list of every hsilo in the loaded HDP
    metadata is converted to one processing spec with these keys:

    - 'input': the 'iri' of the first 'iri_example' entry (only when the
      recipe has at least one example)
    - 'sheet_index': copied from that same first entry, when it has one
    - 'recipe': the recipe (list of filters), copied as-is

    Note: HXL-Proxy and hxlspec expect a single spec, not an array. When
    exactly one recipe is found the output is that single JSON object, so
    it can be chained directly. Otherwise the output is a JSON array and
    you need to decide manually which item to use.

    Example:
        # Via command line
        hxlspec myspec.json > data.hxl.csv
        # Test on HXL-proxy
        https://proxy.hxlstandard.org/api/from-spec.html

    Args:
        options (optional): Reserved to select more specific recipes.
            Not implemented yet: any truthy value raises.
            Defaults to None.

    Raises:
        NotImplementedError: If a truthy ``options`` is given.
        KeyError: If a recipe item has no 'recipe' key, or its first
            'iri_example' entry has no 'iri' key.

    Returns:
        str: JSON text (4-space indent, sorted keys) holding a single spec
        object when exactly one recipe exists, else an array of specs
        (an empty array when there is no recipe at all).
    """

    if options:
        raise NotImplementedError('options not implemented yet')

    specs = []
    for hsilo in self._hdp:
        if 'hrecipe' not in hsilo:
            continue
        for hrecipeitem in hsilo['hrecipe']:
            spec = {}
            # 'iri_example' may be missing, empty, or None (a YAML key
            # without value); in all these cases there is no input to
            # export instead of failing on the [0] lookup.
            examples = hrecipeitem.get('iri_example')
            if examples:
                # Note: this is a list, but only the first item is used
                first_example = examples[0]
                spec['input'] = first_example['iri']
                if 'sheet_index' in first_example:
                    spec['sheet_index'] = first_example['sheet_index']
            spec['recipe'] = hrecipeitem['recipe']
            specs.append(spec)

    # If the result is exactly one item, return just that item
    # so the output can be chained (e.g. piped to hxlspec)
    result = specs[0] if len(specs) == 1 else specs

    return json.dumps(result, indent=4, sort_keys=True)

def export_yml(self) -> str:
"""Export the current HDP internal metadata in an YAML format
Expand Down
2 changes: 1 addition & 1 deletion hxlm/core/schema/hdp.json-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@
"type": "object",
"required": [
"id",
"source"
"recipe"
]
},
"minItems": 1
Expand Down
42 changes: 34 additions & 8 deletions tests/hxl-processing-specs/hxl-processing-specs-test-01.hdp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,39 @@

- hsilo: "test1"
hrecipe:
# https://proxy.hxlstandard.org/data/edit?dest=data_edit&filter01=cut&filter-label01=with_columns&cut-include-tags01=%23vocab%2Bid%2Bv_iso6393_3letter%2C%23vocab%2Bcode%2Bv_6391%2C%23vocab%2Bname&filter02=select&filter-label02=without_rows&select-query02-01=%23vocab%2Bcode%2Bv_6391%3D&select-reverse02=on&url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F12k4BWqq5c3mV9ihQscPIwtuDa_QRB-iFohO7dXSSptI%2Fedit%23gid%3D0
# https://proxy.hxlstandard.org/data.csv?dest=data_edit&filter01=cut&filter-label01=with_columns&cut-include-tags01=%23vocab%2Bid%2Bv_iso6393_3letter%2C%23vocab%2Bcode%2Bv_6391%2C%23vocab%2Bname&filter02=select&filter-label02=without_rows&select-query02-01=%23vocab%2Bcode%2Bv_6391%3D&select-reverse02=on&url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F12k4BWqq5c3mV9ihQscPIwtuDa_QRB-iFohO7dXSSptI%2Fedit%23gid%3D0
- id: recipe1
source:
iri_example:
- iri: https://docs.google.com/spreadsheets/d/12k4BWqq5c3mV9ihQscPIwtuDa_QRB-iFohO7dXSSptI/edit#gid=0
filters:
- filter: with_columns
with_columns: "#vocab+id+v_iso6393_3letter,#vocab+code+v_6391,#vocab+name"
- filter: without_rows
without_rows: "#vocab+code+v_6391="
recipe:
- filter: with_columns
includes: "#vocab+id+v_iso6393_3letter,#vocab+code+v_6391,#vocab+name"
- filter: without_rows
queries: "#vocab+code+v_6391="

- hsilo:
name: "test1"
desc: from https://docs.google.com/presentation/d/17vXOnq2atIDnrODGLs36P1EaUvT-vXPjsc2I1q1Qc50/
hrecipe:
- id: example-processing-with-a-JSON-spec
iri_example:
- iri: https://data.humdata.org/dataset/yemen-humanitarian-needs-overview
sheet_index: 1
recipe:
- filter: count
patterns: "adm1+name,adm1+code"
aggregators:
- "sum(population) as Population#population"
- filter: clean_data
number: "population"
number_format: .0f

# {
# "filter": "count",
# "patterns": "adm1+name,adm1+code",
# "aggregators": ["sum(population) as Population#population"]
# },
# {
# "filter": "clean_data",
# "number": "population",
# "number_format": ".0f"
# }

0 comments on commit de9467a

Please sign in to comment.