Skip to content

Commit

Permalink
hxlm (#11), urnresolver (#13): working on get_urn_resolver_local(), f…
Browse files Browse the repository at this point in the history
…ile order is important
  • Loading branch information
fititnt committed Mar 7, 2021
1 parent 77895bc commit 926019b
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 10 deletions.
52 changes: 44 additions & 8 deletions hxlm/core/schema/urn/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,24 @@
__all__ = ['get_urn_vault_local_info', 'get_urn_vault_local_info']


_HOME = str(Path.home())

# TODO: move these variables somewhere else
HXLM_CONFIG_BASE = os.getenv(
'HXLM_CONFIG_BASE', os.getenv('HOME') + '/.config/hxlm/')
'HXLM_CONFIG_BASE', _HOME + '/.config/hxlm/')

HXLM_DATA_POLICY_BASE = os.getenv(
'HXLM_DATA_POLICY_BASE', os.getenv('HOME') + '/.config/hxlm/policy/')
'HXLM_DATA_POLICY_BASE', _HOME + '/.config/hxlm/policy/')

HXLM_DATA_VAULT_BASE = os.getenv(
'HXLM_DATA_VAULT_BASE', os.getenv('HOME') + '/.local/var/hxlm/data/')
'HXLM_DATA_VAULT_BASE', _HOME + '/.local/var/hxlm/data/')

HXLM_DATA_VAULT_BASE_ALT = os.getenv('HXLM_DATA_VAULT_BASE_ALT')
HXLM_DATA_VAULT_BASE_ACTIVE = os.getenv(
'HXLM_DATA_VAULT_BASE_ACTIVE', HXLM_DATA_VAULT_BASE)

#: HXLM_DATA_URN_EXTENSIONS must be a Python tuple (it is passed to str.endswith())
HXLM_DATA_URN_EXTENSIONS = ('urn.csv', 'urn.json', 'urn.yml', 'urn.txt')

# import json
# import yaml
Expand All @@ -57,6 +61,7 @@
# ./hxlm/core/bin/urnresolver.py urn:data:xz:eticaai:pcode:br
# ./hxlm/core/bin/urnresolver.py urn:data:xz:hxl:std:core:hashtag


def debug_local_data():
"""[summary]
"""
Expand Down Expand Up @@ -84,20 +89,51 @@ def get_urn_vault_local_info(urn: Type[GenericUrnHtype]):

def get_urn_resolver_local(local_file_or_path: str,
                           required: bool = False) -> List[str]:
    """Read the raw text of local URN resolver index file(s).

    Args:
        local_file_or_path: Path to a single index file, or to a directory
            holding index files whose names end with one of
            HXLM_DATA_URN_EXTENSIONS (urn.csv, urn.json, urn.yml,
            example.urn.csv, etc-123.urn.json, ...).
        required: When True, a path that is neither a file nor a directory
            raises instead of yielding an empty result.

    Returns:
        A list with the text content of each file found. For a single file
        the list has one item. For a directory, items follow the
        alphabetical order of the file names, so the order is deterministic
        (NOTE(review): the commit message says file order is important;
        confirm alphabetical is the intended precedence). An empty list is
        returned when nothing is found and ``required`` is False.

    Raises:
        RuntimeError: If the path does not exist and ``required`` is True.
    """
    target = Path(local_file_or_path)

    if target.is_file():
        return [target.read_text()]

    if not target.is_dir():
        if required:
            raise RuntimeError(
                'local_file_or_path [' + str(local_file_or_path)
                + '] not found')
        # Optional source that does not exist: nothing to load.
        return []

    # Compare against the file *name*, not the full path: a full path never
    # starts with '~', so temporary/lock files would slip through otherwise.
    # str.endswith() takes the whole tuple of extensions in a single call.
    candidates = sorted(
        (entry for entry in target.iterdir()
         if entry.is_file()
         and not entry.name.startswith('~')
         and entry.name.endswith(HXLM_DATA_URN_EXTENSIONS)),
        key=lambda entry: entry.name)

    return [entry.read_text() for entry in candidates]


def get_urn_resolver_remote(iri_or_domain: str,
Expand Down
7 changes: 5 additions & 2 deletions tests/test_core_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
HdpUrnHtype
)

from hxlm.core.schema.urn.util import (
get_urn_resolver_local
)

# def test_core_schema_urn_example_valid():
# example1 = 'urn:x-hdp:xz:eticaai:HXL-Data-Science-file-formats'
Expand Down Expand Up @@ -208,9 +211,9 @@ def test_core_htype_urn_cast_b():
# assert urn_hdp1.nid == 'x-hdp'
# assert urn_hdp2.nid == 'x-hdp'
# assert resul2 is False
# get_urn_resolver_local('/workspace/git/EticaAI/HXL-Data-Science-file-formats/tests/urnresolver/all-in-same-dir')


test_core_htype_urn_cast_b()
# test_core_htype_urn_cast_b()

# TODO:
# - https://opendatasus.saude.gov.br/dataset/covid-19-vacinacao
Expand Down
20 changes: 20 additions & 0 deletions tests/urnresolver/all-in-same-dir/00-named-urn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[
{
"urn": "urn:data:xz:hxl:std:core:hashtag",
"source": [
"https://docs.google.com/spreadsheets/d/1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/pub?gid=319251406&single=true&output=csv"
]
},
{
"urn": "urn:data:xz:hxl:std:core:attribute",
"source": [
"https://docs.google.com/spreadsheets/d/1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/pub?gid=1810309357&single=true&output=csv"
]
},
{
"urn": "urn:data:xz:hxlcplp:fod:lang",
"source": [
"https://proxy.hxlstandard.org/data.csv?dest=data_view&url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F12k4BWqq5c3mV9ihQscPIwtuDa_QRB-iFohO7dXSSptI%2Fedit%23gid%3D0"
]
}
]
4 changes: 4 additions & 0 deletions tests/urnresolver/all-in-same-dir/urn.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"#item+urn","#x_source"
"urn:data:xz:hxl:std:core:hashtag","https://docs.google.com/spreadsheets/d/1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/pub?gid=319251406&single=true&output=csv"
"urn:data:xz:hxl:std:core:attribute","https://docs.google.com/spreadsheets/d/1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/pub?gid=1810309357&single=true&output=csv"
"urn:data:xz:hxlcplp:fod:lang","https://proxy.hxlstandard.org/data.csv?dest=data_view&url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F12k4BWqq5c3mV9ihQscPIwtuDa_QRB-iFohO7dXSSptI%2Fedit%23gid%3D0"
16 changes: 16 additions & 0 deletions tests/urnresolver/all-in-same-dir/zz-name-urn.urn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---

# TODO: when URNResolver starts to understand CKAN instances, also add CKAN
# as an extra source instead of direct access to Google Drive
# (Emerson Rocha, 2021-03-06 20:01 UTC)

# https://data.humdata.org/dataset/hxl-core-schemas
- urn: "urn:data:xz:hxl:std:core:hashtag"
source:
- https://docs.google.com/spreadsheets/d/1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/pub?gid=319251406&single=true&output=csv
- urn: "urn:data:xz:hxl:std:core:attribute"
source:
- https://docs.google.com/spreadsheets/d/1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/pub?gid=1810309357&single=true&output=csv
- urn: "urn:data:xz:hxlcplp:fod:lang"
source:
- https://proxy.hxlstandard.org/data.csv?dest=data_view&url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F12k4BWqq5c3mV9ihQscPIwtuDa_QRB-iFohO7dXSSptI%2Fedit%23gid%3D0

0 comments on commit 926019b

Please sign in to comment.