Skip to content

Commit

Permalink
Extract more information from CDF while building the inventory
Browse files (browse the repository at this point in the history)
closes #162

Signed-off-by: Alexis Jeandet <[email protected]>
  • Loading branch information
jeandet committed Jan 16, 2025
1 parent 8bba7db commit 486e4df
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = ['requests', 'pandas', 'diskcache', 'appdirs', 'numpy', 'packaging', 'python-dateutil',
'astropy', 'astroquery', 'pyistp>=0.5.0', 'tqdm', 'matplotlib', 'urllib3>=1.26.0', "PyYAML", "scipy"]
'astropy', 'astroquery', 'pyistp>=0.7.0', 'tqdm', 'matplotlib', 'urllib3>=1.26.0', "PyYAML", "scipy"]
[project.urls]
homepage = "https://github.com/SciQLop/speasy"

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ packaging
numpy
python_dateutil
astropy
pyistp>=0.5.0
pyistp>=0.7.0
astroquery
tqdm
matplotlib
Expand Down
2 changes: 1 addition & 1 deletion requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ astropy
ddt
pytest
pytest-cov
pyistp>=0.5.0
pyistp>=0.7.0
astroquery
tqdm
matplotlib
Expand Down
21 changes: 13 additions & 8 deletions speasy/core/cdf/inventory_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from datetime import timedelta
from typing import List, Optional
from typing import List, Optional, Union

import pyistp
from pyistp.loader import DataVariable, ISTPLoader
Expand All @@ -13,8 +13,9 @@

def filter_variable_meta(datavar: DataVariable) -> dict:
keep_list = ['CATDESC', 'FIELDNAM', 'UNITS', 'UNIT_PTR', 'DISPLAY_TYPE', 'LABLAXIS', 'LABL_PTR_1', 'LABL_PTR_2',
'LABL_PTR_3', 'VIRTUAL', 'FUNCT']
'LABL_PTR_3', 'VIRTUAL', 'FUNCT', 'FILLVAL']
base = {key: value for key, value in datavar.attributes.items() if key in keep_list}
base['cdf_type'] = datavar.cdf_type
if len(datavar.values.shape) == 1:
base['spz_shape'] = 1
else:
Expand All @@ -30,7 +31,8 @@ def _attribute_value(attr):


def filter_dataset_meta(dataset: ISTPLoader) -> dict:
keep_list = ['Caveats', 'Rules_of_use']
keep_list = ['Caveats', 'Rules_of_use', 'Time_resolution', 'spase_DatasetResourceID', 'HTTP_LINK', 'Data_type',
'Acknowledgement']
return {key: _attribute_value(dataset.attribute(key)) for key in dataset.attributes() if key in keep_list}


Expand All @@ -55,15 +57,18 @@ def _extract_parameters_impl(cdf: ISTPLoader, provider: str, uid_fmt: str = "{va
cdf.data_variables())))


def extract_parameters(url: str, provider: str, uid_fmt: str = "{var_name}", meta=None) -> List[ParameterIndex]:
def extract_parameters(url_or_istp_loader: Union[str,ISTPLoader], provider: str, uid_fmt: str = "{var_name}", meta=None) -> List[ParameterIndex]:
indexes: List[ParameterIndex] = []
try:
with any_loc_open(url) as remote_cdf:
cdf = pyistp.load(buffer=remote_cdf.read())
return _extract_parameters_impl(cdf, provider=provider, uid_fmt=uid_fmt, meta=meta)
if isinstance(url_or_istp_loader, str):
with any_loc_open(url_or_istp_loader) as remote_cdf:
cdf = pyistp.load(buffer=remote_cdf.read())
return _extract_parameters_impl(cdf, provider=provider, uid_fmt=uid_fmt, meta=meta)
else:
return _extract_parameters_impl(url_or_istp_loader, provider=provider, uid_fmt=uid_fmt, meta=meta)

except RuntimeError:
print(f"Issue loading {url}")
print(f"Issue loading {url_or_istp_loader}")
return indexes


Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import logging
import os.path
from typing import List
import pyistp

from speasy.core.cdf.inventory_extractor import extract_parameters
from speasy.core.cdf.inventory_extractor import extract_parameters, filter_dataset_meta
from speasy.core.inventory.indexes import ParameterIndex, DatasetIndex, SpeasyIndex

log = logging.getLogger(__name__)
Expand All @@ -16,10 +17,13 @@ def _patch_parameter(parameter: ParameterIndex, dataset: DatasetIndex):


def load_master_cdf(path, dataset: DatasetIndex):
cdf = pyistp.loader.ISTPLoader(path)
dataset.__dict__.update(
{p.spz_name(): p for p in
map(lambda p: _patch_parameter(p, dataset), extract_parameters(path, provider="cda",
map(lambda p: _patch_parameter(p, dataset), extract_parameters(cdf, provider="cda",
uid_fmt=f"{dataset.serviceprovider_ID}/{{var_name}}"))})
dataset.__dict__.update(filter_dataset_meta(cdf))



def _extract_datasets(root: SpeasyIndex) -> List[DatasetIndex]:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_direct_archive_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def test_get_product_with_custom_loader(self):
)
def test_build_inventory_from_remote_cdf(self, url):
parameters = extract_parameters(
url=url,
url_or_istp_loader=url,
provider="test")
self.assertGreater(len(parameters), 0)

Expand Down

0 comments on commit 486e4df

Please sign in to comment.