Skip to content

Commit

Permalink
DAS-2165 - Add a config file parameter to generate_collection_umm_var…
Browse files Browse the repository at this point in the history
… method (#28)

* DAS-2165 updates to add the optional config file parameter

* DAS-2165 unused variables

* DAS-2165 - updated changelog.md and version

* DAS-2165 - updates to support Python version <=3.9

* DAS-2165 - updates based on PR feedback

* Update varinfo/generate_umm_var.py

Co-authored-by: Matt Savoie <[email protected]>

* DAS-2165 - more updates based on PR feedback

* DAS-2165 - PR refactoring updates

* DAS-2165 updated release date

---------

Co-authored-by: Matt Savoie <[email protected]>
  • Loading branch information
sudha-murthy and flamingbear authored Jul 16, 2024
1 parent 606b292 commit e7ab668
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 38 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@

## v2.2.2
### 2024-07-16

The `generate_collection_umm_var` function in earthdata-varinfo has been updated to
support an optional `config_file` keyword argument, specifying a configuration file
that can be used to override known metadata errors.


## v2.2.1
### Unreleased

The `requests` package has been added as an explicit dependency of the package.
Additionally, black code formatting has been applied to the entire repository.
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.2.1
2.2.2
2 changes: 1 addition & 1 deletion docs/earthdata-varinfo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
27 changes: 25 additions & 2 deletions docs/how_to_publish_to_cmr_with_earthdata_varinfo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@
"The following cell specifies the collection concept ID of **GLDAS_NOAH10_3H** (from the `EEDTEST` CMR provider). \n",
"This can be updated to any concept-id for any provider.\n",
"\n",
"Update `auth_header` in the cell below to include your Launchpad token."
"Update `auth_header` in the cell below to include your Launchpad token.\n",
"An optional config file can be passed to override default configuration"
]
},
{
Expand All @@ -128,11 +129,12 @@
"\n",
"auth_header = '<Launchpad token>'\n",
"collection_concept_id_gldas = 'C1256543837-EEDTEST'\n",
"\n",
"test_config_file = 'tests/unit/data/test_config.json'\n",
"generate_collection_umm_var(\n",
" collection_concept_id=collection_concept_id_gldas,\n",
" auth_header=auth_header,\n",
" publish=True,\n",
" config_file=test_config_file,\n",
")"
]
},
Expand Down Expand Up @@ -251,6 +253,27 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Instantiate a VarInfoFromNetCDF4 object with an optional config file. This will override default configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test_config_file = 'tests/unit/data/test_config.json'\n",
"var_info = VarInfoFromNetCDF4(\n",
" 'MERRA2_400.inst1_2d_asm_Nx.20220130.nc4',\n",
" short_name='M2I1NXASM',\n",
" config_file=test_config_file,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
98 changes: 68 additions & 30 deletions tests/unit/test_generate_umm_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@ def setUpClass(cls):
cls.launchpad_token_header = 'launchpad-token'
cls.netcdf4_basename = 'f16_ssmis_20210426v7.nc'
cls.netcdf4_url = f'https://example.com/{cls.netcdf4_basename}'
cls.query_granule_return = [
{
'links': [
{
'href': cls.netcdf4_url,
'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#',
}
]
}
]
cls.rssmif16d_variables = [
'atmosphere_cloud_liquid_water_content',
'atmosphere_water_vapor_content',
Expand Down Expand Up @@ -55,16 +65,7 @@ def test_generate_collection_umm_var_no_publication(
in detail in test_cmr_search.py.
"""
mock_granule_query.return_value.get.return_value = [
{
'links': [
{
'href': self.netcdf4_url,
'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#',
}
]
}
]
mock_granule_query.return_value.get.return_value = self.query_granule_return

# Add side effect that will copy test file to the temporary directory,
# simulating a download.
Expand Down Expand Up @@ -115,16 +116,7 @@ def test_generate_collection_umm_var_with_publication(
'V0000000007-PROV',
'V0000000008-PROV',
]
mock_granule_query.return_value.get.return_value = [
{
'links': [
{
'href': self.netcdf4_url,
'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#',
}
]
}
]
mock_granule_query.return_value.get.return_value = self.query_granule_return

# Add side effect that will copy test file to the temporary directory,
# simulating a download.
Expand Down Expand Up @@ -214,16 +206,7 @@ def test_publishing_errors(
# ['V0000000001-PROV', 'V0000000002-PROV', ..., 'Invalid JSON']
concept_ids_and_error = concept_ids + [error_message]

mock_granule_query.return_value.get.return_value = [
{
'links': [
{
'href': self.netcdf4_url,
'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#',
}
]
}
]
mock_granule_query.return_value.get.return_value = self.query_granule_return

# Add side effect that will copy test file to the temporary directory,
# simulating a download.
Expand Down Expand Up @@ -294,3 +277,58 @@ def test_is_variable_concept_id(self):

with self.subTest('Random string returns False'):
self.assertFalse(is_variable_concept_id('Random string'))

@patch('varinfo.generate_umm_var.VarInfoFromNetCDF4')
@patch('varinfo.cmr_search.GranuleQuery')
@patch('varinfo.generate_umm_var.download_granule')
def test_generate_collection_umm_var_config_file(
    self, mock_download_granule, mock_granule_query, mock_varinfo_from_netcdf4
):
    """Verify that a configuration file supplied to
    generate_collection_umm_var is forwarded to VarInfoFromNetCDF4, which
    is the class that would make use of it. The granule query and the
    granule download are both mocked to keep the test simple.

    """
    expected_config_file = 'tests/unit/data/test_config.json'

    # Add side effect that will copy test file to the temporary directory,
    # simulating a download.
    mock_download_granule.side_effect = self.download_granule_side_effect
    mock_granule_query.return_value.get.return_value = self.query_granule_return

    # Run the test:
    generate_collection_umm_var(
        self.collection_concept_id,
        self.bearer_token_header,
        config_file=expected_config_file,
    )

    # Ensure the config file provided is passed to the VarInfo class:
    mock_varinfo_from_netcdf4.assert_called_once_with(
        ANY,
        config_file=expected_config_file,
    )

@patch('varinfo.generate_umm_var.VarInfoFromNetCDF4')
@patch('varinfo.cmr_search.GranuleQuery')
@patch('varinfo.generate_umm_var.download_granule')
def test_generate_collection_umm_var_with_no_config_file(
    self, mock_download_granule, mock_granule_query, mock_varinfo_from_netcdf4
):
    """Verify that when no configuration file is supplied to
    generate_collection_umm_var, VarInfoFromNetCDF4 is still instantiated,
    receiving `config_file=None`. The granule query and the granule
    download are both mocked to keep the test simple.

    """
    mock_granule_query.return_value.get.return_value = self.query_granule_return

    # Add side effect that will copy test file to the temporary directory,
    # simulating a download.
    mock_download_granule.side_effect = self.download_granule_side_effect

    # Run the test, omitting the optional `config_file` keyword argument:
    generate_collection_umm_var(
        self.collection_concept_id, self.bearer_token_header
    )

    # Ensure VarInfoFromNetCDF4 was instantiated exactly once, with None
    # for the config file. Asserting a single call matches the sibling
    # test that supplies a config file, and would catch an unintended
    # additional instantiation.
    mock_varinfo_from_netcdf4.assert_called_once_with(ANY, config_file=None)
8 changes: 5 additions & 3 deletions varinfo/generate_umm_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"""

from tempfile import TemporaryDirectory
from typing import Dict, List, Union
from typing import Dict, List, Union, Optional
import re

from cmr import CMR_UAT
Expand All @@ -37,6 +37,7 @@ def generate_collection_umm_var(
auth_header: str,
cmr_env: CmrEnvType = CMR_UAT,
publish: bool = False,
config_file: Optional[str] = None,
) -> UmmVarReturnType:
"""Run all of the functions for downloading and publishing
a UMM-Var entry to CMR given:
Expand All @@ -49,7 +50,8 @@ def generate_collection_umm_var(
* publish: Optional argument determining whether to publish the
generated UMM-Var records to the indicated CMR instance. Defaults to
False.
* config_file: Optional argument to provide a configuration file that
could be used to override any known errors in a collection. Defaults to None.
Note - if attempting to publish to CMR, a LaunchPad token must be used.
"""
Expand All @@ -70,7 +72,7 @@ def generate_collection_umm_var(
)

# Parse the granule with VarInfo to map all variables and relations:
var_info = VarInfoFromNetCDF4(local_granule)
var_info = VarInfoFromNetCDF4(local_granule, config_file=config_file)

# Generate all the UMM-Var records:
all_umm_var_records = get_all_umm_var(var_info)
Expand Down

0 comments on commit e7ab668

Please sign in to comment.