diff --git a/CHANGELOG.md b/CHANGELOG.md index d681dd8..e831fe8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ + +## v2.2.2 +### 2024-07-16 + +The generate_collection_umm_var function in earthdata-varinfo has been updated to support an +optional kwarg 'config_file=' for a configuration file, to be able to override known metadata errors. + + ## v2.2.1 -### Unreleased The `requests` package has been added as an explicit dependency of the package. Additionally, black code formatting has been applied to the entire repository. diff --git a/VERSION b/VERSION index c043eea..b1b25a5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.2.1 +2.2.2 diff --git a/docs/earthdata-varinfo.ipynb b/docs/earthdata-varinfo.ipynb index a8d7389..1fdaa36 100644 --- a/docs/earthdata-varinfo.ipynb +++ b/docs/earthdata-varinfo.ipynb @@ -454,7 +454,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/docs/how_to_publish_to_cmr_with_earthdata_varinfo.ipynb b/docs/how_to_publish_to_cmr_with_earthdata_varinfo.ipynb index 173873f..d58fade 100644 --- a/docs/how_to_publish_to_cmr_with_earthdata_varinfo.ipynb +++ b/docs/how_to_publish_to_cmr_with_earthdata_varinfo.ipynb @@ -114,7 +114,8 @@ "The following cell specifies the collection concept ID of **GLDAS_NOAH10_3H** (from the `EEDTEST` CMR provider). \n", "This can be updated to any concept-id for any provider.\n", "\n", - "Update `auth_header` in the cell below to include your Launchpad token." 
+ "Update `auth_header` in the cell below to include your Launchpad token.\n", + "An optional config file can be passed to override default configuration" ] }, { @@ -128,11 +129,12 @@ "\n", "auth_header = ''\n", "collection_concept_id_gldas = 'C1256543837-EEDTEST'\n", - "\n", + "test_config_file = 'tests/unit/data/test_config.json'\n", "generate_collection_umm_var(\n", " collection_concept_id=collection_concept_id_gldas,\n", " auth_header=auth_header,\n", " publish=True,\n", + " config_file=test_config_file,\n", ")" ] }, @@ -251,6 +253,27 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Instantiate a VarInfoFromNetCDF4 object with an optional config file. This will override default configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_config_file = 'tests/unit/data/test_config.json'\n", + "var_info = VarInfoFromNetCDF4(\n", + " 'MERRA2_400.inst1_2d_asm_Nx.20220130.nc4',\n", + " short_name='M2I1NXASM',\n", + " config_file=test_config_file,\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/tests/unit/test_generate_umm_var.py b/tests/unit/test_generate_umm_var.py index bbeea65..4d7f21c 100644 --- a/tests/unit/test_generate_umm_var.py +++ b/tests/unit/test_generate_umm_var.py @@ -20,6 +20,16 @@ def setUpClass(cls): cls.launchpad_token_header = 'launchpad-token' cls.netcdf4_basename = 'f16_ssmis_20210426v7.nc' cls.netcdf4_url = f'https://example.com/{cls.netcdf4_basename}' + cls.query_granule_return = [ + { + 'links': [ + { + 'href': cls.netcdf4_url, + 'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#', + } + ] + } + ] cls.rssmif16d_variables = [ 'atmosphere_cloud_liquid_water_content', 'atmosphere_water_vapor_content', @@ -55,16 +65,7 @@ def test_generate_collection_umm_var_no_publication( in detail in test_cmr_search.py. 
""" - mock_granule_query.return_value.get.return_value = [ - { - 'links': [ - { - 'href': self.netcdf4_url, - 'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#', - } - ] - } - ] + mock_granule_query.return_value.get.return_value = self.query_granule_return # Add side effect that will copy test file to the temporary directory, # simulating a download. @@ -115,16 +116,7 @@ def test_generate_collection_umm_var_with_publication( 'V0000000007-PROV', 'V0000000008-PROV', ] - mock_granule_query.return_value.get.return_value = [ - { - 'links': [ - { - 'href': self.netcdf4_url, - 'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#', - } - ] - } - ] + mock_granule_query.return_value.get.return_value = self.query_granule_return # Add side effect that will copy test file to the temporary directory, # simulating a download. @@ -214,16 +206,7 @@ def test_publishing_errors( # ['V0000000001-PROV', 'V0000000002-PROV', ..., 'Invalid JSON'] concept_ids_and_error = concept_ids + [error_message] - mock_granule_query.return_value.get.return_value = [ - { - 'links': [ - { - 'href': self.netcdf4_url, - 'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#', - } - ] - } - ] + mock_granule_query.return_value.get.return_value = self.query_granule_return # Add side effect that will copy test file to the temporary directory, # simulating a download. @@ -294,3 +277,58 @@ def test_is_variable_concept_id(self): with self.subTest('Random string returns False'): self.assertFalse(is_variable_concept_id('Random string')) + + @patch('varinfo.generate_umm_var.VarInfoFromNetCDF4') + @patch('varinfo.cmr_search.GranuleQuery') + @patch('varinfo.generate_umm_var.download_granule') + def test_generate_collection_umm_var_config_file( + self, mock_download_granule, mock_granule_query, mock_varinfo_from_netcdf4 + ): + """This test just verifies the config file that is passed in to the + generate_collection_umm_var method is picked up by VarInfoFromNetCDF4 that + would use it. 
The granule query and download methods are mocked to make it + a simpler test. + """ + mock_granule_query.return_value.get.return_value = self.query_granule_return + + # Add side effect that will copy test file to the temporary directory, + # simulating a download. + mock_download_granule.side_effect = self.download_granule_side_effect + + # Run the test: + generate_collection_umm_var( + self.collection_concept_id, + self.bearer_token_header, + config_file='tests/unit/data/test_config.json', + ) + + # Ensure the config file provided is passed to the VarInfo class + mock_varinfo_from_netcdf4.assert_called_once_with( + ANY, + config_file='tests/unit/data/test_config.json', + ) + + @patch('varinfo.generate_umm_var.VarInfoFromNetCDF4') + @patch('varinfo.cmr_search.GranuleQuery') + @patch('varinfo.generate_umm_var.download_granule') + def test_generate_collection_umm_var_with_no_config_file( + self, mock_download_granule, mock_granule_query, mock_varinfo_from_netcdf4 + ): + """This test just verifies if the config file is 'None' in the + generate_collection_umm_var method, the VarInfoFromNetCDF4 would still succeed + and continue with a 'None' value for that parameter. The granule query and + download methods are mocked to make it a simpler test. + """ + mock_granule_query.return_value.get.return_value = self.query_granule_return + + # Add side effect that will copy test file to the temporary directory, + # simulating a download. 
+ mock_download_granule.side_effect = self.download_granule_side_effect + + # Run the test: + generate_collection_umm_var( + self.collection_concept_id, self.bearer_token_header + ) + + # Ensure that the VarInfoFromNetCDF4 invocation includes None for the config file + mock_varinfo_from_netcdf4.assert_called_with(ANY, config_file=None) diff --git a/varinfo/generate_umm_var.py b/varinfo/generate_umm_var.py index d2406a5..af7a384 100644 --- a/varinfo/generate_umm_var.py +++ b/varinfo/generate_umm_var.py @@ -11,7 +11,7 @@ """ from tempfile import TemporaryDirectory -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional import re from cmr import CMR_UAT @@ -37,6 +37,7 @@ def generate_collection_umm_var( auth_header: str, cmr_env: CmrEnvType = CMR_UAT, publish: bool = False, + config_file: Optional[str] = None, ) -> UmmVarReturnType: """Run all the of the functions for downloading and publishing a UMM-Var entry to CMR given: @@ -49,7 +50,8 @@ def generate_collection_umm_var( * publish: Optional argument determining whether to publish the generated UMM-Var records to the indicated CMR instance. Defaults to False. - + * config_file: Optional argument to provide a configuration file that + could be used to override any known errors in a collection. Defaults to None. Note - if attempting to publish to CMR, a LaunchPad token must be used. """ @@ -70,7 +72,7 @@ def generate_collection_umm_var( ) # Parse the granule with VarInfo to map all variables and relations: - var_info = VarInfoFromNetCDF4(local_granule) + var_info = VarInfoFromNetCDF4(local_granule, config_file=config_file) # Generate all the UMM-Var records: all_umm_var_records = get_all_umm_var(var_info)