Use CF-Convention long_name in UMM-Var.

nasa · Nov 20, 2023 · 00600b4 · 00600b4
1 parent 0616125
commit 00600b4
Show file tree

Hide file tree

Showing 7 changed files with 65 additions and 12 deletions.
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ '3.9', '3.10', '3.11', '3.x' ]
+        python-version: [ '3.9', '3.10', '3.11' ]
       fail-fast: false
 
     steps:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,9 +1,20 @@
+## v2.1.2
+### 2023-11-14
+
+This version of `earthdata-varinfo` updates the value of the `LongName`
+attribute in generated UMM-Var records to use the value of the CF-Convention
+`long_name` attribute for a variable, if it is present in the file. If this
+attribute is not present in the in-file metadata, then the full path to the
+variable (without the leading `/`) is used as before.
+
 ## v2.1.1
 ### 2023-10-24
+
 Fixed deployment issues.
 
 ## v2.1.0
 ### 2023-10-20
+
 This version of `earthdata-varinfo` improves the
 `varinfo.get_science_variables` function by using the `varinfo.is_science_variable()` method.
 This method returns true if a variable is a science variable by checking if

diff --git a/README.md b/README.md
@@ -131,7 +131,7 @@ Expected outputs:
 Native IDs for generated UMM-Var records will be of format:
 
 ```
-<collection concept ID>-<variable LongName>
+<collection concept ID>-<variable Name>
 ```
 
 For variables that are hierarchical, slashes will be converted to underscores,

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.1.1
+2.1.2
diff --git a/tests/unit/test_generate_umm_var.py b/tests/unit/test_generate_umm_var.py
@@ -90,7 +90,7 @@ def test_generate_collection_umm_var_no_publication(self,
         # not performed to avoid test brittleness.
         expected_variables = set(self.rssmif16d_variables)
 
-        actual_variables = set([record['LongName']
+        actual_variables = set([record['Name']
                                 for record in generated_umm_var])
 
         self.assertSetEqual(actual_variables, expected_variables)

diff --git a/tests/unit/test_umm_var.py b/tests/unit/test_umm_var.py
@@ -188,6 +188,44 @@ def test_get_umm_var_all_fields(self):
             r'https://cdn.earthdata.nasa.gov/umm/variable/v\d+\.\d+\.\d+$'
         )
 
+    def test_get_umm_var_cf_long_name(self):
+        """ Ensure that if a variable in a granule has the `long_name`
+            CF-Convention attribute, the value of that attribute is used in
+            place of the variable full path. Otherwise, the `LongName` in the
+            UMM-Var record should default to that full path.
+
+        """
+        netcdf4_file = f'{self.tmp_dir}/input.nc4'
+        with Dataset(netcdf4_file, 'w') as dataset:
+            dataset.setncatts({'short_name': 'test'})
+            long_name_variable = dataset.createVariable('long_name', float64)
+            long_name_variable.setncatts({'long_name': 'this is really long'})
+
+            no_long_name_variable = dataset.createVariable('no_long_name',
+                                                           float64)
+
+        nc_varinfo = VarInfoFromNetCDF4(netcdf4_file)
+
+        with self.subTest('CF-Convention long_name is used'):
+            long_name_umm_var = get_umm_var(
+                nc_varinfo, nc_varinfo.get_variable('/long_name')
+            )
+
+            self.assertEqual(
+                long_name_umm_var['LongName'],
+                'this is really long'
+            )
+
+        with self.subTest('No CF-Convention long_name attribute'):
+            no_long_name_umm_var = get_umm_var(
+                nc_varinfo, nc_varinfo.get_variable('/no_long_name')
+            )
+
+            self.assertEqual(
+                no_long_name_umm_var['LongName'],
+                'no_long_name'
+            )
+
     def test_get_umm_var_absent_fields_removed(self):
         """ Ensure that a variable with minimal information populates the
             minimum fields required by the UMM-Var schema, and that any fields
@@ -781,8 +819,8 @@ def test_publish_umm_var(self, mock_requests_put):
 
         # Input parameters
         umm_var_dict = {
-            'LongName': 'test_variable',
-            'Name': 'Test',
+            'LongName': 'This is a test variable',
+            'Name': 'test_variable',
             'MetadataSpecification': {
                 'URL': 'https://foo.gov/umm/variable/v1.8.2',
                 'Name': 'UMM-Var',
@@ -931,21 +969,21 @@ def test_generate_variable_native_id(self):
 
         """
         with self.subTest('Variable in flat file'):
-            umm_var_json = {'LongName': 'time'}
+            umm_var_json = {'Name': 'time'}
             self.assertEqual(
                 generate_variable_native_id('C1234567890-PROV', umm_var_json),
                 'C1234567890-PROV-time'
             )
 
         with self.subTest('Variable in hierarchical file'):
-            umm_var_json = {'LongName': 'Grid/time'}
+            umm_var_json = {'Name': 'Grid/time'}
             self.assertEqual(
                 generate_variable_native_id('C1234567890-PROV', umm_var_json),
                 'C1234567890-PROV-Grid_time'
             )
 
         with self.subTest('No leading slashes affect name.'):
-            umm_var_json = {'LongName': '/Grid/time'}
+            umm_var_json = {'Name': '/Grid/time'}
             self.assertEqual(
                 generate_variable_native_id('C1234567890-PROV', umm_var_json),
                 'C1234567890-PROV-Grid_time'

diff --git a/varinfo/umm_var.py b/varinfo/umm_var.py
@@ -73,7 +73,8 @@ def get_umm_var(var_info: VarInfoBase, variable: VariableBase) -> Dict:
     variable_name = variable.full_name_path.lstrip('/')
     umm_var_record = {
         'Name': variable_name,
-        'LongName': variable_name,
+        'LongName': get_first_matched_attribute(variable, ['long_name'],
+                                                variable_name),
         'StandardName': get_first_matched_attribute(variable,
                                                     ['standard_name']),
         'Definition': get_first_matched_attribute(
@@ -349,12 +350,15 @@ def generate_variable_native_id(collection_concept_id: str,
     """ A helper function to create a CMR native ID given the collection
         concept ID and the variable UMM-Var JSON. This native ID must be unique
         within the entire provider. The initial implementation will be to
-        concatenate the collection concept ID and the long name of the variable
+        concatenate the collection concept ID and the name of the variable
         while removing slashes that CMR will interpret as part of the URL path.
 
+        Note - the `Name` attribute of the generated UMM-Var record is the full
+        path of the variable.
+
     """
     return '-'.join([collection_concept_id,
-                     umm_var_record['LongName'].replace('/', '_').lstrip('_')])
+                     umm_var_record['Name'].replace('/', '_').lstrip('_')])
 
 
 def get_variable_type(var_info: VarInfoBase, variable: VariableBase) -> str: