Skip to content

Commit

Permalink
Use CF-Convention long_name in UMM-Var.
Browse files Browse the repository at this point in the history
  • Loading branch information
owenlittlejohns authored Nov 20, 2023
1 parent 0616125 commit 00600b4
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.9', '3.10', '3.11', '3.x' ]
python-version: [ '3.9', '3.10', '3.11' ]
fail-fast: false

steps:
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
## v2.1.2
### 2023-11-14

This version of `earthdata-varinfo` updates the value of the `LongName`
attribute in generated UMM-Var records to use the value of the CF-Convention
`long_name` attribute for a variable, if it is present in the file. If this
attribute is not present in the in-file metadata, then the full path to the
variable (without the leading `/`) is used as before.

## v2.1.1
### 2023-10-24

Fixed deployment issues

## v2.1.0
### 2023-10-20

This version of `earthdata-varinfo` improves the functionality of the
`varinfo.get_science_variables` function with the `varinfo.is_science_variable()` method.
This method returns true if a variable is a science variable by checking if
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ Expected outputs:
Native IDs for generated UMM-Var records will be of format:

```
<collection concept ID>-<variable LongName>
<collection concept ID>-<variable Name>
```

For variables that are hierarchical, slashes will be converted to underscores,
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.1.1
2.1.2
2 changes: 1 addition & 1 deletion tests/unit/test_generate_umm_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_generate_collection_umm_var_no_publication(self,
# not performed to avoid test brittleness.
expected_variables = set(self.rssmif16d_variables)

actual_variables = set([record['LongName']
actual_variables = set([record['Name']
for record in generated_umm_var])

self.assertSetEqual(actual_variables, expected_variables)
Expand Down
48 changes: 43 additions & 5 deletions tests/unit/test_umm_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,44 @@ def test_get_umm_var_all_fields(self):
r'https://cdn.earthdata.nasa.gov/umm/variable/v\d+\.\d+\.\d+$'
)

def test_get_umm_var_cf_long_name(self):
    """ Ensure that the `LongName` of a generated UMM-Var record is taken
        from the CF-Convention `long_name` attribute of a variable when that
        attribute is present in the granule. When the attribute is absent,
        the `LongName` should instead default to the full path of the
        variable, without the leading slash.

    """
    netcdf4_file = f'{self.tmp_dir}/input.nc4'

    # Write a minimal granule containing one variable that has a
    # `long_name` metadata attribute and one variable that does not.
    with Dataset(netcdf4_file, 'w') as dataset:
        dataset.setncatts({'short_name': 'test'})
        long_name_variable = dataset.createVariable('long_name', float64)
        long_name_variable.setncatts({'long_name': 'this is really long'})

        # This variable deliberately receives no metadata attributes, so
        # the object returned by `createVariable` is not retained.
        dataset.createVariable('no_long_name', float64)

    nc_varinfo = VarInfoFromNetCDF4(netcdf4_file)

    with self.subTest('CF-Convention long_name is used'):
        long_name_umm_var = get_umm_var(
            nc_varinfo, nc_varinfo.get_variable('/long_name')
        )

        self.assertEqual(
            long_name_umm_var['LongName'],
            'this is really long'
        )

    with self.subTest('No CF-Convention long_name attribute'):
        no_long_name_umm_var = get_umm_var(
            nc_varinfo, nc_varinfo.get_variable('/no_long_name')
        )

        self.assertEqual(
            no_long_name_umm_var['LongName'],
            'no_long_name'
        )

def test_get_umm_var_absent_fields_removed(self):
""" Ensure that a variable with minimal information populates the
minimum fields required by the UMM-Var schema, and that any fields
Expand Down Expand Up @@ -781,8 +819,8 @@ def test_publish_umm_var(self, mock_requests_put):

# Input parameters
umm_var_dict = {
'LongName': 'test_variable',
'Name': 'Test',
'LongName': 'This is a test variable',
'Name': 'test_variable',
'MetadataSpecification': {
'URL': 'https://foo.gov/umm/variable/v1.8.2',
'Name': 'UMM-Var',
Expand Down Expand Up @@ -931,21 +969,21 @@ def test_generate_variable_native_id(self):
"""
with self.subTest('Variable in flat file'):
umm_var_json = {'LongName': 'time'}
umm_var_json = {'Name': 'time'}
self.assertEqual(
generate_variable_native_id('C1234567890-PROV', umm_var_json),
'C1234567890-PROV-time'
)

with self.subTest('Variable in hierarchical file'):
umm_var_json = {'LongName': 'Grid/time'}
umm_var_json = {'Name': 'Grid/time'}
self.assertEqual(
generate_variable_native_id('C1234567890-PROV', umm_var_json),
'C1234567890-PROV-Grid_time'
)

with self.subTest('No leading slashes affect name.'):
umm_var_json = {'LongName': '/Grid/time'}
umm_var_json = {'Name': '/Grid/time'}
self.assertEqual(
generate_variable_native_id('C1234567890-PROV', umm_var_json),
'C1234567890-PROV-Grid_time'
Expand Down
10 changes: 7 additions & 3 deletions varinfo/umm_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def get_umm_var(var_info: VarInfoBase, variable: VariableBase) -> Dict:
variable_name = variable.full_name_path.lstrip('/')
umm_var_record = {
'Name': variable_name,
'LongName': variable_name,
'LongName': get_first_matched_attribute(variable, ['long_name'],
variable_name),
'StandardName': get_first_matched_attribute(variable,
['standard_name']),
'Definition': get_first_matched_attribute(
Expand Down Expand Up @@ -349,12 +350,15 @@ def generate_variable_native_id(collection_concept_id: str,
""" A helper function to create a CMR native ID given the collection
concept ID and the variable UMM-Var JSON. This native ID must be unique
within the entire provider. The initial implementation will be to
concatenate the collection concept ID and the long name of the variable
concatenate the collection concept ID and the name of the variable
while removing slashes that CMR will interpret as part of the URL path.
Note - the `Name` attribute of the generated UMM-Var record is the full
path of the variable.
"""
return '-'.join([collection_concept_id,
umm_var_record['LongName'].replace('/', '_').lstrip('_')])
umm_var_record['Name'].replace('/', '_').lstrip('_')])


def get_variable_type(var_info: VarInfoBase, variable: VariableBase) -> str:
Expand Down

0 comments on commit 00600b4

Please sign in to comment.