Skip to content

Commit

Permalink
DAS-2231 Adds ability to get attributes for missing variables and the…
Browse files Browse the repository at this point in the history
… ability to get referenced variables (#34)

* Two methods added to support SMAP L3 products that are not CF compliant

* DAS-2231 - updates to unit tests

* DAS-2231 - Changes made based on PR feedback

* DAS-2231 - corrected a couple of issues from feedback that were missed in the last commit

* DAS-2231 - corrected a small issue from feedback that was missed

* DAS-2231 - changed absent_override to absent_variable based on PR feedback
  • Loading branch information
sudha-murthy authored Aug 27, 2024
1 parent fdb7260 commit bccf65b
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 8 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@

## v2.3.0
### 2024-08-26

The VarInfoBase.get_missing_variable_attributes method has been added to allow
someone to get metadata attributes from the configuration file for variables
that are absent from a file. An example usage is when a CF Convention grid
mapping variable is missing from a source file.
The VarInfoBase.get_references_for_attribute method has been added to retrieve
all unique variable references contained in a single metadata attribute for a
list of variables. For example, retrieving all references listed under the
coordinates metadata attribute.

## v2.2.2
### 2024-07-16

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.2.2
2.3.0
4 changes: 2 additions & 2 deletions tests/unit/data/test_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
},
{
"Applicability": {
"Variable_Pattern": "/absent_override"
"Variable_Pattern": "/absent_variable"
},
"Attributes": [
{
Expand Down Expand Up @@ -193,7 +193,7 @@
},
{
"Applicability": {
"Variable_Pattern": "/absent_override"
"Variable_Pattern": "/absent_variable"
},
"Attributes": [
{
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_cf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def setUpClass(cls):
cls.global_supplements = {'fakesat_global_supplement': 'fakesat value'}
cls.cf_overrides = {
'.*': {'collection_override': 'collection value'},
'/absent_override': {'extra_override': 'overriding value'},
'/absent_variable': {'extra_override': 'overriding value'},
'/coordinates_group/.*': {'coordinates': 'lat, lon'},
'/group/.*': {'group_override': 'group value'},
'/group/variable': {'variable_override': 'variable value'},
}
cls.cf_supplements = {
'.*': {'collection_supplement': 'FAKE99 supplement'},
'/absent_override': {'extra_override': 'supplemental value'},
'/absent_variable': {'extra_override': 'supplemental value'},
'/absent_supplement': {'extra_supplement': 'supplemental value'},
'/group4/.*': {'group_supplement': 'FAKE99 group4'},
}
Expand Down
45 changes: 45 additions & 0 deletions tests/unit/test_var_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,3 +862,48 @@ def test_is_science_variable(self):
self.assertFalse(dataset.is_science_variable(lat_variable))
# Check that a science variable returns True
self.assertTrue(dataset.is_science_variable(science_variable))

def test_get_missing_variable_attributes(self):
    """Ensure that the CF attributes for a variable are returned even when
    the variable is not present in the source granule or dmrpp file,
    as long as there is a CF override for the variable defined in the
    configuration JSON file.
    """
    var_info = VarInfoFromDmr(
        self.mock_dmr_two, 'FAKE99', config_file=self.config_file
    )
    expected_attributes = {
        'collection_supplement': 'FAKE99 supplement',
        'collection_override': 'collection value',
        'extra_override': 'overriding value',
    }

    with self.subTest('All CF attributes are retrieved for missing variable'):
        retrieved_attributes = var_info.get_missing_variable_attributes(
            '/absent_variable'
        )
        self.assertDictEqual(retrieved_attributes, expected_attributes)

def test_get_references_for_attribute(self):
    """Ensure that the complete set of unique references is returned
    when requesting all references present in a metadata attribute
    for the given list of variables.
    """
    var_info = VarInfoFromDmr(
        'tests/unit/data/GPM_3IMERGHH_example.dmr',
        'GPM_3IMERGHH',
        config_file=self.config_file,
    )

    # Each case: (sub-test description, requested variables, metadata
    # attribute name, expected set of references).
    test_cases = (
        (
            'All coordinate references for a variable',
            ['/Grid/precipitationCal'],
            'coordinates',
            {'/Grid/lat', '/Grid/time', '/Grid/lon'},
        ),
        (
            'All bounds references for the required dimensions',
            ['/Grid/lat', '/Grid/lon'],
            'bounds',
            {'/Grid/lat_bnds', '/Grid/lon_bnds'},
        ),
    )

    for description, variables, attribute_name, expected_references in test_cases:
        with self.subTest(description):
            self.assertSetEqual(
                var_info.get_references_for_attribute(variables, attribute_name),
                expected_references,
            )
4 changes: 2 additions & 2 deletions tests/unit/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,12 @@ def test_variable_cf_override_absent(self):
"""
dmr_variable = ET.fromstring(
f'<{self.namespace}Float64 name="absent_override">'
f'<{self.namespace}Float64 name="absent_variable">'
f'</{self.namespace}Float64>'
)

variable = VariableFromDmr(
dmr_variable, self.fakesat_config, self.namespace, '/absent_override'
dmr_variable, self.fakesat_config, self.namespace, '/absent_variable'
)

self.assertEqual(variable.attributes.get('extra_override'), 'overriding value')
Expand Down
40 changes: 39 additions & 1 deletion varinfo/var_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from abc import ABC, abstractmethod
from os.path import exists
from typing import Dict, Optional, Set, Tuple, Union
from typing import Dict, Optional, Set, Tuple, Union, Any
import json
import re
import xml.etree.ElementTree as ET
Expand Down Expand Up @@ -362,6 +362,44 @@ def get_required_dimensions(self, variables: Set[str]) -> Set[str]:
if self.get_variable(dimension) is not None
)

def get_missing_variable_attributes(self, variable_name: str) -> dict[str, Any]:
    """Retrieve the configured metadata attributes for a variable that is
    absent from the granule file (e.g. a grid_mapping variable in a
    collection that is not fully CF compliant).

    The attributes come solely from the CF configuration: the supplements
    and overrides applicable to `variable_name` are merged into a single
    dictionary. When the same attribute name appears in both, the value
    from the overrides is the one returned.
    """
    cf_attributes = self.cf_config.get_cf_attributes(variable_name)
    supplements = cf_attributes['cf_supplements']
    overrides = cf_attributes['cf_overrides']

    # Right-hand operand wins on key collisions, so overrides take precedence.
    return supplements | overrides

def get_references_for_attribute(
    self, list_of_variables: list[str], reference_attribute_name: str
) -> set[str]:
    """Return a single set of all references in a specific metadata attribute
    for a list of variables (e.g. bounds, coordinates, cf attributes). The full
    list of supported metadata attributes can be found in
    varinfo.utilities::CF_REFERENCE_ATTRIBUTES

    Each requested variable is looked up once via `self.get_variable`. A
    variable is skipped when it cannot be found (`get_variable` returns
    `None`) or when it has no entry for `reference_attribute_name` in its
    `references`. The references of the remaining variables are combined
    into one set, which removes duplicates. An empty set is returned when
    nothing matches.
    """
    all_references: set[str] = set()

    for variable_name in list_of_variables:
        variable = self.get_variable(variable_name)

        # Requested variables may be absent from the file; skip them rather
        # than failing on attribute access of `None`.
        if variable is None:
            continue

        variable_references = variable.references.get(reference_attribute_name)

        if variable_references is not None:
            all_references.update(variable_references)

    return all_references

def get_spatial_dimensions(self, variables: Set[str]) -> Set[str]:
"""Return a single set of all variables that are both used as
dimensions for any of the input variables, and that are horizontal
Expand Down

0 comments on commit bccf65b

Please sign in to comment.