Skip to content

Commit

Permalink
Merge pull request NCAR#317 from justin-richling/dataclass-fixes
Browse files Browse the repository at this point in the history
Dataclass fixes
  • Loading branch information
justin-richling authored Aug 7, 2024
2 parents 157fc6f + 7b844bc commit 9a94260
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 95 deletions.
247 changes: 160 additions & 87 deletions lib/adf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def set_reference(self):
self.ref_var_loc = {v: self.adf.var_obs_dict[v]['obs_file'] for v in self.adf.var_obs_dict}
self.ref_labels = {v: self.adf.var_obs_dict[v]['obs_name'] for v in self.adf.var_obs_dict}
self.ref_var_nam = {v: self.adf.var_obs_dict[v]['obs_var'] for v in self.adf.var_obs_dict}
self.ref_case_label = "Obs"
if not self.adf.var_obs_dict:
warnings.warn("\t WARNING: reference is observations, but no observations found to plot against.")
else:
Expand All @@ -76,97 +77,34 @@ def set_reference(self):
# when using a reference simulation, allow a "special" attribute with the case name:
self.ref_case_label = self.adf.get_baseline_info("cam_case_name", required=True)
for v in self.adf.diag_var_list:
self.ref_var_nam[v] = v
self.ref_labels[v] = self.adf.get_baseline_info("cam_case_name", required=True)
f = self.get_reference_climo_file(v)
if f is None:
warnings.warn(f"\t WARNING: ADFData found no reference climo file for {v}")
continue
else:
if f:
self.ref_var_loc[v] = f
self.ref_var_nam[v] = v
self.ref_labels[v] = self.adf.get_baseline_info("cam_case_name", required=True)

def get_reference_climo_file(self, var):
    """Return a list of reference (baseline) climo files for variable *var*, or None.

    For observation comparisons the pre-registered obs file location is used;
    otherwise the baseline case's climatology directory is searched.
    """
    if self.adf.compare_obs:
        obs_file = self.ref_var_loc.get(var)
        if obs_file is None:
            return None
        return [obs_file]
    # NOTE: originally had this looking for *_baseline.nc
    baseline_dir = Path(self.adf.get_baseline_info("cam_climo_loc"))
    matches = sorted(baseline_dir.glob(f"{self.ref_case_label}_{var}_climo.nc"))
    return matches if matches else None

def load_reference_dataset(self, var):
    """Open and return the reference climatology Dataset for *var* (None if no files)."""
    ref_files = self.get_reference_climo_file(var)
    if ref_files:
        return self.load_dataset(ref_files)
    warnings.warn(f"ERROR: Did not find any reference files for variable: {var}. Will try to skip.")
    return None

def load_reference_da(self, variablename):
    """Return the reference DataArray for *variablename*.

    Applies any scale factor / offset and unit relabeling declared in the
    variable defaults (obs-specific converters when comparing to observations).
    Returns None (with a warning) when the reference dataset is unavailable.
    """
    ds = self.load_reference_dataset(variablename)
    if ds is None:
        # Bug fix: the original subscripted None (TypeError) when no
        # reference files were found; fail soft like the sibling loaders.
        warnings.warn(f"ERROR: Could not load reference data for variable: {variablename}. Will try to skip.")
        return None
    da = ds[self.ref_var_nam[variablename]]
    if variablename in self.adf.variable_defaults:
        vres = self.adf.variable_defaults[variablename]
        if self.adf.compare_obs:
            scale_factor = vres.get("obs_scale_factor",1)
            add_offset = vres.get("obs_add_offset", 0)
        else:
            scale_factor = vres.get("scale_factor",1)
            add_offset = vres.get("add_offset", 0)
        da = da * scale_factor + add_offset
        da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
    return da


def load_reference_regrid_dataset(self, case, field):
    """Open and return the regridded reference Dataset for *case*/*field* (None if missing)."""
    regrid_files = self.get_ref_regrid_file(case, field)
    if regrid_files:
        return self.load_dataset(regrid_files)
    warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
    return None


def load_reference_regrid_da(self, case, field):
    """Open and return the regridded reference DataArray for *case*/*field* (None if missing)."""
    regrid_files = self.get_ref_regrid_file(case, field)
    if regrid_files:
        return self.load_da(regrid_files, field)
    warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
    return None


def load_climo_da(self, case, variablename):
    """Return DataArray from climo file"""
    climo_files = self.get_climo_file(case, variablename)
    return self.load_da(climo_files, variablename)
def set_ref_var_loc(self):
    """Set reference climo file locations"""
    # Record the reference climo file list (possibly None) for every diagnostic variable.
    for varname in self.adf.diag_var_list:
        self.ref_var_loc[varname] = self.get_reference_climo_file(varname)


def load_climo_file(self, case, variablename):
    """Return Dataset for climo of variablename"""
    climo_files = self.get_climo_file(case, variablename)
    if climo_files:
        return self.load_dataset(climo_files)
    warnings.warn(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
    return None


def get_climo_file(self, case, variablename):
    """Retrieve the climo file path(s) for variablename for a specific case."""
    climo_locs = self.adf.get_cam_info("cam_climo_loc", required=True)  # one climo path per case
    # Pick the directory that matches the requested case.
    case_dir = Path(climo_locs[self.case_names.index(case)])
    return sorted(case_dir.glob(f"{case}_{variablename}_climo.nc"))

# Time series files
#------------------
# Test case(s)
def get_timeseries_file(self, case, field):
    """Return list of test time series files"""
    ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True)  # one time-series path per case
    # Pick the directory belonging to the requested case.
    ts_dir = Path(ts_locs[self.case_names.index(case)])
    return sorted(ts_dir.glob(f'{case}.*.{field}.*nc'))


# Reference case (baseline/obs)
def get_ref_timeseries_file(self, field):
"""Return list of reference time series files"""
if self.adf.compare_obs:
return None
else:
Expand All @@ -177,6 +115,7 @@ def get_ref_timeseries_file(self, field):


def load_timeseries_dataset(self, fils):
"""Return DataSet from time series file(s) and assign time to midpoint of interval"""
if (len(fils) == 0):
warnings.warn("Input file list is empty.")
return None
Expand All @@ -203,32 +142,130 @@ def load_timeseries_dataset(self, fils):
warnings.warn("Timeseries file does not have time bounds info.")
return xr.decode_cf(ds)

def get_ref_regrid_file(self, case, field):
    """Return sorted regridded-file paths for *case*/*field* from the regrid location."""
    regrid_dir = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
    return sorted(regrid_dir.glob(f"{case}_{field}_*.nc"))

#------------------


# Climatology files
#------------------

# Test case(s)
def load_climo_da(self, case, variablename):
    """Return DataArray from climo file"""
    # Fetch unit-conversion defaults before loading so they can be applied on read.
    offset, scale = self.get_value_converters(case, variablename)
    climo_files = self.get_climo_file(case, variablename)
    return self.load_da(climo_files, variablename, add_offset=offset, scale_factor=scale)


def load_climo_file(self, case, variablename):
    """Return Dataset for climo of variablename"""
    climo_files = self.get_climo_file(case, variablename)
    if not climo_files:
        warnings.warn(f"WARNING: Did not find climo file for variable: {variablename}. Will try to skip.")
        return None
    return self.load_dataset(climo_files)


def get_climo_file(self, case, variablename):
    """Retrieve the climo file path(s) for variablename for a specific case."""
    # One climatology directory per test case; select by case name.
    climo_locs = self.adf.get_cam_info("cam_climo_loc", required=True)
    which = self.case_names.index(case)
    return sorted(Path(climo_locs[which]).glob(f"{case}_{variablename}_climo.nc"))


# Reference case (baseline/obs)
def get_reference_climo_file(self, var):
    """Return a list of files to be used as reference (aka baseline) for variable var.

    Observation comparisons use the pre-registered obs file path; model
    baselines are found by globbing the baseline climo directory.
    Returns None when nothing is found.
    """
    if self.adf.compare_obs:
        obs_file = self.ref_var_loc.get(var)
        return None if obs_file is None else [obs_file]
    # NOTE: originally had this looking for *_baseline.nc
    baseline_dir = self.adf.get_baseline_info("cam_climo_loc")
    found = sorted(Path(baseline_dir).glob(f"{self.ref_case_label}_{var}_climo.nc"))
    return found or None

#------------------


# Regridded files
#------------------

# Test case(s)
def get_regrid_file(self, case, field):
    """Return list of test regridded files"""
    regrid_dir = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
    # The reference label names the reference data that defines the target grid.
    ref_label = self.ref_labels[field]
    return sorted(regrid_dir.glob(f"{ref_label}_{case}_{field}_*.nc"))


def load_regrid_dataset(self, case, field):
    """Return a data set to be used as reference (aka baseline) for variable field.

    Returns None (with a warning) when no regridded files exist for the
    case/field combination.
    """
    fils = self.get_regrid_file(case, field)
    if not fils:
        # Fix: the block contained both the old "ERROR:" and new "WARNING:"
        # warn lines (unresolved diff residue); emit a single warning.
        warnings.warn(f"WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
        return None
    return self.load_dataset(fils)


def load_regrid_da(self, case, field):
    """Return a data array to be used as reference (aka baseline) for variable field.

    Unit converters (offset/scale) from the variable defaults are applied by
    load_da. Returns None (with a warning) when no regridded files exist.
    """
    # Fix: the block contained unresolved diff residue (duplicate warn lines
    # and both old/new return statements); keep the new kwargs-passing form.
    add_offset, scale_factor = self.get_value_converters(case, field)
    fils = self.get_regrid_file(case, field)
    if not fils:
        warnings.warn(f"WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
        return None
    return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor)


# Reference case (baseline/obs)
def get_ref_regrid_file(self, case, field):
    """Return list of reference regridded files"""
    if self.adf.compare_obs:
        # Observation file locations were registered up front, one per field.
        obs_path = self.ref_var_loc.get(field, None)
        return [str(obs_path)] if obs_path else []
    regrid_dir = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
    return sorted(regrid_dir.glob(f"{case}_{field}_*.nc"))


def load_reference_regrid_dataset(self, case, field):
    """Return a data set to be used as reference (aka baseline) for variable field."""
    ref_files = self.get_ref_regrid_file(case, field)
    if ref_files:
        return self.load_dataset(ref_files)
    warnings.warn(f"WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
    return None


def load_reference_regrid_da(self, case, field):
    """Return a data array to be used as reference (aka baseline) for variable field."""
    offset, scale = self.get_value_converters(case, field)
    ref_files = self.get_ref_regrid_file(case, field)
    if not ref_files:
        warnings.warn(f"WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
        return None
    # When comparing against observations, translate the CAM-standard name
    # to the variable name used inside the observation file.
    load_name = self.ref_var_nam[field] if self.adf.compare_obs else field
    return self.load_da(ref_files, load_name, add_offset=offset, scale_factor=scale)

#------------------


# DataSet and DataArray load
#---------------------------

# Load DataSet
def load_dataset(self, fils):
"""Return xarray DataSet from file(s)"""
if (len(fils) == 0):
warnings.warn("Input file list is empty.")
return None
Expand All @@ -244,15 +281,51 @@ def load_dataset(self, fils):
warnings.warn(f"invalid data on load_dataset")
return ds


# Load DataArray
def load_da(self, fils, variablename, **kwargs):
    """Return xarray DataArray from file(s) w/ optional scale factor, offset, and/or new units.

    Keyword args
    ------------
    scale_factor : int/float, default 1 -- multiplicative unit conversion
    add_offset : int/float, default 0 -- additive unit conversion

    Returns None (with a warning) when the underlying dataset fails to load.
    """
    # Fix: the block contained unresolved diff residue (old and new
    # signatures, duplicate warn lines, and the superseded in-`if` scaling
    # statement); keep the new kwargs-driven conversion only.
    ds = self.load_dataset(fils)
    if ds is None:
        warnings.warn(f"WARNING: Load failed for {variablename}")
        return None
    da = (ds[variablename]).squeeze()
    scale_factor = kwargs.get('scale_factor', 1)
    add_offset = kwargs.get('add_offset', 0)
    da = da * scale_factor + add_offset
    if variablename in self.adf.variable_defaults:
        vres = self.adf.variable_defaults[variablename]
        da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
    else:
        da.attrs['units'] = 'none'
    return da

# Get variable conversion defaults, if applicable
def get_value_converters(self, case, variablename):
    """
    Get variable defaults if applicable
    - This is to get any scale factors or off-sets
    Returns
    -------
    add_offset - int/float
    scale_factor - int/float
    """
    defaults = self.adf.variable_defaults
    if variablename not in defaults:
        # No defaults declared: identity conversion.
        return 0, 1
    vres = defaults[variablename]
    # Observation-specific converters apply only when the requested case
    # is the reference (obs) label and we are comparing against obs.
    if (case == self.ref_labels[variablename]) and (self.adf.compare_obs):
        return vres.get("obs_add_offset", 0), vres.get("obs_scale_factor", 1)
    return vres.get("add_offset", 0), vres.get("scale_factor", 1)

#------------------




20 changes: 12 additions & 8 deletions lib/adf_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ def __init__(self, config_file, debug=False):
#that check this variable won't crash:
self.__cam_bl_climo_info = None

# Set baseline hist string object to None
self.__base_hist_str = None

#Also set data name for use below:
data_name = "Obs"
base_nickname = "Obs"
Expand Down Expand Up @@ -778,16 +781,17 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name):
errmsg = f"Time series directory '{input_ts_loc}' not found. Script is exiting."
raise AdfError(errmsg)

# Search for first variable in var_list to get a time series file to read
# Search for first available variable in var_list to get a time series file to read
# NOTE: it is assumed all the variables have the same dates!
# Also, it is assumed that only h0 files should be climo-ed.
ts_files = sorted(input_location.glob(f"{case_name}*h0*.{var_list[0]}.*nc"))

#Read hist_str (component.hist_num) from the yaml file, or set to default
hist_str = self.get_basic_info('hist_str')
#If hist_str is not present, then default to 'cam.h0':
if not hist_str:
hist_str = 'cam.h0'
for var in var_list:
ts_files = sorted(input_location.glob(f"{case_name}*h0*.{var}.*nc"))
if ts_files:
break
else:
logmsg = "get years for time series:"
logmsg = f"\tVar '{var}' not in dataset, skip to next to try and find climo years..."
self.debug_log(logmsg)

#Read in file(s)
if len(ts_files) == 1:
Expand Down

0 comments on commit 9a94260

Please sign in to comment.