diff --git a/lib/adf_dataset.py b/lib/adf_dataset.py
index 35d26a865..cb2430853 100644
--- a/lib/adf_dataset.py
+++ b/lib/adf_dataset.py
@@ -67,6 +67,7 @@ def set_reference(self):
             self.ref_var_loc = {v: self.adf.var_obs_dict[v]['obs_file'] for v in self.adf.var_obs_dict}
             self.ref_labels = {v: self.adf.var_obs_dict[v]['obs_name'] for v in self.adf.var_obs_dict}
             self.ref_var_nam = {v: self.adf.var_obs_dict[v]['obs_var'] for v in self.adf.var_obs_dict}
+            self.ref_case_label = "Obs"
             if not self.adf.var_obs_dict:
                 warnings.warn("\t WARNING: reference is observations, but no observations found to plot against.")
         else:
@@ -76,88 +77,24 @@ def set_reference(self):
             # when using a reference simulation, allow a "special" attribute with the case name:
             self.ref_case_label = self.adf.get_baseline_info("cam_case_name", required=True)
             for v in self.adf.diag_var_list:
+                self.ref_var_nam[v] = v
+                self.ref_labels[v] = self.adf.get_baseline_info("cam_case_name", required=True)
                 f = self.get_reference_climo_file(v)
-                if f is None:
-                    warnings.warn(f"\t WARNING: ADFData found no reference climo file for {v}")
-                    continue
-                else:
+                if f:
                     self.ref_var_loc[v] = f
-                    self.ref_var_nam[v] = v
-                    self.ref_labels[v] = self.adf.get_baseline_info("cam_case_name", required=True)
 
-    def get_reference_climo_file(self, var):
-        """Return a list of files to be used as reference (aka baseline) for variable var."""
-        if self.adf.compare_obs:
-            fils = self.ref_var_loc.get(var, None)
-            return [fils] if fils is not None else None
-        ref_loc = self.adf.get_baseline_info("cam_climo_loc")
-        # NOTE: originally had this looking for *_baseline.nc
-        fils = sorted(Path(ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
-        if fils:
-            return fils
-        return None
-
-    def load_reference_dataset(self, var):
-        fils = self.get_reference_climo_file(var)
-        if not fils:
-            warnings.warn(f"ERROR: Did not find any reference files for variable: {var}. Will try to skip.")
-            return None
-        return self.load_dataset(fils)
-
-    def load_reference_da(self, variablename):
-        da = self.load_reference_dataset(variablename)[self.ref_var_nam[variablename]]
-        if variablename in self.adf.variable_defaults:
-            vres = self.adf.variable_defaults[variablename]
-            if self.adf.compare_obs:
-                scale_factor = vres.get("obs_scale_factor",1)
-                add_offset = vres.get("obs_add_offset", 0)
-            else:
-                scale_factor = vres.get("scale_factor",1)
-                add_offset = vres.get("add_offset", 0)
-            da = da * scale_factor + add_offset
-            da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
-        return da
-
-
-    def load_reference_regrid_dataset(self, case, field):
-        fils = self.get_ref_regrid_file(case, field)
-        if not fils:
-            warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
-            return None
-        return self.load_dataset(fils)
-
-
-    def load_reference_regrid_da(self, case, field):
-        fils = self.get_ref_regrid_file(case, field)
-        if not fils:
-            warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
-            return None
-        return self.load_da(fils, field)
-
-
-    def load_climo_da(self, case, variablename):
-        """Return DataArray from climo file"""
-        fils = self.get_climo_file(case, variablename)
-        return self.load_da(fils, variablename)
+    def set_ref_var_loc(self):
+        """Set reference climo file locations"""
+        for v in self.adf.diag_var_list:
+            f = self.get_reference_climo_file(v)
+            self.ref_var_loc[v] = f
-
-    def load_climo_file(self, case, variablename):
-        """Return Dataset for climo of variablename"""
-        fils = self.get_climo_file(case, variablename)
-        if not fils:
-            warnings.warn(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
-            return None
-        return self.load_dataset(fils)
-
-    def get_climo_file(self, case, variablename):
-        """Retrieve the climo file path(s) for variablename for a specific case."""
-        a = self.adf.get_cam_info("cam_climo_loc", required=True) # list of paths (could be multiple cases)
-        caseindex = (self.case_names).index(case) # the entry for specified case
-        model_cl_loc = Path(a[caseindex])
-        return sorted(model_cl_loc.glob(f"{case}_{variablename}_climo.nc"))
-
 
+    # Time series files
+    #------------------
+    # Test case(s)
     def get_timeseries_file(self, case, field):
+        """Return list of test time series files"""
         ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True) # list of paths (could be multiple cases)
         caseindex = (self.case_names).index(case)
         ts_loc = Path(ts_locs[caseindex])
@@ -165,8 +102,9 @@ def get_timeseries_file(self, case, field):
         ts_files = sorted(ts_loc.glob(ts_filenames))
         return ts_files
 
-
+    # Reference case (baseline/obs)
     def get_ref_timeseries_file(self, field):
+        """Return list of reference time series files"""
         if self.adf.compare_obs:
             return None
         else:
@@ -177,6 +115,7 @@ def get_ref_timeseries_file(self, field):
 
 
     def load_timeseries_dataset(self, fils):
+        """Return DataSet from time series file(s) and assign time to midpoint of interval"""
         if (len(fils) == 0):
             warnings.warn("Input file list is empty.")
             return None
@@ -203,32 +142,130 @@ def load_timeseries_dataset(self, fils):
             warnings.warn("Timeseries file does not have time bounds info.")
         return xr.decode_cf(ds)
 
-    def get_ref_regrid_file(self, case, field):
-        model_rg_loc = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
-        return sorted(model_rg_loc.glob(f"{case}_{field}_*.nc"))
+
+    #------------------
+
+
+    # Climatology files
+    #------------------
+
+    # Test case(s)
+    def load_climo_da(self, case, variablename):
+        """Return DataArray from climo file"""
+        add_offset, scale_factor = self.get_value_converters(case, variablename)
+        fils = self.get_climo_file(case, variablename)
+        return self.load_da(fils, variablename, add_offset=add_offset, scale_factor=scale_factor)
+
+
+    def load_climo_file(self, case, variablename):
+        """Return Dataset for climo of variablename"""
+        fils = self.get_climo_file(case, variablename)
+        if not fils:
+            warnings.warn(f"WARNING: Did not find climo file for variable: {variablename}. Will try to skip.")
+            return None
+        return self.load_dataset(fils)
+
+
+    def get_climo_file(self, case, variablename):
+        """Retrieve the climo file path(s) for variablename for a specific case."""
+        a = self.adf.get_cam_info("cam_climo_loc", required=True) # list of paths (could be multiple cases)
+        caseindex = (self.case_names).index(case) # the entry for specified case
+        model_cl_loc = Path(a[caseindex])
+        return sorted(model_cl_loc.glob(f"{case}_{variablename}_climo.nc"))
+
+
+    # Reference case (baseline/obs)
+    def get_reference_climo_file(self, var):
+        """Return a list of files to be used as reference (aka baseline) for variable var."""
+        if self.adf.compare_obs:
+            fils = self.ref_var_loc.get(var, None)
+            return [fils] if fils is not None else None
+        ref_loc = self.adf.get_baseline_info("cam_climo_loc")
+        # NOTE: originally had this looking for *_baseline.nc
+        fils = sorted(Path(ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
+        if fils:
+            return fils
+        return None
+
+    #------------------
+
+    # Regridded files
+    #------------------
+    # Test case(s)
     def get_regrid_file(self, case, field):
+        """Return list of test regridded files"""
         model_rg_loc = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
         rlbl = self.ref_labels[field] # rlbl = "reference label" = the name of the reference data that defines target grid
         return sorted(model_rg_loc.glob(f"{rlbl}_{case}_{field}_*.nc"))
 
+
     def load_regrid_dataset(self, case, field):
+        """Return a data set to be used as reference (aka baseline) for variable field."""
         fils = self.get_regrid_file(case, field)
         if not fils:
-            warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
+            warnings.warn(f"WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
             return None
         return self.load_dataset(fils)
 
+
     def load_regrid_da(self, case, field):
+        """Return a data array to be used as reference (aka baseline) for variable field."""
+        add_offset, scale_factor = self.get_value_converters(case, field)
         fils = self.get_regrid_file(case, field)
         if not fils:
-            warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
+            warnings.warn(f"WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
             return None
-        return self.load_da(fils, field)
+        return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor)
 
+    # Reference case (baseline/obs)
+    def get_ref_regrid_file(self, case, field):
+        """Return list of reference regridded files"""
+        if self.adf.compare_obs:
+            obs_loc = self.ref_var_loc.get(field, None)
+            if obs_loc:
+                fils = [str(obs_loc)]
+            else:
+                fils = []
+        else:
+            model_rg_loc = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
+            fils = sorted(model_rg_loc.glob(f"{case}_{field}_*.nc"))
+        return fils
+
+
+    def load_reference_regrid_dataset(self, case, field):
+        """Return a data set to be used as reference (aka baseline) for variable field."""
+        fils = self.get_ref_regrid_file(case, field)
+        if not fils:
+            warnings.warn(f"WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
+            return None
+        return self.load_dataset(fils)
+
+
+    def load_reference_regrid_da(self, case, field):
+        """Return a data array to be used as reference (aka baseline) for variable field."""
+        add_offset, scale_factor = self.get_value_converters(case, field)
+        fils = self.get_ref_regrid_file(case, field)
+        if not fils:
+            warnings.warn(f"WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
+            return None
+        #Change the variable name from CAM standard to what is
+        # listed in variable defaults for this observation field
+        if self.adf.compare_obs:
+            field = self.ref_var_nam[field]
+        return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor)
+
+    #------------------
+
+
+    # DataSet and DataArray load
+    #---------------------------
+
+    # Load DataSet
     def load_dataset(self, fils):
+        """Return xarray DataSet from file(s)"""
         if (len(fils) == 0):
             warnings.warn("Input file list is empty.")
             return None
@@ -244,15 +281,51 @@ def load_dataset(self, fils):
             warnings.warn(f"invalid data on load_dataset")
         return ds
 
-
-    def load_da(self, fils, variablename):
+    # Load DataArray
+    def load_da(self, fils, variablename, **kwargs):
+        """Return xarray DataArray from files(s) w/ optional scale factor, offset, and/or new units"""
         ds = self.load_dataset(fils)
         if ds is None:
-            warnings.warn(f"ERROR: Load failed for {variablename}")
+            warnings.warn(f"WARNING: Load failed for {variablename}")
            return None
         da = (ds[variablename]).squeeze()
+        scale_factor = kwargs.get('scale_factor', 1)
+        add_offset = kwargs.get('add_offset', 0)
+        da = da * scale_factor + add_offset
         if variablename in self.adf.variable_defaults:
             vres = self.adf.variable_defaults[variablename]
-            da = da * vres.get("scale_factor",1) + vres.get("add_offset", 0)
             da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
+        else:
+            da.attrs['units'] = 'none'
         return da
+
+    # Get variable conversion defaults, if applicable
+    def get_value_converters(self, case, variablename):
+        """
+        Get variable defaults if applicable
+
+        - This is to get any scale factors or off-sets
+
+        Returns
+        -------
+        add_offset - int/float
+        scale_factor - int/float
+        """
+        add_offset = 0
+        scale_factor = 1
+        res = self.adf.variable_defaults
+        if variablename in res:
+            vres = res[variablename]
+            if (case == self.ref_labels[variablename]) and (self.adf.compare_obs):
+                scale_factor = vres.get("obs_scale_factor",1)
+                add_offset = vres.get("obs_add_offset", 0)
+            else:
+                scale_factor = vres.get("scale_factor",1)
+                add_offset = vres.get("add_offset", 0)
+        return add_offset, scale_factor
+
+    #------------------
+
+
+
+
\ No newline at end of file
diff --git a/lib/adf_info.py b/lib/adf_info.py
index cc5aa3357..a2c1851bf 100644
--- a/lib/adf_info.py
+++ b/lib/adf_info.py
@@ -163,6 +163,9 @@ def __init__(self, config_file, debug=False):
             #that check this variable won't crash:
             self.__cam_bl_climo_info = None
 
+            # Set baseline hist string object to None
+            self.__base_hist_str = None
+
             #Also set data name for use below:
             data_name = "Obs"
             base_nickname = "Obs"
@@ -778,16 +781,17 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name):
             errmsg = f"Time series directory '{input_ts_loc}' not found. Script is exiting."
             raise AdfError(errmsg)
 
-        # Search for first variable in var_list to get a time series file to read
+        # Search for first available variable in var_list to get a time series file to read
         # NOTE: it is assumed all the variables have the same dates!
         # Also, it is assumed that only h0 files should be climo-ed.
-        ts_files = sorted(input_location.glob(f"{case_name}*h0*.{var_list[0]}.*nc"))
-
-        #Read hist_str (component.hist_num) from the yaml file, or set to default
-        hist_str = self.get_basic_info('hist_str')
-        #If hist_str is not present, then default to 'cam.h0':
-        if not hist_str:
-            hist_str = 'cam.h0'
+        for var in var_list:
+            ts_files = sorted(input_location.glob(f"{case_name}*h0*.{var}.*nc"))
+            if ts_files:
+                break
+            else:
+                logmsg = "get years for time series:"
+                logmsg = f"\tVar '{var}' not in dataset, skip to next to try and find climo years..."
+                self.debug_log(logmsg)
 
         #Read in file(s)
         if len(ts_files) == 1: