From d423f5a144ea63923f12957808248bfc825f7dd8 Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Mon, 29 Mar 2021 00:45:48 -0400 Subject: [PATCH 01/18] add soil moisture data; buggy --- src/ILAMB/ConfSoilMoisture.py | 941 ++++++++++++++++++++++++++++++++++ src/ILAMB/Scoreboard.py | 5 +- src/ILAMB/Variable.py | 45 ++ src/ILAMB/data/sample.cfg | 5 + 4 files changed, 995 insertions(+), 1 deletion(-) create mode 100644 src/ILAMB/ConfSoilMoisture.py diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py new file mode 100644 index 00000000..b5f3815a --- /dev/null +++ b/src/ILAMB/ConfSoilMoisture.py @@ -0,0 +1,941 @@ +from .Confrontation import getVariableList +from .Confrontation import Confrontation +from .constants import earth_rad,mid_months,lbl_months,bnd_months +from .Variable import Variable +from .Regions import Regions +from . import ilamblib as il +from . import Post as post +from netCDF4 import Dataset +from copy import deepcopy +import pylab as plt +import numpy as np +import os,glob,re +from sympy import sympify +from cf_units import Unit + +from mpi4py import MPI +import logging +logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) + +def VariableReduce(var,region="global",time=None,depth=None,lat=None,lon=None): + ILAMBregions = Regions() + out = deepcopy(var) + out.data.mask += ILAMBregions.getMask(region,out) + if time is not None: + out = out.integrateInTime (t0=time[0] ,tf=time[1] ,mean=True) + if depth is not None and var.layered: + out = out.integrateInDepth(z0=depth[0],zf=depth[1],mean=True) + if lat is not None: + lat0 = np.argmin(np.abs(var.lat-lat[0])) + latf = np.argmin(np.abs(var.lat-lat[1]))+1 + wgt = earth_rad*(np.sin(var.lat_bnds[:,1])-np.sin(var.lat_bnds[:,0]))[lat0:latf] + np.seterr(over='ignore',under='ignore') + out.data = np.ma.average(out.data[...,lat0:latf,:],axis=-2,weights=wgt/wgt.sum()) + np.seterr(over='raise',under='raise') + out.lat = None + out.lat_bnd = None + out.spatial = False + if lon is not None: + lon0 = np.argmin(np.abs(var.lon-lon[0])) + lonf = np.argmin(np.abs(var.lon-lon[1]))+1 + wgt = earth_rad*(var.lon_bnds[:,1]-var.lon_bnds[:,0])[lon0:lonf] + np.seterr(over='ignore',under='ignore') + out.data = np.ma.average(out.data[...,lon0:lonf],axis=-1,weights=wgt/wgt.sum()) + np.seterr(over='raise',under='raise') + out.lon = None + out.lon_bnd = None + out.spatial = False + return out + +def TimeLatBias(ref,com): + # composite depth axis + d0 = max(ref.depth_bnds.min(),com.depth_bnds.min()) + df = min(ref.depth_bnds.max(),com.depth_bnds.max()) + d = np.unique(np.hstack([ref.depth_bnds.flatten(),com.depth_bnds.flatten()])) + d = d[(d>=d0)*(d<=df)] + db = np.asarray([d[:-1],d[1:]]).T + d = db.mean(axis=1) + # composite lat axis + l0 = max(ref.lat_bnds.min(),com.lat_bnds.min()) + lf = min(ref.lat_bnds.max(),com.lat_bnds.max()) + l = np.unique(np.hstack([ref.lat_bnds.flatten(),com.lat_bnds.flatten()])) + l = l[(l>=l0)*(l<=lf)] + lb = np.asarray([l[:-1],l[1:]]).T + l = lb.mean(axis=1) + # interpolation / difference + data = il.NearestNeighborInterpolation(com.depth,com.lat,com.data,d,l) + data -= il.NearestNeighborInterpolation(ref.depth,ref.lat,ref.data,d,l) + area = np.diff(db)[:,np.newaxis] * (earth_rad*(np.sin(lb[:,1])-np.sin(lb[:,0]))) + return Variable(name = ref.name.replace("timelonint","timelonbias"), + unit = ref.unit, + data = data, + area = area, + lat = l, + depth = d, + lat_bnds = lb, + depth_bnds = db) + +def CycleBias(ref,com): + # composite depth axis + d0 = max(ref.depth_bnds.min(),com.depth_bnds.min()) + df = 
min(ref.depth_bnds.max(),com.depth_bnds.max()) + d = np.unique(np.hstack([ref.depth_bnds.flatten(),com.depth_bnds.flatten()])) + d = d[(d>=d0)*(d<=df)] + db = np.asarray([d[:-1],d[1:]]).T + d = db.mean(axis=1) + # interpolation / difference + data = il.NearestNeighborInterpolation(com.time,com.depth,com.data,com.time,d) + data -= il.NearestNeighborInterpolation(ref.time,ref.depth,ref.data,ref.time,d) + return Variable(name = ref.name.replace("cycle","cyclebias"), + unit = ref.unit, + data = data, + time = mid_months, + time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T, + depth = d, + depth_bnds = db) + + + +class ConfSoilMoisture(Confrontation): + + def __init__(self,**keywords): + + # Calls the regular constructor + super(ConfSoilMoisture,self).__init__(**keywords) + + # Get/modify depths + self.depths = np.asarray(self.keywords.get("depths",[0,100,250]),dtype=float) + with Dataset(self.source) as dset: + v = dset.variables[self.variable] + depth_name = [d for d in v.dimensions if d in ["layer","depth"]] + + if len(depth_name) == 0: + # if there is no depth dimension, we assume the data is surface + self.depths = np.asarray([0],dtype=float) + else: + # if there are depths, then make sure that the depths + # at which we will compute are in the range of depths + # of the data + depth_name = depth_name[0] + data = dset.variables[dset.variables[depth_name].bounds][...] if "bounds" in dset.variables[depth_name].ncattrs() else dset.variables[depth_name][...] + self.depths = self.depths[(self.depths>=data.min())*(self.depths<=data.max())] + + # Setup a html layout for generating web views of the results + pages = [] + sections = ["Period Mean at %d [m]" % d for d in self.depths] + sections += ["Mean regional depth profiles"] + sections += ["Overlapping mean regional depth profiles"] + sections += ["Mean regional annual cycle"] + sections += ["Overlapping mean regional annual cycle"] + pages.append(post.HtmlPage("MeanState","Mean State")) + pages[-1].setHeader("CNAME / RNAME / MNAME") + pages[-1].setSections(sections) + pages.append(post.HtmlAllModelsPage("AllModels","All Models")) + pages[-1].setHeader("CNAME / RNAME") + pages[-1].setSections([]) + pages[-1].setRegions(self.regions) + pages.append(post.HtmlPage("DataInformation","Data Information")) + pages[-1].setSections([]) + pages[-1].text = "\n" + with Dataset(self.source) as dset: + for attr in dset.ncattrs(): + pages[-1].text += "
<p><b>&nbsp;&nbsp;%s:&nbsp;</b>%s</p>
\n" % (attr,str(dset.getncattr(attr)).encode('ascii','ignore')) + self.layout = post.HtmlLayout(pages,self.longname) + + def stageData(self,m): + + mem_slab = self.keywords.get("mem_slab",100000.) # Mb + + # peak at the reference dataset without reading much into memory + info = "" + unit = "" + with Dataset(self.source) as dset: + var = dset.variables[self.variable] + obs_t,obs_tb,obs_cb,obs_b,obs_e,cal = il.GetTime(var) + obs_nt = obs_t.size + obs_mem = var.size*8e-6 + unit = var.units + climatology = False if obs_cb is None else True + if climatology: + info += "[climatology]" + obs_cb = (obs_cb-1850)*365. + t0 = obs_cb[0]; tf = obs_cb[1] + else: + t0 = obs_tb[0,0]; tf = obs_tb[-1,1] + info += " contents span years %.1f to %.1f, est memory %d [Mb]" % (t0/365.+1850,tf/365.+1850,obs_mem) + logger.info("[%s][%s]%s" % (self.name,self.variable,info)) + + # to peak at the model, we need any variable that could be + # part of the expression to look at the time + info = "" + possible = [self.variable,] + self.alternate_vars + if self.derived is not None: possible += [str(s) for s in sympify(self.derived).free_symbols] + vname = [v for v in possible if v in m.variables.keys()] + if len(vname) == 0: + logger.debug("[%s] Could not find [%s] in the model results" % (self.name,",".join(possible))) + raise il.VarNotInModel() + vname = vname[0] + + # peak at the model dataset without reading much into memory + mod_nt = 0 + mod_mem = 0. + mod_t0 = 2147483647 + mod_tf = -2147483648 + for fname in m.variables[vname]: + with Dataset(fname) as dset: + var = dset.variables[vname] + mod_t,mod_tb,mod_cb,mod_b,mod_e,cal = il.GetTime(var,t0=t0-m.shift,tf=tf-m.shift) + if mod_t is None: + info += "\n %s does not overlap the reference" % (fname) + continue + mod_t += m.shift + mod_tb += m.shift + ind = np.where((mod_tb[:,0] >= t0)*(mod_tb[:,1] <= tf))[0] + if ind.size == 0: + info += "\n %s does not overlap the reference" % (fname) + continue + mod_t = mod_t [ind] + mod_tb = mod_tb[ind] + mod_t0 = min(mod_t0,mod_tb[ 0,0]) + mod_tf = max(mod_tf,mod_tb[-1,1]) + nt = mod_t.size + mod_nt += nt + mem = (var.size/var.shape[0]*nt)*8e-6 + mod_mem += mem + info += "\n %s spans years %.1f to %.1f, est memory in time bounds %d [Mb]" % (fname,mod_t.min()/365.+1850,mod_t.max()/365.+1850,mem) + info += "\n total est memory = %d [Mb]" % mod_mem + logger.info("[%s][%s][%s] reading model data from possibly many files%s" % (self.name,m.name,vname,info)) + if mod_t0 > mod_tf: + logger.debug("[%s] Could not find [%s] in the model results in the given time frame, tinput = [%.1f,%.1f]" % (self.name,",".join(possible),t0,tf)) + raise il.VarNotInModel() + + # if the reference is a climatology, then build a model climatology in slabs + info = "" + if climatology: + + # how many slabs + ns = int(np.floor(mod_mem/mem_slab))+1 + ns = min(max(1,ns),mod_nt) + logger.info("[%s][%s] building climatology in %d slabs" % (self.name,m.name,ns)) + + # across what times? + slab_t = (mod_tf-mod_t0)*np.linspace(0,1,ns+1)+mod_t0 + slab_t = np.floor(slab_t / 365)*365 + bnd_months[(np.abs(bnd_months[:,np.newaxis] - (slab_t % 365))).argmin(axis=0)] + + # ready to slab + tb_prev = None + data = None + dnum = None + for i in range(ns): + + v = m.extractTimeSeries(self.variable, + alt_vars = self.alternate_vars, + expression = self.derived, + initial_time = slab_t[i], + final_time = slab_t[i+1]).convert(unit) + + # trim does not work properly so we will add a manual check ourselves + if tb_prev is None: + tb_prev = v.time_bnds[...] 
+ else: + if np.allclose(tb_prev[-1],v.time_bnds[0]): + v.data = v.data[1:] + v.time = v.time[1:] + v.time_bnds = v.time_bnds[1:] + tb_prev = v.time_bnds[...] + if v.time.size == 0: continue + + mind = (np.abs(mid_months[:,np.newaxis]-(v.time % 365))).argmin(axis=0) + if data is None: + data = np.ma.zeros((12,)+v.data.shape[1:]) + dnum = np.ma.zeros(data.shape,dtype=int) + data[mind,...] += v.data + dnum[mind,...] += 1 + with np.errstate(over='ignore',under='ignore'): + data = data / dnum.clip(1) + + # return variables + obs = Variable(filename = self.source, + variable_name = self.variable, + alternate_vars = self.alternate_vars) + mod = Variable(name = obs.name, + unit = unit, + data = data, + time = obs.time, + lat = v.lat, + lon = v.lon, + depth = v.depth, + time_bnds = obs.time_bnds, + lat_bnds = v.lat_bnds, + lon_bnds = v.lon_bnds, + depth_bnds = v.depth_bnds) + + yield obs,mod + + # if obs is historical, then we yield slabs of both + else: + + obs_mem *= (mod_tf-mod_t0)/(tf-t0) + mod_t0 = max(mod_t0,t0) + mod_tf = min(mod_tf,tf) + ns = int(np.floor(max(obs_mem,mod_mem)/mem_slab))+1 + ns = min(min(max(1,ns),mod_nt),obs_nt) + logger.info("[%s][%s] staging data in %d slabs" % (self.name,m.name,ns)) + + # across what times? + slab_t = (mod_tf-mod_t0)*np.linspace(0,1,ns+1)+mod_t0 + slab_t = np.floor(slab_t / 365)*365 + bnd_months[(np.abs(bnd_months[:,np.newaxis] - (slab_t % 365))).argmin(axis=0)] + + obs_tb = None; mod_tb = None + for i in range(ns): + + # get reference variable + obs = Variable(filename = self.source, + variable_name = self.variable, + alternate_vars = self.alternate_vars, + t0 = slab_t[i], + tf = slab_t[i+1]).trim(t=[slab_t[i],slab_t[i+1]]) + if obs_tb is None: + obs_tb = obs.time_bnds[...] + else: + if np.allclose(obs_tb[-1],obs.time_bnds[0]): + obs.data = obs.data[1:] + obs.time = obs.time[1:] + obs.time_bnds = obs.time_bnds[1:] + assert np.allclose(obs.time_bnds[0,0],obs_tb[-1,1]) + obs_tb = obs.time_bnds[...] + + # get model variable + mod = m.extractTimeSeries(self.variable, + alt_vars = self.alternate_vars, + expression = self.derived, + initial_time = slab_t[i], + final_time = slab_t[i+1]).trim(t=[slab_t[i],slab_t[i+1]]).convert(obs.unit) + if mod_tb is None: + mod_tb = mod.time_bnds[...] + else: + if np.allclose(mod_tb[-1],mod.time_bnds[0]): + mod.data = mod.data[1:] + mod.time = mod.time[1:] + mod.time_bnds = mod.time_bnds[1:] + assert np.allclose(mod.time_bnds[0,0],mod_tb[-1,1]) + mod_tb = mod.time_bnds[...] 
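# --- Editor's note (not part of the patch): the slab-overlap guard above is
# repeated verbatim for the obs and mod slabs. A minimal standalone sketch of
# the idea, assuming a simple container with numpy `time`, `time_bnds`, and
# `data` arrays (the hypothetical `Slab` stands in for ILAMB's Variable):
import numpy as np

class Slab:
    def __init__(self, time, time_bnds, data):
        self.time, self.time_bnds, self.data = time, time_bnds, data

def drop_leading_overlap(slab, tb_prev):
    # If this slab's first time interval repeats the previous slab's last
    # interval, drop the duplicated leading sample from every time-indexed array.
    if tb_prev is not None and np.allclose(tb_prev[-1], slab.time_bnds[0]):
        slab.data      = slab.data[1:]
        slab.time      = slab.time[1:]
        slab.time_bnds = slab.time_bnds[1:]
    return slab, slab.time_bnds.copy()
# --- end editor's note ---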
+ assert obs.time.size == mod.time.size + yield obs,mod + + def confront(self,m): + + def _addDepth(v): + v.depth = np.asarray([.5]) + v.depth_bnds = np.asarray([[0.,1.]]) + shp = list(v.data.shape) + shp.insert(1,1) + v.data.shape = shp + v.layered = True + return v + + mod_file = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) + obs_file = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name, )) + with il.FileContextManager(self.master,mod_file,obs_file) as fcm: + + # Encode some names and colors + fcm.mod_dset.setncatts({"name" :m.name, + "color":m.color, + "complete":0}) + if self.master: + fcm.obs_dset.setncatts({"name" :"Benchmark", + "color":np.asarray([0.5,0.5,0.5]), + "complete":0}) + + # Combined IOMB and one-layer analysis from "Confrontation.py" + obs_timeint = {}; mod_timeint = {} + obs_depth = {}; mod_depth = {} + ocyc = {}; oN = {} + mcyc = {}; mN = {} + for depth in self.depths: + dlbl = "%d" % depth + obs_timeint[dlbl] = [] + mod_timeint[dlbl] = [] + for region in self.regions: + obs_depth[region] = [] + mod_depth[region] = [] + unit = None + max_obs = -1e20 + for obs,mod in self.stageData(m): + + # if the data has no depth, we assume it is surface + if not obs.layered: obs = _addDepth(obs) + if not mod.layered: mod = _addDepth(mod) + max_obs = max(max_obs,obs.data.max()) + + # !!! Here need to add an integration step to perform surface analysis + + # time bounds for this slab + tb = obs.time_bnds[[0,-1],[0,1]].reshape((1,2)) + t = np.asarray([tb.mean()]) + + # mean lat/lon slices at various depths + for depth in self.depths: + + dlbl = "%d" % depth + + # !!! the +/- 1 may be incorrect + z = obs.integrateInDepth(z0=depth-1.,zf=depth+1,mean=True).integrateInTime(mean=True) + unit = z.unit + obs_timeint[dlbl].append(Variable(name = "timeint%s" % dlbl, + unit = z.unit, + data = z.data.reshape((1,)+z.data.shape), + time = t, time_bnds = tb, + lat = z.lat, lat_bnds = z.lat_bnds, + lon = z.lon, lon_bnds = z.lon_bnds)) + z = mod + if mod.layered: z = z.integrateInDepth(z0=depth-1.,zf=depth+1,mean=True) + z = z.integrateInTime(mean=True) + mod_timeint[dlbl].append(Variable(name = "timeint%s" % dlbl, + unit = z.unit, + data = z.data.reshape((1,)+z.data.shape), + time = t, time_bnds = tb, + lat = z.lat, lat_bnds = z.lat_bnds, + lon = z.lon, lon_bnds = z.lon_bnds)) + + # mean + for region in self.regions: + z = VariableReduce(obs,region=region,time=tb[0],lon=[-180.,+180.]) + z.time = t; z.time_bnds = tb; z.temporal = True; z.data.shape = (1,)+z.data.shape + obs_depth[region].append(z) + + z = VariableReduce(mod,region=region,time=tb[0],lon=[-180.,+180.]) + z.time = t; z.time_bnds = tb; z.temporal = True; z.data.shape = (1,)+z.data.shape + mod_depth[region].append(z) + + # annual cycle in slabs + for region in self.regions: + z = obs.integrateInSpace(region=region,mean=True) + if region not in ocyc: + ocyc[region] = np.ma.zeros((12,)+z.data.shape[1:]) + oN [region] = np.ma.zeros((12,)+z.data.shape[1:],dtype=int) + i = (np.abs(mid_months[:,np.newaxis]-(z.time % 365))).argmin(axis=0) + (ocyc[region])[i,...] += z.data + (oN [region])[i,...] += 1 + + z = mod.integrateInSpace(region=region,mean=True) + if region not in mcyc: + mcyc[region] = np.ma.zeros((12,)+z.data.shape[1:]) + mN [region] = np.ma.zeros((12,)+z.data.shape[1:],dtype=int) + i = (np.abs(mid_months[:,np.newaxis]-(z.time % 365))).argmin(axis=0) + (mcyc[region])[i,...] += z.data + (mN [region])[i,...] 
+= 1 + + + # vertical integrated analysis + print(obs) + z = obs.integrateInDepth(z0 = obs.depths[0]-1., zf = obs.depths[-1]+1., mean = True) + obs_layerint = Variable(name = "layerint", unit = z.unit, data = z.data, + time = t, time_bnds = tb, + lat = z.lat, lat_bnds = z.lat_bnds, + lon = z.lon, lon_bnds = z.lon_bnds) + obs_timeint = obs_layerint.integrateInTime(mean = True) + if mod.layered: + z = mod.integrateInDepth(z0 = obs.depths[0]-1., zf = obs.depths[-1]+1., mean = True) + else: + z = mod + mod_layerint = Variable(name = "layerint", unit = z.unit, data = z.data, + time = t, time_bnds = tb, + lat = z.lat, lat_bnds = z.lat_bnds, + lon = z.lon, lon_bnds = z.lon_bnds) + mod_timeint = mod_layerint.integrateInTime(mean = True) + + # Read in some options and run the mean state analysis + mass_weighting = self.keywords.get("mass_weighting",False) + skip_rmse = self.keywords.get("skip_rmse" ,False) + skip_iav = self.keywords.get("skip_iav" ,True ) + skip_cycle = self.keywords.get("skip_cycle" ,False) + if obs_layerint.spatial: + print(m) + il.AnalysisMeanStateSpace(obs_layerint,mod_layerint,dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_rmse = skip_rmse, + skip_iav = skip_iav, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting, + ref_timeint = obs_timeint, + com_timeint = mod_timeint) + else: + il.AnalysisMeanStateSites(obs_layerint,mod_layerint,dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_rmse = skip_rmse, + skip_iav = skip_iav, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting) + + # combine time slabs from the different depths + large_bias = float(self.keywords.get("large_bias",0.1*max_obs)) + + for dlbl in obs_timeint.keys(): + + # period means and bias + obs_tmp = il.CombineVariables(obs_timeint[dlbl]).integrateInTime(mean=True) + mod_tmp = il.CombineVariables(mod_timeint[dlbl]).integrateInTime(mean=True) + obs_tmp.name = obs_tmp.name.split("_")[0] + mod_tmp.name = mod_tmp.name.split("_")[0] + bias = obs_tmp.spatialDifference(mod_tmp) + bias.name = mod_tmp.name.replace("timeint","bias") + mod_tmp.toNetCDF4(fcm.mod_dset,group="MeanState") + bias.toNetCDF4(fcm.mod_dset,group="MeanState") + bias_score = None + if dlbl == "0": + with np.errstate(all="ignore"): + bias_score = Variable(name = bias.name.replace("bias","biasscore"), + data = np.exp(-np.abs(bias.data)/large_bias), + unit = "1", + ndata = bias.ndata, + lat = bias.lat, lat_bnds = bias.lat_bnds, + lon = bias.lon, lon_bnds = bias.lon_bnds, + area = bias.area) + bias_score.toNetCDF4(fcm.mod_dset,group="MeanState") + + for region in self.regions: + + sval = mod_tmp.integrateInSpace(region=region,mean=True) + sval.name = "Period Mean at %s %s" % (dlbl,region) + sval.toNetCDF4(fcm.mod_dset,group="MeanState") + + sval = bias.integrateInSpace(region=region,mean=True) + sval.name = "Bias at %s %s" % (dlbl,region) + sval.toNetCDF4(fcm.mod_dset,group="MeanState") + + if bias_score is not None: + sval = bias_score.integrateInSpace(region=region,mean=True) + sval.name = "Bias Score at %s %s" % (dlbl,region) + sval.toNetCDF4(fcm.mod_dset,group="MeanState") + + if self.master: + obs_tmp.toNetCDF4(fcm.obs_dset,group="MeanState") + for region in self.regions: + sval = obs_tmp.integrateInSpace(region=region,mean=True) + sval.name = "Period Mean at %s 
%s" % (dlbl,region) + sval.toNetCDF4(fcm.obs_dset,group="MeanState") + + # combine depth/lat slabs for different regions + for region in self.regions: + mod_tmp = il.CombineVariables(mod_depth[region]).integrateInTime(mean=True) + mod_tmp.name = "timelonint_of_%s_over_%s" % (self.variable,region) + mod_tmp.toNetCDF4(fcm.mod_dset,group="MeanState") + obs_tmp = il.CombineVariables(obs_depth[region]).integrateInTime(mean=True) + obs_tmp.name = "timelonint_of_%s_over_%s" % (self.variable,region) + mod_bias = TimeLatBias(obs_tmp,mod_tmp) + mod_bias.toNetCDF4(fcm.mod_dset,group="MeanState") + np.seterr(over='ignore',under='ignore') + ocyc[region] = ocyc[region]/(oN[region].clip(1)) + mcyc[region] = mcyc[region]/(mN[region].clip(1)) + + np.seterr(over='raise',under='raise') + mcyc[region] = Variable(name = "cycle_of_%s_over_%s" % (self.variable,region), + unit = mod.unit, + data = mcyc[region], + depth = mod.depth, + depth_bnds = mod.depth_bnds, + time = mid_months) + ocyc[region] = Variable(name = "cycle_of_%s_over_%s" % (self.variable,region), + unit = obs.unit, + data = ocyc[region], + depth = obs.depth, + depth_bnds = obs.depth_bnds, + time = mid_months) + cyc_bias = CycleBias(ocyc[region],mcyc[region]) + cyc_bias .toNetCDF4(fcm.mod_dset,group="MeanState") + mcyc[region].toNetCDF4(fcm.mod_dset,group="MeanState") + if self.master: + obs_tmp .toNetCDF4(fcm.obs_dset,group="MeanState") + ocyc[region].toNetCDF4(fcm.obs_dset,group="MeanState") + + fcm.mod_dset.setncattr("complete",1) + if self.master: fcm.obs_dset.setncattr("complete",1) + + def modelPlots(self,m): + + def _fheight(region): + if region in ["arctic","southern"]: return 6.8 + return 2.8 + + bname = "%s/%s_Benchmark.nc" % (self.output_path,self.name) + fname = "%s/%s_%s.nc" % (self.output_path,self.name,m.name) + if not os.path.isfile(bname): return + if not os.path.isfile(fname): return + + # get the HTML page and set table priorities + page = [page for page in self.layout.pages if "MeanState" in page.name][0] + page.priority = [" %d " % d for d in self.depths] + page.priority += ["Period","Bias"] + page.priority += ["Score","Overall"] + + # model plots + cmap = { "timeint" : self.cmap, + "bias" : "seismic", + "biasscore" : "score" } + plbl = { "timeint" : "MEAN", + "bias" : "BIAS", + "biasscore" : "BIAS SCORE" } + with Dataset(fname) as dataset: + group = dataset.groups["MeanState"] + variables = getVariableList(group) + color = dataset.getncattr("color") + for ptype in ["timeint","bias","biasscore"]: + for vname in [v for v in variables if ptype in v]: + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + try: + z = int(vname.replace(ptype,"")) + except: + continue + page.addFigure("Period Mean at %d [m]" % z, + vname, + "MNAME_RNAME_%s.png" % vname, + side = "MODEL %s AT %d [m]" % (plbl[ptype],z), + legend = True) + for region in self.regions: + ax = var.plot(None, + region = region, + vmin = self.limits[vname]["min"], + vmax = self.limits[vname]["max"], + cmap = cmap[ptype], + land = 0.750, + water = 0.875) + fig = ax.get_figure() + fig.savefig("%s/%s_%s_%s.png" % (self.output_path,m.name,region,vname)) + plt.close() + + for region in self.regions: + + vname = "timelonint_of_%s_over_%s" % (self.variable,region) + if vname in variables: + var0 = Variable(filename=bname,variable_name=vname,groupname="MeanState") + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + bias = Variable(filename=fname,variable_name=vname.replace("timelonint","timelonbias"),groupname="MeanState") + if region 
== "global": + page.addFigure("Mean regional depth profiles", + "timelonint", + "MNAME_RNAME_timelonint.png", + side = "MODEL DEPTH PROFILE", + legend = True, + longname = "Time/longitude averaged profile") + page.addFigure("Overlapping mean regional depth profiles", + "timelonints", + "MNAME_RNAME_timelonints.png", + side = "MODEL DEPTH PROFILE", + legend = True, + longname = "Overlapping Time/longitude averaged profile") + page.addFigure("Overlapping mean regional depth profiles", + "timelonbias", + "MNAME_RNAME_timelonbias.png", + side = "MODEL DEPTH PROFILE BIAS", + legend = True, + longname = "Overlapping Time/longitude averaged profile bias") + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + l = np.hstack([var .lat_bnds [:,0],var .lat_bnds [-1,1]]) + d0 = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]]) + d = np.hstack([var .depth_bnds[:,0],var .depth_bnds[-1,1]]) + ind = np.all(var.data.mask,axis=0) + ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int) + b = ind.min() + e = ind.max()+1 + ax.pcolormesh(l[b:(e+1)],d,var.data[:,b:e], + vmin = self.limits["timelonint"]["global"]["min"], + vmax = self.limits["timelonint"]["global"]["max"], + cmap = self.cmap) + ax.set_xlabel("latitude") + ax.set_ylim((d.max(),d.min())) + ax.set_ylabel("depth [m]") + fig.savefig("%s/%s_%s_timelonint.png" % (self.output_path,m.name,region)) + ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) + fig.savefig("%s/%s_%s_timelonints.png" % (self.output_path,m.name,region)) + plt.close() + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + l = np.hstack([bias.lat_bnds [:,0],bias.lat_bnds [-1,1]]) + d0 = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]]) + d = np.hstack([bias.depth_bnds[:,0],bias.depth_bnds[-1,1]]) + ind = np.all(bias.data.mask,axis=0) + ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int) + b = ind.min() + e = ind.max()+1 + ax.pcolormesh(l[b:(e+1)],d,bias.data[:,b:e], + vmin = self.limits["timelonbias"]["global"]["min"], + vmax = self.limits["timelonbias"]["global"]["max"], + cmap = "seismic") + ax.set_xlabel("latitude") + ax.set_ylim((d.max(),d.min())) + ax.set_ylabel("depth [m]") + ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) + fig.savefig("%s/%s_%s_timelonbias.png" % (self.output_path,m.name,region)) + plt.close() + + + vname = "cycle_of_%s_over_%s" % (self.variable,region) + if vname in variables: + var0 = Variable(filename=bname,variable_name=vname,groupname="MeanState") + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + bias = Variable(filename=fname,variable_name=vname.replace("cycle","cyclebias"),groupname="MeanState") + if region == "global": + page.addFigure("Mean regional annual cycle", + "cycle", + "MNAME_RNAME_cycle.png", + side = "MODEL ANNUAL CYCLE", + legend = True, + longname = "Annual cycle") + page.addFigure("Overlapping mean regional annual cycle", + "cycles", + "MNAME_RNAME_cycles.png", + side = "MODEL ANNUAL CYCLE", + legend = True, + longname = "Overlapping annual cycle") + page.addFigure("Overlapping mean regional annual cycle", + "cyclebias", + "MNAME_RNAME_cyclebias.png", + side = "MODEL ANNUAL CYCLE BIAS", + legend = True, + longname = "Overlapping annual cycle bias") + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + d0 = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]]) + d = np.hstack([var .depth_bnds[:,0],var .depth_bnds[-1,1]]) + ax.pcolormesh(bnd_months,d,var.data.T, + vmin = self.limits["cycle"]["global"]["min"], + vmax = 
self.limits["cycle"]["global"]["max"], + cmap = self.cmap) + ax.set_xticks (mid_months) + ax.set_xticklabels(lbl_months) + ax.set_ylim((d.max(),d.min())) + ax.set_ylabel("depth [m]") + fig.savefig("%s/%s_%s_cycle.png" % (self.output_path,m.name,region)) + ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) + fig.savefig("%s/%s_%s_cycles.png" % (self.output_path,m.name,region)) + plt.close() + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + ax.pcolormesh(bnd_months, + np.hstack([bias.depth_bnds[:,0],bias.depth_bnds[-1,1]]), + bias.data.T, + vmin = self.limits["cyclebias"]["global"]["min"], + vmax = self.limits["cyclebias"]["global"]["max"], + cmap = "seismic") + ax.set_xticks (mid_months) + ax.set_xticklabels(lbl_months) + ax.set_ylim((d.max(),d.min())) + ax.set_ylabel("depth [m]") + ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) + fig.savefig("%s/%s_%s_cyclebias.png" % (self.output_path,m.name,region)) + plt.close() + + + # benchmark plots + if not self.master: return + with Dataset(bname) as dataset: + group = dataset.groups["MeanState"] + variables = getVariableList(group) + color = dataset.getncattr("color") + for ptype in ["timeint"]: + for vname in [v for v in variables if ptype in v]: + var = Variable(filename=bname,variable_name=vname,groupname="MeanState") + z = int(vname.replace(ptype,"")) + page.addFigure("Period Mean at %d [m]" % z, + "benchmark_%s" % vname, + "Benchmark_RNAME_%s.png" % vname, + side = "BENCHMARK %s AT %d [m]" % (plbl[ptype],z), + legend = True) + for region in self.regions: + ax = var.plot(None, + region = region, + vmin = self.limits[vname]["min"], + vmax = self.limits[vname]["max"], + cmap = cmap[ptype], + land = 0.750, + water = 0.875) + fig = ax.get_figure() + fig.savefig("%s/Benchmark_%s_%s.png" % (self.output_path,region,vname)) + plt.close() + + for region in self.regions: + + vname = "timelonint_of_%s_over_%s" % (self.variable,region) + if vname in variables: + var0 = Variable(filename=fname,variable_name=vname,groupname="MeanState") + var = Variable(filename=bname,variable_name=vname,groupname="MeanState") + if region == "global": + page.addFigure("Mean regional depth profiles", + "benchmark_timelonint", + "Benchmark_RNAME_timelonint.png", + side = "BENCHMARK DEPTH PROFILE", + legend = True, + longname = "Time/longitude averaged profile") + page.addFigure("Overlapping mean regional depth profiles", + "benchmark_timelonints", + "Benchmark_RNAME_timelonints.png", + side = "BENCHMARK DEPTH PROFILE", + legend = True, + longname = "Overlapping Time/longitude averaged profile") + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + l = np.hstack([var .lat_bnds [:,0],var .lat_bnds [-1,1]]) + d0 = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]]) + d = np.hstack([var .depth_bnds[:,0],var .depth_bnds[-1,1]]) + ind = np.all(var.data.mask,axis=0) + ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int) + b = ind.min() + e = ind.max()+1 + ax.pcolormesh(l[b:(e+1)],d,var.data[:,b:e], + vmin = self.limits["timelonint"]["global"]["min"], + vmax = self.limits["timelonint"]["global"]["max"], + cmap = self.cmap) + ax.set_xlabel("latitude") + ax.set_ylim((d.max(),d.min())) + ax.set_ylabel("depth [m]") + fig.savefig("%s/Benchmark_%s_timelonint.png" % (self.output_path,region)) + ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) + fig.savefig("%s/Benchmark_%s_timelonints.png" % (self.output_path,region)) + plt.close() + + vname = "cycle_of_%s_over_%s" % (self.variable,region) + if vname in variables: + var0 = 
Variable(filename=bname,variable_name=vname,groupname="MeanState") + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + if region == "global": + page.addFigure("Mean regional annual cycle", + "benchmark_cycle", + "Benchmark_RNAME_cycle.png", + side = "BENCHMARK ANNUAL CYCLE", + legend = True, + longname = "Annual cycle") + page.addFigure("Overlapping mean regional annual cycle", + "benchmark_cycles", + "Benchmark_RNAME_cycles.png", + side = "BENCHMARK ANNUAL CYCLE", + legend = True, + longname = "Overlapping annual cycle") + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + d = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]]) + d0 = np.hstack([var .depth_bnds[:,0],var .depth_bnds[-1,1]]) + ax.pcolormesh(bnd_months,d,var0.data.T, + vmin = self.limits["cycle"]["global"]["min"], + vmax = self.limits["cycle"]["global"]["max"], + cmap = self.cmap) + ax.set_xticks (mid_months) + ax.set_xticklabels(lbl_months) + ax.set_ylim((d.max(),d.min())) + ax.set_ylabel("depth [m]") + fig.savefig("%s/%s_%s_cycle.png" % (self.output_path,"Benchmark",region)) + ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) + fig.savefig("%s/%s_%s_cycles.png" % (self.output_path,"Benchmark",region)) + plt.close() + + def determinePlotLimits(self): + + # Pick limit type + max_str = "up99"; min_str = "dn99" + if self.keywords.get("limit_type","99per") == "minmax": + max_str = "max"; min_str = "min" + + # Determine the min/max of variables over all models + limits = {} + for fname in glob.glob("%s/*.nc" % self.output_path): + with Dataset(fname) as dataset: + if "MeanState" not in dataset.groups: continue + group = dataset.groups["MeanState"] + variables = [v for v in group.variables.keys() if (v not in group.dimensions.keys() and + "_bnds" not in v and + group.variables[v][...].size > 1)] + for vname in variables: + var = group.variables[vname] + pname = vname.split("_")[ 0] + if "_score" in vname: + pname = "_".join(vname.split("_")[:2]) + if "_over_" in vname: + region = vname.split("_over_")[-1] + if pname not in limits: limits[pname] = {} + if region not in limits[pname]: + limits[pname][region] = {} + limits[pname][region]["min"] = +1e20 + limits[pname][region]["max"] = -1e20 + limits[pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pname][region]["min"] = min(limits[pname][region]["min"],var.getncattr("min")) + limits[pname][region]["max"] = max(limits[pname][region]["max"],var.getncattr("max")) + else: + if pname not in limits: + limits[pname] = {} + limits[pname]["min"] = +1e20 + limits[pname]["max"] = -1e20 + limits[pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pname]["min"] = min(limits[pname]["min"],var.getncattr(min_str)) + limits[pname]["max"] = max(limits[pname]["max"],var.getncattr(max_str)) + + # Another pass to fix score limits + for pname in limits.keys(): + if "score" in pname: + if "min" in limits[pname].keys(): + limits[pname]["min"] = 0. + limits[pname]["max"] = 1. + else: + for region in limits[pname].keys(): + limits[pname][region]["min"] = 0. + limits[pname][region]["max"] = 1. 
+ self.limits = limits + + # Second pass to plot legends + cmaps = {"bias" :"seismic", + "timelonbias":"seismic", + "cyclebias" :"seismic", + "rmse" :"YlOrRd"} + for pname in limits.keys(): + + base_pname = pname + m = re.search("(\D+)\d+",pname) + if m: base_pname = m.group(1) + + # Pick colormap + cmap = self.cmap + if base_pname in cmaps: + cmap = cmaps[base_pname] + elif "score" in pname: + cmap = "score" + + # Need to symetrize? + if base_pname in ["bias","timelonbias","cyclebias"]: + if "min" in limits[pname]: + vabs = max(abs(limits[pname]["max"]),abs(limits[pname]["min"])) + limits[pname]["min"] = -vabs + limits[pname]["max"] = vabs + else: + vabs = max(abs(limits[pname]["global"]["max"]),abs(limits[pname]["global"]["min"])) + limits[pname]["global"]["min"] = -vabs + limits[pname]["global"]["max"] = vabs + + # Some plots need legends + if base_pname in ["timeint","bias","biasscore","rmse","rmsescore","timelonint","timelonbias","cycle","cyclebias"]: + if "min" in limits[pname]: + fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) + post.ColorBar(ax, + vmin = limits[pname]["min" ], + vmax = limits[pname]["max" ], + label = limits[pname]["unit"], + cmap = cmap) + fig.savefig("%s/legend_%s.png" % (self.output_path,pname)) + if base_pname == "timelonint" or base_pname == "cycle": + fig.savefig("%s/legend_%ss.png" % (self.output_path,pname)) + plt.close() + else: + fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) + post.ColorBar(ax, + vmin = limits[pname]["global"]["min" ], + vmax = limits[pname]["global"]["max" ], + label = limits[pname]["global"]["unit"], + cmap = cmap) + fig.savefig("%s/legend_%s.png" % (self.output_path,pname)) + if base_pname == "timelonint" or base_pname == "cycle": + fig.savefig("%s/legend_%ss.png" % (self.output_path,pname)) + plt.close() + + def compositePlots(self): + pass diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py index 75e1bd14..1a4a4d4d 100644 --- a/src/ILAMB/Scoreboard.py +++ b/src/ILAMB/Scoreboard.py @@ -12,6 +12,7 @@ from .ConfSoilCarbon import ConfSoilCarbon from .ConfUncertainty import ConfUncertainty from .ConfBurntArea import ConfBurntArea +from .ConfSoilMoisture import ConfSoilMoisture from .Regions import Regions import os,re from netCDF4 import Dataset @@ -298,7 +299,8 @@ def _loadScores(node): "ConfCO2" : ConfCO2, "ConfSoilCarbon" : ConfSoilCarbon, "ConfUncertainty" : ConfUncertainty, - "ConfBurntArea" : ConfBurntArea} + "ConfBurntArea" : ConfBurntArea, + "ConfSoilMoisture": ConfSoilMoisture} class Scoreboard(): """ @@ -394,6 +396,7 @@ def createJSON(self,M,filename="scalars.json"): models = [m.name for m in M] scalars = {} TraversePreorder (self.tree,BuildDictionary) + section = "MeanState" ; TraversePostorder(self.tree,BuildScalars) TraversePreorder (self.tree,ConvertList) check = rel_tree.children diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 02fa185c..14edc24d 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -833,6 +833,10 @@ def convert(self,unit,density=998.2,molar_mass=12.0107): can be changed by specifying the density when calling the function. + For soil moisture, there is often requirement to convert + between kg m-2 and m3 m-3. This conversion is achieved here using + the depth property. 
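[Editor's note: a worked example of the kg m-2 to m3 m-3 conversion described
above, assuming (as the 0.001 factor in the code implies) a water density of
1000 kg m-3; `dz` is the layer thickness taken from `depth_bnds`:

    # Editor's sketch: 20 kg m-2 of water held in a 0.1 m thick soil layer.
    sm_kg_m2 = 20.0                     # areal soil water [kg m-2]
    dz       = 0.1                      # layer thickness from depth_bnds [m]
    rho_w    = 1000.0                   # assumed water density [kg m-3]
    theta    = sm_kg_m2 / (rho_w * dz)  # volumetric soil moisture [m3 m-3]
    assert abs(theta - 0.2) < 1e-12     # 20 kg m-2 over 10 cm -> 0.2 m3 m-3
]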
+ Parameters ---------- unit : str @@ -850,12 +854,27 @@ def convert(self,unit,density=998.2,molar_mass=12.0107): this object with its unit converted """ + def _tellAxis(indx): + args = [] + axis = 0 + if self.temporal: + axis += 1 + args.append(range(self.time.size)) + if self.layered: args.append(indx) + if self.ndata: + args.append(range(self.ndata)) + if self.spatial: + args.append(range(self.lat.size)) + args.append(range(self.lon.size)) + return np.ix_(*args) + if unit is None: return self src_unit = Unit(self.unit) tar_unit = Unit( unit) mask = self.data.mask mass_density = Unit("kg m-3") molar_density = Unit("g mol-1") + area_sm = Unit("kg m-2") if ((src_unit/tar_unit)/mass_density).is_dimensionless(): with np.errstate(all='ignore'): self.data /= density @@ -872,6 +891,32 @@ def convert(self,unit,density=998.2,molar_mass=12.0107): with np.errstate(all='ignore'): self.data *= molar_mass src_unit *= molar_density + if ((src_unit/tar_unit)/area_sm).is_dimensionless(): + with np.errstate(all='ignore'): + if (self.depth is None) or (self.depth_bnds is None): + print('Missing depth or depth_bnds.') + raise il.UnitConversionError() + + self.data = src_unit.convert(self.data, 'kg m-2') + for ind in range(len(self.depth)): + ind2 = _tellAxis([ind]) + dz = self.depth_bnds[ind,1] - self.depth_bnds[ind,0] + self.data[ind2] = self.data[ind2] / dz * 0.001 + src_unit = Unit('m3 m-3') + self.unit = 'm3 m-3' + elif ((tar_unit/src_unit)/area_sm).is_dimensionless(): + with np.errstate(all='ignore'): + if (self.depth is None) or (self.depth_bnds is None): + print('Missing depth or depth_bnds.') + raise il.UnitConversionError() + + self.data = src_unit.convert(self.data, 'm3 m-3') + for ind in range(len(self.depth)): + ind2 = _tellAxis([ind]) + dz = self.depth_bnds[ind,1] - self.depth_bnds[ind,0] + self.data[ind2] = self.data[ind2] * dz / 0.001 + src_unit = Unit('kg m-2') + self.unit = 'kg m-2' try: with np.errstate(all='ignore'): src_unit.convert(self.data,tar_unit,inplace=True) diff --git a/src/ILAMB/data/sample.cfg b/src/ILAMB/data/sample.cfg index 0e3abaa9..d0fc8194 100644 --- a/src/ILAMB/data/sample.cfg +++ b/src/ILAMB/data/sample.cfg @@ -8,6 +8,11 @@ variable = "rsus" [CERES] source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" +weight = 5 + +[WRMC.BSRN] +source = "DATA/rsus/WRMC.BSRN/rsus.nc" +weight = 1 [h2: Albedo] variable = "albedo" From 25529958045d934423f056fd1a5b0595dae4153e Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Sun, 4 Apr 2021 00:08:14 -0400 Subject: [PATCH 02/18] Relaxed NBPplot constraint to accept one model & >10 year difference --- bin/ilamb-run | 6 ++++++ src/ILAMB/ConfNBP.py | 11 ++++++++--- src/ILAMB/ConfSoilMoisture.py | 4 ++-- src/ILAMB/Scoreboard.py | 1 + src/ILAMB/Variable.py | 9 +++++++++ src/ILAMB/ilamblib.py | 3 +++ 6 files changed, 29 insertions(+), 5 deletions(-) diff --git a/bin/ilamb-run b/bin/ilamb-run index a1e753a4..4c1a9b87 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -611,6 +611,7 @@ if args.model_setup is None: models_path=args.build_dir[0]) else: M = ParseModelSetup(args.model_setup[0],args.models,not args.quiet,filter=args.filter[0],models_path=args.build_dir[0]) + if rank == 0 and not args.quiet: print("\nParsing config file %s...\n" % args.config[0]) S = Scoreboard(args.config[0], regions = args.regions, @@ -622,7 +623,10 @@ S = Scoreboard(args.config[0], mem_per_pair = args.mem_per_pair, run_title = args.run_title, rmse_score_basis = args.rmse_score_basis) + C = MatchRelationshipConfrontation(S.list()) + + if len(args.study_limits) == 2: 
args.study_limits[1] += 1 for c in C: c.study_limits = (np.asarray(args.study_limits)-1850)*365. @@ -662,6 +666,8 @@ if rank==0 and not args.quiet: print("\nRunning model-confrontation pairs...\n") sys.stdout.flush(); comm.Barrier() W = BuildLocalWorkList(M,C,skip_cache=True) + + WorkConfront(W,not args.quiet,args.clean) sys.stdout.flush(); comm.Barrier() diff --git a/src/ILAMB/ConfNBP.py b/src/ILAMB/ConfNBP.py index 05a4dd2a..68a04803 100644 --- a/src/ILAMB/ConfNBP.py +++ b/src/ILAMB/ConfNBP.py @@ -8,7 +8,10 @@ import numpy as np import os,glob -def SpaceLabels(y,ymin,maxit=1000): +def SpaceLabels(y,ymin,maxit=1000): + if len(y) == 1: + return y + for j in range(maxit): dy = np.diff(y) for i in range(1,y.size-1): @@ -261,19 +264,21 @@ def NBPplot(V,vmin,vmax,colors,fname): Y = []; L = [] for key in V: if key == "Benchmark": continue - if V[key].time[0] > V["Benchmark"].time[0]+10: continue + # 2021/04/03 YW Seems uncessary? + ##if V[key].time[0] > V["Benchmark"].time[0]+10: continue L.append(key) Y.append(V[key].data[-1]) Y = np.asarray(Y); L = np.asarray(L) ind = np.argsort(Y) Y = Y[ind]; L = L[ind] - + fig = plt.figure(figsize=(11.8,5.8)) ax = fig.add_subplot(1,1,1,position=[0.06,0.06,0.8,0.92]) data_range = vmax-vmin fig_height = fig.get_figheight() font_size = 10 dy = 0.05*data_range + y = SpaceLabels(Y.copy(),data_range/fig_height*font_size/50.) v = V["Benchmark"] for i in range(L.size): diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index b5f3815a..dae935f5 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -429,14 +429,14 @@ def _addDepth(v): # vertical integrated analysis print(obs) - z = obs.integrateInDepth(z0 = obs.depths[0]-1., zf = obs.depths[-1]+1., mean = True) + z = obs.integrateInDepth(z0 = obs.depth[0]-1., zf = obs.depth[-1]+1., mean = True) obs_layerint = Variable(name = "layerint", unit = z.unit, data = z.data, time = t, time_bnds = tb, lat = z.lat, lat_bnds = z.lat_bnds, lon = z.lon, lon_bnds = z.lon_bnds) obs_timeint = obs_layerint.integrateInTime(mean = True) if mod.layered: - z = mod.integrateInDepth(z0 = obs.depths[0]-1., zf = obs.depths[-1]+1., mean = True) + z = mod.integrateInDepth(z0 = obs.depth[0]-1., zf = obs.depth[-1]+1., mean = True) else: z = mod mod_layerint = Variable(name = "layerint", unit = z.unit, data = z.data, diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py index 1a4a4d4d..9715a57f 100644 --- a/src/ILAMB/Scoreboard.py +++ b/src/ILAMB/Scoreboard.py @@ -391,6 +391,7 @@ def createJSON(self,M,filename="scalars.json"): global models global global_scores global section + rel_tree = GenerateRelationshipTree(self,M) global_scores = [] models = [m.name for m in M] diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 14edc24d..1bad6a8d 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -9,6 +9,7 @@ from . import Post as post import numpy as np import matplotlib.pyplot as plt +import warnings def _shiftLon(lon): return (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360 @@ -833,9 +834,13 @@ def convert(self,unit,density=998.2,molar_mass=12.0107): can be changed by specifying the density when calling the function. + 2021/03/21 For soil moisture, there is often requirement to convert between kg m-2 and m3 m-3. This conversion is achieved here using the depth property. + + 2021/04/03 + Leaf area index: if "none", treat as 1. 
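[Editor's note: a minimal sketch of the "none"-unit guard this note describes,
assuming only that the unit arrives as a string; the `normalize_unit` helper
below is hypothetical and simply mirrors the check added to `convert`:

    import warnings

    def normalize_unit(unit):
        # Placeholder units such as "none" trip up unit parsing; treat
        # them as dimensionless ("1") and warn, as the patch does.
        if unit is not None and unit.lower() == "none":
            warnings.warn("Treating unit %r as dimensionless ('1')." % unit)
            return "1"
        return unit
]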
Parameters ---------- @@ -868,6 +873,10 @@ def _tellAxis(indx): args.append(range(self.lon.size)) return np.ix_(*args) + if self.unit.lower() == "none": + warnings.warn('Treat ' + self.unit + ' as 1.') + self.unit = "1" + if unit is None: return self src_unit = Unit(self.unit) tar_unit = Unit( unit) diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 69536c18..32fccde9 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -260,10 +260,12 @@ def GetTime(var,t0=None,tf=None,convert_calendar=True,ignore_time_array=True): if t0 is not None: t0 = cf.num2date(t0,units="days since 1850-1-1 00:00:00",calendar="noleap") t0 = ConvertCalendar(t0,t.units,t.calendar) + if (t0 > tb[-1,1]): return None,None,None,None,None,None if tf is not None: tf = cf.num2date(tf,units="days since 1850-1-1 00:00:00",calendar="noleap") tf = ConvertCalendar(tf,t.units,t.calendar) + if (tf < tb[0,0]): return None,None,None,None,None,None # Subset by the desired initial and final times @@ -787,6 +789,7 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N depth = None; depth_bnd = None data = None; cbounds = None + t,t_bnd,cbounds,begin,end,calendar = GetTime(var,t0=t0,tf=tf,convert_calendar=convert_calendar) # Are there uncertainties? From da6c8b0f2c50dd30f837cde467963424dd09b78d Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Fri, 30 Apr 2021 20:36:10 -0400 Subject: [PATCH 03/18] blank plots bug --- bin/ilamb-run | 21 +- src/ILAMB/ConfSoilMoisture.py | 1100 +++++++++++++-------------------- src/ILAMB/ModelResult.py | 1 + src/ILAMB/Variable.py | 4 +- src/ILAMB/ilamblib.py | 37 +- 5 files changed, 465 insertions(+), 698 deletions(-) diff --git a/bin/ilamb-run b/bin/ilamb-run index 4c1a9b87..8359963b 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -378,7 +378,26 @@ def WorkConfront(W,verbose=False,clean=False): # try to run the confrontation try: - t0 = time.time() + t0 = time.time() + + # YW + print(m) + print(m.path) + print(m.color) + print(m.filter) + print(m.regex) + print(m.shift) + print(m.name) + print(m.confrontations) + print(m.cell_areas) + print(m.land_fraction) + print(m.land_areas) + print(m.land_area) + print(m.variables) + print(m.names) + print(m.extents) + print(m.paths) + c.confront(m) dt = time.time()-t0 proc[rank] += dt diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index dae935f5..b7c6c1aa 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -1,149 +1,67 @@ -from .Confrontation import getVariableList -from .Confrontation import Confrontation -from .constants import earth_rad,mid_months,lbl_months,bnd_months -from .Variable import Variable -from .Regions import Regions from . import ilamblib as il -from . import Post as post +from .Variable import * +from .Regions import Regions +from .constants import space_opts,time_opts,mid_months,bnd_months +import os,glob,re from netCDF4 import Dataset -from copy import deepcopy +from . 
import Post as post import pylab as plt -import numpy as np -import os,glob,re +from matplotlib.colors import LogNorm +from mpl_toolkits.axes_grid1 import make_axes_locatable +from mpi4py import MPI from sympy import sympify -from cf_units import Unit +import cftime as cf +from .Confrontation import getVariableList +from .Confrontation import Confrontation + -from mpi4py import MPI import logging logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) -def VariableReduce(var,region="global",time=None,depth=None,lat=None,lon=None): - ILAMBregions = Regions() - out = deepcopy(var) - out.data.mask += ILAMBregions.getMask(region,out) - if time is not None: - out = out.integrateInTime (t0=time[0] ,tf=time[1] ,mean=True) - if depth is not None and var.layered: - out = out.integrateInDepth(z0=depth[0],zf=depth[1],mean=True) - if lat is not None: - lat0 = np.argmin(np.abs(var.lat-lat[0])) - latf = np.argmin(np.abs(var.lat-lat[1]))+1 - wgt = earth_rad*(np.sin(var.lat_bnds[:,1])-np.sin(var.lat_bnds[:,0]))[lat0:latf] - np.seterr(over='ignore',under='ignore') - out.data = np.ma.average(out.data[...,lat0:latf,:],axis=-2,weights=wgt/wgt.sum()) - np.seterr(over='raise',under='raise') - out.lat = None - out.lat_bnd = None - out.spatial = False - if lon is not None: - lon0 = np.argmin(np.abs(var.lon-lon[0])) - lonf = np.argmin(np.abs(var.lon-lon[1]))+1 - wgt = earth_rad*(var.lon_bnds[:,1]-var.lon_bnds[:,0])[lon0:lonf] - np.seterr(over='ignore',under='ignore') - out.data = np.ma.average(out.data[...,lon0:lonf],axis=-1,weights=wgt/wgt.sum()) - np.seterr(over='raise',under='raise') - out.lon = None - out.lon_bnd = None - out.spatial = False - return out - -def TimeLatBias(ref,com): - # composite depth axis - d0 = max(ref.depth_bnds.min(),com.depth_bnds.min()) - df = min(ref.depth_bnds.max(),com.depth_bnds.max()) - d = np.unique(np.hstack([ref.depth_bnds.flatten(),com.depth_bnds.flatten()])) - d = d[(d>=d0)*(d<=df)] - db = np.asarray([d[:-1],d[1:]]).T - d = db.mean(axis=1) - # composite lat axis - l0 = max(ref.lat_bnds.min(),com.lat_bnds.min()) - lf = min(ref.lat_bnds.max(),com.lat_bnds.max()) - l = np.unique(np.hstack([ref.lat_bnds.flatten(),com.lat_bnds.flatten()])) - l = l[(l>=l0)*(l<=lf)] - lb = np.asarray([l[:-1],l[1:]]).T - l = lb.mean(axis=1) - # interpolation / difference - data = il.NearestNeighborInterpolation(com.depth,com.lat,com.data,d,l) - data -= il.NearestNeighborInterpolation(ref.depth,ref.lat,ref.data,d,l) - area = np.diff(db)[:,np.newaxis] * (earth_rad*(np.sin(lb[:,1])-np.sin(lb[:,0]))) - return Variable(name = ref.name.replace("timelonint","timelonbias"), - unit = ref.unit, - data = data, - area = area, - lat = l, - depth = d, - lat_bnds = lb, - depth_bnds = db) - -def CycleBias(ref,com): - # composite depth axis - d0 = max(ref.depth_bnds.min(),com.depth_bnds.min()) - df = min(ref.depth_bnds.max(),com.depth_bnds.max()) - d = np.unique(np.hstack([ref.depth_bnds.flatten(),com.depth_bnds.flatten()])) - d = d[(d>=d0)*(d<=df)] - db = np.asarray([d[:-1],d[1:]]).T - d = db.mean(axis=1) - # interpolation / difference - data = il.NearestNeighborInterpolation(com.time,com.depth,com.data,com.time,d) - data -= il.NearestNeighborInterpolation(ref.time,ref.depth,ref.data,ref.time,d) - return Variable(name = ref.name.replace("cycle","cyclebias"), - unit = ref.unit, - data = data, - time = mid_months, - time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T, - depth = d, - depth_bnds = db) - - class ConfSoilMoisture(Confrontation): def __init__(self,**keywords): - # Calls the regular constructor 
super(ConfSoilMoisture,self).__init__(**keywords) # Get/modify depths - self.depths = np.asarray(self.keywords.get("depths",[0,100,250]),dtype=float) with Dataset(self.source) as dset: v = dset.variables[self.variable] depth_name = [d for d in v.dimensions if d in ["layer","depth"]] - if len(depth_name) == 0: - # if there is no depth dimension, we assume the data is surface - self.depths = np.asarray([0],dtype=float) + # if there is no depth dimension, we assume the data is + # top 10cm + self.depths = np.asarray([[0., .1]],dtype=float) + self.depths_units = 'm' else: # if there are depths, then make sure that the depths # at which we will compute are in the range of depths # of the data depth_name = depth_name[0] - data = dset.variables[dset.variables[depth_name].bounds][...] if "bounds" in dset.variables[depth_name].ncattrs() else dset.variables[depth_name][...] - self.depths = self.depths[(self.depths>=data.min())*(self.depths<=data.max())] - - # Setup a html layout for generating web views of the results - pages = [] - sections = ["Period Mean at %d [m]" % d for d in self.depths] - sections += ["Mean regional depth profiles"] - sections += ["Overlapping mean regional depth profiles"] - sections += ["Mean regional annual cycle"] - sections += ["Overlapping mean regional annual cycle"] - pages.append(post.HtmlPage("MeanState","Mean State")) - pages[-1].setHeader("CNAME / RNAME / MNAME") - pages[-1].setSections(sections) - pages.append(post.HtmlAllModelsPage("AllModels","All Models")) - pages[-1].setHeader("CNAME / RNAME") - pages[-1].setSections([]) - pages[-1].setRegions(self.regions) - pages.append(post.HtmlPage("DataInformation","Data Information")) - pages[-1].setSections([]) - pages[-1].text = "\n" - with Dataset(self.source) as dset: - for attr in dset.ncattrs(): - pages[-1].text += "
<p><b>&nbsp;&nbsp;%s:&nbsp;</b>%s</p>
\n" % (attr,str(dset.getncattr(attr)).encode('ascii','ignore')) - self.layout = post.HtmlLayout(pages,self.longname) - def stageData(self,m): + if 'bounds' in dset.variables[depth_name].ncattrs(): + data = dset.variables[dset.variables[depth_name \ + ].bounds][...] + self.depths = data + self.depths_units = dset.variables[dset.variables[depth_name \ + ].bounds].units + else: + data = dset.variables[depth_name][...] + + self.depths = np.asarray(self.keywords.get("depths_bnds", + [[0., .1], + [.1, .3], + [.3, .5], + [.5, 1.]]), + dtype = float) + self.depths = self.depths[(self.depths[:,1]>=data.min() + )*(self.depths[:,0]<=data.max()), :] + self.depths_units = dset.variables[depth_name].units + def stageData(self,m): + """ Extract Model data with interpolation to the confrontation + depth.""" mem_slab = self.keywords.get("mem_slab",100000.) # Mb # peak at the reference dataset without reading much into memory @@ -227,7 +145,6 @@ def stageData(self,m): data = None dnum = None for i in range(ns): - v = m.extractTimeSeries(self.variable, alt_vars = self.alternate_vars, expression = self.derived, @@ -269,26 +186,23 @@ def stageData(self,m): lat_bnds = v.lat_bnds, lon_bnds = v.lon_bnds, depth_bnds = v.depth_bnds) - yield obs,mod # if obs is historical, then we yield slabs of both else: - obs_mem *= (mod_tf-mod_t0)/(tf-t0) mod_t0 = max(mod_t0,t0) mod_tf = min(mod_tf,tf) ns = int(np.floor(max(obs_mem,mod_mem)/mem_slab))+1 ns = min(min(max(1,ns),mod_nt),obs_nt) logger.info("[%s][%s] staging data in %d slabs" % (self.name,m.name,ns)) - + # across what times? slab_t = (mod_tf-mod_t0)*np.linspace(0,1,ns+1)+mod_t0 slab_t = np.floor(slab_t / 365)*365 + bnd_months[(np.abs(bnd_months[:,np.newaxis] - (slab_t % 365))).argmin(axis=0)] - + obs_tb = None; mod_tb = None for i in range(ns): - # get reference variable obs = Variable(filename = self.source, variable_name = self.variable, @@ -312,7 +226,7 @@ def stageData(self,m): initial_time = slab_t[i], final_time = slab_t[i+1]).trim(t=[slab_t[i],slab_t[i+1]]).convert(obs.unit) if mod_tb is None: - mod_tb = mod.time_bnds[...] + mod_tb = mod.time_bnds else: if np.allclose(mod_tb[-1],mod.time_bnds[0]): mod.data = mod.data[1:] @@ -321,13 +235,15 @@ def stageData(self,m): assert np.allclose(mod.time_bnds[0,0],mod_tb[-1,1]) mod_tb = mod.time_bnds[...] assert obs.time.size == mod.time.size + yield obs,mod + def confront(self,m): def _addDepth(v): - v.depth = np.asarray([.5]) - v.depth_bnds = np.asarray([[0.,1.]]) + v.depth = np.asarray([.05]) + v.depth_bnds = np.asarray([[0.,.1]]) shp = list(v.data.shape) shp.insert(1,1) v.data.shape = shp @@ -347,595 +263,423 @@ def _addDepth(v): "color":np.asarray([0.5,0.5,0.5]), "complete":0}) - # Combined IOMB and one-layer analysis from "Confrontation.py" + # Get the depth-integrated observation and model data for each slab. obs_timeint = {}; mod_timeint = {} - obs_depth = {}; mod_depth = {} - ocyc = {}; oN = {} - mcyc = {}; mN = {} - for depth in self.depths: - dlbl = "%d" % depth - obs_timeint[dlbl] = [] - mod_timeint[dlbl] = [] - for region in self.regions: - obs_depth[region] = [] - mod_depth[region] = [] - unit = None - max_obs = -1e20 + for dind in range(self.depths.shape[0]): + obs_timeint[dind] = [] + mod_timeint[dind] = [] for obs,mod in self.stageData(m): - # if the data has no depth, we assume it is surface if not obs.layered: obs = _addDepth(obs) if not mod.layered: mod = _addDepth(mod) - max_obs = max(max_obs,obs.data.max()) - - # !!! 
Here need to add an integration step to perform surface analysis # time bounds for this slab tb = obs.time_bnds[[0,-1],[0,1]].reshape((1,2)) t = np.asarray([tb.mean()]) - # mean lat/lon slices at various depths - for depth in self.depths: - - dlbl = "%d" % depth + # + for dind, z0 in enumerate(self.depths[:, 0]): + zf = self.depths[dind, 1] + z = obs.integrateInDepth(z0 = z0, zf = zf, mean = True).integrateInTime(mean = True) - # !!! the +/- 1 may be incorrect - z = obs.integrateInDepth(z0=depth-1.,zf=depth+1,mean=True).integrateInTime(mean=True) - unit = z.unit - obs_timeint[dlbl].append(Variable(name = "timeint%s" % dlbl, + #YW + print('Staging data ... %.2f-%.2f' % (z0, zf)) + + obs_timeint[dind].append(Variable(name = "sm%.2f-%.2f" % (z0, zf), unit = z.unit, - data = z.data.reshape((1,)+z.data.shape), - time = t, time_bnds = tb, - lat = z.lat, lat_bnds = z.lat_bnds, - lon = z.lon, lon_bnds = z.lon_bnds)) - z = mod - if mod.layered: z = z.integrateInDepth(z0=depth-1.,zf=depth+1,mean=True) - z = z.integrateInTime(mean=True) - mod_timeint[dlbl].append(Variable(name = "timeint%s" % dlbl, + data = z.data.reshape((1,) +z.data.shape), + time = t, time_bnds = tb, + lat = z.lat, lat_bnds = z.lat_bnds, + lon = z.lon, lon_bnds = z.lon_bnds)) + z = mod.integrateInDepth(z0 = z0, zf = zf, mean = True).integrateInTime(mean = True) + mod_timeint[dind].append(Variable(name = "sm%.2f-%.2f" % (z0, zf), unit = z.unit, data = z.data.reshape((1,)+z.data.shape), time = t, time_bnds = tb, lat = z.lat, lat_bnds = z.lat_bnds, lon = z.lon, lon_bnds = z.lon_bnds)) - # mean - for region in self.regions: - z = VariableReduce(obs,region=region,time=tb[0],lon=[-180.,+180.]) - z.time = t; z.time_bnds = tb; z.temporal = True; z.data.shape = (1,)+z.data.shape - obs_depth[region].append(z) - - z = VariableReduce(mod,region=region,time=tb[0],lon=[-180.,+180.]) - z.time = t; z.time_bnds = tb; z.temporal = True; z.data.shape = (1,)+z.data.shape - mod_depth[region].append(z) - - # annual cycle in slabs - for region in self.regions: - z = obs.integrateInSpace(region=region,mean=True) - if region not in ocyc: - ocyc[region] = np.ma.zeros((12,)+z.data.shape[1:]) - oN [region] = np.ma.zeros((12,)+z.data.shape[1:],dtype=int) - i = (np.abs(mid_months[:,np.newaxis]-(z.time % 365))).argmin(axis=0) - (ocyc[region])[i,...] += z.data - (oN [region])[i,...] += 1 - - z = mod.integrateInSpace(region=region,mean=True) - if region not in mcyc: - mcyc[region] = np.ma.zeros((12,)+z.data.shape[1:]) - mN [region] = np.ma.zeros((12,)+z.data.shape[1:],dtype=int) - i = (np.abs(mid_months[:,np.newaxis]-(z.time % 365))).argmin(axis=0) - (mcyc[region])[i,...] += z.data - (mN [region])[i,...] 
+= 1 - - - # vertical integrated analysis - print(obs) - z = obs.integrateInDepth(z0 = obs.depth[0]-1., zf = obs.depth[-1]+1., mean = True) - obs_layerint = Variable(name = "layerint", unit = z.unit, data = z.data, - time = t, time_bnds = tb, - lat = z.lat, lat_bnds = z.lat_bnds, - lon = z.lon, lon_bnds = z.lon_bnds) - obs_timeint = obs_layerint.integrateInTime(mean = True) - if mod.layered: - z = mod.integrateInDepth(z0 = obs.depth[0]-1., zf = obs.depth[-1]+1., mean = True) - else: - z = mod - mod_layerint = Variable(name = "layerint", unit = z.unit, data = z.data, - time = t, time_bnds = tb, - lat = z.lat, lat_bnds = z.lat_bnds, - lon = z.lon, lon_bnds = z.lon_bnds) - mod_timeint = mod_layerint.integrateInTime(mean = True) - - # Read in some options and run the mean state analysis - mass_weighting = self.keywords.get("mass_weighting",False) - skip_rmse = self.keywords.get("skip_rmse" ,False) - skip_iav = self.keywords.get("skip_iav" ,True ) - skip_cycle = self.keywords.get("skip_cycle" ,False) - if obs_layerint.spatial: - print(m) - il.AnalysisMeanStateSpace(obs_layerint,mod_layerint,dataset = fcm.mod_dset, - regions = self.regions, - benchmark_dataset = fcm.obs_dset, - table_unit = self.table_unit, - plot_unit = self.plot_unit, - space_mean = self.space_mean, - skip_rmse = skip_rmse, - skip_iav = skip_iav, - skip_cycle = skip_cycle, - mass_weighting = mass_weighting, - ref_timeint = obs_timeint, - com_timeint = mod_timeint) - else: - il.AnalysisMeanStateSites(obs_layerint,mod_layerint,dataset = fcm.mod_dset, - regions = self.regions, - benchmark_dataset = fcm.obs_dset, - table_unit = self.table_unit, - plot_unit = self.plot_unit, - space_mean = self.space_mean, - skip_rmse = skip_rmse, - skip_iav = skip_iav, - skip_cycle = skip_cycle, - mass_weighting = mass_weighting) - - # combine time slabs from the different depths - large_bias = float(self.keywords.get("large_bias",0.1*max_obs)) - - for dlbl in obs_timeint.keys(): - - # period means and bias - obs_tmp = il.CombineVariables(obs_timeint[dlbl]).integrateInTime(mean=True) - mod_tmp = il.CombineVariables(mod_timeint[dlbl]).integrateInTime(mean=True) + # Read in some options and run the mean state analysis + mass_weighting = self.keywords.get("mass_weighting",False) + skip_rmse = self.keywords.get("skip_rmse" ,False) + skip_iav = self.keywords.get("skip_iav" ,True ) + skip_cycle = self.keywords.get("skip_cycle" ,False) + rmse_score_basis = self.keywords.get("rmse_score_basis","cycle") + + for dind in range(self.depths.shape[0]): + obs_tmp = il.CombineVariables(obs_timeint[dind]) + mod_tmp = il.CombineVariables(mod_timeint[dind]) + print(obs_tmp.name) + print(mod_tmp.name) obs_tmp.name = obs_tmp.name.split("_")[0] mod_tmp.name = mod_tmp.name.split("_")[0] - bias = obs_tmp.spatialDifference(mod_tmp) - bias.name = mod_tmp.name.replace("timeint","bias") - mod_tmp.toNetCDF4(fcm.mod_dset,group="MeanState") - bias.toNetCDF4(fcm.mod_dset,group="MeanState") - bias_score = None - if dlbl == "0": - with np.errstate(all="ignore"): - bias_score = Variable(name = bias.name.replace("bias","biasscore"), - data = np.exp(-np.abs(bias.data)/large_bias), - unit = "1", - ndata = bias.ndata, - lat = bias.lat, lat_bnds = bias.lat_bnds, - lon = bias.lon, lon_bnds = bias.lon_bnds, - area = bias.area) - bias_score.toNetCDF4(fcm.mod_dset,group="MeanState") - - for region in self.regions: - - sval = mod_tmp.integrateInSpace(region=region,mean=True) - sval.name = "Period Mean at %s %s" % (dlbl,region) - sval.toNetCDF4(fcm.mod_dset,group="MeanState") - - sval = 
bias.integrateInSpace(region=region,mean=True) - sval.name = "Bias at %s %s" % (dlbl,region) - sval.toNetCDF4(fcm.mod_dset,group="MeanState") - - if bias_score is not None: - sval = bias_score.integrateInSpace(region=region,mean=True) - sval.name = "Bias Score at %s %s" % (dlbl,region) - sval.toNetCDF4(fcm.mod_dset,group="MeanState") - - if self.master: - obs_tmp.toNetCDF4(fcm.obs_dset,group="MeanState") - for region in self.regions: - sval = obs_tmp.integrateInSpace(region=region,mean=True) - sval.name = "Period Mean at %s %s" % (dlbl,region) - sval.toNetCDF4(fcm.obs_dset,group="MeanState") - - # combine depth/lat slabs for different regions - for region in self.regions: - mod_tmp = il.CombineVariables(mod_depth[region]).integrateInTime(mean=True) - mod_tmp.name = "timelonint_of_%s_over_%s" % (self.variable,region) - mod_tmp.toNetCDF4(fcm.mod_dset,group="MeanState") - obs_tmp = il.CombineVariables(obs_depth[region]).integrateInTime(mean=True) - obs_tmp.name = "timelonint_of_%s_over_%s" % (self.variable,region) - mod_bias = TimeLatBias(obs_tmp,mod_tmp) - mod_bias.toNetCDF4(fcm.mod_dset,group="MeanState") - np.seterr(over='ignore',under='ignore') - ocyc[region] = ocyc[region]/(oN[region].clip(1)) - mcyc[region] = mcyc[region]/(mN[region].clip(1)) - - np.seterr(over='raise',under='raise') - mcyc[region] = Variable(name = "cycle_of_%s_over_%s" % (self.variable,region), - unit = mod.unit, - data = mcyc[region], - depth = mod.depth, - depth_bnds = mod.depth_bnds, - time = mid_months) - ocyc[region] = Variable(name = "cycle_of_%s_over_%s" % (self.variable,region), - unit = obs.unit, - data = ocyc[region], - depth = obs.depth, - depth_bnds = obs.depth_bnds, - time = mid_months) - cyc_bias = CycleBias(ocyc[region],mcyc[region]) - cyc_bias .toNetCDF4(fcm.mod_dset,group="MeanState") - mcyc[region].toNetCDF4(fcm.mod_dset,group="MeanState") - if self.master: - obs_tmp .toNetCDF4(fcm.obs_dset,group="MeanState") - ocyc[region].toNetCDF4(fcm.obs_dset,group="MeanState") + if obs_tmp.spatial: + il.AnalysisMeanStateSpace(obs_tmp,mod_tmp,dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_rmse = skip_rmse, + skip_iav = skip_iav, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting, + rmse_score_basis = rmse_score_basis) + else: + il.AnalysisMeanStateSites(obs_tmp,mod_tmp,dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_rmse = skip_rmse, + skip_iav = skip_iav, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting) fcm.mod_dset.setncattr("complete",1) if self.master: fcm.obs_dset.setncattr("complete",1) + logger.info("[%s][%s] Success" % (self.longname,m.name)) - def modelPlots(self,m): - def _fheight(region): - if region in ["arctic","southern"]: return 6.8 - return 2.8 + def computeOverallScore(self,m): + """Computes the overall composite score for a given model. - bname = "%s/%s_Benchmark.nc" % (self.output_path,self.name) - fname = "%s/%s_%s.nc" % (self.output_path,self.name,m.name) - if not os.path.isfile(bname): return + This routine opens the netCDF results file associated with + this confrontation-model pair, and then looks for a "scalars" + group in the dataset as well as any subgroups that may be + present. 
For each grouping of scalars, it will blend any value + with the word "Score" in the name to render an overall score, + overwriting the existing value if present. + + Parameters + ---------- + m : ILAMB.ModelResult.ModelResult + the model results + + """ + + def _computeOverallScore(scalars): + """Given a netCDF4 group of scalars, blend them into an overall score""" + scores = {} + variables = [v for v in scalars.variables.keys() if "Score" in v and "Overall" not in v] + for region in self.regions: + overall_score = 0. + sum_of_weights = 0. + for v in variables: + if region not in v: continue + score = v.replace(region,"").strip() + weight = 1. + if score in self.weight: weight = self.weight[score] + overall_score += weight*scalars.variables[v][...] + sum_of_weights += weight + overall_score /= max(sum_of_weights,1e-12) + scores["Overall Score %s" % region] = overall_score + return scores + + fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) if not os.path.isfile(fname): return + with Dataset(fname,mode="r+") as dataset: + datasets = [dataset.groups[grp] for grp in dataset.groups if "scalars" not in grp] + groups = [grp for grp in dataset.groups if "scalars" not in grp] + datasets.append(dataset) + groups .append(None) + for dset,grp in zip(datasets,groups): + if "scalars" in dset.groups: + scalars = dset.groups["scalars"] + score = _computeOverallScore(scalars) + for key in score.keys(): + if key in scalars.variables: + scalars.variables[key][0] = score[key] + else: + Variable(data=score[key],name=key,unit="1").toNetCDF4(dataset,group=grp) + + + def compositePlots(self): + """Renders plots which display information of all models. + + This routine renders plots which contain information from all + models. Thus only the master process will run this routine, + and only after all analysis has finished. 
+ + """ + if not self.master: return - # get the HTML page and set table priorities + # get the HTML page page = [page for page in self.layout.pages if "MeanState" in page.name][0] - page.priority = [" %d " % d for d in self.depths] - page.priority += ["Period","Bias"] - page.priority += ["Score","Overall"] - - # model plots - cmap = { "timeint" : self.cmap, - "bias" : "seismic", - "biasscore" : "score" } - plbl = { "timeint" : "MEAN", - "bias" : "BIAS", - "biasscore" : "BIAS SCORE" } - with Dataset(fname) as dataset: - group = dataset.groups["MeanState"] - variables = getVariableList(group) - color = dataset.getncattr("color") - for ptype in ["timeint","bias","biasscore"]: - for vname in [v for v in variables if ptype in v]: - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - try: - z = int(vname.replace(ptype,"")) - except: - continue - page.addFigure("Period Mean at %d [m]" % z, - vname, - "MNAME_RNAME_%s.png" % vname, - side = "MODEL %s AT %d [m]" % (plbl[ptype],z), - legend = True) - for region in self.regions: - ax = var.plot(None, - region = region, - vmin = self.limits[vname]["min"], - vmax = self.limits[vname]["max"], - cmap = cmap[ptype], - land = 0.750, - water = 0.875) - fig = ax.get_figure() - fig.savefig("%s/%s_%s_%s.png" % (self.output_path,m.name,region,vname)) - plt.close() - for region in self.regions: + models = [] + colors = [] + corr = {} + std = {} + cycle = {} + has_cycle = False + has_std = False + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + dataset = Dataset(fname) + if "MeanState" not in dataset.groups: continue + dset = dataset.groups["MeanState"] + models.append(dataset.getncattr("name")) + colors.append(dataset.getncattr("color")) + for region in self.regions: + if region not in cycle: cycle[region] = {} + if region not in std: std[region] = {} + if region not in corr: corr[region] = {} + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + + if zstr not in cycle[region]: cycle[region][zstr] = [] + + key = [v for v in dset.variables.keys() if ("cycle_" in v and zstr in v and region in v)] + if len(key)>0: + has_cycle = True + cycle[region][zstr].append(Variable(filename=fname,groupname="MeanState", + variable_name=key[0])) + + if zstr not in std[region]: std[region][zstr] = [] + if zstr not in corr[region]: corr[region][zstr] = [] + + key = [] + if "scalars" in dset.groups: + key = [v for v in dset.groups["scalars"].variables.keys() \ + if ("Spatial Distribution Score" in v and zstr in v and region in v)] + if len(key) > 0: + has_std = True + sds = dset.groups["scalars"].variables[key[0]] + corr[region][zstr].append(sds.getncattr("R" )) + std [region][zstr].append(sds.getncattr("std")) + + # composite annual cycle plot + if has_cycle and len(models) > 2: + page.addFigure("Spatially integrated regional mean", + "compcycle", + "RNAME_compcycle.png", + side = "ANNUAL CYCLE", + legend = False) - vname = "timelonint_of_%s_over_%s" % (self.variable,region) - if vname in variables: - var0 = Variable(filename=bname,variable_name=vname,groupname="MeanState") - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - bias = Variable(filename=fname,variable_name=vname.replace("timelonint","timelonbias"),groupname="MeanState") - if region == "global": - page.addFigure("Mean regional depth profiles", - "timelonint", - "MNAME_RNAME_timelonint.png", - side = "MODEL DEPTH PROFILE", - legend = True, - longname = "Time/longitude averaged profile") - 
page.addFigure("Overlapping mean regional depth profiles",
-                                   "timelonints",
-                                   "MNAME_RNAME_timelonints.png",
-                                   side = "MODEL DEPTH PROFILE",
-                                   legend = True,
-                                   longname = "Overlapping Time/longitude averaged profile")
-                    page.addFigure("Overlapping mean regional depth profiles",
-                                   "timelonbias",
-                                   "MNAME_RNAME_timelonbias.png",
-                                   side = "MODEL DEPTH PROFILE BIAS",
-                                   legend = True,
-                                   longname = "Overlapping Time/longitude averaged profile bias")
-                fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True)
-                l   = np.hstack([var .lat_bnds [:,0],var .lat_bnds [-1,1]])
-                d0  = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]])
-                d   = np.hstack([var .depth_bnds[:,0],var .depth_bnds[-1,1]])
-                ind = np.all(var.data.mask,axis=0)
-                ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int)
-                b   = ind.min()
-                e   = ind.max()+1
-                ax.pcolormesh(l[b:(e+1)],d,var.data[:,b:e],
-                              vmin = self.limits["timelonint"]["global"]["min"],
-                              vmax = self.limits["timelonint"]["global"]["max"],
-                              cmap = self.cmap)
-                ax.set_xlabel("latitude")
-                ax.set_ylim((d.max(),d.min()))
-                ax.set_ylabel("depth [m]")
-                fig.savefig("%s/%s_%s_timelonint.png" % (self.output_path,m.name,region))
-                ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min())))
-                fig.savefig("%s/%s_%s_timelonints.png" % (self.output_path,m.name,region))
-                plt.close()
-                fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True)
-                l   = np.hstack([bias.lat_bnds [:,0],bias.lat_bnds [-1,1]])
-                d0  = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]])
-                d   = np.hstack([bias.depth_bnds[:,0],bias.depth_bnds[-1,1]])
-                ind = np.all(bias.data.mask,axis=0)
-                ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int)
-                b   = ind.min()
-                e   = ind.max()+1
-                ax.pcolormesh(l[b:(e+1)],d,bias.data[:,b:e],
-                              vmin = self.limits["timelonbias"]["global"]["min"],
-                              vmax = self.limits["timelonbias"]["global"]["max"],
-                              cmap = "seismic")
-                ax.set_xlabel("latitude")
-                ax.set_ylim((d.max(),d.min()))
-                ax.set_ylabel("depth [m]")
-                ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min())))
-                fig.savefig("%s/%s_%s_timelonbias.png" % (self.output_path,m.name,region))
+            for region in self.regions:
+                if region not in cycle: continue
+                fig, axes = plt.subplots(self.depths.shape[0], 1,
+                                         figsize=(6.8,2.8 * self.depths.shape[0]),
+                                         tight_layout=True)
+                for dind, z0 in enumerate(self.depths[:,0]):
+                    zf = self.depths[dind, 1]
+                    zstr = '%.2f-%.2f' % (z0, zf)
+
+                    ax = axes[dind]
+                    for name,color,var in zip(models,colors,cycle[region][zstr]):
+                        dy = 0.05*(self.limits["cycle"][region]["max"]-self.limits["cycle"][region]["min"])
+                        var.plot(ax, lw=2, color=color, label=name,
+                                 ticks      = time_opts["cycle"]["ticks"],
+                                 ticklabels = time_opts["cycle"]["ticklabels"],
+                                 vmin       = self.limits["cycle"][region]["min"]-dy,
+                                 vmax       = self.limits["cycle"][region]["max"]+dy)
+                    ylbl = time_opts["cycle"]["ylabel"]
+                    if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit)
+                    ylbl = ylbl + ' ' + zstr + self.depths_units
+                    ax.set_ylabel(ylbl)
+                fig.savefig(os.path.join(self.output_path,"%s_compcycle.png" % (region)))
+                plt.close()
+
+            # plot legends with model colors (sorted with Benchmark data on top)
+            page.addFigure("Spatially integrated regional mean",
+                           "legend_compcycle",
+                           "legend_compcycle.png",
+                           side = "MODEL COLORS",
+                           legend = False)
+            def _alphabeticalBenchmarkFirst(key):
+                # compare in upper case so the "Benchmark" entry actually sorts first
+                key = key[0].upper()
+                if key == "BENCHMARK": return "A"
+                return key
+            tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst)
+            fig,ax = plt.subplots()
+            for model,color in tmp:
ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) + handles,labels = ax.get_legend_handles_labels() + plt.close() + + ncol = np.ceil(float(len(models))/11.).astype(int) + if ncol > 0: + fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) + ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) + ax.axis(False) + fig.savefig(os.path.join(self.output_path,"legend_compcycle.png")) + fig.savefig(os.path.join(self.output_path,"legend_spatial_variance.png")) + fig.savefig(os.path.join(self.output_path,"legend_temporal_variance.png")) + plt.close() + + # spatial distribution Taylor plot + if has_std: + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind, 1] + zstr = '%.2f-%.2f' % (z0, zf) + page.addFigure("Temporally integrated period mean", + "spatial_variance", + "RNAME_spatial_variance_" + zstr + ".png", + side = "SPATIAL TAYLOR DIAGRAM", + legend = False) + page.addFigure("Temporally integrated period mean", + "legend_spatial_variance", + "legend_spatial_variance.png", + side = "MODEL COLORS", + legend = False) + if "Benchmark" in models: colors.pop(models.index("Benchmark")) + for region in self.regions: + if not (region in std and region in corr): continue + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind, 1] + zstr = '%.2f-%.2f' % (z0, zf) + + if not (zstr in std[region] and zstr in corr[region]): continue + if len(std[region][zstr]) != len(corr[region][zstr]): continue + if len(std[region][zstr]) == 0: continue + + fig = plt.figure(figsize=(6.0,6.0)) + post.TaylorDiagram(np.asarray(std[region][zstr]), + np.asarray(corr[region][zstr]), + 1.0,fig,colors) + fig.savefig(os.path.join(self.output_path, + "%s_spatial_variance_%s.png" % (region, zstr))) plt.close() + def modelPlots(self,m): + """For a given model, create the plots of the analysis results. 
- vname = "cycle_of_%s_over_%s" % (self.variable,region) - if vname in variables: - var0 = Variable(filename=bname,variable_name=vname,groupname="MeanState") - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - bias = Variable(filename=fname,variable_name=vname.replace("cycle","cyclebias"),groupname="MeanState") - if region == "global": - page.addFigure("Mean regional annual cycle", - "cycle", - "MNAME_RNAME_cycle.png", - side = "MODEL ANNUAL CYCLE", - legend = True, - longname = "Annual cycle") - page.addFigure("Overlapping mean regional annual cycle", - "cycles", - "MNAME_RNAME_cycles.png", - side = "MODEL ANNUAL CYCLE", - legend = True, - longname = "Overlapping annual cycle") - page.addFigure("Overlapping mean regional annual cycle", - "cyclebias", - "MNAME_RNAME_cyclebias.png", - side = "MODEL ANNUAL CYCLE BIAS", - legend = True, - longname = "Overlapping annual cycle bias") - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - d0 = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]]) - d = np.hstack([var .depth_bnds[:,0],var .depth_bnds[-1,1]]) - ax.pcolormesh(bnd_months,d,var.data.T, - vmin = self.limits["cycle"]["global"]["min"], - vmax = self.limits["cycle"]["global"]["max"], - cmap = self.cmap) - ax.set_xticks (mid_months) - ax.set_xticklabels(lbl_months) - ax.set_ylim((d.max(),d.min())) - ax.set_ylabel("depth [m]") - fig.savefig("%s/%s_%s_cycle.png" % (self.output_path,m.name,region)) - ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) - fig.savefig("%s/%s_%s_cycles.png" % (self.output_path,m.name,region)) - plt.close() - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - ax.pcolormesh(bnd_months, - np.hstack([bias.depth_bnds[:,0],bias.depth_bnds[-1,1]]), - bias.data.T, - vmin = self.limits["cyclebias"]["global"]["min"], - vmax = self.limits["cyclebias"]["global"]["max"], - cmap = "seismic") - ax.set_xticks (mid_months) - ax.set_xticklabels(lbl_months) - ax.set_ylim((d.max(),d.min())) - ax.set_ylabel("depth [m]") - ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) - fig.savefig("%s/%s_%s_cyclebias.png" % (self.output_path,m.name,region)) - plt.close() + This routine will extract plotting information out of the + netCDF file which results from the analysis and create + plots. Note that determinePlotLimits should be called before + this routine. + """ + bname = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name )) + fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) + if not os.path.isfile(bname): return + if not os.path.isfile(fname): return - # benchmark plots - if not self.master: return - with Dataset(bname) as dataset: + # get the HTML page + page = [page for page in self.layout.pages if "MeanState" in page.name][0] + + with Dataset(fname) as dataset: group = dataset.groups["MeanState"] variables = getVariableList(group) color = dataset.getncattr("color") - for ptype in ["timeint"]: - for vname in [v for v in variables if ptype in v]: - var = Variable(filename=bname,variable_name=vname,groupname="MeanState") - z = int(vname.replace(ptype,"")) - page.addFigure("Period Mean at %d [m]" % z, - "benchmark_%s" % vname, - "Benchmark_RNAME_%s.png" % vname, - side = "BENCHMARK %s AT %d [m]" % (plbl[ptype],z), - legend = True) + for vname in variables: + # The other depths will be handled in plotting + zstr_0 = '%.2f-%.2f' % (self.depths[0,0], self.depths[0,1]) + if not zstr_0 in vname: continue + + # is this a variable we need to plot? 
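# Toy illustration (editorial annotation, not part of the patch): the zstr_0
# filter above keeps only variables tagged with the first depth band's label;
# the remaining bands are recovered further down by substituting their label
# into the same name via vname.replace(zstr_0, zstr). Variable names here are
# hypothetical:

depths    = [[0.0, 0.1], [0.1, 0.3]]
zstr_0    = '%.2f-%.2f' % (depths[0][0], depths[0][1])       # '0.00-0.10'
variables = ['sm0.00-0.10', 'sm0.10-0.30', 'bias0.00-0.10_over_global']
picked    = [v for v in variables if zstr_0 in v]
print(picked)   # ['sm0.00-0.10', 'bias0.00-0.10_over_global']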
+ pname = vname.split("_")[0] + if group.variables[vname][...].size <= 1: continue + var = Variable(filename=fname,groupname="MeanState",variable_name=vname) + + if (var.spatial or (var.ndata is not None)) and not var.temporal: + + # grab plotting options + if pname not in self.limits.keys(): continue + if pname not in space_opts: continue + opts = space_opts[pname] + + # add to html layout + page.addFigure(opts["section"], + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) + + # plot variable for region in self.regions: - ax = var.plot(None, - region = region, - vmin = self.limits[vname]["min"], - vmax = self.limits[vname]["max"], - cmap = cmap[ptype], - land = 0.750, - water = 0.875) - fig = ax.get_figure() - fig.savefig("%s/Benchmark_%s_%s.png" % (self.output_path,region,vname)) + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0])) + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + ax = axes.flat[dind] + var2 = Variable(filename=fname, groupname = "MeanState", + variable_name=vname.replace(zstr_0, zstr)) + var2.plot(ax, region = region, + vmin = self.limits[pname]["min"], + vmax = self.limits[pname]["max"], + cmap = self.limits[pname]["cmap"]) + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) plt.close() - for region in self.regions: - - vname = "timelonint_of_%s_over_%s" % (self.variable,region) - if vname in variables: - var0 = Variable(filename=fname,variable_name=vname,groupname="MeanState") - var = Variable(filename=bname,variable_name=vname,groupname="MeanState") - if region == "global": - page.addFigure("Mean regional depth profiles", - "benchmark_timelonint", - "Benchmark_RNAME_timelonint.png", - side = "BENCHMARK DEPTH PROFILE", - legend = True, - longname = "Time/longitude averaged profile") - page.addFigure("Overlapping mean regional depth profiles", - "benchmark_timelonints", - "Benchmark_RNAME_timelonints.png", - side = "BENCHMARK DEPTH PROFILE", - legend = True, - longname = "Overlapping Time/longitude averaged profile") - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - l = np.hstack([var .lat_bnds [:,0],var .lat_bnds [-1,1]]) - d0 = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]]) - d = np.hstack([var .depth_bnds[:,0],var .depth_bnds[-1,1]]) - ind = np.all(var.data.mask,axis=0) - ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int) - b = ind.min() - e = ind.max()+1 - ax.pcolormesh(l[b:(e+1)],d,var.data[:,b:e], - vmin = self.limits["timelonint"]["global"]["min"], - vmax = self.limits["timelonint"]["global"]["max"], - cmap = self.cmap) - ax.set_xlabel("latitude") - ax.set_ylim((d.max(),d.min())) - ax.set_ylabel("depth [m]") - fig.savefig("%s/Benchmark_%s_timelonint.png" % (self.output_path,region)) - ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) - fig.savefig("%s/Benchmark_%s_timelonints.png" % (self.output_path,region)) - plt.close() - - vname = "cycle_of_%s_over_%s" % (self.variable,region) - if vname in variables: - var0 = Variable(filename=bname,variable_name=vname,groupname="MeanState") - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - if region == "global": - page.addFigure("Mean regional annual cycle", - "benchmark_cycle", - "Benchmark_RNAME_cycle.png", - side = "BENCHMARK ANNUAL CYCLE", - legend = True, - longname = "Annual cycle") - page.addFigure("Overlapping mean regional annual cycle", - "benchmark_cycles", - 
"Benchmark_RNAME_cycles.png", - side = "BENCHMARK ANNUAL CYCLE", - legend = True, - longname = "Overlapping annual cycle") - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - d = np.hstack([var0.depth_bnds[:,0],var0.depth_bnds[-1,1]]) - d0 = np.hstack([var .depth_bnds[:,0],var .depth_bnds[-1,1]]) - ax.pcolormesh(bnd_months,d,var0.data.T, - vmin = self.limits["cycle"]["global"]["min"], - vmax = self.limits["cycle"]["global"]["max"], - cmap = self.cmap) - ax.set_xticks (mid_months) - ax.set_xticklabels(lbl_months) - ax.set_ylim((d.max(),d.min())) - ax.set_ylabel("depth [m]") - fig.savefig("%s/%s_%s_cycle.png" % (self.output_path,"Benchmark",region)) - ax.set_ylim((min(d0.max(),d.max()),max(d0.min(),d.min()))) - fig.savefig("%s/%s_%s_cycles.png" % (self.output_path,"Benchmark",region)) - plt.close() - - def determinePlotLimits(self): - - # Pick limit type - max_str = "up99"; min_str = "dn99" - if self.keywords.get("limit_type","99per") == "minmax": - max_str = "max"; min_str = "min" - - # Determine the min/max of variables over all models - limits = {} - for fname in glob.glob("%s/*.nc" % self.output_path): - with Dataset(fname) as dataset: - if "MeanState" not in dataset.groups: continue - group = dataset.groups["MeanState"] - variables = [v for v in group.variables.keys() if (v not in group.dimensions.keys() and - "_bnds" not in v and - group.variables[v][...].size > 1)] - for vname in variables: - var = group.variables[vname] - pname = vname.split("_")[ 0] - if "_score" in vname: - pname = "_".join(vname.split("_")[:2]) - if "_over_" in vname: - region = vname.split("_over_")[-1] - if pname not in limits: limits[pname] = {} - if region not in limits[pname]: - limits[pname][region] = {} - limits[pname][region]["min"] = +1e20 - limits[pname][region]["max"] = -1e20 - limits[pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) - limits[pname][region]["min"] = min(limits[pname][region]["min"],var.getncattr("min")) - limits[pname][region]["max"] = max(limits[pname][region]["max"],var.getncattr("max")) - else: - if pname not in limits: - limits[pname] = {} - limits[pname]["min"] = +1e20 - limits[pname]["max"] = -1e20 - limits[pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) - limits[pname]["min"] = min(limits[pname]["min"],var.getncattr(min_str)) - limits[pname]["max"] = max(limits[pname]["max"],var.getncattr(max_str)) - - # Another pass to fix score limits - for pname in limits.keys(): - if "score" in pname: - if "min" in limits[pname].keys(): - limits[pname]["min"] = 0. - limits[pname]["max"] = 1. - else: - for region in limits[pname].keys(): - limits[pname][region]["min"] = 0. - limits[pname][region]["max"] = 1. - self.limits = limits - - # Second pass to plot legends - cmaps = {"bias" :"seismic", - "timelonbias":"seismic", - "cyclebias" :"seismic", - "rmse" :"YlOrRd"} - for pname in limits.keys(): - - base_pname = pname - m = re.search("(\D+)\d+",pname) - if m: base_pname = m.group(1) - - # Pick colormap - cmap = self.cmap - if base_pname in cmaps: - cmap = cmaps[base_pname] - elif "score" in pname: - cmap = "score" - - # Need to symetrize? 
- if base_pname in ["bias","timelonbias","cyclebias"]: - if "min" in limits[pname]: - vabs = max(abs(limits[pname]["max"]),abs(limits[pname]["min"])) - limits[pname]["min"] = -vabs - limits[pname]["max"] = vabs - else: - vabs = max(abs(limits[pname]["global"]["max"]),abs(limits[pname]["global"]["min"])) - limits[pname]["global"]["min"] = -vabs - limits[pname]["global"]["max"] = vabs - - # Some plots need legends - if base_pname in ["timeint","bias","biasscore","rmse","rmsescore","timelonint","timelonbias","cycle","cyclebias"]: - if "min" in limits[pname]: - fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) - post.ColorBar(ax, - vmin = limits[pname]["min" ], - vmax = limits[pname]["max" ], - label = limits[pname]["unit"], - cmap = cmap) - fig.savefig("%s/legend_%s.png" % (self.output_path,pname)) - if base_pname == "timelonint" or base_pname == "cycle": - fig.savefig("%s/legend_%ss.png" % (self.output_path,pname)) - plt.close() - else: - fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) - post.ColorBar(ax, - vmin = limits[pname]["global"]["min" ], - vmax = limits[pname]["global"]["max" ], - label = limits[pname]["global"]["unit"], - cmap = cmap) - fig.savefig("%s/legend_%s.png" % (self.output_path,pname)) - if base_pname == "timelonint" or base_pname == "cycle": - fig.savefig("%s/legend_%ss.png" % (self.output_path,pname)) - plt.close() - - def compositePlots(self): - pass + # Jumping through hoops to get the benchmark plotted and in the html output + if self.master and (pname == "timeint" or pname == "phase" or pname == "iav"): + opts = space_opts[pname] + + # add to html layout + page.addFigure(opts["section"], + "benchmark_%s" % pname, + opts["pattern"].replace("MNAME","Benchmark"), + side = opts["sidelbl"].replace("MODEL","BENCHMARK"), + legend = True) + + # plot variable + for region in self.regions: + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0])) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + ax = axes.flat[dind] + obs = Variable(filename=bname,groupname="MeanState", + variable_name=vname.replace(zstr_0, zstr)) + obs.plot(ax, region = region, + vmin = self.limits[pname]["min"], + vmax = self.limits[pname]["max"], + cmap = self.limits[pname]["cmap"]) + fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname))) + plt.close() + + if not (var.spatial or (var.ndata is not None)) and var.temporal: + # grab the benchmark dataset to plot along with + try: + obs = Variable(filename=bname,groupname="MeanState", + variable_name=vname).convert(var.unit) + except: + continue + + # grab plotting options + if pname not in time_opts: continue + opts = time_opts[pname] + + # add to html layout + page.addFigure(opts["section"], + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) + + # plot variable + for region in self.regions: + if region not in vname: continue + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0])) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + ax = axes.flat[dind] + + var2 = Variable(filename=fname, groupname = "MeanState", + variable_name=vname.replace(zstr_0, zstr)) + obs = Variable(filename=bname,groupname="MeanState", + variable_name=vname.replace(zstr_0, zstr)).convert(var2.unit) + obs.plot(ax, lw = 2, color = 'k', alpha = 0.5) + var2.plot(ax, lw = 2, color = color, label = m.name, + ticks 
=opts["ticks"], + ticklabels=opts["ticklabels"]) + dy = 0.05*(self.limits[pname][region]["max"]-self.limits[pname][region]["min"]) + ax.set_ylim(self.limits[pname][region]["min"]-dy, + self.limits[pname][region]["max"]+dy) + ylbl = opts["ylabel"] + if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) + plt.close() + + logger.info("[%s][%s] Success" % (self.longname,m.name)) diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py index 1b30f5a4..3e024dbd 100644 --- a/src/ILAMB/ModelResult.py +++ b/src/ILAMB/ModelResult.py @@ -269,6 +269,7 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time convert_calendar = convert_calendar, t0 = initial_time - self.shift, tf = final_time - self.shift) + if var.time is None: continue tmin = min(tmin,var.time_bnds.min()) tmax = max(tmax,var.time_bnds.max()) diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 1bad6a8d..c37da677 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -440,6 +440,7 @@ def integrateInDepth(self,**keywords): # find which time bounds are included even partially in the interval [z0,zf] depth_bnds = np.copy(self.depth_bnds) + ind = np.where((z0depth_bnds[:,0]))[0] depth_bnds[(z0>depth_bnds[:,0])*(z0depth_bnds[:,0])*(zf Date: Wed, 9 Jun 2021 19:19:36 -0400 Subject: [PATCH 04/18] working now for one model --- bin/ilamb-run | 34 +-- src/ILAMB/ConfSoilMoisture.py | 479 +++++++++++++++++----------------- src/ILAMB/Confrontation.py | 1 + src/ILAMB/Post.py | 18 +- src/ILAMB/Variable.py | 25 +- 5 files changed, 283 insertions(+), 274 deletions(-) diff --git a/bin/ilamb-run b/bin/ilamb-run index 8359963b..73dddf9a 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -380,23 +380,23 @@ def WorkConfront(W,verbose=False,clean=False): try: t0 = time.time() - # YW - print(m) - print(m.path) - print(m.color) - print(m.filter) - print(m.regex) - print(m.shift) - print(m.name) - print(m.confrontations) - print(m.cell_areas) - print(m.land_fraction) - print(m.land_areas) - print(m.land_area) - print(m.variables) - print(m.names) - print(m.extents) - print(m.paths) + ## YW + #print(m) + #print(m.path) + #print(m.color) + #print(m.filter) + #print(m.regex) + #print(m.shift) + #print(m.name) + #print(m.confrontations) + #print(m.cell_areas) + #print(m.land_fraction) + #print(m.land_areas) + #print(m.land_area) + #print(m.variables) + #print(m.names) + #print(m.extents) + #print(m.paths) c.confront(m) dt = time.time()-t0 diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index b7c6c1aa..5632c3b8 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -13,6 +13,7 @@ import cftime as cf from .Confrontation import getVariableList from .Confrontation import Confrontation +import numpy as np import logging @@ -39,21 +40,20 @@ def __init__(self,**keywords): # at which we will compute are in the range of depths # of the data depth_name = depth_name[0] + depth_bnd_name = [d for d in dset.variables.keys() \ + if depth_name in d and \ + ("bound" in d or "bnd" in d)] - if 'bounds' in dset.variables[depth_name].ncattrs(): - data = dset.variables[dset.variables[depth_name \ - ].bounds][...] 
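# Sketch (editorial annotation, not part of the patch): the removed branch
# above trusted the CF "bounds" attribute on the depth coordinate, while the
# replacement below scans variable names, which also catches files that ship a
# bounds variable without advertising it. A minimal attribute-first lookup with
# a name-scan fallback; the helper name and file layout are hypothetical, only
# netCDF4/numpy calls are assumed:

from netCDF4 import Dataset
import numpy as np

def find_depth_bounds(path, depth_name="depth"):
    """Return an (n,2) array of layer bounds for the depth coordinate."""
    with Dataset(path) as dset:
        depth = dset.variables[depth_name]
        # CF-compliant files name their bounds variable in an attribute
        if "bounds" in depth.ncattrs() and depth.bounds in dset.variables:
            return dset.variables[depth.bounds][...]
        # fallback: scan for names like "depth_bnds" / "depth_bounds"
        for name in dset.variables:
            if depth_name in name and ("bound" in name or "bnd" in name):
                return dset.variables[name][...]
        # last resort: build bounds from midpoints between the levels
        z  = depth[...]
        zb = 0.5*(z[:-1] + z[1:])
        return np.column_stack([np.hstack([z[0], zb]), np.hstack([zb, z[-1]])])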
+ if len(depth_bnd_name) > 0: + depth_bnd_name = depth_bnd_name[0] + data = dset.variables[depth_bnd_name][...].data self.depths = data - self.depths_units = dset.variables[dset.variables[depth_name \ - ].bounds].units + self.depths_units = dset.variables[depth_bnd_name].units else: data = dset.variables[depth_name][...] self.depths = np.asarray(self.keywords.get("depths_bnds", - [[0., .1], - [.1, .3], - [.3, .5], - [.5, 1.]]), + [[0., .1]]), dtype = float) self.depths = self.depths[(self.depths[:,1]>=data.min() )*(self.depths[:,0]<=data.max()), :] @@ -80,7 +80,19 @@ def stageData(self,m): t0 = obs_cb[0]; tf = obs_cb[1] else: t0 = obs_tb[0,0]; tf = obs_tb[-1,1] - info += " contents span years %.1f to %.1f, est memory %d [Mb]" % (t0/365.+1850,tf/365.+1850,obs_mem) + + dname = [name for name in dset.variables.keys() \ + if name.lower() in ["depth_bnds", "depth_bounds"]] + if len(dname) == 0: + # if there is no depth, assume the data is surface + obs_z0 = 0; obs_zf = 0.1; obs_nd = 0 + else: + dname = dname[0] + obs_z0 = np.min(dset.variables[dname]) + obs_zf = np.max(dset.variables[dname]) + obs_nd = dset.variables[dname].shape[0] + + info += " contents span years %.1f to %.1f and depths %.1f to %.1f, est memory %d [Mb]" % (t0/365.+1850,tf/365.+1850,obs_z0,obs_zf,obs_mem) logger.info("[%s][%s]%s" % (self.name,self.variable,info)) # to peak at the model, we need any variable that could be @@ -97,12 +109,16 @@ def stageData(self,m): # peak at the model dataset without reading much into memory mod_nt = 0 mod_mem = 0. - mod_t0 = 2147483647 + mod_t0 = 2147483647 mod_tf = -2147483648 + mod_z0 = 2147483647 + mod_zf = -2147483648 + mod_nd = 999 for fname in m.variables[vname]: with Dataset(fname) as dset: var = dset.variables[vname] - mod_t,mod_tb,mod_cb,mod_b,mod_e,cal = il.GetTime(var,t0=t0-m.shift,tf=tf-m.shift) + mod_t,mod_tb,mod_cb,mod_b,mod_e,cal = il.GetTime(var,t0=t0-m.shift, + tf=tf-m.shift) if mod_t is None: info += "\n %s does not overlap the reference" % (fname) continue @@ -116,99 +132,79 @@ def stageData(self,m): mod_tb = mod_tb[ind] mod_t0 = min(mod_t0,mod_tb[ 0,0]) mod_tf = max(mod_tf,mod_tb[-1,1]) + + dname = [name for name in dset.variables.keys() \ + if name.lower() in ["depth_bnds", "depth_bounds"]] + if len(dname) == 0: + # if there is no depth, assume the data is surface + z0 = 0; zf = 0.1; mod_nd = 0 + else: + dname = dname[0] + temp = dset.variables[dname][...] 
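# Worked example (editorial annotation, not part of the patch): the test that
# follows is the standard interval-overlap check -- a layer [z0,zf] intersects
# the reference window [obs_z0,obs_zf] iff its bottom lies below the window top
# and its top lies above the window bottom. With hypothetical layer bounds in
# meters:

import numpy as np

temp = np.array([[0.0, 0.1], [0.1, 0.3], [0.3, 1.0], [1.0, 3.0]])  # [top,bottom]
obs_z0, obs_zf = 0.05, 0.5                                         # reference window
ind = (temp[:,1] > obs_z0)*(temp[:,0] < obs_zf)
print(ind)                                        # [ True  True  True False]
print(np.min(temp[ind,:]), np.max(temp[ind,:]))   # 0.0 1.0 -> the slab's z0, zf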
+ ind = (temp[:,1] > obs_z0)*(temp[:,0] < obs_zf) + if sum(ind) == 0: + info += "\n %s does not overlap the reference" % (fname) + continue + z0 = np.min(temp[ind, :]) + zf = np.max(temp[ind, :]) + mod_nd = min(mod_nd, sum(ind)) + mod_z0 = min(mod_z0,z0) + mod_zf = max(mod_zf,zf) + nt = mod_t.size mod_nt += nt mem = (var.size/var.shape[0]*nt)*8e-6 mod_mem += mem - info += "\n %s spans years %.1f to %.1f, est memory in time bounds %d [Mb]" % (fname,mod_t.min()/365.+1850,mod_t.max()/365.+1850,mem) + info += "\n %s spans years %.1f to %.1f and depths %.1f to %.1f, est memory in time bounds %d [Mb]" % (fname,mod_t.min()/365.+1850,mod_t.max()/365.+1850,mod_z0,mod_zf,mem) info += "\n total est memory = %d [Mb]" % mod_mem logger.info("[%s][%s][%s] reading model data from possibly many files%s" % (self.name,m.name,vname,info)) if mod_t0 > mod_tf: logger.debug("[%s] Could not find [%s] in the model results in the given time frame, tinput = [%.1f,%.1f]" % (self.name,",".join(possible),t0,tf)) raise il.VarNotInModel() - # if the reference is a climatology, then build a model climatology in slabs - info = "" - if climatology: - # how many slabs - ns = int(np.floor(mod_mem/mem_slab))+1 - ns = min(max(1,ns),mod_nt) - logger.info("[%s][%s] building climatology in %d slabs" % (self.name,m.name,ns)) + # yield the results by observational depths + def _addDepth(v): + v.depth = np.asarray([.05]) + v.depth_bnds = np.asarray([[0.,.1]]) + shp = list(v.data.shape) + shp.insert(1,1) + v.data.shape = shp + v.layered = True + return v - # across what times? - slab_t = (mod_tf-mod_t0)*np.linspace(0,1,ns+1)+mod_t0 - slab_t = np.floor(slab_t / 365)*365 + bnd_months[(np.abs(bnd_months[:,np.newaxis] - (slab_t % 365))).argmin(axis=0)] + info = "" + for i in range(self.depths.shape[0]): + z0 = max(self.depths[i,0], obs_z0, mod_z0) + zf = min(self.depths[i,1], obs_zf, mod_zf) + if z0 >= zf: + continue - # ready to slab - tb_prev = None - data = None - dnum = None - for i in range(ns): - v = m.extractTimeSeries(self.variable, - alt_vars = self.alternate_vars, - expression = self.derived, - initial_time = slab_t[i], - final_time = slab_t[i+1]).convert(unit) - - # trim does not work properly so we will add a manual check ourselves - if tb_prev is None: - tb_prev = v.time_bnds[...] - else: - if np.allclose(tb_prev[-1],v.time_bnds[0]): - v.data = v.data[1:] - v.time = v.time[1:] - v.time_bnds = v.time_bnds[1:] - tb_prev = v.time_bnds[...] - if v.time.size == 0: continue - - mind = (np.abs(mid_months[:,np.newaxis]-(v.time % 365))).argmin(axis=0) - if data is None: - data = np.ma.zeros((12,)+v.data.shape[1:]) - dnum = np.ma.zeros(data.shape,dtype=int) - data[mind,...] += v.data - dnum[mind,...] 
+= 1 - with np.errstate(over='ignore',under='ignore'): - data = data / dnum.clip(1) - - # return variables - obs = Variable(filename = self.source, - variable_name = self.variable, - alternate_vars = self.alternate_vars) - mod = Variable(name = obs.name, - unit = unit, - data = data, - time = obs.time, - lat = v.lat, - lon = v.lon, - depth = v.depth, - time_bnds = obs.time_bnds, - lat_bnds = v.lat_bnds, - lon_bnds = v.lon_bnds, - depth_bnds = v.depth_bnds) - yield obs,mod - - # if obs is historical, then we yield slabs of both - else: obs_mem *= (mod_tf-mod_t0)/(tf-t0) mod_t0 = max(mod_t0,t0) mod_tf = min(mod_tf,tf) ns = int(np.floor(max(obs_mem,mod_mem)/mem_slab))+1 - ns = min(min(max(1,ns),mod_nt),obs_nt) - logger.info("[%s][%s] staging data in %d slabs" % (self.name,m.name,ns)) - + ns = min(min(max(1,ns),mod_nt),obs_nt) + logger.info("[%s][%s] building depths %.1f to %.1f in %d slabs" \ + % (self.name,m.name,z0,zf,ns)) + # across what times? slab_t = (mod_tf-mod_t0)*np.linspace(0,1,ns+1)+mod_t0 slab_t = np.floor(slab_t / 365)*365 + bnd_months[(np.abs(bnd_months[:,np.newaxis] - (slab_t % 365))).argmin(axis=0)] - + obs_tb = None; mod_tb = None - for i in range(ns): + obs_di = []; mod_di = [] + for j in range(ns): + # YW + print(j, slab_t[j], slab_t[j+1]) + # get reference variable obs = Variable(filename = self.source, variable_name = self.variable, alternate_vars = self.alternate_vars, - t0 = slab_t[i], - tf = slab_t[i+1]).trim(t=[slab_t[i],slab_t[i+1]]) + t0 = slab_t[j], + tf = slab_t[j+1] \ + ).trim(t = [slab_t[j],slab_t[j+1]]) if obs_tb is None: obs_tb = obs.time_bnds[...] else: @@ -218,13 +214,29 @@ def stageData(self,m): obs.time_bnds = obs.time_bnds[1:] assert np.allclose(obs.time_bnds[0,0],obs_tb[-1,1]) obs_tb = obs.time_bnds[...] + if not obs.layered: + obs = _addDepth(obs) + else: + obs = obs.trim(d = [z0, zf]).integrateInDepth(z0 = z0, zf = zf, mean = True) + + # YW + print(obs.unit, obs.data, obs.time, obs_tb, obs.lat, obs.lat_bnds, + obs.lon, obs.lon_bnds) + + obs_di.append(Variable(name = "depthint%.2f-%.2f" % (z0, zf), + unit = obs.unit, + data = obs.data, + time = obs.time, time_bnds = obs_tb, + lat = obs.lat, lat_bnds = obs.lat_bnds, + lon = obs.lon, lon_bnds = obs.lon_bnds)) # get model variable mod = m.extractTimeSeries(self.variable, alt_vars = self.alternate_vars, expression = self.derived, - initial_time = slab_t[i], - final_time = slab_t[i+1]).trim(t=[slab_t[i],slab_t[i+1]]).convert(obs.unit) + initial_time = slab_t[j], + final_time = slab_t[j+1] \ + ).trim(t=[slab_t[j],slab_t[j+1]]).convert(obs.unit) if mod_tb is None: mod_tb = mod.time_bnds else: @@ -235,21 +247,31 @@ def stageData(self,m): assert np.allclose(mod.time_bnds[0,0],mod_tb[-1,1]) mod_tb = mod.time_bnds[...] 
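# Worked example (editorial annotation, not part of the patch): the slab
# endpoints computed above are snapped to calendar month edges -- each endpoint
# keeps its year offset, but its day-of-year is replaced by the nearest entry
# of bnd_months (the noleap month boundaries). With hypothetical endpoints in
# days since 1850-01-01:

import numpy as np

bnd_months = np.array([0,31,59,90,120,151,181,212,243,273,304,334,365])
mod_t0, mod_tf, ns = 740.0, 2260.0, 3
slab_t = (mod_tf-mod_t0)*np.linspace(0,1,ns+1)+mod_t0
slab_t = np.floor(slab_t / 365)*365 + bnd_months[(np.abs(bnd_months[:,np.newaxis] - (slab_t % 365))).argmin(axis=0)]
print(slab_t)   # [ 730. 1246. 1764. 2249.] -- every endpoint on a month edge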
assert obs.time.size == mod.time.size - - yield obs,mod + if not mod.layered: + mod = _addDepth(mod) + else: + mod = mod.trim(d = [z0, zf]).integrateInDepth(z0 = z0, zf = zf, mean = True) + # YW + print(mod.unit, mod.data, mod.time, mod_tb, mod.lat, mod.lat_bnds, + mod.lon, mod.lon_bnds) - def confront(self,m): + mod_di.append(Variable(name = "depthint%.2f-%.2f" % (z0, zf), + unit = mod.unit, + data = mod.data, + time = mod.time, time_bnds = mod_tb, + lat = mod.lat, lat_bnds = mod.lat_bnds, + lon = mod.lon, lon_bnds = mod.lon_bnds)) - def _addDepth(v): - v.depth = np.asarray([.05]) - v.depth_bnds = np.asarray([[0.,.1]]) - shp = list(v.data.shape) - shp.insert(1,1) - v.data.shape = shp - v.layered = True - return v + obs_tmp = il.CombineVariables(obs_di) + mod_tmp = il.CombineVariables(mod_di) + obs_tmp.name = obs_tmp.name.split("_")[0] + mod_tmp.name = mod_tmp.name.split("_")[0] + yield obs_tmp, mod_tmp, z0, zf + + + def confront(self,m): mod_file = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) obs_file = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name, )) with il.FileContextManager(self.master,mod_file,obs_file) as fcm: @@ -263,80 +285,43 @@ def _addDepth(v): "color":np.asarray([0.5,0.5,0.5]), "complete":0}) - # Get the depth-integrated observation and model data for each slab. - obs_timeint = {}; mod_timeint = {} - for dind in range(self.depths.shape[0]): - obs_timeint[dind] = [] - mod_timeint[dind] = [] - for obs,mod in self.stageData(m): - # if the data has no depth, we assume it is surface - if not obs.layered: obs = _addDepth(obs) - if not mod.layered: mod = _addDepth(mod) - - # time bounds for this slab - tb = obs.time_bnds[[0,-1],[0,1]].reshape((1,2)) - t = np.asarray([tb.mean()]) - - # - for dind, z0 in enumerate(self.depths[:, 0]): - zf = self.depths[dind, 1] - z = obs.integrateInDepth(z0 = z0, zf = zf, mean = True).integrateInTime(mean = True) - - #YW - print('Staging data ... 
%.2f-%.2f' % (z0, zf)) - - obs_timeint[dind].append(Variable(name = "sm%.2f-%.2f" % (z0, zf), - unit = z.unit, - data = z.data.reshape((1,) +z.data.shape), - time = t, time_bnds = tb, - lat = z.lat, lat_bnds = z.lat_bnds, - lon = z.lon, lon_bnds = z.lon_bnds)) - z = mod.integrateInDepth(z0 = z0, zf = zf, mean = True).integrateInTime(mean = True) - mod_timeint[dind].append(Variable(name = "sm%.2f-%.2f" % (z0, zf), - unit = z.unit, - data = z.data.reshape((1,)+z.data.shape), - time = t, time_bnds = tb, - lat = z.lat, lat_bnds = z.lat_bnds, - lon = z.lon, lon_bnds = z.lon_bnds)) - # Read in some options and run the mean state analysis mass_weighting = self.keywords.get("mass_weighting",False) skip_rmse = self.keywords.get("skip_rmse" ,False) - skip_iav = self.keywords.get("skip_iav" ,True ) + skip_iav = self.keywords.get("skip_iav" ,False) skip_cycle = self.keywords.get("skip_cycle" ,False) rmse_score_basis = self.keywords.get("rmse_score_basis","cycle") - for dind in range(self.depths.shape[0]): - obs_tmp = il.CombineVariables(obs_timeint[dind]) - mod_tmp = il.CombineVariables(mod_timeint[dind]) - print(obs_tmp.name) - print(mod_tmp.name) - obs_tmp.name = obs_tmp.name.split("_")[0] - mod_tmp.name = mod_tmp.name.split("_")[0] - - if obs_tmp.spatial: - il.AnalysisMeanStateSpace(obs_tmp,mod_tmp,dataset = fcm.mod_dset, - regions = self.regions, - benchmark_dataset = fcm.obs_dset, - table_unit = self.table_unit, - plot_unit = self.plot_unit, - space_mean = self.space_mean, - skip_rmse = skip_rmse, - skip_iav = skip_iav, - skip_cycle = skip_cycle, - mass_weighting = mass_weighting, - rmse_score_basis = rmse_score_basis) + # Get the depth-integrated observation and model data for each slab. + for obs,mod,z0,zf in self.stageData(m): + #YW + print('Staging data ... 
%.2f-%.2f' % (z0, zf)) + print(obs.name) + print(mod.name) + + if obs.spatial: + il.AnalysisMeanStateSpace(obs, mod, dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_rmse = skip_rmse, + skip_iav = skip_iav, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting, + rmse_score_basis = rmse_score_basis) else: - il.AnalysisMeanStateSites(obs_tmp,mod_tmp,dataset = fcm.mod_dset, - regions = self.regions, - benchmark_dataset = fcm.obs_dset, - table_unit = self.table_unit, - plot_unit = self.plot_unit, - space_mean = self.space_mean, - skip_rmse = skip_rmse, - skip_iav = skip_iav, - skip_cycle = skip_cycle, - mass_weighting = mass_weighting) + il.AnalysisMeanStateSites(obs, mod, dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_rmse = skip_rmse, + skip_iav = skip_iav, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting) fcm.mod_dset.setncattr("complete",1) if self.master: fcm.obs_dset.setncattr("complete",1) logger.info("[%s][%s] Success" % (self.longname,m.name)) @@ -436,7 +421,7 @@ def compositePlots(self): if len(key)>0: has_cycle = True cycle[region][zstr].append(Variable(filename=fname,groupname="MeanState", - variable_name=key[0])) + variable_name=key[0])) if zstr not in std[region]: std[region][zstr] = [] if zstr not in corr[region]: corr[region][zstr] = [] @@ -452,7 +437,7 @@ def compositePlots(self): std [region][zstr].append(sds.getncattr("std")) # composite annual cycle plot - if has_cycle and len(models) > 2: + if has_cycle and len(models) > 0: page.addFigure("Spatially integrated regional mean", "compcycle", "RNAME_compcycle.png", @@ -462,23 +447,28 @@ def compositePlots(self): for region in self.regions: if region not in cycle: continue fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize=(6.8,2.8 * self.depths.shape[0]), - tight_layout=True) + figsize = (6.5, 2.8*self.depths.shape[0])) for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind, 1] zstr = '%.2f-%.2f' % (z0, zf) - ax = axes[dind] + if self.depths.shape[0] == 1: + ax = axes + else: + ax = axes.flat[dind] + for name,color,var in zip(models,colors,cycle[region][zstr]): - dy = 0.05*(self.limits["cycle"][region]["max"]-self.limits["cycle"][region]["min"]) + dy = 0.05*(self.limits["cycle"][region]["max"] - \ + self.limits["cycle"][region]["min"]) + var.plot(ax, lw=2, color=color, label=name, ticks = time_opts["cycle"]["ticks"], ticklabels = time_opts["cycle"]["ticklabels"], vmin = self.limits["cycle"][region]["min"]-dy, vmax = self.limits["cycle"][region]["max"]+dy) - ylbl = time_opts["cycle"]["ylabel"] - if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) - ylbl = ylbl + ' ' + zstr + self.depths_units + #ylbl = time_opts["cycle"]["ylabel"] + #if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) + ylbl = zstr + ' '+ self.depths_units ax.set_ylabel(ylbl) fig.savefig(os.path.join(self.output_path,"%s_compcycle.png" % (region))) plt.close() @@ -512,14 +502,11 @@ def _alphabeticalBenchmarkFirst(key): # spatial distribution Taylor plot if has_std: - for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind, 1] - zstr = '%.2f-%.2f' % (z0, zf) - page.addFigure("Temporally integrated period mean", - "spatial_variance", - "RNAME_spatial_variance_" + zstr + ".png", - side = "SPATIAL TAYLOR DIAGRAM", - 
legend = False) + page.addFigure("Temporally integrated period mean", + "spatial_variance", + "RNAME_spatial_variance.png", + side = "SPATIAL TAYLOR DIAGRAM", + legend = False) page.addFigure("Temporally integrated period mean", "legend_spatial_variance", "legend_spatial_variance.png", @@ -529,6 +516,7 @@ def _alphabeticalBenchmarkFirst(key): for region in self.regions: if not (region in std and region in corr): continue + fig = plt.figure(figsize=(12.0,12.0)) for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind, 1] zstr = '%.2f-%.2f' % (z0, zf) @@ -536,14 +524,13 @@ def _alphabeticalBenchmarkFirst(key): if not (zstr in std[region] and zstr in corr[region]): continue if len(std[region][zstr]) != len(corr[region][zstr]): continue if len(std[region][zstr]) == 0: continue - - fig = plt.figure(figsize=(6.0,6.0)) - post.TaylorDiagram(np.asarray(std[region][zstr]), - np.asarray(corr[region][zstr]), - 1.0,fig,colors) - fig.savefig(os.path.join(self.output_path, - "%s_spatial_variance_%s.png" % (region, zstr))) - plt.close() + ax, aux = post.TaylorDiagram(np.asarray(std[region][zstr]), + np.asarray(corr[region][zstr]), + 1.0,fig,colors,True,220+dind+1) + ax.set_title(zstr + ' ' + self.depths_units) + fig.savefig(os.path.join(self.output_path, + "%s_spatial_variance.png" % (region))) + plt.close() def modelPlots(self,m): """For a given model, create the plots of the analysis results. @@ -576,6 +563,10 @@ def modelPlots(self,m): if group.variables[vname][...].size <= 1: continue var = Variable(filename=fname,groupname="MeanState",variable_name=vname) + # YW + ##print(self.limits.keys()) + ##print(pname) + if (var.spatial or (var.ndata is not None)) and not var.temporal: # grab plotting options @@ -583,6 +574,9 @@ def modelPlots(self,m): if pname not in space_opts: continue opts = space_opts[pname] + # YW + ##print('... 
is used in space_opts') + # add to html layout page.addFigure(opts["section"], pname, @@ -592,20 +586,20 @@ def modelPlots(self,m): # plot variable for region in self.regions: - fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize = (6.5, 2.8*self.depths.shape[0])) - + nax = self.depths.shape[0] + fig = plt.figure() for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind,1] zstr = '%.2f-%.2f' % (z0, zf) - ax = axes.flat[dind] var2 = Variable(filename=fname, groupname = "MeanState", variable_name=vname.replace(zstr_0, zstr)) - var2.plot(ax, region = region, - vmin = self.limits[pname]["min"], - vmax = self.limits[pname]["max"], - cmap = self.limits[pname]["cmap"]) - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) + ax = var2.plot(None, fig, nax, region = region, + vmin = self.limits[pname]["min"], + vmax = self.limits[pname]["max"], + cmap = self.limits[pname]["cmap"]) + ax.set_title(zstr + ' ' + self.depths_units) + fig.savefig(os.path.join(self.output_path, + "%s_%s_%s.png" % (m.name,region,pname))) plt.close() # Jumping through hoops to get the benchmark plotted and in the html output @@ -621,65 +615,68 @@ def modelPlots(self,m): # plot variable for region in self.regions: - fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize = (6.5, 2.8*self.depths.shape[0])) + nax = self.depths.shape[0] + fig = plt.figure() for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind,1] zstr = '%.2f-%.2f' % (z0, zf) - ax = axes.flat[dind] obs = Variable(filename=bname,groupname="MeanState", variable_name=vname.replace(zstr_0, zstr)) - obs.plot(ax, region = region, - vmin = self.limits[pname]["min"], - vmax = self.limits[pname]["max"], - cmap = self.limits[pname]["cmap"]) + ax = obs.plot(None, fig, nax, region = region, + vmin = self.limits[pname]["min"], + vmax = self.limits[pname]["max"], + cmap = self.limits[pname]["cmap"]) + ax.set_title(zstr + ' ' + self.depths_units) fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname))) plt.close() - if not (var.spatial or (var.ndata is not None)) and var.temporal: - # grab the benchmark dataset to plot along with - try: - obs = Variable(filename=bname,groupname="MeanState", - variable_name=vname).convert(var.unit) - except: - continue + if not (var.spatial or (var.ndata is not None)) and var.temporal: + # grab the benchmark dataset to plot along with + try: + obs = Variable(filename=bname,groupname="MeanState", + variable_name=vname).convert(var.unit) + except: + continue - # grab plotting options - if pname not in time_opts: continue - opts = time_opts[pname] + # grab plotting options + if pname not in time_opts: continue + opts = time_opts[pname] - # add to html layout - page.addFigure(opts["section"], - pname, - opts["pattern"], - side = opts["sidelbl"], - legend = opts["haslegend"]) + # add to html layout + page.addFigure(opts["section"], + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) - # plot variable - for region in self.regions: - if region not in vname: continue - fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize = (6.5, 2.8*self.depths.shape[0])) - for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind,1] - zstr = '%.2f-%.2f' % (z0, zf) + # plot variable + for region in self.regions: + if region not in vname: continue + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0])) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = 
'%.2f-%.2f' % (z0, zf) + if self.depths.shape[0] == 1: + ax = axes + else: ax = axes.flat[dind] - var2 = Variable(filename=fname, groupname = "MeanState", - variable_name=vname.replace(zstr_0, zstr)) - obs = Variable(filename=bname,groupname="MeanState", - variable_name=vname.replace(zstr_0, zstr)).convert(var2.unit) - obs.plot(ax, lw = 2, color = 'k', alpha = 0.5) - var2.plot(ax, lw = 2, color = color, label = m.name, - ticks =opts["ticks"], - ticklabels=opts["ticklabels"]) - dy = 0.05*(self.limits[pname][region]["max"]-self.limits[pname][region]["min"]) - ax.set_ylim(self.limits[pname][region]["min"]-dy, - self.limits[pname][region]["max"]+dy) - ylbl = opts["ylabel"] - if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) - ax.set_ylabel(ylbl) - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) - plt.close() + var2 = Variable(filename=fname, groupname = "MeanState", + variable_name=vname.replace(zstr_0, zstr)) + obs = Variable(filename=bname,groupname="MeanState", + variable_name=vname.replace(zstr_0, zstr)).convert(var2.unit) + obs.plot(ax, lw = 2, color = 'k', alpha = 0.5) + var2.plot(ax, lw = 2, color = color, label = m.name, + ticks =opts["ticks"], + ticklabels=opts["ticklabels"]) + dy = 0.05*(self.limits[pname][region]["max"]-self.limits[pname][region]["min"]) + ax.set_ylim(self.limits[pname][region]["min"]-dy, + self.limits[pname][region]["max"]+dy) + ylbl = opts["ylabel"] + if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) + plt.close() logger.info("[%s][%s] Success" % (self.longname,m.name)) diff --git a/src/ILAMB/Confrontation.py b/src/ILAMB/Confrontation.py index 481f89d4..686093b5 100644 --- a/src/ILAMB/Confrontation.py +++ b/src/ILAMB/Confrontation.py @@ -15,6 +15,7 @@ import logging logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) + def getVariableList(dataset): """Extracts the list of variables in the dataset that aren't dimensions or the bounds of dimensions. diff --git a/src/ILAMB/Post.py b/src/ILAMB/Post.py index b58e0c95..e106fd35 100644 --- a/src/ILAMB/Post.py +++ b/src/ILAMB/Post.py @@ -73,7 +73,7 @@ def ColorBar(ax,**keywords): if ticks is not None: cb.set_ticks(ticks) if ticklabels is not None: cb.set_ticklabels(ticklabels) -def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True): +def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True,position=111): """Plot a Taylor diagram. 
This is adapted from the code by Yannick Copin found here: @@ -121,7 +121,7 @@ def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True): extremes=(0,np.pi/2,smin,smax), grid_locator1=gl1, tick_formatter1=tf1) - ax = FA.FloatingSubplot(fig, 111, grid_helper=ghelper) + ax = FA.FloatingSubplot(fig, position, grid_helper=ghelper) fig.add_subplot(ax) # adjust axes @@ -141,26 +141,26 @@ def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True): ax.axis["bottom"].set_visible(False) ax.grid(True) - ax = ax.get_aux_axes(tr) + aux = ax.get_aux_axes(tr) # Plot data corrcoef = corrcoef.clip(-1,1) for i in range(len(corrcoef)): - ax.plot(np.arccos(corrcoef[i]),stddev[i],'o',color=colors[i],mew=0,ms=8) + aux.plot(np.arccos(corrcoef[i]),stddev[i],'o',color=colors[i],mew=0,ms=8) # Add reference point and stddev contour - l, = ax.plot([0],refstd,'k*',ms=12,mew=0) + l, = aux.plot([0],refstd,'k*',ms=12,mew=0) t = np.linspace(0, np.pi/2) r = np.zeros_like(t) + refstd - ax.plot(t,r, 'k--') + aux.plot(t,r, 'k--') # centralized rms contours rs,ts = np.meshgrid(np.linspace(smin,smax), np.linspace(0,np.pi/2)) rms = np.sqrt(refstd**2 + rs**2 - 2*refstd*rs*np.cos(ts)) - contours = ax.contour(ts,rs,rms,5,colors='k',alpha=0.4) - ax.clabel(contours,fmt='%1.1f') + contours = aux.contour(ts,rs,rms,5,colors='k',alpha=0.4) + aux.clabel(contours,fmt='%1.1f') - return ax + return ax, aux class HtmlFigure(): diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index c37da677..3e986979 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -1147,7 +1147,7 @@ def _checkLayer(layer,dataset): for key in attributes.keys(): V.setncattr(key,attributes[key]) - def plot(self,ax,**keywords): + def plot(self,ax,fig=None,nax=1,**keywords): """Plots the variable on the given matplotlib axis. The behavior of this routine depends on the type of variable @@ -1161,6 +1161,10 @@ def plot(self,ax,**keywords): ---------- ax : matplotlib.axes._subplots.AxesSubplot The matplotlib axes object onto which you wish to plot the variable + fig: matplotlib.figure.Figure, optional + The matplotlib figure onto which the ax is located + nax: int, optional + The number of axes that the figure is expected to have lw : float, optional The line width to use when plotting alpha : float, optional @@ -1242,7 +1246,7 @@ def plot(self,ax,**keywords): extents[0] = max(extents[0]-dx,-180); extents[1] = min(extents[1]+dx,+180) extents[2] = max(extents[2]-dy,- 90); extents[3] = min(extents[3]+dy,+ 90) lon_mid = 0.5*(extents[0]+extents[1]) - + # ...but the data might cross the dateline, but not be global if(lon_empty[ 0]== 0 and lon_empty[-1]==(self.lon.size-1) and @@ -1253,12 +1257,11 @@ def plot(self,ax,**keywords): extents[1] = wrap_lon.max() dx = percent_pad*(extents[1]-extents[0]) extents[0] -= dx; extents[1] += dx - + # find the middle centroid by mean angle lons = self.lon[np.where(self.data.mask.all(axis=-2)==False)[0]] lons = lons/360*2*np.pi lon_mid = np.arctan2(np.sin(lons).mean(),np.cos(lons).mean())/2/np.pi*360 - else: extents = [self.lon.min(),self.lon.max(), self.lat.min(),self.lat.max()] @@ -1283,11 +1286,19 @@ def plot(self,ax,**keywords): extents = [-180,180,-90,90] aspect_ratio = 0.5 lon_mid = 0. 
-            
+
         # make the plot
         w = 7.5; h = w*aspect_ratio
-        fig,ax = plt.subplots(figsize=(w,h),
-                              subplot_kw={'projection':proj})
+        if fig is None:
+            fig,ax = plt.subplots(figsize=(w,h),
+                                  subplot_kw={'projection':proj})
+        else:
+            if len(fig.axes) >= nax:
+                raise ValueError('Figure already has %d axes but only %d were expected.' % (len(fig.axes),nax))
+            ax = fig.add_subplot(nax*100 + 11 + len(fig.axes), # nax rows, 1 column, slot len(fig.axes)+1; assumes nax <= 9
+                                 projection = proj)
+            fig.set_figwidth(w)
+            fig.set_figheight(w * aspect_ratio * nax) # keep original aspect ratio
         if self.ndata is None:
             lat = np.hstack([self.lat_bnds[:,0],self.lat_bnds[-1,-1]])
             lon = np.hstack([self.lon_bnds[:,0],self.lon_bnds[-1,-1]])

From 2e13bcc0b8a8b0bc2b037b8dfb789d9f71456d1b Mon Sep 17 00:00:00 2001
From: Yaoping Wang
Date: Fri, 10 Sep 2021 00:01:44 -0400
Subject: [PATCH 05/18] speed up WIP

---
 src/ILAMB/ConfSoilMoisture.py | 200 ++++++++++++++++------------------
 src/ILAMB/ModelResult.py      |  10 +-
 src/ILAMB/Variable.py         |   4 +-
 src/ILAMB/ilamblib.py         |  47 +++++++-
 4 files changed, 145 insertions(+), 116 deletions(-)

diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py
index 5632c3b8..39cb8c5a 100644
--- a/src/ILAMB/ConfSoilMoisture.py
+++ b/src/ILAMB/ConfSoilMoisture.py
@@ -14,6 +14,7 @@ from .Confrontation import getVariableList
 from .Confrontation import Confrontation
 import numpy as np
+import time # DEBUG
 
 import logging
@@ -41,8 +42,7 @@ def __init__(self,**keywords):
                 # of the data
                 depth_name = depth_name[0]
                 depth_bnd_name = [d for d in dset.variables.keys() \
-                                  if depth_name in d and \
-                                  ("bound" in d or "bnd" in d)]
+                                  if depth_name in d and ("bound" in d or "bnd" in d)]
                 if len(depth_bnd_name) > 0:
                     depth_bnd_name = depth_bnd_name[0]
@@ -69,7 +69,13 @@ def stageData(self,m):
         unit = ""
         with Dataset(self.source) as dset:
             var = dset.variables[self.variable]
+
+            print('stage observation ' + self.variable) # DEBUG
+            tstart = time.time() #DEBUG
             obs_t,obs_tb,obs_cb,obs_b,obs_e,cal = il.GetTime(var)
+            tend = time.time() # DEBUG
+            print( "il.GetTime took " + str((tend - tstart)/60) + " minutes." ) # DEBUG
+
             obs_nt  = obs_t.size
             obs_mem = var.size*8e-6
             unit    = var.units
@@ -77,22 +83,24 @@ def stageData(self,m):
             if climatology:
                 info += "[climatology]"
                 obs_cb = (obs_cb-1850)*365.
-                t0 = obs_cb[0]; tf = obs_cb[1]
+                obs_t0 = obs_cb[0]; obs_tf = obs_cb[1]
             else:
-                t0 = obs_tb[0,0]; tf = obs_tb[-1,1]
+                obs_t0 = obs_tb[0,0]; obs_tf = obs_tb[-1,1]
 
-            dname = [name for name in dset.variables.keys() \
-                     if name.lower() in ["depth_bnds", "depth_bounds"]]
-            if len(dname) == 0:
+            obs_dname = [name for name in dset.variables.keys() \
+                         if name.lower() in ["depth_bnds", "depth_bounds"]]
+            if len(obs_dname) == 0:
                 # if there is no depth, assume the data is surface
-                obs_z0 = 0; obs_zf = 0.1; obs_nd = 0
+                obs_z0 = 0; obs_zf = 0.1; obs_z_bnd = np.array([[0, 0.1]]); obs_nd = 0
+                obs_dname = None
             else:
-                dname = dname[0]
-                obs_z0 = np.min(dset.variables[dname])
-                obs_zf = np.max(dset.variables[dname])
-                obs_nd = dset.variables[dname].shape[0]
+                obs_dname = obs_dname[0]
+                obs_z0 = np.min(dset.variables[obs_dname])
+                obs_zf = np.max(dset.variables[obs_dname])
+                obs_z_bnd = dset.variables[obs_dname][...]
+ obs_nd = dset.variables[obs_dname].shape[0] - info += " contents span years %.1f to %.1f and depths %.1f to %.1f, est memory %d [Mb]" % (t0/365.+1850,tf/365.+1850,obs_z0,obs_zf,obs_mem) + info += " contents span years %.1f to %.1f and depths %.1f to %.1f, est memory %d [Mb]" % (obs_t0/365.+1850,obs_tf/365.+1850,obs_z0,obs_zf,obs_mem) logger.info("[%s][%s]%s" % (self.name,self.variable,info)) # to peak at the model, we need any variable that could be @@ -115,16 +123,24 @@ def stageData(self,m): mod_zf = -2147483648 mod_nd = 999 for fname in m.variables[vname]: + + print('stage model ' + vname) # DEBUG + with Dataset(fname) as dset: var = dset.variables[vname] - mod_t,mod_tb,mod_cb,mod_b,mod_e,cal = il.GetTime(var,t0=t0-m.shift, - tf=tf-m.shift) + + tstart = time.time() # DEBUG + mod_t,mod_tb,mod_cb,mod_b,mod_e,cal = il.GetTime(var,t0=obs_t0-m.shift, + tf=obs_tf-m.shift) + tend = time.time() # DEBUG + print( "il.GetTime took " + str((tend - tstart)/60) + " minutes." ) # DEBUG + if mod_t is None: info += "\n %s does not overlap the reference" % (fname) continue mod_t += m.shift mod_tb += m.shift - ind = np.where((mod_tb[:,0] >= t0)*(mod_tb[:,1] <= tf))[0] + ind = np.where((mod_tb[:,0] >= obs_t0)*(mod_tb[:,1] <= obs_tf))[0] if ind.size == 0: info += "\n %s does not overlap the reference" % (fname) continue @@ -133,14 +149,14 @@ def stageData(self,m): mod_t0 = min(mod_t0,mod_tb[ 0,0]) mod_tf = max(mod_tf,mod_tb[-1,1]) - dname = [name for name in dset.variables.keys() \ - if name.lower() in ["depth_bnds", "depth_bounds"]] - if len(dname) == 0: + mod_dname = [name for name in dset.variables.keys() \ + if name.lower() in ["depth_bnds", "depth_bounds"]] + if len(mod_dname) == 0: # if there is no depth, assume the data is surface - z0 = 0; zf = 0.1; mod_nd = 0 + z0 = 0; zf = 0.1; mod_nd = 0; mod_dname = None else: - dname = dname[0] - temp = dset.variables[dname][...] + mod_dname = mod_dname[0] + temp = dset.variables[mod_dname][...] ind = (temp[:,1] > obs_z0)*(temp[:,0] < obs_zf) if sum(ind) == 0: info += "\n %s does not overlap the reference" % (fname) @@ -159,10 +175,9 @@ def stageData(self,m): info += "\n total est memory = %d [Mb]" % mod_mem logger.info("[%s][%s][%s] reading model data from possibly many files%s" % (self.name,m.name,vname,info)) if mod_t0 > mod_tf: - logger.debug("[%s] Could not find [%s] in the model results in the given time frame, tinput = [%.1f,%.1f]" % (self.name,",".join(possible),t0,tf)) + logger.debug("[%s] Could not find [%s] in the model results in the given time frame, tinput = [%.1f,%.1f]" % (self.name,",".join(possible),mod_t0,mod_tf)) raise il.VarNotInModel() - # yield the results by observational depths def _addDepth(v): v.depth = np.asarray([.05]) @@ -174,101 +189,70 @@ def _addDepth(v): return v info = "" - for i in range(self.depths.shape[0]): - z0 = max(self.depths[i,0], obs_z0, mod_z0) - zf = min(self.depths[i,1], obs_zf, mod_zf) - if z0 >= zf: + for i in range(obs_z_bnd.shape[0]): + ind = (self.depths[:,0] < obs_z_bnd[i,1]) & \ + (self.depths[:,1] > obs_z_bnd[i,0]) & \ + (self.depths[:,0] < mod_zf) & \ + (self.depths[:,1] > mod_z0) + if sum(ind) == 0: continue + z0 = min(self.depths[ind,0]) + zf = max(self.depths[ind,1]) - obs_mem *= (mod_tf-mod_t0)/(tf-t0) - mod_t0 = max(mod_t0,t0) - mod_tf = min(mod_tf,tf) - ns = int(np.floor(max(obs_mem,mod_mem)/mem_slab))+1 - ns = min(min(max(1,ns),mod_nt),obs_nt) - logger.info("[%s][%s] building depths %.1f to %.1f in %d slabs" \ - % (self.name,m.name,z0,zf,ns)) - - # across what times? 
- slab_t = (mod_tf-mod_t0)*np.linspace(0,1,ns+1)+mod_t0 - slab_t = np.floor(slab_t / 365)*365 + bnd_months[(np.abs(bnd_months[:,np.newaxis] - (slab_t % 365))).argmin(axis=0)] - - obs_tb = None; mod_tb = None - obs_di = []; mod_di = [] - for j in range(ns): - # YW - print(j, slab_t[j], slab_t[j+1]) + mod_t0 = max(mod_t0,obs_t0) + mod_tf = min(mod_tf,obs_tf) + logger.info("[%s][%s] building depths %.1f to %.1f in loop %d" % (self.name,m.name,z0,zf,i)) - # get reference variable + # get reference variable + if obs_dname is None: obs = Variable(filename = self.source, variable_name = self.variable, alternate_vars = self.alternate_vars, - t0 = slab_t[j], - tf = slab_t[j+1] \ - ).trim(t = [slab_t[j],slab_t[j+1]]) - if obs_tb is None: - obs_tb = obs.time_bnds[...] - else: - if np.allclose(obs_tb[-1],obs.time_bnds[0]): - obs.data = obs.data[1:] - obs.time = obs.time[1:] - obs.time_bnds = obs.time_bnds[1:] - assert np.allclose(obs.time_bnds[0,0],obs_tb[-1,1]) - obs_tb = obs.time_bnds[...] - if not obs.layered: - obs = _addDepth(obs) - else: - obs = obs.trim(d = [z0, zf]).integrateInDepth(z0 = z0, zf = zf, mean = True) - - # YW - print(obs.unit, obs.data, obs.time, obs_tb, obs.lat, obs.lat_bnds, - obs.lon, obs.lon_bnds) - - obs_di.append(Variable(name = "depthint%.2f-%.2f" % (z0, zf), - unit = obs.unit, - data = obs.data, - time = obs.time, time_bnds = obs_tb, - lat = obs.lat, lat_bnds = obs.lat_bnds, - lon = obs.lon, lon_bnds = obs.lon_bnds)) - - # get model variable + t0 = mod_t0, + tf = mod_tf).trim(t = [mod_t0,mod_tf]) + obs = _addDepth(obs) + else: + obs = Variable(filename = self.source, + variable_name = self.variable, + alternate_vars = self.alternate_vars, + t0 = mod_t0, + tf = mod_tf, + z0 = z0, + zf = zf).trim(t = [mod_t0,mod_tf]) + obs = obs.integrateInDepth(z0 = z0, zf = zf, mean = True) + obs.name = "depthint%.2f-%.2f" % (z0, zf) + + print("obs ", obs.name, obs.unit, obs.data, obs.time, obs_tb, obs.lat, obs.lat_bnds, + obs.lon, obs.lon_bnds) # DEBUG + + # get model variable + if mod_dname is None: mod = m.extractTimeSeries(self.variable, alt_vars = self.alternate_vars, expression = self.derived, - initial_time = slab_t[j], - final_time = slab_t[j+1] \ - ).trim(t=[slab_t[j],slab_t[j+1]]).convert(obs.unit) - if mod_tb is None: - mod_tb = mod.time_bnds - else: - if np.allclose(mod_tb[-1],mod.time_bnds[0]): - mod.data = mod.data[1:] - mod.time = mod.time[1:] - mod.time_bnds = mod.time_bnds[1:] - assert np.allclose(mod.time_bnds[0,0],mod_tb[-1,1]) - mod_tb = mod.time_bnds[...] 
- assert obs.time.size == mod.time.size - if not mod.layered: - mod = _addDepth(mod) - else: - mod = mod.trim(d = [z0, zf]).integrateInDepth(z0 = z0, zf = zf, mean = True) + initial_time = mod_t0, + final_time = mod_tf).trim(t=[mod_t0,mod_tf]).convert(obs.unit) + mod = _addDepth(mod) + else: + mod = m.extractTimeSeries(self.variable, + alt_vars = self.alternate_vars, + expression = self.derived, + initial_time = mod_t0, + final_time = mod_tf, + initial_depth= z0, + final_depth = zf).trim(t=[mod_t0,mod_tf]).convert(obs.unit) + mod = mod.trim(d = [z0, zf]).integrateInDepth(z0 = z0, zf = zf, mean = True) + mod.name = "depthint%.2f-%.2f" % (z0, zf) - # YW - print(mod.unit, mod.data, mod.time, mod_tb, mod.lat, mod.lat_bnds, - mod.lon, mod.lon_bnds) + print("mod ", mod.name, mod.unit, mod.data, mod.time, mod_tb, mod.lat, mod.lat_bnds, + mod.lon, mod.lon_bnds) # DEBUG - mod_di.append(Variable(name = "depthint%.2f-%.2f" % (z0, zf), - unit = mod.unit, - data = mod.data, - time = mod.time, time_bnds = mod_tb, - lat = mod.lat, lat_bnds = mod.lat_bnds, - lon = mod.lon, lon_bnds = mod.lon_bnds)) + assert obs.time.size == mod.time.size - obs_tmp = il.CombineVariables(obs_di) - mod_tmp = il.CombineVariables(mod_di) - obs_tmp.name = obs_tmp.name.split("_")[0] - mod_tmp.name = mod_tmp.name.split("_")[0] + obs.name = obs.name.split("_")[0] + mod.name = mod.name.split("_")[0] - yield obs_tmp, mod_tmp, z0, zf + yield obs, mod, z0, zf def confront(self,m): @@ -541,6 +525,7 @@ def modelPlots(self,m): this routine. """ + self._relationship(m) bname = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name )) fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) if not os.path.isfile(bname): return @@ -574,8 +559,7 @@ def modelPlots(self,m): if pname not in space_opts: continue opts = space_opts[pname] - # YW - ##print('... is used in space_opts') + ##print('... is used in space_opts') # DEBUG # add to html layout page.addFigure(opts["section"], diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py index 3e024dbd..9e8250be 100644 --- a/src/ILAMB/ModelResult.py +++ b/src/ILAMB/ModelResult.py @@ -214,7 +214,7 @@ def _shiftLon(lon): self.land_area = np.ma.sum(self.land_areas) return - def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time=-1e20,final_time=1e20,output_unit="",expression=None,convert_calendar=True): + def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time=-1e20,final_time=1e20,output_unit="",expression=None,convert_calendar=True,initial_depth=None,final_depth=None): """Extracts a time series of the given variable from the model. 
Parameters @@ -236,6 +236,8 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time a 1D array of longitude locations at which to extract information expression : str, optional an algebraic expression describing how to combine model outputs + initial_depth, final_depth: float, optional + include model results between these depths # YW Returns ------- @@ -268,8 +270,10 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time area = self.land_areas, convert_calendar = convert_calendar, t0 = initial_time - self.shift, - tf = final_time - self.shift) - + tf = final_time - self.shift, + z0 = initial_depth, + zf = final_depth) + if var.time is None: continue tmin = min(tmin,var.time_bnds.min()) tmax = max(tmax,var.time_bnds.max()) diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 3e986979..adaa8c96 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -129,8 +129,10 @@ def __init__(self,**keywords): assert variable_name is not None t0 = keywords.get("t0",None) tf = keywords.get("tf",None) + z0 = keywords.get("z0",None) # YW + zf = keywords.get("zf",None) # YW convert_calendar = keywords.get("convert_calendar",True) - out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname,convert_calendar=convert_calendar) + out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname,convert_calendar=convert_calendar,z0=z0,zf=zf) # YW data,data_bnds,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbounds,ndata,calendar,attr = out # Add handling for some units which cf_units does not support diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 7ccb03ad..0cb3ade7 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -610,7 +610,7 @@ def _removeLeapDay(t,v,datum=None,calendar=None,t0=None,tf=None): return tdata,vdata -def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=None,convert_calendar=True): +def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=None,convert_calendar=True,z0=None,zf=None): """Extracts data from a netCDF4 datafile for use in a Variable object. Intended to be used inside of the Variable constructor. Some of @@ -631,6 +631,9 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N tf : float, optional If temporal, specifying the final time can reduce memory usage and speed up access time. + z0, zf : float, optional # YW + Specifying the first and last depths can reduce memory + usage and speed up access time. Returns ------- @@ -802,6 +805,7 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N else: v = var[begin:(end+1),...] if v_bnd: v_bnd = v_bnd[begin:(end+1),...] + if lat_name is not None: lat = grp.variables[lat_name] [...] if lat_bnd_name is not None: lat_bnd = grp.variables[lat_bnd_name] [...] if lon_name is not None: lon = grp.variables[lon_name] [...] @@ -829,6 +833,42 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if depth_bnd is not None: depth_bnd = Unit(dunit).convert(depth_bnd,Unit("Pa"),inplace=True) depth_bnd = -np.log(depth_bnd/Pb)*R*Tb/M/g + + print("il.FromNetCDF4 " + filename) # DEBUG + print(depth) # DEBUG + print(depth_bnd) # DEBUG + print(v.shape) # DEBUG + + # YW + if z0 is not None: + if zf is None: + raise ValueError("Mismatched starting depth %f." 
% z0) + if depth_bnd is not None: + ind = (depth_bnd[:, 0] < zf) & (depth_bnd[:, 1] > z0) + depth_bnd = depth_bnd[ind, :] + depth = depth[ind] + else: + ind = (depth >= z0) & (depth <= zf) + depth = depth[ind] + v_depth_sub = [] + for vs in v.shape: + if vs == len(ind): + v_depth_sub.append( ind ) + else: + v_depth_sub.append( np.ones(vs, dtype = bool) ) + v = v[np.ix_(v_depth_sub)] + else: + if zf is not None: + raise ValueError("Mismatched ending depth %f." % zf) + + print(depth) # DEBUG + print(depth_bnd) # DEBUG + print(v.shape) + else: + if (z0 is not None) or (zf is not None): + raise ValueError("Vertical subscript is used but there is no layered dimension in %s." % filename) + + if data_name is not None: data = len(grp.dimensions[data_name]) # if we have data sites, there may be lat/lon/depth data to @@ -1365,9 +1405,8 @@ def AnalysisMeanStateSpace(ref,com,**keywords): ref_timeint.name = "timeint_of_%s" % name - # YW - print(benchmark_dataset) - print(ref_timeint.name) + print(benchmark_dataset) # DEBUG + print(ref_timeint.name) # DEBUG ref_timeint.toNetCDF4(benchmark_dataset,group="MeanState") for region in regions: From a57c713c8c52a32c03d657483fba8c57211273bd Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Wed, 15 Sep 2021 11:06:50 -0400 Subject: [PATCH 06/18] confrontation buggy --- bin/ilamb-run | 30 +-- src/ILAMB/ConfSoilMoisture.py | 451 +++++++++++++++++++++++++++------- src/ILAMB/Confrontation.py | 58 +---- src/ILAMB/ilamblib.py | 7 +- 4 files changed, 381 insertions(+), 165 deletions(-) diff --git a/bin/ilamb-run b/bin/ilamb-run index 73dddf9a..e57f3fd0 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -303,8 +303,8 @@ def BuildLocalWorkList(M,C,skip_cache=False): for c in C: for m in M: if skip_cache: - - # if we want to skip we have to check that it is complete + + # if we want to skip we have to check that it is complete fname = os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name)) complete = False if os.path.isfile(fname): @@ -379,24 +379,6 @@ def WorkConfront(W,verbose=False,clean=False): # try to run the confrontation try: t0 = time.time() - - ## YW - #print(m) - #print(m.path) - #print(m.color) - #print(m.filter) - #print(m.regex) - #print(m.shift) - #print(m.name) - #print(m.confrontations) - #print(m.cell_areas) - #print(m.land_fraction) - #print(m.land_areas) - #print(m.land_area) - #print(m.variables) - #print(m.names) - #print(m.extents) - #print(m.paths) c.confront(m) dt = time.time()-t0 @@ -412,7 +394,7 @@ def WorkConfront(W,verbose=False,clean=False): logger.debug("[%s][%s]\n%s" % (c.longname,m.name,format_exc())) if verbose: print((" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name)) - + def WorkPost(M,C,W,S,verbose=False,skip_plots=False): """Performs the post-processing @@ -435,7 +417,9 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): enable to skip plotting """ maxCL = 45; maxML = 20 - for c in C: c.determinePlotLimits() + for c in C: + print(c.name, c.source, c.variable, c.output_path) # DEBUG + c.determinePlotLimits() for i,w in enumerate(W): m,c = w try: @@ -457,7 +441,7 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): sys.stdout.flush() sys.stdout.flush(); comm.Barrier() - + for i,c in enumerate(C): try: c.compositePlots() diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index 39cb8c5a..f9317fd4 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -70,11 +70,7 @@ def stageData(self,m): with Dataset(self.source) as dset: var = 
dset.variables[self.variable] - print('stage observation ' + self.variable) # DEBUG - tstart = time.time() #DEBUG obs_t,obs_tb,obs_cb,obs_b,obs_e,cal = il.GetTime(var) - tend = time.time() # DEBUG - print( "il.GetTime took " + str((tend - tstart)/60) + " minutes." ) # DEBUG obs_nt = obs_t.size obs_mem = var.size*8e-6 @@ -91,13 +87,12 @@ def stageData(self,m): if name.lower() in ["depth_bnds", "depth_bounds"]] if len(obs_dname) == 0: # if there is no depth, assume the data is surface - obs_z0 = 0; obs_zf = 0.1; obs_z_bnd = np.array([[0, 0.1]]); obs_nd = 0 + obs_z0 = 0; obs_zf = 0.1; obs_nd = 0 obs_dname = None else: obs_dname = obs_dname[0] obs_z0 = np.min(dset.variables[obs_dname]) obs_zf = np.max(dset.variables[obs_dname]) - obs_z_bnd = dset.variables[obs_dname][...] obs_nd = dset.variables[obs_dname].shape[0] info += " contents span years %.1f to %.1f and depths %.1f to %.1f, est memory %d [Mb]" % (obs_t0/365.+1850,obs_tf/365.+1850,obs_z0,obs_zf,obs_mem) @@ -123,17 +118,10 @@ def stageData(self,m): mod_zf = -2147483648 mod_nd = 999 for fname in m.variables[vname]: - - print('stage model ' + vname) # DEBUG - with Dataset(fname) as dset: var = dset.variables[vname] - - tstart = time.time() # DEBUG mod_t,mod_tb,mod_cb,mod_b,mod_e,cal = il.GetTime(var,t0=obs_t0-m.shift, tf=obs_tf-m.shift) - tend = time.time() # DEBUG - print( "il.GetTime took " + str((tend - tstart)/60) + " minutes." ) # DEBUG if mod_t is None: info += "\n %s does not overlap the reference" % (fname) @@ -189,21 +177,20 @@ def _addDepth(v): return v info = "" - for i in range(obs_z_bnd.shape[0]): - ind = (self.depths[:,0] < obs_z_bnd[i,1]) & \ - (self.depths[:,1] > obs_z_bnd[i,0]) & \ - (self.depths[:,0] < mod_zf) & \ - (self.depths[:,1] > mod_z0) - if sum(ind) == 0: + for i in range(self.depths.shape[0]): + z0 = max(self.depths[i,0], obs_z0, mod_z0) + zf = min(self.depths[i,1], obs_zf, mod_zf) + if z0 >= zf: continue - z0 = min(self.depths[ind,0]) - zf = max(self.depths[ind,1]) mod_t0 = max(mod_t0,obs_t0) mod_tf = min(mod_tf,obs_tf) logger.info("[%s][%s] building depths %.1f to %.1f in loop %d" % (self.name,m.name,z0,zf,i)) # get reference variable + print('Loading obs ' + str(z0) + '-' + str(zf)) + tstart = time.time() # DEBUG + if obs_dname is None: obs = Variable(filename = self.source, variable_name = self.variable, @@ -222,10 +209,15 @@ def _addDepth(v): obs = obs.integrateInDepth(z0 = z0, zf = zf, mean = True) obs.name = "depthint%.2f-%.2f" % (z0, zf) + tend = time.time() # DEBUG + print("Loading obs " + str(z0) + '-' + str(zf) + ' took ' + str((tend - tstart) / 60)) # DEBUG print("obs ", obs.name, obs.unit, obs.data, obs.time, obs_tb, obs.lat, obs.lat_bnds, obs.lon, obs.lon_bnds) # DEBUG # get model variable + print('Loading model ' + str(z0) + '-' + str(zf)) + tstart = time.time() # DEBUG + if mod_dname is None: mod = m.extractTimeSeries(self.variable, alt_vars = self.alternate_vars, @@ -244,6 +236,8 @@ def _addDepth(v): mod = mod.trim(d = [z0, zf]).integrateInDepth(z0 = z0, zf = zf, mean = True) mod.name = "depthint%.2f-%.2f" % (z0, zf) + tend = time.time() # DEBUG + print("Loading model " + str(z0) + '-' + str(zf) + ' took ' + str((tend - tstart) / 60)) # DEBUG print("mod ", mod.name, mod.unit, mod.data, mod.time, mod_tb, mod.lat, mod.lat_bnds, mod.lon, mod.lon_bnds) # DEBUG @@ -278,10 +272,9 @@ def confront(self,m): # Get the depth-integrated observation and model data for each slab. for obs,mod,z0,zf in self.stageData(m): - #YW - print('Staging data ... 
%.2f-%.2f' % (z0, zf)) - print(obs.name) - print(mod.name) + print('Staging data ... %.2f-%.2f' % (z0, zf)) # DEBUG + print(obs.name) # DEBUG + print(mod.name) # DEBUG if obs.spatial: il.AnalysisMeanStateSpace(obs, mod, dataset = fcm.mod_dset, @@ -311,59 +304,6 @@ def confront(self,m): logger.info("[%s][%s] Success" % (self.longname,m.name)) - def computeOverallScore(self,m): - """Computes the overall composite score for a given model. - - This routine opens the netCDF results file associated with - this confrontation-model pair, and then looks for a "scalars" - group in the dataset as well as any subgroups that may be - present. For each grouping of scalars, it will blend any value - with the word "Score" in the name to render an overall score, - overwriting the existing value if present. - - Parameters - ---------- - m : ILAMB.ModelResult.ModelResult - the model results - - """ - - def _computeOverallScore(scalars): - """Given a netCDF4 group of scalars, blend them into an overall score""" - scores = {} - variables = [v for v in scalars.variables.keys() if "Score" in v and "Overall" not in v] - for region in self.regions: - overall_score = 0. - sum_of_weights = 0. - for v in variables: - if region not in v: continue - score = v.replace(region,"").strip() - weight = 1. - if score in self.weight: weight = self.weight[score] - overall_score += weight*scalars.variables[v][...] - sum_of_weights += weight - overall_score /= max(sum_of_weights,1e-12) - scores["Overall Score %s" % region] = overall_score - return scores - - fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) - if not os.path.isfile(fname): return - with Dataset(fname,mode="r+") as dataset: - datasets = [dataset.groups[grp] for grp in dataset.groups if "scalars" not in grp] - groups = [grp for grp in dataset.groups if "scalars" not in grp] - datasets.append(dataset) - groups .append(None) - for dset,grp in zip(datasets,groups): - if "scalars" in dset.groups: - scalars = dset.groups["scalars"] - score = _computeOverallScore(scalars) - for key in score.keys(): - if key in scalars.variables: - scalars.variables[key][0] = score[key] - else: - Variable(data=score[key],name=key,unit="1").toNetCDF4(dataset,group=grp) - - def compositePlots(self): """Renders plots which display information of all models. @@ -450,10 +390,9 @@ def compositePlots(self): ticklabels = time_opts["cycle"]["ticklabels"], vmin = self.limits["cycle"][region]["min"]-dy, vmax = self.limits["cycle"][region]["max"]+dy) - #ylbl = time_opts["cycle"]["ylabel"] - #if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) - ylbl = zstr + ' '+ self.depths_units + ylbl = post.UnitStringToMatplotlib(var.unit) ax.set_ylabel(ylbl) + ax.set_title(zstr + ' '+ self.depths_units) fig.savefig(os.path.join(self.output_path,"%s_compcycle.png" % (region))) plt.close() @@ -516,6 +455,7 @@ def _alphabeticalBenchmarkFirst(key): "%s_spatial_variance.png" % (region))) plt.close() + def modelPlots(self,m): """For a given model, create the plots of the analysis results. 
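The retrieval in _relationship below leans on a naming convention rather than on any explicit depth index: stageData names each depth-integrated slab "depthint%.2f-%.2f" % (z0, zf), il.AnalysisMeanStateSpace then writes the period mean under "timeint_of_" plus that name, and _retrieveData recovers the plot variables by substring matching. A minimal sketch of the round trip, with hypothetical slab bounds standing in for self.depths:

    # hypothetical slab; the real bounds come from self.depths
    z0, zf = 0.00, 100.00
    vname = "depthint%.2f-%.2f" % (z0, zf)   # assigned in stageData
    plot_name = "timeint_of_%s" % vname      # written by il.AnalysisMeanStateSpace
    zstr = "%.2f-%.2f" % (z0, zf)
    assert ("timeint_" in plot_name) and (zstr in plot_name)

Because the match is purely textual, changing the "%.2f" formatting on either side would silently break retrieval on the other.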
@@ -548,10 +488,6 @@ def modelPlots(self,m): if group.variables[vname][...].size <= 1: continue var = Variable(filename=fname,groupname="MeanState",variable_name=vname) - # YW - ##print(self.limits.keys()) - ##print(pname) - if (var.spatial or (var.ndata is not None)) and not var.temporal: # grab plotting options @@ -660,7 +596,352 @@ def modelPlots(self,m): ylbl = opts["ylabel"] if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) ax.set_ylabel(ylbl) + ax.set_title(zstr + ' ' + self.depths_units) fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) plt.close() logger.info("[%s][%s] Success" % (self.longname,m.name)) + + def _relationship(self,m,nbin=25): + """ + Modified to plot by depths. + """ + def _retrieveData(filename): + key_list = [] + with Dataset(filename,mode="r") as dset: + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind, 1] + zstr = '%.2f-%.2f' % (z0, zf) + key = [v for v in dset.groups["MeanState"].variables.keys() \ + if ("timeint_" in v) and (zstr in v)] + if len(key) == 0: + raise "Unable to retrieve data for relationship " + zstr + key_list.append(key) + return [Variable(filename = filename, + groupname = "MeanState", + variable_name = key) for key in key_list] + + def _applyRefMask(ref,com): + tmp = ref.interpolate(lat=com.lat,lat_bnds=com.lat_bnds, + lon=com.lon,lon_bnds=com.lon_bnds) + com.data.mask += tmp.data.mask + return com + + def _checkLim(data,lim): + if lim is None: + lim = [min(data.min(),data.min()), + max(data.max(),data.max())] + delta = 1e-8*(lim[1]-lim[0]) + lim[0] -= delta + lim[1] += delta + else: + assert type(lim) == type([]) + assert len (lim) == 2 + return lim + + def _limitExtents(vars): + lim = [+1e20,-1e20] + for v in vars: + lmin,lmax = _checkLim(v.data,None) + lim[0] = min(lmin,lim[0]) + lim[1] = max(lmax,lim[1]) + return lim + + def _buildDistributionResponse(ind,dep,ind_lim=None,dep_lim=None,region=None,nbin=25,eps=3e-3): + + r = Regions() + + # Checks on the input parameters + assert np.allclose(ind.data.shape,dep.data.shape) + ind_lim = _checkLim(ind.data,ind_lim) + dep_lim = _checkLim(dep.data,dep_lim) + + # Mask data + mask = ind.data.mask + dep.data.mask + if region is not None: mask += r.getMask(region,ind) + x = ind.data[mask==False].flatten() + y = dep.data[mask==False].flatten() + + # Compute normalized 2D distribution + dist,xedges,yedges = np.histogram2d(x,y, + bins = [nbin,nbin], + range = [ind_lim,dep_lim]) + dist = np.ma.masked_values(dist.T,0).astype(float) + dist /= dist.sum() + + # Compute the functional response + which_bin = np.digitize(x,xedges).clip(1,xedges.size-1)-1 + mean = np.ma.zeros(xedges.size-1) + std = np.ma.zeros(xedges.size-1) + cnt = np.ma.zeros(xedges.size-1) + np.seterr(under='ignore') + for i in range(mean.size): + yi = y[which_bin==i] + cnt [i] = yi.size + mean[i] = yi.mean() + std [i] = yi.std() + mean = np.ma.masked_array(mean,mask = (cnt/cnt.sum()) < eps) + std = np.ma.masked_array( std,mask = (cnt/cnt.sum()) < eps) + np.seterr(under='warn') + return dist,xedges,yedges,mean,std + + def _scoreDistribution(ref,com): + mask = ref.mask + com.mask + ref = np.ma.masked_array(ref.data,mask=mask).compressed() + com = np.ma.masked_array(com.data,mask=mask).compressed() + return np.sqrt(((np.sqrt(ref)-np.sqrt(com))**2).sum())/np.sqrt(2) + + def _scoreFunction(ref,com): + mask = ref.mask + com.mask + ref = np.ma.masked_array(ref.data,mask=mask).compressed() + com = np.ma.masked_array(com.data,mask=mask).compressed() + return 
np.exp(-np.linalg.norm(ref-com)/np.linalg.norm(ref)) + + def _plotDistribution(dist_list,xedges_list,yedges_list, + xlabel_list, ylabel_list, filename): + fig, axes = plt.subplots(len(self.depths[:,0]), 1, + figsize=(3 * len(self.depths[:,0]), 3.),tight_layout=True) + for ind, dist, xedges, yedges, xlabel, ylabel in \ + zip(range(len(self.depths[:,0])), dist_list, xedges_list, yedges_list, + xlabel_list, ylabel_list): + ax = axes.flat[ind] + pc = ax.pcolormesh(xedges, yedges, dist, + norm = LogNorm(vmin = 1e-4,vmax = 1e-1), + cmap = 'plasma' if 'plasma' in plt.cm.cmap_d else 'summer') + ax.set_xlabel(xlabel,fontsize = 12) + ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + fig.colorbar(pc, cax = fig.add_axes([0.95, 0.1, 0.02, 0.8]), + orientation="vertical",label="Fraction of total datasites") + fig.savefig(filename) + plt.close() + + def _plotDifference(ref_list,com_list,xedges_list,yedges_list,xlabel_list,ylabel_list,filename): + fig, axes = plt.subplots(len(self.depths[:,0]), 1, + figsize=(3 * len(self.depths[:,0]), 3.),tight_layout=True) + for ind, ref, com, xedges, yedges, xlabel, ylabel in \ + zip(range(len(self.depths[:,0])), ref_list, com_list, xedges_list, yedges_list, + xlabel_list, ylabel_list): + ref = np.ma.copy(ref) + com = np.ma.copy(com) + ref.data[np.where(ref.mask)] = 0. + com.data[np.where(com.mask)] = 0. + diff = np.ma.masked_array(com.data-ref.data,mask=ref.mask*com.mask) + lim = np.abs(diff).max() + + pc = ax.pcolormesh(xedges, yedges, diff, + cmap = 'Spectral_r', + vmin = -lim, vmax = +lim) + ax.set_xlabel(xlabel,fontsize = 12) + ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + fig.colorbar(pc,cax = fig.add_axes([0.95, 0.1, 0.02, 0.8]), + orientation="vertical",label="Distribution Difference") + fig.savefig(filename) + plt.close() + + def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, + xedges_list,yedges_list,xlabel_list,ylabel_list,color,filename): + fig, axes = plt.subplots(len(self.depths[:,0]), 1, + figsize=(3 * len(self.depths[:,0]), 3.),tight_layout=True) + for ind, dist, xedges, yedges, xlabel, ylabel in \ + zip(range(len(self.depths[:,0])), dist_list, xedges_list, yedges_list, + xlabel_list, ylabel_list): + + xe = 0.5*(xedges[:-1]+xedges[1:]) + delta = 0.1*np.diff(xedges).mean() + + # reference function + ref_x = xe - delta + ref_y = ref_mean + ref_e = ref_std + if not (ref_mean.mask==False).all(): + ind = np.where(ref_mean.mask==False) + ref_x = xe [ind]-delta + ref_y = ref_mean[ind] + ref_e = ref_std [ind] + + # comparison function + com_x = xe + delta + com_y = com_mean + com_e = com_std + if not (com_mean.mask==False).all(): + ind = np.where(com_mean.mask==False) + com_x = xe [ind]-delta + com_y = com_mean[ind] + com_e = com_std [ind] + + ax = axes.flat[ind] + ax.errorbar(ref_x,ref_y,yerr=ref_e,fmt='-o',color='k') + ax.errorbar(com_x,com_y,yerr=com_e,fmt='-o',color=color) + ax.set_xlabel(xlabel,fontsize = 12) + ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + fig.savefig(filename) + plt.close() + + # If there are no relationships to analyze, get out of here + if self.relationships is None: return + + # Get the HTML page + page = [page for page in self.layout.pages if "Relationships" in page.name] + if len(page) == 0: return + page = page[0] + + # Try to get the 
dependent data from the model and obs + try: + ref_dep_list = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) + com_dep_list = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) + com_dep_list = [_applyRefMask(ref_dep, com_dep) for ref_dep,com_dep in zip(ref_dep_list,com_dep_list)] + dep_name = self.longname.split("/")[0] + dep_min = self.limits["timeint"]["min"] + dep_max = self.limits["timeint"]["max"] + except: + return + + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="r+") as results: + + # Grab/create a relationship and scalars group + group = None + if "Relationships" not in results.groups: + group = results.createGroup("Relationships") + else: + group = results.groups["Relationships"] + if "scalars" not in group.groups: + scalars = group.createGroup("scalars") + else: + scalars = group.groups["scalars"] + + # for each relationship... + for c in self.relationships: + + # try to get the independent data from the model and obs + try: + ref_ind_list = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) + com_ind_list = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) + com_ind_list = _applyRefMask(ref_ind,com_ind) + ind_name = c.longname.split("/")[0] + ind_min = c.limits["timeint"]["min"]-1e-12 + ind_max = c.limits["timeint"]["max"]+1e-12 + except: + continue + + + # Add figures to the html page + page.addFigure(c.longname, + "benchmark_rel_%s" % ind_name, + "Benchmark_RNAME_rel_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_%s" % ind_name, + "MNAME_RNAME_rel_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_diff_%s" % ind_name, + "MNAME_RNAME_rel_diff_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_func_%s" % ind_name, + "MNAME_RNAME_rel_func_%s.png" % ind_name, + legend = False, + benchmark = False) + + # Try to get the dependent data from the model and obs + try: + ref_dep_list = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) + com_dep_list = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) + com_dep_list = [_applyRefMask(ref_dep, com_dep) for ref_dep,com_dep in zip(ref_dep_list,com_dep_list)] + dep_name = self.longname.split("/")[0] + dep_min = self.limits["timeint"]["min"] + dep_max = self.limits["timeint"]["max"] + except: + return + + # Analysis over regions + lim_dep = [dep_min,dep_max] + lim_ind = [ind_min,ind_max] + longname = c.longname.split('/')[0] + for region in self.regions: + ref_dist_list = [] + ref_xedges_list = [] + ref_yedges_list = [] + ref_mean_list = [] + ref_std_list = [] + + com_dist_list = [] + com_xedges_list = [] + com_yedges_list = [] + com_mean_list = [] + com_std_list = [] + + for dind, ref_dep, ref_ind in zip(range(len(ref_dep_list)), ref_dep_list, ref_ind_list): + # Check on data shape + if not np.allclose(ref_dep.data.shape,ref_ind.data.shape): + msg = "[%s][%s] Data size mismatch in relationship: %s %s vs. 
%s %s" % (self.longname,m.name,dep_name,str(ref_dep.data.shape),ind_name,str(ref_ind.data.shape)) + logger.debug(msg) + raise ValueError + + ref_dist, ref_xedges, ref_yedges, ref_mean, ref_std = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + com_dist, com_xedges, com_yedges, com_mean, com_std = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + + ref_dist_list.append(ref_dist) + ref_xedges_list.append(ref_xedges) + ref_yedges_list.append(ref_yedges) + ref_mean_list.append(ref_mean) + ref_std_list.append(ref_std) + + com_dist_list.append(com_dist) + com_xedges_list.append(com_xedges) + com_yedges_list.append(com_yedges) + com_mean_list.append(com_mean) + com_std_list.append(com_std) + + # Make the plots + _plotDistribution(ref_dist_list,ref_xedges_list,ref_yedges_list, + "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), + "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) + _plotDistribution(com_dist_list,com_xedges_list,com_yedges_list, + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) + _plotDifference (ref_dist_list,com_dist_list,ref_xedges_list,ref_yedges_list, + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) + _plotFunction(ref_mean_list,ref_std_list,com_mean_list, + com_std_list,ref_xedges_list,ref_yedges_list, + "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), + m.color, + os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) + + # Score the distribution + score_list = [] + for ref_dist, com_dist in zip(ref_dist_list, com_dist_list): + score = _scoreDistribution(ref_dist,com_dist) + score_list.append(score) + score = np.mean(score_list) # !!!!!!! May be wrong? + sname = "%s Hellinger Distance %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + # Score the functional response + score = _scoreFunction(ref_dist[3],com_dist[3]) + sname = "%s RMSE Score %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") diff --git a/src/ILAMB/Confrontation.py b/src/ILAMB/Confrontation.py index 686093b5..b2afa0d9 100644 --- a/src/ILAMB/Confrontation.py +++ b/src/ILAMB/Confrontation.py @@ -462,9 +462,11 @@ def determinePlotLimits(self): # For those limits which we built up data across all models, compute the percentiles for pname in limits.keys(): - if "data" in limits[pname]: + print(pname) # DEBUG + if ("data" in limits[pname]) and (len(limits[pname]['data']) > 0): + print(limits[pname]['data']) # DEBUG limits[pname]["min"],limits[pname]["max"] = np.percentile(limits[pname]["data"],[1,99]) - + # Second pass to plot legends (FIX: only for master?) 
for pname in limits.keys(): @@ -524,58 +526,6 @@ def determinePlotLimits(self): self.limits = limits - def computeOverallScore(self,m): - """Computes the overall composite score for a given model. - - This routine opens the netCDF results file associated with - this confrontation-model pair, and then looks for a "scalars" - group in the dataset as well as any subgroups that may be - present. For each grouping of scalars, it will blend any value - with the word "Score" in the name to render an overall score, - overwriting the existing value if present. - - Parameters - ---------- - m : ILAMB.ModelResult.ModelResult - the model results - - """ - - def _computeOverallScore(scalars): - """Given a netCDF4 group of scalars, blend them into an overall score""" - scores = {} - variables = [v for v in scalars.variables.keys() if "Score" in v and "Overall" not in v] - for region in self.regions: - overall_score = 0. - sum_of_weights = 0. - for v in variables: - if region not in v: continue - score = v.replace(region,"").strip() - weight = 1. - if score in self.weight: weight = self.weight[score] - overall_score += weight*scalars.variables[v][...] - sum_of_weights += weight - overall_score /= max(sum_of_weights,1e-12) - scores["Overall Score %s" % region] = overall_score - return scores - - fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) - if not os.path.isfile(fname): return - with Dataset(fname,mode="r+") as dataset: - datasets = [dataset.groups[grp] for grp in dataset.groups if "scalars" not in grp] - groups = [grp for grp in dataset.groups if "scalars" not in grp] - datasets.append(dataset) - groups .append(None) - for dset,grp in zip(datasets,groups): - if "scalars" in dset.groups: - scalars = dset.groups["scalars"] - score = _computeOverallScore(scalars) - for key in score.keys(): - if key in scalars.variables: - scalars.variables[key][0] = score[key] - else: - Variable(data=score[key],name=key,unit="1").toNetCDF4(dataset,group=grp) - def compositePlots(self): """Renders plots which display information of all models. diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 0cb3ade7..4ec9bc50 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -853,10 +853,11 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N v_depth_sub = [] for vs in v.shape: if vs == len(ind): - v_depth_sub.append( ind ) + v_depth_sub.append( list(np.where(ind)[0]) ) else: - v_depth_sub.append( np.ones(vs, dtype = bool) ) - v = v[np.ix_(v_depth_sub)] + v_depth_sub.append( list(range(vs)) ) + #print( v_depth_sub ) # DEBUG + v = v[np.ix_(*v_depth_sub)] else: if zf is not None: raise ValueError("Mismatched ending depth %f." 
% zf) From cf2d457042d013f4ee41371dd8237a9c3e8ec63a Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Mon, 20 Sep 2021 16:47:10 -0400 Subject: [PATCH 07/18] default relationships working w/ several models --- bin/ilamb-run | 2 - src/ILAMB/ConfSoilMoisture.py | 133 ++++++++++++++------------ src/ILAMB/Confrontation.py | 174 +++++++++++++++++++--------------- src/ILAMB/Variable.py | 8 +- src/ILAMB/ilamblib.py | 12 ++- 5 files changed, 184 insertions(+), 145 deletions(-) diff --git a/bin/ilamb-run b/bin/ilamb-run index e57f3fd0..d20c6004 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -379,7 +379,6 @@ def WorkConfront(W,verbose=False,clean=False): # try to run the confrontation try: t0 = time.time() - c.confront(m) dt = time.time()-t0 proc[rank] += dt @@ -629,7 +628,6 @@ S = Scoreboard(args.config[0], C = MatchRelationshipConfrontation(S.list()) - if len(args.study_limits) == 2: args.study_limits[1] += 1 for c in C: c.study_limits = (np.asarray(args.study_limits)-1850)*365. diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index f9317fd4..e61b222e 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -211,8 +211,8 @@ def _addDepth(v): tend = time.time() # DEBUG print("Loading obs " + str(z0) + '-' + str(zf) + ' took ' + str((tend - tstart) / 60)) # DEBUG - print("obs ", obs.name, obs.unit, obs.data, obs.time, obs_tb, obs.lat, obs.lat_bnds, - obs.lon, obs.lon_bnds) # DEBUG + print("obs ", obs.name, obs.unit, obs.time[0], obs.time[-1], + obs.lat[0], obs.lat[-1], obs.lon[0], obs.lon[-1]) # DEBUG # get model variable print('Loading model ' + str(z0) + '-' + str(zf)) @@ -238,8 +238,8 @@ def _addDepth(v): tend = time.time() # DEBUG print("Loading model " + str(z0) + '-' + str(zf) + ' took ' + str((tend - tstart) / 60)) # DEBUG - print("mod ", mod.name, mod.unit, mod.data, mod.time, mod_tb, mod.lat, mod.lat_bnds, - mod.lon, mod.lon_bnds) # DEBUG + print("mod ", mod.name, mod.unit, mod.data, mod.time[0], mod.time[-1], + mod.lat[0], mod.lat[-1], mod.lon[0], mod.lon[-1]) # DEBUG assert obs.time.size == mod.time.size @@ -272,9 +272,8 @@ def confront(self,m): # Get the depth-integrated observation and model data for each slab. for obs,mod,z0,zf in self.stageData(m): - print('Staging data ... %.2f-%.2f' % (z0, zf)) # DEBUG - print(obs.name) # DEBUG - print(mod.name) # DEBUG + print('Confronting data ' + obs.name + ' v.s. ' + mod.name + \ + '... %.2f-%.2f' % (z0, zf)) # DEBUG if obs.spatial: il.AnalysisMeanStateSpace(obs, mod, dataset = fcm.mod_dset, @@ -371,7 +370,8 @@ def compositePlots(self): for region in self.regions: if region not in cycle: continue fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize = (6.5, 2.8*self.depths.shape[0])) + figsize = (6.5, 2.8*self.depths.shape[0]), + sharex = True, sharey = True) for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind, 1] zstr = '%.2f-%.2f' % (z0, zf) @@ -573,7 +573,8 @@ def modelPlots(self,m): for region in self.regions: if region not in vname: continue fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize = (6.5, 2.8*self.depths.shape[0])) + figsize = (6.5, 2.8*self.depths.shape[0]), + sharex = True, sharey = True) for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind,1] zstr = '%.2f-%.2f' % (z0, zf) @@ -606,7 +607,7 @@ def _relationship(self,m,nbin=25): """ Modified to plot by depths. 
""" - def _retrieveData(filename): + def _retrieveSM(filename): key_list = [] with Dataset(filename,mode="r") as dset: for dind, z0 in enumerate(self.depths[:,0]): @@ -616,11 +617,19 @@ def _retrieveData(filename): if ("timeint_" in v) and (zstr in v)] if len(key) == 0: raise "Unable to retrieve data for relationship " + zstr - key_list.append(key) + key_list.append(key[0]) return [Variable(filename = filename, groupname = "MeanState", variable_name = key) for key in key_list] - + + def _retrieveData(filename): + key = None + with Dataset(filename,mode="r") as dset: + key = [v for v in dset.groups["MeanState"].variables.keys() if "timeint_" in v] + return Variable(filename = filename, + groupname = "MeanState", + variable_name = key[0]) + def _applyRefMask(ref,com): tmp = ref.interpolate(lat=com.lat,lat_bnds=com.lat_bnds, lon=com.lon,lon_bnds=com.lon_bnds) @@ -698,31 +707,33 @@ def _scoreFunction(ref,com): return np.exp(-np.linalg.norm(ref-com)/np.linalg.norm(ref)) def _plotDistribution(dist_list,xedges_list,yedges_list, - xlabel_list, ylabel_list, filename): + xlabel, ylabel, filename): fig, axes = plt.subplots(len(self.depths[:,0]), 1, - figsize=(3 * len(self.depths[:,0]), 3.),tight_layout=True) - for ind, dist, xedges, yedges, xlabel, ylabel in \ - zip(range(len(self.depths[:,0])), dist_list, xedges_list, yedges_list, - xlabel_list, ylabel_list): - ax = axes.flat[ind] + figsize=(3.5, 3 * len(self.depths[:,0])), + sharex = True, sharey = True, tight_layout=True) + for dind, dist, xedges, yedges in \ + zip(range(len(self.depths[:,0])), dist_list, xedges_list, yedges_list): + ax = axes.flat[dind] pc = ax.pcolormesh(xedges, yedges, dist, norm = LogNorm(vmin = 1e-4,vmax = 1e-1), cmap = 'plasma' if 'plasma' in plt.cm.cmap_d else 'summer') - ax.set_xlabel(xlabel,fontsize = 12) - ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + ax.set_xlabel(xlabel,fontsize = 8) + ax.set_ylabel(ylabel,fontsize = 8 if len(ylabel) <= 60 else 6) ax.set_xlim(xedges[0],xedges[-1]) ax.set_ylim(yedges[0],yedges[-1]) - fig.colorbar(pc, cax = fig.add_axes([0.95, 0.1, 0.02, 0.8]), + ax.set_title(('%.2f-%.2f' % (self.depths[dind,0], self.depths[dind,1])) \ + + self.depths_units) + fig.colorbar(pc, cax = fig.add_axes([0.97, 0.1, 0.02, 0.8]), orientation="vertical",label="Fraction of total datasites") - fig.savefig(filename) + fig.savefig(filename, bbox_inches = 'tight') plt.close() - def _plotDifference(ref_list,com_list,xedges_list,yedges_list,xlabel_list,ylabel_list,filename): + def _plotDifference(ref_list,com_list,xedges_list,yedges_list,xlabel,ylabel,filename): fig, axes = plt.subplots(len(self.depths[:,0]), 1, - figsize=(3 * len(self.depths[:,0]), 3.),tight_layout=True) - for ind, ref, com, xedges, yedges, xlabel, ylabel in \ - zip(range(len(self.depths[:,0])), ref_list, com_list, xedges_list, yedges_list, - xlabel_list, ylabel_list): + figsize=(3.5, 3 * len(self.depths[:,0])), + sharex = True, sharey = True, tight_layout=True) + for dind, ref, com, xedges, yedges in \ + zip(range(len(self.depths[:,0])), ref_list, com_list, xedges_list, yedges_list): ref = np.ma.copy(ref) com = np.ma.copy(com) ref.data[np.where(ref.mask)] = 0. 
@@ -730,25 +741,28 @@ def _plotDifference(ref_list,com_list,xedges_list,yedges_list,xlabel_list,ylabel diff = np.ma.masked_array(com.data-ref.data,mask=ref.mask*com.mask) lim = np.abs(diff).max() + ax = axes.flat[dind] pc = ax.pcolormesh(xedges, yedges, diff, - cmap = 'Spectral_r', - vmin = -lim, vmax = +lim) - ax.set_xlabel(xlabel,fontsize = 12) - ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + cmap = 'Spectral_r', vmin = -lim, vmax = +lim) + ax.set_xlabel(xlabel,fontsize = 8) + ax.set_ylabel(ylabel,fontsize = 8 if len(ylabel) <= 60 else 6) ax.set_xlim(xedges[0],xedges[-1]) ax.set_ylim(yedges[0],yedges[-1]) - fig.colorbar(pc,cax = fig.add_axes([0.95, 0.1, 0.02, 0.8]), + ax.set_title(('%.2f-%.2f' % (self.depths[dind,0], self.depths[dind,1])) \ + + self.depths_units) + fig.colorbar(pc,cax = fig.add_axes([0.97, 0.1, 0.02, 0.8]), orientation="vertical",label="Distribution Difference") - fig.savefig(filename) + fig.savefig(filename, bbox_inches = 'tight') plt.close() def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, - xedges_list,yedges_list,xlabel_list,ylabel_list,color,filename): + xedges_list,yedges_list,xlabel,ylabel,color,filename): fig, axes = plt.subplots(len(self.depths[:,0]), 1, - figsize=(3 * len(self.depths[:,0]), 3.),tight_layout=True) - for ind, dist, xedges, yedges, xlabel, ylabel in \ - zip(range(len(self.depths[:,0])), dist_list, xedges_list, yedges_list, - xlabel_list, ylabel_list): + figsize=(3.5, 3 * len(self.depths[:,0])), + sharex = True, sharey = True, tight_layout=True) + for dind, ref_mean, ref_std, com_mean, com_std, xedges, yedges in \ + zip(range(len(self.depths[:,0])), ref_mean_list, ref_std_list, + com_mean_list, com_std_list, xedges_list, yedges_list): xe = 0.5*(xedges[:-1]+xedges[1:]) delta = 0.1*np.diff(xedges).mean() @@ -773,14 +787,16 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, com_y = com_mean[ind] com_e = com_std [ind] - ax = axes.flat[ind] + ax = axes.flat[dind] ax.errorbar(ref_x,ref_y,yerr=ref_e,fmt='-o',color='k') ax.errorbar(com_x,com_y,yerr=com_e,fmt='-o',color=color) - ax.set_xlabel(xlabel,fontsize = 12) - ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + ax.set_xlabel(xlabel,fontsize = 8) + ax.set_ylabel(ylabel,fontsize = 8 if len(ylabel) <= 60 else 6) ax.set_xlim(xedges[0],xedges[-1]) ax.set_ylim(yedges[0],yedges[-1]) - fig.savefig(filename) + ax.set_title(('%.2f-%.2f' % (self.depths[dind,0], self.depths[dind,1])) \ + + self.depths_units) + fig.savefig(filename, bbox_inches = 'tight') plt.close() # If there are no relationships to analyze, get out of here @@ -793,8 +809,8 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, # Try to get the dependent data from the model and obs try: - ref_dep_list = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) - com_dep_list = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) + ref_dep_list = _retrieveSM(os.path.join(self.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) + com_dep_list = _retrieveSM(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) com_dep_list = [_applyRefMask(ref_dep, com_dep) for ref_dep,com_dep in zip(ref_dep_list,com_dep_list)] dep_name = self.longname.split("/")[0] dep_min = self.limits["timeint"]["min"] @@ -820,9 +836,9 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, # try to get the independent data from the model and obs try: - ref_ind_list = 
_retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) - com_ind_list = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) - com_ind_list = _applyRefMask(ref_ind,com_ind) + ref_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) + com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) + com_ind = _applyRefMask(ref_ind,com_ind) ind_name = c.longname.split("/")[0] ind_min = c.limits["timeint"]["min"]-1e-12 ind_max = c.limits["timeint"]["max"]+1e-12 @@ -852,16 +868,6 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, legend = False, benchmark = False) - # Try to get the dependent data from the model and obs - try: - ref_dep_list = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) - com_dep_list = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) - com_dep_list = [_applyRefMask(ref_dep, com_dep) for ref_dep,com_dep in zip(ref_dep_list,com_dep_list)] - dep_name = self.longname.split("/")[0] - dep_min = self.limits["timeint"]["min"] - dep_max = self.limits["timeint"]["max"] - except: - return # Analysis over regions lim_dep = [dep_min,dep_max] @@ -880,7 +886,8 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, com_mean_list = [] com_std_list = [] - for dind, ref_dep, ref_ind in zip(range(len(ref_dep_list)), ref_dep_list, ref_ind_list): + for dind, ref_dep, com_dep in zip(range(len(ref_dep_list)), ref_dep_list, + com_dep_list): # Check on data shape if not np.allclose(ref_dep.data.shape,ref_ind.data.shape): msg = "[%s][%s] Data size mismatch in relationship: %s %s vs. %s %s" % (self.longname,m.name,dep_name,str(ref_dep.data.shape),ind_name,str(ref_ind.data.shape)) @@ -927,7 +934,8 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, for ref_dist, com_dist in zip(ref_dist_list, com_dist_list): score = _scoreDistribution(ref_dist,com_dist) score_list.append(score) - score = np.mean(score_list) # !!!!!!! May be wrong? + score = np.sum(np.array(score_list)*(self.depths[:,1] - self.depths[:,0])) / \ + (self.depths[-1,1] - self.depths[0,0]) sname = "%s Hellinger Distance %s" % (longname,region) if sname in scalars.variables: scalars.variables[sname][0] = score @@ -937,7 +945,12 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, data = score).toNetCDF4(results,group="Relationships") # Score the functional response - score = _scoreFunction(ref_dist[3],com_dist[3]) + score_list = [] + for ref_mean, com_mean in zip(ref_mean_list, com_mean_list): + score = _scoreFunction(ref_mean,com_mean) + score_list.append(score) + score = np.sum(np.array(score_list)*(self.depths[:,1] - self.depths[:,0])) / \ + (self.depths[-1,1] - self.depths[0,0]) sname = "%s RMSE Score %s" % (longname,region) if sname in scalars.variables: scalars.variables[sname][0] = score diff --git a/src/ILAMB/Confrontation.py b/src/ILAMB/Confrontation.py index b2afa0d9..ed592305 100644 --- a/src/ILAMB/Confrontation.py +++ b/src/ILAMB/Confrontation.py @@ -10,12 +10,10 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable from mpi4py import MPI from sympy import sympify -import cftime as cf import logging logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) - def getVariableList(dataset): """Extracts the list of variables in the dataset that aren't dimensions or the bounds of dimensions. 
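The depth aggregation introduced in the ConfSoilMoisture hunks above replaces the earlier np.mean(score_list) with an average weighted by slab thickness, so a thin surface slab no longer counts the same as a much thicker one. A worked example with hypothetical slabs makes the weighting concrete:

    import numpy as np
    depths = np.array([[0., 10.], [10., 40.]])   # two contiguous slabs, made up
    scores = np.array([0.8, 0.6])                # per-slab scores
    w = depths[:, 1] - depths[:, 0]              # thicknesses: 10 and 30
    score = np.sum(scores * w) / (depths[-1, 1] - depths[0, 0])
    # 0.65 here, versus np.mean(scores) == 0.70

Note that the denominator depths[-1,1] - depths[0,0] equals the summed thickness only when the slabs tile the column without gaps or overlap; if the configured depths were ever disjoint, w.sum() would be the safer normalization.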
@@ -150,8 +148,9 @@ def __init__(self,**keywords): self.study_limits = [] # Make sure the source data exists - - if not os.path.isfile(self.source): + try: + os.stat(self.source) + except: msg = "\n\nI am looking for data for the %s confrontation here\n\n" % self.name msg += "%s\n\nbut I cannot find it. " % self.source msg += "Did you download the data? Have you set the ILAMB_ROOT envronment variable?\n" @@ -179,14 +178,14 @@ def __init__(self,**keywords): self.lbls = ["site%d" % s for s in range(len(dataset.dimensions["data"]))] if "time" in dataset.dimensions: t = dataset.variables["time"] - tdata = t[[0,-1]] if "bounds" in t.ncattrs(): - tdata = dataset.variables[t.bounds] - tdata = [tdata[0,0],tdata[-1,1]] - tdata = cf.num2date(tdata,units=t.units,calendar=t.calendar) - y0 = tdata[0].year - yf = tdata[1].year - + t = dataset.variables[t.bounds][...] + y0 = int(t[ 0,0]/365.+1850.) + yf = int(t[-1,1]/365.+1850.)-1 + else: + y0 = int(round(t[ 0]/365.)+1850.) + yf = int(round(t[-1]/365.)+1850.)-1 + if self.hasSites: pages.append(post.HtmlSitePlotsPage("SitePlots","Site Plots")) pages[-1].setHeader("CNAME / RNAME / MNAME") @@ -364,9 +363,8 @@ def confront(self,m): # Read in some options and run the mean state analysis mass_weighting = self.keywords.get("mass_weighting",False) skip_rmse = self.keywords.get("skip_rmse" ,False) - skip_iav = self.keywords.get("skip_iav" ,True ) + skip_iav = self.keywords.get("skip_iav" ,False) skip_cycle = self.keywords.get("skip_cycle" ,False) - rmse_score_basis = self.keywords.get("rmse_score_basis","cycle") if obs.spatial: il.AnalysisMeanStateSpace(obs,mod,dataset = fcm.mod_dset, regions = self.regions, @@ -377,8 +375,7 @@ def confront(self,m): skip_rmse = skip_rmse, skip_iav = skip_iav, skip_cycle = skip_cycle, - mass_weighting = mass_weighting, - rmse_score_basis = rmse_score_basis) + mass_weighting = mass_weighting) else: il.AnalysisMeanStateSites(obs,mod,dataset = fcm.mod_dset, regions = self.regions, @@ -404,39 +401,34 @@ def determinePlotLimits(self): called before calling any plotting routine. """ + max_str = "up99" + min_str = "dn99" + if self.keywords.get("limit_type","99per") == "minmax": + max_str = "max" + min_str = "min" - filelist = glob.glob(os.path.join(self.output_path,"*.nc")) - benchmark_file = [f for f in filelist if "Benchmark" in f] - - # There may be regions in which there is no benchmark data and - # these should be weeded out. If the plotting phase occurs in - # the same run as the analysis phase, this is not needed. - if benchmark_file: - with Dataset(benchmark_file[0]) as dset: Vs = getVariableList(dset.groups["MeanState"]) - Vs = [v for v in Vs if "timeint" in v] - if Vs: - self.pruneRegions(Variable(filename = benchmark_file[0], - variable_name = Vs[0], - groupname = "MeanState")) - # Determine the min/max of variables over all models limits = {} - for fname in filelist: + prune = False + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): with Dataset(fname) as dataset: if "MeanState" not in dataset.groups: continue - variables = getVariableList(dataset.groups["MeanState"]) + group = dataset.groups["MeanState"] + variables = [v for v in group.variables.keys() if v not in group.dimensions.keys()] for vname in variables: - var = dataset.groups["MeanState"].variables[vname] - if var[...].size <= 1: continue + var = group.variables[vname] pname = vname.split("_")[0] - - """If the plot is a time series, it has been averaged over regions - already and we need a separate dictionary for the - region as well. 
These can be based on the - percentiles from the attributes of the netCDF - variables.""" - if pname in time_opts: - region = vname.split("_")[-1] + region = vname.split("_")[-1] + if var[...].size <= 1: continue + if pname in space_opts: + if pname not in limits: + limits[pname] = {} + limits[pname]["min"] = +1e20 + limits[pname]["max"] = -1e20 + limits[pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pname]["min"] = min(limits[pname]["min"],var.getncattr(min_str)) + limits[pname]["max"] = max(limits[pname]["max"],var.getncattr(max_str)) + elif pname in time_opts: if pname not in limits: limits[pname] = {} if region not in limits[pname]: limits[pname][region] = {} @@ -445,27 +437,11 @@ def determinePlotLimits(self): limits[pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) limits[pname][region]["min"] = min(limits[pname][region]["min"],var.getncattr("min")) limits[pname][region]["max"] = max(limits[pname][region]["max"],var.getncattr("max")) - - else: - """If the plot is spatial, we want to set the limits as a percentile - of all data across models and the - benchmark. So here we load the data up and in - another pass will compute the percentiles.""" - if pname not in limits: - limits[pname] = {} - limits[pname]["min"] = +1e20 - limits[pname]["max"] = -1e20 - limits[pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) - limits[pname]["data"] = var[...].compressed() - else: - limits[pname]["data"] = np.hstack([limits[pname]["data"],var[...].compressed()]) - - # For those limits which we built up data across all models, compute the percentiles - for pname in limits.keys(): - print(pname) # DEBUG - if ("data" in limits[pname]) and (len(limits[pname]['data']) > 0): - print(limits[pname]['data']) # DEBUG - limits[pname]["min"],limits[pname]["max"] = np.percentile(limits[pname]["data"],[1,99]) + if not prune and "Benchmark" in fname and pname == "timeint": + prune = True + self.pruneRegions(Variable(filename = fname, + variable_name = vname, + groupname = "MeanState")) # Second pass to plot legends (FIX: only for master?) for pname in limits.keys(): @@ -485,12 +461,10 @@ def determinePlotLimits(self): if "score" in pname: limits[pname]["min"] = 0 limits[pname]["max"] = 1 - + limits[pname]["cmap"] = opts["cmap"] if limits[pname]["cmap"] == "choose": limits[pname]["cmap"] = self.cmap - if "score" in pname: - limits[pname]["cmap"] = plt.cm.get_cmap(limits[pname]["cmap"],3) - + # Plot a legend for each key if opts["haslegend"]: fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) @@ -526,6 +500,58 @@ def determinePlotLimits(self): self.limits = limits + def computeOverallScore(self,m): + """Computes the overall composite score for a given model. + + This routine opens the netCDF results file associated with + this confrontation-model pair, and then looks for a "scalars" + group in the dataset as well as any subgroups that may be + present. For each grouping of scalars, it will blend any value + with the word "Score" in the name to render an overall score, + overwriting the existing value if present. + + Parameters + ---------- + m : ILAMB.ModelResult.ModelResult + the model results + + """ + + def _computeOverallScore(scalars): + """Given a netCDF4 group of scalars, blend them into an overall score""" + scores = {} + variables = [v for v in scalars.variables.keys() if "Score" in v and "Overall" not in v] + for region in self.regions: + overall_score = 0. + sum_of_weights = 0. 
+ for v in variables: + if region not in v: continue + score = v.replace(region,"").strip() + weight = 1. + if score in self.weight: weight = self.weight[score] + overall_score += weight*scalars.variables[v][...] + sum_of_weights += weight + overall_score /= max(sum_of_weights,1e-12) + scores["Overall Score %s" % region] = overall_score + return scores + + fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) + if not os.path.isfile(fname): return + with Dataset(fname,mode="r+") as dataset: + datasets = [dataset.groups[grp] for grp in dataset.groups if "scalars" not in grp] + groups = [grp for grp in dataset.groups if "scalars" not in grp] + datasets.append(dataset) + groups .append(None) + for dset,grp in zip(datasets,groups): + if "scalars" in dset.groups: + scalars = dset.groups["scalars"] + score = _computeOverallScore(scalars) + for key in score.keys(): + if key in scalars.variables: + scalars.variables[key][0] = score[key] + else: + Variable(data=score[key],name=key,unit="1").toNetCDF4(dataset,group=grp) + def compositePlots(self): """Renders plots which display information of all models. @@ -679,7 +705,6 @@ def modelPlots(self,m): # grab plotting options if pname not in self.limits.keys(): continue - if pname not in space_opts: continue opts = space_opts[pname] # add to html layout @@ -727,13 +752,9 @@ def modelPlots(self,m): if not (var.spatial or (var.ndata is not None)) and var.temporal: # grab the benchmark dataset to plot along with - try: - obs = Variable(filename=bname,groupname="MeanState",variable_name=vname).convert(var.unit) - except: - continue + obs = Variable(filename=bname,groupname="MeanState",variable_name=vname).convert(var.unit) # grab plotting options - if pname not in time_opts: continue opts = time_opts[pname] # add to html layout @@ -834,10 +855,10 @@ def generateHtml(self): for vname in grp.variables.keys(): found = False for region in self.regions: - if vname.endswith(" %s" % region): + if region in vname: found = True var = grp.variables[vname] - name = ''.join(vname.rsplit(" %s" % region,1)) + name = vname.replace(region,"") metrics[mname][region][name] = Variable(name = name, unit = var.units, data = var[...]) @@ -947,8 +968,9 @@ def _scoreFunction(ref,com): def _plotDistribution(dist,xedges,yedges,xlabel,ylabel,filename): fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) pc = ax.pcolormesh(xedges, yedges, dist, - norm = LogNorm(vmin = 1e-4,vmax = 1e-1), - cmap = 'plasma' if 'plasma' in plt.cm.cmap_d else 'summer') + norm = LogNorm(), + cmap = 'plasma' if 'plasma' in plt.cm.cmap_d else 'summer', + vmin = 1e-4, vmax = 1e-1) div = make_axes_locatable(ax) fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), orientation="vertical",label="Fraction of total datasites") diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index adaa8c96..46088836 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -132,6 +132,7 @@ def __init__(self,**keywords): z0 = keywords.get("z0",None) # YW zf = keywords.get("zf",None) # YW convert_calendar = keywords.get("convert_calendar",True) + out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname,convert_calendar=convert_calendar,z0=z0,zf=zf) # YW data,data_bnds,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbounds,ndata,calendar,attr = out @@ -342,7 +343,8 @@ def integrateInTime(self,**keywords): # the integrated array should be masked where *all* data in time was previously masked mask = False if self.data.ndim > 1 and 
self.data.mask.size > 1: - mask = np.apply_along_axis(np.all,0,self.data.mask[ind]) + ##mask = np.apply_along_axis(np.all,0,self.data.mask[ind]) + mask = np.all(self.data.mask[ind], 0) integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units @@ -464,6 +466,7 @@ def integrateInDepth(self,**keywords): dz = np.expand_dims(dz,axis=-1) args.append(range(self.lat.size)) args.append(range(self.lon.size)) + ind = np.ix_(*args) # approximate the integral by nodal integration (rectangle rule) @@ -475,7 +478,8 @@ def integrateInDepth(self,**keywords): # the integrated array should be masked where *all* data in depth was previously masked mask = False if self.data.ndim > 1 and self.data.mask.size > 1: - mask = np.apply_along_axis(np.all,axis,self.data.mask[ind]) + ##mask = np.apply_along_axis(np.all,axis,self.data.mask[ind]) # YW + mask = np.all(self.data.mask[ind], axis) # YW; faster integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 4ec9bc50..2153d63b 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -779,6 +779,11 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N depth_bnd_name = grp.variables[depth_name].bounds if (depth_name in grp.variables and "bounds" in grp.variables[depth_name].ncattrs()) else None if depth_bnd_name not in grp.variables: depth_bnd_name = None + if depth_bnd_name is None: # YW; soil moisture data set setup + dummy = [d for d in grp.variables.keys() if depth_name in d and ("bound" in d or "bnd" in d)] + if len(dummy) > 0: + depth_bnd_name = dummy[0] + elif len(missed) >= 1: raise ValueError("Ambiguous choice of values for the layered dimension [%s] in %s" % (",".join(missed),filename)) else: @@ -1318,9 +1323,9 @@ def AnalysisMeanStateSpace(ref,com,**keywords): Parameters ---------- - obs : ILAMB.Variable.Variable + ref : ILAMB.Variable.Variable the observational (reference) variable - mod : ILAMB.Variable.Variable + com : ILAMB.Variable.Variable the model (comparison) variable regions : list of str, optional the regions overwhich to apply the analysis @@ -1406,9 +1411,6 @@ def AnalysisMeanStateSpace(ref,com,**keywords): ref_timeint.name = "timeint_of_%s" % name - print(benchmark_dataset) # DEBUG - print(ref_timeint.name) # DEBUG - ref_timeint.toNetCDF4(benchmark_dataset,group="MeanState") for region in regions: From f59de76939d33ea492374be93b1fcf39d0ea6063 Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Mon, 20 Sep 2021 16:52:31 -0400 Subject: [PATCH 08/18] un-edited ConfNBP --- src/ILAMB/ConfNBP.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ILAMB/ConfNBP.py b/src/ILAMB/ConfNBP.py index 68a04803..b9354208 100644 --- a/src/ILAMB/ConfNBP.py +++ b/src/ILAMB/ConfNBP.py @@ -264,8 +264,7 @@ def NBPplot(V,vmin,vmax,colors,fname): Y = []; L = [] for key in V: if key == "Benchmark": continue - # 2021/04/03 YW Seems uncessary? 
- ##if V[key].time[0] > V["Benchmark"].time[0]+10: continue + if V[key].time[0] > V["Benchmark"].time[0]+10: continue L.append(key) Y.append(V[key].data[-1]) Y = np.asarray(Y); L = np.asarray(L) From 0d2314eb6cf292f701a056ff7f2877177e77d492 Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Tue, 21 Sep 2021 00:09:05 -0400 Subject: [PATCH 09/18] begin sensitivity addition --- bin/ilamb-run | 64 +++++++++++++++++++++++++++++++++++ src/ILAMB/ConfSoilMoisture.py | 11 ++++++ src/ILAMB/Scoreboard.py | 2 ++ src/ILAMB/Sensitivity.py | 3 ++ 4 files changed, 80 insertions(+) create mode 100644 src/ILAMB/Sensitivity.py diff --git a/bin/ilamb-run b/bin/ilamb-run index d20c6004..36643d6f 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -74,6 +74,7 @@ def InitializeModels(model_root,models=[],verbose=False,filter="",regex="",model if log: logger.debug("[%s]" % mname,format_exc()) continue M.append(m) + max_model_name_len = max(max_model_name_len,len(mname)) break M = sorted(M,key=lambda m: m.name.upper()) @@ -248,6 +249,65 @@ def MatchRelationshipConfrontation(C): found = True return C +def MatchSensitivityConfrontation(C): + # YW + """Match sensitivity strings to confrontation longnames + + We allow for sensitivity maps to be studied by specifying the + sensitivity longname in the configure file. This routine loops + over all defined sensitivity maps and finds the matching + confrontation. + + Parameters + ---------- + C : list of ILAMB.Confrontation.Confrontation + the confrontation list + + Returns + ------- + C : list of ILAMB.Confrontation.Confrontation + the same list with sensitivity maps linked to confrontations. + """ + for c in C: + if not hasattr(c, "sensitivities"): continue + if c.sensitivities is None: continue + for i,longname in enumerate(c.sensitivities): + found = False + for cor in C: + if longname.lower() == cor.longname.lower(): + c.sensitivities[i] = cor + found = True + return C + +def MatchConfrontation(C): + """Match relationship strings to confrontation longnames + + We allow for relationships to be studied by specifying the + confrontation longname in the configure file. This routine loops + over all defined relationships and finds the matching + confrontation. (NOTE: this really belongs inside the Scoreboard + object) + + Parameters + ---------- + C : list of ILAMB.Confrontation.Confrontation + the confrontation list + + Returns + ------- + C : list of ILAMB.Confrontation.Confrontation + the same list with relationships linked to confrontations + """ + for c in C: + if c.relationships is None: continue + for i,longname in enumerate(c.relationships): + found = False + for cor in C: + if longname.lower() == cor.longname.lower(): + c.relationships[i] = cor + found = True + return C + def FilterConfrontationList(C,match_list): """Filter the confrontation list @@ -628,6 +688,10 @@ S = Scoreboard(args.config[0], C = MatchRelationshipConfrontation(S.list()) +C = MatchSensitivityConfrontation(C) # YW + +import pdb; pdb.set_trace() + if len(args.study_limits) == 2: args.study_limits[1] += 1 for c in C: c.study_limits = (np.asarray(args.study_limits)-1850)*365. diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index e61b222e..64c81645 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -276,6 +276,7 @@ def confront(self,m): '... 
%.2f-%.2f' % (z0, zf)) # DEBUG if obs.spatial: + # Calculate mean state il.AnalysisMeanStateSpace(obs, mod, dataset = fcm.mod_dset, regions = self.regions, benchmark_dataset = fcm.obs_dset, @@ -287,6 +288,13 @@ def confront(self,m): skip_cycle = skip_cycle, mass_weighting = mass_weighting, rmse_score_basis = rmse_score_basis) + + # Calculate standard deviation state + + + # Calculate trend state + + else: il.AnalysisMeanStateSites(obs, mod, dataset = fcm.mod_dset, regions = self.regions, @@ -298,6 +306,9 @@ def confront(self,m): skip_iav = skip_iav, skip_cycle = skip_cycle, mass_weighting = mass_weighting) + + # Calculate sensitivity by partial correlation - Need an option + fcm.mod_dset.setncattr("complete",1) if self.master: fcm.obs_dset.setncattr("complete",1) logger.info("[%s][%s] Success" % (self.longname,m.name)) diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py index 9715a57f..4fbf957d 100644 --- a/src/ILAMB/Scoreboard.py +++ b/src/ILAMB/Scoreboard.py @@ -44,6 +44,7 @@ def __init__(self, name): self.plot_unit = None self.space_mean = True self.relationships = None + self.sensitivities = None # YW self.ctype = None self.regions = None self.skip_rmse = False @@ -102,6 +103,7 @@ def _to_bool(a): node.space_mean = _to_bool(node.space_mean) if node.regions is not None: node.regions = node.regions.split(",") if node.relationships is not None: node.relationships = node.relationships.split(",") + if node.sensitivities is not None: node.sensitivities = node.sensitivities.split(",") if node.alternate_vars is not None: node.alternate_vars = node.alternate_vars.split(",") else: diff --git a/src/ILAMB/Sensitivity.py b/src/ILAMB/Sensitivity.py new file mode 100644 index 00000000..aee1f0af --- /dev/null +++ b/src/ILAMB/Sensitivity.py @@ -0,0 +1,3 @@ +class Sensitivity(object): + def __init__(self): + pass From 841fcda2de128bde8d4cd087d975f954c32f3248 Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Sun, 17 Oct 2021 00:18:33 -0400 Subject: [PATCH 10/18] trend & sensitivity WIP --- bin/ilamb-run | 7 +- src/ILAMB/ConfSoilMoisture.py | 621 ++++++++++++++++++++++++------- src/ILAMB/Confrontation.py | 3 +- src/ILAMB/Variable.py | 466 ++++++++++++++++++++++- src/ILAMB/constants.py | 57 +++ src/ILAMB/ilamblib.py | 377 ++++++++++++++++++- test/test_SoilMoisture.py | 109 ++++++ test/test_partial_corr_tensor.py | 191 ++++++++++ test/test_trend_tensor.py | 203 ++++++++++ 9 files changed, 1900 insertions(+), 134 deletions(-) create mode 100644 test/test_SoilMoisture.py create mode 100644 test/test_partial_corr_tensor.py create mode 100644 test/test_trend_tensor.py diff --git a/bin/ilamb-run b/bin/ilamb-run index 36643d6f..31007195 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -279,8 +279,10 @@ def MatchSensitivityConfrontation(C): found = True return C +""" +# YW: Appears to duplicate MatchRelationshipConfrontation def MatchConfrontation(C): - """Match relationship strings to confrontation longnames + """ """Match relationship strings to confrontation longnames We allow for relationships to be studied by specifying the confrontation longname in the configure file. 
This routine loops @@ -297,7 +299,7 @@ def MatchConfrontation(C): ------- C : list of ILAMB.Confrontation.Confrontation the same list with relationships linked to confrontations - """ + """ """ for c in C: if c.relationships is None: continue for i,longname in enumerate(c.relationships): @@ -307,6 +309,7 @@ def MatchConfrontation(C): c.relationships[i] = cor found = True return C +""" def FilterConfrontationList(C,match_list): """Filter the confrontation list diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index 64c81645..50eb3c48 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -15,6 +15,7 @@ from .Confrontation import Confrontation import numpy as np import time # DEBUG +from copy import deepcopy import logging @@ -27,6 +28,16 @@ def __init__(self,**keywords): # Calls the regular constructor super(ConfSoilMoisture,self).__init__(**keywords) + # Trend State page // insert before the Data Information page + pages.insert(-2, post.HtmlPage('TrendState', 'Trend State')) + pages[-2].setHeader('CNAME / RNAME / MNAME') + pages[-2].setSections(['Trend over the period in space', + 'Trend of the regional mean over the period']) + if self.sensitivities is not None: + pages.insert(-2, post.HtmlPage('Sensitivities', 'Partial Correlation Relationships')) + pages[-2].setHeader('CNAME / RNAME / MNAME') + pages[-2].setSections(list(self.sensitivities)) + # Get/modify depths with Dataset(self.source) as dset: v = dset.variables[self.variable] @@ -249,6 +260,57 @@ def _addDepth(v): yield obs, mod, z0, zf + def stageRef(self,m): + """Extract the data that will be done partial correlations with soil moisture data.""" + + # Check the order of magnitude of the data and convert to help avoid roundoff errors + def _reduceRoundoffErrors(var): + if "s-1" in var.unit: return var.convert(var.unit.replace("s-1","d-1")) + if "kg" in var.unit: return var.convert(var.unit.replace("kg" ,"g" )) + return var + + def _getOrder(var): + return np.log10(np.abs(var.data).clip(1e-16)).mean() + order = _getOrder(obs) + count = 0 + + obs_list = [] + mod_list = [] + for sens in self.sensitivities: + obs = Variable(filename = sens.source, + variable_name = sens.variable, + alternate_vars = sens.alternate_vars, + t0 = None if len(self.study_limits) != 2 else self.study_limits[0], + tf = None if len(self.study_limits) != 2 else self.study_limits[1]) + if obs.time is None: raise il.NotTemporalVariable() + self.pruneRegions(obs) + + # Try to extract a commensurate quantity from the model + mod = m.extractTimeSeries(sens.variable, + alt_vars = sens.alternate_vars, + expression = sens.derived, + initial_time = obs.time_bnds[ 0,0], + final_time = obs.time_bnds[-1,1], + lats = None if obs.spatial else obs.lat, + lons = None if obs.spatial else obs.lon) + obs,mod = il.MakeComparable(obs,mod, + mask_ref = True, + clip_ref = True, + extents = self.extents, + logstring = "[%s][%s]" % (self.longname,m.name)) + + while order < -2 and count < 2: + obs = _reduceRoundoffErrors(obs) + order = _getOrder(obs) + count += 1 + + # convert the model data to the same unit + mod = mod.convert(obs.unit) + obs_list.append(obs) + mod_list.append(mod) + return obs_list, mod_list + + def confront(self,m): mod_file = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) obs_file = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name, )) @@ -277,7 +339,7 @@ def confront(self,m): if obs.spatial: # Calculate mean state - il.AnalysisMeanStateSpace(obs, mod, dataset = fcm.mod_dset, + 
il.AnalysisMeanStateSpace(obs, mod, dataset = fcm.mod_dset, regions = self.regions, benchmark_dataset = fcm.obs_dset, table_unit = self.table_unit, @@ -289,12 +351,15 @@ def confront(self,m): mass_weighting = mass_weighting, rmse_score_basis = rmse_score_basis) - # Calculate standard deviation state - - # Calculate trend state - - + il.AnalysisTrendStateSpace(obs, mod, dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting) else: il.AnalysisMeanStateSites(obs, mod, dataset = fcm.mod_dset, regions = self.regions, @@ -306,8 +371,48 @@ def confront(self,m): skip_iav = skip_iav, skip_cycle = skip_cycle, mass_weighting = mass_weighting) - - # Calculate sensitivity by partial correlation - Need an option + # !!! TO-DO: Add AnalysisTrendStateSites + + # Calculate sensitivity by partial correlation + if self.sensitivities is not None: + obs_comparable = deepcopy(obs) + mod_comparable = deepcopy(mod) + + obs_indep_list = [] + mod_indep_list = [] + for indep in self.sensitivities: + obs_indep, mod_indep = self.stageRef(indep) + obs_comparable, obs_indep = il.MakeComparable(obs_comparable, obs_indep, + mask_ref = True, clip_ref = True, + extents = self.extents, + logstring = "[%s][%s]MakeComparable" % \ + (obs.variable_name, obs_indep.variable_name)) + mod_comparable, mod_indep = il.MakeComparable(mod_comparable, mod_indep, + mask_ref = True, clip_ref = True, + extents = self.extents, + logstring = "[%s][%s]MakeComparable" % \ + (mod.variable_name, mod_indep.variable_name)) + obs_indep_list.append(obs_indep) + mod_indep_list.append(mod_indep) + # (second pass) + for k, obs_indep, mod_indep in zip(range(len(obs_indep_list)), + obs_indep_list, mod_indep_list): + obs_comparable, obs_indep = il.MakeComparable(obs_comparable, obs_indep, + mask_ref = True, clip_ref = True, + extents = self.extents, + logstring = "[%s][%s]MakeComparable" % \ + (obs.variable_name, obs_indep.variable_name)) + mod_comparable, mod_indep = il.MakeComparable(mod_comparable, mod_indep, + mask_ref = True, clip_ref = True, + extents = self.extents, + logstring = "[%s][%s]MakeComparable" % \ + (mod.variable_name, mod_indep_variable_name)) + + if obs.spatial: + il.AnalysisPartialCorrSpace(obs_comparable, mod_comparable, obs_indep_list, mod_indep_list) + else: + # !!! 
TO-DO: Add AnalysisPartialCorrSites + pass fcm.mod_dset.setncattr("complete",1) if self.master: fcm.obs_dset.setncattr("complete",1) @@ -467,6 +572,146 @@ def _alphabeticalBenchmarkFirst(key): plt.close() + # Get the HTML page + page = [page for page in self.layout.pages if "TrendState" in page.name][0] + + models = [] + colors = [] + corr = {} + cycle = {} + has_cycle = False + has_std = False + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + dataset = Dataset(fname) + if "TrendState" not in dataset.groups: continue + dset = dataset.groups["TrendState"] + models.append(dataset.getncattr("name")) + colors.append(dataset.getncattr("color")) + for region in self.regions: + if region not in cycle: cycle[region] = {} + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + + if zstr not in cycle[region]: cycle[region][zstr] = [] + + key = [v for v in dset.variables.keys() if ("trend_cycle_" in v and zstr in v and region in v)] + if len(key)>0: + has_cycle = True + cycle[region][zstr].append(Variable(filename=fname,groupname="TrendState", + variable_name=key[0])) + + if zstr not in std[region]: std[region][zstr] = [] + if zstr not in corr[region]: corr[region][zstr] = [] + + key = [] + if "scalars" in dset.groups: + key = [v for v in dset.groups["scalars"].variables.keys() \ + if ("Spatial Distribution Score" in v and zstr in v and region in v)] + if len(key) > 0: + has_std = True + sds = dset.groups["scalars"].variables[key[0]] + corr[region][zstr].append(sds.getncattr("R" )) + std [region][zstr].append(sds.getncattr("std")) + + # composite annual cycle plot + if has_cycle and len(models) > 0: + page.addFigure("Trend of the regional mean over the period", + "comptrendcycle", + "RNAME_comptrendcycle.png", + side = "ANNUAL CYCLE OF TREND", + legend = False) + + for region in self.regions: + if region not in cycle: continue + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0]), + sharex = True, sharey = True) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind, 1] + zstr = '%.2f-%.2f' % (z0, zf) + + if self.depths.shape[0] == 1: + ax = axes + else: + ax = axes.flat[dind] + + for name,color,var in zip(models,colors,cycle[region][zstr]): + dy = 0.05*(self.limits["cycle"][region]["max"] - \ + self.limits["cycle"][region]["min"]) + + var.plot(ax, lw=2, color=color, label=name, + ticks = time_opts["cycle"]["ticks"], + ticklabels = time_opts["cycle"]["ticklabels"], + vmin = self.limits["cycle"][region]["min"]-dy, + vmax = self.limits["cycle"][region]["max"]+dy) + ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + ax.set_title(zstr + ' '+ self.depths_units) + fig.savefig(os.path.join(self.output_path,"%s_comptrendcycle.png" % (region))) + plt.close() + + # plot legends with model colors (sorted with Benchmark data on top) + page.addFigure("Trend of the regional mean over the period", + "legend_comptrendcycle", + "legend_comptrendcycle.png", + side = "MODEL COLORS", + legend = False) + def _alphabeticalBenchmarkFirst(key): + key = key[0].lower() + if key == "BENCHMARK": return "A" + return key + tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) + fig,ax = plt.subplots() + for model,color in tmp: + ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) + handles,labels = ax.get_legend_handles_labels() + plt.close() + + ncol = np.ceil(float(len(models))/11.).astype(int) + if ncol > 0: + fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) 
+ ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) + ax.axis(False) + fig.savefig(os.path.join(self.output_path,"legend_comptrendcycle.png")) + plt.close() + + ## !!! TO-DO? Implement the std calculation for spatial trends. + ## spatial distribution Taylor plot + #if has_std: + # page.addFigure("Temporally integrated period mean", + # "spatial_variance", + # "RNAME_spatial_variance.png", + # side = "SPATIAL TAYLOR DIAGRAM", + # legend = False) + # page.addFigure("Temporally integrated period mean", + # "legend_spatial_variance", + # "legend_spatial_variance.png", + # side = "MODEL COLORS", + # legend = False) + #if "Benchmark" in models: colors.pop(models.index("Benchmark")) + #for region in self.regions: + # if not (region in std and region in corr): continue + # + # fig = plt.figure(figsize=(12.0,12.0)) + # for dind, z0 in enumerate(self.depths[:,0]): + # zf = self.depths[dind, 1] + # zstr = '%.2f-%.2f' % (z0, zf) + # + # if not (zstr in std[region] and zstr in corr[region]): continue + # if len(std[region][zstr]) != len(corr[region][zstr]): continue + # if len(std[region][zstr]) == 0: continue + # ax, aux = post.TaylorDiagram(np.asarray(std[region][zstr]), + # np.asarray(corr[region][zstr]), + # 1.0,fig,colors,True,220+dind+1) + # ax.set_title(zstr + ' ' + self.depths_units) + # fig.savefig(os.path.join(self.output_path, + # "%s_spatial_variance.png" % (region))) + # plt.close() + + + def modelPlots(self,m): """For a given model, create the plots of the analysis results. @@ -477,73 +722,46 @@ def modelPlots(self,m): """ self._relationship(m) + self._sensitivity(m) bname = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name )) fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) if not os.path.isfile(bname): return if not os.path.isfile(fname): return # get the HTML page - page = [page for page in self.layout.pages if "MeanState" in page.name][0] - - with Dataset(fname) as dataset: - group = dataset.groups["MeanState"] - variables = getVariableList(group) - color = dataset.getncattr("color") - for vname in variables: - # The other depths will be handled in plotting - zstr_0 = '%.2f-%.2f' % (self.depths[0,0], self.depths[0,1]) - if not zstr_0 in vname: continue - - # is this a variable we need to plot? - pname = vname.split("_")[0] - if group.variables[vname][...].size <= 1: continue - var = Variable(filename=fname,groupname="MeanState",variable_name=vname) - - if (var.spatial or (var.ndata is not None)) and not var.temporal: - - # grab plotting options - if pname not in self.limits.keys(): continue - if pname not in space_opts: continue - opts = space_opts[pname] - - ##print('... 
is used in space_opts') # DEBUG - - # add to html layout - page.addFigure(opts["section"], - pname, - opts["pattern"], - side = opts["sidelbl"], - legend = opts["haslegend"]) - - # plot variable - for region in self.regions: - nax = self.depths.shape[0] - fig = plt.figure() - for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind,1] - zstr = '%.2f-%.2f' % (z0, zf) - var2 = Variable(filename=fname, groupname = "MeanState", - variable_name=vname.replace(zstr_0, zstr)) - ax = var2.plot(None, fig, nax, region = region, - vmin = self.limits[pname]["min"], - vmax = self.limits[pname]["max"], - cmap = self.limits[pname]["cmap"]) - ax.set_title(zstr + ' ' + self.depths_units) - fig.savefig(os.path.join(self.output_path, - "%s_%s_%s.png" % (m.name,region,pname))) - plt.close() - - # Jumping through hoops to get the benchmark plotted and in the html output - if self.master and (pname == "timeint" or pname == "phase" or pname == "iav"): + for grp in ['MeanState', 'TrendState']: + page = [page for page in self.layout.pages if grp in page.name][0] + + with Dataset(fname) as dataset: + group = dataset.groups[grp] + variables = getVariableList(group) + color = dataset.getncattr("color") + for vname in variables: + # The other depths will be handled in plotting + zstr_0 = '%.2f-%.2f' % (self.depths[0,0], self.depths[0,1]) + if not zstr_0 in vname: continue + + # is this a variable we need to plot? + pname = vname.split("_")[0] + if group.variables[vname][...].size <= 1: continue + var = Variable(filename=fname,groupname=grp,variable_name=vname) + + if (var.spatial or (var.ndata is not None)) and not var.temporal: + + # grab plotting options + if pname not in self.limits.keys(): continue + if pname not in space_opts: continue opts = space_opts[pname] - + + ##print('... 
is used in space_opts') # DEBUG + # add to html layout page.addFigure(opts["section"], - "benchmark_%s" % pname, - opts["pattern"].replace("MNAME","Benchmark"), - side = opts["sidelbl"].replace("MODEL","BENCHMARK"), - legend = True) - + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) + # plot variable for region in self.regions: nax = self.depths.shape[0] @@ -551,67 +769,96 @@ def modelPlots(self,m): for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind,1] zstr = '%.2f-%.2f' % (z0, zf) - obs = Variable(filename=bname,groupname="MeanState", - variable_name=vname.replace(zstr_0, zstr)) - ax = obs.plot(None, fig, nax, region = region, - vmin = self.limits[pname]["min"], - vmax = self.limits[pname]["max"], - cmap = self.limits[pname]["cmap"]) + var2 = Variable(filename=fname, groupname = grp, + variable_name=vname.replace(zstr_0, zstr)) + ax = var2.plot(None, fig, nax, region = region, + vmin = self.limits[pname]["min"], + vmax = self.limits[pname]["max"], + cmap = self.limits[pname]["cmap"]) ax.set_title(zstr + ' ' + self.depths_units) - fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname))) + fig.savefig(os.path.join(self.output_path, + "%s_%s_%s.png" % (m.name,region,pname))) plt.close() - - if not (var.spatial or (var.ndata is not None)) and var.temporal: - # grab the benchmark dataset to plot along with - try: - obs = Variable(filename=bname,groupname="MeanState", - variable_name=vname).convert(var.unit) - except: - continue - - # grab plotting options - if pname not in time_opts: continue - opts = time_opts[pname] - - # add to html layout - page.addFigure(opts["section"], - pname, - opts["pattern"], - side = opts["sidelbl"], - legend = opts["haslegend"]) - - # plot variable - for region in self.regions: - if region not in vname: continue - fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize = (6.5, 2.8*self.depths.shape[0]), - sharex = True, sharey = True) - for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind,1] - zstr = '%.2f-%.2f' % (z0, zf) - if self.depths.shape[0] == 1: - ax = axes - else: - ax = axes.flat[dind] - - var2 = Variable(filename=fname, groupname = "MeanState", - variable_name=vname.replace(zstr_0, zstr)) - obs = Variable(filename=bname,groupname="MeanState", - variable_name=vname.replace(zstr_0, zstr)).convert(var2.unit) - obs.plot(ax, lw = 2, color = 'k', alpha = 0.5) - var2.plot(ax, lw = 2, color = color, label = m.name, - ticks =opts["ticks"], - ticklabels=opts["ticklabels"]) - dy = 0.05*(self.limits[pname][region]["max"]-self.limits[pname][region]["min"]) - ax.set_ylim(self.limits[pname][region]["min"]-dy, - self.limits[pname][region]["max"]+dy) - ylbl = opts["ylabel"] - if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) - ax.set_ylabel(ylbl) - ax.set_title(zstr + ' ' + self.depths_units) - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) - plt.close() - + + # Jumping through hoops to get the benchmark plotted and in the html output + if self.master and (pname == "timeint" or pname == "phase" or pname == "iav"): + opts = space_opts[pname] + + # add to html layout + page.addFigure(opts["section"], + "benchmark_%s" % pname, + opts["pattern"].replace("MNAME","Benchmark"), + side = opts["sidelbl"].replace("MODEL","BENCHMARK"), + legend = True) + + # plot variable + for region in self.regions: + nax = self.depths.shape[0] + fig = plt.figure() + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = 
'%.2f-%.2f' % (z0, zf) + obs = Variable(filename=bname,groupname=grp, + variable_name=vname.replace(zstr_0, zstr)) + ax = obs.plot(None, fig, nax, region = region, + vmin = self.limits[pname]["min"], + vmax = self.limits[pname]["max"], + cmap = self.limits[pname]["cmap"]) + ax.set_title(zstr + ' ' + self.depths_units) + fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname))) + plt.close() + + if not (var.spatial or (var.ndata is not None)) and var.temporal: + # grab the benchmark dataset to plot along with + try: + obs = Variable(filename=bname,groupname=grp, + variable_name=vname).convert(var.unit) + except: + continue + + # grab plotting options + if pname not in time_opts: continue + opts = time_opts[pname] + + # add to html layout + page.addFigure(opts["section"], + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) + + # plot variable + for region in self.regions: + if region not in vname: continue + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0]), + sharex = True, sharey = True) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + if self.depths.shape[0] == 1: + ax = axes + else: + ax = axes.flat[dind] + + var2 = Variable(filename=fname, groupname = grp, + variable_name=vname.replace(zstr_0, zstr)) + obs = Variable(filename=bname,groupname=grp, + variable_name=vname.replace(zstr_0, zstr)).convert(var2.unit) + obs.plot(ax, lw = 2, color = 'k', alpha = 0.5) + var2.plot(ax, lw = 2, color = color, label = m.name, + ticks =opts["ticks"], + ticklabels=opts["ticklabels"]) + dy = 0.05*(self.limits[pname][region]["max"]-self.limits[pname][region]["min"]) + ax.set_ylim(self.limits[pname][region]["min"]-dy, + self.limits[pname][region]["max"]+dy) + ylbl = opts["ylabel"] + if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + ax.set_title(zstr + ' ' + self.depths_units) + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) + plt.close() + logger.info("[%s][%s] Success" % (self.longname,m.name)) def _relationship(self,m,nbin=25): @@ -646,7 +893,7 @@ def _applyRefMask(ref,com): lon=com.lon,lon_bnds=com.lon_bnds) com.data.mask += tmp.data.mask return com - + def _checkLim(data,lim): if lim is None: lim = [min(data.min(),data.min()), @@ -856,7 +1103,6 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, except: continue - # Add figures to the html page page.addFigure(c.longname, "benchmark_rel_%s" % ind_name, @@ -879,7 +1125,6 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, legend = False, benchmark = False) - # Analysis over regions lim_dep = [dep_min,dep_max] lim_ind = [ind_min,ind_max] @@ -969,3 +1214,123 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, Variable(name = sname, unit = "1", data = score).toNetCDF4(results,group="Relationships") + + + def _sensitivities(self, m): + # If there are no sensitivities to analyze, get out of here + if self.sensitivities is None: return + + def _retrieveCorr(cname, filename): + # Grab by depth!!!!!!!!! 
+            with Dataset(filename,mode="r") as dset:
+                key = [v for v in dset.groups["Sensitivities"].variables.keys() \
+                       if "partial_correlation_" in v and cname in v]
+                key2 = [v for v in dset.groups["Sensitivities"].variables.keys() \
+                        if "partial_pvalue_" in v and cname in v]
+            return Variable(filename = filename,
+                            groupname = "Sensitivities",
+                            variable_name = key[0]), \
+                   Variable(filename = filename,
+                            groupname = "Sensitivities",
+                            variable_name = key2[0])
+
+        def _retrieveBias(cname, filename):
+            with Dataset(filename,mode="r") as dset:
+                key = [v for v in dset.groups["Sensitivities"].variables.keys() \
+                       if "sensitivity_bias_map_" in v and cname in v]
+                key2 = [v for v in dset.groups["Sensitivities"].variables.keys() \
+                        if "sensitivity_biasscore_map_" in v and cname in v]
+            return Variable(filename = filename,
+                            groupname = "Sensitivities",
+                            variable_name = key[0]), \
+                   Variable(filename = filename,
+                            groupname = "Sensitivities",
+                            variable_name = key2[0])
+
+        def _plotMap(var, pval, vminmax, title, fout):
+            # !!! Need to determine projection
+
+            fig, axes = plt.subplots(self.depths.shape[0], 1)
+            for dind, z0 in enumerate(self.depths[:,0]):
+                zf = self.depths[dind,1]
+                zstr = '%.2f-%.2f' % (z0, zf)
+                ax = axes.flat[dind]
+                cf = ax.contourf(var.lon, var.lat, var,
+                                 vmin = vminmax[0], vmax = vminmax[1],
+                                 cmap = 'Spectral_r')
+                ax.set_title(zstr + ' ' + self.depths_units)
+            fig.colorbar(cf, cax = fig.add_axes([0.97, 0.1, 0.02, 0.8]),
+                         orientation="vertical",label=title)
+            fig.savefig(fout)
+            plt.close()
+
+
+        # Get the HTML page
+        page = [page for page in self.layout.pages if "Sensitivities" in page.name]
+        if len(page) == 0: return
+        page = page[0]
+
+        with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),
+                     mode="r+") as results:
+            # Grab/create a sensitivity and scalars group
+            group = None
+            if "Sensitivities" not in results.groups:
+                return
+            else:
+                group = results.groups["Sensitivities"]
+            if "scalars" not in group.groups:
+                scalars = group.createGroup("scalars")
+            else:
+                scalars = group.groups["scalars"]
+
+            # for each sensitivity relationship...
+            for c in self.sensitivities:
+                # try to get the sensitivity map from the model and obs
+                try:
+                    ref_corr_list, ref_corr_p_list = _retrieveCorr(c.name, os.path.join(c.output_path,"%s_%s.nc" % (self.name,"Benchmark")))
+                    com_corr_list, com_corr_p_list = _retrieveCorr(c.name, os.path.join(c.output_path,"%s_%s.nc" % (self.name,m.name)))
+                    com_bias_map_list, com_biasscore_map_list = _retrieveBias(c.name, os.path.join(c.output_path,"%s_%s.nc" % (self.name,m.name)))
+                    ref_name = self.longname.split('/')[0]
+                    ref_minmax_list = [(ref_corr.data.min(), ref_corr.data.max()) \
+                                       for ref_corr in ref_corr_list]
+                    com_name = c.longname.split('/')[0]
+                    com_minmax_list = [(com_corr.data.min(), com_corr.data.max()) \
+                                       for com_corr in com_corr_list]
+                except:
+                    continue
+
+                # Add figures to the html page
+                page.addFigure(c.longname,
+                               "benchmark_sens_%s" % com_name,
+                               "Benchmark_RNAME_sens_%s.png" % com_name,
+                               legend = False,
+                               benchmark = False)
+                page.addFigure(c.longname,
+                               "sens_%s" % com_name,
+                               "MNAME_RNAME_sens_%s.png" % com_name,
+                               legend = False,
+                               benchmark = False)
+                page.addFigure(c.longname,
+                               "sens_diff_%s" % com_name,
+                               "MNAME_RNAME_sens_diff_%s.png" % com_name,
+                               legend = False,
+                               benchmark = False)
+
+                # Analysis over regions
+                ## for region in self.regions: ## !!! 
TO-DO: something other than None to accept + # Make the plots + _plotMap(ref_corr_list, ref_corr_p_list, ref_minmax_list, + "%s/%s, %s" % (ref_name, self.name, + post.UnitStringToMatplotlib(ref_corr.unit)), + os.path.join(self.output_path, + "%s_%s_rel_%s.png" % ("Benchmark",region,ref_name))) + _plotMap(com_corr_list, com_corr_p_list, com_minmax_list, + "%s/%s, %s" % (com_name, c.name, + post.UnitStringToMatplotlib(com_corr.unit)), + os.path.join(self.output_path, + "%s_%s_rel_%s.png" % ("Benchmark",region,com_name))) + _plotMap(com_bias_map_list, None, + "Bias, %s/%s, %s" % (ref_name, com_name, + post.UnitStringToMatplotlib(ref_corr.unit))) + _plotMap(com_biasscore_map_list, None, + "Bias Score, %s/%s" % (ref_name, com_name)) diff --git a/src/ILAMB/Confrontation.py b/src/ILAMB/Confrontation.py index ed592305..d2253ce3 100644 --- a/src/ILAMB/Confrontation.py +++ b/src/ILAMB/Confrontation.py @@ -143,6 +143,7 @@ def __init__(self,**keywords): self.plot_unit = keywords.get("plot_unit",None) self.space_mean = keywords.get("space_mean",True) self.relationships = keywords.get("relationships",None) + self.sensitivities = keywords.get("sensitivities",None) # YW self.keywords = keywords self.extents = np.asarray([[-90.,+90.],[-180.,+180.]]) self.study_limits = [] @@ -163,7 +164,7 @@ def __init__(self,**keywords): pages.append(post.HtmlPage("MeanState","Mean State")) pages[-1].setHeader("CNAME / RNAME / MNAME") pages[-1].setSections(["Temporally integrated period mean", - "Spatially integrated regional mean"]) + "Spatially integrated regional mean"]) # Datasites page self.hasSites = False diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 46088836..890c87af 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -10,6 +10,9 @@ import numpy as np import matplotlib.pyplot as plt import warnings +from scipy.stats import linregress +import dask.array as dsa + def _shiftLon(lon): return (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360 @@ -36,6 +39,92 @@ def _createBnds(x): x_bnds[ 0,0] = x[ 0] - 0.5*(x[ 1]-x[ 0]) x_bnds[-1,1] = x[-1] + 0.5*(x[-1]-x[-2]) return x_bnds + +def _olsTensor(Y, x): + """ Repeated calculation of linear regression in the spatial dimensions. + + Parameters + ---------- + Y : np.ma.array + The variable of interest. The first dimension will be assumed to be + time (replicate observations). + x : np.array or np.ma.array + The time variable of interest. If one-dimensional, will be propagated + to the dimensionality of Y. If having the same dimensionality as Y, + must be a masked array. + + Returns + ------- + r : np.ma.array + The trend. If x only has a time dimension, `r` is a scalar. + Otherwise, `r` has the same dimensionality as x[1:]. + p : np.ma.array + The two-sided p-values of the trend. If x only has a time + dimension, `p` is a scalar. Otherwise, `p` has the same + dimensionality as x[1:]. 
+ """ + if type(Y) != np.ma.core.MaskedArray: + raise TypeError('Y must be a masked array') + if Y.shape[0] < 3: + raise ValueError('At least three observations are needed') + + if (type(x) != np.ma.core.MaskedArray) and (type(x) != np.ndarray): + raise TypeError('x must be either masked or ordinary numpy array') + if (not np.allclose(x.shape, Y.shape)) and (len(x.shape) != 1): + raise ValueError('x must be either 1-dimensional or has the same shape as Y') + + # homogenize the shape and mask of x and Y + if type(Y.mask) == bool: + Y.mask = np.full(Y.shape, Y.mask) + if type(x) == np.ma.core.MaskedArray: + if type(x.mask) == bool: + x.mask = np.full(x.shape, x.mask) + else: + x = np.ma.array(x, mask = np.full(x.shape, False)) + + orig_shape = Y.shape + Y = Y.reshape(Y.shape[0], 1, int(np.prod(Y.shape[1:]))) + if len(x.shape) != 1: + x = x.reshape(Y.shape) + else: + x = np.ma.array(np.broadcast_to(x.data.reshape(-1,1,1), Y.shape), + mask = np.broadcast_to(x.mask.reshape(-1,1,1), Y.shape)) + x = np.ma.array(x.data, mask = x.mask | Y.mask) + Y = np.ma.array(Y, mask = x.mask) + + # add constant term + x = np.ma.concatenate([np.ma.array(np.ones(Y.shape), mask = Y.mask), x], axis = 1) + + # calculate the regression coefficients; treating the masked points as if zero. + xx = np.where(x.mask == False, x.data, 0.) + yy = np.where(Y.mask == False, Y.data, 0.) + beta = np.einsum('ijk,jlk->ilk', + np.einsum('ijk,ljk->ilk', + np.linalg.pinv(np.einsum('ijk,ilk->jlk',xx,xx \ + ).transpose(2,0,1)).transpose(1,2,0), + xx), yy) + + # calculate the p-value + from scipy.stats import t + dof = np.sum(Y.mask == False, axis = 0) - 2 + resid = yy - np.einsum('ijk,jlk->ilk', xx, beta) + mse = np.sum(np.power(resid,2), axis=0) / dof + std = np.ma.sum(np.ma.power(x[:,[1],:] - \ + np.ma.mean(x[:,[1],:],axis=0,keepdims=True), 2), axis = 0) + tval = beta / np.sqrt(mse/std) + pval = 2 * t.sf(np.abs(tval), dof) + + # discard intercept & restore shape + beta = np.ma.array(beta[1,:], mask = np.sum(Y.mask==False, axis = 0)<3) + pval = np.ma.array(pval[1,:], mask = np.sum(Y.mask==False, axis = 0)<3) + if len(orig_shape) > 1: + beta = beta.reshape(orig_shape[1:]) + pval = pval.reshape(orig_shape[1:]) + else: + beta = float(beta.data) + pval = float(pval.data) + return beta, pval + class Variable: r"""A class for managing variables and their analysis. 
@@ -134,7 +223,13 @@ def __init__(self,**keywords):
         convert_calendar = keywords.get("convert_calendar",True)
 
         out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname,convert_calendar=convert_calendar,z0=z0,zf=zf) # YW
         data,data_bnds,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbounds,ndata,calendar,attr = out
 
         # Add handling for some units which cf_units does not support
         unit = unit.replace("psu","1e-3")
@@ -349,7 +444,7 @@ def integrateInTime(self,**keywords):
 
         # handle units
         unit = Unit(self.unit)
-        name = self.name + "_integrated_over_time"
+        name = self.name + "_integral_over_time"
 
         if mean:
 
@@ -623,6 +718,133 @@ def _integrate(var,areas):
                         depth_bnds = self.depth_bnds,
                         name = name)
 
+    def trendInTime(self,**keywords):
+        r"""YW: Compute the grid-by-grid trend in the variable
+        over a given time period, ignoring missing values.
+
+        Parameters
+        ----------
+        t0 : float, optional
+            initial time in days since 1/1/1850
+        tf : float, optional
+            final time in days since 1/1/1850
+        mean : bool, optional
+            average over a year instead of summing
+
+        Returns
+        -------
+        trend : ILAMB.Variable.Variable
+            a Variable instance with the trend value along with the
+            appropriate name and unit change
+
+        trend_p : ILAMB.Variable.Variable
+            a Variable instance with the p-value of the trend
+        """
+        if not self.temporal: raise il.NotTemporalVariable()
+        t0   = keywords.get("t0",self.time_bnds[:,0].min())
+        tf   = keywords.get("tf",self.time_bnds[:,1].max())
+        mean = keywords.get("mean",False)
+
+        # find which time bounds are included even partially in the interval [t0,tf]
+        time_bnds = np.copy(self.time_bnds)
+        ind = np.where((t0<time_bnds[:,1])*(tf>time_bnds[:,0]))[0]
+        time_bnds[(t0>time_bnds[:,0])*(t0<time_bnds[:,1]),0] = t0
+        time_bnds[(tf>time_bnds[:,0])*(tf<time_bnds[:,1]),1] = tf
+        time_bnds = time_bnds[ind]
+
+        mask = False
+        if self.data.ndim > 1 and self.data.mask.size > 1:
+            ##mask = np.apply_along_axis(np.all,0,self.data.mask[ind])
+            mask = np.all(self.data.mask[ind], 0)
+        data = np.ma.masked_array(self.data[ind],mask=mask,copy=False)
+        if self.data_bnds is not None:
+            data_bnds = np.ma.concatenate([self.data_bnds[...,0][ind],
+                                           self.data_bnds[...,1][ind]], axis = -1)
+
+        # integrate the data by year
+        year = np.array([cf.num2date(i, "days since 1850-1-1 00:00:00","noleap").year \
+                         for i in (time_bnds[:,0] + time_bnds[:,1])*0.5])
+        dt = (time_bnds[:,1]-time_bnds[:,0])
+        # (expand this dt to the other dimensions of the data array (i.e. 
space or datasites)) + for i in range(self.data.ndim-1): dt = np.expand_dims(dt,axis=-1) + + np.seterr(over='ignore',under='ignore') + integral = [] + if self.data_bnds is not None: + integral_bnd = [] + for yy in np.unique(year): + ind2 = np.where(year == yy)[0] + temp = (data[ind2]*dt[ind2]).sum(axis=0, keepdims=True) + if self.data_bnds is not None: + temp_bnd = np.ma.concatenate([(data_bnds[...,0][ind2] * dt[ind2] \ + ).sum(axis=0, keepdims=True), + (data_bnds[...,1][ind2] * dt[ind2] \ + ).sum(axis=0, keepdims=True)], axis = -1) + if mean: + # divide thru by the non-masked amount of time + if self.data.mask.size > 1: + dt2 = (dt[ind2]*(mask[ind2]==0)).sum(axis=0) + else: + dt2 = dt[ind2].sum(axis=0) + temp = temp / dt2 + if self.data_bnds is not None: + temp_bnd[...,0] = temp_bnd[...,0] / dt2 + temp_bnd[...,1] = temp_bnd[...,1] / dt2 + integral.append(temp) + if self.data_bnds is not None: + integral_bnd.append(temp_bnd) + integral = np.ma.stack(integral, axis = 0) + if self.data_bnds is not None: + integral_bnd = np.ma.stack(integral_bnd, axis = 0) + np.seterr(over='raise',under='raise') + + if not mean: + # if not a mean, we need to potentially handle unit conversions + unit0 = Unit("d")*unit + unit = Unit(unit0.format().split()[-1]) + if not isinstance(integral.mask, np.ndarray): + if integral.mask == True: + integral=np.ma.masked_array(data=integral.data, + mask=np.ones(integral.shape,dtype='bool')) + else: + integral=np.ma.masked_array(data=integral.data, + mask=np.zeros(integral.shape,dtype='bool')) + unit0.convert(integral,unit,inplace=True) + if integral_bnd is not None: + unit0.convert(integral_bnd,unit,inplace=True) + + # calculate the trend and the significance + trend, trend_p = _olsTensor(integral, np.mean(time_bnds, axis = 1)) + if integral_bnd is not None: + trend_lower_bnd, trend_lower_bnd_p = _olsTensor(integral_bnd[0,...], + np.mean(time_bnds, axis =1)) + trend_upper_bnd, trend_upper_bnd_p = _olsTensor(integral_bnd[1,...], + np.mean(time_bnds, axis =1)) + + trend_bnd = np.ma.stack([trend_lower_bnd[np.newaxis, ...], + trend_upper_bnd[np.newaxis, ...]], axis = 0) + trend_bnd_p = np.ma.stack([trend_lower_bnd_p[np.newaxis, ...], + trend_upper_bnd_p[np.newaxis, ...]], axis = 0) + + # handle units + unit = Unit(self.unit + "/year") + name = self.name + "_trend_over_time" + + return Variable(data = trend, data_bnds = trend_bnd, + unit = "%s" % unit, name = name, + lat = self.lat, lat_bnds = self.lat_bnds, + lon = self.lon, lon_bnds = self.lon_bnds, + depth = self.depth, depth_bnds = self.depth_bnds, + area = self.area, ndata = self.ndata), \ + Variable(data = trend_p, data_bnds = trend_bnd_p, unit = None, + name = name.replace("trend", "trend_p"), + lat = self.lat, lat_bnds = self.lat_bnds, + lon = self.lon, lon_bnds = self.lon_bnds, + depth = self.depth, depth_bnds = self.depth_bnds, + area = self.area, ndata = self.ndata) + + def siteStats(self,region=None,weight=None,intabs=False): """Computes the mean and standard deviation of the variable over all data sites. @@ -692,6 +914,53 @@ def annualCycle(self): depth_bnds = self.depth_bnds, ndata = self.ndata) + def trendAnnualCycle(self): + """YW: Computes annual cycle of the linear trend for the variable. + + For each site/cell/depth in the variable, compute the annual cycle of linear trend. + + Returns + ------- + trend : ILAMB.Variable.Variable + The annual cycle trend values. + trend_p : ILAMB.Variable.Variable + The annual cycle trend significance values. 
+ """ + if not self.temporal: raise il.NotTemporalVariable() + assert self.monthly + assert self.time.size > 11 + begin = np.argmin(self.time[:11]%365) + end = begin+int(self.time[begin:].size/12.)*12 + shp = (-1,12) + self.data.shape[1:] + v = self.data[begin:end,...].reshape(shp) + trend, trend_p = _olsTensor(v, np.arange(v.shape[0])) + return Variable(data = trend, + unit = self.unit + "/year", + name = "annual_cycle_trend_of_%s" % self.name, + time = mid_months, + time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T, + lat = self.lat, + lat_bnds = self.lat_bnds, + lon = self.lon, + lon_bnds = self.lon_bnds, + area = self.area, + depth = self.depth, + depth_bnds = self.depth_bnds, + ndata = self.ndata), + Variable(data = trend_p, + unit = self.unit + "/year", + name = "annual_cycle_trend_p_of_%s" % self.name, + time = mid_months, + time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T, + lat = self.lat, + lat_bnds = self.lat_bnds, + lon = self.lon, + lon_bnds = self.lon_bnds, + area = self.area, + depth = self.depth, + depth_bnds = self.depth_bnds, + ndata = self.ndata) + def timeOfExtrema(self,etype="max"): """Returns the time of the specified extrema. @@ -1529,6 +1798,199 @@ def _correlation(x,y,axes=None): time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, lat=out_lat,lon=out_lon,area=out_area) + + def partialCorrelation(self, var_indep_list, ctype, region = None): + """Computes the correlation between variable and independent variables. + + Parameters + ---------- + var_indep_list: list of ILAMB.Variable.Variable + The variables with which we will compute partial correlation + ctype : str + The correlation type, one of {"spatial","temporal","spatiotemporal"}. + Currently only "temporal" is allowed. + region : str, optional + The region over which to perform a spatial correlation + + Notes + ----- + Need to better think about what correlation means when data + are masked. The sums ignore the data but then the number of + items *n* is not constant and should be reduced for masked + values. + """ + def _covarTensor(tensor3d): + """ Covariance matrix calculation for each data poinat along an + extra dimension, e.g., in space""" + if tensor3d.dtype != np.ndarray: + raise TypeError('Input must be numpy array.') + if len(tensor3d.shape) != 3: + raise TypeError('Input must have 3 dimensions.') + N = tensor3d.shape[0] + m1 = tensor3d - tensor3d.sum(0, keepdims=True) / N + y_out = np.einsum('ijk,ilk->jlk',m1,m1)/(N-1) + return y_out + + def _partialCorrTensor(x, y, covar_list): + """ Repeated calculation of partial correlation in the spatial dimensions. + + Parameters + ---------- + x : np.ma.array + The independent variable. The first dimension will be assumed to be + time (replicate observations). + y : np.ma.array + The dependent variable. y must have the same dimensionality as x. + covar_list : list of np.ma.array objects + The covariate variables. Each variable must have the same + dimensionality as x and y. + + Returns + ------- + r : np.ma.array + The partial correlation. If x only has a time dimension, `r` is a + a scalar. Otherwise, `r` has the same dimensionality as x[1:]. + p : np.ma.array + The two-sided p-values of the partial correlation. If x only has + a time dimension, `p` is a scalar. Otherwise, `p` has the same + dimensionality as x[1:]. 
+ """ + if type(x) != np.ma.core.MaskedArray: + raise TypeError('x must be a masked array') + if type(y) != np.ma.core.MaskedArray: + raise TypeError('y must be a masked array') + for vv in covar_list: + if type(vv) != np.ma.core.MaskedArray: + raise ValueError('covar_list must be masked arrays') + if not np.allclose(x.shape, y.shape): + raise ValueError('x and y must be the same shape') + for vv in covar_list: + if not np.allclose(x.shape, vv.shape): + raise ValueError('x and covar_list must be the same shape') + if x.shape[0] < 3: + raise ValueError('At least three observations are needed') + + x0 = x.copy() + y0 = y.copy() + + orig_shape = x.shape + if len(orig_shape) == 1: + x = x.reshape(-1, 1, 1) # extra 2nd dimension for concat + y = y.reshape(-1, 1, 1) + covar_relist = [] + for vv in covar_list: + covar_relist.append(vv.reshape(-1, 1, 1)) + else: + new_shape = orig_shape[0], 1, np.prod(orig_shape[1:]) + x = x.reshape(new_shape) + y = y.reshape(new_shape) + covar_relist = [] + for vv in covar_list: + covar_relist.append(vv.reshape(new_shape)) + covar_list = covar_relist + covar_relist = []; del covar_relist + + data = np.ma.concatenate([x,y] + covar_list, axis = 1) + del x, y; covar_list = []; del covar_list + + # remove invalid points + retain_ind = np.any(np.all(data.mask == False, axis = 1), axis = 0) + if sum(retain_ind) == 0: + raise ValueError('At least one valid spatial data point is needed') + ## print(retain_ind) # DEBUG + data = data[:, :, retain_ind] + + # TO-DO: Need to think of a way to deal with the different number + # of data points in space. Right now it imposes the minimum + # overlapping number of valid data points. + drop_replica = np.all(np.all(data.mask == False, axis = 2), axis = 1) + ## print(drop_replica) # DEBUG + if sum(drop_replica) < 3: + raise ValueError('At least three valid observations are needed') + data = data[drop_replica, :, :] + + # calculate the partial correlation and significance (translated from pingouin) + V = _covar_tensor(data) + ##print(data.shape) # DEBUG + ##print(V.shape) # DEBUG + + Vi = np.linalg.pinv(V.transpose(2,0,1)).transpose(1,2,0) + D = np.zeros(Vi.shape) + for ii in np.arange(Vi.shape[0]): + D[ii,ii,:] = np.sqrt( 1 / Vi[ii,ii,:] ) + pcor = -1 * np.einsum('jik,ilk->jlk', np.einsum('jik,ilk->jlk',D,Vi), D) + ## print(-1 * D[:,:,5] @ Vi[:,:,5] @ D[:,:,5] - pcor[:,:,5]) # check if correct + r = pcor[0, 1, :] + + from scipy.stats import t + n = data.shape[0] + k = data.shape[1] - 2 + dof = n - k - 2 + tval = r * np.sqrt(dof / (1 - r**2)) + pval = 2 * t.sf(np.abs(tval), dof) + + # restore shape + def _restore_shape(array, retain_ind, orig_shape): + array_restore = np.ma.empty(len(retain_ind)) + array_restore.mask = retain_ind == False + array_restore.data[retain_ind] = array + array_restore = array_restore.reshape(orig_shape[1:]) + return array_restore + + # DEBUG restore shape + ##return x0[drop_replica,:][0,:],_restore_shape(data[0,0,:],retain_ind,orig_shape), \ + ## y0[drop_replica,:][0,:],_restore_shape(data[0,1,:],retain_ind,orig_shape) + + if len(orig_shape) == 1: + return r[0], pval[0] + else: + r_restore = _restore_shape(r, retain_ind, orig_shape) + p_restore = _restore_shape(pval, retain_ind, orig_shape) + return r_restore, p_restore + + + # checks on data consistency + assert region is None + assert self.data.shape == var.data.shape + assert ctype is "temporal" + + # determine arguments for functions + axes = None + out_time = None + out_lat = None + out_lon = None + out_area = None + out_ndata = None + if ctype == 
"temporal": + axes = 0 + if self.spatial: + out_lat = self.lat + out_lon = self.lon + out_area = self.area + elif self.ndata: + out_ndata = self.ndata + else: + raise ValueError("Currently only allowing temporal partial correlations.") + out_time_bnds = None + if out_time is not None: out_time_bnds = self.time_bnds + + result = {} + for i, y in enumerate(var_indep_list): + r, p = _partialCorrTensor(x, y, var_indep_list[:i] + var_indep_list[(i+1):]) + r = Variable(data=r,unit="1", + name="%s_partial_correlation_of_%s_and_%s" % \ + (ctype,self.name,y.name), + time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, + lat=out_lat,lon=out_lon,area=out_area) + p = Variable(data=r,unit="1", + name="%s_partial_pvalue_of_%s_and_%s" % \ + (ctype,self.name,y.name), + time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, + lat=out_lat,lon=out_lon,area=out_area) + result[y.name] = {'r': r, 'p': p} + return result + + def bias(self,var): """Computes the bias between a given variable and this variable. diff --git a/src/ILAMB/constants.py b/src/ILAMB/constants.py index 1f39c19b..7bd5cd93 100644 --- a/src/ILAMB/constants.py +++ b/src/ILAMB/constants.py @@ -185,6 +185,27 @@ "sidelbl" :"MODEL MAX MONTH", "haslegend" :True } +space_opts["trend_bias"] = { "name" :"Temporally integrated period mean bias", + !!! "cmap" :"bias", + "sym" :True, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_bias.png", + "sidelbl" :"BIAS", + "haslegend" :True } +space_opts["trend_biasscore"] = { "name" :"Temporally integrated period mean bias", + !!! "cmap" :"bias", + "sym" :True, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_bias.png", + "sidelbl" :"BIAS", + "haslegend" :True } + time_opts = {} time_opts["spaceint"] = { "name" : "Spatially integrated regional mean", @@ -222,3 +243,39 @@ "ticks" : mid_months/365.+1850., "ticklabels" : lbl_months, "ylabel" : "unit"} + +time_opts["trend"] = { "name" : "Spatially integrated regional mean", + !!! "section" : "Spatially integrated regional mean", + "haslegend" : False, + "pattern" : "MNAME_RNAME_spaceint.png", + "sidelbl" : "REGIONAL MEAN", + "ticks" : None, + "ticklabels" : None, + "ylabel" : "unit"} + +time_opts["trend_p"] = { "name" : "Accumulated mean", + !!! "section" : "Spatially integrated regional mean", + "haslegend" : False, + "pattern" : "MNAME_RNAME_accumulate.png", + "sidelbl" : "ACCUMULATION", + "ticks" : None, + "ticklabels" : None, + "ylabel" : "unit"} + +time_opts["trend_cycle"] = { "name" : "Spatially integrated regional mean cycle", + !!! "section" : "Spatially integrated regional mean", + "haslegend" : False, + "pattern" : "MNAME_RNAME_cycle.png", + "sidelbl" : "ANNUAL CYCLE", + "ticks" : mid_months/365.+1850., + "ticklabels" : lbl_months, + "ylabel" : "unit"} + +time_opts["trend_dtcycle"] = { "name" : "Spatially integrated regional mean detrended cycle", + !!! 
"section" : "Spatially integrated regional mean", + "haslegend" : False, + "pattern" : "MNAME_RNAME_dtcycle.png", + "sidelbl" : "MONTHLY ANOMALY", + "ticks" : mid_months/365.+1850., + "ticklabels" : lbl_months, + "ylabel" : "unit"} diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 2153d63b..7cb53d19 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -10,6 +10,7 @@ import logging,re,os import cftime as cf from pkg_resources import parse_version, get_distribution +from scipy.stats import linregress logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) @@ -1111,7 +1112,8 @@ def AnalysisMeanStateSites(ref,com,**keywords): # quantities bias = REF_timeint.bias(COM_timeint) cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit, - data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...], + mask=REF.data.mask), time = REF.time, time_bnds = REF.time_bnds, lat = REF.lat , lat_bnds = REF.lat_bnds, lon = REF.lon , lon_bnds = REF.lon_bnds, @@ -1524,6 +1526,7 @@ def AnalysisMeanStateSpace(ref,com,**keywords): del shift_map,shift_score_map # IAV: maps, scalars, scores + # !!!!!!!!!!!!! Why is this not plotted? if not skip_iav: REF_iav = Variable(data = np.ma.masked_array(REF.data-ExtendAnnualCycle(REF.time,ref_cycle.data,ref_cycle.time),mask=REF.data.mask), unit = unit, @@ -1660,6 +1663,378 @@ def AnalysisMeanStateSpace(ref,com,**keywords): return + +def AnalysisTrendStateSpace(ref,com,**keywords): + """Perform a trend state analysis. + + This trend state analysis examines the model trend state in space + and time. We compute the variable's trend over the time period + at each spatial cell or data site as appropriate, as well as the + bias and RMSE relative to the observational variable. We will + output maps of the period trend values and bias. For each spatial + cell or data site we also estimate the phase of the variable by + finding the mean time of year when the maximum occurs and the + phase shift by computing the difference in phase with respect to + the observational variable. In the spatial dimension, we compute a + spatial mean for each of the desired regions and an average annual + cycle. 
+ + Parameters + ---------- + ref : ILAMB.Variable.Variable + the observational (reference) variable + com : ILAMB.Variable.Variable + the model (comparison) variable + regions : list of str, optional + the regions overwhich to apply the analysis + dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the model + benchmark_dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the observations + space_mean : bool, optional + disable to compute sums of the variable over space instead of + mean values + table_unit : str, optional + the unit to use when displaying output in tables on the HTML page + plots_unit : str, optional + the unit to use when displaying output on plots on the HTML page + + """ + from .Variable import Variable + regions = keywords.get("regions" ,["global"]) + dataset = keywords.get("dataset" ,None) + benchmark_dataset = keywords.get("benchmark_dataset",None) + space_mean = keywords.get("space_mean" ,True) + table_unit = keywords.get("table_unit" ,None) + plot_unit = keywords.get("plot_unit" ,None) + mass_weighting = keywords.get("mass_weighting" ,False) + skip_cycle = keywords.get("skip_cycle" ,False) + ref_trend = keywords.get("ref_trend" ,None) + ref_trend_p = keywords.get("ref_trend_p" ,None) + com_trend = keywords.get("com_trend" ,None) + com_trend_p = keywords.get("com_trend_p" ,None) + ILAMBregions = Regions() + spatial = ref.spatial + + # Convert str types to booleans + if type(skip_cycle) == type(""): + skip_cycle = (skip_cycle.lower() == "true") + + # Check if we need to skip parts of the analysis + if not ref.monthly : skip_cycle = True + if ref.time.size < 12: skip_cycle = True + name = ref.name + + # Interpolate both reference and comparison to a grid composed of + # their cell breaks + ref.convert(plot_unit) + com.convert(plot_unit) + lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) + REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + unit = REF.unit + area = REF.area + ndata = REF.ndata + + # Find the trend values over the time period + if ref_trend is None: + ref_trend, ref_trend_p = ref.convert(plot_unit).trendInTime(mean=True) + REF_trend, REF_trend_p = REF.convert(plot_unit).trendInTime(mean=True) + else: + ref_trend.convert(plot_unit) + REF_trend = ref_trend.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + REF_trend_p = ref_trend_p.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + if com_trend is None: + com_trend, com_trend_p = com.convert(plot_unit).trendInTime(mean=True) + COM_trend, COM_trend_p = COM.convert(plot_unit).trendInTime(mean=True) + else: + com_trend.convert(plot_unit) + COM_trend = com_trend.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + COM_trend_p = com_trend_p.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + normalizer = REF_trend.data if mass_weighting else None + + # Prepare spatial mean + ref_and_com = (REF_trend.data.mask == False) * (COM_trend.data.mask == False) + ref_not_com = (REF_trend.data.mask == False) * (COM_trend.data.mask == True ) + com_not_ref = (REF_trend.data.mask == True ) * (COM_trend.data.mask == False) + if ref.time.size > 1: + if benchmark_dataset is not None: + for region in regions: + ref_spaceint = REF.integrateInSpace(region=region,mean=True).convert(table_unit) + ref_spaceint.name = 
"spaceint_of_%s_over_%s" % (name,region) + + ref_union_spaceint = Variable(name = "REF_and_com", unit = REF.unit, + data = np.ma.masked_array(REF.data,mask=(ref_and_com==False)), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = REF.area).integrateInSpace(region=region, + mean=space_mean).convert(table_unit) + + ref_comp_spaceint = Variable(name = "REF_not_com", unit = REF.unit, + data = np.ma.masked_array(REF.data,mask=(ref_not_com==False)), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = REF.area).integrateInSpace(region=region, + mean=space_mean).convert(table_unit) + if dataset is not None: + for region in regions: + com_spaceint = COM.integrateInSpace(region=region,mean=True).convert(table_unit) + com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + + com_union_spaceint = Variable(name = "ref_and_COM", unit = COM.unit, + data = np.ma.masked_array(COM.data,mask=(ref_and_com==False)), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = COM.area).integrateInSpace(region=region, + mean=space_mean).convert(table_unit) + com_comp_spaceint = Variable(name = "COM_not_ref", unit = COM.unit, + data = np.ma.masked_array(COM.data,mask=(com_not_ref==False)), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = COM.area).integrateInSpace(region=region, + mean=space_mean).convert(table_unit) + + # Report period trend values over all possible representations of land + if benchmark_dataset is not None: + ref_trend.name = "trend_of_%s" % name + ref_trend.toNetCDF4(benchmark_dataset,group="TrendState") + ref_trend_p.name = "trend_p_of_%s" % name + ref_trend_p.toNetCDF4(benchmark_dataset,group="TrendState") + + for region in regions: + # reference period trend of the average time series on original grid + ref_period_trend, ref_period_trend_p = \ + ref_spaceint.convert(plot_unit).trendInTime(mean=True) + ref_period_trend.name = "Period Trend (original grids) %s %s" % (name, region) + ref_period_trend.toNetCDF4(benchmark_dataset,group="TrendState") + ref_period_trend_p.name = "Period Trend P (original grids) %s %s" % (name, region) + ref_period_trend_p.toNetCDF4(benchmark_dataset,group="TrendState") + + if dataset is not None: + com_trend.name = "trend_of_%s" % name + com_trend.toNetCDF4(dataset,group="TrendState") + com_trend_p.name = "trend_p_of_%s" % name + com_trend_p.toNetCDF4(dataset,group="TrendState") + + for region in regions: + # reference period trend on intersection of land + ref_union_trend, ref_union_trend_p = \ + ref_union_spaceint.convert(plot_unit).trendInTime(mean=True) + ref_union_trend.name = "Benchmark Period Trend (intersection) %s %s" % (name, region) + ref_union_trend.toNetCDF4(dataset,group="TrendState") + ref_union_trend_p.name = "Benchmark Period Trend P (intersection) %s %s" % \ + (name, region) + ref_union_trend_p.toNetCDF4(dataset,group="TrendState") + + # reference period mean on complement of land + ref_comp_trend, ref_comp_trend_p = \ + ref_comp_spaceint.convert(plot_unit).trendInTime(mean=True) + ref_comp_trend.name = "Benchmark Period Trend (complement) %s %s" % (name, region) + ref_comp_trend.toNetCDF4(dataset,group="TrendState") + ref_comp_trend_p.name = "Benchmark Period Trend P (complement) %s %s" % (name, region) + ref_comp_trend_p.toNetCDF4(dataset,group="TrendState") + + # comparison period mean on original grid + com_period_trend, com_period_trend_p = \ + com_spaceint.convert(plot_unit).trendInTime(mean=True) + com_period_trend.name = "Period Trend (original 
grids) %s %s" % (name, region) + com_period_trend.toNetCDF4(dataset,group="TrendState") + com_period_trend_p.name = "Period Trend P (original grids) %s %s" % (name, region) + com_period_trend_p.toNetCDF4(dataset,group="TrendState") + + # comparison period mean on intersection of land + com_union_trend, com_union_trend_p = \ + com_union_spaceint.convert(plot_unit).trendInTime(mean=True) + com_union_trend.name = "Model Period Trend (intersection) %s %s" % (name, region) + com_union_trend.toNetCDF4(dataset,group="TrendState") + com_union_trend_p.name = "Model Period Trend P (intersection) %s %s" % (name, region) + com_union_trend_p.toNetCDF4(dataset,group="TrendState") + + # comparison period mean on complement of land + com_comp_trend, com_comp_trend_p = \ + com_comp_spaceint.convert(plot_unit).trendInTime(mean=True) + com_comp_trend.name = "Model Period Trend (complement) %s %s" % (name, region) + com_comp_trend.toNetCDF4(dataset,group="TrendState") + com_comp_trend_p.name = "Model Period Trend P (complement) %s %s" % (name, region) + com_comp_trend_p.toNetCDF4(dataset,group="TrendState") + + # Now that we are done reporting on the intersection / complement, + # set all masks to the intersection + REF.data.mask += np.ones(REF.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) + COM.data.mask += np.ones(COM.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) + REF_trend.data.mask = (ref_and_com==False) + REF_trend_p.data.mask = (ref_and_com==False) + COM_trend.data.mask = (ref_and_com==False) + COM_trend_p.data.mask = (ref_and_com==False) + if mass_weighting: normalizer.mask = (ref_and_com==False) + + # Spatial Distribution: scalars and scores + if dataset is not None: + for region in regions: + space_std,space_cor,sd_score = REF_trend.spatialDistribution(COM_trend,region=region) + sd_score.name = "Trend Spatial Distribution Score %s %s" % (name, region) + sd_score.toNetCDF4(dataset,group="TrendState", + attributes={"std":space_std.data, + "R" :space_cor.data}) + + # Cycle: maps, scalars, and scores + if not skip_cycle: + ref_trend_cycle_map = REF.trendAnnualCycle() + ref_maxt_map = ref_trend_cycle_map.timeOfExtrema(etype="max") + ref_maxt_map.name = "trend_phase_map_of_%s" % name + com_trend_cycle_map = COM.trendAnnualCycle() + com_maxt_map = com_trend_cycle_map.timeOfExtrema(etype="max") + com_maxt_map.name = "trend_phase_map_of_%s" % name + shift_map = ref_maxt_map.phaseShift(com_maxt_map) + shift_map.name = "trend_shift_map_of_%s" % name + shift_score_map = ScoreSeasonalCycle(shift_map) + shift_score_map.name = "trend_shiftscore_map_of_%s" % name + shift_map.data /= 30.; shift_map.unit = "months" + if benchmark_dataset is not None: + ref_maxt_map.toNetCDF4(benchmark_dataset,group="TrendState") + for region in regions: + ref_trend_cycle = ref_spaceint.trendAnnualCyclce() + ref_trend_cycle.name = "trend_cycle_of_%s_over_%s" % (name,region) + ref_trend_cycle.toNetCDF4(benchmark_dataset,group="TrendState") + ref_dtcycle = deepcopy(ref_trend_cycle) + ref_dtcycle.data -= ref_trend_cycle.data.mean() + ref_dtcycle.name = "trend_dtcycle_of_%s_over_%s" % (name,region) + ref_dtcycle.toNetCDF4(benchmark_dataset,group="TrendState") + if dataset is not None: + com_maxt_map .toNetCDF4(dataset,group="TrendState") + shift_map .toNetCDF4(dataset,group="TrendState") + shift_score_map.toNetCDF4(dataset,group="TrendState") + for region in regions: + com_trend_cycle = com_spaceint.trendAnnualCycle() + com_trend_cycle.name = "trend_cycle_of_%s_over_%s" % (name,region) + 
com_trend_cycle.toNetCDF4(dataset,group="TrendState") + com_dtcycle = deepcopy(com_trend_cycle) + com_dtcycle.data -= com_trend_cycle.data.mean() + com_dtcycle.name = "trend_dtcycle_of_%s_over_%s" % (name,region) + com_dtcycle.toNetCDF4(dataset,group="TrendState") + shift = shift_map.integrateInSpace(region=region,mean=True,intabs=True) + shift_score = shift_score_map.integrateInSpace(region=region,mean=True, + weight=normalizer) + shift.name = "Trend Phase Shift %s %s" % (name, region) + shift.toNetCDF4(dataset,group="TrendState") + shift_score.name = "Trend Seasonal Cycle Score %s %s" % (name, region) + shift_score.toNetCDF4(dataset,group="TrendState") + + del shift_map,shift_score_map + + # Bias: maps, scalars, and scores + bias = REF_trend.bias(COM_trend).convert(plot_unit) + # !!! TO-DO: Use the confidence interval of REF_trend instead of the REF_trend + bias_score_map = Score(bias, REF_trend) + bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask + if dataset is not None: + bias.name = "trend_bias_map_of_%s" % name + bias.toNetCDF4(dataset,group="TrendState") + bias_score_map.name = "trend_biasscore_map_of_%s" % name + bias_score_map.toNetCDF4(dataset,group="TrendState") + for region in regions: + bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) + bias_val.name = "Trend Bias %s %s" % (name, region) + bias_val.toNetCDF4(dataset,group="TrendState") + bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + bias_score.name = "Trend Bias Score %s %s" % (name, region) + bias_score.toNetCDF4(dataset,group="TrendState") + del bias,bias_score_map + + return + + +def AnalysisPartialCorrSpace(ref,com,ref_indep_list,com_indep_list,**keywords): + """Perform a partial correlation analysis. + + This partial correlation analysis examines the partial correlation + between ref/com and the corresponding list of independent variables + in space and time. We compute the variable's partial correlation over + the time period at each spatial cell or data site as appropriate. We + will output maps of the period partial correlation values and bias. 
+
+    Parameters
+    ----------
+    ref : ILAMB.Variable.Variable
+        the observational (reference) variable
+    com : ILAMB.Variable.Variable
+        the model (comparison) variable
+    ref_indep_list : list of ILAMB.Variable.Variable
+        the observational independent variables in partial correlation
+    com_indep_list : list of ILAMB.Variable.Variable
+        the model (comparison) independent variables in partial correlation
+    regions : list of str, optional
+        the regions over which to apply the analysis
+    dataset : netCDF4.Dataset, optional
+        an open dataset in write mode for caching the results of the
+        analysis which pertain to the model
+    benchmark_dataset : netCDF4.Dataset, optional
+        an open dataset in write mode for caching the results of the
+        analysis which pertain to the observations
+    """
+    from .Variable import Variable
+    regions           = keywords.get("regions"          ,["global"])
+    dataset           = keywords.get("dataset"          ,None)
+    benchmark_dataset = keywords.get("benchmark_dataset",None)
+    table_unit        = keywords.get("table_unit"       ,None)
+    plot_unit         = keywords.get("plot_unit"        ,None)
+    mass_weighting    = keywords.get("mass_weighting"   ,False)
+    ref_corr          = keywords.get("ref_corr"         ,None)
+    com_corr          = keywords.get("com_corr"         ,None)
+    ILAMBregions = Regions()
+    spatial = ref.spatial
+    name = ref.name
+
+    # Interpolate both reference and comparison to a grid composed of
+    # their cell breaks
+    ref.convert(plot_unit)
+    com.convert(plot_unit)
+    lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com)
+    REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds)
+    COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds)
+    # 2-D map of cells where both datasets have at least one valid time
+    ref_and_com = np.any(REF.data.mask == False, axis = 0) * \
+                  np.any(COM.data.mask == False, axis = 0)
+    REF_indep_list = [rr.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) \
+                      for rr in ref_indep_list]
+    COM_indep_list = [cc.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) \
+                      for cc in com_indep_list]
+    unit  = ref.unit
+    area  = ref.area
+    ndata = ref.ndata
+
+    # Find the partial correlation values over the time period
+    assert ref_corr is None
+    assert com_corr is None
+
+    # Write to file
+    for region in regions:
+        ref_corr = ref.partialCorrelation(ref_indep_list, ctype = "temporal", region = region)
+        com_corr = com.partialCorrelation(com_indep_list, ctype = "temporal", region = region)
+        REF_corr = REF.partialCorrelation(REF_indep_list, ctype = "temporal", region = region)
+        COM_corr = COM.partialCorrelation(COM_indep_list, ctype = "temporal", region = region)
+
+        for pp in ref_corr.keys():
+            for ss in ['r', 'p']:
+                temp = ref_corr[pp][ss]
+                temp.name = 'Benchmark (original grids) ' + temp.name + ' ' + region
+                temp.toNetCDF4(benchmark_dataset, group = 'Sensitivities')
+
+            for ss in ['r', 'p']:
+                temp = com_corr[pp][ss]
+                temp.name = 'Model (original grids) ' + temp.name + ' ' + region
+                temp.toNetCDF4(dataset, group = 'Sensitivities')
+
+            # Calculate the bias of the partial correlation map and write to file
+            bias = REF_corr[pp]['r'].bias(COM_corr[pp]['r'])
+            # !!! TO-DO: Use the confidence interval of REF_corr instead of the REF_corr
+            bias_score_map = Score(bias, REF_corr[pp]['r'])
+            bias_score_map.data.mask = ref_and_com == False
+            bias.name = 'sensitivity_bias_map_of_%s_and_%s' % (name, pp)
+            bias.toNetCDF4(dataset, group = 'Sensitivities')
+            bias_score_map.name = 'sensitivity_biasscore_map_of_%s_and_%s' % (name, pp)
+            bias_score_map.toNetCDF4(dataset, group = 'Sensitivities')
+
+            del bias, bias_score_map
+
+    return
+
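+# A minimal usage sketch for the analysis above (illustrative only; the
+# surrounding Confrontation machinery normally prepares these arguments,
+# and the variable names here are hypothetical):
+#
+#     import ILAMB.ilamblib as il
+#     from netCDF4 import Dataset
+#     with Dataset("model.nc","w") as mds, Dataset("obs.nc","w") as ods:
+#         il.AnalysisPartialCorrSpace(obs_sm, mod_sm,
+#                                     [obs_pr, obs_tas], [mod_pr, mod_tas],
+#                                     dataset = mds, benchmark_dataset = ods,
+#                                     regions = ["global"])
+#
+# obs_sm/mod_sm are soil moisture Variables; obs_pr/obs_tas etc. are the
+# independent variables, all defined on overlapping time periods.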
 
 def ClipTime(v,t0,tf):
     """Remove time from a variable based on input bounds.
 
diff --git a/test/test_SoilMoisture.py b/test/test_SoilMoisture.py
new file mode 100644
index 00000000..fa3fe5c3
--- /dev/null
+++ b/test/test_SoilMoisture.py
@@ -0,0 +1,109 @@
+from ILAMB.Variable import Variable
+import ILAMB.ilamblib as il
+import numpy as np
+import os
+
+def test_integrateInTime(variables):
+    head = "\n--- Testing integrateInTime() "
+    print("%s%s\n" % (head,"-"*(120-len(head))))
+    for vdict in variables:
+        var = vdict["var"]
+        try:
+            vdict["timeint"]      = var.integrateInTime()
+            vdict["timeint_mean"] = var.integrateInTime(mean=True)
+            print(vdict["timeint"])
+            print(vdict["timeint_mean"])
+        except il.NotTemporalVariable:
+            pass
+
+def test_integrateInSpace(variables):
+    head = "\n--- Testing integrateInSpace() "
+    print("%s%s\n" % (head,"-"*(120-len(head))))
+    for vdict in variables:
+        var = vdict["var"]
+        try:
+            vdict["spaceint"]             = var.integrateInSpace()
+            vdict["spaceint_mean"]        = var.integrateInSpace(mean=True)
+            vdict["spaceint_amazon"]      = var.integrateInSpace(region="amazon")
+            vdict["spaceint_amazon_mean"] = var.integrateInSpace(region="amazon",mean=True)
+            print(vdict["spaceint"])
+            print(vdict["spaceint_mean"])
+            print(vdict["spaceint_amazon"])
+            print(vdict["spaceint_amazon_mean"])
+        except il.NotSpatialVariable:
+            pass
+
+def test_annualCycle(variables):
+    head = "\n--- Testing annualCycle() "
+    print("%s%s\n" % (head,"-"*(120-len(head))))
+    for vdict in variables:
+        var = vdict["var"]
+        try:
+            # note: not testing std, max, and min (assuming ok since functions are similar)
+            vdict["cycle"],junk,junk,junk = var.annualCycle()
+            print(vdict["cycle"])
+        except il.NotTemporalVariable:
+            pass
+
+def test_correlation(variables):
+    head = "\n--- Testing correlation() "
+    print("%s%s\n" % (head,"-"*(120-len(head))))
+    for vdict in variables:
+        var = vdict["var"]
+        try:
+            if var.spatial or var.ndata:
+                vdict["corr_spatial"] = var.correlation(var,"spatial")
+                print(vdict["corr_spatial"])
+            if var.temporal:
+                vdict["corr_temporal"] = var.correlation(var,"temporal")
+                print(vdict["corr_temporal"])
+            if var.spatial and var.temporal:
+                vdict["corr_both"] = var.correlation(var,"spatiotemporal")
+                print(vdict["corr_both"])
+        except il.NotTemporalVariable:
+            pass
+
+def test_bias(variables):
+    head = "\n--- Testing bias() "
+    print("%s%s\n" % (head,"-"*(120-len(head))))
+    for vdict in variables:
+        var = vdict["var"]
+        try:
+            vdict["bias"] = var.bias(var)
+            print(vdict["bias"])
+        except il.NotSpatialVariable:
+            pass
+
+# Setup different types of variables
+gpp = {}
+gpp["var"] = Variable(filename      = os.environ["ILAMB_ROOT"]+"/DATA/gpp/FLUXNET-MTE/derived/gpp.nc",
+                      variable_name = "gpp")
+le = {}
+le["var"] = Variable(filename      = os.environ["ILAMB_ROOT"]+"/DATA/le/FLUXNET/derived/le.nc",
+                     variable_name = "le")
+co2 = {}
+co2["var"] = Variable(filename      = os.environ["ILAMB_ROOT"]+"/DATA/co2/MAUNA.LOA/derived/co2_1959-2013.nc",
+                      variable_name = "co2")
+pi = {}
+pi["var"] = Variable(data = np.pi,
+                     unit = "-",
+                     name = "pi")
+
+variables = [gpp,le,co2,pi]
+
+head = "\n--- Found the following variables for testing "
+print("%s%s\n" % (head,"-"*(120-len(head))))
+for vdict in variables:
+    print(vdict["var"])
+
+test_integrateInTime(variables)
+test_integrateInSpace(variables)
+test_annualCycle(variables)
+# The following helpers are not defined in this file; re-enable once they
+# are ported over from the main Variable test suite:
+# test_timeOfExtrema(variables)
+# test_interpolate(variables)
+# test_phaseShift(variables)
+test_correlation(variables)
+test_bias(variables)
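The tests above require the ILAMB_ROOT sample data. A minimal data-free sketch using a synthetic Variable (the names here are illustrative; region-based tests still need ILAMB's region definitions):

    import numpy as np
    from ILAMB.Variable import Variable

    t   = 15. + 30.4375*np.arange(24)              # two years of monthly data
    syn = {"var": Variable(name = "synthetic", unit = "kg m-2",
                           time = t,
                           lat  = np.linspace(-60., 60., 4),
                           lon  = np.linspace(-150., 150., 6),
                           data = np.ma.masked_array(np.random.randn(24,4,6)))}
    test_integrateInTime([syn])
    test_annualCycle([syn])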
diff --git a/test/test_partial_corr_tensor.py b/test/test_partial_corr_tensor.py
new file mode 100644
index 00000000..10e9da09
--- /dev/null
+++ b/test/test_partial_corr_tensor.py
@@ -0,0 +1,191 @@
+import numpy as np
+import pingouin as pg
+import pandas as pd
+
+# Generate the randomly correlated data
+def gen_test(n):
+    size = 4
+
+    def gen_cov():
+        np.random.seed(800)
+        # random magnitudes with random +/-1 signs, made symmetric positive
+        # semi-definite by cov @ cov.T
+        cov = np.sqrt(np.random.rand(size, size)) * (2*(np.random.rand(size, size) > 0.5) - 1)
+        cov = cov@cov.T
+        return cov
+    cov = gen_cov()
+    # print(cov) # DEBUG
+
+    data = np.random.multivariate_normal(np.arange(size), cov, n)
+
+    data = pd.DataFrame(data, index = np.arange(n),
+                        columns = ['x', 'y', 'c1', 'c2'])
+
+    return data
+
+
+# Test calculating the partial correlation in a vectorized manner
+def _covar_tensor(tensor3d):
+    N = tensor3d.shape[0]
+    m1 = tensor3d - tensor3d.sum(0, keepdims=True) / N
+    y_out = np.einsum('ijk,ilk->jlk',m1,m1)/(N-1)
+    return y_out
+
+
+def partial_corr_tensor(x, y, covar_list):
+    """Repeated calculation of partial correlation in the spatial dimensions.
+
+    Parameters
+    ----------
+    x : np.ma.array
+        The independent variable. The first dimension will be assumed to be
+        time (replicate observations).
+    y : np.ma.array
+        The dependent variable. y must have the same dimensionality as x.
+    covar_list : list of np.ma.array objects
+        The covariate variables. Each variable must have the same
+        dimensionality as x and y.
+
+    Returns
+    -------
+    r : np.ma.array
+        The partial correlation. If x only has a time dimension, `r` is
+        a scalar. Otherwise, `r` has the same dimensionality as x[1:].
+    p : np.ma.array
+        The two-sided p-values of the partial correlation. If x only has
+        a time dimension, `p` is a scalar. Otherwise, `p` has the same
+        dimensionality as x[1:].
+    """
+    if type(x) != np.ma.core.MaskedArray:
+        raise TypeError('x must be a masked array')
+    if type(y) != np.ma.core.MaskedArray:
+        raise TypeError('y must be a masked array')
+    for vv in covar_list:
+        if type(vv) != np.ma.core.MaskedArray:
+            raise TypeError('covar_list must be masked arrays')
+    if not np.allclose(x.shape, y.shape):
+        raise ValueError('x and y must be the same shape')
+    for vv in covar_list:
+        if not np.allclose(x.shape, vv.shape):
+            raise ValueError('x and covar_list must be the same shape')
+    if x.shape[0] < 3:
+        raise ValueError('At least three observations are needed')
+
+    x0 = x.copy()
+    y0 = y.copy()
+
+    orig_shape = x.shape
+    if len(orig_shape) == 1:
+        x = x.reshape(-1, 1, 1) # extra 2nd dimension for concat
+        y = y.reshape(-1, 1, 1)
+        covar_relist = []
+        for vv in covar_list:
+            covar_relist.append(vv.reshape(-1, 1, 1))
+    else:
+        new_shape = orig_shape[0], 1, np.prod(orig_shape[1:])
+        x = x.reshape(new_shape)
+        y = y.reshape(new_shape)
+        covar_relist = []
+        for vv in covar_list:
+            covar_relist.append(vv.reshape(new_shape))
+    covar_list = covar_relist
+    covar_relist = []
+
+    data = np.ma.concatenate([x,y] + covar_list, axis = 1)
+    del x, y
+    covar_list = []
+
+    # remove invalid points
+    retain_ind = np.any(np.all(data.mask == False, axis = 1), axis = 0)
+    if sum(retain_ind) == 0:
+        raise ValueError('At least one valid spatial data point is needed')
+    ## print(retain_ind) # DEBUG
+    data = data[:, :, retain_ind]
+
+    # TO-DO: Need to think of a way to deal with the different number
+    # of data points in space. Right now it imposes the minimum
+    # overlapping number of valid data points.
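+    # (Illustration of the rule above: with two cells whose valid years are
+    # {1,2,3,4} and {2,3,4,5}, only the replicates 2, 3 and 4 -- valid at
+    # every retained cell and for every column -- survive, so both cells are
+    # fit on the common 3-year sample rather than on per-cell samples.)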
+ drop_replica = np.all(np.all(data.mask == False, axis = 2), axis = 1) + ## print(drop_replica) # DEBUG + if sum(drop_replica) < 3: + raise ValueError('At least three valid observations are needed') + data = data[drop_replica, :, :] + + # calculate the partial correlation and significance (translated from pingouin) + V = _covar_tensor(data) + ##print(data.shape) # DEBUG + ##print(V.shape) # DEBUG + + Vi = np.linalg.inv(V.transpose(2,0,1)).transpose(1,2,0) + D = np.zeros(Vi.shape) + for ii in np.arange(Vi.shape[0]): + D[ii,ii,:] = np.sqrt( 1 / Vi[ii,ii,:] ) + pcor = -1 * np.einsum('jik,ilk->jlk', np.einsum('jik,ilk->jlk',D,Vi), D) + ## print(-1 * D[:,:,5] @ Vi[:,:,5] @ D[:,:,5] - pcor[:,:,5]) # check if correct + r = pcor[0, 1, :] + + from scipy.stats import t + n = data.shape[0] + k = data.shape[1] - 2 + dof = n - k - 2 + tval = r * np.sqrt(dof / (1 - r**2)) + pval = 2 * t.sf(np.abs(tval), dof) + + # restore shape + def _restore_shape(array, retain_ind, orig_shape): + array_restore = np.ma.empty(len(retain_ind)) + array_restore.mask = retain_ind == False + array_restore.data[retain_ind] = array + array_restore = array_restore.reshape(orig_shape[1:]) + return array_restore + + # DEBUG restore shape + ##return x0[drop_replica,:][0,:], _restore_shape(data[0,0,:], retain_ind, orig_shape), \ + ## y0[drop_replica,:][0,:], _restore_shape(data[0,1,:], retain_ind, orig_shape) + + if len(orig_shape) == 1: + return r[0], pval[0] + else: + r_restore = _restore_shape(r, retain_ind, orig_shape) + p_restore = _restore_shape(pval, retain_ind, orig_shape) + return r_restore, p_restore + + +if __name__ == '__main__': + + # DEBUG covariance calculation + """ + data = gen_test(100) + print(data) # DEBUG + print( np.cov(data.values.T) ) + print( _covar_tensor(data.values.reshape(*data.shape, 1))[:, :, 0] ) + """ + + # DEBUG partial correlation calculation + data = gen_test(500) + data.iloc[5:60, :] = np.nan + x = np.ma.masked_where(np.isnan(data['x'].values), + data['x'].values).reshape(25, 20) + y = np.ma.masked_where(np.isnan(data['y'].values), + data['y'].values).reshape(25, 20) + c1 = np.ma.masked_where(np.isnan(data['c1'].values), + data['c1'].values).reshape(25, 20) + c2 = np.ma.masked_where(np.isnan(data['c2'].values), + data['c2'].values).reshape(25, 20) + + # ----- DEBUG restore shape + #x0, x, y0, y = partial_corr_tensor(x, y, [c1, c2]) + #print(x0.data, x.data) + #print(y0.data, y.data) + + # ----- DEBUG the actual partial correlation + which = 6 + stats = pg.partial_corr(data.iloc[which::20, :].dropna(axis = 1, how = 'all'), x = 'x', y = 'y', covar = ['c1', 'c2']) + print(stats) + + # Tensor-calculate the partial correlation + r, p = partial_corr_tensor(x, y, [c1, c2]) + print(r[which]) + print(p[which]) + print(r) + print(p) diff --git a/test/test_trend_tensor.py b/test/test_trend_tensor.py new file mode 100644 index 00000000..0890d3a6 --- /dev/null +++ b/test/test_trend_tensor.py @@ -0,0 +1,203 @@ +from re import L +import numpy as np +from scipy.stats import linregress +import pandas as pd +from copy import deepcopy +from time import time + + +def gen_test(*args): + """ Generate data with a linear trend + + Parameters + ---------- + *args : tuple of integers + The size of the generated array along each dimension. 
+ """ + np.random.seed(500) + if len(args) == 1: + data = 3 + 0.005 * np.arange(args[0]) + np.random.randn(args[0]) + else: + data = 3 + 0.005 * np.broadcast_to(np.arange(args[0]).reshape(-1, *((1,)*(len(args)-1))), args) + \ + np.random.randn(*args) + return data + + +def _ols_tensor(Y, x): + """ Repeated calculation of linear regression in the spatial dimensions. + + Parameters + ---------- + Y : np.ma.array + The variable of interest. The first dimension will be assumed to be + time (replicate observations). + x : np.array or np.ma.array + The time variable of interest. If one-dimensional, will be propagated + to the dimensionality of Y. If having the same dimensionality as Y, + must be a masked array. + + Returns + ------- + r : np.ma.array + The trend. If x only has a time dimension, `r` is a scalar. + Otherwise, `r` has the same dimensionality as x[1:]. + p : np.ma.array + The two-sided p-values of the trend. If x only has a time + dimension, `p` is a scalar. Otherwise, `p` has the same + dimensionality as x[1:]. + """ + if type(Y) != np.ma.core.MaskedArray: + raise TypeError('Y must be a masked array') + if Y.shape[0] < 3: + raise ValueError('At least three observations are needed') + + if (type(x) != np.ma.core.MaskedArray) and (type(x) != np.ndarray): + raise TypeError('x must be either masked or ordinary numpy array') + if (not np.allclose(x.shape, Y.shape)) and (len(x.shape) != 1): + raise ValueError('x must be either 1-dimensional or has the same shape as Y') + + # homogenize the shape and mask of x and Y + if type(Y.mask) == bool: + Y.mask = np.full(Y.shape, Y.mask) + if type(x) == np.ma.core.MaskedArray: + if type(x.mask) == bool: + x.mask = np.full(x.shape, x.mask) + else: + x = np.ma.array(x, mask = np.full(x.shape, False)) + + orig_shape = Y.shape + Y = Y.reshape(Y.shape[0], 1, int(np.prod(Y.shape[1:]))) + if len(x.shape) != 1: + x = x.reshape(Y.shape) + else: + x = np.ma.array(np.broadcast_to(x.data.reshape(-1,1,1), Y.shape), + mask = np.broadcast_to(x.mask.reshape(-1,1,1), Y.shape)) + x = np.ma.array(x.data, mask = x.mask | Y.mask) + Y = np.ma.array(Y, mask = x.mask) + + # add constant term + x = np.ma.concatenate([np.ma.array(np.ones(Y.shape), mask = Y.mask), x], axis = 1) + + # calculate the regression coefficients; treating the masked points as if zero. + xx = np.where(x.mask == False, x.data, 0.) + yy = np.where(Y.mask == False, Y.data, 0.) 
+ beta = np.einsum('ijk,jlk->ilk', + np.einsum('ijk,ljk->ilk', + np.linalg.pinv(np.einsum('ijk,ilk->jlk',xx,xx).transpose(2,0,1) \ + ).transpose(1,2,0), + xx), yy) + + # calculate the p-value + from scipy.stats import t + dof = np.sum(Y.mask == False, axis = 0) - 2 + resid = yy - np.einsum('ijk,jlk->ilk', xx, beta) + mse = np.sum(np.power(resid,2), axis=0) / dof + std = np.ma.sum(np.ma.power(x[:,[1],:] - np.ma.mean(x[:,[1],:],axis=0,keepdims=True), 2), axis = 0) + tval = beta / np.sqrt(mse/std) + pval = 2 * t.sf(np.abs(tval), dof) + + # discard intercept & restore shape + beta = np.ma.array(beta[1,:], mask = np.sum(Y.mask==False, axis = 0)<3) + pval = np.ma.array(pval[1,:], mask = np.sum(Y.mask==False, axis = 0)<3) + if len(orig_shape) > 1: + beta = beta.reshape(orig_shape[1:]) + pval = pval.reshape(orig_shape[1:]) + else: + beta = float(beta.data) + pval = float(pval.data) + return beta, pval + + +if __name__ == '__main__': + # DEBUG trend calculation + # (1) 1-D case + data = gen_test(100) + data = np.ma.array(data, mask = False) + + beta, pval = _ols_tensor(data, np.arange(100)) + res = linregress(np.arange(100), data) + print('Test 1') + print(beta, res.slope) + print(pval, res.pvalue) + + # (2) 2-D case + # (2.1) Without missing values + data = gen_test(50, 3) + data = np.ma.array(data, mask = False) + + beta, pval = _ols_tensor(data, np.arange(50)) + which = 2 + res = linregress(np.arange(50), data[:, which]) + print('Test 2.1') + print(beta[which], res.slope) + print(pval[which], res.pvalue) + + # (2.2) With missing values + data = gen_test(50, 3) + mask = np.full(data.shape, False) + mask[0,1:3] = True + mask[:,2] = True + data = np.ma.array(data, mask = mask) + # print(data) # DEBUG + # (2.2.1) 1-D x + t = np.arange(50) + + beta, pval = _ols_tensor(data, np.arange(50)) + which = 1 + x = np.arange(50) + x = x[data.mask[:,which] == False] + res = linregress(x, data.data[data.mask[:,which] == False, which]) + print('Test 2.2.1') + print(beta[which], res.slope) + print(pval[which], res.pvalue) + + # (2.2.2) 2-D x + mask2 = np.full(data.shape, False) + mask2[0,1:3] = True + mask2[:,2] = True + t = np.ma.array(np.stack([np.arange(50), np.arange(50), + np.arange(50)], axis = 1), + mask = mask2) + beta, pval = _ols_tensor(data, t) + which = 1 + x = np.arange(50) + x = x[data.mask[:,which] == False] + res = linregress(x, data.data[data.mask[:,which] == False, which]) + print('Test 2.2.2') + print(beta[which], res.slope) + print(pval[which], res.pvalue) + + # (3) 3-D case + data = gen_test(50, 300, 500) + mask = np.full(data.shape, False) + mask[0,1:3,4:6] = True + mask[:,2,4] = True + data = np.ma.array(data, mask = mask) + # (3.1) 1-D x + beta, pval = _ols_tensor(data, np.arange(50)) + which = slice(None), 2, 5 + x = np.arange(50) + y = data.data[which] + y[data.mask[which]] = np.nan + res = linregress(x[~np.isnan(y)], y[~np.isnan(y)]) + print('Test 3.1') + print(beta[which[1:]], res.slope) + print(pval[which[1:]], res.pvalue) + + # (3.2) 3-D x + t = np.broadcast_to(np.arange(50).reshape(50,1,1), + data.shape) + beta, pval = _ols_tensor(data, t) + which = slice(None), 2, 5 + x = np.arange(50) + y = data.data[which] + y[data.mask[which]] = np.nan + res = linregress(x[~np.isnan(y)], y[~np.isnan(y)]) + print('Test 3.2') + print(beta[which[1:]], res.slope) + print(pval[which[1:]], res.pvalue) + + # ----- DEBUG restore shape + #x0, x, y0, y = partial_corr_tensor(x, y, [c1, c2]) + #print(x0.data, x.data) + #print(y0.data, y.data) From 925dade2ab405b701a9f22d2bd924f8330849fb8 Mon Sep 17 00:00:00 
2001 From: Yaoping Wang Date: Sun, 24 Oct 2021 22:03:45 -0400 Subject: [PATCH 11/18] added trend & sens, first trial --- bin/ilamb-run | 3 - src/ILAMB/ConfSoilMoisture.py | 790 +++++++++++++++++----------------- src/ILAMB/Confrontation.py | 17 +- src/ILAMB/Scoreboard.py | 23 +- src/ILAMB/Variable.py | 97 ++--- src/ILAMB/constants.py | 202 ++++++--- src/ILAMB/ilamblib.py | 203 +++++---- 7 files changed, 772 insertions(+), 563 deletions(-) diff --git a/bin/ilamb-run b/bin/ilamb-run index 31007195..7299c472 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -693,8 +693,6 @@ C = MatchRelationshipConfrontation(S.list()) C = MatchSensitivityConfrontation(C) # YW -import pdb; pdb.set_trace() - if len(args.study_limits) == 2: args.study_limits[1] += 1 for c in C: c.study_limits = (np.asarray(args.study_limits)-1850)*365. @@ -735,7 +733,6 @@ sys.stdout.flush(); comm.Barrier() W = BuildLocalWorkList(M,C,skip_cache=True) - WorkConfront(W,not args.quiet,args.clean) sys.stdout.flush(); comm.Barrier() diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index 50eb3c48..a02f71e7 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -28,16 +28,6 @@ def __init__(self,**keywords): # Calls the regular constructor super(ConfSoilMoisture,self).__init__(**keywords) - # Trend State page // insert before the Data Information page - pages.insert(-2, post.HtmlPage('TrendState', 'Trend State')) - pages[-2].setHeader('CNAME / RNAME / MNAME') - pages[-2].setSections(['Trend over the period in space', - 'Trend of the regional mean over the period']) - if self.sensitivities is not None: - pages.insert(-2, post.HtmlPage('Sensitivities', 'Partial Correlation Relationships')) - pages[-2].setHeader('CNAME / RNAME / MNAME') - pages[-2].setSections(list(self.sensitivities)) - # Get/modify depths with Dataset(self.source) as dset: v = dset.variables[self.variable] @@ -332,6 +322,9 @@ def confront(self,m): skip_cycle = self.keywords.get("skip_cycle" ,False) rmse_score_basis = self.keywords.get("rmse_score_basis","cycle") + # Read in some options to decide whether to run the trend state analysis + skip_trend = self.keywords.get("skip_trend" ,False) + # Get the depth-integrated observation and model data for each slab. for obs,mod,z0,zf in self.stageData(m): print('Confronting data ' + obs.name + ' v.s. 
' + mod.name + \ @@ -352,14 +345,15 @@ def confront(self,m): rmse_score_basis = rmse_score_basis) # Calculate trend state - il.AnalysisTrendStateSpace(obs, mod, dataset = fcm.mod_dset, - regions = self.regions, - benchmark_dataset = fcm.obs_dset, - table_unit = self.table_unit, - plot_unit = self.plot_unit, - space_mean = self.space_mean, - skip_cycle = skip_cycle, - mass_weighting = mass_weighting) + if not skip_trend: + il.AnalysisTrendStateSpace(obs, mod, dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting) else: il.AnalysisMeanStateSites(obs, mod, dataset = fcm.mod_dset, regions = self.regions, @@ -385,12 +379,12 @@ def confront(self,m): obs_comparable, obs_indep = il.MakeComparable(obs_comparable, obs_indep, mask_ref = True, clip_ref = True, extents = self.extents, - logstring = "[%s][%s]MakeComparable" % \ + logstring = "[%s][%s]MakeComparablePass1" % \ (obs.variable_name, obs_indep.variable_name)) mod_comparable, mod_indep = il.MakeComparable(mod_comparable, mod_indep, mask_ref = True, clip_ref = True, extents = self.extents, - logstring = "[%s][%s]MakeComparable" % \ + logstring = "[%s][%s]MakeComparablePass1" % \ (mod.variable_name, mod_indep.variable_name)) obs_indep_list.append(obs_indep) mod_indep_list.append(mod_indep) @@ -400,16 +394,17 @@ def confront(self,m): obs_comparable, obs_indep = il.MakeComparable(obs_comparable, obs_indep, mask_ref = True, clip_ref = True, extents = self.extents, - logstring = "[%s][%s]MakeComparable" % \ + logstring = "[%s][%s]MakeComparablePass2" % \ (obs.variable_name, obs_indep.variable_name)) mod_comparable, mod_indep = il.MakeComparable(mod_comparable, mod_indep, mask_ref = True, clip_ref = True, extents = self.extents, - logstring = "[%s][%s]MakeComparable" % \ + logstring = "[%s][%s]MakeComparablePass2" % \ (mod.variable_name, mod_indep_variable_name)) if obs.spatial: - il.AnalysisPartialCorrSpace(obs_comparable, mod_comparable, obs_indep_list, mod_indep_list) + il.AnalysisPartialCorrSpace(obs_comparable, mod_comparable, + obs_indep_list, mod_indep_list) else: # !!! TO-DO: Add AnalysisPartialCorrSites pass @@ -419,6 +414,119 @@ def confront(self,m): logger.info("[%s][%s] Success" % (self.longname,m.name)) + def determinePlotLimits(self): + """Determine the limits of all plots which are inclusive of all ranges. + + The routine will open all netCDF files in the output path and + add the maximum and minimum of all variables which are + designated to be plotted. If legends are desired for a given + plot, these are rendered here as well. This routine should be + called before calling any plotting routine. 
+ """ + max_str = "up99" + min_str = "dn99" + if self.keywords.get("limit_type","99per") == "minmax": + max_str = "max" + min_str = "min" + + # Determine the min/max of variables over all models + limits = {} + prune = False + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + with Dataset(fname) as dataset: + for pn in ["MeanState", "TrendState"]: + if pn not in dataset.groups: continue + + limits[pn] = {} + + group = dataset.groups[pn] + variables = [v for v in group.variables.keys() \ + if v not in group.dimensions.keys()] + for vname in variables: + var = group.variables[vname] + pname = vname.split("_")[0] + region = vname.split("_")[-1] + if var[...].size <= 1: continue + if pname in space_opts[pn]: + if pname not in limits[pn]: + limits[pn][pname] = {} + limits[pn][pname]["min"] = +1e20 + limits[pn][pname]["max"] = -1e20 + limits[pn][pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pn][pname]["min"] = min(limits[pn][pname]["min"],var.getncattr(min_str)) + limits[pn][pname]["max"] = max(limits[pn][pname]["max"],var.getncattr(max_str)) + elif pname in time_opts[pn]: + if pname not in limits[pn]: limits[pn][pname] = {} + if region not in limits[pn][pname]: + limits[pn][pname][region] = {} + limits[pn][pname][region]["min"] = +1e20 + limits[pn][pname][region]["max"] = -1e20 + limits[pn][pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pn][pname][region]["min"] = min(limits[pn][pname][region]["min"],var.getncattr("min")) + limits[pn][pname][region]["max"] = max(limits[pn][pname][region]["max"],var.getncattr("max")) + if not prune and "Benchmark" in fname and pname == "timeint": + prune = True + self.pruneRegions(Variable(filename = fname, + variable_name = vname, + groupname = pn)) + + # Second pass to plot legends (FIX: only for master?) 
+ for pn in ["MeanState", "TrendState"]: + if not pn in limits.keys(): continue + for pname in limits[pn].keys(): + try: + opts = space_opts[pn][pname] + except: + continue + + # Determine plot limits and colormap + if opts["sym"]: + vabs = max(abs(limits[pn][pname]["min"]),abs(limits[pn][pname]["min"])) + limits[pn][pname]["min"] = -vabs + limits[pn][pname]["max"] = vabs + + # if a score, force to be [0,1] + if "score" in pname: + limits[pn][pname]["min"] = 0 + limits[pn][pname]["max"] = 1 + + limits[pn][pname]["cmap"] = opts["cmap"] + if limits[pn][pname]["cmap"] == "choose": limits[pn][pname]["cmap"] = self.cmap + + # Plot a legend for each key + if opts["haslegend"]: + fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) + label = opts["label"] + if label == "unit": label = limits[pn][pname]["unit"] + post.ColorBar(ax, + vmin = limits[pn][pname]["min"], + vmax = limits[pn][pname]["max"], + cmap = limits[pn][pname]["cmap"], + ticks = opts["ticks"], + ticklabels = opts["ticklabels"], + label = label) + fig.savefig(os.path.join(self.output_path,"legend_%s.png" % (pname))) + plt.close() + + # Determine min/max of relationship variables + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + with Dataset(fname) as dataset: + for g in dataset.groups.keys(): + if "relationship" not in g: continue + grp = dataset.groups[g] + if g not in limits: + limits[g] = {} + limits[g]["xmin"] = +1e20 + limits[g]["xmax"] = -1e20 + limits[g]["ymin"] = +1e20 + limits[g]["ymax"] = -1e20 + limits[g]["xmin"] = min(limits[g]["xmin"],grp.variables["ind_bnd"][ 0, 0]) + limits[g]["xmax"] = max(limits[g]["xmax"],grp.variables["ind_bnd"][-1,-1]) + limits[g]["ymin"] = min(limits[g]["ymin"],grp.variables["dep_bnd"][ 0, 0]) + limits[g]["ymax"] = max(limits[g]["ymax"],grp.variables["dep_bnd"][-1,-1]) + + self.limits = limits + def compositePlots(self): """Renders plots which display information of all models. 
@@ -430,286 +538,152 @@ def compositePlots(self): if not self.master: return # get the HTML page - page = [page for page in self.layout.pages if "MeanState" in page.name][0] - - models = [] - colors = [] - corr = {} - std = {} - cycle = {} - has_cycle = False - has_std = False - for fname in glob.glob(os.path.join(self.output_path,"*.nc")): - dataset = Dataset(fname) - if "MeanState" not in dataset.groups: continue - dset = dataset.groups["MeanState"] - models.append(dataset.getncattr("name")) - colors.append(dataset.getncattr("color")) - for region in self.regions: - if region not in cycle: cycle[region] = {} - if region not in std: std[region] = {} - if region not in corr: corr[region] = {} + for pn, ffix in zip(['MeanState', 'TrendState'], ['mean', 'trend']): + page = [page for page in self.layout.pages if pn in page.name][0] + + models = [] + colors = [] + corr = {} + std = {} + cycle = {} + has_cycle = False + has_std = False + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + dataset = Dataset(fname) + if pn not in dataset.groups: continue + dset = dataset.groups[pn] + models.append(dataset.getncattr("name")) + colors.append(dataset.getncattr("color")) + for region in self.regions: + if region not in cycle: cycle[region] = {} + if region not in std: std[region] = {} + if region not in corr: corr[region] = {} + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + + if zstr not in cycle[region]: cycle[region][zstr] = [] + + key = [v for v in dset.variables.keys() \ + if ("cycle_" in v and zstr in v and region in v)] + if len(key)>0: + has_cycle = True + cycle[region][zstr].append(Variable(filename=fname,groupname=pn, + variable_name=key[0])) + + if zstr not in std[region]: std[region][zstr] = [] + if zstr not in corr[region]: corr[region][zstr] = [] + + key = [] + if "scalars" in dset.groups: + key = [v for v in dset.groups["scalars"].variables.keys() \ + if ("Spatial Distribution Score" in v and zstr \ + in v and region in v)] + if len(key) > 0: + has_std = True + sds = dset.groups["scalars"].variables[key[0]] + corr[region][zstr].append(sds.getncattr("R" )) + std [region][zstr].append(sds.getncattr("std")) + + # composite annual cycle plot + if has_cycle and len(models) > 0: + page.addFigure("Spatially integrated regional mean", + ffix + "_compcycle", + "RNAME_" + ffix + "_compcycle.png", + side = "ANNUAL CYCLE", + legend = False) + for region in self.regions: + if region not in cycle: continue + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0]), + sharex = True, sharey = True) for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind,1] + zf = self.depths[dind, 1] zstr = '%.2f-%.2f' % (z0, zf) - - if zstr not in cycle[region]: cycle[region][zstr] = [] - - key = [v for v in dset.variables.keys() if ("cycle_" in v and zstr in v and region in v)] - if len(key)>0: - has_cycle = True - cycle[region][zstr].append(Variable(filename=fname,groupname="MeanState", - variable_name=key[0])) - - if zstr not in std[region]: std[region][zstr] = [] - if zstr not in corr[region]: corr[region][zstr] = [] - - key = [] - if "scalars" in dset.groups: - key = [v for v in dset.groups["scalars"].variables.keys() \ - if ("Spatial Distribution Score" in v and zstr in v and region in v)] - if len(key) > 0: - has_std = True - sds = dset.groups["scalars"].variables[key[0]] - corr[region][zstr].append(sds.getncattr("R" )) - std [region][zstr].append(sds.getncattr("std")) - - # composite 
annual cycle plot - if has_cycle and len(models) > 0: + + if self.depths.shape[0] == 1: + ax = axes + else: + ax = axes.flat[dind] + + for name,color,var in zip(models,colors,cycle[region][zstr]): + dy = 0.05*(self.limits[pn]["cycle"][region]["max"] - \ + self.limits[pn]["cycle"][region]["min"]) + + var.plot(ax, lw=2, color=color, label=name, + ticks = time_opts[ffix]["cycle"]["ticks"], + ticklabels = time_opts[ffix]["cycle"]["ticklabels"], + vmin = self.limits[pn]["cycle"][region]["min"]-dy, + vmax = self.limits[pn]["cycle"][region]["max"]+dy) + ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + ax.set_title(zstr + ' '+ self.depths_units) + fig.savefig(os.path.join(self.output_path, + "%s_" + ffix + "_compcycle.png" % (region))) + plt.close() + + # plot legends with model colors (sorted with Benchmark data on top) page.addFigure("Spatially integrated regional mean", - "compcycle", - "RNAME_compcycle.png", - side = "ANNUAL CYCLE", - legend = False) - - for region in self.regions: - if region not in cycle: continue - fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize = (6.5, 2.8*self.depths.shape[0]), - sharex = True, sharey = True) - for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind, 1] - zstr = '%.2f-%.2f' % (z0, zf) - - if self.depths.shape[0] == 1: - ax = axes - else: - ax = axes.flat[dind] - - for name,color,var in zip(models,colors,cycle[region][zstr]): - dy = 0.05*(self.limits["cycle"][region]["max"] - \ - self.limits["cycle"][region]["min"]) - - var.plot(ax, lw=2, color=color, label=name, - ticks = time_opts["cycle"]["ticks"], - ticklabels = time_opts["cycle"]["ticklabels"], - vmin = self.limits["cycle"][region]["min"]-dy, - vmax = self.limits["cycle"][region]["max"]+dy) - ylbl = post.UnitStringToMatplotlib(var.unit) - ax.set_ylabel(ylbl) - ax.set_title(zstr + ' '+ self.depths_units) - fig.savefig(os.path.join(self.output_path,"%s_compcycle.png" % (region))) - plt.close() - - # plot legends with model colors (sorted with Benchmark data on top) - page.addFigure("Spatially integrated regional mean", - "legend_compcycle", - "legend_compcycle.png", - side = "MODEL COLORS", - legend = False) - def _alphabeticalBenchmarkFirst(key): - key = key[0].lower() - if key == "BENCHMARK": return "A" - return key - tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) - fig,ax = plt.subplots() - for model,color in tmp: - ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) - handles,labels = ax.get_legend_handles_labels() - plt.close() - - ncol = np.ceil(float(len(models))/11.).astype(int) - if ncol > 0: - fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) - ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) - ax.axis(False) - fig.savefig(os.path.join(self.output_path,"legend_compcycle.png")) - fig.savefig(os.path.join(self.output_path,"legend_spatial_variance.png")) - fig.savefig(os.path.join(self.output_path,"legend_temporal_variance.png")) - plt.close() - - # spatial distribution Taylor plot - if has_std: - page.addFigure("Temporally integrated period mean", - "spatial_variance", - "RNAME_spatial_variance.png", - side = "SPATIAL TAYLOR DIAGRAM", - legend = False) - page.addFigure("Temporally integrated period mean", - "legend_spatial_variance", - "legend_spatial_variance.png", + "legend_" + ffix + "_compcycle", + "legend_" + ffix + "_compcycle.png", side = "MODEL COLORS", legend = False) - if "Benchmark" in models: colors.pop(models.index("Benchmark")) - for region in self.regions: - if not (region in 
std and region in corr): continue - - fig = plt.figure(figsize=(12.0,12.0)) - for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind, 1] - zstr = '%.2f-%.2f' % (z0, zf) - - if not (zstr in std[region] and zstr in corr[region]): continue - if len(std[region][zstr]) != len(corr[region][zstr]): continue - if len(std[region][zstr]) == 0: continue - ax, aux = post.TaylorDiagram(np.asarray(std[region][zstr]), - np.asarray(corr[region][zstr]), - 1.0,fig,colors,True,220+dind+1) - ax.set_title(zstr + ' ' + self.depths_units) - fig.savefig(os.path.join(self.output_path, - "%s_spatial_variance.png" % (region))) + def _alphabeticalBenchmarkFirst(key): + key = key[0].lower() + if key == "BENCHMARK": return "A" + return key + tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) + fig,ax = plt.subplots() + for model,color in tmp: + ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) + handles,labels = ax.get_legend_handles_labels() plt.close() - - - # Get the HTML page - page = [page for page in self.layout.pages if "TrendState" in page.name][0] - - models = [] - colors = [] - corr = {} - cycle = {} - has_cycle = False - has_std = False - for fname in glob.glob(os.path.join(self.output_path,"*.nc")): - dataset = Dataset(fname) - if "TrendState" not in dataset.groups: continue - dset = dataset.groups["TrendState"] - models.append(dataset.getncattr("name")) - colors.append(dataset.getncattr("color")) + + ncol = np.ceil(float(len(models))/11.).astype(int) + if ncol > 0: + fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) + ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) + ax.axis(False) + fig.savefig(os.path.join(self.output_path,"legend_" + ffix + "_compcycle.png")) + fig.savefig(os.path.join(self.output_path, + "legend_" + ffix + "_spatial_variance.png")) + fig.savefig(os.path.join(self.output_path, + "legend_" + ffix + "_temporal_variance.png")) + plt.close() + + # spatial distribution Taylor plot + if has_std: + page.addFigure("Temporally integrated period " + ffix, + ffix + "_spatial_variance", + "RNAME_" + ffix + "_spatial_variance.png", + side = "SPATIAL TAYLOR DIAGRAM", + legend = False) + page.addFigure("Temporally integrated period " + ffix, + "legend_" + ffix + "_spatial_variance", + "legend_" + ffix + "_spatial_variance.png", + side = "MODEL COLORS", + legend = False) + if "Benchmark" in models: colors.pop(models.index("Benchmark")) for region in self.regions: - if region not in cycle: cycle[region] = {} - + if not (region in std and region in corr): continue + + fig = plt.figure(figsize=(12.0,12.0)) for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind,1] + zf = self.depths[dind, 1] zstr = '%.2f-%.2f' % (z0, zf) - - if zstr not in cycle[region]: cycle[region][zstr] = [] - - key = [v for v in dset.variables.keys() if ("trend_cycle_" in v and zstr in v and region in v)] - if len(key)>0: - has_cycle = True - cycle[region][zstr].append(Variable(filename=fname,groupname="TrendState", - variable_name=key[0])) - - if zstr not in std[region]: std[region][zstr] = [] - if zstr not in corr[region]: corr[region][zstr] = [] - - key = [] - if "scalars" in dset.groups: - key = [v for v in dset.groups["scalars"].variables.keys() \ - if ("Spatial Distribution Score" in v and zstr in v and region in v)] - if len(key) > 0: - has_std = True - sds = dset.groups["scalars"].variables[key[0]] - corr[region][zstr].append(sds.getncattr("R" )) - std [region][zstr].append(sds.getncattr("std")) - - # composite annual cycle plot - if 
has_cycle and len(models) > 0: - page.addFigure("Trend of the regional mean over the period", - "comptrendcycle", - "RNAME_comptrendcycle.png", - side = "ANNUAL CYCLE OF TREND", - legend = False) - - for region in self.regions: - if region not in cycle: continue - fig, axes = plt.subplots(self.depths.shape[0], 1, - figsize = (6.5, 2.8*self.depths.shape[0]), - sharex = True, sharey = True) - for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind, 1] - zstr = '%.2f-%.2f' % (z0, zf) - - if self.depths.shape[0] == 1: - ax = axes - else: - ax = axes.flat[dind] - - for name,color,var in zip(models,colors,cycle[region][zstr]): - dy = 0.05*(self.limits["cycle"][region]["max"] - \ - self.limits["cycle"][region]["min"]) - - var.plot(ax, lw=2, color=color, label=name, - ticks = time_opts["cycle"]["ticks"], - ticklabels = time_opts["cycle"]["ticklabels"], - vmin = self.limits["cycle"][region]["min"]-dy, - vmax = self.limits["cycle"][region]["max"]+dy) - ylbl = post.UnitStringToMatplotlib(var.unit) - ax.set_ylabel(ylbl) - ax.set_title(zstr + ' '+ self.depths_units) - fig.savefig(os.path.join(self.output_path,"%s_comptrendcycle.png" % (region))) - plt.close() - - # plot legends with model colors (sorted with Benchmark data on top) - page.addFigure("Trend of the regional mean over the period", - "legend_comptrendcycle", - "legend_comptrendcycle.png", - side = "MODEL COLORS", - legend = False) - def _alphabeticalBenchmarkFirst(key): - key = key[0].lower() - if key == "BENCHMARK": return "A" - return key - tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) - fig,ax = plt.subplots() - for model,color in tmp: - ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) - handles,labels = ax.get_legend_handles_labels() - plt.close() - - ncol = np.ceil(float(len(models))/11.).astype(int) - if ncol > 0: - fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) - ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) - ax.axis(False) - fig.savefig(os.path.join(self.output_path,"legend_comptrendcycle.png")) - plt.close() - - ## !!! TO-DO? Implement the std calculation for spatial trends. 
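+        # NOTE: a rough sketch of the spatial statistics this Taylor section
+        # would need, assuming two masked fields a (reference) and b (model)
+        # on a common grid; ILAMB's area weighting is omitted:
+        #     m   = np.ma.getmaskarray(a) | np.ma.getmaskarray(b)
+        #     aa  = np.ma.masked_array(a, m).compressed()
+        #     bb  = np.ma.masked_array(b, m).compressed()
+        #     std = bb.std() / aa.std()        # normalized standard deviation
+        #     R   = np.corrcoef(aa, bb)[0, 1]  # spatial correlation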
- ## spatial distribution Taylor plot - #if has_std: - # page.addFigure("Temporally integrated period mean", - # "spatial_variance", - # "RNAME_spatial_variance.png", - # side = "SPATIAL TAYLOR DIAGRAM", - # legend = False) - # page.addFigure("Temporally integrated period mean", - # "legend_spatial_variance", - # "legend_spatial_variance.png", - # side = "MODEL COLORS", - # legend = False) - #if "Benchmark" in models: colors.pop(models.index("Benchmark")) - #for region in self.regions: - # if not (region in std and region in corr): continue - # - # fig = plt.figure(figsize=(12.0,12.0)) - # for dind, z0 in enumerate(self.depths[:,0]): - # zf = self.depths[dind, 1] - # zstr = '%.2f-%.2f' % (z0, zf) - # - # if not (zstr in std[region] and zstr in corr[region]): continue - # if len(std[region][zstr]) != len(corr[region][zstr]): continue - # if len(std[region][zstr]) == 0: continue - # ax, aux = post.TaylorDiagram(np.asarray(std[region][zstr]), - # np.asarray(corr[region][zstr]), - # 1.0,fig,colors,True,220+dind+1) - # ax.set_title(zstr + ' ' + self.depths_units) - # fig.savefig(os.path.join(self.output_path, - # "%s_spatial_variance.png" % (region))) - # plt.close() - + + if not (zstr in std[region] and zstr in corr[region]): continue + if len(std[region][zstr]) != len(corr[region][zstr]): continue + if len(std[region][zstr]) == 0: continue + ax, aux = post.TaylorDiagram(np.asarray(std[region][zstr]), + np.asarray(corr[region][zstr]), + 1.0,fig,colors,True,220+dind+1) + ax.set_title(zstr + ' ' + self.depths_units) + fig.savefig(os.path.join(self.output_path, + "%s_" + ffix + "_spatial_variance.png" % (region))) + plt.close() def modelPlots(self,m): @@ -719,7 +693,6 @@ def modelPlots(self,m): netCDF file which results from the analysis and create plots. Note that determinePlotLimits should be called before this routine. - """ self._relationship(m) self._sensitivity(m) @@ -729,29 +702,30 @@ def modelPlots(self,m): if not os.path.isfile(fname): return # get the HTML page - for grp in ['MeanState', 'TrendState']: - page = [page for page in self.layout.pages if grp in page.name][0] + for pn, ffix in zip(['MeanState', 'TrendState'], ["mean", "trend"]): + page = [page for page in self.layout.pages if pn in page.name][0] with Dataset(fname) as dataset: - group = dataset.groups[grp] + group = dataset.groups[pn] variables = getVariableList(group) color = dataset.getncattr("color") for vname in variables: # The other depths will be handled in plotting zstr_0 = '%.2f-%.2f' % (self.depths[0,0], self.depths[0,1]) if not zstr_0 in vname: continue - + # is this a variable we need to plot? - pname = vname.split("_")[0] if group.variables[vname][...].size <= 1: continue - var = Variable(filename=fname,groupname=grp,variable_name=vname) - + var = Variable(filename=fname,groupname=pn,variable_name=vname) + + pname = vname.split("_")[1] + if (var.spatial or (var.ndata is not None)) and not var.temporal: - + # grab plotting options - if pname not in self.limits.keys(): continue - if pname not in space_opts: continue - opts = space_opts[pname] + if pname not in self.limits[pn].keys(): continue + if pname not in space_opts[ffix]: continue + opts = space_opts[ffix][pname] ##print('... 
is used in space_opts') # DEBUG @@ -761,7 +735,7 @@ def modelPlots(self,m): opts["pattern"], side = opts["sidelbl"], legend = opts["haslegend"]) - + # plot variable for region in self.regions: nax = self.depths.shape[0] @@ -769,20 +743,22 @@ def modelPlots(self,m): for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind,1] zstr = '%.2f-%.2f' % (z0, zf) - var2 = Variable(filename=fname, groupname = grp, + var2 = Variable(filename=fname, groupname = pn, variable_name=vname.replace(zstr_0, zstr)) ax = var2.plot(None, fig, nax, region = region, - vmin = self.limits[pname]["min"], - vmax = self.limits[pname]["max"], - cmap = self.limits[pname]["cmap"]) + vmin = self.limits[pn][pname]["min"], + vmax = self.limits[pn][pname]["max"], + cmap = self.limits[pn][pname]["cmap"]) ax.set_title(zstr + ' ' + self.depths_units) fig.savefig(os.path.join(self.output_path, - "%s_%s_%s.png" % (m.name,region,pname))) + "%s_%s_%s_%s.png" % (m.name,region,ffix, + pname))) plt.close() - + # Jumping through hoops to get the benchmark plotted and in the html output - if self.master and (pname == "timeint" or pname == "phase" or pname == "iav"): - opts = space_opts[pname] + if self.master and (pname == "timeint" or \ + pname == "phase" or pname == "iav"): + opts = space_opts[ffix][pname] # add to html layout page.addFigure(opts["section"], @@ -790,7 +766,7 @@ def modelPlots(self,m): opts["pattern"].replace("MNAME","Benchmark"), side = opts["sidelbl"].replace("MODEL","BENCHMARK"), legend = True) - + # plot variable for region in self.regions: nax = self.depths.shape[0] @@ -798,27 +774,30 @@ def modelPlots(self,m): for dind, z0 in enumerate(self.depths[:,0]): zf = self.depths[dind,1] zstr = '%.2f-%.2f' % (z0, zf) - obs = Variable(filename=bname,groupname=grp, + obs = Variable(filename=bname,groupname=pn, variable_name=vname.replace(zstr_0, zstr)) ax = obs.plot(None, fig, nax, region = region, - vmin = self.limits[pname]["min"], - vmax = self.limits[pname]["max"], - cmap = self.limits[pname]["cmap"]) + vmin = self.limits[pn][pname]["min"], + vmax = self.limits[pn][pname]["max"], + cmap = self.limits[pn][pname]["cmap"]) ax.set_title(zstr + ' ' + self.depths_units) - fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname))) + fig.savefig(os.path.join(self.output_path, + "Benchmark_%s_%s_%s.png" % (region, + ffix, + pname))) plt.close() if not (var.spatial or (var.ndata is not None)) and var.temporal: # grab the benchmark dataset to plot along with try: - obs = Variable(filename=bname,groupname=grp, + obs = Variable(filename=bname,groupname=pn, variable_name=vname).convert(var.unit) except: continue # grab plotting options - if pname not in time_opts: continue - opts = time_opts[pname] + if pname not in time_opts[ffix]: continue + opts = time_opts[ffix][pname] # add to html layout page.addFigure(opts["section"], @@ -841,22 +820,25 @@ def modelPlots(self,m): else: ax = axes.flat[dind] - var2 = Variable(filename=fname, groupname = grp, + var2 = Variable(filename=fname, groupname = pn, variable_name=vname.replace(zstr_0, zstr)) - obs = Variable(filename=bname,groupname=grp, - variable_name=vname.replace(zstr_0, zstr)).convert(var2.unit) + obs = Variable(filename=bname,groupname=pn, + variable_name=vname.replace(zstr_0, zstr)).convert(var2.unit) obs.plot(ax, lw = 2, color = 'k', alpha = 0.5) var2.plot(ax, lw = 2, color = color, label = m.name, ticks =opts["ticks"], ticklabels=opts["ticklabels"]) - dy = 0.05*(self.limits[pname][region]["max"]-self.limits[pname][region]["min"]) - 
ax.set_ylim(self.limits[pname][region]["min"]-dy, - self.limits[pname][region]["max"]+dy) + dy = 0.05*(self.limits[pn][pname][region]["max"] - \ + self.limits[pn][pname][region]["min"]) + ax.set_ylim(self.limits[pn][pname][region]["min"]-dy, + self.limits[pn][pname][region]["max"]+dy) ylbl = opts["ylabel"] if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) ax.set_ylabel(ylbl) ax.set_title(zstr + ' ' + self.depths_units) - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) + fig.savefig(os.path.join(self.output_path, + "%s_%s_%s_%s.png" % (m.name,region,ffix, + pname))) plt.close() logger.info("[%s][%s] Success" % (self.longname,m.name)) @@ -1071,12 +1053,13 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, com_dep_list = _retrieveSM(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) com_dep_list = [_applyRefMask(ref_dep, com_dep) for ref_dep,com_dep in zip(ref_dep_list,com_dep_list)] dep_name = self.longname.split("/")[0] - dep_min = self.limits["timeint"]["min"] - dep_max = self.limits["timeint"]["max"] + dep_min = self.limits["MeanState"]["timeint"]["min"] + dep_max = self.limits["MeanState"]["timeint"]["max"] except: return - with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="r+") as results: + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)), + mode="r+") as results: # Grab/create a relationship and scalars group group = None @@ -1098,8 +1081,8 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) com_ind = _applyRefMask(ref_ind,com_ind) ind_name = c.longname.split("/")[0] - ind_min = c.limits["timeint"]["min"]-1e-12 - ind_max = c.limits["timeint"]["max"]+1e-12 + ind_min = c.limits["MeanState"]["timeint"]["min"]-1e-12 + ind_max = c.limits["MeanState"]["timeint"]["max"]+1e-12 except: continue @@ -1247,24 +1230,6 @@ def _retrieveBias(cname, filename): groupname = "Sensitivities", variable_name = key[1]) - def _plotMap(var, pval, vminmax, title, fout): - # !!! Need to determine projection - - fig, axes = plt.figure() - for dind, z0 in enumerate(self.depths[:,0]): - zf = self.depths[dind,1] - zstr = '%.2f-%.2f' % (z0, zf) - ax = axes.flat[dind] - cf = ax.contourf(var.lon, var.lat, var, - vmin = vminmax[0], vmax = vminmax[1], - cmap = 'Spectral_r') - ax.set_title(zstr + ' ' + self.depths_units) - fig.colorbar(cf, cax = fig.add_axes([0.97, 0.1, 0.02, 0.8]), - orientation="vertical",label=title) - fig.savefig(fout) - plt.close() - - # Get the HTML page page = [page for page in self.layout.pages if "Sensitivities" in page.name] if len(page) == 0: return @@ -1285,20 +1250,6 @@ def _plotMap(var, pval, vminmax, title, fout): # for each sensitivity relationship... 
for c in self.sensitivities: - # try to get the sensitivity map from the model and obs - try: - ref_corr_list, ref_corr_p_list = _retrieveCorr(c.name, os.path.join(c.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) - com_corr_list, com_corr_p_list = _retrieveCorr(c.name, os.path.join(c.output_path,"%s_%s.nc" % (self.name,m.name))) - com_bias_map_list, com_biasscore_map_list = _retrieveBias(c.name, os.path.join(c.output_path,"%s_%s.nc" % (self.name,m.name))) - ref_name = self.longname.split('/')[0] - ref_minmax_list = [(ref_corr.data.min(), ref_corr.data.max()) \ - for ref_corr in ref_corr_list] - com_name = c.longname.split('/')[0] - com_minmax_list = [(com_corr.data.min(), com_corr.data.max()) \ - for com_corr in com_corr_list] - except: - continue - # Add figures to the html page page.addFigure(c.longname, "benchmark_sens_%s" % com_name, @@ -1316,21 +1267,90 @@ def _plotMap(var, pval, vminmax, title, fout): legend = False, benchmark = False) - # Analysis over regions - ## for region in self.regions: ## !!! TO-DO: something other than None to accept - # Make the plots - _plotMap(ref_corr_list, ref_corr_p_list, ref_minmax_list, - "%s/%s, %s" % (ref_name, self.name, - post.UnitStringToMatplotlib(ref_corr.unit)), - os.path.join(self.output_path, - "%s_%s_rel_%s.png" % ("Benchmark",region,ref_name))) - _plotMap(com_corr_list, com_corr_p_list, com_minmax_list, - "%s/%s, %s" % (com_name, c.name, - post.UnitStringToMatplotlib(com_corr.unit)), - os.path.join(self.output_path, - "%s_%s_rel_%s.png" % ("Benchmark",region,com_name))) - _plotMap(com_bias_map_list, None, - "Bias, %s/%s, %s" % (ref_name, com_name, - post.UnitStringToMatplotlib(ref_corr.unit))) - _plotMap(com_biasscore_map_list, None, - "Bias Score, %s/%s" % (ref_name, com_name)) + # Get the sensitivity map from the model and obs + try: + ref_corr_list, ref_corr_p_list = _retrieveCorr(c.name, os.path.join(c.output_path, "%s_%s.nc" % (self.name,"Benchmark"))) + com_corr_list, com_corr_p_list = _retrieveCorr(c.name, os.path.join(c.output_path, "%s_%s.nc" % (self.name,m.name))) + com_bias_map_list, com_biasscore_map_list = _retrieveBias(c.name, os.path.join(c.output_path, "%s_%s.nc" % (self.name,m.name))) + + ref_name = self.longname.split('/')[0] + ref_min = np.min([ref_corr.data.min() for ref_corr in ref_corr_list]) + ref_max = np.max([ref_corr.data.max() for ref_corr in ref_corr_list]) + com_name = c.longname.split('/')[0] + com_min = np.min([com_corr.data.min() for com_corr in com_corr_list]) + com_max = np.max([com_corr.data.max() for com_corr in com_corr_list]) + diff_min = np.min([com_bias_map.data.min() \ + for com_bias_map in com_bias_map_list]) + diff_max = np.min([com_bias_map.data.max() \ + for com_bias_map in com_bias_map_list]) + except: + continue + + r = Regions() + for region in self.regions: + nax = self.depths.shape[0] + fig1 = plt.figure() + fig2 = plt.figure() + fig3 = plt.figure() + + score_list = [] + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + + # Make the plots + ax1 = ref_corr_list[dind].plot(None, fig, nax, region = region, + vmin = ref_min, vmax = ref_max, + cmap = 'RdBu') + # ---- mask the p-value + ref_temp = deepcopy(ref_corr_p_list[dind]) + ref_temp.data.mask += r.getMask(region, ref_temp) + lat = np.hstack([ref_temp.lat_bnds[:,0],ref_temp.lat_bnds[-1,-1]]) + lon = np.hstack([ref_temp.lon_bnds[:,0],ref_temp.lon_bnds[-1,-1]]) + ax1.pcolormesh(lon, lat, + np.ma.masked_array(ref_temp.data > 0.05, + ref_temp.data <= 0.05), + cmap = 'Grays', vmin = 
0.5, vmax = 1.5, alpha = 0.5)
+                    ax1.set_title(zstr + ' ' + self.depths_units)
+
+                    ax2 = com_corr_list[dind].plot(None, fig2, nax, region = region,
+                                                   vmin = com_min, vmax = com_max,
+                                                   cmap = 'RdBu')
+                    # ---- mask the p-value
+                    com_temp = deepcopy(com_corr_p_list[dind])
+                    com_temp.data.mask += r.getMask(region, com_temp)
+                    lat = np.hstack([com_temp.lat_bnds[:,0],com_temp.lat_bnds[-1,-1]])
+                    lon = np.hstack([com_temp.lon_bnds[:,0],com_temp.lon_bnds[-1,-1]])
+                    ax2.pcolormesh(lon, lat,
+                                   np.ma.masked_array(com_temp.data > 0.05,
+                                                      com_temp.data <= 0.05),
+                                   cmap = 'Greys', vmin = 0.5, vmax = 1.5,
+                                   alpha = 0.5)
+                    ax2.set_title(zstr + ' ' + self.depths_units)
+
+                    ax3 = com_bias_map_list[dind].plot(None, fig3, nax, region = region,
+                                                       vmin = diff_min,
+                                                       vmax = diff_max, cmap = 'RdBu')
+                    ax3.set_title(zstr + ' ' + self.depths_units)
+
+                    # Score the functional response over the regions
+                    score = _scoreFunction(ref_temp,com_temp)
+                    score_list.append(score)
+
+                    del ref_temp, com_temp
+
+                # !!! TO-DO: save fig1/fig2/fig3 to the file names registered
+                # with page.addFigure above, then close them
+                plt.close('all')
+
+                score = np.sum(np.array(score_list)*(self.depths[:,1] - \
+                                                     self.depths[:,0])) / \
+                    (self.depths[-1,1] - self.depths[0,0])
+                sname = "%s RMSE Score %s" % (com_name,region)
+                if sname in scalars.variables:
+                    scalars.variables[sname][0] = score
+                else:
+                    Variable(name = sname,
+                             unit = "1",
+                             data = score).toNetCDF4(results,group="Sensitivities")
+
+
+        # This should go into ILAMB.Confrontation.Confrontation.computeOverallScore(m)
+        # !!! Figure out how to add
diff --git a/src/ILAMB/Confrontation.py b/src/ILAMB/Confrontation.py
index d2253ce3..ca5b025b 100644
--- a/src/ILAMB/Confrontation.py
+++ b/src/ILAMB/Confrontation.py
@@ -142,6 +142,8 @@ def __init__(self,**keywords):
         self.table_unit     = keywords.get("table_unit",None)
         self.plot_unit      = keywords.get("plot_unit",None)
         self.space_mean     = keywords.get("space_mean",True)
+        # Trend keywords, YW
+        self.skip_trend     = keywords.get("skip_trend",False)
         self.relationships  = keywords.get("relationships",None)
         self.sensitivities  = keywords.get("sensitivities",None) # YW
         self.keywords       = keywords
@@ -166,6 +168,14 @@ def __init__(self,**keywords):
             pages[-1].setSections(["Temporally integrated period mean",
                                    "Spatially integrated regional mean"])
+        # Trend State page, YW
+        if not self.skip_trend:
+            trend_page = post.HtmlPage('TrendState', 'Trend State')
+            trend_page.setHeader('CNAME / RNAME / MNAME')
+            trend_page.setSections(["Temporally integrated period trend",
+                                    "Spatially integrated regional trend"])
+            pages.insert(-2, trend_page)
+
         # Datasites page
         self.hasSites = False
         self.lbls     = None
@@ -207,10 +217,19 @@ def __init__(self,**keywords):
             pages.append(post.HtmlPage("Relationships","Relationships"))
             pages[-1].setHeader("CNAME / RNAME / MNAME")
             pages[-1].setSections(list(self.relationships))
+
+        # Sensitivities page, YW
+        if self.sensitivities is not None:
+            sens_page = post.HtmlPage('Sensitivities', 'Partial Correlation Relationships')
+            sens_page.setHeader('CNAME / RNAME / MNAME')
+            sens_page.setSections(list(self.sensitivities))
+            pages.insert(-2, sens_page)
+
         pages.append(post.HtmlAllModelsPage("AllModels","All Models"))
         pages[-1].setHeader("CNAME / RNAME / MNAME")
         pages[-1].setSections([])
         pages[-1].setRegions(self.regions)
+
         pages.append(post.HtmlPage("DataInformation","Data Information"))
         pages[-1].setSections([])
         pages[-1].text = "\n"
@@ -498,7 +517,6 @@ def determinePlotLimits(self):
                     limits[g]["ymin"] = min(limits[g]["ymin"],grp.variables["dep_bnd"][ 0, 0])
                     limits[g]["ymax"] = max(limits[g]["ymax"],grp.variables["dep_bnd"][-1,-1])
-
         self.limits = limits

     def computeOverallScore(self,m):
diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py
index 4fbf957d..699e6d36 100644
--- a/src/ILAMB/Scoreboard.py
+++ b/src/ILAMB/Scoreboard.py
@@ -49,6 +49,7 @@ def __init__(self, name):
         self.regions            = None
         self.skip_rmse          = False
         self.skip_iav           = True
+        self.skip_trend         = False # YW
         self.mass_weighting     = False
         self.weight             = 1   # if a dataset has no weight specified, it is implicitly 1
         self.sum_weight_children = 0  # what is the sum of the weights of my children?
@@ -265,13 +266,29 @@ def _loadScores(node):
     for ind,m in enumerate(global_model_list):
         fname = "%s/%s_%s.nc" % (node.confrontation.output_path,node.confrontation.name,m.name)
         if os.path.isfile(fname):
+            # YW: added extra groups
             try:
                 dataset = Dataset(fname)
-                grp = dataset.groups["MeanState"].groups["scalars"]
             except:
                 continue
-            if "Overall Score global" in grp.variables:
-                data[ind] = grp.variables["Overall Score global"][0]
+
+            # !!! TO-DO: allow user to modify the weights
+            weights = {"MeanState": 2, "TrendState": 1, "Sensitivities": 1}
+            overall_score = 0.
+            sum_of_weights = 0.
+            for pn in ["MeanState", "TrendState", "Sensitivities"]:
+                try:
+                    grp = dataset.groups[pn].groups["scalars"]
+                except:
+                    continue
+                if "Overall Score global" in grp.variables:
+                    overall_score += grp.variables["Overall Score global"][0] * \
+                        weights[pn]
+                    sum_of_weights += weights[pn]
+            overall_score /= max(sum_of_weights, 1e-12)
+
+            if sum_of_weights > 1e-12:
+                data[ind] = overall_score
                 mask[ind] = 0
             else:
                 data[ind] = -999.
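The Scoreboard change above folds each model's per-group "Overall Score global"
values into one weighted mean, skipping any group that is absent from the result
file. A minimal standalone sketch of that reduction (the function name and the
inline example values are illustrative; the weights are the fixed ones from the
patch):

    def combine_overall_scores(scores, weights):
        # scores  : {group name: "Overall Score global" value}; missing groups
        #           are simply absent from the dict
        # weights : {group name: weight}, e.g.
        #           {"MeanState": 2, "TrendState": 1, "Sensitivities": 1}
        overall = 0.
        wsum = 0.
        for pn, w in weights.items():
            if pn not in scores: continue  # group was not written for this model
            overall += scores[pn] * w
            wsum += w
        return overall / wsum if wsum > 1e-12 else None

    # Only MeanState and TrendState present:
    # combine_overall_scores({"MeanState": 0.8, "TrendState": 0.5},
    #                        {"MeanState": 2, "TrendState": 1, "Sensitivities": 1})
    # -> (0.8*2 + 0.5*1) / 3 = 0.7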
diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 890c87af..871a1434 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -223,13 +223,7 @@ def __init__(self,**keywords): convert_calendar = keywords.get("convert_calendar",True) out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname,convert_calendar=convert_calendar,z0=z0,zf=zf) # YW - data,data_bnds,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbou def _retrieveData(filename): - key = None - with Dataset(filename,mode="r") as dset: - key = [v for v in dset.groups["MeanState"].variables.keys() if "timeint_" in v] - return Variable(filename = filename, - groupname = "MeanState", - variable_name = key[0])nds,ndata,calendar,attr = out + data,data_bnds,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbounds,ndata,calendar,attr = out # Add handling for some units which cf_units does not support unit = unit.replace("psu","1e-3") @@ -719,7 +713,7 @@ def _integrate(var,areas): name = name) def trendInTime(self,**keywords): - r"""YW: Compute the grid-by-grid trend in the variable + r"""YW: Compute the grid-by-grid annual trend in the variable over a given time period. Ignore missing values. Parameters @@ -770,33 +764,17 @@ def trendInTime(self,**keywords): for i in range(self.data.ndim-1): dt = np.expand_dims(dt,axis=-1) np.seterr(over='ignore',under='ignore') - integral = [] - if self.data_bnds is not None: - integral_bnd = [] - for yy in np.unique(year): - ind2 = np.where(year == yy)[0] - temp = (data[ind2]*dt[ind2]).sum(axis=0, keepdims=True) - if self.data_bnds is not None: - temp_bnd = np.ma.concatenate([(data_bnds[...,0][ind2] * dt[ind2] \ - ).sum(axis=0, keepdims=True), - (data_bnds[...,1][ind2] * dt[ind2] \ - ).sum(axis=0, keepdims=True)], axis = -1) - if mean: - # divide thru by the non-masked amount of time - if self.data.mask.size > 1: - dt2 = (dt[ind2]*(mask[ind2]==0)).sum(axis=0) - else: - dt2 = dt[ind2].sum(axis=0) - temp = temp / dt2 - if self.data_bnds is not None: - temp_bnd[...,0] = temp_bnd[...,0] / dt2 - temp_bnd[...,1] = temp_bnd[...,1] / dt2 - integral.append(temp) + if self.monthly and (self.time.size > 11): + begin = np.argmin(self.time[:11]%365) + end = begin+int(self.time[begin:].size/12.)*12 + shp = (-1,12) + data.shape[1:] + integral = data[begin:end,...].reshape(shp) if self.data_bnds is not None: - integral_bnd.append(temp_bnd) - integral = np.ma.stack(integral, axis = 0) - if self.data_bnds is not None: - integral_bnd = np.ma.stack(integral_bnd, axis = 0) + shp = (-1,12) + data_bnds.shape[1:] + integral_bnd = data_bnds[begin:end,...].reshape(shp) + else: + integral = data + integral_bnd = data_bnds np.seterr(over='raise',under='raise') if not mean: @@ -815,6 +793,8 @@ def trendInTime(self,**keywords): unit0.convert(integral_bnd,unit,inplace=True) # calculate the trend and the significance + # !!! 
TO-DO: Change the trend_lower_bnd & trend_upper_bnd to be based on + # the confidence interval of the linear regression trend, trend_p = _olsTensor(integral, np.mean(time_bnds, axis = 1)) if integral_bnd is not None: trend_lower_bnd, trend_lower_bnd_p = _olsTensor(integral_bnd[0,...], @@ -828,7 +808,7 @@ def trendInTime(self,**keywords): trend_upper_bnd_p[np.newaxis, ...]], axis = 0) # handle units - unit = Unit(self.unit + "/year") + unit = Unit(self.unit + " year$^{-1}$") name = self.name + "_trend_over_time" return Variable(data = trend, data_bnds = trend_bnd, @@ -838,7 +818,7 @@ def trendInTime(self,**keywords): depth = self.depth, depth_bnds = self.depth_bnds, area = self.area, ndata = self.ndata), \ Variable(data = trend_p, data_bnds = trend_bnd_p, unit = None, - name = name.replace("trend", "trend_p"), + name = name.replace("trend", "trendp"), lat = self.lat, lat_bnds = self.lat_bnds, lon = self.lon, lon_bnds = self.lon_bnds, depth = self.depth, depth_bnds = self.depth_bnds, @@ -949,7 +929,7 @@ def trendAnnualCycle(self): ndata = self.ndata), Variable(data = trend_p, unit = self.unit + "/year", - name = "annual_cycle_trend_p_of_%s" % self.name, + name = "annual_cycle_trendp_of_%s" % self.name, time = mid_months, time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T, lat = self.lat, @@ -1814,10 +1794,9 @@ def partialCorrelation(self, var_indep_list, ctype, region = None): Notes ----- - Need to better think about what correlation means when data - are masked. The sums ignore the data but then the number of - items *n* is not constant and should be reduced for masked - values. + Need to better think about what correlation means when data are masked. + The sums ignore the data but then the number of items *n* is not + constant and should be reduced for masked values. """ def _covarTensor(tensor3d): """ Covariance matrix calculation for each data poinat along an @@ -1928,7 +1907,7 @@ def _partialCorrTensor(x, y, covar_list): dof = n - k - 2 tval = r * np.sqrt(dof / (1 - r**2)) pval = 2 * t.sf(np.abs(tval), dof) - + # restore shape def _restore_shape(array, retain_ind, orig_shape): array_restore = np.ma.empty(len(retain_ind)) @@ -1974,17 +1953,39 @@ def _restore_shape(array, retain_ind, orig_shape): out_time_bnds = None if out_time is not None: out_time_bnds = self.time_bnds + # if monthly data, remove the seasonal cycle + if self.monthly and (self.time.size > 11): + begin = np.argmin(self.time[:11]%365) + end = begin+int(self.time[begin:].size/12.)*12 + shp = (-1,12) + self.data.shape[1:] + x_mean = np.ma.mean(self.data[begin:end,...].reshape(shp), + axis = 1, keepdims = True) + x = self.data[begin:end,...] - \ + np.broadcast_to(x_mean, shp).reshape((-1,) + self.data.shape[1:]) + y_list = [] + for i, y in enumerate(var_indep_list): + begin = np.argmin(y.time[:11]%365) + end = begin+int(y.time[begin:].size/12.)*12 + shp = (-1,12) + y.data.shape[1:] + y_mean = np.ma.mean(y.data[begin:end,...].reshape(shp), + axis = 1, keepdims = True) + y_temp = y.data[being:end,...] 
- \ + np.broadcast_to(y_mean, shp).reshape((-1,) + y.data.shape[1:]) + y_list.append(y_temp) + else: + x = self.data + y_list = [var_indep_list[i].data for i in range(len(var_indep_list))] + + # calculate the partial correlation result = {} - for i, y in enumerate(var_indep_list): - r, p = _partialCorrTensor(x, y, var_indep_list[:i] + var_indep_list[(i+1):]) + for i, y in enumerate(y_list): + r, p = _partialCorrTensor(x, y, y_list[:i] + y_list[(i+1):]) r = Variable(data=r,unit="1", - name="%s_partial_correlation_of_%s_and_%s" % \ - (ctype,self.name,y.name), + name="%s_partial_correlation_of_%s_and_%s" % (ctype,self.name,y.name), time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, lat=out_lat,lon=out_lon,area=out_area) p = Variable(data=r,unit="1", - name="%s_partial_pvalue_of_%s_and_%s" % \ - (ctype,self.name,y.name), + name="%s_partial_pvalue_of_%s_and_%s" % (ctype,self.name,y.name), time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, lat=out_lat,lon=out_lon,area=out_area) result[y.name] = {'r': r, 'p': p} diff --git a/src/ILAMB/constants.py b/src/ILAMB/constants.py index 7bd5cd93..4081eeb1 100644 --- a/src/ILAMB/constants.py +++ b/src/ILAMB/constants.py @@ -185,26 +185,129 @@ "sidelbl" :"MODEL MAX MONTH", "haslegend" :True } -space_opts["trend_bias"] = { "name" :"Temporally integrated period mean bias", - !!! "cmap" :"bias", - "sym" :True, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_bias.png", - "sidelbl" :"BIAS", - "haslegend" :True } -space_opts["trend_biasscore"] = { "name" :"Temporally integrated period mean bias", - !!! "cmap" :"bias", - "sym" :True, + +for pn, ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]): + space_opts[pn] = {} + space_opts[pn]["timeint"] = { "name" :"Temporally integrated period " + ffix, + "cmap" :"choose", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit", + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_timeint.png", + "sidelbl" :"MODEL " + ffix.upper(), + "haslegend" :True } + + space_opts[pn]["timeintremap"] = { "name" :"Temporally integrated remapped period " + ffix, + "cmap" :"choose", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit", + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_timeintremap.png", + "sidelbl" :"MAPPED MODEL " + ffix.upper(), + "haslegend" :True } + + space_opts[pn]["bias"] = { "name" :"Temporally integrated period " + ffix + " bias", + "cmap" :"bias", + "sym" :True, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_bias.png", + "sidelbl" :"BIAS IN " + ffix.upper(), + "haslegend" :True } + + space_opts[pn]["biasscore"] = { "name":"Temporally integrated period "+ffix+" bias score", + "cmap" :"score", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_biasscore.png", + "sidelbl" :"BIAS SCORE IN " + ffix.upper(), + "haslegend" :True } + + space_opts[pn]["rmse"] = { "name":"Temporally integrated period " + ffix + " rmse", + "cmap" :"YlOrRd", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_rmse.png", + "sidelbl" :"RMSE " + ffix.upper(), + "haslegend" :True } + + 
space_opts[pn]["rmsescore"] = { "name":"Temporally integrated period "+ffix+" rmse score", + "cmap" :"score", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_rmsescore.png", + "sidelbl" :"RMSE SCORE " + ffix.upper(), + "haslegend" :True } + + space_opts[pn]["iav"] = { "name" :"Interannual variability", + "cmap" :"Reds", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_" + ffix + "_iav.png", + "sidelbl" :"MODEL INTERANNUAL VARIABILITY", + "haslegend" :True } + + space_opts[pn]["iavscore"] = { "name" :"Interannual variability score", + "cmap" :"score", + "sym" :False, "ticks" :None, "ticklabels":None, "label" :"unit" , "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_bias.png", - "sidelbl" :"BIAS", + "pattern" :"MNAME_RNAME_" + ffix + "_iavscore.png", + "sidelbl" :"INTERANNUAL VARIABILITY SCORE", "haslegend" :True } + + space_opts[pn]["shift"] = { "name" :"Temporally integrated " + ffix + " phase shift", + "cmap" :"PRGn", + "sym" :True, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_shift.png", + "sidelbl" :"DIFFERENCE IN MAX MONTH IN " + ffix.upper(), + "haslegend" :True } + + space_opts[pn]["shiftscore"] = { "name":"Temporally integrated "+ffix+" phase shift score", + "cmap" :"score", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_shiftscore.png", + "sidelbl" :"SEASONAL CYCLE SCORE " + ffix.upper(), + "haslegend" :True } + + space_opts[pn]["phase"] = { "name":"Temporally integrated period " + ffix + " max month", + "cmap" :"jet", + "sym" :False, + "ticks" :mid_months, + "ticklabels":lbl_months, + "label" :"month", + "section" :"Temporally integrated period " + ffix, + "pattern" :"MNAME_RNAME_" + ffix + "_phase.png", + "sidelbl" :"MODEL MAX MONTH " + ffix.upper(), + "haslegend" :True } time_opts = {} @@ -244,38 +347,41 @@ "ticklabels" : lbl_months, "ylabel" : "unit"} -time_opts["trend"] = { "name" : "Spatially integrated regional mean", - !!! "section" : "Spatially integrated regional mean", - "haslegend" : False, - "pattern" : "MNAME_RNAME_spaceint.png", - "sidelbl" : "REGIONAL MEAN", - "ticks" : None, - "ticklabels" : None, - "ylabel" : "unit"} - -time_opts["trend_p"] = { "name" : "Accumulated mean", - !!! "section" : "Spatially integrated regional mean", - "haslegend" : False, - "pattern" : "MNAME_RNAME_accumulate.png", - "sidelbl" : "ACCUMULATION", - "ticks" : None, - "ticklabels" : None, - "ylabel" : "unit"} +for pn, ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]): + space_opts[pn] = {} -time_opts["trend_cycle"] = { "name" : "Spatially integrated regional mean cycle", - !!! 
"section" : "Spatially integrated regional mean", - "haslegend" : False, - "pattern" : "MNAME_RNAME_cycle.png", - "sidelbl" : "ANNUAL CYCLE", - "ticks" : mid_months/365.+1850., - "ticklabels" : lbl_months, - "ylabel" : "unit"} + time_opts[pn]["spaceint"] = { "name" : "Spatially integrated regional " + ffix, + "section" : "Spatially integrated regional " + ffix, + "haslegend" : False, + "pattern" : "MNAME_RNAME_" + ffix + "_spaceint.png", + "sidelbl" : "REGIONAL " + ffix.upper(), + "ticks" : None, + "ticklabels" : None, + "ylabel" : "unit" } -time_opts["trend_dtcycle"] = { "name" : "Spatially integrated regional mean detrended cycle", - !!! "section" : "Spatially integrated regional mean", - "haslegend" : False, - "pattern" : "MNAME_RNAME_dtcycle.png", - "sidelbl" : "MONTHLY ANOMALY", - "ticks" : mid_months/365.+1850., - "ticklabels" : lbl_months, - "ylabel" : "unit"} + time_opts[pn]["accumulate"] = { "name" : "Accumulated " + ffix, + "section" : "Spatially integrated regional " + ffix, + "haslegend" : False, + "pattern" : "MNAME_RNAME_" + ffix + "_accumulate.png", + "sidelbl" : "ACCUMULATION", + "ticks" : None, + "ticklabels" : None, + "ylabel" : "unit" } + + time_opts[pn]["cycle"] = { "name" : "Spatially integrated regional " + ffix + " cycle", + "section" : "Spatially integrated regional " + ffix, + "haslegend" : False, + "pattern" : "MNAME_RNAME_" + ffix + "_cycle.png", + "sidelbl" : "ANNUAL CYCLE OF " + ffix.uper(), + "ticks" : mid_months/365.+1850., + "ticklabels" : lbl_months, + "ylabel" : "unit" } + + time_opts[pn]["dtcycle"] = { "name" : "Spatially integrated regional " + ffix + " detrended cycle", + "section" : "Spatially integrated regional " + ffix, + "haslegend" : False, + "pattern" : "MNAME_RNAME_" + ffix + "_dtcycle.png", + "sidelbl" : "MONTHLY ANOMALY OF " + ffix.upper(), + "ticks" : mid_months/365.+1850., + "ticklabels" : lbl_months, + "ylabel" : "unit"} diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 7cb53d19..309d5b23 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -1411,7 +1411,7 @@ def AnalysisMeanStateSpace(ref,com,**keywords): com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) if benchmark_dataset is not None: - ref_timeint.name = "timeint_of_%s" % name + ref_timeint.name = "mean_timeint_of_%s" % name ref_timeint.toNetCDF4(benchmark_dataset,group="MeanState") for region in regions: @@ -1423,7 +1423,7 @@ def AnalysisMeanStateSpace(ref,com,**keywords): if dataset is not None: - com_timeint.name = "timeint_of_%s" % name + com_timeint.name = "mean_timeint_of_%s" % name com_timeint.toNetCDF4(dataset,group="MeanState") for region in regions: @@ -1485,24 +1485,24 @@ def AnalysisMeanStateSpace(ref,com,**keywords): if not skip_cycle: ref_cycle = REF.annualCycle() ref_maxt_map = ref_cycle.timeOfExtrema(etype="max") - ref_maxt_map.name = "phase_map_of_%s" % name + ref_maxt_map.name = "mean_phase_map_of_%s" % name com_cycle = COM.annualCycle() com_maxt_map = com_cycle.timeOfExtrema(etype="max") - com_maxt_map.name = "phase_map_of_%s" % name + com_maxt_map.name = "mean_phase_map_of_%s" % name shift_map = ref_maxt_map.phaseShift(com_maxt_map) - shift_map.name = "shift_map_of_%s" % name + shift_map.name = "mean_shift_map_of_%s" % name shift_score_map = ScoreSeasonalCycle(shift_map) - shift_score_map.name = "shiftscore_map_of_%s" % name + shift_score_map.name = "mean_shiftscore_map_of_%s" % name shift_map.data /= 30.; shift_map.unit = "months" if benchmark_dataset is not None: 
ref_maxt_map.toNetCDF4(benchmark_dataset,group="MeanState") for region in regions: ref_mean_cycle = ref_cycle.integrateInSpace(region=region,mean=True) - ref_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region) + ref_mean_cycle.name = "mean_cycle_of_%s_over_%s" % (name,region) ref_mean_cycle.toNetCDF4(benchmark_dataset,group="MeanState") ref_dtcycle = deepcopy(ref_mean_cycle) ref_dtcycle.data -= ref_mean_cycle.data.mean() - ref_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region) + ref_dtcycle.name = "mean_dtcycle_of_%s_over_%s" % (name,region) ref_dtcycle.toNetCDF4(benchmark_dataset,group="MeanState") if dataset is not None: com_maxt_map.toNetCDF4(dataset,group="MeanState") @@ -1510,11 +1510,11 @@ def AnalysisMeanStateSpace(ref,com,**keywords): shift_score_map.toNetCDF4(dataset,group="MeanState") for region in regions: com_mean_cycle = com_cycle.integrateInSpace(region=region,mean=True) - com_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region) + com_mean_cycle.name = "mean_cycle_of_%s_over_%s" % (name,region) com_mean_cycle.toNetCDF4(dataset,group="MeanState") com_dtcycle = deepcopy(com_mean_cycle) com_dtcycle.data -= com_mean_cycle.data.mean() - com_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region) + com_dtcycle.name = "mean_dtcycle_of_%s_over_%s" % (name,region) com_dtcycle.toNetCDF4(dataset,group="MeanState") shift = shift_map.integrateInSpace(region=region,mean=True,intabs=True) shift_score = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) @@ -1544,12 +1544,12 @@ def AnalysisMeanStateSpace(ref,com,**keywords): area = area, ndata = ndata), REF_iav) if benchmark_dataset is not None: - REF_iav.name = "iav_map_of_%s" % name + REF_iav.name = "mean_iav_map_of_%s" % name REF_iav.toNetCDF4(benchmark_dataset,group="MeanState") if dataset is not None: - COM_iav.name = "iav_map_of_%s" % name + COM_iav.name = "mean_iav_map_of_%s" % name COM_iav.toNetCDF4(dataset,group="MeanState") - iav_score_map.name = "iavscore_map_of_%s" % name + iav_score_map.name = "mean_iavscore_map_of_%s" % name iav_score_map.toNetCDF4(dataset,group="MeanState") for region in regions: iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) @@ -1560,17 +1560,19 @@ def AnalysisMeanStateSpace(ref,com,**keywords): # Bias: maps, scalars, and scores bias = REF_timeint.bias(COM_timeint).convert(plot_unit) cREF = Variable(name = "centralized %s" % name, unit = REF.unit, - data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...], + mask=REF.data.mask), time = REF.time, time_bnds = REF.time_bnds, ndata = REF.ndata, - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = REF.area).convert(plot_unit) + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = REF.area).convert(plot_unit) REF_std = cREF.rms() if skip_rmse: del cREF bias_score_map = Score(bias,REF_std if REF.time.size > 1 else REF_timeint) bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask if dataset is not None: - bias.name = "bias_map_of_%s" % name + bias.name = "mean_bias_map_of_%s" % name bias.toNetCDF4(dataset,group="MeanState") - bias_score_map.name = "biasscore_map_of_%s" % name + bias_score_map.name = "mean_biasscore_map_of_%s" % name bias_score_map.toNetCDF4(dataset,group="MeanState") for region in regions: bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) @@ -1586,12 +1588,12 @@ 
def AnalysisMeanStateSpace(ref,com,**keywords): if benchmark_dataset is not None: for region in regions: ref_spaceint = REF.integrateInSpace(region=region,mean=True) - ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + ref_spaceint.name = "mean_spaceint_of_%s_over_%s" % (name,region) ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState") if dataset is not None: for region in regions: com_spaceint = COM.integrateInSpace(region=region,mean=True) - com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + com_spaceint.name = "mean_spaceint_of_%s_over_%s" % (name,region) com_spaceint.toNetCDF4(dataset,group="MeanState") # RMSE: maps, scalars, and scores @@ -1621,9 +1623,9 @@ def AnalysisMeanStateSpace(ref,com,**keywords): del cREF,cCOM rmse_score_map = Score(crmse,REF_std) if dataset is not None: - rmse.name = "rmse_map_of_%s" % name + rmse.name = "mean_rmse_map_of_%s" % name rmse.toNetCDF4(dataset,group="MeanState") - rmse_score_map.name = "rmsescore_map_of_%s" % name + rmse_score_map.name = "mean_rmsescore_map_of_%s" % name rmse_score_map.toNetCDF4(dataset,group="MeanState") for region in regions: rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) @@ -1648,9 +1650,9 @@ def AnalysisMeanStateSpace(ref,com,**keywords): crmse = ref_dtcycle.rmse(com_dtcycle).convert(plot_unit) rmse_score_map = Score(crmse,REF_std) if dataset is not None: - rmse.name = "rmse_map_of_%s" % name + rmse.name = "mean_rmse_map_of_%s" % name rmse.toNetCDF4(dataset,group="MeanState") - rmse_score_map.name = "rmsescore_map_of_%s" % name + rmse_score_map.name = "mean_rmsescore_map_of_%s" % name rmse_score_map.toNetCDF4(dataset,group="MeanState") for region in regions: rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) @@ -1660,7 +1662,7 @@ def AnalysisMeanStateSpace(ref,com,**keywords): rmse_score.name = "RMSE Score %s %s" % (name, region) rmse_score.toNetCDF4(dataset,group="MeanState") del rmse,crmse,rmse_score_map - + return @@ -1711,14 +1713,18 @@ def AnalysisTrendStateSpace(ref,com,**keywords): plot_unit = keywords.get("plot_unit" ,None) mass_weighting = keywords.get("mass_weighting" ,False) skip_cycle = keywords.get("skip_cycle" ,False) + skip_rmse = keywords.get("skip_rmse" ,False) ref_trend = keywords.get("ref_trend" ,None) ref_trend_p = keywords.get("ref_trend_p" ,None) com_trend = keywords.get("com_trend" ,None) com_trend_p = keywords.get("com_trend_p" ,None) + rmse_score_basis = keywords.get("rmse_score_basis" ,"cycle") ILAMBregions = Regions() spatial = ref.spatial # Convert str types to booleans + if type(skip_rmse) == type(""): + skip_rmse = (skip_rmse.lower() == "true") if type(skip_cycle) == type(""): skip_cycle = (skip_cycle.lower() == "true") @@ -1737,6 +1743,9 @@ def AnalysisTrendStateSpace(ref,com,**keywords): unit = REF.unit area = REF.area ndata = REF.ndata + + REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) + normalizer = REF_timeint.data if mass_weighting else None # Find the trend values over the time period if ref_trend is None: @@ -1763,7 +1772,7 @@ def AnalysisTrendStateSpace(ref,com,**keywords): if benchmark_dataset is not None: for region in regions: ref_spaceint = REF.integrateInSpace(region=region,mean=True).convert(table_unit) - ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + ref_spaceint.name = "trend_spaceint_of_%s_over_%s" % (name,region) ref_union_spaceint = Variable(name = "REF_and_com", unit = REF.unit, data = np.ma.masked_array(REF.data,mask=(ref_and_com==False)), @@ -1779,7 +1788,7 
@@ def AnalysisTrendStateSpace(ref,com,**keywords): if dataset is not None: for region in regions: com_spaceint = COM.integrateInSpace(region=region,mean=True).convert(table_unit) - com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + com_spaceint.name = "trend_spaceint_of_%s_over_%s" % (name,region) com_union_spaceint = Variable(name = "ref_and_COM", unit = COM.unit, data = np.ma.masked_array(COM.data,mask=(ref_and_com==False)), @@ -1796,7 +1805,7 @@ def AnalysisTrendStateSpace(ref,com,**keywords): if benchmark_dataset is not None: ref_trend.name = "trend_of_%s" % name ref_trend.toNetCDF4(benchmark_dataset,group="TrendState") - ref_trend_p.name = "trend_p_of_%s" % name + ref_trend_p.name = "trendp_of_%s" % name ref_trend_p.toNetCDF4(benchmark_dataset,group="TrendState") for region in regions: @@ -1811,7 +1820,7 @@ def AnalysisTrendStateSpace(ref,com,**keywords): if dataset is not None: com_trend.name = "trend_of_%s" % name com_trend.toNetCDF4(dataset,group="TrendState") - com_trend_p.name = "trend_p_of_%s" % name + com_trend_p.name = "trendp_of_%s" % name com_trend_p.toNetCDF4(dataset,group="TrendState") for region in regions: @@ -1869,34 +1878,36 @@ def AnalysisTrendStateSpace(ref,com,**keywords): # Spatial Distribution: scalars and scores if dataset is not None: for region in regions: - space_std,space_cor,sd_score = REF_trend.spatialDistribution(COM_trend,region=region) - sd_score.name = "Trend Spatial Distribution Score %s %s" % (name, region) + space_std,space_cor,sd_score = REF_trend.spatialDistribution(COM_trend, + region=region) + sd_score.name = "Spatial Distribution Score %s %s" % (name, region) sd_score.toNetCDF4(dataset,group="TrendState", attributes={"std":space_std.data, "R" :space_cor.data}) # Cycle: maps, scalars, and scores if not skip_cycle: - ref_trend_cycle_map = REF.trendAnnualCycle() - ref_maxt_map = ref_trend_cycle_map.timeOfExtrema(etype="max") - ref_maxt_map.name = "trend_phase_map_of_%s" % name - com_trend_cycle_map = COM.trendAnnualCycle() - com_maxt_map = com_trend_cycle_map.timeOfExtrema(etype="max") - com_maxt_map.name = "trend_phase_map_of_%s" % name - shift_map = ref_maxt_map.phaseShift(com_maxt_map) - shift_map.name = "trend_shift_map_of_%s" % name - shift_score_map = ScoreSeasonalCycle(shift_map) - shift_score_map.name = "trend_shiftscore_map_of_%s" % name + ref_trend_cycle_map, _ = REF.trendAnnualCycle() + ref_maxt_map = ref_trend_cycle_map.timeOfExtrema(etype="max") + ref_maxt_map.name = "trend_phase_map_of_%s" % name + com_trend_cycle_map, _ = COM.trendAnnualCycle() + com_maxt_map = com_trend_cycle_map.timeOfExtrema(etype="max") + com_maxt_map.name = "trend_phase_map_of_%s" % name + shift_map = ref_maxt_map.phaseShift(com_maxt_map) + shift_map.name = "trend_shift_map_of_%s" % name + shift_score_map = ScoreSeasonalCycle(shift_map) + shift_score_map.name = "trend_shiftscore_map_of_%s" % name shift_map.data /= 30.; shift_map.unit = "months" + if benchmark_dataset is not None: ref_maxt_map.toNetCDF4(benchmark_dataset,group="TrendState") for region in regions: ref_trend_cycle = ref_spaceint.trendAnnualCyclce() ref_trend_cycle.name = "trend_cycle_of_%s_over_%s" % (name,region) ref_trend_cycle.toNetCDF4(benchmark_dataset,group="TrendState") - ref_dtcycle = deepcopy(ref_trend_cycle) - ref_dtcycle.data -= ref_trend_cycle.data.mean() - ref_dtcycle.name = "trend_dtcycle_of_%s_over_%s" % (name,region) + ref_dtcycle = deepcopy(ref_trend_cycle) + ref_dtcycle.data -= ref_trend_cycle.data.mean() + ref_dtcycle.name = "trend_dtcycle_of_%s_over_%s" % 
(name,region)
                 ref_dtcycle.toNetCDF4(benchmark_dataset,group="TrendState")
         if dataset is not None:
             com_maxt_map   .toNetCDF4(dataset,group="TrendState")
@@ -1906,16 +1917,16 @@ def AnalysisTrendStateSpace(ref,com,**keywords):
                 com_trend_cycle      = com_spaceint.trendAnnualCycle()
                 com_trend_cycle.name = "trend_cycle_of_%s_over_%s" % (name,region)
                 com_trend_cycle.toNetCDF4(dataset,group="TrendState")
-                com_dtcycle       = deepcopy(com_trend_cycle)
-                com_dtcycle.data -= com_trend_cycle.data.mean()
-                com_dtcycle.name  = "trend_dtcycle_of_%s_over_%s" % (name,region)
+                com_dtcycle       = deepcopy(com_trend_cycle)
+                com_dtcycle.data -= com_trend_cycle.data.mean()
+                com_dtcycle.name  = "trend_dtcycle_of_%s_over_%s" % (name,region)
                 com_dtcycle.toNetCDF4(dataset,group="TrendState")
                 shift       = shift_map.integrateInSpace(region=region,mean=True,intabs=True)
                 shift_score = shift_score_map.integrateInSpace(region=region,mean=True,
                                                                weight=normalizer)
                 shift.name = "Trend Phase Shift %s %s" % (name, region)
                 shift.toNetCDF4(dataset,group="TrendState")
-                shift_score.name = "Trend Seasonal Cycle Score %s %s" % (name, region)
+                shift_score.name = "Seasonal Cycle Score %s %s" % (name, region)
                 shift_score.toNetCDF4(dataset,group="TrendState")
         del shift_map,shift_score_map
@@ -1935,10 +1946,32 @@ def AnalysisTrendStateSpace(ref,com,**keywords):
             bias_val.name = "Trend Bias %s %s" % (name, region)
             bias_val.toNetCDF4(dataset,group="TrendState")
             bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer)
-            bias_score.name = "Trend Bias Score %s %s" % (name, region)
+            bias_score.name = "Bias Score %s %s" % (name, region)
             bias_score.toNetCDF4(dataset,group="TrendState")
     del bias,bias_score_map

+    if not skip_rmse:
+        if rmse_score_basis != 'cycle':
+            raise ValueError("rmse_score_basis must be 'cycle' to calculate the trend RMSE")
+        if skip_cycle:
+            raise ValueError("skip_cycle must be False to calculate the cycle-based RMSE")
+
+        rmse_map = ref_trend_cycle_map.rmse(com_trend_cycle_map).convert(plot_unit)
+        rmse_score_map = Score(rmse_map, REF_trend)
+        if dataset is not None:
+            rmse_map.name = 'trend_rmse_map_of_%s' % name
+            rmse_map.toNetCDF4(dataset,group='TrendState')
+            rmse_score_map.name = 'trend_rmsescore_map_of_%s' % name
+            rmse_score_map.toNetCDF4(dataset,group='TrendState')
+            for region in regions:
+                rmse = rmse_map.integrateInSpace(region=region,mean=True).convert(plot_unit)
+                rmse.name = 'Trend RMSE %s %s' % (name, region)
+                rmse.toNetCDF4(dataset,group='TrendState')
+                rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,
+                                                             weight=normalizer)
+                rmse_score.name = 'RMSE Score %s %s' % (name, region)
+                rmse_score.toNetCDF4(dataset,group='TrendState')
+        del rmse_map, rmse_score_map
     return
@@ -1979,9 +2012,10 @@ def AnalysisPartialCorrSpace(ref,com,ref_indep_list,com_indep_list,**keywords):
     mass_weighting      = keywords.get("mass_weighting"     ,False)
     ref_corr            = keywords.get("ref_corr"           ,None)
     com_corr            = keywords.get("com_corr"           ,None)
+    skip_rmse           = keywords.get("skip_rmse"          ,False)
     ILAMBregions = Regions()
     spatial      = ref.spatial
-    name = ref.name
+    name         = ref.name

     # Interpolate both reference and comparison to a grid composed of
     # their cell breaks
@@ -1999,39 +2033,58 @@ def AnalysisPartialCorrSpace(ref,com,ref_indep_list,com_indep_list,**keywords):
     area  = ref.area
     ndata = ref.ndata

+    REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit)
+    normalizer  = REF_timeint.data if mass_weighting else None
+
     # Find the partial correlation values over the time period
     assert ref_corr is None
     assert com_corr is None

-    # Write to file
-    for 
region in regions: - ref_corr = ref.partialCorrelation(ref_indep_list, ctype = "temporal", region = region) - com_corr = com.partialCorrelation(com_indep_list, ctype = "temporal", region = region) - REF_corr = REF.partialCorrelation(REF_indep_list, ctype = "temporal", region = region) - COM_corr = COM.partialCorrelation(COM_indep_list, ctype = "temporal", region = region) - - for pp in ref_corr.keys(): - for ss in ['r', 'p']: - temp = ref_corr[pp][ss] - temp = 'Benchmark (original grids) ' + temp.name + ' ' + region - temp.toNetCDF4(benchmark_dataset, group = 'Sensitivities') - - for ss in ['r', 'p']: - temp = com_corr[pp][ss] - temp = 'Model (original grids) ' + temp.name + ' ' + region - temp.toNetCDF4(dataset, group = 'Sensitivities') - - # Calculate bias and write bias to file + ref_corr = ref.partialCorrelation(ref_indep_list, ctype = "temporal") + com_corr = com.partialCorrelation(com_indep_list, ctype = "temporal") + REF_corr = REF.partialCorrelation(REF_indep_list, ctype = "temporal") + COM_corr = COM.partialCorrelation(COM_indep_list, ctype = "temporal") + for pp in ref_corr.keys(): + for ss in ['r', 'p']: + temp = ref_corr[pp][ss] + temp = 'Benchmark (original grids) ' + temp.name + ' ' + region + temp.toNetCDF4(benchmark_dataset, group = 'Sensitivities') + + temp = com_corr[pp][ss] + temp = 'Model (original grids) ' + temp.name + ' ' + region + temp.toNetCDF4(dataset, group = 'Sensitivities') + + # Spatial Distribution: scalars and scores + if dataset is not None: + for region in regions: + space_std,space_cor,sd_score = \ + REF_corr[pp][ss].spatialDistribution(COM_corr[pp][ss], + region=region) + sd_score.name = "Sensitivity Spatial Distribution Score %s %s %s" % (name, pp, + region) + sd_score.toNetCDF4(dataset,group="Sensitivities", + attributes={"std":space_std.data, + "R" :space_cor.data}) + + # Bias: maps, scalars, and scores bias = REF_corr[pp][ss].bias(COM_corr[pp][ss]) # !!! 
TO-DO: Use the confidence interval of REF_corr instead of the REF_corr bias_score_map = Score(bias, REF_corr[pp][ss]) - bias_score_map.data.mask = ref_and_com == False - bias.name = 'sensitivity_bias_map_of_%s_and_%s' % (self.name, name) - bias.toNetCDF4(dataset, group = 'Sensitivities') - bias_score_map.name = 'sensitivity_biasscore_map_of_%s' % (self.name, name) - bias_score_map.toNetCDF4(dataset, group = 'Sensitivities') - - del bias, bias_score_map + bias_score_map.data.mask = (ref_and_com == False) # for some reason I need to explicitly force the mask + if dataset is not None: + bias.name = 'sensitivity_bias_map_of_%s_and_%s' % (name, pp) + bias.toNetCDF4(dataset, group = 'Sensitivities') + bias_score_map.name = 'sensitivity_biasscore_map_of_%s_and_%s' % (name, pp) + bias_score_map.toNetCDF4(dataset, group = 'Sensitivities') + for region in regions: + bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) + bias_val.name = 'Sensitivity Bias %s %s %s' % (name, pp, region) + bias_val.toNetCDF4(dataset, group = 'Sensitivities') + bias_score = bias_score_map.integrateInSpace(region=region,mean=True, + weight=normalizer) + bias_score.name = 'Bias Score %s %s %s' % (name, pp, region) + bias_score.toNetCDF4(dataset, group = 'Sensitivities') + del bias,bias_score_map return From 97e0222d758000863e90b354041725b41967638a Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Wed, 27 Oct 2021 18:36:54 -0400 Subject: [PATCH 12/18] sens & trend framework finished --- bin/ilamb-run | 13 +- src/ILAMB/ConfSoilMoisture.py | 310 +++++++++++++++++++++------------- src/ILAMB/Confrontation.py | 26 +-- src/ILAMB/ModelResult.py | 1 + src/ILAMB/Variable.py | 185 ++++++++++++++------ src/ILAMB/constants.py | 4 +- src/ILAMB/ilamblib.py | 276 ++++++++++++++++-------------- 7 files changed, 493 insertions(+), 322 deletions(-) diff --git a/bin/ilamb-run b/bin/ilamb-run index 7299c472..c3575de2 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -409,6 +409,7 @@ def BuildLocalWorkList(M,C,skip_cache=False): return localW + def WorkConfront(W,verbose=False,clean=False): """Performs the confrontation analysis @@ -457,6 +458,7 @@ def WorkConfront(W,verbose=False,clean=False): if verbose: print((" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name)) + def WorkPost(M,C,W,S,verbose=False,skip_plots=False): """Performs the post-processing @@ -479,9 +481,7 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): enable to skip plotting """ maxCL = 45; maxML = 20 - for c in C: - print(c.name, c.source, c.variable, c.output_path) # DEBUG - c.determinePlotLimits() + for c in C: c.determinePlotLimits() for i,w in enumerate(W): m,c = w try: @@ -493,6 +493,9 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): proc[rank] += dt if verbose: dt = datetime.timedelta(seconds=max(1,int(np.round(dt)))) + print((" {0:>%d} {1:<%d} %sCompleted%s {2:>8}" % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name,str(dt))) + + print((" {0:>%d} {1:<%d} %sCompleted%s {2:>8}" % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name,str(dt))) sys.stdout.flush() except Exception as ex: @@ -552,6 +555,7 @@ class MPIFileHandler(logging.FileHandler): def _open(self): stream = MPI.File.Open( self.comm, self.baseFilename, self.mode ) + stream.Set_atomicity(True) return stream @@ -690,8 +694,7 @@ S = Scoreboard(args.config[0], rmse_score_basis = args.rmse_score_basis) C = MatchRelationshipConfrontation(S.list()) - -C = MatchSensitivityConfrontation(C) # YW +C = MatchSensitivityConfrontation(C) if 
len(args.study_limits) == 2: args.study_limits[1] += 1 diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index a02f71e7..49a68704 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -189,9 +189,7 @@ def _addDepth(v): logger.info("[%s][%s] building depths %.1f to %.1f in loop %d" % (self.name,m.name,z0,zf,i)) # get reference variable - print('Loading obs ' + str(z0) + '-' + str(zf)) - tstart = time.time() # DEBUG - + tstart = time.time() if obs_dname is None: obs = Variable(filename = self.source, variable_name = self.variable, @@ -209,16 +207,13 @@ def _addDepth(v): zf = zf).trim(t = [mod_t0,mod_tf]) obs = obs.integrateInDepth(z0 = z0, zf = zf, mean = True) obs.name = "depthint%.2f-%.2f" % (z0, zf) - - tend = time.time() # DEBUG - print("Loading obs " + str(z0) + '-' + str(zf) + ' took ' + str((tend - tstart) / 60)) # DEBUG - print("obs ", obs.name, obs.unit, obs.time[0], obs.time[-1], - obs.lat[0], obs.lat[-1], obs.lon[0], obs.lon[-1]) # DEBUG + tend = time.time() + logger.info("[%s][%s] loading the reference depths %.1f to %.1f took %f minutes" % (self.name,m.name,z0, zf,(tend-tstart)/60)) + ##print("obs ", obs.name, obs.unit, obs.time[0], obs.time[-1], + ## obs.lat[0], obs.lat[-1], obs.lon[0], obs.lon[-1]) # DEBUG # get model variable - print('Loading model ' + str(z0) + '-' + str(zf)) tstart = time.time() # DEBUG - if mod_dname is None: mod = m.extractTimeSeries(self.variable, alt_vars = self.alternate_vars, @@ -236,11 +231,10 @@ def _addDepth(v): final_depth = zf).trim(t=[mod_t0,mod_tf]).convert(obs.unit) mod = mod.trim(d = [z0, zf]).integrateInDepth(z0 = z0, zf = zf, mean = True) mod.name = "depthint%.2f-%.2f" % (z0, zf) - tend = time.time() # DEBUG - print("Loading model " + str(z0) + '-' + str(zf) + ' took ' + str((tend - tstart) / 60)) # DEBUG - print("mod ", mod.name, mod.unit, mod.data, mod.time[0], mod.time[-1], - mod.lat[0], mod.lat[-1], mod.lon[0], mod.lon[-1]) # DEBUG + logger.info("[%s][%s] loading the model depths %.1f to %.1f took %f minutes" % (self.name,m.name,z0, zf,(tend-tstart)/60)) + ##print("mod ", mod.name, mod.unit, mod.time[0], mod.time[-1], + ## mod.lat[0], mod.lat[-1], mod.lon[0], mod.lon[-1]) # DEBUG assert obs.time.size == mod.time.size @@ -261,8 +255,6 @@ def _reduceRoundoffErrors(var): def _getOrder(var): return np.log10(np.abs(var.data).clip(1e-16)).mean() - order = _getOrder(obs) - count = 0 obs_list = [] mod_list = [] @@ -289,13 +281,15 @@ def _getOrder(var): extents = self.extents, logstring = "[%s][%s]" % (self.longname,m.name)) + order = _getOrder(obs) + count = 0 while order < -2 and count < 2: obs = _reduceRoundoffErrors(obs) order = _getOrder(obs) count += 1 - # convert the model data to the same unit - mod = mod.convert(obs.unit) + # convert the model data to the same unit + mod = mod.convert(obs.unit) obs_list.append(obs) mod_list.append(mod) return obs_list, mod_list @@ -324,11 +318,12 @@ def confront(self,m): # Read in some options to decide whether to run the trend state analysis skip_trend = self.keywords.get("skip_trend" ,False) + if type(skip_trend) == type(""): + skip_trend = (skip_trend.lower() == "true") # Get the depth-integrated observation and model data for each slab. for obs,mod,z0,zf in self.stageData(m): - print('Confronting data ' + obs.name + ' v.s. ' + mod.name + \ - '... 
%.2f-%.2f' % (z0, zf)) # DEBUG + logger.info("[%s][%s] confronting the depths %.1f to %.1f" % (self.name,m.name,z0, zf)) if obs.spatial: # Calculate mean state @@ -372,39 +367,43 @@ def confront(self,m): obs_comparable = deepcopy(obs) mod_comparable = deepcopy(mod) - obs_indep_list = [] - mod_indep_list = [] - for indep in self.sensitivities: - obs_indep, mod_indep = self.stageRef(indep) - obs_comparable, obs_indep = il.MakeComparable(obs_comparable, obs_indep, - mask_ref = True, clip_ref = True, - extents = self.extents, - logstring = "[%s][%s]MakeComparablePass1" % \ - (obs.variable_name, obs_indep.variable_name)) - mod_comparable, mod_indep = il.MakeComparable(mod_comparable, mod_indep, - mask_ref = True, clip_ref = True, - extents = self.extents, - logstring = "[%s][%s]MakeComparablePass1" % \ - (mod.variable_name, mod_indep.variable_name)) - obs_indep_list.append(obs_indep) - mod_indep_list.append(mod_indep) + obs_indep_list, mod_indep_list = self.stageRef(m) + + # Find the minimum overlapping time periods between obs_comparable and + # all the obs_indep, mod_comparable and all the mod_indep + obs_t0 = obs_comparable.time_bnds[0,0] + obs_tf = obs_comparable.time_bnds[-1,1] + mod_t0 = mod_comparable.time_bnds[0,0] + mod_tf = mod_comparable.time_bnds[-1,1] + for obs_indep, mod_indep in zip(obs_indep_list, mod_indep_list): + obs_indep = il.ClipTime(obs_indep, obs_t0, obs_tf) + obs_t0 = max(obs_t0, obs_indep.time_bnds[ 0,0]) + obs_tf = min(obs_tf, obs_indep.time_bnds[-1,1]) + + mod_indep = il.ClipTime(mod_indep, mod_t0, mod_tf) + mod_t0 = max(mod_t0, mod_indep.time_bnds[ 0,0]) + mod_tf = min(mod_tf, mod_indep.time_bnds[-1,1]) + # (second pass) - for k, obs_indep, mod_indep in zip(range(len(obs_indep_list)), - obs_indep_list, mod_indep_list): - obs_comparable, obs_indep = il.MakeComparable(obs_comparable, obs_indep, - mask_ref = True, clip_ref = True, - extents = self.extents, - logstring = "[%s][%s]MakeComparablePass2" % \ - (obs.variable_name, obs_indep.variable_name)) - mod_comparable, mod_indep = il.MakeComparable(mod_comparable, mod_indep, - mask_ref = True, clip_ref = True, - extents = self.extents, - logstring = "[%s][%s]MakeComparablePass2" % \ - (mod.variable_name, mod_indep_variable_name)) + obs_comparable = il.ClipTime(obs_comparable, obs_t0, obs_tf) + mod_comparable = il.ClipTime(mod_comparable, mod_t0, mod_tf) + + obs_indep_list_update = [] + mod_indep_list_update = [] + for obs_indep, mod_indep in zip(obs_indep_list, mod_indep_list): + obs_indep_list_update.append(il.ClipTime(obs_indep, obs_t0, obs_tf)) + mod_indep_list_update.append(il.ClipTime(mod_indep, mod_t0, mod_tf)) + obs_indep_list = [oil for oil in obs_indep_list_update] + mod_indep_list = [mil for mil in mod_indep_list_update] + obs_indep_list_update = [] + mod_indep_list_update = [] if obs.spatial: - il.AnalysisPartialCorrSpace(obs_comparable, mod_comparable, - obs_indep_list, mod_indep_list) + il.AnalysisPartialCorrSpace(obs_comparable, mod_comparable, + obs_indep_list, mod_indep_list, + benchmark_dataset = fcm.obs_dset, + dataset = fcm.mod_dset, + mass_weighting = mass_weighting) else: # !!! 
TO-DO: Add AnalysisPartialCorrSites pass @@ -434,17 +433,16 @@ def determinePlotLimits(self): prune = False for fname in glob.glob(os.path.join(self.output_path,"*.nc")): with Dataset(fname) as dataset: - for pn in ["MeanState", "TrendState"]: + for pn,ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]): if pn not in dataset.groups: continue - limits[pn] = {} - group = dataset.groups[pn] variables = [v for v in group.variables.keys() \ if v not in group.dimensions.keys()] for vname in variables: + if (ffix + "_") != vname[:(len(ffix)+1)]: continue var = group.variables[vname] - pname = vname.split("_")[0] + pname = vname.split("_")[1] region = vname.split("_")[-1] if var[...].size <= 1: continue if pname in space_opts[pn]: @@ -539,7 +537,10 @@ def compositePlots(self): # get the HTML page for pn, ffix in zip(['MeanState', 'TrendState'], ['mean', 'trend']): - page = [page for page in self.layout.pages if pn in page.name][0] + try: + page = [page for page in self.layout.pages if pn in page.name][0] + except: + continue models = [] colors = [] @@ -588,7 +589,7 @@ def compositePlots(self): # composite annual cycle plot if has_cycle and len(models) > 0: - page.addFigure("Spatially integrated regional mean", + page.addFigure("Spatially integrated regional " + ffix, ffix + "_compcycle", "RNAME_" + ffix + "_compcycle.png", side = "ANNUAL CYCLE", @@ -613,19 +614,19 @@ def compositePlots(self): self.limits[pn]["cycle"][region]["min"]) var.plot(ax, lw=2, color=color, label=name, - ticks = time_opts[ffix]["cycle"]["ticks"], - ticklabels = time_opts[ffix]["cycle"]["ticklabels"], + ticks = time_opts[pn]["cycle"]["ticks"], + ticklabels = time_opts[pn]["cycle"]["ticklabels"], vmin = self.limits[pn]["cycle"][region]["min"]-dy, vmax = self.limits[pn]["cycle"][region]["max"]+dy) ylbl = post.UnitStringToMatplotlib(var.unit) ax.set_ylabel(ylbl) ax.set_title(zstr + ' '+ self.depths_units) fig.savefig(os.path.join(self.output_path, - "%s_" + ffix + "_compcycle.png" % (region))) + "%s_%s_compcycle.png" % (region, ffix))) plt.close() # plot legends with model colors (sorted with Benchmark data on top) - page.addFigure("Spatially integrated regional mean", + page.addFigure("Spatially integrated regional " + ffix, "legend_" + ffix + "_compcycle", "legend_" + ffix + "_compcycle.png", side = "MODEL COLORS", @@ -682,7 +683,7 @@ def _alphabeticalBenchmarkFirst(key): 1.0,fig,colors,True,220+dind+1) ax.set_title(zstr + ' ' + self.depths_units) fig.savefig(os.path.join(self.output_path, - "%s_" + ffix + "_spatial_variance.png" % (region))) + "%s_%s_spatial_variance.png" % (region, ffix))) plt.close() @@ -703,8 +704,11 @@ def modelPlots(self,m): # get the HTML page for pn, ffix in zip(['MeanState', 'TrendState'], ["mean", "trend"]): - page = [page for page in self.layout.pages if pn in page.name][0] - + try: + page = [page for page in self.layout.pages if pn in page.name][0] + except: + continue + with Dataset(fname) as dataset: group = dataset.groups[pn] variables = getVariableList(group) @@ -713,19 +717,19 @@ def modelPlots(self,m): # The other depths will be handled in plotting zstr_0 = '%.2f-%.2f' % (self.depths[0,0], self.depths[0,1]) if not zstr_0 in vname: continue + if (ffix + "_") != vname[:(len(ffix)+1)]: continue # is this a variable we need to plot? 
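# The prefix check above keys on the "<ffix>_" fragment; the rest of a plot
# variable's name follows a "<ffix>_<pname>_of_<var>_over_<suffix>" pattern,
# so its fields can be recovered by splitting on underscores. A minimal
# sketch with a hypothetical name:
vname  = "trend_spaceint_of_mrsos_over_global"
ffix   = vname.split("_")[0]     # "trend"
pname  = vname.split("_")[1]     # "spaceint"
region = vname.split("_")[-1]    # "global"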
if group.variables[vname][...].size <= 1: continue var = Variable(filename=fname,groupname=pn,variable_name=vname) - pname = vname.split("_")[1] if (var.spatial or (var.ndata is not None)) and not var.temporal: # grab plotting options if pname not in self.limits[pn].keys(): continue - if pname not in space_opts[ffix]: continue - opts = space_opts[ffix][pname] + if pname not in space_opts[pn]: continue + opts = space_opts[pn][pname] ##print('... is used in space_opts') # DEBUG @@ -758,7 +762,7 @@ def modelPlots(self,m): # Jumping through hoops to get the benchmark plotted and in the html output if self.master and (pname == "timeint" or \ pname == "phase" or pname == "iav"): - opts = space_opts[ffix][pname] + opts = space_opts[pn][pname] # add to html layout page.addFigure(opts["section"], @@ -796,8 +800,8 @@ def modelPlots(self,m): continue # grab plotting options - if pname not in time_opts[ffix]: continue - opts = time_opts[ffix][pname] + if pname not in time_opts[pn]: continue + opts = time_opts[pn][pname] # add to html layout page.addFigure(opts["section"], @@ -1199,36 +1203,87 @@ def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, data = score).toNetCDF4(results,group="Relationships") - def _sensitivities(self, m): + def _sensitivity(self, m): # If there are no sensitivities to analyze, get out of here if self.sensitivities is None: return - def _retrieveCorr(cname, filename): - # Grab by depth!!!!!!!!! + def _retrieveCorr(variable, filename, alternate_vars): with Dataset(filename,mode="r") as dset: - key = [v for v in dset.groups["Sensitivies"].variables.keys() \ - if "partial_correlation_" in v and cname in v] - key2 = [v for v in dset.groups["Sensitivies"].variables.keys() \ - if "partial_pvalue_" in v and cname in v] - return Variable(filename = filename, - groupname = "Sensitivities", - variable_name = key[0]), \ - Variable(filename = filename, - groupname = "Sensitivities", - variable_name = key[1]) + varlist = [variable] + alternate_vars + corr_list_orig = [] + corr_list_itrp = [] + pval_list_orig = [] + pval_list_itrp = [] + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) - def _retrieveBias(cname, filename): + key = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "partial_correlation_" in v and zstr in v and \ + sum([vv in v for vv in varlist]) > 0 and \ + 'original grids' in v] + if len(key) > 0: + corr_list_orig.append(Variable(filename = filename, + groupname = "Sensitivities", + variable_name = key[0])) + key2 = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "partial_correlation_" in v and zstr in v and \ + sum([vv in v for vv in varlist]) > 0 and \ + 'common grids' in v] + if len(key2) > 0: + corr_list_itrp.append(Variable(filename = filename, + groupname = "Sensitivities", + variable_name = key2[0])) + key3 = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "partial_pvalue_" in v and zstr in v and \ + sum([vv in v for vv in varlist]) > 0 and \ + 'original grids' in v] + if len(key3) > 0: + pval_list_orig.append(Variable(filename = filename, + groupname = "Sensitivities", + variable_name = key3[0])) + key4 = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "partial_pvalue_" in v and zstr in v and \ + sum([vv in v for vv in varlist]) > 0 and \ + 'common grids' in v] + if len(key4) > 0: + pval_list_itrp.append(Variable(filename = filename, + groupname = "Sensitivities", + variable_name = key4[0])) + return corr_list_orig, 
corr_list_itrp, pval_list_orig, pval_list_itrp + + def _retrieveBias(variable, filename, alternate_vars): with Dataset(filename,mode="r") as dset: - key = [v for v in dset.groups["Sensitivies"].variables.keys() \ - if "sensitivity_bias_map_" in v and cname in v] - key2 = [v for v in dset.groups["Sensitivies"].variables.keys() \ - if "sensitivity_biasscore_map_" in v and cname in v] - return Variable(filename = filename, - groupname = "Sensitivities", - variable_name = key[0]), \ - Variable(filename = filename, - groupname = "Sensitivities", - variable_name = key[1]) + varlist = [variable] + alternate_vars + key_list = [] + key2_list = [] + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f' % (z0, zf) + + key = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "sensitivity_bias_map_" in v and zstr in v and \ + sum([vv in v for vv in varlist]) > 0] + key2 = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "sensitivity_biasscore_map_" in v and zstr in v and \ + sum([vv in v for vv in varlist]) > 0] + if len(key) > 0: + key_list.append(key[0]) + if len(key2) > 0: + key2_list.append(key2[0]) + return [Variable(filename = filename, + groupname = "Sensitivities", + variable_name = kk) for kk in key_list], \ + [Variable(filename = filename, + groupname = "Sensitivities", + variable_name = kk) for kk in key2_list] + + def _scoreFunction(ref,com): + mask = ref.mask + com.mask + ref = np.ma.masked_array(ref.data,mask=mask).compressed() + com = np.ma.masked_array(com.data,mask=mask).compressed() + return np.exp(-np.linalg.norm(ref-com)/np.linalg.norm(ref)) # Get the HTML page page = [page for page in self.layout.pages if "Sensitivities" in page.name] @@ -1237,6 +1292,7 @@ def _retrieveBias(cname, filename): with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)), mode="r+") as results: + # Grab/create a sensitivity and scalars group group = None if "Sensitivities" not in results.groups: @@ -1249,7 +1305,28 @@ def _retrieveBias(cname, filename): scalars = group.groups["scalars"] # for each sensitivity relationship... 
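# A toy check of the scoring function defined above, with made-up values:
# identical fields score 1, and the score decays toward 0 as they diverge.
import numpy as np
ref = np.ma.masked_invalid(np.array([0.2, 0.4, np.nan, 0.8]))
com = np.ma.masked_invalid(np.array([0.25, 0.35, 0.6, np.nan]))
mask = ref.mask + com.mask
r = np.ma.masked_array(ref.data, mask=mask).compressed()
c = np.ma.masked_array(com.data, mask=mask).compressed()
print(np.exp(-np.linalg.norm(r - c) / np.linalg.norm(r)))  # ~0.85 here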
- for c in self.sensitivities: + for c in self.sensitivities: + # Get the sensitivity map from the model and obs + try: + import pdb; pdb.set_trace() + + ref_corr_list, REF_corr_list, ref_corr_p_list, REF_corr_p_list = _retrieveCorr(c.variable, os.path.join(self.output_path, "%s_%s.nc" % (self.name,"Benchmark")), c.alternate_vars) + com_corr_list, COM_corr_list, com_corr_p_list, COM_corr_p_list = _retrieveCorr(c.variable, os.path.join(self.output_path, "%s_%s.nc" % (self.name,m.name)), c.alternate_vars) + com_bias_map_list, com_biasscore_map_list = _retrieveBias(c.variable, os.path.join(self.output_path, "%s_%s.nc" % (self.name,m.name)), c.alternate_vars) + + ref_name = self.longname.split('/')[0] + ref_min = np.min([ref_corr.data.min() for ref_corr in ref_corr_list]) + ref_max = np.max([ref_corr.data.max() for ref_corr in ref_corr_list]) + com_name = c.longname.split('/')[0] + com_min = np.min([com_corr.data.min() for com_corr in com_corr_list]) + com_max = np.max([com_corr.data.max() for com_corr in com_corr_list]) + diff_min = np.min([com_bias_map.data.min() \ + for com_bias_map in com_bias_map_list]) + diff_max = np.min([com_bias_map.data.max() \ + for com_bias_map in com_bias_map_list]) + except: + continue + # Add figures to the html page page.addFigure(c.longname, "benchmark_sens_%s" % com_name, @@ -1267,25 +1344,6 @@ def _retrieveBias(cname, filename): legend = False, benchmark = False) - # Get the sensitivity map from the model and obs - try: - ref_corr_list, ref_corr_p_list = _retrieveCorr(c.name, os.path.join(c.output_path, "%s_%s.nc" % (self.name,"Benchmark"))) - com_corr_list, com_corr_p_list = _retrieveCorr(c.name, os.path.join(c.output_path, "%s_%s.nc" % (self.name,m.name))) - com_bias_map_list, com_biasscore_map_list = _retrieveBias(c.name, os.path.join(c.output_path, "%s_%s.nc" % (self.name,m.name))) - - ref_name = self.longname.split('/')[0] - ref_min = np.min([ref_corr.data.min() for ref_corr in ref_corr_list]) - ref_max = np.max([ref_corr.data.max() for ref_corr in ref_corr_list]) - com_name = c.longname.split('/')[0] - com_min = np.min([com_corr.data.min() for com_corr in com_corr_list]) - com_max = np.max([com_corr.data.max() for com_corr in com_corr_list]) - diff_min = np.min([com_bias_map.data.min() \ - for com_bias_map in com_bias_map_list]) - diff_max = np.min([com_bias_map.data.max() \ - for com_bias_map in com_bias_map_list]) - except: - continue - r = Regions() for region in self.regions: nax = self.depths.shape[0] @@ -1300,7 +1358,7 @@ def _retrieveBias(cname, filename): zstr = '%.2f-%.2f' % (z0, zf) # Make the plots - ax1 = ref_corr_list[dind].plot(None, fig, nax, region = region, + ax1 = ref_corr_list[dind].plot(None, fig1, nax, region = region, vmin = ref_min, vmax = ref_max, cmap = 'RdBu') # ---- mask the p-value @@ -1311,10 +1369,10 @@ def _retrieveBias(cname, filename): ax1.pcolormesh(lon, lat, np.ma.masked_array(ref_temp.data > 0.05, ref_temp.data <= 0.05), - cmap = 'Grays', vmin = 0.5, vmax = 1.5, alpha = 0.5) + cmap = 'Greys', vmin = 0.5, vmax = 1.5, alpha = 0.5) ax1.set_title(zstr + ' ' + self.depths_units) - ax2 = com_corr_list[dind].plot(None, fig, nax, region = region, + ax2 = com_corr_list[dind].plot(None, fig2, nax, region = region, vmin = com_min, vmax = com_max, cmap = 'RdBu') # ---- mask the p-value @@ -1325,21 +1383,33 @@ def _retrieveBias(cname, filename): ax2.pcolormesh(lon, lat, np.ma.masked_array(com_temp.data > 0.05, com_temp.data <= 0.05), - cmap = 'Grays', vmin = 0.5, vmax = 1.5, + cmap = 'Greys', vmin = 0.5, vmax = 1.5, alpha = 0.5) 
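# The overlay trick used above, in isolation: insignificant cells (p > 0.05)
# are drawn as a semi-transparent grey layer on top of the correlation map,
# while significant cells are masked out of the overlay and left untouched.
# A self-contained sketch with synthetic p-values:
import numpy as np
import matplotlib.pyplot as plt
lon, lat = np.meshgrid(np.linspace(-180, 180, 73), np.linspace(-90, 90, 37))
pval = np.random.RandomState(0).rand(*lat.shape)
fig, ax = plt.subplots()
ax.pcolormesh(lon, lat,
              np.ma.masked_array(pval > 0.05, pval <= 0.05),
              cmap = 'Greys', vmin = 0.5, vmax = 1.5, alpha = 0.5,
              shading = 'auto')
plt.close(fig)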
ax2.set_title(zstr + ' ' + self.depths_units) - ax3 = com_bias_map_list[dind].plot(None, fig, nax, region = region, + ax3 = com_bias_map_list[dind].plot(None, fig3, nax, region = region, vmin = diff_min, vmax = diff_max, cmap = 'RdBu') ax3.set_title(zstr + ' ' + self.depths_units) # Score the functional response over the regions - score = _scoreFunction(ref_temp,com_temp) + score = _scoreFunction(REF_corr_list[dind].data, COM_corr_list[dind].data) score_list.append(score) del ref_temp, com_temp + + fig1.savefig(os.path.join(self.output_path, + "Benchmark_%s_sens_%s.png" % (region, com_name))) + fig2.savefig(os.path.join(self.output_path, + "%s_%s_sens_%s.png" % (m.name, region, com_name))) + fig3.savefig(os.path.join(self.output_path, + "%s_%s_sens_diff_%s.png" % (m.name, region, + com_name))) + plt.close(fig1) + plt.close(fig2) + plt.close(fig3) + score = np.sum(np.array(score_list)*(self.depths[:,1] - \ self.depths[:,0])) / \ (self.depths[-1,1] - self.depths[0,0]) @@ -1349,7 +1419,7 @@ def _retrieveBias(cname, filename): else: Variable(name = sname, unit = "1", - data = score).toNetCDF4(results,group="Sensitivites") + data = score).toNetCDF4(results,group="Sensitivities") # This is gone into ILAMB.Confrontation.Confrontation.computeOverallScore(m) diff --git a/src/ILAMB/Confrontation.py b/src/ILAMB/Confrontation.py index ca5b025b..1517099f 100644 --- a/src/ILAMB/Confrontation.py +++ b/src/ILAMB/Confrontation.py @@ -142,7 +142,6 @@ def __init__(self,**keywords): self.table_unit = keywords.get("table_unit",None) self.plot_unit = keywords.get("plot_unit",None) self.space_mean = keywords.get("space_mean",True) - # !!! Trend keywords self.relationships = keywords.get("relationships",None) self.sensitivities = keywords.get("sensitivities",None) # YW self.keywords = keywords @@ -168,10 +167,13 @@ def __init__(self,**keywords): "Spatially integrated regional mean"]) # Trend State page, YW - if self.___: - pages.insert(-2, post.HtmlPage('TrendState', 'Trend State')) - pages[-2].setHeader('CNAME / RNAME / MNAME') - pages[-2].setSections(["Temporally integrated period trend", + skip_trend = self.keywords.get("skip_trend" ,False) + if type(skip_trend) == type(""): + skip_trend = (skip_trend.lower() == "true") + if not skip_trend: + pages.append(post.HtmlPage('TrendState', 'Trend State')) + pages[-1].setHeader('CNAME / RNAME / MNAME') + pages[-1].setSections(["Temporally integrated period trend", "Spatially integrated regional trend"]) # Datasites page @@ -217,10 +219,10 @@ def __init__(self,**keywords): pages[-1].setSections(list(self.relationships)) # Sensitivities page, YW - if self.sensitivites is not None: - pages.insert(-2, post.HtmlPage('Sensitivities', 'Partial Correlation Relationships')) - pages[-2].setHeader('CNAME / RNAME / MNAME') - pages[-2].setSections(list(self.sensitivities)) + if self.sensitivities is not None: + pages.append(post.HtmlPage('Sensitivities', 'Partial Correlation Relationships')) + pages[-1].setHeader('CNAME / RNAME / MNAME') + pages[-1].setSections(list(self.sensitivities)) pages.append(post.HtmlAllModelsPage("AllModels","All Models")) pages[-1].setHeader("CNAME / RNAME / MNAME") @@ -434,7 +436,8 @@ def determinePlotLimits(self): variables = [v for v in group.variables.keys() if v not in group.dimensions.keys()] for vname in variables: var = group.variables[vname] - pname = vname.split("_")[0] + ## pname = vname.split("_")[0] + pname = vname.split("_")[1] # YW: to match the prefixed part in ilamblib.py region = vname.split("_")[-1] if var[...].size <= 1: continue if 
pname in space_opts: @@ -713,7 +716,8 @@ def modelPlots(self,m): for vname in variables: # is this a variable we need to plot? - pname = vname.split("_")[0] + ## pname = vname.split("_")[0] + pname = vname.split("_")[1] # YW: to deal with the prefixed name part in ilamblib if group.variables[vname][...].size <= 1: continue var = Variable(filename=fname,groupname="MeanState",variable_name=vname) diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py index 9e8250be..03d871f5 100644 --- a/src/ILAMB/ModelResult.py +++ b/src/ILAMB/ModelResult.py @@ -264,6 +264,7 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time if v not in self.variables: continue for ifile,pathName in enumerate(self.variables[v]): if _skipFile(pathName,altvars,lats,lons,same_site_epsilon): continue + var = Variable(filename = pathName, variable_name = variable, alternate_vars = altvars[1:], diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 871a1434..c2c3436b 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -5,13 +5,13 @@ import cartopy.feature as cfeature from pylab import get_cmap from cf_units import Unit +import cftime as cf from . import ilamblib as il from . import Post as post import numpy as np import matplotlib.pyplot as plt import warnings from scipy.stats import linregress -import dask.array as dsa def _shiftLon(lon): @@ -40,6 +40,17 @@ def _createBnds(x): x_bnds[-1,1] = x[-1] + 0.5*(x[-1]-x[-2]) return x_bnds + +def _normalize(ma_array): + temp = np.where(ma_array.mask, np.nan, ma_array.data) + n_mean = np.nanmean(temp, axis = 0, keepdims = True) + n_std = np.nanstd(temp, axis = 0, keepdims = True) + temp = (temp - n_mean) / np.where((n_std > 0.) | np.isnan(n_std), n_std, + np.nanmin(n_std[n_std > 0.]) * 1e-3) + temp = np.ma.masked_where(ma_array.mask, temp) + return temp, n_mean, n_std + + def _olsTensor(Y, x): """ Repeated calculation of linear regression in the spatial dimensions. 
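For reference, the core of the `_normalize` helper added above, run on a small
hypothetical masked array (the zero-variance floor in the helper is omitted;
this only illustrates the leading-axis z-scoring):

    import numpy as np

    x = np.ma.masked_invalid(np.array([[1.0, 2.0],
                                       [3.0, np.nan],
                                       [5.0, 4.0]]))
    temp   = np.where(x.mask, np.nan, x.data)
    n_mean = np.nanmean(temp, axis = 0, keepdims = True)  # per-column mean
    n_std  = np.nanstd (temp, axis = 0, keepdims = True)  # per-column std
    z      = np.ma.masked_where(x.mask, (temp - n_mean) / n_std)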
@@ -92,6 +103,10 @@ def _olsTensor(Y, x): x = np.ma.array(x.data, mask = x.mask | Y.mask) Y = np.ma.array(Y, mask = x.mask) + # normalize + x, _, x_scale = _normalize(x) + Y, _, Y_scale = _normalize(Y) + # add constant term x = np.ma.concatenate([np.ma.array(np.ones(Y.shape), mask = Y.mask), x], axis = 1) @@ -109,14 +124,28 @@ def _olsTensor(Y, x): dof = np.sum(Y.mask == False, axis = 0) - 2 resid = yy - np.einsum('ijk,jlk->ilk', xx, beta) mse = np.sum(np.power(resid,2), axis=0) / dof - std = np.ma.sum(np.ma.power(x[:,[1],:] - \ - np.ma.mean(x[:,[1],:],axis=0,keepdims=True), 2), axis = 0) + # somehow, unable to apply np.ma.mean on x[:,[1],:] + temp = x[:,[1],:] + temp.data[temp.mask] = np.nan + temp = temp.data + std = np.nansum(np.power(temp - np.nanmean(temp, axis = 0, keepdims = True), 2), axis = 0) + + # somehow, using masked array here results in underflow error; had to use np.nan + np.seterr(divide='ignore', invalid='ignore') + beta = beta[1, :] # discard intercept tval = beta / np.sqrt(mse/std) + np.seterr(divide='raise', invalid='raise') pval = 2 * t.sf(np.abs(tval), dof) - # discard intercept & restore shape - beta = np.ma.array(beta[1,:], mask = np.sum(Y.mask==False, axis = 0)<3) - pval = np.ma.array(pval[1,:], mask = np.sum(Y.mask==False, axis = 0)<3) + # scale the beta + beta = beta * Y_scale / x_scale + + # mask the data + tval = np.ma.masked_invalid(tval) + pval = np.ma.array(pval, mask = tval.mask) + beta = np.ma.array(beta, mask = tval.mask) + + # restore shape if len(orig_shape) > 1: beta = beta.reshape(orig_shape[1:]) pval = pval.reshape(orig_shape[1:]) @@ -750,7 +779,8 @@ def trendInTime(self,**keywords): mask = False if self.data.ndim > 1 and self.data.mask.size > 1: ##mask = np.apply_along_axis(np.all,0,self.data.mask[ind]) - mask = np.all(self.data.mask[ind], 0) + mask = np.broadcast_to(np.all(self.data.mask[ind], 0, keepdims = True), + self.data.mask.shape) data = np.ma.masked_array(self.data[ind],mask=mask,copy=False) if self.data_bnds is not None: data_bnds = np.ma.concatenate([self.data_bnds[...,0][ind], @@ -768,10 +798,12 @@ def trendInTime(self,**keywords): begin = np.argmin(self.time[:11]%365) end = begin+int(self.time[begin:].size/12.)*12 shp = (-1,12) + data.shape[1:] - integral = data[begin:end,...].reshape(shp) + integral = np.mean(data[begin:end,...].reshape(shp), axis = 0) if self.data_bnds is not None: shp = (-1,12) + data_bnds.shape[1:] - integral_bnd = data_bnds[begin:end,...].reshape(shp) + integral_bnd = np.mean(data_bnds[begin:end,...].reshape(shp), axis = 0) + else: + integral_bnd = None else: integral = data integral_bnd = data_bnds @@ -783,11 +815,11 @@ def trendInTime(self,**keywords): unit = Unit(unit0.format().split()[-1]) if not isinstance(integral.mask, np.ndarray): if integral.mask == True: - integral=np.ma.masked_array(data=integral.data, - mask=np.ones(integral.shape,dtype='bool')) - else: - integral=np.ma.masked_array(data=integral.data, - mask=np.zeros(integral.shape,dtype='bool')) + integral=np.ma.masked_array(data=integral.data, + mask=np.ones(integral.shape,dtype='bool')) + else: + integral=np.ma.masked_array(data=integral.data, + mask=np.zeros(integral.shape,dtype='bool')) unit0.convert(integral,unit,inplace=True) if integral_bnd is not None: unit0.convert(integral_bnd,unit,inplace=True) @@ -795,20 +827,23 @@ def trendInTime(self,**keywords): # calculate the trend and the significance # !!! 
TO-DO: Change the trend_lower_bnd & trend_upper_bnd to be based on # the confidence interval of the linear regression - trend, trend_p = _olsTensor(integral, np.mean(time_bnds, axis = 1)) + trend, trend_p = _olsTensor(integral, np.arange(integral.shape[0])) if integral_bnd is not None: trend_lower_bnd, trend_lower_bnd_p = _olsTensor(integral_bnd[0,...], - np.mean(time_bnds, axis =1)) + np.arange(integral.shape[0])) trend_upper_bnd, trend_upper_bnd_p = _olsTensor(integral_bnd[1,...], - np.mean(time_bnds, axis =1)) + np.arange(integral.shape[0])) trend_bnd = np.ma.stack([trend_lower_bnd[np.newaxis, ...], trend_upper_bnd[np.newaxis, ...]], axis = 0) trend_bnd_p = np.ma.stack([trend_lower_bnd_p[np.newaxis, ...], trend_upper_bnd_p[np.newaxis, ...]], axis = 0) + else: + trend_bnd = None + trend_bnd_p = None # handle units - unit = Unit(self.unit + " year$^{-1}$") + unit = Unit(self.unit + " year-1") name = self.name + "_trend_over_time" return Variable(data = trend, data_bnds = trend_bnd, @@ -817,7 +852,7 @@ def trendInTime(self,**keywords): lon = self.lon, lon_bnds = self.lon_bnds, depth = self.depth, depth_bnds = self.depth_bnds, area = self.area, ndata = self.ndata), \ - Variable(data = trend_p, data_bnds = trend_bnd_p, unit = None, + Variable(data = trend_p, data_bnds = trend_bnd_p, unit = '1', name = name.replace("trend", "trendp"), lat = self.lat, lat_bnds = self.lat_bnds, lon = self.lon, lon_bnds = self.lon_bnds, @@ -926,7 +961,7 @@ def trendAnnualCycle(self): area = self.area, depth = self.depth, depth_bnds = self.depth_bnds, - ndata = self.ndata), + ndata = self.ndata), \ Variable(data = trend_p, unit = self.unit + "/year", name = "annual_cycle_trendp_of_%s" % self.name, @@ -1801,7 +1836,7 @@ def partialCorrelation(self, var_indep_list, ctype, region = None): def _covarTensor(tensor3d): """ Covariance matrix calculation for each data poinat along an extra dimension, e.g., in space""" - if tensor3d.dtype != np.ndarray: + if type(tensor3d) != type(np.array([[],[]])): raise TypeError('Input must be numpy array.') if len(tensor3d.shape) != 3: raise TypeError('Input must have 3 dimensions.') @@ -1848,10 +1883,7 @@ def _partialCorrTensor(x, y, covar_list): raise ValueError('x and covar_list must be the same shape') if x.shape[0] < 3: raise ValueError('At least three observations are needed') - - x0 = x.copy() - y0 = y.copy() - + orig_shape = x.shape if len(orig_shape) == 1: x = x.reshape(-1, 1, 1) # extra 2nd dimension for concat @@ -1866,52 +1898,80 @@ def _partialCorrTensor(x, y, covar_list): covar_relist = [] for vv in covar_list: covar_relist.append(vv.reshape(new_shape)) - covar_list = covar_relist - covar_relist = []; del covar_relist + # normalization does not affect calculated partial correlation + x, _, _ = _normalize(x) + y, _, _ = _normalize(y) + covar_list = [] + for vv in covar_relist: + vv, _, _ = _normalize(vv) + covar_list.append(vv) data = np.ma.concatenate([x,y] + covar_list, axis = 1) del x, y; covar_list = []; del covar_list - # remove invalid points + # remove invalid spatial points retain_ind = np.any(np.all(data.mask == False, axis = 1), axis = 0) if sum(retain_ind) == 0: raise ValueError('At least one valid spatial data point is needed') ## print(retain_ind) # DEBUG data = data[:, :, retain_ind] - + # TO-DO: Need to think of a way to deal with the different number # of data points in space. Right now it imposes the minimum # overlapping number of valid data points. 
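# The trade-off described above, sketched on a toy missingness mask (True
# marks a missing value; shapes and values are illustrative only): either
# keep the spatial points that are complete in time, or keep the time
# replicates that are complete in space, whichever retains more data.
import numpy as np
mask = np.zeros((5, 2, 4), dtype = bool)                   # (time, variable, space)
mask[0, 0, 1] = True                                       # one gap at one location
retain_space = np.all(np.all(~mask, axis = 1), axis = 0)   # per-space: 3 kept
retain_time  = np.all(np.all(~mask, axis = 1), axis = 1)   # per-time:  4 kept
drop_spatial = retain_space.sum() > retain_time.sum()      # False for this mask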
-        drop_replica = np.all(np.all(data.mask == False, axis = 2), axis = 1)
-        ## print(drop_replica) # DEBUG
-        if sum(drop_replica) < 3:
-            raise ValueError('At least three valid observations are needed')
-        data = data[drop_replica, :, :]
-
+
+        # decide whether to drop the spatial grids that contain invalid points in time,
+        # or the observational replicates that have invalid spatial grids.
+        retain_a = np.all(np.all(data.mask == False, axis = 1), axis = 0)
+        retain_b = np.all(np.all(data.mask == False, axis = 1), axis = 1)
+        if np.sum(retain_a) > np.sum(retain_b):
+            data = data[..., retain_a]
+        else:
+            if np.sum(retain_b) < 3:
+                raise ValueError('At least three valid observations are needed')
+            data = data[retain_b, ...]
+            retain_a = None
+
         # calculate the partial correlation and significance (translated from pingouin)
-        V = _covar_tensor(data)
+        V = _covarTensor(data.data)
         ##print(data.shape) # DEBUG
         ##print(V.shape) # DEBUG
         Vi = np.linalg.pinv(V.transpose(2,0,1)).transpose(1,2,0)
         D = np.zeros(Vi.shape)
         for ii in np.arange(Vi.shape[0]):
+            np.seterr(divide='ignore', invalid='ignore')
             D[ii,ii,:] = np.sqrt( 1 / Vi[ii,ii,:] )
+            np.seterr(divide='raise', invalid='raise')
         pcor = -1 * np.einsum('jik,ilk->jlk', np.einsum('jik,ilk->jlk',D,Vi), D)
         ## print(-1 * D[:,:,5] @ Vi[:,:,5] @ D[:,:,5] - pcor[:,:,5]) # check if correct
         r = pcor[0, 1, :]
-
+
         from scipy.stats import t
         n = data.shape[0]
         k = data.shape[1] - 2
         dof = n - k - 2
+        np.seterr(divide='ignore', invalid='ignore')
         tval = r * np.sqrt(dof / (1 - r**2))
+        np.seterr(divide='raise', invalid='raise')
         pval = 2 * t.sf(np.abs(tval), dof)
+        r = np.ma.masked_where(np.isnan(r), r)
+        pval = np.ma.masked_where(np.isnan(pval), pval)
+
         # restore shape
-        def _restore_shape(array, retain_ind, orig_shape):
+        def _restore_shape(array, retain_ind, orig_shape, retain_a = None):
+            if retain_a is not None:
+                array_temp = np.ma.empty(len(retain_a))
+                array_temp.mask = retain_a == False
+                array_temp.mask[retain_a] = array.mask
+                array_temp.data[retain_a] = array
+                array = array_temp
+                del array_temp
             array_restore = np.ma.empty(len(retain_ind))
             array_restore.mask = retain_ind == False
+            array_restore.mask[retain_ind] = array.mask
             array_restore.data[retain_ind] = array
             array_restore = array_restore.reshape(orig_shape[1:])
             return array_restore
@@ -1923,14 +1983,14 @@ def _restore_shape(array, retain_ind, orig_shape):
         if len(orig_shape) == 1:
             return r[0], pval[0]
         else:
-            r_restore = _restore_shape(r, retain_ind, orig_shape)
-            p_restore = _restore_shape(pval, retain_ind, orig_shape)
+            r_restore = _restore_shape(r, retain_ind, orig_shape, retain_a)
+            p_restore = _restore_shape(pval, retain_ind, orig_shape, retain_a)
             return r_restore, p_restore
-
         # checks on data consistency
         assert region is None
-        assert self.data.shape == var.data.shape
+        for var in var_indep_list:
+            assert self.data.shape == var.data.shape
         assert ctype is "temporal"

         # determine arguments for functions
@@ -1957,20 +2017,31 @@
         if self.monthly and (self.time.size > 11):
             begin = np.argmin(self.time[:11]%365)
             end = begin+int(self.time[begin:].size/12.)*12
-            shp = (-1,12) + self.data.shape[1:]
-            x_mean = np.ma.mean(self.data[begin:end,...].reshape(shp),
-                                axis = 1, keepdims = True)
+            shp = ((end-begin)//12, 12) + self.data.shape[1:]
+
+            # somehow I get floating point error doing this using np.mean or np.ma.mean
+            temp = self.data[begin:end,...].data.copy()
+            temp[self.data[begin:end,...].mask == True] = np.nan
+            x_mean = np.nanmean(temp, axis = 
0, keepdims=True) + x_mean = np.ma.masked_where(np.isnan(x_mean), x_mean) + x = self.data[begin:end,...] - \ - np.broadcast_to(x_mean, shp).reshape((-1,) + self.data.shape[1:]) + np.broadcast_to(x_mean, shp).reshape((-1,)+self.data.shape[1:]) + y_list = [] for i, y in enumerate(var_indep_list): begin = np.argmin(y.time[:11]%365) end = begin+int(y.time[begin:].size/12.)*12 - shp = (-1,12) + y.data.shape[1:] - y_mean = np.ma.mean(y.data[begin:end,...].reshape(shp), - axis = 1, keepdims = True) - y_temp = y.data[being:end,...] - \ - np.broadcast_to(y_mean, shp).reshape((-1,) + y.data.shape[1:]) + shp = ((end-begin)//12, 12) + y.data.shape[1:] + + # somehow I get floating point error doing this using np.mean or np.ma.mean + temp = y.data[begin:end,...].data.copy() + temp[y.data[begin:end,...].mask == True] = np.nan + y_mean = np.nanmean(temp, axis = 0, keepdims=True) + y_mean = np.ma.masked_where(np.isnan(y_mean), y_mean) + + y_temp = y.data[begin:end,...] - \ + np.broadcast_to(y_mean, shp).reshape((-1,)+y.data.shape[1:]) y_list.append(y_temp) else: x = self.data @@ -1980,15 +2051,19 @@ def _restore_shape(array, retain_ind, orig_shape): result = {} for i, y in enumerate(y_list): r, p = _partialCorrTensor(x, y, y_list[:i] + y_list[(i+1):]) + r = np.where( np.abs(r) < 1e-8, 0., r ) # control for floating point precision + r = Variable(data=r,unit="1", - name="%s_partial_correlation_of_%s_and_%s" % (ctype,self.name,y.name), + name="%s_partial_correlation_of_%s_and_%s" % (ctype,self.name, + var_indep_list[i].name), time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, lat=out_lat,lon=out_lon,area=out_area) - p = Variable(data=r,unit="1", - name="%s_partial_pvalue_of_%s_and_%s" % (ctype,self.name,y.name), + p = Variable(data=p,unit="1", + name="%s_partial_pvalue_of_%s_and_%s" % (ctype,self.name, + var_indep_list[i].name), time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, lat=out_lat,lon=out_lon,area=out_area) - result[y.name] = {'r': r, 'p': p} + result[var_indep_list[i].name] = {'r': r, 'p': p} return result diff --git a/src/ILAMB/constants.py b/src/ILAMB/constants.py index 4081eeb1..5161d81c 100644 --- a/src/ILAMB/constants.py +++ b/src/ILAMB/constants.py @@ -348,7 +348,7 @@ "ylabel" : "unit"} for pn, ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]): - space_opts[pn] = {} + time_opts[pn] = {} time_opts[pn]["spaceint"] = { "name" : "Spatially integrated regional " + ffix, "section" : "Spatially integrated regional " + ffix, @@ -372,7 +372,7 @@ "section" : "Spatially integrated regional " + ffix, "haslegend" : False, "pattern" : "MNAME_RNAME_" + ffix + "_cycle.png", - "sidelbl" : "ANNUAL CYCLE OF " + ffix.uper(), + "sidelbl" : "ANNUAL CYCLE OF " + ffix.upper(), "ticks" : mid_months/365.+1850., "ticklabels" : lbl_months, "ylabel" : "unit" } diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 309d5b23..d2ad2ee7 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -840,10 +840,16 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N depth_bnd = Unit(dunit).convert(depth_bnd,Unit("Pa"),inplace=True) depth_bnd = -np.log(depth_bnd/Pb)*R*Tb/M/g - print("il.FromNetCDF4 " + filename) # DEBUG - print(depth) # DEBUG - print(depth_bnd) # DEBUG - print(v.shape) # DEBUG + ## DEBUG + #print("il.FromNetCDF4, soil moisture, " + filename) + #if t is not None: + # print(t[0], t[-1]) + #else: + # print("None", "None") + #print(begin, end, calendar) + #print(depth) + #print(depth_bnd) + #print(v.shape) # YW if z0 is not None: @@ -868,9 +874,11 @@ 
def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if zf is not None: raise ValueError("Mismatched ending depth %f." % zf) - print(depth) # DEBUG - print(depth_bnd) # DEBUG - print(v.shape) + #print(z0, zf) + #print(depth) + #print(depth_bnd) + #print(v.shape) + #print("********") else: if (z0 is not None) or (zf is not None): raise ValueError("Vertical subscript is used but there is no layered dimension in %s." % filename) @@ -1746,129 +1754,171 @@ def AnalysisTrendStateSpace(ref,com,**keywords): REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) normalizer = REF_timeint.data if mass_weighting else None - + + # Prepare spatial mean + ref_and_com = (REF.data.mask == False) * (COM.data.mask == False) + ref_not_com = (REF.data.mask == False) * (COM.data.mask == True ) + com_not_ref = (REF.data.mask == True ) * (COM.data.mask == False) + # Find the trend values over the time period if ref_trend is None: ref_trend, ref_trend_p = ref.convert(plot_unit).trendInTime(mean=True) REF_trend, REF_trend_p = REF.convert(plot_unit).trendInTime(mean=True) else: - ref_trend.convert(plot_unit) + ref_trend.convert(plot_unit + "/year") REF_trend = ref_trend.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) REF_trend_p = ref_trend_p.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + + # Report period trend values over all possible representations of land + if benchmark_dataset is not None: + ref_trend.name = "trend_of_%s" % name + ref_trend.toNetCDF4(benchmark_dataset,group="TrendState") + ref_trend_p.name = "trendp_of_%s" % name + ref_trend_p.toNetCDF4(benchmark_dataset,group="TrendState") + + # Similar as above, but for the comparison (model) data if com_trend is None: com_trend, com_trend_p = com.convert(plot_unit).trendInTime(mean=True) COM_trend, COM_trend_p = COM.convert(plot_unit).trendInTime(mean=True) else: - com_trend.convert(plot_unit) + com_trend.convert(plot_unit + "/year") COM_trend = com_trend.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) COM_trend_p = com_trend_p.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) - normalizer = REF_trend.data if mass_weighting else None - # Prepare spatial mean - ref_and_com = (REF_trend.data.mask == False) * (COM_trend.data.mask == False) - ref_not_com = (REF_trend.data.mask == False) * (COM_trend.data.mask == True ) - com_not_ref = (REF_trend.data.mask == True ) * (COM_trend.data.mask == False) - if ref.time.size > 1: + if dataset is not None: + com_trend.name = "trend_of_%s" % name + com_trend.toNetCDF4(dataset,group="TrendState") + com_trend_p.name = "trendp_of_%s" % name + com_trend_p.toNetCDF4(dataset,group="TrendState") + + # Cycle: maps, scalars, and scores + if not skip_cycle: + ref_trend_cycle_map, _ = REF.trendAnnualCycle() + ref_maxt_map = ref_trend_cycle_map.timeOfExtrema(etype="max") + ref_maxt_map.name = "trend_phase_map_of_%s" % name + com_trend_cycle_map, _ = COM.trendAnnualCycle() + com_maxt_map = com_trend_cycle_map.timeOfExtrema(etype="max") + com_maxt_map.name = "trend_phase_map_of_%s" % name + shift_map = ref_maxt_map.phaseShift(com_maxt_map) + shift_map.name = "trend_shift_map_of_%s" % name + shift_score_map = ScoreSeasonalCycle(shift_map) + shift_score_map.name = "trend_shiftscore_map_of_%s" % name + shift_map.data /= 30.; shift_map.unit = "months" + if benchmark_dataset is not None: + ref_maxt_map.toNetCDF4(benchmark_dataset,group="TrendState") for region in regions: ref_spaceint = 
REF.integrateInSpace(region=region,mean=True).convert(table_unit) ref_spaceint.name = "trend_spaceint_of_%s_over_%s" % (name,region) + ref_trend_cycle, _ = ref_spaceint.trendAnnualCycle() + ref_trend_cycle.name = "trend_cycle_of_%s_over_%s" % (name,region) + ref_trend_cycle.toNetCDF4(benchmark_dataset,group="TrendState") + + ref_dtcycle = deepcopy(ref_trend_cycle) + ref_dtcycle.data -= ref_trend_cycle.data.mean() + ref_dtcycle.name = "trend_dtcycle_of_%s_over_%s" % (name,region) + ref_dtcycle.toNetCDF4(benchmark_dataset,group="TrendState") + + # reference period trend on intersection of land ref_union_spaceint = Variable(name = "REF_and_com", unit = REF.unit, data = np.ma.masked_array(REF.data,mask=(ref_and_com==False)), + time = REF.time, time_bnds = REF.time_bnds, lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = REF.area).integrateInSpace(region=region, mean=space_mean).convert(table_unit) + ref_union_trend, ref_union_trend_p = \ + ref_union_spaceint.convert(plot_unit).trendAnnualCycle() + ref_union_trend.name = "Benchmark Period Trend (intersection) %s %s" % \ + (name, region) + ref_union_trend.toNetCDF4(dataset,group="TrendState") + ref_union_trend_p.name = "Benchmark Period Trend P (intersection) %s %s" % \ + (name, region) + ref_union_trend_p.toNetCDF4(dataset,group="TrendState") + + # reference period mean on complement of land ref_comp_spaceint = Variable(name = "REF_not_com", unit = REF.unit, data = np.ma.masked_array(REF.data,mask=(ref_not_com==False)), + time = REF.time, time_bnds = REF.time_bnds, lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = REF.area).integrateInSpace(region=region, mean=space_mean).convert(table_unit) + ref_comp_trend, ref_comp_trend_p = \ + ref_comp_spaceint.convert(plot_unit).trendAnnualCycle() + ref_comp_trend.name = "Benchmark Period Trend (complement) %s %s" % (name, region) + ref_comp_trend.toNetCDF4(dataset,group="TrendState") + ref_comp_trend_p.name = "Benchmark Period Trend P (complement) %s %s" % \ + (name, region) + ref_comp_trend_p.toNetCDF4(dataset,group="TrendState") + if dataset is not None: + com_maxt_map .toNetCDF4(dataset,group="TrendState") + shift_map .toNetCDF4(dataset,group="TrendState") + shift_score_map.toNetCDF4(dataset,group="TrendState") for region in regions: com_spaceint = COM.integrateInSpace(region=region,mean=True).convert(table_unit) com_spaceint.name = "trend_spaceint_of_%s_over_%s" % (name,region) + com_trend_cycle, _ = com_spaceint.trendAnnualCycle() + com_trend_cycle.name = "trend_cycle_of_%s_over_%s" % (name,region) + com_trend_cycle.toNetCDF4(dataset,group="TrendState") + + com_dtcycle = deepcopy(com_trend_cycle) + com_dtcycle.data -= com_trend_cycle.data.mean() + com_dtcycle.name = "trend_dtcycle_of_%s_over_%s" % (name,region) + com_dtcycle.toNetCDF4(dataset,group="TrendState") + + shift = shift_map.integrateInSpace(region=region,mean=True,intabs=True) + shift_score = shift_score_map.integrateInSpace(region=region,mean=True, + weight=normalizer) + shift.name = "Trend Phase Shift %s %s" % (name, region) + shift.toNetCDF4(dataset,group="TrendState") + shift_score.name = "Seasonal Cycle Score %s %s" % (name, region) + shift_score.toNetCDF4(dataset,group="TrendState") + + # comparison period mean on original grid com_union_spaceint = Variable(name = "ref_and_COM", unit = COM.unit, data = np.ma.masked_array(COM.data,mask=(ref_and_com==False)), + time = REF.time, time_bnds = REF.time_bnds, lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = 
COM.area).integrateInSpace(region=region, mean=space_mean).convert(table_unit) + com_period_trend, com_period_trend_p = \ + com_spaceint.convert(plot_unit).trendAnnualCycle() + com_period_trend.name = "Period Trend (original grids) %s %s" % (name, region) + com_period_trend.toNetCDF4(dataset,group="TrendState") + com_period_trend_p.name = "Period Trend P (original grids) %s %s" % (name, region) + com_period_trend_p.toNetCDF4(dataset,group="TrendState") + + # comparison period mean on intersection of land com_comp_spaceint = Variable(name = "COM_not_ref", unit = COM.unit, data = np.ma.masked_array(COM.data,mask=(com_not_ref==False)), + time = REF.time, time_bnds = REF.time_bnds, lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = COM.area).integrateInSpace(region=region, mean=space_mean).convert(table_unit) - - # Report period trend values over all possible representations of land - if benchmark_dataset is not None: - ref_trend.name = "trend_of_%s" % name - ref_trend.toNetCDF4(benchmark_dataset,group="TrendState") - ref_trend_p.name = "trendp_of_%s" % name - ref_trend_p.toNetCDF4(benchmark_dataset,group="TrendState") - - for region in regions: - # reference period trend of the average time series on original grid - ref_period_trend, ref_period_trend_p = \ - ref_spaceint.convert(plot_unit).trendInTime(mean=True) - ref_period_trend.name = "Period Trend (original grids) %s %s" % (name, region) - ref_period_trend.toNetCDF4(benchmark_dataset,group="TrendState") - ref_period_trend_p.name = "Period Trend P (original grids) %s %s" % (name, region) - ref_period_trend_p.toNetCDF4(benchmark_dataset,group="TrendState") - - if dataset is not None: - com_trend.name = "trend_of_%s" % name - com_trend.toNetCDF4(dataset,group="TrendState") - com_trend_p.name = "trendp_of_%s" % name - com_trend_p.toNetCDF4(dataset,group="TrendState") - - for region in regions: - # reference period trend on intersection of land - ref_union_trend, ref_union_trend_p = \ - ref_union_spaceint.convert(plot_unit).trendInTime(mean=True) - ref_union_trend.name = "Benchmark Period Trend (intersection) %s %s" % (name, region) - ref_union_trend.toNetCDF4(dataset,group="TrendState") - ref_union_trend_p.name = "Benchmark Period Trend P (intersection) %s %s" % \ - (name, region) - ref_union_trend_p.toNetCDF4(dataset,group="TrendState") - - # reference period mean on complement of land - ref_comp_trend, ref_comp_trend_p = \ - ref_comp_spaceint.convert(plot_unit).trendInTime(mean=True) - ref_comp_trend.name = "Benchmark Period Trend (complement) %s %s" % (name, region) - ref_comp_trend.toNetCDF4(dataset,group="TrendState") - ref_comp_trend_p.name = "Benchmark Period Trend P (complement) %s %s" % (name, region) - ref_comp_trend_p.toNetCDF4(dataset,group="TrendState") - - # comparison period mean on original grid - com_period_trend, com_period_trend_p = \ - com_spaceint.convert(plot_unit).trendInTime(mean=True) - com_period_trend.name = "Period Trend (original grids) %s %s" % (name, region) - com_period_trend.toNetCDF4(dataset,group="TrendState") - com_period_trend_p.name = "Period Trend P (original grids) %s %s" % (name, region) - com_period_trend_p.toNetCDF4(dataset,group="TrendState") - - # comparison period mean on intersection of land - com_union_trend, com_union_trend_p = \ - com_union_spaceint.convert(plot_unit).trendInTime(mean=True) - com_union_trend.name = "Model Period Trend (intersection) %s %s" % (name, region) - com_union_trend.toNetCDF4(dataset,group="TrendState") - com_union_trend_p.name = "Model Period Trend 
P (intersection) %s %s" % (name, region) - com_union_trend_p.toNetCDF4(dataset,group="TrendState") - - # comparison period mean on complement of land - com_comp_trend, com_comp_trend_p = \ - com_comp_spaceint.convert(plot_unit).trendInTime(mean=True) - com_comp_trend.name = "Model Period Trend (complement) %s %s" % (name, region) - com_comp_trend.toNetCDF4(dataset,group="TrendState") - com_comp_trend_p.name = "Model Period Trend P (complement) %s %s" % (name, region) - com_comp_trend_p.toNetCDF4(dataset,group="TrendState") + com_union_trend, com_union_trend_p = \ + com_union_spaceint.convert(plot_unit).trendAnnualCycle() + com_union_trend.name = "Model Period Trend (intersection) %s %s" % (name, region) + com_union_trend.toNetCDF4(dataset,group="TrendState") + com_union_trend_p.name = "Model Period Trend P (intersection) %s %s" % \ + (name, region) + com_union_trend_p.toNetCDF4(dataset,group="TrendState") + + # comparison period mean on complement of land + com_comp_trend, com_comp_trend_p = \ + com_comp_spaceint.convert(plot_unit).trendAnnualCycle() + com_comp_trend.name = "Model Period Trend (complement) %s %s" % (name, region) + com_comp_trend.toNetCDF4(dataset,group="TrendState") + com_comp_trend_p.name = "Model Period Trend P (complement) %s %s" % (name, region) + com_comp_trend_p.toNetCDF4(dataset,group="TrendState") # Now that we are done reporting on the intersection / complement, # set all masks to the intersection REF.data.mask += np.ones(REF.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) COM.data.mask += np.ones(COM.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) + ref_and_com = (REF_trend.data.mask == False) * (COM_trend.data.mask == False) REF_trend.data.mask = (ref_and_com==False) REF_trend_p.data.mask = (ref_and_com==False) COM_trend.data.mask = (ref_and_com==False) @@ -1885,52 +1935,6 @@ def AnalysisTrendStateSpace(ref,com,**keywords): attributes={"std":space_std.data, "R" :space_cor.data}) - # Cycle: maps, scalars, and scores - if not skip_cycle: - ref_trend_cycle_map, _ = REF.trendAnnualCycle() - ref_maxt_map = ref_trend_cycle_map.timeOfExtrema(etype="max") - ref_maxt_map.name = "trend_phase_map_of_%s" % name - com_trend_cycle_map, _ = COM.trendAnnualCycle() - com_maxt_map = com_trend_cycle_map.timeOfExtrema(etype="max") - com_maxt_map.name = "trend_phase_map_of_%s" % name - shift_map = ref_maxt_map.phaseShift(com_maxt_map) - shift_map.name = "trend_shift_map_of_%s" % name - shift_score_map = ScoreSeasonalCycle(shift_map) - shift_score_map.name = "trend_shiftscore_map_of_%s" % name - shift_map.data /= 30.; shift_map.unit = "months" - - if benchmark_dataset is not None: - ref_maxt_map.toNetCDF4(benchmark_dataset,group="TrendState") - for region in regions: - ref_trend_cycle = ref_spaceint.trendAnnualCyclce() - ref_trend_cycle.name = "trend_cycle_of_%s_over_%s" % (name,region) - ref_trend_cycle.toNetCDF4(benchmark_dataset,group="TrendState") - ref_dtcycle = deepcopy(ref_trend_cycle) - ref_dtcycle.data -= ref_trend_cycle.data.mean() - ref_dtcycle.name = "trend_dtcycle_of_%s_over_%s" % (name,region) - ref_dtcycle.toNetCDF4(benchmark_dataset,group="TrendState") - if dataset is not None: - com_maxt_map .toNetCDF4(dataset,group="TrendState") - shift_map .toNetCDF4(dataset,group="TrendState") - shift_score_map.toNetCDF4(dataset,group="TrendState") - for region in regions: - com_trend_cycle = com_spaceint.trendAnnualCycle() - com_trend_cycle.name = "trend_cycle_of_%s_over_%s" % (name,region) - 
com_trend_cycle.toNetCDF4(dataset,group="TrendState") - com_dtcycle = deepcopy(com_trend_cycle) - com_dtcycle.data -= com_trend_cycle.data.mean() - com_dtcycle.name = "trend_dtcycle_of_%s_over_%s" % (name,region) - com_dtcycle.toNetCDF4(dataset,group="TrendState") - shift = shift_map.integrateInSpace(region=region,mean=True,intabs=True) - shift_score = shift_score_map.integrateInSpace(region=region,mean=True, - weight=normalizer) - shift.name = "Trend Phase Shift %s %s" % (name, region) - shift.toNetCDF4(dataset,group="TrendState") - shift_score.name = "Seasonal Cycle Score %s %s" % (name, region) - shift_score.toNetCDF4(dataset,group="TrendState") - - del shift_map,shift_score_map - # Bias: maps, scalars, and scores bias = REF_trend.bias(COM_trend).convert(plot_unit) # !!! TO-DO: Use the confidence interval of REF_trend instead of the REF_trend @@ -1966,7 +1970,7 @@ def AnalysisTrendStateSpace(ref,com,**keywords): for region in regions: rmse = rmse_map.integrateInSpace(region=region,mean=True).convert(plot_unit) rmse.name = 'Trend RMSE %s %s' % (name, region) - rmse.toNetCDF(dataset,group='TrendState') + rmse.toNetCDF4(dataset,group='TrendState') rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True, weight=normalizer) rmse_score.name = 'RMSE Score %s %s' % (name, region) @@ -2035,6 +2039,7 @@ def AnalysisPartialCorrSpace(ref,com,ref_indep_list,com_indep_list,**keywords): REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) normalizer = REF_timeint.data if mass_weighting else None + del REF_timeint # Find the partial correlation values over the time period assert ref_corr is None @@ -2046,14 +2051,27 @@ def AnalysisPartialCorrSpace(ref,com,ref_indep_list,com_indep_list,**keywords): COM_corr = COM.partialCorrelation(COM_indep_list, ctype = "temporal") for pp in ref_corr.keys(): for ss in ['r', 'p']: + print(pp, ss) # DEBUB + temp = ref_corr[pp][ss] - temp = 'Benchmark (original grids) ' + temp.name + ' ' + region + temp.name = 'Benchmark (original grids) ' + temp.name temp.toNetCDF4(benchmark_dataset, group = 'Sensitivities') temp = com_corr[pp][ss] - temp = 'Model (original grids) ' + temp.name + ' ' + region + temp.name = 'Model (original grids) ' + temp.name + temp.toNetCDF4(dataset, group = 'Sensitivities') + + temp = REF_corr[pp][ss] + temp.name = 'Benchmark (common grids) ' + temp.name + temp.toNetCDF4(benchmark_dataset, group = 'Sensitivities') + + temp = COM_corr[pp][ss] + temp.name = 'Model (common grids) ' + temp.name temp.toNetCDF4(dataset, group = 'Sensitivities') + if ss == 'p': + continue + # Spatial Distribution: scalars and scores if dataset is not None: for region in regions: From 85ff1e985897c766eadb1cfa6b873ad723e53f3d Mon Sep 17 00:00:00 2001 From: Yaoping Wang Date: Sun, 28 Nov 2021 00:25:52 -0500 Subject: [PATCH 13/18] trend sensitivity WIP --- src/ILAMB/ConfSoilMoisture.py | 65 +++++++++++++++++++++-------------- src/ILAMB/Confrontation.py | 6 ++-- src/ILAMB/Post.py | 29 +++++++++++----- src/ILAMB/Variable.py | 15 ++++---- src/ILAMB/constants.py | 2 +- src/ILAMB/ilamblib.py | 16 +++------ 6 files changed, 76 insertions(+), 57 deletions(-) diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py index 49a68704..3c4d7cca 100644 --- a/src/ILAMB/ConfSoilMoisture.py +++ b/src/ILAMB/ConfSoilMoisture.py @@ -445,15 +445,12 @@ def determinePlotLimits(self): pname = vname.split("_")[1] region = vname.split("_")[-1] if var[...].size <= 1: continue - if pname in space_opts[pn]: - if pname not in limits[pn]: - limits[pn][pname] = {} - 
limits[pn][pname]["min"] = +1e20 - limits[pn][pname]["max"] = -1e20 - limits[pn][pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) - limits[pn][pname]["min"] = min(limits[pn][pname]["min"],var.getncattr(min_str)) - limits[pn][pname]["max"] = max(limits[pn][pname]["max"],var.getncattr(max_str)) - elif pname in time_opts[pn]: + if pname in time_opts[pn]: + """If the plot is a time series, it has been averaged over regions + already and we need a separate dictionary for the + region as well. These can be based on the + percentiles from the attributes of the netCDF + variables.""" if pname not in limits[pn]: limits[pn][pname] = {} if region not in limits[pn][pname]: limits[pn][pname][region] = {} @@ -462,6 +459,21 @@ def determinePlotLimits(self): limits[pn][pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) limits[pn][pname][region]["min"] = min(limits[pn][pname][region]["min"],var.getncattr("min")) limits[pn][pname][region]["max"] = max(limits[pn][pname][region]["max"],var.getncattr("max")) + else: + """If the plot is spatial, we want to set the limits as a percentile + of all data across models and the + benchmark. So here we load the data up and in + another pass will compute the percentiles.""" + if pname not in limits[pn]: + limits[pn][pname] = {} + limits[pn][pname]["min"] = +1e20 + limits[pn][pname]["max"] = -1e20 + limits[pn][pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pn][pname]["data"] = var[...].compressed() + else: + limits[pn][pname]["data"] = np.hstack([limits[pn][pname]["data"],var[...].compressed()]) + limits[pn][pname]["min"] = min(limits[pn][pname]["min"],var.getncattr(min_str)) + limits[pn][pname]["max"] = max(limits[pn][pname]["max"],var.getncattr(max_str)) if not prune and "Benchmark" in fname and pname == "timeint": prune = True self.pruneRegions(Variable(filename = fname, @@ -469,7 +481,7 @@ def determinePlotLimits(self): groupname = pn)) # Second pass to plot legends (FIX: only for master?) 
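# The pass below writes one standalone colorbar image per plot type from the
# limits collected above. A minimal stand-in for such a legend figure (the
# colormap, limits, and filename are hypothetical; this uses matplotlib's
# ColorbarBase directly rather than the helper call shown below):
import matplotlib as mpl
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize = (6.8, 1.0), tight_layout = True)
mpl.colorbar.ColorbarBase(ax, cmap = plt.get_cmap("RdBu"),
                          norm = mpl.colors.Normalize(vmin = -1.0, vmax = 1.0),
                          orientation = "horizontal")
fig.savefig("legend_mean_timeint.png")            # hypothetical output name
plt.close(fig)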
@@ -469,7 +481,7 @@ def determinePlotLimits(self):
                                                groupname = pn))

         # Second pass to plot legends (FIX: only for master?)
-        for pn in ["MeanState", "TrendState"]:
+        for pn, ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]):
             if not pn in limits.keys(): continue
             for pname in limits[pn].keys():
                 try:
@@ -479,7 +491,8 @@ def determinePlotLimits(self):

                 # Determine plot limits and colormap
                 if opts["sym"]:
-                    vabs = max(abs(limits[pn][pname]["min"]),abs(limits[pn][pname]["max"]))
+                    vabs = max(abs(limits[pn][pname]["min"]),
+                               abs(limits[pn][pname]["max"]))
                     limits[pn][pname]["min"] = -vabs
                     limits[pn][pname]["max"] =  vabs
@@ -503,9 +516,15 @@ def determinePlotLimits(self):
                                           ticks      = opts["ticks"],
                                           ticklabels = opts["ticklabels"],
                                           label      = label)
-                fig.savefig(os.path.join(self.output_path,"legend_%s.png" % (pname)))
+                fig.savefig(os.path.join(self.output_path,"legend_%s.png" % (ffix + "_" + pname)))
                 plt.close()

+        # For those limits which we built up data across all models, compute the percentiles
+        for pn in limits.keys():
+            for pname in limits[pn].keys():
+                if "data" in limits[pn][pname]:
+                    limits[pn][pname]["min"],limits[pn][pname]["max"] = np.percentile(limits[pn][pname]["data"],[1,99])
+
         # Determine min/max of relationship variables
         for fname in glob.glob(os.path.join(self.output_path,"*.nc")):
             with Dataset(fname) as dataset:
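The spatial branch instead pools the unmasked values from every model's map, and the second pass above clips the color limits to the 1st/99th percentiles so a single outlying cell cannot stretch the color scale. A toy version of the idea, with synthetic stand-ins for the model maps:

    # Percentile-clipped color limits over pooled, unmasked map values.
    # The maps here are synthetic; only the pooling/percentile pattern
    # mirrors the code above.
    import numpy as np

    rng    = np.random.default_rng(0)
    maps   = [np.ma.masked_less(rng.normal(25.0, 8.0, (180, 360)), 0.0)
              for _ in range(3)]
    pooled = np.hstack([m.compressed() for m in maps])
    vmin, vmax = np.percentile(pooled, [1, 99])
    vabs = max(abs(vmin), abs(vmax))   # symmetric limits, as in opts["sym"]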
@@ -703,7 +721,7 @@ def modelPlots(self,m):
         if not os.path.isfile(fname): return

         # get the HTML page
-        for pn, ffix in zip(['MeanState', 'TrendState'], ["mean", "trend"]):
+        for pn, ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]):
             try:
                 page = [page for page in self.layout.pages if pn in page.name][0]
             except:
@@ -725,17 +743,14 @@ def modelPlots(self,m):
                 pname = vname.split("_")[1]

                 if (var.spatial or (var.ndata is not None)) and not var.temporal:
-                    # grab plotting options
                     if pname not in self.limits[pn].keys(): continue
                     if pname not in space_opts[pn]: continue
-                    opts = space_opts[pn][pname]
-
-                    ##print('... is used in space_opts') # DEBUG
-
+                    opts = space_opts[pn][pname]
+
                     # add to html layout
                     page.addFigure(opts["section"],
-                                   pname,
+                                   ffix + "_" + pname,
                                    opts["pattern"],
                                    side   = opts["sidelbl"],
                                    legend = opts["haslegend"])
@@ -758,7 +773,7 @@ def modelPlots(self,m):
                                                  "%s_%s_%s_%s.png" % (m.name,region,ffix,pname)))
                         plt.close()
-
+
                     # Jumping through hoops to get the benchmark plotted and in the html output
                     if self.master and (pname == "timeint" or \
                                         pname == "phase" or pname == "iav"):

                         # add to html layout
                         page.addFigure(opts["section"],
-                                       "benchmark_%s" % pname,
+                                       "benchmark_%s" % (ffix + "_" + pname),
                                        opts["pattern"].replace("MNAME","Benchmark"),
                                        side   = opts["sidelbl"].replace("MODEL","BENCHMARK"),
                                        legend = True)
@@ -790,7 +805,7 @@ def modelPlots(self,m):
                                                      ffix,pname)))
                             plt.close()
-
+
                 if not (var.spatial or (var.ndata is not None)) and var.temporal:

                     # grab the benchmark dataset to plot along with
                     try:
@@ -805,11 +820,11 @@ def modelPlots(self,m):

                     # add to html layout
                     page.addFigure(opts["section"],
-                                   pname,
+                                   ffix + "_" + pname,
                                    opts["pattern"],
                                    side   = opts["sidelbl"],
                                    legend = opts["haslegend"])
-
+
                     # plot variable
                     for region in self.regions:
                         if region not in vname: continue
@@ -1308,8 +1323,6 @@ def _scoreFunction(ref,com):
         for c in self.sensitivities:
             # Get the sensitivity map from the model and obs
             try:
-                import pdb; pdb.set_trace()
-
                 ref_corr_list, REF_corr_list, ref_corr_p_list, REF_corr_p_list = _retrieveCorr(c.variable, os.path.join(self.output_path, "%s_%s.nc" % (self.name,"Benchmark")), c.alternate_vars)
                 com_corr_list, COM_corr_list, com_corr_p_list, COM_corr_p_list = _retrieveCorr(c.variable, os.path.join(self.output_path, "%s_%s.nc" % (self.name,m.name)), c.alternate_vars)
                 com_bias_map_list, com_biasscore_map_list = _retrieveBias(c.variable, os.path.join(self.output_path, "%s_%s.nc" % (self.name,m.name)), c.alternate_vars)
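The modelPlots hunks above rename every figure key from pname to ffix + "_" + pname so MeanState and TrendState panels cannot collide, and benchmark panels additionally get a "benchmark_" prefix that Post.py later uses to pair model and benchmark figures. A toy check of that convention; the page names and plot types below are examples only:

    # Figure-key scheme: ffix + "_" + pname for model panels, an extra
    # "benchmark_" prefix for benchmark panels. The pairing rule at the
    # bottom mirrors the "benchmark_%s" test used in Post.py.
    pages  = {"MeanState": "mean", "TrendState": "trend"}
    pnames = ["timeint", "phase", "iav"]

    keys = []
    for pn, ffix in pages.items():
        for pname in pnames:
            keys.append(ffix + "_" + pname)                 # e.g. "trend_phase"
            keys.append("benchmark_" + ffix + "_" + pname)

    bench   = [k for k in keys if k.startswith("benchmark_")]
    plots   = [k for k in keys if not k.startswith("benchmark_")]
    nobench = [k for k in plots if "benchmark_%s" % k not in bench]
    assert nobench == []   # every model panel has a benchmark partner here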
diff --git a/src/ILAMB/Confrontation.py b/src/ILAMB/Confrontation.py
index 1517099f..6cb60e4f 100644
--- a/src/ILAMB/Confrontation.py
+++ b/src/ILAMB/Confrontation.py
@@ -161,7 +161,7 @@ def __init__(self,**keywords):
        pages = []

        # Mean State page
-        pages.append(post.HtmlPage("MeanState","Mean State"))
+        pages.append(post.HtmlPage("MeanState", "Mean State"))
        pages[-1].setHeader("CNAME / RNAME / MNAME")
        pages[-1].setSections(["Temporally integrated period mean",
                               "Spatially integrated regional mean"])
@@ -171,8 +171,8 @@ def __init__(self,**keywords):
        if type(skip_trend) == type(""):
            skip_trend = (skip_trend.lower() == "true")
        if not skip_trend:
-            pages.append(post.HtmlPage('TrendState', 'Trend State'))
-            pages[-1].setHeader('CNAME / RNAME / MNAME')
+            pages.append(post.HtmlPage("TrendState", "Trend State"))
+            pages[-1].setHeader("CNAME / RNAME / MNAME")
            pages[-1].setSections(["Temporally integrated period trend",
                                   "Spatially integrated regional trend"])
diff --git a/src/ILAMB/Post.py b/src/ILAMB/Post.py
index e106fd35..dd2dde24 100644
--- a/src/ILAMB/Post.py
+++ b/src/ILAMB/Post.py
@@ -615,15 +615,16 @@ def _populatePlots(self):
            for section in page.sections:
                if len(page.figures[section]) == 0: continue
                for figure in page.figures[section]:
-                    if (figure.name in ["spatial_variance","compcycle","profile",
-                                        "legend_spatial_variance","legend_compcycle"]): continue # ignores
+                    if sum([n in figure.name for n in \
+                            ["spatial_variance","compcycle","profile",
+                             "legend_spatial_variance","legend_compcycle"]]): continue # ignores
                    if "benchmark" in figure.name:
                        if figure.name not in bench: bench.append(figure.name)
                        continue
                    if figure not in self.plots: self.plots.append(figure)
                    if not figure.legend: self.nolegend.append(figure.name)
        self.nobench = [plot.name for plot in self.plots if "benchmark_%s" % (plot.name) not in bench]
-
+
    def __str__(self):
        if self.plots is None: self._populatePlots()
@@ -669,16 +670,28 @@ def __str__(self):
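The _populatePlots change above replaces an exact-name membership test with a substring test (the sum() over booleans acts as any()), so prefixed variants such as "mean_compcycle" are now skipped alongside the bare names. A quick illustration of the difference, using invented figure names:

    # Exact-name test vs. the new substring test, on hypothetical names.
    skip    = ["spatial_variance", "compcycle", "profile",
               "legend_spatial_variance", "legend_compcycle"]
    figures = ["timeint", "mean_compcycle", "compcycle", "trend_profile"]

    exact = [f for f in figures if f not in skip]                   # old test
    subst = [f for f in figures if not any(n in f for n in skip)]   # new test
    print(exact)   # ['timeint', 'mean_compcycle', 'trend_profile']
    print(subst)   # ['timeint']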