diff --git a/bin/ilamb-run b/bin/ilamb-run index a1e753a..8869e13 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -74,10 +74,11 @@ def InitializeModels(model_root,models=[],verbose=False,filter="",regex="",model if log: logger.debug("[%s]" % mname,format_exc()) continue M.append(m) + max_model_name_len = max(max_model_name_len,len(mname)) break M = sorted(M,key=lambda m: m.name.upper()) - + # assign unique colors clrs = il.GenerateDistinctColors(len(M)) for m in M: @@ -248,6 +249,68 @@ def MatchRelationshipConfrontation(C): found = True return C +def MatchSensitivityConfrontation(C): + # YW + """Match sensitivity strings to confrontation longnames + + We allow for sensitivity maps to be studied by specifying the + sensitivity longname in the configure file. This routine loops + over all defined sensitivity maps and finds the matching + confrontation. + + Parameters + ---------- + C : list of ILAMB.Confrontation.Confrontation + the confrontation list + + Returns + ------- + C : list of ILAMB.Confrontation.Confrontation + the same list with sensitivity maps linked to confrontations. + """ + for c in C: + if not hasattr(c, "sensitivities"): continue + if c.sensitivities is None: continue + for i,longname in enumerate(c.sensitivities): + found = False + for cor in C: + if longname.lower() == cor.longname.lower(): + c.sensitivities[i] = cor + found = True + return C + +""" +# YW: Appears to duplicate MatchRelationshipConfrontation +def MatchConfrontation(C): + """ """Match relationship strings to confrontation longnames + + We allow for relationships to be studied by specifying the + confrontation longname in the configure file. This routine loops + over all defined relationships and finds the matching + confrontation. (NOTE: this really belongs inside the Scoreboard + object) + + Parameters + ---------- + C : list of ILAMB.Confrontation.Confrontation + the confrontation list + + Returns + ------- + C : list of ILAMB.Confrontation.Confrontation + the same list with relationships linked to confrontations + """ """ + for c in C: + if c.relationships is None: continue + for i,longname in enumerate(c.relationships): + found = False + for cor in C: + if longname.lower() == cor.longname.lower(): + c.relationships[i] = cor + found = True + return C +""" + def FilterConfrontationList(C,match_list): """Filter the confrontation list @@ -303,8 +366,8 @@ def BuildLocalWorkList(M,C,skip_cache=False): for c in C: for m in M: if skip_cache: - - # if we want to skip we have to check that it is complete + + # if we want to skip we have to check that it is complete fname = os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name)) complete = False if os.path.isfile(fname): @@ -346,6 +409,7 @@ def BuildLocalWorkList(M,C,skip_cache=False): return localW + def WorkConfront(W,verbose=False,clean=False): """Performs the confrontation analysis @@ -378,7 +442,7 @@ def WorkConfront(W,verbose=False,clean=False): # try to run the confrontation try: - t0 = time.time() + t0 = time.time() c.confront(m) dt = time.time()-t0 proc[rank] += dt @@ -393,7 +457,8 @@ def WorkConfront(W,verbose=False,clean=False): logger.debug("[%s][%s]\n%s" % (c.longname,m.name,format_exc())) if verbose: print((" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name)) - + + def WorkPost(M,C,W,S,verbose=False,skip_plots=False): """Performs the post-processing @@ -428,6 +493,9 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): proc[rank] += dt if verbose: dt = 
datetime.timedelta(seconds=max(1,int(np.round(dt)))) + print((" {0:>%d} {1:<%d} %sCompleted%s {2:>8}" % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name,str(dt))) + + print((" {0:>%d} {1:<%d} %sCompleted%s {2:>8}" % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name,str(dt))) sys.stdout.flush() except Exception as ex: @@ -438,7 +506,7 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): sys.stdout.flush() sys.stdout.flush(); comm.Barrier() - + for i,c in enumerate(C): try: c.compositePlots() @@ -487,6 +555,7 @@ class MPIFileHandler(logging.FileHandler): def _open(self): stream = MPI.File.Open( self.comm, self.baseFilename, self.mode ) + stream.Set_atomicity(True) return stream @@ -611,6 +680,7 @@ if args.model_setup is None: models_path=args.build_dir[0]) else: M = ParseModelSetup(args.model_setup[0],args.models,not args.quiet,filter=args.filter[0],models_path=args.build_dir[0]) + if rank == 0 and not args.quiet: print("\nParsing config file %s...\n" % args.config[0]) S = Scoreboard(args.config[0], regions = args.regions, @@ -622,7 +692,10 @@ S = Scoreboard(args.config[0], mem_per_pair = args.mem_per_pair, run_title = args.run_title, rmse_score_basis = args.rmse_score_basis) + C = MatchRelationshipConfrontation(S.list()) +C = MatchSensitivityConfrontation(C) + if len(args.study_limits) == 2: args.study_limits[1] += 1 for c in C: c.study_limits = (np.asarray(args.study_limits)-1850)*365. @@ -662,6 +735,7 @@ if rank==0 and not args.quiet: print("\nRunning model-confrontation pairs...\n") sys.stdout.flush(); comm.Barrier() W = BuildLocalWorkList(M,C,skip_cache=True) + WorkConfront(W,not args.quiet,args.clean) sys.stdout.flush(); comm.Barrier() diff --git a/src/ILAMB/ConfAlbedo.py b/src/ILAMB/ConfAlbedo.py index d509d97..66186ea 100644 --- a/src/ILAMB/ConfAlbedo.py +++ b/src/ILAMB/ConfAlbedo.py @@ -32,7 +32,7 @@ class ConfAlbedo(Confrontation): def __init__(self,**keywords): super(ConfAlbedo,self).__init__(**keywords) self.derived = "rsus / rsds" - + def stageData(self,m): energy_threshold = float(self.keywords.get("energy_threshold",10)) diff --git a/src/ILAMB/ConfIOMB.py b/src/ILAMB/ConfIOMB.py index e21e7ef..a929180 100644 --- a/src/ILAMB/ConfIOMB.py +++ b/src/ILAMB/ConfIOMB.py @@ -166,7 +166,12 @@ def stageData(self,m): # part of the expression to look at the time info = "" possible = [self.variable,] + self.alternate_vars - if self.derived is not None: possible += [str(s) for s in sympify(self.derived).free_symbols] + if self.derived is not None: + if isinstance(self.derived, list): + for dr in self.derived: + possible += [str(s) for s in sympify(dr).free_symbols] + else: + possible += [str(s) for s in sympify(self.derived).free_symbols] vname = [v for v in possible if v in m.variables.keys()] if len(vname) == 0: logger.debug("[%s] Could not find [%s] in the model results" % (self.name,",".join(possible))) diff --git a/src/ILAMB/ConfNBP.py b/src/ILAMB/ConfNBP.py index 05a4dd2..b935420 100644 --- a/src/ILAMB/ConfNBP.py +++ b/src/ILAMB/ConfNBP.py @@ -8,7 +8,10 @@ import numpy as np import os,glob -def SpaceLabels(y,ymin,maxit=1000): +def SpaceLabels(y,ymin,maxit=1000): + if len(y) == 1: + return y + for j in range(maxit): dy = np.diff(y) for i in range(1,y.size-1): @@ -267,13 +270,14 @@ def NBPplot(V,vmin,vmax,colors,fname): Y = np.asarray(Y); L = np.asarray(L) ind = np.argsort(Y) Y = Y[ind]; L = L[ind] - + fig = plt.figure(figsize=(11.8,5.8)) ax = fig.add_subplot(1,1,1,position=[0.06,0.06,0.8,0.92]) data_range = vmax-vmin fig_height = fig.get_figheight() font_size = 10 dy = 0.05*data_range 
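+    # spread the label y-positions apart so adjacent model labels do not
+    # overlap; the minimum separation approximates one label height in
+    # data units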
+ y = SpaceLabels(Y.copy(),data_range/fig_height*font_size/50.) v = V["Benchmark"] for i in range(L.size): diff --git a/src/ILAMB/ConfSoilMoisture.py b/src/ILAMB/ConfSoilMoisture.py new file mode 100644 index 0000000..626b13a --- /dev/null +++ b/src/ILAMB/ConfSoilMoisture.py @@ -0,0 +1,1591 @@ +from . import ilamblib as il +from .Variable import * +from .Regions import Regions +from .constants import space_opts,time_opts,mid_months,bnd_months +import os,glob,re +from netCDF4 import Dataset +from . import Post as post +import pylab as plt +from matplotlib.colors import LogNorm, Normalize +from matplotlib.cm import ScalarMappable +import cartopy.crs as ccrs +from mpl_toolkits.axes_grid1 import make_axes_locatable +from mpi4py import MPI +from sympy import sympify +import cftime as cf +from .Confrontation import getVariableList +from .Confrontation import Confrontation +import numpy as np +import time +from copy import deepcopy +import warnings + +import logging +logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) + + +def _createBnds(x): + x = np.asarray(x) + x_bnds = np.zeros((x.size,2)) + x_bnds[+1:,0] = 0.5*(x[:-1]+x[+1:]) + x_bnds[:-1,1] = 0.5*(x[:-1]+x[+1:]) + if x.size == 1: + x_bnds[ ...] = x + else: + x_bnds[ 0,0] = x[ 0] - 0.5*(x[ 1]-x[ 0]) + x_bnds[-1,1] = x[-1] + 0.5*(x[-1]-x[-2]) + return x_bnds + + +class ConfSoilMoisture(Confrontation): + + def __init__(self,**keywords): + # Calls the regular constructor + super(ConfSoilMoisture,self).__init__(**keywords) + + # Get/modify depths + with Dataset(self.source) as dset: + v = dset.variables[self.variable] + depth_name = [d for d in v.dimensions if d in ["layer","depth","levgrnd"]] + if len(depth_name) == 0: + # if there is no depth dimension, we assume the data is + # top 10cm + self.depths = np.asarray([[0., .1]],dtype=float) + self.depths_units = 'm' + else: + # if there are depths, then make sure that the depths + # at which we will compute are in the range of depths + # of the data + depth_name = depth_name[0] + depth_bnd_name = [d for d in dset.variables.keys() \ + if depth_name in d and ("bound" in d or "bnd" in d)] + + if len(depth_bnd_name) > 0: + depth_bnd_name = depth_bnd_name[0] + data = dset.variables[depth_bnd_name][...].data + self.depths = data + self.depths_units = dset.variables[depth_bnd_name].units + else: + data = dset.variables[depth_name][...] + data = _createBnds(data) + self.depths = data + self.depths_units = dset.variables[depth_name].units + + def stageData(self,m): + """ Extract Model data with interpolation to the confrontation + depth.""" + mem_slab = self.keywords.get("mem_slab",100000.) # Mb + + # peak at the reference dataset without reading much into memory + info = "" + unit = "" + with Dataset(self.source) as dset: + var = dset.variables[self.variable] + + obs_t,obs_tb,obs_cb,obs_b,obs_e,cal = il.GetTime(var) + + obs_nt = obs_t.size + obs_mem = var.size*8e-6 + unit = var.units + climatology = False if obs_cb is None else True + if climatology: + info += "[climatology]" + obs_cb = (obs_cb-1850)*365. 
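+                # the climatology bounds (now in days since 1850) define
+                # the reference time window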
+ obs_t0 = obs_cb[0]; obs_tf = obs_cb[1] + else: + obs_t0 = obs_tb[0,0]; obs_tf = obs_tb[-1,1] + + obs_dname = [name for name in dset.variables.keys() \ + if name.lower() in ["depth", "layer", "levgrnd"]] + obs_dbndname = [name for name in dset.variables.keys() \ + if name.lower() in ["depth_bnds", "depth_bounds", + "layer_bnds", "layer_bounds", + "levgrnd_bnds", "levgrnd_bounds"]] + if len(obs_dbndname) > 0: + obs_dname = obs_dbndname[0] + obs_z0 = np.min(dset.variables[obs_dname]) + obs_zf = np.max(dset.variables[obs_dname]) + elif len(obs_dname) > 0: + obs_dname = obs_dname[0] + data = _createBnds(dset.variables[obs_dname][...].data) + obs_z0 = np.min(data) + obs_zf = np.max(data) + else: + # if there is no depth, assume the data is surface + obs_z0 = 0; obs_zf = 0.1 + obs_dname = None + info += " contents span years %.1f to %.1f and depths %.1f to %.1f, est memory %d [Mb]" % (obs_t0/365.+1850,obs_tf/365.+1850,obs_z0,obs_zf,obs_mem) + logger.info("[%s][%s]%s" % (self.name,self.variable,info)) + + # to peak at the model, we need any variable that could be + # part of the expression to look at the time + info = "" + possible = [self.variable,] + self.alternate_vars + if self.derived is not None: + if isinstance(self.derived, list): + for dr in self.derived: + possible += [str(s) for s in sympify(dr).free_symbols] + else: + possible += [str(s) for s in sympify(self.derived).free_symbols] + vname = [v for v in possible if v in m.variables.keys()] + if len(vname) == 0: + logger.debug("[%s] Could not find [%s] in the model results" % (self.name,",".join(possible))) + raise il.VarNotInModel() + vname = vname[0] + + # peak at the model dataset without reading much into memory + mod_nt = 0 + mod_mem = 0. + mod_t0 = 2147483647 + mod_tf = -2147483648 + mod_z0 = 2147483647 + mod_zf = -2147483648 + for fname in m.variables[vname]: + with Dataset(fname) as dset: + var = dset.variables[vname] + mod_t,mod_tb,mod_cb,mod_b,mod_e,cal = il.GetTime(var,t0=obs_t0-m.shift, + tf=obs_tf-m.shift) + + if mod_t is None: + info += "\n %s does not overlap the reference" % (fname) + continue + mod_t += m.shift + mod_tb += m.shift + ind = np.where((mod_tb[:,0] >= obs_t0)*(mod_tb[:,1] <= obs_tf))[0] + if ind.size == 0: + info += "\n %s does not overlap the reference" % (fname) + continue + mod_t = mod_t [ind] + mod_tb = mod_tb[ind] + mod_t0 = min(mod_t0,mod_tb[ 0,0]) + mod_tf = max(mod_tf,mod_tb[-1,1]) + + mod_dname = [name for name in dset.variables.keys() \ + if name.lower() in ["depth", "layer", "levgrnd", "solay"]] + mod_dbndname = [name for name in dset.variables.keys() \ + if name.lower() in ["depth_bnds", "depth_bounds", + "layer_bnds", "layer_bounds", + "levgrnd_bnds", "levgrnd_bounds", + "solay_bnds", "solay_bounds"]] + if len(mod_dbndname) > 0: + mod_dname = mod_dbndname[0] + temp = dset.variables[mod_dname][...] 
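+                    # keep only the model layers whose bounds overlap the
+                    # reference depth range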
+ ind = (temp[:,1] > obs_z0)*(temp[:,0] < obs_zf) + if sum(ind) == 0: + info += "\n %s does not overlap the reference" % (fname) + continue + z0 = np.min(temp[ind, :]) + zf = np.max(temp[ind, :]) + elif len(mod_dname) > 0: + mod_dname = mod_dname[0] + data = _createBnds(dset.variables[mod_dname][...].data) + z0 = np.min(data) + zf = np.max(data) + else: + # if there is no depth, assume the data is surface + z0 = 0; zf = 0.1; mod_dname = None + + mod_z0 = min(mod_z0,z0) + mod_zf = max(mod_zf,zf) + + nt = mod_t.size + mod_nt += nt + mem = (var.size/var.shape[0]*nt)*8e-6 + mod_mem += mem + info += "\n %s spans years %.1f to %.1f and depths %.1f to %.1f, est memory in time bounds %d [Mb]" % (fname,mod_t.min()/365.+1850,mod_t.max()/365.+1850,mod_z0,mod_zf,mem) + info += "\n total est memory = %d [Mb]" % mod_mem + logger.info("[%s][%s][%s] reading model data from possibly many files%s" % (self.name,m.name,vname,info)) + if mod_t0 > mod_tf: + logger.debug("[%s] Could not find [%s] in the model results in the given time frame, tinput = [%.1f,%.1f]" % (self.name,",".join(possible),mod_t0,mod_tf)) + raise il.VarNotInModel() + + # yield the results by observational depths + def _addDepth(v): + v.depth = np.asarray([.05]) + v.depth_bnds = np.asarray([[0.,.1]]) + shp = list(v.data.shape) + shp.insert(1,1) + v.data.shape = shp + v.layered = True + return v + + info = "" + for i in range(self.depths.shape[0]): + z0 = max(self.depths[i,0], obs_z0, mod_z0) + zf = min(self.depths[i,1], obs_zf, mod_zf) + if z0 >= zf: + continue + + mod_t0 = max(mod_t0,obs_t0) + mod_tf = min(mod_tf,obs_tf) + logger.info("[%s][%s] building depths %.1f to %.1f in loop %d" % (self.name,m.name,z0,zf,i)) + + # get reference variable + tstart = time.time() + if obs_dname is None: + obs = Variable(filename = self.source, + variable_name = self.variable, + alternate_vars = self.alternate_vars, + t0 = mod_t0, + tf = mod_tf).trim(t = [mod_t0,mod_tf]) + obs = _addDepth(obs) + else: + obs = Variable(filename = self.source, + variable_name = self.variable, + alternate_vars = self.alternate_vars, + t0 = mod_t0, + tf = mod_tf, + z0 = z0, + zf = zf).trim(t = [mod_t0,mod_tf]) + obs = obs.integrateInDepth(z0 = z0, zf = zf, mean = True) + obs.name = "%.2f-%.2f %s" % (z0, zf, self.depths_units) + tend = time.time() + logger.info("[%s][%s] loading the reference depths %.1f to %.1f took %f minutes" % (self.name,m.name,z0, zf,(tend-tstart)/60)) + + # get model variable + tstart = time.time() + if mod_dname is None: + mod = m.extractTimeSeries(self.variable, + alt_vars = self.alternate_vars, + expression = self.derived, + initial_time = mod_t0, + final_time = mod_tf).trim(t=[mod_t0,mod_tf]).convert(obs.unit) + mod = _addDepth(mod) + else: + mod = m.extractTimeSeries(self.variable, + alt_vars = self.alternate_vars, + expression = self.derived, + initial_time = mod_t0, + final_time = mod_tf, + initial_depth= z0, + final_depth = zf).trim(t=[mod_t0,mod_tf]).convert(obs.unit) + mod = mod.trim(d = [z0, zf]).integrateInDepth(z0 = z0, zf = zf, mean = True) + + mod.name = "%.2f-%.2f %s" % (z0, zf, self.depths_units) + tend = time.time() + logger.info("[%s][%s] loading the model depths %.1f to %.1f took %f minutes" % (self.name,m.name,z0, zf,(tend-tstart)/60)) + + assert obs.time.size == mod.time.size + + obs.name = obs.name.split("_")[0] + mod.name = mod.name.split("_")[0] + + yield obs, mod, z0, zf + + + def stageRef(self,m): + """Extract the data that will be done partial correlations with soil moisture data.""" + + # Check the order of magnitude of the 
data and convert to help avoid roundoff errors + def _reduceRoundoffErrors(var): + if "s-1" in var.unit: return var.convert(var.unit.replace("s-1","d-1")) + if "kg" in var.unit: return var.convert(var.unit.replace("kg" ,"g" )) + return var + + def _getOrder(var): + return np.log10(np.abs(var.data).clip(1e-16)).mean() + + obs_list = {} + mod_list = {} + for sens in self.sensitivities: + obs = Variable(filename = sens.source, + variable_name = sens.variable, + alternate_vars = sens.alternate_vars, + t0 = None if len(self.study_limits) != 2 else self.study_limits[0], + tf = None if len(self.study_limits) != 2 else self.study_limits[1]) + if obs.time is None: raise il.NotTemporalVariable() + self.pruneRegions(obs) + + # Try to extract a commensurate quantity from the model + mod = m.extractTimeSeries(sens.variable, + alt_vars = sens.alternate_vars, + expression = sens.derived, + initial_time = obs.time_bnds[ 0,0], + final_time = obs.time_bnds[-1,1], + lats = None if obs.spatial else obs.lat, + lons = None if obs.spatial else obs.lon) + obs,mod = il.MakeComparable(obs,mod, + mask_ref = True, + clip_ref = True, + extents = self.extents, + logstring = "[%s][%s]" % (self.longname,m.name)) + + order = _getOrder(obs) + count = 0 + while order < -2 and count < 2: + obs = _reduceRoundoffErrors(obs) + order = _getOrder(obs) + count += 1 + + # convert the model data to the same unit + mod = mod.convert(obs.unit) + obs_list[sens.longname.split('/')[0]] = obs + mod_list[sens.longname.split('/')[0]] = mod + return obs_list, mod_list + + + def confront(self,m): + mod_file = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) + obs_file = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name, )) + with il.FileContextManager(self.master,mod_file,obs_file) as fcm: + + # Encode some names and colors + fcm.mod_dset.setncatts({"name" :m.name, + "color":m.color, + "complete":0}) + if self.master: + fcm.obs_dset.setncatts({"name" :"Benchmark", + "color":np.asarray([0.5,0.5,0.5]), + "complete":0}) + + # Read in some options and run the mean state analysis + mass_weighting = self.keywords.get("mass_weighting",False) + skip_rmse = self.keywords.get("skip_rmse" ,False) + skip_iav = self.keywords.get("skip_iav" ,False) + skip_cycle = self.keywords.get("skip_cycle" ,False) + rmse_score_basis = self.keywords.get("rmse_score_basis","cycle") + + # Read in some options to decide whether to run the trend state analysis + skip_trend = self.keywords.get("skip_trend" ,False) + if type(skip_trend) == type(""): + skip_trend = (skip_trend.lower() == "true") + + # Get the depth-integrated observation and model data for each slab. 
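+            # stageData() is a generator: it yields one (obs, mod, z0, zf)
+            # tuple per overlapping depth slab, so only a single slab is
+            # processed at a time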
+ for obs,mod,z0,zf in self.stageData(m): + logger.info("[%s][%s] confronting the depths %.1f to %.1f" % (self.name,m.name,z0, zf)) + + if obs.spatial: + # Calculate mean state + il.AnalysisMeanStateSpace(obs, mod, dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_rmse = skip_rmse, + skip_iav = skip_iav, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting, + rmse_score_basis = rmse_score_basis) + + # Calculate trend state + if not skip_trend: + il.AnalysisTrendStateSpace(obs, mod, dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting) + else: + il.AnalysisMeanStateSites(obs, mod, dataset = fcm.mod_dset, + regions = self.regions, + benchmark_dataset = fcm.obs_dset, + table_unit = self.table_unit, + plot_unit = self.plot_unit, + space_mean = self.space_mean, + skip_rmse = skip_rmse, + skip_iav = skip_iav, + skip_cycle = skip_cycle, + mass_weighting = mass_weighting) + # !!! TO-DO: Add AnalysisTrendStateSites + + # Calculate sensitivity by partial correlation + if self.sensitivities is not None: + obs_comparable = deepcopy(obs) + mod_comparable = deepcopy(mod) + + obs_indep_list, mod_indep_list = self.stageRef(m) + + # Find the minimum overlapping time periods between obs_comparable and + # all the obs_indep, mod_comparable and all the mod_indep + obs_t0 = obs_comparable.time_bnds[0,0] + obs_tf = obs_comparable.time_bnds[-1,1] + mod_t0 = mod_comparable.time_bnds[0,0] + mod_tf = mod_comparable.time_bnds[-1,1] + for c in self.sensitivities: + obs_indep = obs_indep_list[c.longname.split('/')[0]] + mod_indep = mod_indep_list[c.longname.split('/')[0]] + + obs_indep = il.ClipTime(obs_indep, obs_t0, obs_tf) + obs_t0 = max(obs_t0, obs_indep.time_bnds[ 0,0]) + obs_tf = min(obs_tf, obs_indep.time_bnds[-1,1]) + + mod_indep = il.ClipTime(mod_indep, mod_t0, mod_tf) + mod_t0 = max(mod_t0, mod_indep.time_bnds[ 0,0]) + mod_tf = min(mod_tf, mod_indep.time_bnds[-1,1]) + + # (second pass) + obs_comparable = il.ClipTime(obs_comparable, obs_t0, obs_tf) + mod_comparable = il.ClipTime(mod_comparable, mod_t0, mod_tf) + + for c in self.sensitivities: + obs_indep = obs_indep_list[c.longname.split('/')[0]] + mod_indep = mod_indep_list[c.longname.split('/')[0]] + + obs_indep_list[c.longname.split('/')[0]] = \ + il.ClipTime(obs_indep, obs_t0, obs_tf) + mod_indep_list[c.longname.split('/')[0]] = \ + il.ClipTime(mod_indep, mod_t0, mod_tf) + + if obs.spatial: + il.AnalysisPartialCorrSpace(obs_comparable, mod_comparable, + obs_indep_list, mod_indep_list, + benchmark_dataset = fcm.obs_dset, + dataset = fcm.mod_dset, + mass_weighting = mass_weighting, + regions = self.regions) + else: + # TO-DO: Add AnalysisPartialCorrSites + pass + + fcm.mod_dset.setncattr("complete",1) + if self.master: fcm.obs_dset.setncattr("complete",1) + logger.info("[%s][%s] Success" % (self.longname,m.name)) + + + def determinePlotLimits(self): + """Determine the limits of all plots which are inclusive of all ranges. + + The routine will open all netCDF files in the output path and + add the maximum and minimum of all variables which are + designated to be plotted. If legends are desired for a given + plot, these are rendered here as well. This routine should be + called before calling any plotting routine. 
+ """ + max_str = "up99" + min_str = "dn99" + if self.keywords.get("limit_type","99per") == "minmax": + max_str = "max" + min_str = "min" + + # Determine the min/max of variables over all models + limits = {} + prune = False + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + with Dataset(fname) as dataset: + for pn,ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]): + if pn not in dataset.groups: continue + if pn not in limits: limits[pn] = {} + group = dataset.groups[pn] + variables = [v for v in group.variables.keys() \ + if v not in group.dimensions.keys()] + for vname in variables: + if (ffix + "_") != vname[:(len(ffix)+1)]: continue + var = group.variables[vname] + pname = vname.split("_")[1] + region = vname.split("_")[-1] + if var[...].size <= 1: continue + if pname in time_opts[pn]: + """If the plot is a time series, it has been averaged over regions + already and we need a separate dictionary for the + region as well. These can be based on the + percentiles from the attributes of the netCDF + variables.""" + if pname not in limits[pn]: limits[pn][pname] = {} + if region not in limits[pn][pname]: + limits[pn][pname][region] = {} + limits[pn][pname][region]["min"] = +1e20 + limits[pn][pname][region]["max"] = -1e20 + limits[pn][pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pn][pname][region]["min"] = min(limits[pn][pname][region]["min"],var.getncattr("min")) + limits[pn][pname][region]["max"] = max(limits[pn][pname][region]["max"],var.getncattr("max")) + else: + """If the plot is spatial, we want to set the limits as a percentile + of all data across models and the + benchmark. So here we load the data up and in + another pass will compute the percentiles.""" + if pname not in limits[pn]: + limits[pn][pname] = {} + limits[pn][pname]["min"] = +1e20 + limits[pn][pname]["max"] = -1e20 + limits[pn][pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pn][pname]["data"] = var[...].compressed() + else: + limits[pn][pname]["data"] = np.hstack([limits[pn][pname]["data"],var[...].compressed()]) + limits[pn][pname]["min"] = min(limits[pn][pname]["min"],var.getncattr(min_str)) + limits[pn][pname]["max"] = max(limits[pn][pname]["max"],var.getncattr(max_str)) + if not prune and "Benchmark" in fname and pname == "timeint": + prune = True + self.pruneRegions(Variable(filename = fname, + variable_name = vname, + groupname = pn)) + + # Second pass to plot legends (FIX: only for master?) 
+ for pn, ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]): + if not pn in limits.keys(): continue + for pname in limits[pn].keys(): + try: + opts = space_opts[pn][pname] + except: + continue + + # Determine plot limits and colormap + if opts["sym"]: + vabs = max(abs(limits[pn][pname]["min"]), + abs(limits[pn][pname]["min"])) + limits[pn][pname]["min"] = -vabs + limits[pn][pname]["max"] = vabs + + # if a score, force to be [0,1] + if "score" in pname: + limits[pn][pname]["min"] = 0 + limits[pn][pname]["max"] = 1 + + limits[pn][pname]["cmap"] = opts["cmap"] + if limits[pn][pname]["cmap"] == "choose": limits[pn][pname]["cmap"] = self.cmap + + # Plot a legend for each key + if opts["haslegend"]: + fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) + label = opts["label"] + if label == "unit": label = limits[pn][pname]["unit"] + post.ColorBar(ax, + vmin = limits[pn][pname]["min"], + vmax = limits[pn][pname]["max"], + cmap = limits[pn][pname]["cmap"], + ticks = opts["ticks"], + ticklabels = opts["ticklabels"], + label = label) + fig.savefig(os.path.join(self.output_path,"legend_%s.png" % (ffix + "_" + pname))) + plt.close() + + # For those limits which we built up data across all models, compute the percentiles + for pname in limits.keys(): + if "data" in limits[pname]: + limits[pn][pname]["min"],limits[pn][pname]["max"] = np.percentile(limits[pn][pname]["data"],[1,99]) + + # Determine min/max of relationship variables + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + with Dataset(fname) as dataset: + for g in dataset.groups.keys(): + if "relationship" not in g: continue + grp = dataset.groups[g] + if g not in limits: + limits[g] = {} + limits[g]["xmin"] = +1e20 + limits[g]["xmax"] = -1e20 + limits[g]["ymin"] = +1e20 + limits[g]["ymax"] = -1e20 + limits[g]["xmin"] = min(limits[g]["xmin"],grp.variables["ind_bnd"][ 0, 0]) + limits[g]["xmax"] = max(limits[g]["xmax"],grp.variables["ind_bnd"][-1,-1]) + limits[g]["ymin"] = min(limits[g]["ymin"],grp.variables["dep_bnd"][ 0, 0]) + limits[g]["ymax"] = max(limits[g]["ymax"],grp.variables["dep_bnd"][-1,-1]) + + self.limits = limits + + def compositePlots(self): + """Renders plots which display information of all models. + + This routine renders plots which contain information from all + models. Thus only the master process will run this routine, + and only after all analysis has finished. 
+ + """ + if not self.master: return + + # get the HTML page + for pn, ffix in zip(['MeanState', 'TrendState', 'Sensitivities'], + ['mean', 'trend', 'sensitivities']): + try: + page = [page for page in self.layout.pages if pn in page.name][0] + except: + continue + + models = [] + colors = [] + corr = {} + std = {} + cycle = {} + has_cycle = False + has_std = False + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + dataset = Dataset(fname) + if pn not in dataset.groups: continue + dset = dataset.groups[pn] + models.append(dataset.getncattr("name")) + colors.append(dataset.getncattr("color")) + for region in self.regions: + if region not in cycle: cycle[region] = {} + if region not in std : std[region] = {} + if region not in corr : corr[region] = {} + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + + if zstr not in cycle[region]: cycle[region][zstr] = [] + + key = [v for v in dset.variables.keys() \ + if ("cycle_" in v and zstr in v and region in v)] + if len(key)>0: + has_cycle = True + cycle[region][zstr].append(Variable(filename=fname,groupname=pn, + variable_name=key[0])) + + if zstr not in std[region]: std[region][zstr] = [] + if zstr not in corr[region]: corr[region][zstr] = [] + + key = [] + if "scalars" in dset.groups: + key = [v for v in dset.groups["scalars"].variables.keys() \ + if ("Spatial Distribution Score" in v and zstr \ + in v and region in v)] + if len(key) == 1: + has_std = True + sds = dset.groups["scalars"].variables[key[0]] + corr[region][zstr].append(sds.getncattr("R" )) + std [region][zstr].append(sds.getncattr("std")) + elif len(key) > 1: + has_std = True + corr[region][zstr] = {} + std [region][zstr] = {} + for c in self.sensitivities: + longname = c.longname.split("/")[0] + kk = [k3 for k3 in key if longname in k3] + if len(kk) == 0: + continue + if longname not in std[region][zstr]: std[region][zstr][longname] = [] + if longname not in corr[region][zstr]: corr[region][zstr][longname] = [] + sds = dset.groups["scalars"].variables[kk[0]] + corr[region][zstr][longname].append(sds.getncattr("R" )) + std [region][zstr][longname].append(sds.getncattr("std")) + + + def _alphabeticalBenchmarkFirst(key): + key = key[0].lower() + if key == "BENCHMARK": return "A" + return key + tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) + fig,ax = plt.subplots() + for model,color in tmp: + ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) + handles,labels = ax.get_legend_handles_labels() + plt.close() + + ncol = np.ceil(float(len(models))/11.).astype(int) + if ncol > 0: + fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) + ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) + ax.axis(False) + fig.savefig(os.path.join(self.output_path,"legend_" + ffix + "_compcycle.png")) + if ffix == 'sensitivities': + for c in self.sensitivities: + longname = c.longname.split('/')[0] + fig.savefig(os.path.join(self.output_path, + "legend_sens_spatial_variance_" + \ + longname + ".png")) + else: + fig.savefig(os.path.join(self.output_path, + "legend_" + ffix + "_temporal_variance.png")) + fig.savefig(os.path.join(self.output_path, + "legend_" + ffix + "_spatial_variance.png")) + plt.close() + + # composite annual cycle plot + if has_cycle and len(models) > 0: + page.addFigure("Spatially integrated regional " + ffix, + ffix + "_compcycle", + "RNAME_" + ffix + "_compcycle.png", + side = "ANNUAL CYCLE", + legend = False) + + for region in 
self.regions: + if region not in cycle: continue + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0]), + sharex = True, sharey = True) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind, 1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + + if self.depths.shape[0] == 1: + ax = axes + else: + ax = axes.flat[dind] + + for name,color,var in zip(models,colors,cycle[region][zstr]): + dy = 0.05*(self.limits[pn]["cycle"][region]["max"] - \ + self.limits[pn]["cycle"][region]["min"]) + + var.plot(ax, lw=2, color=color, label=name, + ticks = time_opts[pn]["cycle"]["ticks"], + ticklabels = time_opts[pn]["cycle"]["ticklabels"], + vmin = self.limits[pn]["cycle"][region]["min"]-dy, + vmax = self.limits[pn]["cycle"][region]["max"]+dy) + ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + ax.set_title(zstr) + fig.savefig(os.path.join(self.output_path, + "%s_%s_compcycle.png" % (region, ffix))) + plt.close() + + # plot legends with model colors (sorted with Benchmark data on top) + page.addFigure("Spatially integrated regional " + ffix, + "legend_" + ffix + "_compcycle", + "legend_" + ffix + "_compcycle.png", + side = "MODEL COLORS", + legend = False) + + # spatial distribution Taylor plot + if has_std: + if "Benchmark" in models: colors.pop(models.index("Benchmark")) + + if ffix != 'sensitivities': + page.addFigure("Temporally integrated period " + ffix, + ffix + "_spatial_variance", + "RNAME_" + ffix + "_spatial_variance.png", + side = "SPATIAL TAYLOR DIAGRAM", + legend = False) + page.addFigure("Temporally integrated period " + ffix, + "legend_" + ffix + "_spatial_variance", + "legend_" + ffix + "_spatial_variance.png", + side = "MODEL COLORS", + legend = False) + for region in self.regions: + if not (region in std and region in corr): continue + + fig = plt.figure(figsize=(12.0,12.0)) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind, 1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + + if not (zstr in std[region] and zstr in corr[region]): continue + if len(std[region][zstr]) != len(corr[region][zstr]): continue + if len(std[region][zstr]) == 0: continue + ax, aux = post.TaylorDiagram(np.asarray(std[region][zstr]), + np.asarray(corr[region][zstr]), + 1.0,fig,colors,True,220+dind+1) + ax.set_title(zstr) + fig.savefig(os.path.join(self.output_path, + "%s_%s_spatial_variance.png" % (region, ffix))) + plt.close() + else: + for c in self.sensitivities: + longname = c.longname.split('/')[0] + page.addFigure(c.longname, + "sens_spatial_variance_%s" % longname, + "RNAME_sens_spatial_variance_%s.png" % longname, + side = "SPATIAL TAYLOR DIAGRAM", legend = False) + page.addFigure(c.longname, + "legend_sens_spatial_variance_%s" % longname, + "legend_sens_spatial_variance_%s.png" % longname, + side = "MODEL COLORS", legend = False) + for region in self.regions: + if not (region in std and region in corr): continue + fig = plt.figure(figsize=(12.0,12.0)) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind, 1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + + if not (zstr in std[region] and zstr in corr[region]): continue + if not (longname in std[region][zstr] and \ + longname in corr[region][zstr]): continue + if len(std[region][zstr][longname]) != \ + len(corr[region][zstr][longname]): continue + if len(std[region][zstr][longname]) == 0: continue + ax, aux = post.TaylorDiagram( \ + np.asarray(std [region][zstr][longname]), + np.asarray(corr[region][zstr][longname]), + 
1.0,fig,colors,True,220+dind+1) + ax.set_title(zstr) + fig.savefig(os.path.join(self.output_path, + "%s_sens_spatial_variance_%s.png" \ + % (region, longname))) + plt.close() + + + def modelPlots(self,m): + """For a given model, create the plots of the analysis results. + + This routine will extract plotting information out of the + netCDF file which results from the analysis and create + plots. Note that determinePlotLimits should be called before + this routine. + """ + self._relationship(m) + self._sensitivity(m) + bname = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name )) + fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) + if not os.path.isfile(bname): return + if not os.path.isfile(fname): return + + # get the HTML page + for pn, ffix in zip(["MeanState", "TrendState"], ["mean", "trend"]): + try: + page = [page for page in self.layout.pages if pn in page.name][0] + except: + continue + + with Dataset(fname) as dataset: + group = dataset.groups[pn] + variables = getVariableList(group) + color = dataset.getncattr("color") + for vname in variables: + # The other depths will be handled in plotting + zstr_0 = '%.2f-%.2f %s' % (self.depths[0,0], self.depths[0,1], self.depths_units) + + if not zstr_0 in vname: continue + if (ffix + "_") != vname[:(len(ffix)+1)]: continue + + # is this a variable we need to plot? + if group.variables[vname][...].size <= 1: continue + var = Variable(filename=fname,groupname=pn,variable_name=vname) + pname = vname.split("_")[1] + + if (var.spatial or (var.ndata is not None)) and not var.temporal: + # grab plotting options + if pname not in self.limits[pn].keys(): continue + if pname not in space_opts[pn]: continue + opts = space_opts[pn][pname] + + # add to html layout + page.addFigure(opts["section"], + ffix + "_" + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) + + # plot variable + for region in self.regions: + nax = self.depths.shape[0] + fig = plt.figure() + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + var2 = Variable(filename=fname, groupname = pn, + variable_name=vname.replace(zstr_0, zstr)) + ax = var2.plot(None, fig, nax, region = region, + vmin = self.limits[pn][pname]["min"], + vmax = self.limits[pn][pname]["max"], + cmap = self.limits[pn][pname]["cmap"]) + if (ffix == 'trend') and (pname == 'timeint'): + var3 = Variable(filename=fname, groupname = pn, + variable_name=vname.replace(zstr_0, zstr \ + ).replace('trend','trendp')) + lat = np.hstack([var3.lat_bnds[:,0],var3.lat_bnds[-1,-1]]) + lon = np.hstack([var3.lon_bnds[:,0],var3.lon_bnds[-1,-1]]) + ax.pcolormesh(lon, lat, + np.ma.masked_array(var3.data > 0.05, + var3.data <= 0.05), + cmap = 'Greys', vmin = 0.5, vmax = 1.5, + alpha = 0.3, transform = ccrs.PlateCarree()) + ax.set_title(zstr) + fig.savefig(os.path.join(self.output_path, + "%s_%s_%s_%s.png" % (m.name,region,ffix, + pname))) + plt.close() + + # Jumping through hoops to get the benchmark plotted and in the html output + if self.master and (pname == "timeint" or \ + pname == "phase" or pname == "iav"): + opts = space_opts[pn][pname] + + # add to html layout + page.addFigure(opts["section"], + "benchmark_%s" % (ffix + "_" + pname), + opts["pattern"].replace("MNAME","Benchmark"), + side = opts["sidelbl"].replace("MODEL","BENCHMARK"), + legend = True) + + # plot variable + for region in self.regions: + nax = self.depths.shape[0] + fig = plt.figure() + for dind, z0 in 
enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + obs = Variable(filename=bname,groupname=pn, + variable_name=vname.replace(zstr_0, zstr)) + ax = obs.plot(None, fig, nax, region = region, + vmin = self.limits[pn][pname]["min"], + vmax = self.limits[pn][pname]["max"], + cmap = self.limits[pn][pname]["cmap"]) + ax.set_title(zstr) + if (ffix == 'trend') and (pname == 'timeint'): + obs1 = Variable(filename=bname, groupname = pn, + variable_name=vname.replace(zstr_0, zstr \ + ).replace('trend','trendp')) + lat = np.hstack([obs1.lat_bnds[:,0],obs1.lat_bnds[-1,-1]]) + lon = np.hstack([obs1.lon_bnds[:,0],obs1.lon_bnds[-1,-1]]) + ax.pcolormesh(lon, lat, + np.ma.masked_array(obs1.data > 0.05, + obs1.data <= 0.05), + cmap = 'Greys', vmin = 0.5, vmax = 1.5, + alpha = 0.3, transform = ccrs.PlateCarree()) + fig.savefig(os.path.join(self.output_path, + "Benchmark_%s_%s_%s.png" % (region, + ffix, + pname))) + plt.close() + + if not (var.spatial or (var.ndata is not None)) and var.temporal: + # grab the benchmark dataset to plot along with + try: + obs = Variable(filename=bname,groupname=pn, + variable_name=vname).convert(var.unit) + except: + continue + + # grab plotting options + if pname not in time_opts[pn]: continue + opts = time_opts[pn][pname] + + # add to html layout + page.addFigure(opts["section"], + ffix + "_" + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) + + # plot variable + for region in self.regions: + if region not in vname: continue + fig, axes = plt.subplots(self.depths.shape[0], 1, + figsize = (6.5, 2.8*self.depths.shape[0]), + sharex = True, sharey = True) + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + if self.depths.shape[0] == 1: + ax = axes + else: + ax = axes.flat[dind] + + var2 = Variable(filename=fname, groupname = pn, + variable_name=vname.replace(zstr_0, zstr)) + obs = Variable(filename=bname,groupname=pn, + variable_name=vname.replace(zstr_0, zstr)).convert(var2.unit) + obs.plot(ax, lw = 2, color = 'k', alpha = 0.3) + var2.plot(ax, lw = 2, color = color, label = m.name, + ticks =opts["ticks"], + ticklabels=opts["ticklabels"]) + dy = 0.05*(self.limits[pn][pname][region]["max"] - \ + self.limits[pn][pname][region]["min"]) + ax.set_ylim(self.limits[pn][pname][region]["min"]-dy, + self.limits[pn][pname][region]["max"]+dy) + ylbl = opts["ylabel"] + if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + ax.set_title(zstr) + fig.savefig(os.path.join(self.output_path, + "%s_%s_%s_%s.png" % (m.name,region,ffix, + pname))) + plt.close() + + logger.info("[%s][%s] Success" % (self.longname,m.name)) + + def _relationship(self,m,nbin=25): + """ + Modified to plot by depths. 
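+        For each depth slab, a normalized 2D distribution and a binned
+        functional response of the dependent variable are built against
+        each independent variable; the per-slab Hellinger and RMSE scores
+        are then combined, weighted by slab thickness.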
+ """ + def _retrieveSM(filename): + key_list = [] + with Dataset(filename,mode="r") as dset: + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind, 1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + key = [v for v in dset.groups["MeanState"].variables.keys() \ + if ("timeint_" in v) and (zstr in v)] + if len(key) == 0: + raise "Unable to retrieve data for relationship " + zstr + key_list.append(key[0]) + return [Variable(filename = filename, + groupname = "MeanState", + variable_name = key) for key in key_list] + + def _retrieveData(filename): + key = None + with Dataset(filename,mode="r") as dset: + key = [v for v in dset.groups["MeanState"].variables.keys() if "timeint_" in v] + return Variable(filename = filename, + groupname = "MeanState", + variable_name = key[0]) + + def _applyRefMask(ref,com): + tmp = ref.interpolate(lat=com.lat,lat_bnds=com.lat_bnds, + lon=com.lon,lon_bnds=com.lon_bnds) + com.data.mask += tmp.data.mask + return com + + def _checkLim(data,lim): + if lim is None: + lim = [min(data.min(),data.min()), + max(data.max(),data.max())] + delta = 1e-8*(lim[1]-lim[0]) + lim[0] -= delta + lim[1] += delta + else: + assert type(lim) == type([]) + assert len (lim) == 2 + return lim + + def _limitExtents(vars): + lim = [+1e20,-1e20] + for v in vars: + lmin,lmax = _checkLim(v.data,None) + lim[0] = min(lmin,lim[0]) + lim[1] = max(lmax,lim[1]) + return lim + + def _buildDistributionResponse(ind,dep,ind_lim=None,dep_lim=None,region=None,nbin=25,eps=3e-3): + + r = Regions() + + # Checks on the input parameters + assert np.allclose(ind.data.shape,dep.data.shape) + ind_lim = _checkLim(ind.data,ind_lim) + dep_lim = _checkLim(dep.data,dep_lim) + + # Mask data + mask = ind.data.mask + dep.data.mask + if region is not None: mask += r.getMask(region,ind) + x = ind.data[mask==False].flatten() + y = dep.data[mask==False].flatten() + + # Compute normalized 2D distribution + dist,xedges,yedges = np.histogram2d(x,y, + bins = [nbin,nbin], + range = [ind_lim,dep_lim]) + dist = np.ma.masked_values(dist.T,0).astype(float) + dist /= dist.sum() + + # Compute the functional response + which_bin = np.digitize(x,xedges).clip(1,xedges.size-1)-1 + mean = np.ma.zeros(xedges.size-1) + std = np.ma.zeros(xedges.size-1) + cnt = np.ma.zeros(xedges.size-1) + np.seterr(under='ignore') + for i in range(mean.size): + yi = y[which_bin==i] + cnt [i] = yi.size + mean[i] = yi.mean() + std [i] = yi.std() + mean = np.ma.masked_array(mean,mask = (cnt/cnt.sum()) < eps) + std = np.ma.masked_array( std,mask = (cnt/cnt.sum()) < eps) + np.seterr(under='warn') + return dist,xedges,yedges,mean,std + + def _scoreDistribution(ref,com): + mask = ref.mask + com.mask + ref = np.ma.masked_array(ref.data,mask=mask).compressed() + com = np.ma.masked_array(com.data,mask=mask).compressed() + return np.sqrt(((np.sqrt(ref)-np.sqrt(com))**2).sum())/np.sqrt(2) + + def _scoreFunction(ref,com): + mask = ref.mask + com.mask + ref = np.ma.masked_array(ref.data,mask=mask).compressed() + com = np.ma.masked_array(com.data,mask=mask).compressed() + return np.exp(-np.linalg.norm(ref-com)/np.linalg.norm(ref)) + + def _plotDistribution(dist_list,xedges_list,yedges_list, + xlabel, ylabel, filename): + fig, axes = plt.subplots(len(self.depths[:,0]), 1, + figsize=(3.5, 3 * len(self.depths[:,0])), + sharex = True, sharey = True, tight_layout=True) + for dind, dist, xedges, yedges in \ + zip(range(len(self.depths[:,0])), dist_list, xedges_list, yedges_list): + ax = axes.flat[dind] + pc = ax.pcolormesh(xedges, yedges, dist, + norm = 
LogNorm(vmin = 1e-4,vmax = 1e-1), + cmap = 'plasma' if 'plasma' in plt.cm.cmap_d else 'summer') + ax.set_xlabel(xlabel,fontsize = 8) + ax.set_ylabel(ylabel,fontsize = 8 if len(ylabel) <= 60 else 6) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + ax.set_title(('%.2f-%.2f' % (self.depths[dind,0], self.depths[dind,1])) \ + + self.depths_units) + warnings.filterwarnings(action = 'ignore') + fig.colorbar(pc, cax = fig.add_axes([0.97, 0.1, 0.02, 0.8]), + orientation="vertical",label="Fraction of total datasites") + fig.savefig(filename, bbox_inches = 'tight') + warnings.filterwarnings(action = 'default') + plt.close() + + def _plotDifference(ref_list,com_list,xedges_list,yedges_list,xlabel,ylabel,filename): + fig, axes = plt.subplots(len(self.depths[:,0]), 1, + figsize=(3.5, 3 * len(self.depths[:,0])), + sharex = True, sharey = True, tight_layout=True) + for dind, ref, com, xedges, yedges in \ + zip(range(len(self.depths[:,0])), ref_list, com_list, xedges_list, yedges_list): + ref = np.ma.copy(ref) + com = np.ma.copy(com) + ref.data[np.where(ref.mask)] = 0. + com.data[np.where(com.mask)] = 0. + diff = np.ma.masked_array(com.data-ref.data,mask=ref.mask*com.mask) + lim = np.abs(diff).max() + + ax = axes.flat[dind] + pc = ax.pcolormesh(xedges, yedges, diff, + cmap = 'Spectral_r', vmin = -lim, vmax = +lim) + ax.set_xlabel(xlabel,fontsize = 8) + ax.set_ylabel(ylabel,fontsize = 8 if len(ylabel) <= 60 else 6) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + ax.set_title(('%.2f-%.2f' % (self.depths[dind,0], self.depths[dind,1])) \ + + self.depths_units) + warnings.filterwarnings(action = 'ignore') + fig.colorbar(pc,cax = fig.add_axes([0.97, 0.1, 0.02, 0.8]), + orientation="vertical",label="Distribution Difference") + fig.savefig(filename, bbox_inches = 'tight') + warnings.filterwarnings(action = 'default') + plt.close() + + def _plotFunction(ref_mean_list,ref_std_list,com_mean_list,com_std_list, + xedges_list,yedges_list,xlabel,ylabel,color,filename): + fig, axes = plt.subplots(len(self.depths[:,0]), 1, + figsize=(3.5, 3 * len(self.depths[:,0])), + sharex = True, sharey = True, tight_layout=True) + for dind, ref_mean, ref_std, com_mean, com_std, xedges, yedges in \ + zip(range(len(self.depths[:,0])), ref_mean_list, ref_std_list, + com_mean_list, com_std_list, xedges_list, yedges_list): + + xe = 0.5*(xedges[:-1]+xedges[1:]) + delta = 0.1*np.diff(xedges).mean() + + # reference function + ref_x = xe - delta + ref_y = ref_mean + ref_e = ref_std + if not (ref_mean.mask==False).all(): + ind = np.where(ref_mean.mask==False) + ref_x = xe [ind]-delta + ref_y = ref_mean[ind] + ref_e = ref_std [ind] + + # comparison function + com_x = xe + delta + com_y = com_mean + com_e = com_std + if not (com_mean.mask==False).all(): + ind = np.where(com_mean.mask==False) + com_x = xe [ind]-delta + com_y = com_mean[ind] + com_e = com_std [ind] + + ax = axes.flat[dind] + ax.errorbar(ref_x,ref_y,yerr=ref_e,fmt='-o',color='k') + ax.errorbar(com_x,com_y,yerr=com_e,fmt='-o',color=color) + ax.set_xlabel(xlabel,fontsize = 8) + ax.set_ylabel(ylabel,fontsize = 8 if len(ylabel) <= 60 else 6) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + ax.set_title(('%.2f-%.2f' % (self.depths[dind,0], self.depths[dind,1])) \ + + self.depths_units) + warnings.filterwarnings(action = 'ignore') + fig.savefig(filename, bbox_inches = 'tight') + warnings.filterwarnings(action = 'default') + plt.close() + + def _composeSubset(a_dep, a_ind): + lat,lon,lat_bnds,lon_bnds = 
il._composeGrids(a_dep, a_ind) + + lat_min = max(np.min(a_dep.lat), np.min(a_ind.lat)) + lat_max = min(np.max(a_dep.lat), np.max(a_ind.lat)) + temp = (lat >= lat_min) & (lat <= lat_max) + lat = lat[temp] + lat_bnds = lat_bnds[temp, :] + lon_min = max(np.min(a_dep.lon), np.min(a_ind.lon)) + lon_max = min(np.max(a_dep.lon), np.max(a_ind.lon)) + temp = (lon >= lon_min) & (lon <= lon_max) + lon = lon[temp] + lon_bnds = lon_bnds[temp, :] + + new_dep = a_dep.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + new_ind= a_ind.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + + return new_dep, new_ind + + # If there are no relationships to analyze, get out of here + if self.relationships is None: return + + # Get the HTML page + page = [page for page in self.layout.pages if "Relationships" in page.name] + if len(page) == 0: return + page = page[0] + + # Try to get the dependent data from the model and obs + try: + ref_dep_list = _retrieveSM(os.path.join(self.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) + com_dep_list = _retrieveSM(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) + com_dep_list = [_applyRefMask(ref_dep, com_dep) for ref_dep,com_dep in zip(ref_dep_list,com_dep_list)] + dep_name = self.longname.split("/")[0] + dep_min = self.limits["MeanState"]["timeint"]["min"] + dep_max = self.limits["MeanState"]["timeint"]["max"] + except: + return + + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)), + mode="r+") as results: + + # Grab/create a relationship and scalars group + group = None + if "Relationships" not in results.groups: + group = results.createGroup("Relationships") + else: + group = results.groups["Relationships"] + if "scalars" not in group.groups: + scalars = group.createGroup("scalars") + else: + scalars = group.groups["scalars"] + + # for each relationship... 
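+            # entries in self.relationships were matched to Confrontation
+            # objects by MatchRelationshipConfrontation at startup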
+ for c in self.relationships: + # try to get the independent data from the model and obs + try: + ref_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) + com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) + com_ind = _applyRefMask(ref_ind,com_ind) + ind_name = c.longname.split("/")[0] + if "MeanState" in c.limits.keys(): + ind_min = c.limits["MeanState"]["timeint"]["min"]-1e-12 + ind_max = c.limits["MeanState"]["timeint"]["max"]+1e-12 + else: + ind_min = c.limits["timeint"]["min"]-1e-12 + ind_max = c.limits["timeint"]["max"]+1e-12 + except: + continue + + # Add figures to the html page + page.addFigure(c.longname, + "benchmark_rel_%s" % ind_name, + "Benchmark_RNAME_rel_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_%s" % ind_name, + "MNAME_RNAME_rel_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_diff_%s" % ind_name, + "MNAME_RNAME_rel_diff_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_func_%s" % ind_name, + "MNAME_RNAME_rel_func_%s.png" % ind_name, + legend = False, + benchmark = False) + + # Analysis over regions + lim_dep = [dep_min,dep_max] + lim_ind = [ind_min,ind_max] + longname = c.longname.split('/')[0] + for region in self.regions: + ref_dist_list = [] + ref_xedges_list = [] + ref_yedges_list = [] + ref_mean_list = [] + ref_std_list = [] + + com_dist_list = [] + com_xedges_list = [] + com_yedges_list = [] + com_mean_list = [] + com_std_list = [] + + for dind, ref_dep, com_dep in zip(range(len(ref_dep_list)), ref_dep_list, + com_dep_list): + # Check on data shape + if not np.allclose(ref_dep.data.shape,ref_ind.data.shape): + try: + # try to subset to the overlapping region + REF_dep, REF_ind = _composeSubset(ref_dep, ref_ind) + except: + msg = "[%s][%s] Data size mismatch in relationship: %s %s vs. %s %s" % (self.longname,m.name,dep_name,str(ref_dep.data.shape),ind_name,str(ref_ind.data.shape)) + logger.debug(msg) + raise ValueError + else: + REF_dep, REF_ind = ref_dep, ref_ind + + if not np.allclose(com_dep.data.shape,com_ind.data.shape): + try: + # try to subset to the overlapping region + COM_dep, COM_ind = _composeSubset(com_dep, com_ind) + except: + msg = "[%s][%s] Data size mismatch in relationship: %s %s vs. 
%s %s" % (self.longname,m.name,dep_name,str(com_dep.data.shape),ind_name,str(com_ind.data.shape)) + logger.debug(msg) + raise ValueError + else: + COM_dep, COM_ind = com_dep, com_ind + + ref_dist, ref_xedges, ref_yedges, ref_mean, ref_std = _buildDistributionResponse(REF_ind,REF_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + com_dist, com_xedges, com_yedges, com_mean, com_std = _buildDistributionResponse(COM_ind,COM_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + + ref_dist_list.append(ref_dist) + ref_xedges_list.append(ref_xedges) + ref_yedges_list.append(ref_yedges) + ref_mean_list.append(ref_mean) + ref_std_list.append(ref_std) + + com_dist_list.append(com_dist) + com_xedges_list.append(com_xedges) + com_yedges_list.append(com_yedges) + com_mean_list.append(com_mean) + com_std_list.append(com_std) + + # Make the plots + _plotDistribution(ref_dist_list,ref_xedges_list,ref_yedges_list, + "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), + "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) + _plotDistribution(com_dist_list,com_xedges_list,com_yedges_list, + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) + _plotDifference (ref_dist_list,com_dist_list,ref_xedges_list,ref_yedges_list, + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) + _plotFunction(ref_mean_list,ref_std_list,com_mean_list, + com_std_list,ref_xedges_list,ref_yedges_list, + "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), + m.color, + os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) + + # Score the distribution + score_list = [] + for ref_dist, com_dist in zip(ref_dist_list, com_dist_list): + score = _scoreDistribution(ref_dist,com_dist) + score_list.append(score) + score = np.sum(np.array(score_list)*(self.depths[:,1] - self.depths[:,0])) / \ + (self.depths[-1,1] - self.depths[0,0]) + sname = "%s Hellinger Distance %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + # Score the functional response + score_list = [] + for ref_mean, com_mean in zip(ref_mean_list, com_mean_list): + score = _scoreFunction(ref_mean,com_mean) + score_list.append(score) + score = np.sum(np.array(score_list)*(self.depths[:,1] - self.depths[:,0])) / \ + (self.depths[-1,1] - self.depths[0,0]) + sname = "%s RMSE Score %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + + def _sensitivity(self, m): + # If there are no sensitivities to analyze, get out of here + if self.sensitivities is None: return + + def _retrieveCorr(longname, filename): + longname = longname.split('/')[0] + with Dataset(filename,mode="r") as dset: + corr_list_orig = [] + corr_list_itrp = [] + pval_list_orig = [] + pval_list_itrp = [] + for dind, z0 in 
enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + + key = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "partial_correlation_" in v and zstr in v and \ + longname in v and 'original grids' in v] + if len(key) > 0: + corr_list_orig.append(Variable(filename = filename, + groupname = "Sensitivities", + variable_name = key[0])) + key2 = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "partial_correlation_" in v and zstr in v and \ + longname in v and 'common grids' in v] + if len(key2) > 0: + corr_list_itrp.append(Variable(filename = filename, + groupname = "Sensitivities", + variable_name = key2[0])) + key3 = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "partial_pvalue_" in v and zstr in v and \ + longname in v and 'original grids' in v] + if len(key3) > 0: + pval_list_orig.append(Variable(filename = filename, + groupname = "Sensitivities", + variable_name = key3[0])) + key4 = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "partial_pvalue_" in v and zstr in v and \ + longname in v and 'common grids' in v] + if len(key4) > 0: + pval_list_itrp.append(Variable(filename = filename, + groupname = "Sensitivities", + variable_name = key4[0])) + return corr_list_orig, corr_list_itrp, pval_list_orig, pval_list_itrp + + def _retrieveBias(longname, filename): + longname = longname.split('/')[0] + with Dataset(filename,mode="r") as dset: + key_list = [] + key2_list = [] + + for dind, z0 in enumerate(self.depths[:,0]): + zf = self.depths[dind,1] + zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units) + + key = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "sensitivity_bias_map_" in v and zstr in v and \ + longname in v] + key2 = [v for v in dset.groups["Sensitivities"].variables.keys() \ + if "sensitivity_biasscore_map_" in v and zstr in v and \ + longname in v] + if len(key) > 0: + key_list.append(key[0]) + if len(key2) > 0: + key2_list.append(key2[0]) + return [Variable(filename = filename, + groupname = "Sensitivities", + variable_name = kk) for kk in key_list], \ + [Variable(filename = filename, + groupname = "Sensitivities", + variable_name = kk) for kk in key2_list] + + # Get the HTML page + page = [page for page in self.layout.pages if "Sensitivities" in page.name] + if len(page) == 0: return + page = page[0] + + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)), + mode="r+") as results: + + # Grab/create a sensitivity and scalars group + group = None + if "Sensitivities" not in results.groups: + return + else: + group = results.groups["Sensitivities"] + if "scalars" not in group.groups: + scalars = group.createGroup("scalars") + else: + scalars = group.groups["scalars"] + + # for each sensitivity relationship... 
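+            # entries in self.sensitivities were matched to Confrontation
+            # objects by MatchSensitivityConfrontation at startup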
+            longname = longname.split('/')[0]
+            with Dataset(filename,mode="r") as dset:
+                key_list  = []
+                key2_list = []
+
+                for dind, z0 in enumerate(self.depths[:,0]):
+                    zf = self.depths[dind,1]
+                    zstr = '%.2f-%.2f %s' % (z0, zf, self.depths_units)
+
+                    key = [v for v in dset.groups["Sensitivities"].variables.keys() \
+                           if "sensitivity_bias_map_" in v and zstr in v and \
+                           longname in v]
+                    key2 = [v for v in dset.groups["Sensitivities"].variables.keys() \
+                            if "sensitivity_biasscore_map_" in v and zstr in v and \
+                            longname in v]
+                    if len(key) > 0:
+                        key_list.append(key[0])
+                    if len(key2) > 0:
+                        key2_list.append(key2[0])
+            return [Variable(filename = filename,
+                             groupname = "Sensitivities",
+                             variable_name = kk) for kk in key_list], \
+                   [Variable(filename = filename,
+                             groupname = "Sensitivities",
+                             variable_name = kk) for kk in key2_list]
+
+        # Get the HTML page
+        page = [page for page in self.layout.pages if "Sensitivities" in page.name]
+        if len(page) == 0: return
+        page = page[0]
+
+        with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),
+                     mode="r+") as results:
+
+            # Grab/create a sensitivity and scalars group
+            group = None
+            if "Sensitivities" not in results.groups:
+                return
+            else:
+                group = results.groups["Sensitivities"]
+            if "scalars" not in group.groups:
+                scalars = group.createGroup("scalars")
+            else:
+                scalars = group.groups["scalars"]
+
+            # for each sensitivity relationship...
+            for c in self.sensitivities:
+
+                # Get the sensitivity map from the model and obs
+                try:
+                    ref_corr_list, REF_corr_list, ref_corr_p_list, REF_corr_p_list = _retrieveCorr(c.longname, os.path.join(self.output_path, "%s_%s.nc" % (self.name,"Benchmark")))
+                    com_corr_list, COM_corr_list, com_corr_p_list, COM_corr_p_list = _retrieveCorr(c.longname, os.path.join(self.output_path, "%s_%s.nc" % (self.name,m.name)))
+                    com_bias_map_list, com_biasscore_map_list = _retrieveBias(c.longname, os.path.join(self.output_path, "%s_%s.nc" % (self.name,m.name)))
+
+                    com_name = c.longname.split('/')[0]
+
+                    ref_min = np.min([ref_corr.data.min() for ref_corr in ref_corr_list])
+                    ref_max = np.max([ref_corr.data.max() for ref_corr in ref_corr_list])
+                    com_min = np.min([com_corr.data.min() for com_corr in com_corr_list])
+                    com_max = np.max([com_corr.data.max() for com_corr in com_corr_list])
+                    diff_min = np.min([com_bias_map.data.min() \
+                                       for com_bias_map in com_bias_map_list])
+                    diff_max = np.max([com_bias_map.data.max() \
+                                       for com_bias_map in com_bias_map_list])
+                except:
+                    continue
+
+                ref_min = max(min(ref_min, com_min), -1)
+                ref_max = min(max(ref_max, com_max), 1)
+                com_min = ref_min
+                com_max = ref_max
+                diff_min = max(- max(abs(diff_min), abs(diff_max)), -1)
+                diff_max = min(- diff_min, 1)
+
+                # Add figures to the html page
+                page.addFigure(c.longname,
+                               "benchmark_sens_%s" % com_name,
+                               "Benchmark_RNAME_sens_%s.png" % com_name,
+                               side = "BENCHMARK SENSITIVITY",
+                               legend = True,
+                               benchmark = False)
+                page.addFigure(c.longname,
+                               "sens_%s" % com_name,
+                               "MNAME_RNAME_sens_%s.png" % com_name,
+                               side = "MODEL SENSITIVITY",
+                               legend = True,
+                               benchmark = False)
+                page.addFigure(c.longname,
+                               "sens_diff_%s" % com_name,
+                               "MNAME_RNAME_sens_diff_%s.png" % com_name,
+                               side = "BIAS IN SENSITIVITY",
+                               legend = True,
+                               benchmark = False)
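+                # One multi-panel figure per region: panel dind shows the
+                # depth layer self.depths[dind], and cells whose partial
+                # p-value exceeds 0.05 are shaded grey below to mark them as
+                # not significant.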
"Benchmark_%s_sens_%s.png" % (region, com_name))) + fig2.savefig(os.path.join(self.output_path, + "%s_%s_sens_%s.png" % (m.name, region, com_name))) + fig3.savefig(os.path.join(self.output_path, + "%s_%s_sens_diff_%s.png" % (m.name, region, + com_name))) + plt.close(fig1) + plt.close(fig2) + plt.close(fig3) + + fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) + post.ColorBar(ax, vmin = ref_min, vmax = ref_max, + cmap = "jet", label = "1") + fig.savefig(os.path.join(self.output_path, + "legend_sens_%s.png" % com_name)) + plt.close() + + fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) + post.ColorBar(ax, vmin = diff_min, vmax = diff_max, + cmap = "RdBu", label = "1") + fig.savefig(os.path.join(self.output_path, + "legend_sens_diff_%s.png" % com_name)) + plt.close() diff --git a/src/ILAMB/Confrontation.py b/src/ILAMB/Confrontation.py index 481f89d..eefdd3b 100644 --- a/src/ILAMB/Confrontation.py +++ b/src/ILAMB/Confrontation.py @@ -10,7 +10,6 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable from mpi4py import MPI from sympy import sympify -import cftime as cf import logging logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) @@ -124,7 +123,6 @@ class Confrontation(object): change the types of plot limits, one of ['minmax', '99per' (default)] """ def __init__(self,**keywords): - # Initialize self.master = True self.name = keywords.get("name",None) @@ -132,7 +130,7 @@ def __init__(self,**keywords): self.variable = keywords.get("variable",None) self.output_path = keywords.get("output_path","./") self.alternate_vars = keywords.get("alternate_vars",[]) - self.derived = keywords.get("derived",None) + self.derived = keywords.get("derived",[]) self.regions = list(keywords.get("regions",["global"])) self.data = None self.cmap = keywords.get("cmap","jet") @@ -144,13 +142,15 @@ def __init__(self,**keywords): self.plot_unit = keywords.get("plot_unit",None) self.space_mean = keywords.get("space_mean",True) self.relationships = keywords.get("relationships",None) + self.sensitivities = keywords.get("sensitivities",None) # YW self.keywords = keywords self.extents = np.asarray([[-90.,+90.],[-180.,+180.]]) self.study_limits = [] # Make sure the source data exists - - if not os.path.isfile(self.source): + try: + os.stat(self.source) + except: msg = "\n\nI am looking for data for the %s confrontation here\n\n" % self.name msg += "%s\n\nbut I cannot find it. " % self.source msg += "Did you download the data? 
         self.keywords = keywords
         self.extents = np.asarray([[-90.,+90.],[-180.,+180.]])
         self.study_limits = []
 
         # Make sure the source data exists
-
-        if not os.path.isfile(self.source):
+        try:
+            os.stat(self.source)
+        except:
             msg  = "\n\nI am looking for data for the %s confrontation here\n\n" % self.name
             msg += "%s\n\nbut I cannot find it. " % self.source
             msg += "Did you download the data? Have you set the ILAMB_ROOT environment variable?\n"
@@ -160,10 +160,20 @@ def __init__(self,**keywords):
         pages = []
 
         # Mean State page
-        pages.append(post.HtmlPage("MeanState","Mean State"))
+        pages.append(post.HtmlPage("MeanState", "Mean State"))
         pages[-1].setHeader("CNAME / RNAME / MNAME")
         pages[-1].setSections(["Temporally integrated period mean",
-                               "Spatially integrated regional mean"])
+                               "Spatially integrated regional mean"])
+
+        # Trend State page
+        skip_trend = self.keywords.get("skip_trend", True)
+        if type(skip_trend) == type(""):
+            skip_trend = (skip_trend.lower() == "true")
+        if not skip_trend:
+            pages.append(post.HtmlPage("TrendState", "Trend State"))
+            pages[-1].setHeader("CNAME / RNAME / MNAME")
+            pages[-1].setSections(["Temporally integrated period trend",
+                                   "Spatially integrated regional trend"])
 
         # Datasites page
         self.hasSites = False
@@ -178,14 +188,14 @@ def __init__(self,**keywords):
                 self.lbls = ["site%d" % s for s in range(len(dataset.dimensions["data"]))]
             if "time" in dataset.dimensions:
                 t = dataset.variables["time"]
-                tdata = t[[0,-1]]
                 if "bounds" in t.ncattrs():
-                    tdata = dataset.variables[t.bounds]
-                    tdata = [tdata[0,0],tdata[-1,1]]
-                tdata = cf.num2date(tdata,units=t.units,calendar=t.calendar)
-                y0 = tdata[0].year
-                yf = tdata[1].year
-
+                    t  = dataset.variables[t.bounds][...]
+                    y0 = int(t[ 0,0]/365.+1850.)
+                    yf = int(t[-1,1]/365.+1850.)-1
+                else:
+                    y0 = int(round(t[ 0]/365.)+1850.)
+                    yf = int(round(t[-1]/365.)+1850.)-1
+
         if self.hasSites:
             pages.append(post.HtmlSitePlotsPage("SitePlots","Site Plots"))
             pages[-1].setHeader("CNAME / RNAME / MNAME")
@@ -206,10 +216,18 @@ def __init__(self,**keywords):
             pages.append(post.HtmlPage("Relationships","Relationships"))
             pages[-1].setHeader("CNAME / RNAME / MNAME")
             pages[-1].setSections(list(self.relationships))
+
+        # Sensitivities page, YW
+        if self.sensitivities is not None:
+            pages.append(post.HtmlPage('Sensitivities', 'Partial Correlation Relationships'))
+            pages[-1].setHeader('CNAME / RNAME / MNAME')
+            pages[-1].setSections(list(self.sensitivities))
+
         pages.append(post.HtmlAllModelsPage("AllModels","All Models"))
         pages[-1].setHeader("CNAME / RNAME / MNAME")
         pages[-1].setSections([])
         pages[-1].setRegions(self.regions)
+
         pages.append(post.HtmlPage("DataInformation","Data Information"))
         pages[-1].setSections([])
         pages[-1].text = "\n"
@@ -245,8 +263,13 @@ def _attribute_sort(attr):
                    "Spatial Distribution Score" :1.}
 
     def requires(self):
-        if self.derived is not None:
-            ands = [arg.name for arg in sympify(self.derived).free_symbols]
+        if (self.derived is not None) and (len(self.derived) > 0):
+            if isinstance(self.derived, list):
+                ands = []
+                for d in self.derived:
+                    ands = ands + [arg.name for arg in sympify(d).free_symbols]
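+                # e.g. derived = ["ra+rh-gpp","nbp"] collects the free symbols
+                # of every expression, ands = ["ra","rh","gpp","nbp"] (sympy
+                # does not guarantee the order)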
+            else:
+                ands = [arg.name for arg in sympify(self.derived).free_symbols]
             ors  = []
         else:
             ands = []
@@ -299,6 +322,7 @@ def stageData(self,m):
                                             final_time   = obs.time_bnds[-1,1],
                                             lats         = None if obs.spatial else obs.lat,
                                             lons         = None if obs.spatial else obs.lon)
+
         obs,mod = il.MakeComparable(obs,mod,
                                     mask_ref = True,
                                     clip_ref = True,
@@ -363,9 +387,8 @@ def confront(self,m):
         # Read in some options and run the mean state analysis
         mass_weighting = self.keywords.get("mass_weighting",False)
         skip_rmse      = self.keywords.get("skip_rmse"     ,False)
-        skip_iav       = self.keywords.get("skip_iav"      ,True )
+        skip_iav       = self.keywords.get("skip_iav"      ,False)
         skip_cycle     = self.keywords.get("skip_cycle"    ,False)
-        rmse_score_basis = self.keywords.get("rmse_score_basis","cycle")
         if obs.spatial:
             il.AnalysisMeanStateSpace(obs,mod,dataset   = fcm.mod_dset,
                                       regions           = self.regions,
@@ -376,8 +399,7 @@ def confront(self,m):
                                       skip_rmse         = skip_rmse,
                                       skip_iav          = skip_iav,
                                       skip_cycle        = skip_cycle,
-                                      mass_weighting    = mass_weighting,
-                                      rmse_score_basis  = rmse_score_basis)
+                                      mass_weighting    = mass_weighting)
         else:
             il.AnalysisMeanStateSites(obs,mod,dataset   = fcm.mod_dset,
                                       regions           = self.regions,
@@ -403,39 +425,37 @@ def determinePlotLimits(self):
         called before calling any plotting routine.
 
         """
+        max_str = "up99"
+        min_str = "dn99"
+        if self.keywords.get("limit_type","99per") == "minmax":
+            max_str = "max"
+            min_str = "min"
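+        # e.g. a "limit_type = minmax" option in the configure file switches
+        # the color limits from the 1-99 percentile attributes ("dn99"/"up99")
+        # to the full range stored with each plot variable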
-        filelist       = glob.glob(os.path.join(self.output_path,"*.nc"))
-        benchmark_file = [f for f in filelist if "Benchmark" in f]
-
-        # There may be regions in which there is no benchmark data and
-        # these should be weeded out. If the plotting phase occurs in
-        # the same run as the analysis phase, this is not needed.
-        if benchmark_file:
-            with Dataset(benchmark_file[0]) as dset:
-                Vs = getVariableList(dset.groups["MeanState"])
-            Vs = [v for v in Vs if "timeint" in v]
-            if Vs:
-                self.pruneRegions(Variable(filename      = benchmark_file[0],
-                                           variable_name = Vs[0],
-                                           groupname     = "MeanState"))
-
         # Determine the min/max of variables over all models
         limits = {}
-        for fname in filelist:
+        prune  = False
+        for fname in glob.glob(os.path.join(self.output_path,"*.nc")):
             with Dataset(fname) as dataset:
                 if "MeanState" not in dataset.groups: continue
-                variables = getVariableList(dataset.groups["MeanState"])
+                group     = dataset.groups["MeanState"]
+                variables = [v for v in group.variables.keys() if v not in group.dimensions.keys()]
                 for vname in variables:
-                    var = dataset.groups["MeanState"].variables[vname]
+                    var    = group.variables[vname]
+                    if not "_" in vname:
+                        pname = vname
+                    else:
+                        pname = vname.split("_")[1] # YW: to match the prefixed part in ilamblib.py
+                    region = vname.split("_")[-1]
                     if var[...].size <= 1: continue
-                    pname  = vname.split("_")[0]
-
-                    """If the plot is a time series, it has been averaged over regions
-                    already and we need a separate dictionary for the
-                    region as well. These can be based on the
-                    percentiles from the attributes of the netCDF
-                    variables."""
-                    if pname in time_opts:
-                        region = vname.split("_")[-1]
+                    if pname in space_opts:
+                        if pname not in limits:
+                            limits[pname] = {}
+                            limits[pname]["min"]  = +1e20
+                            limits[pname]["max"]  = -1e20
+                            limits[pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units"))
+                        limits[pname]["min"] = min(limits[pname]["min"],var.getncattr(min_str))
+                        limits[pname]["max"] = max(limits[pname]["max"],var.getncattr(max_str))
+                    elif pname in time_opts:
                         if pname not in limits: limits[pname] = {}
                         if region not in limits[pname]:
                             limits[pname][region] = {}
@@ -444,26 +464,12 @@ def determinePlotLimits(self):
                             limits[pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units"))
                         limits[pname][region]["min"] = min(limits[pname][region]["min"],var.getncattr("min"))
                         limits[pname][region]["max"] = max(limits[pname][region]["max"],var.getncattr("max"))
+                    if not prune and "Benchmark" in fname and pname == "timeint":
+                        prune = True
+                        self.pruneRegions(Variable(filename      = fname,
+                                                   variable_name = vname,
+                                                   groupname     = "MeanState"))
-                    else:
-                        """If the plot is spatial, we want to set the limits as a percentile
-                        of all data across models and the
-                        benchmark. So here we load the data up and in
-                        another pass will compute the percentiles."""
-                        if pname not in limits:
-                            limits[pname] = {}
-                            limits[pname]["min"]  = +1e20
-                            limits[pname]["max"]  = -1e20
-                            limits[pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units"))
-                            limits[pname]["data"] = var[...].compressed()
-                        else:
-                            limits[pname]["data"] = np.hstack([limits[pname]["data"],var[...].compressed()])
-
-        # For those limits which we built up data across all models, compute the percentiles
-        for pname in limits.keys():
-            if "data" in limits[pname]:
-                limits[pname]["min"],limits[pname]["max"] = np.percentile(limits[pname]["data"],[1,99])
-
         # Second pass to plot legends (FIX: only for master?)
         for pname in limits.keys():
@@ -482,12 +488,10 @@ def determinePlotLimits(self):
             if "score" in pname:
                 limits[pname]["min"] = 0
                 limits[pname]["max"] = 1
-
+
             limits[pname]["cmap"] = opts["cmap"]
             if limits[pname]["cmap"] == "choose": limits[pname]["cmap"] = self.cmap
-            if "score" in pname:
-                limits[pname]["cmap"] = plt.cm.get_cmap(limits[pname]["cmap"],3)
-
+
             # Plot a legend for each key
             if opts["haslegend"]:
                 fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True)
@@ -520,7 +524,6 @@ def determinePlotLimits(self):
                 limits[g]["ymin"] = min(limits[g]["ymin"],grp.variables["dep_bnd"][ 0, 0])
                 limits[g]["ymax"] = max(limits[g]["ymax"],grp.variables["dep_bnd"][-1,-1])
-
         self.limits = limits
 
     def computeOverallScore(self,m):
@@ -718,71 +721,68 @@ def modelPlots(self,m):
                 variables = getVariableList(group)
                 color     = dataset.getncattr("color")
                 for vname in variables:
+                    # is this a variable we need to plot?
+                    if not "_" in vname:
+                        pname = vname
+                    else:
+                        pname = vname.split("_")[1] # YW: to deal with the prefixed name part in ilamblib
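+                    # appears to assume prefixed names of the form
+                    # "<prefix>_<plot>_...", so the plot type sits in the
+                    # second underscore-separated field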
+                    if group.variables[vname][...].size <= 1: continue
+                    var = Variable(filename=fname,groupname="MeanState",variable_name=vname)
+
+                    if (var.spatial or (var.ndata is not None)) and not var.temporal:
+
+                        # grab plotting options
+                        if pname not in self.limits.keys(): continue
+                        opts = space_opts[pname]
+
+                        # add to html layout
+                        page.addFigure(opts["section"],
+                                       pname,
+                                       opts["pattern"],
+                                       side   = opts["sidelbl"],
+                                       legend = opts["haslegend"])
+
+                        # plot variable
+                        for region in self.regions:
+                            ax = var.plot(None,
+                                          region = region,
+                                          vmin   = self.limits[pname]["min"],
+                                          vmax   = self.limits[pname]["max"],
+                                          cmap   = self.limits[pname]["cmap"])
+                            fig = ax.get_figure()
+                            fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname)))
+                            plt.close()
+
+                        # Jumping through hoops to get the benchmark plotted and in the html output
+                        if self.master and (pname == "timeint" or pname == "phase" or pname == "iav"):
-                    # is this a variable we need to plot?
-                    pname = vname.split("_")[0]
-                    if group.variables[vname][...].size <= 1: continue
-                    var = Variable(filename=fname,groupname="MeanState",variable_name=vname)
-
-                    if (var.spatial or (var.ndata is not None)) and not var.temporal:
-
-                        # grab plotting options
-                        if pname not in self.limits.keys(): continue
-                        if pname not in space_opts: continue
                             opts = space_opts[pname]
 
                             # add to html layout
                             page.addFigure(opts["section"],
-                                       pname,
-                                       opts["pattern"],
-                                       side   = opts["sidelbl"],
-                                       legend = opts["haslegend"])
+                                           "benchmark_%s" % pname,
+                                           opts["pattern"].replace("MNAME","Benchmark"),
+                                           side   = opts["sidelbl"].replace("MODEL","BENCHMARK"),
+                                           legend = True)
 
                             # plot variable
+                            obs = Variable(filename=bname,groupname="MeanState",variable_name=vname)
                             for region in self.regions:
-                        ax = var.plot(None,
-                                      region = region,
-                                      vmin   = self.limits[pname]["min"],
-                                      vmax   = self.limits[pname]["max"],
-                                      cmap   = self.limits[pname]["cmap"])
+                                ax = obs.plot(None,
+                                              region = region,
+                                              vmin   = self.limits[pname]["min"],
+                                              vmax   = self.limits[pname]["max"],
+                                              cmap   = self.limits[pname]["cmap"])
                                 fig = ax.get_figure()
-                        fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname)))
+                                fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname)))
                                 plt.close()
-
-                    # Jumping through hoops to get the benchmark plotted and in the html output
-                    if self.master and (pname == "timeint" or pname == "phase" or pname == "iav"):
-
-                        opts = space_opts[pname]
-
-                        # add to html layout
-                        page.addFigure(opts["section"],
-                                       "benchmark_%s" % pname,
-                                       opts["pattern"].replace("MNAME","Benchmark"),
-                                       side   = opts["sidelbl"].replace("MODEL","BENCHMARK"),
-                                       legend = True)
-
-                        # plot variable
-                        obs = Variable(filename=bname,groupname="MeanState",variable_name=vname)
-                        for region in self.regions:
-                            ax = obs.plot(None,
-                                          region = region,
-                                          vmin   = self.limits[pname]["min"],
-                                          vmax   = self.limits[pname]["max"],
-                                          cmap   = self.limits[pname]["cmap"])
-                            fig = ax.get_figure()
-                            fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname)))
-                            plt.close()
-
                     if not (var.spatial or (var.ndata is not None)) and var.temporal:
 
                         # grab the benchmark dataset to plot along with
-                        try:
-                            obs = Variable(filename=bname,groupname="MeanState",variable_name=vname).convert(var.unit)
-                        except:
-                            continue
+                        obs = Variable(filename=bname,groupname="MeanState",variable_name=vname).convert(var.unit)
 
                         # grab plotting options
-                        if pname not in time_opts: continue
                         opts = time_opts[pname]
 
                         # add to html layout
@@ -883,10 +883,10 @@ def generateHtml(self):
                 for vname in grp.variables.keys():
                     found = False
                     for region in self.regions:
-                        if vname.endswith(" %s" % region):
+                        if region in vname:
                             found = True
                             var   = grp.variables[vname]
-                            name  = ''.join(vname.rsplit(" %s" % region,1))
+                            name  = vname.replace(region,"")
                             metrics[mname][region][name] = Variable(name = name,
                                                                     unit = var.units,
                                                                     data = var[...])
@@ -996,8 +996,9 @@ def _scoreFunction(ref,com):
 
 def _plotDistribution(dist,xedges,yedges,xlabel,ylabel,filename):
     fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True)
     pc = ax.pcolormesh(xedges, yedges, dist,
-                       norm = LogNorm(vmin = 1e-4,vmax = 1e-1),
-                       cmap = 'plasma' if 'plasma' in plt.cm.cmap_d else 'summer')
+                       norm = LogNorm(),
+                       cmap = 'plasma' if 'plasma' in plt.cm.cmap_d else 'summer',
+                       vmin = 1e-4, vmax = 1e-1)
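+    # NOTE: newer matplotlib deprecates (and eventually rejects) passing
+    # vmin/vmax together with a norm; if this raises, fold the limits back
+    # into LogNorm(vmin = 1e-4, vmax = 1e-1)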
     div = make_axes_locatable(ax)
     fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05),
                  orientation="vertical",label="Fraction of total datasites")
diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py
index 1b30f5a..5669e45 100644
--- a/src/ILAMB/ModelResult.py
+++ b/src/ILAMB/ModelResult.py
@@ -101,6 +101,7 @@ def _get(key,dset):
         variables = {}
         names     = {}
         paths     = self.paths if self.paths is not None else [self.path]
+
         for path in paths:
             for subdir, dirs, files in os.walk(path,followlinks=True):
                 for fileName in files:
@@ -119,6 +120,7 @@ def _get(key,dset):
                     # populate dictionary for which variables are in which files
                     for key in dataset.variables.keys():
+
                         if key not in variables: variables[key] = []
                         variables[key].append(pathName)
@@ -170,7 +172,7 @@ def _getGridInformation(self):
        """
        def _shiftLon(lon):
            return (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360
-
+
        # Are there cell areas associated with this model?
        area_name = None
        area_name = "area" if "area" in self.variables.keys() else area_name
@@ -186,13 +188,13 @@ def _shiftLon(lon):
            with Dataset(self.variables["lat_bnds"][0]) as f:
                x = f.variables["lat_bnds"][...]
            with Dataset(self.variables["lon_bnds"][0]) as f:
-               y = f.variables["lon_bnds"][...]
+               y = _shiftLon(f.variables["lon_bnds"][...])
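+               # longitudes are shifted into [-180,180] (e.g. 190 becomes
+               # -170) before locating the roll point, so the cell bounds
+               # increase monotonically from the dateline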
            s = y.mean(axis=1).argmin()
-           y = np.roll(_shiftLon(y),-s,axis=0)
+           y = np.roll(y,-s,axis=0)
            if y[ 0,0] > y[ 0,1]: y[ 0,0] = -180.
            if y[-1,0] > y[-1,1]: y[-1,1] = +180.
            self.cell_areas = il.CellAreas(None,None,lat_bnds=x,lon_bnds=y)
-
+
        # Now we do the same for land fractions
        frac_name = None
        frac_name = "landfrac" if "landfrac" in self.variables.keys() else frac_name
@@ -214,7 +216,7 @@ def _shiftLon(lon):
            self.land_area = np.ma.sum(self.land_areas)
        return
 
-    def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time=-1e20,final_time=1e20,output_unit="",expression=None,convert_calendar=True):
+    def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time=-1e20,final_time=1e20,output_unit="",expression=[],convert_calendar=True,initial_depth=None,final_depth=None):
         """Extracts a time series of the given variable from the model.
 
         Parameters
@@ -234,8 +236,10 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time
             a 1D array of latitude locations at which to extract information
         lons : numpy.ndarray, optional
             a 1D array of longitude locations at which to extract information
-        expression : str, optional
+        expression : str or list of str, optional
             an algebraic expression describing how to combine model outputs
+        initial_depth, final_depth : float, optional
+            include model results between these depths # YW
 
         Returns
        -------
@@ -244,6 +248,7 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time
         """
         # prepend the target variable to the list of possible variables
+
         altvars = list(alt_vars)
         altvars.insert(0,variable)
@@ -258,7 +263,9 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time
         tmin =  1e20
         tmax = -1e20
         same_site_epsilon = 0.5
+
         for v in altvars:
+            if v not in self.variables: continue
             for ifile,pathName in enumerate(self.variables[v]):
                 if _skipFile(pathName,altvars,lats,lons,same_site_epsilon): continue
@@ -268,12 +275,17 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time
                                      area             = self.land_areas,
                                      convert_calendar = convert_calendar,
                                      t0 = initial_time - self.shift,
-                                     tf = final_time   - self.shift)
+                                     tf = final_time   - self.shift,
+                                     z0 = initial_depth,
+                                     zf = final_depth)
+
                 if var.time is None: continue
                 tmin = min(tmin,var.time_bnds.min())
                 tmax = max(tmax,var.time_bnds.max())
+
+                if ((var.time_bnds.max() < initial_time - self.shift) or (var.time_bnds.min() > final_time - self.shift)): continue
+
                 if lats is not None and var.ndata:
                     r = np.sqrt((lats[:,np.newaxis]-var.lat)**2 +
                                 (lons[:,np.newaxis]-var.lon)**2)
@@ -289,19 +301,41 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time
                 if lats is not None and var.spatial: var = var.extractDatasites(lats,lons)
                 var.time      += self.shift
                 var.time_bnds += self.shift
+
                 V.append(var)
+            if len(V) > 0: break
 
         # If we didn't find any files, try to put together the
         # variable from a given expression
         if len(V) == 0:
-            if expression is not None:
-                v = self.derivedVariable(variable,
-                                         expression,
-                                         lats = lats,
-                                         lons = lons,
-                                         initial_time = initial_time,
-                                         final_time   = final_time)
+            if (expression is not None) and (len(expression) > 0):
+                if isinstance(expression, list):
+                    for e in expression:
+                        v = None
+                        try:
+                            v = self.derivedVariable(variable,
+                                                     e,
+                                                     lats = lats,
+                                                     lons = lons,
+                                                     initial_time = initial_time,
+                                                     final_time   = final_time)
+                        except:
+                            pass
+                        if v is not None:
+                            break
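+                    # the first expression that can be assembled from this
+                    # model's outputs wins (e.g. an expression list like
+                    # ["ra+rh-gpp","nbp"]); if none worked, fall through to
+                    # the error below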
+                    if v is None:
+                        tstr = ""
+                        if tmin < tmax: tstr = " in the given time frame, tinput = [%.1f,%.1f], tmodel = [%.1f,%.1f]" % (initial_time,final_time,tmin+self.shift,tmax+self.shift)
+                        logger.debug("[%s] Could not find [%s] in the model results%s" % (self.name,",".join(altvars),tstr))
+                        raise il.VarNotInModel()
+                else:
+                    v = self.derivedVariable(variable,
+                                             expression,
+                                             lats = lats,
+                                             lons = lons,
+                                             initial_time = initial_time,
+                                             final_time   = final_time)
             else:
                 tstr = ""
                 if tmin < tmax: tstr = " in the given time frame, tinput = [%.1f,%.1f], tmodel = [%.1f,%.1f]" % (initial_time,final_time,tmin+self.shift,tmax+self.shift)
@@ -310,7 +344,6 @@ def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time
         else:
             v = il.CombineVariables(V)
-
         return v
 
     def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_time=-1e20,final_time=1e20,convert_calendar=True):
@@ -409,6 +442,7 @@ def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_ti
         np.seterr(divide='ignore',invalid='ignore')
         result,unit = il.SympifyWithArgsUnits(expression,args,units)
+        result = result.astype(var.data.data.dtype)
         np.seterr(divide='raise',invalid='raise')
         mask  += np.isnan(result)
         result = np.ma.masked_array(np.nan_to_num(result),mask=mask)
diff --git a/src/ILAMB/Post.py b/src/ILAMB/Post.py
index b58e0c9..dd2dde2 100644
--- a/src/ILAMB/Post.py
+++ b/src/ILAMB/Post.py
@@ -73,7 +73,7 @@ def ColorBar(ax,**keywords):
     if ticks is not None: cb.set_ticks(ticks)
     if ticklabels is not None: cb.set_ticklabels(ticklabels)
 
-def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True):
+def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True,position=111):
     """Plot a Taylor diagram.
 
     This is adapted from the code by Yannick Copin found here:
@@ -121,7 +121,7 @@ def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True):
                                        extremes=(0,np.pi/2,smin,smax),
                                        grid_locator1=gl1,
                                        tick_formatter1=tf1)
-    ax = FA.FloatingSubplot(fig, 111, grid_helper=ghelper)
+    ax = FA.FloatingSubplot(fig, position, grid_helper=ghelper)
     fig.add_subplot(ax)
 
     # adjust axes
@@ -141,26 +141,26 @@ def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True):
     ax.axis["bottom"].set_visible(False)
     ax.grid(True)
 
-    ax = ax.get_aux_axes(tr)
+    aux = ax.get_aux_axes(tr)
 
     # Plot data
     corrcoef = corrcoef.clip(-1,1)
     for i in range(len(corrcoef)):
-        ax.plot(np.arccos(corrcoef[i]),stddev[i],'o',color=colors[i],mew=0,ms=8)
+        aux.plot(np.arccos(corrcoef[i]),stddev[i],'o',color=colors[i],mew=0,ms=8)
 
     # Add reference point and stddev contour
-    l, = ax.plot([0],refstd,'k*',ms=12,mew=0)
+    l, = aux.plot([0],refstd,'k*',ms=12,mew=0)
     t = np.linspace(0, np.pi/2)
     r = np.zeros_like(t) + refstd
-    ax.plot(t,r, 'k--')
+    aux.plot(t,r, 'k--')
 
     # centralized rms contours
     rs,ts = np.meshgrid(np.linspace(smin,smax),
                         np.linspace(0,np.pi/2))
     rms = np.sqrt(refstd**2 + rs**2 - 2*refstd*rs*np.cos(ts))
-    contours = ax.contour(ts,rs,rms,5,colors='k',alpha=0.4)
-    ax.clabel(contours,fmt='%1.1f')
+    contours = aux.contour(ts,rs,rms,5,colors='k',alpha=0.4)
+    aux.clabel(contours,fmt='%1.1f')
 
-    return ax
+    return ax, aux
 
 class HtmlFigure():
@@ -615,15 +615,16 @@ def _populatePlots(self):
             for section in page.sections:
                 if len(page.figures[section]) == 0: continue
                 for figure in page.figures[section]:
-                    if (figure.name in ["spatial_variance","compcycle","profile",
-                                        "legend_spatial_variance","legend_compcycle"]): continue # ignores
+                    if sum([n in figure.name for n in \
+                            ["spatial_variance","compcycle","profile",
+                             "legend_spatial_variance","legend_compcycle"]]): continue # ignores
                     if "benchmark" in figure.name:
                         if figure.name not in bench: bench.append(figure.name)
                         continue
                     if figure not in self.plots: self.plots.append(figure)
                     if not figure.legend: self.nolegend.append(figure.name)
         self.nobench = [plot.name for plot in self.plots if "benchmark_%s" % (plot.name) not in bench]
-
+
     def __str__(self):
         if self.plots is None: self._populatePlots()
@@ -669,16 +670,28 @@ def __str__(self):