Commit

Add compute_fl_diagnostics_quantiles (#1746)
* adapt ranking of pixels for elevation bands

* add compute_fl_diagnostics_quantiles with tests

* pep8

* add whats-new
pat-schmitt authored Dec 30, 2024
1 parent 8de5db4 commit 5ac2eea
Showing 5 changed files with 206 additions and 12 deletions.
12 changes: 12 additions & 0 deletions docs/whats-new.rst
@@ -9,6 +9,18 @@ v1.6.x (not released)
Enhancements
~~~~~~~~~~~~

- Added ``tasks.compute_fl_diagnostics_quantiles``, a task designed to
  calculate quantiles from multiple fl_diagnostic files. It enables users to
  compute metrics such as the median flowline across various GCM projections,
  as sketched below (:pull:`1746`).
  By `Patrick Schmitt <https://github.com/pat-schmitt>`_
- New ranking options for the ``distribute.assign_points_to_band`` task, which
  determines the order in which pixels "melt away" when redistributing flowline
  model runs into 2D for visualization purposes. It is now possible to combine
  different variables from ``gridded_data`` (e.g. ``slope``,
  ``dis_from_border``, ...) to define this ranking, providing greater
  flexibility and control over how the melting sequence is visualized
  (:pull:`1746`).
  By `Patrick Schmitt <https://github.com/pat-schmitt>`_
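
A minimal usage sketch of the new task (the GCM filesuffixes below are
hypothetical placeholders, and ``gdirs`` is assumed to be a list of glacier
directories with the corresponding projection runs already stored):

    from oggm import tasks, workflow

    # hypothetical filesuffixes of previously computed ssp126 projection runs
    suffixes = ['_CESM2_ssp126', '_MPI-ESM1-2-HR_ssp126', '_NorESM2-MM_ssp126']

    # write the median flowline diagnostics to files with suffix '_median'
    workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles,
                                 gdirs,
                                 input_filesuffixes=suffixes,
                                 quantiles=0.5,
                                 output_filesuffix='_median')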

Bug fixes
~~~~~~~~~
77 changes: 77 additions & 0 deletions oggm/core/flowline.py
@@ -4592,3 +4592,80 @@ def clean_merged_flowlines(gdir, buffer=None):

    # Finally write the flowlines
    gdir.write_pickle(allfls, 'model_flowlines')


@entity_task(log)
def compute_fl_diagnostics_quantiles(gdir,
                                     input_filesuffixes,
                                     quantiles=0.5,
                                     output_filesuffix='_median',
                                     ):
    """Compute quantile fl_diagnostics (e.g. median flowline of projections)

    This function takes a number of fl_diagnostic files and computes
    quantiles from them. This can be useful, for example, for calculating a
    median flowline across different GCM projections that use the same
    scenario (e.g. ssp126).

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    input_filesuffixes : list
        a list of fl_diagnostic filesuffixes which should be considered for
        the computation
    quantiles : float or list
        The quantiles to compute. Can be a float for a single quantile or a
        list of floats for several quantiles.
        Default is 0.5 (the median).
    output_filesuffix : str
        The output_filesuffix of the resulting fl_diagnostics.
        Default is '_median'.
    """

    # if quantiles is a list with one element, only use this element
    if isinstance(quantiles, list):
        if len(quantiles) == 1:
            quantiles = quantiles[0]

    # get the filepaths of all fl_diagnostic files
    all_fl_diag_paths = []
    for filesuffix in input_filesuffixes:
        all_fl_diag_paths.append(gdir.get_filepath('fl_diagnostics',
                                                   filesuffix=filesuffix))

    # assume all files have the same number of flowlines; extract the base
    # structure and the fl_ids from the first file
    with xr.open_dataset(all_fl_diag_paths[0]) as ds:
        ds_base = ds.load()
        fl_ids = ds.flowlines.data

    # helper function to open all fl_diag files as one with open_mfdataset
    def add_filename_coord(ds):
        filepath = ds.encoding["source"]
        filename = os.path.basename(filepath).split(".")[0]
        return ds.expand_dims({'filename': [filename]})

    # now loop through all flowlines and save back in the same structure
    fl_diag_dss = []
    for fl_id in fl_ids:
        with xr.open_mfdataset(all_fl_diag_paths,
                               preprocess=add_filename_coord,
                               group=f'fl_{fl_id}') as ds_fl:
            with warnings.catch_warnings():
                # ignore RuntimeWarnings raised by NaNs
                warnings.simplefilter("ignore", category=RuntimeWarning)
                fl_diag_dss.append(ds_fl.load().quantile(quantiles,
                                                         dim='filename'))

    # write everything back into the same output structure
    output_filepath = gdir.get_filepath('fl_diagnostics',
                                        filesuffix=output_filesuffix)
    encode = {}
    for v in fl_diag_dss[0]:
        encode[v] = {'zlib': True, 'complevel': 5}

    ds_base.to_netcdf(output_filepath, 'w')
    for fl_id, ds in zip(fl_ids, fl_diag_dss):
        ds.to_netcdf(output_filepath, 'a', group='fl_{}'.format(fl_id),
                     encoding=encode)
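
Since the task writes a regular fl_diagnostics file (one netCDF group per
flowline), the result can be read back the usual way. A minimal sketch,
assuming gdir was processed with the task above using the default '_median'
suffix, that the first flowline has id 0, and that the groups use the
standard 'dis_along_flowline' dimension:

    import xarray as xr

    fp = gdir.get_filepath('fl_diagnostics', filesuffix='_median')

    # each flowline lives in its own netCDF group, e.g. 'fl_0'
    with xr.open_dataset(fp, group='fl_0') as ds:
        ds_median = ds.load()

    # e.g. the median total volume evolution over time
    volume = ds_median.volume_m3.sum(dim='dis_along_flowline')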
54 changes: 42 additions & 12 deletions oggm/sandbox/distribute_2d.py
@@ -99,7 +99,9 @@ def add_smoothed_glacier_topo(gdir, outline_offset=-40,

 @entity_task(log, writes=['gridded_data'])
 def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed',
-                          elevation_weight=1.003):
+                          ranking_variables=None,
+                          ranking_variables_weights=None,
+                          ):
     """Assigns glacier grid points to flowline elevation bands and ranks them.

     Creates two variables in gridded_data.nc:
@@ -123,18 +123,25 @@ def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed',
     topo_variable : str
         the topography to read from `gridded_data.nc` (could be smoothed, or
         smoothed differently).
-    elevation_weight : float
-        how much weight to give to the elevation of the grid point versus the
-        thickness. Arbitrary number, might be tuned differently.
+    ranking_variables : list
+        A list of distributed variables used for ranking pixels of each
+        elevation band. All variables inside of 'gridded_data' can be used (e.g.
+        'topo_smoothed', 'slope', 'dis_from_border', 'distributed_thickness',
+        'aspect'). Default is 'distributed_thickness'.
+    ranking_variables_weights : list
+        A list of weights, corresponding to the ranking_variables. Larger values
+        assign more weight to the corresponding ranking_variable. Must be in the
+        same order as ranking_variables! Default is [1].
     """
     # We need quite a few data from the gridded dataset
     with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
+        ds_grid = ds.load()
         topo_data = ds[topo_variable].data.copy()
         glacier_mask = ds.glacier_mask.data == 1
         topo_data_flat = topo_data[glacier_mask]
         band_index = topo_data * np.nan  # container
         per_band_rank = topo_data * np.nan  # container
-        distrib_thick = ds.distributed_thickness.data
+        weighting_per_pixel = topo_data * 0.  # container

     # For the flowline we need the model flowlines only
     fls = gdir.read_pickle('model_flowlines')
@@ -173,15 +182,36 @@
     # assert np.nanmin(band_index) == 0
     assert np.all(np.isfinite(band_index[glacier_mask]))

-    # Ok now assign within band using ice thickness weighted by elevation
-    # We rank the pixels within one band by elevation, but also
-    # a penalty is added to higher elevation grid points
-    min_alt = np.nanmin(topo_data)
-    weighted_thick = ((topo_data - min_alt + 1) * elevation_weight) * distrib_thick
+    # Ok now we rank the pixels within each band; which variables are
+    # considered can be defined by the user. All variables are normalized
+    # (between 0 and 1), multiplied by a weight and summed up. If the weight
+    # is negative the normalized variable is inverted (0 becomes 1 and vice
+    # versa).
+    if ranking_variables is None:
+        # the default variables to use
+        ranking_variables = ['distributed_thickness']
+    if ranking_variables_weights is None:
+        # the default weights to use
+        ranking_variables_weights = [1]
+    assert len(ranking_variables) == len(ranking_variables_weights)
+
+    # normalize all values to the range 0 to 1
+    def min_max_normalization(data):
+        data = np.where(glacier_mask, data, np.nan)
+        return (data - np.nanmin(data)) / (np.nanmax(data) - np.nanmin(data))
+
+    for var, var_weight in zip(ranking_variables,
+                               ranking_variables_weights):
+        normalized_var = min_max_normalization(ds_grid[var].data)
+        # for negative weights we invert the normalized variable
+        # (smallest becomes largest and vice versa)
+        if var_weight < 0:
+            normalized_var = 1 - normalized_var
+        weighting_per_pixel += normalized_var * np.abs(var_weight)
+
+    # now rank the pixels of each band individually
     for band_id in np.unique(np.sort(band_index[glacier_mask])):
         # We work per band here
         is_band = band_index == band_id
-        per_band_rank[is_band] = mstats.rankdata(weighted_thick[is_band])
+        per_band_rank[is_band] = mstats.rankdata(weighting_per_pixel[is_band])

     with ncDataset(gdir.get_filepath('gridded_data'), 'a') as nc:
         vn = 'band_index'
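
The new ranking is additive: each selected variable is min-max normalized
over the glacier mask, inverted if its weight is negative, scaled by the
absolute weight, and summed into a per-pixel score. A minimal usage sketch,
assuming glacier directories gdirs whose gridded_data files contain the
documented variables (the weight values are arbitrary illustrations, not
recommendations):

    from oggm import workflow
    from oggm.sandbox import distribute_2d

    # combine slope and distance from the glacier border; the negative
    # weight inverts the normalized 'dis_from_border' before summing
    workflow.execute_entity_task(distribute_2d.assign_points_to_band, gdirs,
                                 ranking_variables=['slope',
                                                    'dis_from_border'],
                                 ranking_variables_weights=[1, -0.5])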
1 change: 1 addition & 0 deletions oggm/tasks.py
@@ -62,6 +62,7 @@
from oggm.core.flowline import run_from_climate_data
from oggm.core.flowline import run_constant_climate
from oggm.core.flowline import run_with_hydro
from oggm.core.flowline import compute_fl_diagnostics_quantiles
from oggm.core.dynamic_spinup import run_dynamic_spinup
from oggm.core.dynamic_spinup import run_dynamic_melt_f_calibration
from oggm.utils import copy_to_basedir
74 changes: 74 additions & 0 deletions oggm/tests/test_models.py
@@ -3198,6 +3198,80 @@ def test_elevation_feedback(self, hef_gdir):
            plt.legend()
            plt.show()

    @pytest.mark.slow
    def test_fl_diag_quantiles(self, hef_gdir):
        cfg.PARAMS['store_fl_diagnostics'] = True

        # conduct three runs from which to calculate the quantiles
        output_suffixes = ['_run1', '_run2', '_run3']
        for i, output_suffix in enumerate(output_suffixes):
            run_random_climate(hef_gdir, y0=1985 - i * 5, nyears=10,
                               output_filesuffix=output_suffix, seed=i)

        # only calculate the median
        workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles,
                                     hef_gdir,
                                     input_filesuffixes=output_suffixes,
                                     quantiles=0.5,
                                     output_filesuffix='_median')

        # calculate the 5th and 95th quantiles together
        workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles,
                                     hef_gdir,
                                     input_filesuffixes=output_suffixes,
                                     quantiles=[0.05, 0.95],
                                     output_filesuffix='_iqr')

        ft = 'fl_diagnostics'
        with xr.open_dataset(
                hef_gdir.get_filepath(ft, filesuffix=output_suffixes[0])) as ds:
            fl_ids = ds.flowlines.data

        for fl_id in fl_ids:
            # open the data of the current flowline
            ds_runs = []
            for output_suffix in output_suffixes:
                fp = hef_gdir.get_filepath(ft, filesuffix=output_suffix)
                with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
                    ds_runs.append(ds.load())
            fp = hef_gdir.get_filepath(ft, filesuffix='_median')
            with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
                ds_median = ds.load()
            fp = hef_gdir.get_filepath(ft, filesuffix='_iqr')
            with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
                ds_iqr = ds.load()

            # the median flowline should never be the smallest or largest
            # value, compared to the values of the runs (as we have three runs)
            variables_to_check = ['volume_m3', 'area_m2', 'thickness_m']
            for var in variables_to_check:
                var_das = []
                for ds_run in ds_runs:
                    var_das.append(ds_run[var])
                var_stack = xr.concat(var_das, dim='runs')

                var_min = var_stack.min(dim='runs')
                var_max = var_stack.max(dim='runs')

                var_median = ds_median[var]
                is_median_equal_to_min = (var_median == var_min).any()
                is_median_equal_to_max = (var_median == var_max).any()

                assert is_median_equal_to_min
                assert is_median_equal_to_max

                # the median should lie between the 5th and 95th quantiles
                var_5th = ds_iqr.loc[{'quantile': 0.05}][var]
                var_95th = ds_iqr.loc[{'quantile': 0.95}][var]

                is_median_larger_than_5th_q = (var_median >= var_5th).all()
                is_median_smaller_than_95th_q = (var_median <= var_95th).all()

                assert is_median_larger_than_5th_q
                assert is_median_smaller_than_95th_q


@pytest.mark.usefixtures('with_class_wd')
class TestDynamicSpinup: