Skip to content

Commit

Permalink
Merge pull request #240 from NREL/gb/bc_kwargs
Browse files Browse the repository at this point in the history
Gb/bc kwargs
  • Loading branch information
grantbuster authored Nov 5, 2024
2 parents 9ac7518 + 6e46d6c commit b8ebeeb
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 62 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ classifiers=[
"Programming Language :: Python :: 3.11",
]
dependencies = [
"NREL-rex>=0.2.90",
"NREL-rex>=0.2.91",
"NREL-phygnn>=0.0.23",
"NREL-gaps>=0.6.13",
"NREL-farms>=1.0.4",
Expand Down
7 changes: 6 additions & 1 deletion sup3r/bias/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,12 @@ def _read_base_sup3r_data(dh, base_dset, base_gid):
gid_raster = np.arange(len(dh.meta))
gid_raster = gid_raster.reshape(dh.shape[:2])
idy, idx = np.where(np.isin(gid_raster, base_gid))
base_data = dh.data[[base_dset]][idy, idx].squeeze(axis=-1)
if dh.data.loaded:
# faster direct access of numpy array if loaded
base_data = dh.data[base_dset].data[idy, idx]
else:
base_data = dh.data[base_dset].data.vindex[idy, idx]

assert base_data.shape[0] == len(base_gid)
assert base_data.shape[1] == len(dh.time_index)
return base_data.mean(axis=0)
Expand Down
150 changes: 104 additions & 46 deletions sup3r/bias/bias_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@

import dask.array as da
import numpy as np
import pandas as pd
from rex.utilities.bc_utils import QuantileDeltaMapping
from scipy.ndimage import gaussian_filter

from sup3r.preprocessing import Rasterizer
from sup3r.preprocessing.utilities import make_time_index_from_kws

logger = logging.getLogger(__name__)

Expand All @@ -27,7 +27,7 @@ def _get_factors(target, shape, var_names, bias_fp, threshold=0.1):
"""Get bias correction factors from sup3r's standard resource
This was stripped without any change from original
`get_spatial_bc_factors` to allow re-use in other `*_bc_factors`
`_get_spatial_bc_factors` to allow re-use in other `*_bc_factors`
functions.
Parameters
Expand Down Expand Up @@ -76,7 +76,7 @@ def _get_factors(target, shape, var_names, bias_fp, threshold=0.1):
return out


def get_spatial_bc_factors(lat_lon, feature_name, bias_fp, threshold=0.1):
def _get_spatial_bc_factors(lat_lon, feature_name, bias_fp, threshold=0.1):
"""Get bc factors (scalar/adder) for the given feature for the given
domain (specified by lat_lon).
Expand Down Expand Up @@ -114,7 +114,7 @@ def get_spatial_bc_factors(lat_lon, feature_name, bias_fp, threshold=0.1):
)


def get_spatial_bc_quantiles(
def _get_spatial_bc_quantiles(
lat_lon: Union[np.ndarray, da.core.Array],
base_dset: str,
feature_name: str,
Expand Down Expand Up @@ -200,7 +200,7 @@ def get_spatial_bc_quantiles(
>>> lat_lon = np.array([
... [39.649033, -105.46875 ],
... [39.649033, -104.765625]])
>>> params = get_spatial_bc_quantiles(
>>> params = _get_spatial_bc_quantiles(
... lat_lon, "ghi", "rsds", "./dist_params.hdf")
"""
Expand Down Expand Up @@ -297,7 +297,7 @@ def local_linear_bc(
out = data * scalar + adder
"""

out = get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
out = _get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
scalar, adder = out['scalar'], out['adder']
# 3D bias correction factors have seasonal/monthly correction in last axis
if len(scalar.shape) == 3 and len(adder.shape) == 3:
Expand Down Expand Up @@ -402,8 +402,8 @@ def monthly_local_linear_bc(
out : np.ndarray
out = data * scalar + adder
"""
time_index = pd.date_range(**date_range_kwargs)
out = get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
time_index = make_time_index_from_kws(date_range_kwargs)
out = _get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
scalar, adder = out['scalar'], out['adder']

assert len(scalar.shape) == 3, 'Monthly bias correct needs 3D scalars'
Expand Down Expand Up @@ -471,6 +471,7 @@ def local_qdm_bc(
no_trend=False,
delta_denom_min=None,
delta_denom_zero=None,
delta_range=None,
out_range=None,
max_workers=1,
):
Expand Down Expand Up @@ -536,6 +537,11 @@ def local_qdm_bc(
division by a very small number making delta blow up and resulting
in very large output bias corrected values. See equation 4 of
Cannon et al., 2015 for the delta term.
delta_range : tuple | None
Option to set a (min, max) on the delta term in QDM. This can help
prevent QDM from making non-realistic increases/decreases in
otherwise physical values. See equation 4 of Cannon et al., 2015 for
the delta term.
out_range : None | tuple
Option to set floor/ceiling values on the output data.
max_workers : int | None
Expand Down Expand Up @@ -583,12 +589,15 @@ def local_qdm_bc(
"""
# Confirm that the given time matches the expected data size
time_index = pd.date_range(**date_range_kwargs)
assert (
data.shape[2] == time_index.size
), 'Time should align with data 3rd dimension'

params = get_spatial_bc_quantiles(
msg = f'data was expected to be a 3D array but got shape {data.shape}'
assert data.ndim == 3, msg
time_index = make_time_index_from_kws(date_range_kwargs)
msg = (f'Time should align with data 3rd dimension but got data '
f'{data.shape} and time_index length '
f'{time_index.size}: {time_index}')
assert data.shape[-1] == time_index.size, msg

params = _get_spatial_bc_quantiles(
lat_lon=lat_lon,
base_dset=base_dset,
feature_name=feature_name,
Expand Down Expand Up @@ -635,6 +644,7 @@ def local_qdm_bc(
log_base=cfg['log_base'],
delta_denom_min=delta_denom_min,
delta_denom_zero=delta_denom_zero,
delta_range=delta_range,
)

subset_idx = nearest_window_idx == window_idx
Expand All @@ -654,10 +664,17 @@ def local_qdm_bc(
output = np.maximum(output, np.min(out_range))
output = np.minimum(output, np.max(out_range))

if np.isnan(output).any():
msg = ('Presrat bias correction resulted in NaN values! If this is a '
'relative QDM, you may try setting ``delta_denom_min`` or '
'``delta_denom_zero``')
logger.error(msg)
raise RuntimeError(msg)

return output


def get_spatial_bc_presrat(
def _get_spatial_bc_presrat(
lat_lon: np.array,
base_dset: str,
feature_name: str,
Expand Down Expand Up @@ -766,7 +783,7 @@ def get_spatial_bc_presrat(
>>> lat_lon = np.array([
... [39.649033, -105.46875 ],
... [39.649033, -104.765625]])
>>> params = get_spatial_bc_quantiles(
>>> params = _get_spatial_bc_presrat(
... lat_lon, "ghi", "rsds", "./dist_params.hdf")
"""
Expand All @@ -788,12 +805,12 @@ def get_spatial_bc_presrat(
)


def apply_presrat_bc(data, time_index, base_params, bias_params,
bias_fut_params, bias_tau_fut, k_factor,
time_window_center, dist='empirical', sampling='invlog',
log_base=10, relative=True, no_trend=False,
zero_rate_threshold=1.157e-7, out_range=None,
max_workers=1):
def _apply_presrat_bc(data, time_index, base_params, bias_params,
bias_fut_params, bias_tau_fut, k_factor,
time_window_center, dist='empirical', sampling='invlog',
log_base=10, relative=True, no_trend=False,
delta_denom_min=None, delta_range=None, out_range=None,
max_workers=1):
"""Run PresRat to bias correct data from input parameters and not from bias
correction file on disk.
Expand Down Expand Up @@ -868,13 +885,18 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
:class:`rex.utilities.bc_utils.QuantileDeltaMapping`. Note that this
assumes that params_mh is the data distribution representative for the
target data.
zero_rate_threshold : float, default=1.157e-7
Threshold value used to determine the zero rate in the observed
historical dataset and the minimum value in the denominator in relative
QDM. For instance, 0.01 means that anything less than that will be
considered negligible, hence equal to zero. Dai 2006 defined this as
1mm/day. Pierce 2015 used 0.01mm/day. We recommend 0.01mm/day
(1.157e-7 kg/m2/s).
delta_denom_min : float | None
Option to specify a minimum value for the denominator term in the
calculation of a relative delta value. This prevents division by a
very small number making delta blow up and resulting in very large
output bias corrected values. See equation 4 of Cannon et al., 2015
for the delta term. If this is not set, the ``zero_rate_threshold``
calculated as part of the presrat bias calculation will be used.
delta_range : tuple | None
Option to set a (min, max) on the delta term in QDM. This can help
prevent QDM from making non-realistic increases/decreases in
otherwise physical values. See equation 4 of Cannon et al., 2015 for
the delta term.
out_range : None | tuple
Option to set floor/ceiling values on the output data.
max_workers : int | None
Expand Down Expand Up @@ -904,7 +926,8 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
relative=relative,
sampling=sampling,
log_base=log_base,
delta_denom_min=zero_rate_threshold,
delta_denom_min=delta_denom_min,
delta_range=delta_range,
)

# input 3D shape (spatial, spatial, temporal)
Expand All @@ -928,6 +951,13 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
data_unbiased = np.maximum(data_unbiased, np.min(out_range))
data_unbiased = np.minimum(data_unbiased, np.max(out_range))

if np.isnan(data_unbiased).any():
msg = ('Presrat bias correction resulted in NaN values! If this is a '
'relative QDM, you may try setting ``delta_denom_min`` or '
'``delta_denom_zero``')
logger.error(msg)
raise RuntimeError(msg)

return data_unbiased


Expand All @@ -941,6 +971,9 @@ def local_presrat_bc(data: np.ndarray,
threshold=0.1,
relative=True,
no_trend=False,
delta_denom_min=None,
delta_range=None,
k_range=None,
out_range=None,
max_workers=1,
):
Expand Down Expand Up @@ -996,18 +1029,34 @@ def local_presrat_bc(data: np.ndarray,
:class:`rex.utilities.bc_utils.QuantileDeltaMapping`. Note that this
assumes that params_mh is the data distribution representative for the
target data.
delta_denom_min : float | None
Option to specify a minimum value for the denominator term in the
calculation of a relative delta value. This prevents division by a
very small number making delta blow up and resulting in very large
output bias corrected values. See equation 4 of Cannon et al., 2015
for the delta term. If this is not set, the ``zero_rate_threshold``
calculated as part of the presrat bias calculation will be used.
delta_range : tuple | None
Option to set a (min, max) on the delta term in QDM. This can help
prevent QDM from making non-realistic increases/decreases in
otherwise physical values. See equation 4 of Cannon et al., 2015 for
the delta term.
k_range : tuple | None
Option to set a (min, max) value for the k-factor multiplier.
out_range : None | tuple
Option to set floor/ceiling values on the output data.
max_workers : int | None
Max number of workers to use for QDM process pool
"""
time_index = pd.date_range(**date_range_kwargs)
assert data.ndim == 3, 'data was expected to be a 3D array'
assert (
data.shape[-1] == time_index.size
), 'The last dimension of data should be time'

params = get_spatial_bc_presrat(
time_index = make_time_index_from_kws(date_range_kwargs)
msg = f'data was expected to be a 3D array but got shape {data.shape}'
assert data.ndim == 3, msg
msg = (f'Time should align with data 3rd dimension but got data '
f'{data.shape} and time_index length '
f'{time_index.size}: {time_index}')
assert data.shape[-1] == time_index.size, msg

params = _get_spatial_bc_presrat(
lat_lon, base_dset, feature_name, bias_fp, threshold
)
cfg = params['cfg']
Expand All @@ -1022,21 +1071,30 @@ def local_presrat_bc(data: np.ndarray,
sampling = cfg['sampling']
log_base = cfg['log_base']
zero_rate_threshold = cfg['zero_rate_threshold']
delta_denom_min = delta_denom_min or zero_rate_threshold

if k_range is not None:
k_factor = np.maximum(k_factor, np.min(k_range))
k_factor = np.minimum(k_factor, np.max(k_range))

logger.debug(f'Presrat K Factor has shape {k_factor.shape} and ranges '
f'from {k_factor.min()} to {k_factor.max()}')

if lr_padded_slice is not None:
spatial_slice = (lr_padded_slice[0], lr_padded_slice[1])
base_params = base_params[spatial_slice]
bias_params = bias_params[spatial_slice]
bias_fut_params = bias_fut_params[spatial_slice]

data_unbiased = apply_presrat_bc(data, time_index, base_params,
bias_params, bias_fut_params,
bias_tau_fut, k_factor,
time_window_center, dist=dist,
sampling=sampling, log_base=log_base,
relative=relative, no_trend=no_trend,
zero_rate_threshold=zero_rate_threshold,
out_range=out_range,
max_workers=max_workers)
data_unbiased = _apply_presrat_bc(data, time_index, base_params,
bias_params, bias_fut_params,
bias_tau_fut, k_factor,
time_window_center, dist=dist,
sampling=sampling, log_base=log_base,
relative=relative, no_trend=no_trend,
delta_denom_min=delta_denom_min,
delta_range=delta_range,
out_range=out_range,
max_workers=max_workers)

return data_unbiased
26 changes: 17 additions & 9 deletions sup3r/bias/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from rex import Resource

import sup3r.bias.bias_transforms
from sup3r.bias.bias_transforms import get_spatial_bc_factors, local_qdm_bc
from sup3r.bias.bias_transforms import _get_spatial_bc_factors, local_qdm_bc
from sup3r.preprocessing.utilities import (
_parse_time_slice,
get_date_range_kwargs,
Expand Down Expand Up @@ -56,7 +56,7 @@ def lin_bc(handler, bc_files, threshold=0.1):
and dset_adder.lower() in dsets
)
if feature not in completed and check:
out = get_spatial_bc_factors(
out = _get_spatial_bc_factors(
lat_lon=handler.lat_lon,
feature_name=feature,
bias_fp=fp,
Expand Down Expand Up @@ -268,11 +268,19 @@ def bias_correct_features(

time_slice = _parse_time_slice(time_slice)
for feat in features:
input_handler[feat][..., time_slice] = bias_correct_feature(
source_feature=feat,
input_handler=input_handler,
time_slice=time_slice,
bc_method=bc_method,
bc_kwargs=bc_kwargs,
)
try:
input_handler[feat][..., time_slice] = bias_correct_feature(
source_feature=feat,
input_handler=input_handler,
time_slice=time_slice,
bc_method=bc_method,
bc_kwargs=bc_kwargs,
)
except Exception as e:
msg = (f'Could not run bias correction method {bc_method} on '
f'feature {feat} time slice {time_slice} with input '
f'handler of class {type(input_handler)} with shape '
f'{input_handler.shape}. Received error: {e}')
logger.exception(msg)
raise RuntimeError(msg) from e
return input_handler
8 changes: 6 additions & 2 deletions sup3r/pipeline/strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,12 @@ def init_input_handler(self):

InputHandler = get_input_handler_class(self.input_handler_name)
input_handler_kwargs = copy.deepcopy(self.input_handler_kwargs)
features = [] if self.head_node else self.features
input_handler_kwargs['features'] = features

input_handler_kwargs['features'] = self.features
if self.head_node:
input_handler_kwargs['features'] = []
input_handler_kwargs['chunks'] = 'auto'

input_handler_kwargs['time_slice'] = slice(None)

return InputHandler(**input_handler_kwargs)
Expand Down
Loading

0 comments on commit b8ebeeb

Please sign in to comment.