Merge pull request #240 from NREL/gb/bc_kwargs

Gb/bc kwargs
NREL · Nov 5, 2024 · b8ebeeb
2 parents 9ac7518 + 6e46d6c
commit b8ebeeb
Show file tree

Hide file tree

Showing 8 changed files with 190 additions and 62 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -27,7 +27,7 @@ classifiers=[
   "Programming Language :: Python :: 3.11",
 ]
 dependencies = [
-  "NREL-rex>=0.2.90",
+  "NREL-rex>=0.2.91",
   "NREL-phygnn>=0.0.23",
   "NREL-gaps>=0.6.13",
   "NREL-farms>=1.0.4",

diff --git a/sup3r/bias/base.py b/sup3r/bias/base.py
@@ -621,7 +621,12 @@ def _read_base_sup3r_data(dh, base_dset, base_gid):
         gid_raster = np.arange(len(dh.meta))
         gid_raster = gid_raster.reshape(dh.shape[:2])
         idy, idx = np.where(np.isin(gid_raster, base_gid))
-        base_data = dh.data[[base_dset]][idy, idx].squeeze(axis=-1)
+        if dh.data.loaded:
+            # faster direct access of numpy array if loaded
+            base_data = dh.data[base_dset].data[idy, idx]
+        else:
+            base_data = dh.data[base_dset].data.vindex[idy, idx]
+
         assert base_data.shape[0] == len(base_gid)
         assert base_data.shape[1] == len(dh.time_index)
         return base_data.mean(axis=0)

diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py
@@ -14,11 +14,11 @@
 
 import dask.array as da
 import numpy as np
-import pandas as pd
 from rex.utilities.bc_utils import QuantileDeltaMapping
 from scipy.ndimage import gaussian_filter
 
 from sup3r.preprocessing import Rasterizer
+from sup3r.preprocessing.utilities import make_time_index_from_kws
 
 logger = logging.getLogger(__name__)
 
@@ -27,7 +27,7 @@ def _get_factors(target, shape, var_names, bias_fp, threshold=0.1):
     """Get bias correction factors from sup3r's standard resource
 
     This was stripped without any change from original
-    `get_spatial_bc_factors` to allow re-use in other `*_bc_factors`
+    `_get_spatial_bc_factors` to allow re-use in other `*_bc_factors`
     functions.
 
     Parameters
@@ -76,7 +76,7 @@ def _get_factors(target, shape, var_names, bias_fp, threshold=0.1):
     return out
 
 
-def get_spatial_bc_factors(lat_lon, feature_name, bias_fp, threshold=0.1):
+def _get_spatial_bc_factors(lat_lon, feature_name, bias_fp, threshold=0.1):
     """Get bc factors (scalar/adder) for the given feature for the given
     domain (specified by lat_lon).
 
@@ -114,7 +114,7 @@ def get_spatial_bc_factors(lat_lon, feature_name, bias_fp, threshold=0.1):
     )
 
 
-def get_spatial_bc_quantiles(
+def _get_spatial_bc_quantiles(
     lat_lon: Union[np.ndarray, da.core.Array],
     base_dset: str,
     feature_name: str,
@@ -200,7 +200,7 @@ def get_spatial_bc_quantiles(
     >>> lat_lon = np.array([
     ...              [39.649033, -105.46875 ],
     ...              [39.649033, -104.765625]])
-    >>> params = get_spatial_bc_quantiles(
+    >>> params = _get_spatial_bc_quantiles(
     ...             lat_lon, "ghi", "rsds", "./dist_params.hdf")
 
     """
@@ -297,7 +297,7 @@ def local_linear_bc(
         out = data * scalar + adder
     """
 
-    out = get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
+    out = _get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
     scalar, adder = out['scalar'], out['adder']
     # 3D bias correction factors have seasonal/monthly correction in last axis
     if len(scalar.shape) == 3 and len(adder.shape) == 3:
@@ -402,8 +402,8 @@ def monthly_local_linear_bc(
     out : np.ndarray
         out = data * scalar + adder
     """
-    time_index = pd.date_range(**date_range_kwargs)
-    out = get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
+    time_index = make_time_index_from_kws(date_range_kwargs)
+    out = _get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
     scalar, adder = out['scalar'], out['adder']
 
     assert len(scalar.shape) == 3, 'Monthly bias correct needs 3D scalars'
@@ -471,6 +471,7 @@ def local_qdm_bc(
     no_trend=False,
     delta_denom_min=None,
     delta_denom_zero=None,
+    delta_range=None,
     out_range=None,
     max_workers=1,
 ):
@@ -536,6 +537,11 @@ def local_qdm_bc(
         division by a very small number making delta blow up and resulting
         in very large output bias corrected values. See equation 4 of
         Cannon et al., 2015 for the delta term.
+    delta_range : tuple | None
+        Option to set a (min, max) on the delta term in QDM. This can help
+        prevent QDM from making non-realistic increases/decreases in
+        otherwise physical values. See equation 4 of Cannon et al., 2015 for
+        the delta term.
     out_range : None | tuple
         Option to set floor/ceiling values on the output data.
     max_workers: int | None
@@ -583,12 +589,15 @@ def local_qdm_bc(
 
     """
     # Confirm that the given time matches the expected data size
-    time_index = pd.date_range(**date_range_kwargs)
-    assert (
-        data.shape[2] == time_index.size
-    ), 'Time should align with data 3rd dimension'
-
-    params = get_spatial_bc_quantiles(
+    msg = f'data was expected to be a 3D array but got shape {data.shape}'
+    assert data.ndim == 3, msg
+    time_index = make_time_index_from_kws(date_range_kwargs)
+    msg = (f'Time should align with data 3rd dimension but got data '
+           f'{data.shape} and time_index length '
+           f'{time_index.size}: {time_index}')
+    assert data.shape[-1] == time_index.size, msg
+
+    params = _get_spatial_bc_quantiles(
         lat_lon=lat_lon,
         base_dset=base_dset,
         feature_name=feature_name,
@@ -635,6 +644,7 @@ def local_qdm_bc(
             log_base=cfg['log_base'],
             delta_denom_min=delta_denom_min,
             delta_denom_zero=delta_denom_zero,
+            delta_range=delta_range,
         )
 
         subset_idx = nearest_window_idx == window_idx
@@ -654,10 +664,17 @@ def local_qdm_bc(
         output = np.maximum(output, np.min(out_range))
         output = np.minimum(output, np.max(out_range))
 
+    if np.isnan(output).any():
+        msg = ('Presrat bias correction resulted in NaN values! If this is a '
+               'relative QDM, you may try setting ``delta_denom_min`` or '
+               '``delta_denom_zero``')
+        logger.error(msg)
+        raise RuntimeError(msg)
+
     return output
 
 
-def get_spatial_bc_presrat(
+def _get_spatial_bc_presrat(
     lat_lon: np.array,
     base_dset: str,
     feature_name: str,
@@ -766,7 +783,7 @@ def get_spatial_bc_presrat(
     >>> lat_lon = np.array([
     ...              [39.649033, -105.46875 ],
     ...              [39.649033, -104.765625]])
-    >>> params = get_spatial_bc_quantiles(
+    >>> params = _get_spatial_bc_quantiles(
     ...             lat_lon, "ghi", "rsds", "./dist_params.hdf")
 
     """
@@ -788,12 +805,12 @@ def get_spatial_bc_presrat(
     )
 
 
-def apply_presrat_bc(data, time_index, base_params, bias_params,
-                     bias_fut_params, bias_tau_fut, k_factor,
-                     time_window_center, dist='empirical', sampling='invlog',
-                     log_base=10, relative=True, no_trend=False,
-                     zero_rate_threshold=1.157e-7, out_range=None,
-                     max_workers=1):
+def _apply_presrat_bc(data, time_index, base_params, bias_params,
+                      bias_fut_params, bias_tau_fut, k_factor,
+                      time_window_center, dist='empirical', sampling='invlog',
+                      log_base=10, relative=True, no_trend=False,
+                      delta_denom_min=None, delta_range=None, out_range=None,
+                      max_workers=1):
     """Run PresRat to bias correct data from input parameters and not from bias
     correction file on disk.
 
@@ -868,13 +885,18 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
         :class:`rex.utilities.bc_utils.QuantileDeltaMapping`. Note that this
         assumes that params_mh is the data distribution representative for the
         target data.
-    zero_rate_threshold : float, default=1.157e-7
-        Threshold value used to determine the zero rate in the observed
-        historical dataset and the minimum value in the denominator in relative
-        QDM. For instance, 0.01 means that anything less than that will be
-        considered negligible, hence equal to zero. Dai 2006 defined this as
-        1mm/day. Pierce 2015 used 0.01mm/day. We recommend 0.01mm/day
-        (1.157e-7 kg/m2/s).
+    delta_denom_min : float | None
+        Option to specify a minimum value for the denominator term in the
+        calculation of a relative delta value. This prevents division by a
+        very small number making delta blow up and resulting in very large
+        output bias corrected values. See equation 4 of Cannon et al., 2015
+        for the delta term. If this is not set, the ``zero_rate_threshold``
+        calculated as part of the presrat bias calculation will be used.
+    delta_range : tuple | None
+        Option to set a (min, max) on the delta term in QDM. This can help
+        prevent QDM from making non-realistic increases/decreases in
+        otherwise physical values. See equation 4 of Cannon et al., 2015 for
+        the delta term.
     out_range : None | tuple
         Option to set floor/ceiling values on the output data.
     max_workers : int | None
@@ -904,7 +926,8 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
                                    relative=relative,
                                    sampling=sampling,
                                    log_base=log_base,
-                                   delta_denom_min=zero_rate_threshold,
+                                   delta_denom_min=delta_denom_min,
+                                   delta_range=delta_range,
                                    )
 
         # input 3D shape (spatial, spatial, temporal)
@@ -928,6 +951,13 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
         data_unbiased = np.maximum(data_unbiased, np.min(out_range))
         data_unbiased = np.minimum(data_unbiased, np.max(out_range))
 
+    if np.isnan(data_unbiased).any():
+        msg = ('Presrat bias correction resulted in NaN values! If this is a '
+               'relative QDM, you may try setting ``delta_denom_min`` or '
+               '``delta_denom_zero``')
+        logger.error(msg)
+        raise RuntimeError(msg)
+
     return data_unbiased
 
 
@@ -941,6 +971,9 @@ def local_presrat_bc(data: np.ndarray,
                      threshold=0.1,
                      relative=True,
                      no_trend=False,
+                     delta_denom_min=None,
+                     delta_range=None,
+                     k_range=None,
                      out_range=None,
                      max_workers=1,
                      ):
@@ -996,18 +1029,34 @@ def local_presrat_bc(data: np.ndarray,
         :class:`rex.utilities.bc_utils.QuantileDeltaMapping`. Note that this
         assumes that params_mh is the data distribution representative for the
         target data.
+    delta_denom_min : float | None
+        Option to specify a minimum value for the denominator term in the
+        calculation of a relative delta value. This prevents division by a
+        very small number making delta blow up and resulting in very large
+        output bias corrected values. See equation 4 of Cannon et al., 2015
+        for the delta term. If this is not set, the ``zero_rate_threshold``
+        calculated as part of the presrat bias calculation will be used.
+    delta_range : tuple | None
+        Option to set a (min, max) on the delta term in QDM. This can help
+        prevent QDM from making non-realistic increases/decreases in
+        otherwise physical values. See equation 4 of Cannon et al., 2015 for
+        the delta term.
+    k_range : tuple | None
+        Option to set a (min, max) value for the k-factor multiplier
     out_range : None | tuple
         Option to set floor/ceiling values on the output data.
     max_workers : int | None
         Max number of workers to use for QDM process pool
     """
-    time_index = pd.date_range(**date_range_kwargs)
-    assert data.ndim == 3, 'data was expected to be a 3D array'
-    assert (
-        data.shape[-1] == time_index.size
-    ), 'The last dimension of data should be time'
-
-    params = get_spatial_bc_presrat(
+    time_index = make_time_index_from_kws(date_range_kwargs)
+    msg = f'data was expected to be a 3D array but got shape {data.shape}'
+    assert data.ndim == 3, msg
+    msg = (f'Time should align with data 3rd dimension but got data '
+           f'{data.shape} and time_index length '
+           f'{time_index.size}: {time_index}')
+    assert data.shape[-1] == time_index.size, msg
+
+    params = _get_spatial_bc_presrat(
         lat_lon, base_dset, feature_name, bias_fp, threshold
     )
     cfg = params['cfg']
@@ -1022,21 +1071,30 @@ def local_presrat_bc(data: np.ndarray,
     sampling = cfg['sampling']
     log_base = cfg['log_base']
     zero_rate_threshold = cfg['zero_rate_threshold']
+    delta_denom_min = delta_denom_min or zero_rate_threshold
+
+    if k_range is not None:
+        k_factor = np.maximum(k_factor, np.min(k_range))
+        k_factor = np.minimum(k_factor, np.max(k_range))
+
+    logger.debug(f'Presrat K Factor has shape {k_factor.shape} and ranges '
+                 f'from {k_factor.min()} to {k_factor.max()}')
 
     if lr_padded_slice is not None:
         spatial_slice = (lr_padded_slice[0], lr_padded_slice[1])
         base_params = base_params[spatial_slice]
         bias_params = bias_params[spatial_slice]
         bias_fut_params = bias_fut_params[spatial_slice]
 
-    data_unbiased = apply_presrat_bc(data, time_index, base_params,
-                                     bias_params, bias_fut_params,
-                                     bias_tau_fut, k_factor,
-                                     time_window_center, dist=dist,
-                                     sampling=sampling, log_base=log_base,
-                                     relative=relative, no_trend=no_trend,
-                                     zero_rate_threshold=zero_rate_threshold,
-                                     out_range=out_range,
-                                     max_workers=max_workers)
+    data_unbiased = _apply_presrat_bc(data, time_index, base_params,
+                                      bias_params, bias_fut_params,
+                                      bias_tau_fut, k_factor,
+                                      time_window_center, dist=dist,
+                                      sampling=sampling, log_base=log_base,
+                                      relative=relative, no_trend=no_trend,
+                                      delta_denom_min=delta_denom_min,
+                                      delta_range=delta_range,
+                                      out_range=out_range,
+                                      max_workers=max_workers)
 
     return data_unbiased
diff --git a/sup3r/bias/utilities.py b/sup3r/bias/utilities.py
@@ -9,7 +9,7 @@
 from rex import Resource
 
 import sup3r.bias.bias_transforms
-from sup3r.bias.bias_transforms import get_spatial_bc_factors, local_qdm_bc
+from sup3r.bias.bias_transforms import _get_spatial_bc_factors, local_qdm_bc
 from sup3r.preprocessing.utilities import (
     _parse_time_slice,
     get_date_range_kwargs,
@@ -56,7 +56,7 @@ def lin_bc(handler, bc_files, threshold=0.1):
                     and dset_adder.lower() in dsets
                 )
             if feature not in completed and check:
-                out = get_spatial_bc_factors(
+                out = _get_spatial_bc_factors(
                     lat_lon=handler.lat_lon,
                     feature_name=feature,
                     bias_fp=fp,
@@ -268,11 +268,19 @@ def bias_correct_features(
 
     time_slice = _parse_time_slice(time_slice)
     for feat in features:
-        input_handler[feat][..., time_slice] = bias_correct_feature(
-            source_feature=feat,
-            input_handler=input_handler,
-            time_slice=time_slice,
-            bc_method=bc_method,
-            bc_kwargs=bc_kwargs,
-        )
+        try:
+            input_handler[feat][..., time_slice] = bias_correct_feature(
+                source_feature=feat,
+                input_handler=input_handler,
+                time_slice=time_slice,
+                bc_method=bc_method,
+                bc_kwargs=bc_kwargs,
+            )
+        except Exception as e:
+            msg = (f'Could not run bias correction method {bc_method} on '
+                   f'feature {feat} time slice {time_slice} with input '
+                   f'handler of class {type(input_handler)} with shape '
+                   f'{input_handler.shape}. Received error: {e}')
+            logger.exception(msg)
+            raise RuntimeError(msg) from e
     return input_handler
diff --git a/sup3r/pipeline/strategy.py b/sup3r/pipeline/strategy.py
@@ -261,8 +261,12 @@ def init_input_handler(self):
 
         InputHandler = get_input_handler_class(self.input_handler_name)
         input_handler_kwargs = copy.deepcopy(self.input_handler_kwargs)
-        features = [] if self.head_node else self.features
-        input_handler_kwargs['features'] = features
+
+        input_handler_kwargs['features'] = self.features
+        if self.head_node:
+            input_handler_kwargs['features'] = []
+            input_handler_kwargs['chunks'] = 'auto'
+
         input_handler_kwargs['time_slice'] = slice(None)
 
         return InputHandler(**input_handler_kwargs)