Skip to content

Commit

Permalink
bug fix for esoteric edge case with missing leap day in source data
Browse files Browse the repository at this point in the history
  • Loading branch information
grantbuster committed Nov 5, 2024
1 parent 8085a43 commit 3bcee20
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 20 deletions.
27 changes: 16 additions & 11 deletions sup3r/bias/bias_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@

import dask.array as da
import numpy as np
import pandas as pd
from rex.utilities.bc_utils import QuantileDeltaMapping
from scipy.ndimage import gaussian_filter

from sup3r.preprocessing import Rasterizer
from sup3r.preprocessing.utilities import make_time_index_from_kws

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -402,7 +402,7 @@ def monthly_local_linear_bc(
out : np.ndarray
out = data * scalar + adder
"""
time_index = pd.date_range(**date_range_kwargs)
time_index = make_time_index_from_kws(date_range_kwargs)
out = _get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
scalar, adder = out['scalar'], out['adder']

Expand Down Expand Up @@ -589,10 +589,13 @@ def local_qdm_bc(
"""
# Confirm that the given time matches the expected data size
time_index = pd.date_range(**date_range_kwargs)
assert (
data.shape[2] == time_index.size
), 'Time should align with data 3rd dimension'
msg = f'data was expected to be a 3D array but got shape {data.shape}'
assert data.ndim == 3, msg
time_index = make_time_index_from_kws(date_range_kwargs)
msg = (f'Time should align with data 3rd dimension but got data '
f'{data.shape} and time_index length '
f'{time_index.size}: {time_index}')
assert data.shape[-1] == time_index.size, msg

params = _get_spatial_bc_quantiles(
lat_lon=lat_lon,
Expand Down Expand Up @@ -1031,11 +1034,13 @@ def local_presrat_bc(data: np.ndarray,
max_workers : int | None
Max number of workers to use for QDM process pool
"""
time_index = pd.date_range(**date_range_kwargs)
assert data.ndim == 3, 'data was expected to be a 3D array'
assert (
data.shape[-1] == time_index.size
), 'The last dimension of data should be time'
time_index = make_time_index_from_kws(date_range_kwargs)
msg = f'data was expected to be a 3D array but got shape {data.shape}'
assert data.ndim == 3, msg
msg = (f'Time should align with data 3rd dimension but got data '
f'{data.shape} and time_index length '
f'{time_index.size}: {time_index}')
assert data.shape[-1] == time_index.size, msg

params = _get_spatial_bc_presrat(
lat_lon, base_dset, feature_name, bias_fp, threshold
Expand Down
22 changes: 15 additions & 7 deletions sup3r/bias/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,19 @@ def bias_correct_features(

time_slice = _parse_time_slice(time_slice)
for feat in features:
input_handler[feat][..., time_slice] = bias_correct_feature(
source_feature=feat,
input_handler=input_handler,
time_slice=time_slice,
bc_method=bc_method,
bc_kwargs=bc_kwargs,
)
try:
input_handler[feat][..., time_slice] = bias_correct_feature(
source_feature=feat,
input_handler=input_handler,
time_slice=time_slice,
bc_method=bc_method,
bc_kwargs=bc_kwargs,
)
except Exception as e:
msg = (f'Could not run bias correction method {bc_method} on '
f'feature {feat} time slice {time_slice} with input '
f'handler of class {type(input_handler)} with shape '
f'{input_handler.shape}. Received error: {e}')
logger.exception(msg)
raise RuntimeError(msg) from e
return input_handler
55 changes: 53 additions & 2 deletions sup3r/preprocessing/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,18 +128,69 @@ def wrapper(self, *args, **kwargs):
def get_date_range_kwargs(time_index):
    """Get kwargs for pd.date_range from a DatetimeIndex. This is used to
    provide a concise time_index representation which can be passed through
    the cli and avoid logging lengthy time indices.

    Parameters
    ----------
    time_index : pd.DatetimeIndex
        Time index to summarize. Must not be empty because the first and
        last entries are used as the ``start`` and ``end`` of the range.

    Returns
    -------
    kwargs : dict
        Dictionary with ``start``, ``end`` and ``freq`` entries to pass to
        pd.date_range(). Can also include kwarg ``drop_leap`` (set to True)
        when the input has uneven time steps and its length matches the
        nominal date range with all Feb 29 entries removed. ``drop_leap`` is
        not a pd.date_range() argument, so it has to be removed from the
        dictionary before calling pd.date_range().
    """
    # With only two timestamps the frequency is taken to be the spacing
    # between them, expressed in minutes. Otherwise pandas infers it.
    freq = (
        f'{(time_index[-1] - time_index[0]).total_seconds() / 60}min'
        if len(time_index) == 2
        else pd.infer_freq(time_index)
    )

    kwargs = {
        'start': time_index[0].strftime('%Y-%m-%d %H:%M:%S'),
        'end': time_index[-1].strftime('%Y-%m-%d %H:%M:%S'),
        'freq': freq,
    }

    # Rebuild the index from the concise kwargs so it can be compared with
    # the index that was actually provided.
    nominal_ti = pd.date_range(**kwargs)

    # Step sizes between consecutive timestamps. Slice subtraction is used
    # instead of Index.diff(), which is only available in newer pandas.
    steps = (time_index[1:] - time_index[:-1]).unique()
    uneven_freq = len(steps) > 1

    if uneven_freq:
        leap_mask = (nominal_ti.month == 2) & (nominal_ti.day == 29)
        n_without_leap = len(nominal_ti) - int(leap_mask.sum())

        # Only flag a leap day drop when removing Feb 29 from the nominal
        # index actually accounts for the missing timestamps.
        leap_dropped = (
            len(nominal_ti) > len(time_index)
            and n_without_leap == len(time_index)
        )

        if leap_dropped:
            kwargs['drop_leap'] = True
        else:
            msg = (f'Got uneven frequency for time index: {time_index}')
            warn(msg)
            logger.warning(msg)

    return kwargs


def make_time_index_from_kws(date_range_kwargs):
    """Function to make a pandas DatetimeIndex from the
    ``get_date_range_kwargs`` outputs

    Parameters
    ----------
    date_range_kwargs : dict
        Dictionary to pass to pd.date_range(), typically produced from
        ``get_date_range_kwargs()``. Can also include kwarg ``drop_leap``.
        This dictionary is not modified, so it can safely be reused for
        multiple calls.

    Returns
    -------
    time_index : pd.DatetimeIndex
        Output time index. If ``drop_leap`` is truthy, all Feb 29 entries
        are removed.
    """
    # Work on a shallow copy: popping ``drop_leap`` from the caller's dict
    # would make a second call with the same kwargs silently keep leap days.
    kwargs = dict(date_range_kwargs)

    # ``drop_leap`` is not a pd.date_range() argument and must be removed
    # before the remaining kwargs are forwarded.
    drop_leap = kwargs.pop('drop_leap', False)
    time_index = pd.date_range(**kwargs)

    if drop_leap:
        leap_mask = (time_index.month == 2) & (time_index.day == 29)
        time_index = time_index[~leap_mask]

    return time_index


def _compute_chunks_if_dask(arr):
return (
Expand Down

0 comments on commit 3bcee20

Please sign in to comment.