added skill evaluation to bias calculation framework
grantbuster committed Aug 24, 2023
1 parent f3803a8 commit 274c1c6
Showing 2 changed files with 197 additions and 23 deletions.
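The new SkillAssessment class is configured and run the same way as the existing correction classes. A minimal usage sketch follows, mirroring the test added in this commit; the file paths, target coordinate, and raster shape below are placeholders, not values from the repository.

from sup3r.bias.bias_calc import SkillAssessment

FP_NSRDB = './nsrdb_ghi_reference.h5'     # hypothetical reference (NSRDB) file
FP_CC = './climate_model_rsds.nc'         # hypothetical climate model file
TARGET = (39.0, -105.7)                   # hypothetical (lat, lon) raster target
SHAPE = (20, 20)                          # hypothetical raster shape

calc = SkillAssessment(FP_NSRDB, FP_CC, 'ghi', 'rsds', TARGET, SHAPE,
                       bias_handler='DataHandlerNCforCC')

# Fills calc.out with raster-shaped arrays: monthly scalar/adder factors plus
# annual skill metrics, e.g. 'rsds_ks_stat', 'rsds_ks_p', 'rsds_bias',
# 'base_ghi_mean', and the percentile keys from SkillAssessment.PERCENTILES.
out = calc.run(knn=1, threshold=0.6, fill_extend=True, max_workers=1)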
182 changes: 160 additions & 22 deletions sup3r/bias/bias_calc.py
@@ -14,7 +14,7 @@
from rex.utilities.fun_utils import get_fun_call_str
from scipy.ndimage.filters import gaussian_filter
from scipy.spatial import KDTree
from scipy.stats import ks_2samp
from scipy import stats

import sup3r.preprocessing.data_handling
from sup3r.utilities import VERSION_RECORD, ModuleName
@@ -151,7 +151,7 @@ def compare_dists(base_data, bias_data, adder=0, scalar=1):
out : float
KS test statistic
"""
out = ks_2samp(base_data, bias_data * scalar + adder)
out = stats.ks_2samp(base_data, bias_data * scalar + adder)
return out.statistic

@classmethod
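For reference, compare_dists reduces the two samples to a single KS statistic after an optional linear correction of the biased data. A self-contained sketch with synthetic data (all values below are assumed, not taken from the repository):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
base_data = rng.normal(5.0, 2.0, 1000)    # synthetic reference time series
bias_data = rng.normal(4.0, 1.5, 1000)    # synthetic biased time series

# One common linear correction: match the reference mean and standard deviation.
scalar = base_data.std() / bias_data.std()
adder = base_data.mean() - bias_data.mean() * scalar

raw = stats.ks_2samp(base_data, bias_data).statistic
corrected = stats.ks_2samp(base_data, bias_data * scalar + adder).statistic
print(raw, corrected)                     # the corrected statistic should be smaller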
@@ -492,8 +492,30 @@ class LinearCorrection(DataRetrievalBase):
available data (no season bias correction)
"""

# size of the time dimension, 1 is no time-based bias correction
NT = 1
"""size of the time dimension, 1 is no time-based bias correction"""

def __init__(self, *args, **kwargs):
"""
Parameters
----------
*args : list
Same positional args as DataRetrievalBase
**kwargs : dict
Same keyword args as DataRetrievalBase
"""
super().__init__(*args, **kwargs)

keys = [f'{self.bias_feature}_scalar',
f'{self.bias_feature}_adder',
f'bias_{self.bias_feature}_mean',
f'bias_{self.bias_feature}_std',
f'base_{self.base_dset}_mean',
f'base_{self.base_dset}_std',
]
self.out = {k: np.full((*self.bias_gid_raster.shape, self.NT),
np.nan, np.float32)
for k in keys}

@staticmethod
def get_linear_correction(bias_data, base_data, bias_feature, base_dset):
@@ -597,7 +619,7 @@ def fill_and_smooth(self, out, fill_extend=True, smooth_extend=0,
"""
for key, arr in out.items():
nan_mask = np.isnan(arr[..., 0])
for idt in range(self.NT):
for idt in range(arr.shape[-1]):

arr_smooth = arr[..., idt]

@@ -705,17 +727,6 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
"""
logger.debug('Starting linear correction calculation...')

keys = [f'{self.bias_feature}_scalar',
f'{self.bias_feature}_adder',
f'bias_{self.bias_feature}_mean',
f'bias_{self.bias_feature}_std',
f'base_{self.base_dset}_mean',
f'base_{self.base_dset}_std',
]
out = {k: np.full((*self.bias_gid_raster.shape, self.NT),
np.nan, np.float32)
for k in keys}

logger.info('Initialized scalar / adder with shape: {}'
.format(self.bias_gid_raster.shape))

@@ -735,7 +746,7 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
daily_reduction,
self.bias_ti, self.decimals)
for key, arr in single_out.items():
out[key][raster_loc] = arr
self.out[key][raster_loc] = arr

logger.info('Completed bias calculations for {} out of {} '
'sites'.format(i + 1, len(self.bias_meta)))
@@ -765,19 +776,19 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
raster_loc = futures[future]
single_out = future.result()
for key, arr in single_out.items():
out[key][raster_loc] = arr
self.out[key][raster_loc] = arr

logger.info('Completed bias calculations for {} out of {} '
'sites'.format(i + 1, len(futures)))

logger.info('Finished calculating bias correction factors.')

out = self.fill_and_smooth(out, fill_extend, smooth_extend,
smooth_interior)
self.out = self.fill_and_smooth(self.out, fill_extend, smooth_extend,
smooth_interior)

self.write_outputs(fp_out, out)
self.write_outputs(fp_out, self.out)

return out
return self.out


class MonthlyLinearCorrection(LinearCorrection):
@@ -786,8 +797,8 @@ class MonthlyLinearCorrection(LinearCorrection):
This calculation operates on single bias sites on a monthly basis
"""

# size of the time dimension, 12 is monthly bias correction
NT = 12
"""size of the time dimension, 12 is monthly bias correction"""

@classmethod
def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
@@ -823,3 +834,130 @@ def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
out[k][month - 1] = v

return out


class SkillAssessment(MonthlyLinearCorrection):
"""Calculate historical skill of one dataset compared to another."""

PERCENTILES = (1, 5, 25, 50, 75, 95, 99)
"""Data percentiles to report."""

def __init__(self, *args, **kwargs):
"""
Parameters
----------
*args : list
Same positional args as DataRetrievalBase
**kwargs : dict
Same keyword args as DataRetrievalBase
"""
super().__init__(*args, **kwargs)

monthly_keys = [f'{self.bias_feature}_scalar',
f'{self.bias_feature}_adder',
f'bias_{self.bias_feature}_mean_monthly',
f'bias_{self.bias_feature}_std_monthly',
f'base_{self.base_dset}_mean_monthly',
f'base_{self.base_dset}_std_monthly',
]

annual_keys = [f'{self.bias_feature}_ks_stat',
f'{self.bias_feature}_ks_p',
f'{self.bias_feature}_bias',
f'bias_{self.bias_feature}_mean',
f'bias_{self.bias_feature}_std',
f'bias_{self.bias_feature}_skew',
f'bias_{self.bias_feature}_kurtosis',
f'base_{self.base_dset}_mean',
f'base_{self.base_dset}_std',
f'base_{self.base_dset}_skew',
f'base_{self.base_dset}_kurtosis',
]

self.out = {k: np.full((*self.bias_gid_raster.shape, self.NT),
np.nan, np.float32)
for k in monthly_keys}

arr = np.full((*self.bias_gid_raster.shape, 1), np.nan, np.float32)
for k in annual_keys:
self.out[k] = arr.copy()

for p in self.PERCENTILES:
base_k = f'base_{self.base_dset}_percentile_{p}'
bias_k = f'bias_{self.bias_feature}_percentile_{p}'
self.out[base_k] = arr.copy()
self.out[bias_k] = arr.copy()

@classmethod
def _run_skill_eval(cls, bias_data, base_data, bias_feature, base_dset):
"""Run skill assessment metrics on 1D datasets at a single site.
Note we run the KS test on the mean=0 distributions as per:
S. Brands et al. 2013 https://doi.org/10.1007/s00382-013-1742-8
"""

out = {}
bias_mean = np.nanmean(bias_data)
base_mean = np.nanmean(base_data)
out[f'{bias_feature}_bias'] = (bias_mean - base_mean)

out[f'bias_{bias_feature}_mean'] = bias_mean
out[f'bias_{bias_feature}_std'] = np.nanstd(bias_data)
out[f'bias_{bias_feature}_skew'] = stats.skew(bias_data)
out[f'bias_{bias_feature}_kurtosis'] = stats.kurtosis(bias_data)

out[f'base_{base_dset}_mean'] = base_mean
out[f'base_{base_dset}_std'] = np.nanstd(base_data)
out[f'base_{base_dset}_skew'] = stats.skew(base_data)
out[f'base_{base_dset}_kurtosis'] = stats.kurtosis(base_data)

ks_out = stats.ks_2samp(base_data - base_mean, bias_data - bias_mean)
out[f'{bias_feature}_ks_stat'] = ks_out.statistic
out[f'{bias_feature}_ks_p'] = ks_out.pvalue

for p in cls.PERCENTILES:
base_k = f'base_{base_dset}_percentile_{p}'
bias_k = f'bias_{bias_feature}_percentile_{p}'
out[base_k] = np.percentile(base_data, p)
out[bias_k] = np.percentile(bias_data, p)

return out

@classmethod
def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
base_gid, base_handler, daily_reduction, bias_ti,
decimals):
"""Do a skill assessment at a single site"""

base_data, base_ti = cls.get_base_data(base_fps, base_dset,
base_gid, base_handler,
daily_reduction=daily_reduction,
decimals=decimals)

arr = np.full(cls.NT, np.nan, dtype=np.float32)
out = {f'bias_{bias_feature}_mean_monthly': arr.copy(),
f'bias_{bias_feature}_std_monthly': arr.copy(),
f'base_{base_dset}_mean_monthly': arr.copy(),
f'base_{base_dset}_std_monthly': arr.copy(),
f'{bias_feature}_scalar': arr.copy(),
f'{bias_feature}_adder': arr.copy(),
}

out.update(cls._run_skill_eval(bias_data, base_data,
bias_feature, base_dset))

for month in range(1, 13):
bias_mask = bias_ti.month == month
base_mask = base_ti.month == month

if any(bias_mask) and any(base_mask):
mout = cls.get_linear_correction(bias_data[bias_mask],
base_data[base_mask],
bias_feature,
base_dset)
for k, v in mout.items():
if not k.endswith(('_scalar', '_adder')):
k += '_monthly'
out[k][month - 1] = v

return out
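The _run_skill_eval method above runs the KS test on zero-mean samples, per Brands et al. 2013, so the statistic reflects differences in distribution shape rather than the mean offset, which is reported separately as the '{bias_feature}_bias' key. A self-contained sketch of the idea with synthetic data (all values assumed):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
base_data = rng.normal(200.0, 40.0, 2000)   # synthetic "observed" daily values
bias_data = rng.normal(230.0, 40.0, 2000)   # synthetic model output, +30 mean bias

raw = stats.ks_2samp(base_data, bias_data)
centered = stats.ks_2samp(base_data - base_data.mean(),
                          bias_data - bias_data.mean())

# The raw statistic is dominated by the mean offset; the centered statistic
# is much smaller because the two distributions have the same shape.
print(raw.statistic, centered.statistic)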
@@ -7,9 +7,11 @@
import numpy as np
import pytest
import xarray as xr
from scipy import stats

from sup3r import CONFIG_DIR, TEST_DATA_DIR
from sup3r.bias.bias_calc import LinearCorrection, MonthlyLinearCorrection
from sup3r.bias.bias_calc import (LinearCorrection, MonthlyLinearCorrection,
SkillAssessment)
from sup3r.bias.bias_transforms import local_linear_bc, monthly_local_linear_bc
from sup3r.models import Sup3rGan
from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy
@@ -430,3 +432,37 @@ def test_qa_integration():
data_bc = qa.get_source_dset(feature, feature)

assert np.allclose(data_bc, data_truth, equal_nan=True)


def test_skill_assessment():
"""Test the skill assessment of a climate model vs. historical data"""
calc = SkillAssessment(FP_NSRDB, FP_CC, 'ghi', 'rsds', TARGET, SHAPE,
bias_handler='DataHandlerNCforCC')

# test a known in-bounds gid
bias_gid = 5
dist, base_gid = calc.get_base_gid(bias_gid, 1)
bias_data = calc.get_bias_data(bias_gid)
base_data, base_ti = calc.get_base_data(calc.base_fps, calc.base_dset,
base_gid, calc.base_handler,
daily_reduction='avg')
bias_coord = calc.bias_meta.loc[bias_gid, ['latitude', 'longitude']]
base_coord = calc.base_meta.loc[base_gid, ['latitude', 'longitude']]
true_dist = bias_coord.values - base_coord.values
true_dist = np.hypot(true_dist[0], true_dist[1])
assert np.allclose(true_dist, dist)
assert true_dist < 0.1
iloc = np.where(calc.bias_gid_raster == bias_gid)
iloc += (0, )

out = calc.run(knn=1, threshold=0.6, fill_extend=True, max_workers=1)

base_mean = base_data.mean()
bias_mean = bias_data.mean()
assert np.allclose(out['base_ghi_mean'][iloc], base_mean)
assert np.allclose(out['bias_rsds_mean'][iloc], bias_mean)
assert np.allclose(out['rsds_bias'][iloc], bias_mean - base_mean)

ks = stats.ks_2samp(base_data - base_mean, bias_data - bias_mean)
assert np.allclose(out['rsds_ks_stat'][iloc], ks.statistic)
assert np.allclose(out['rsds_ks_p'][iloc], ks.pvalue)
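
The iloc indexing in the test above relies on the annual-metric arrays having a trailing singleton time axis, so the tuple returned by np.where gets a (0, ) appended to select that single slice. A small sketch, assuming a hypothetical 2D gid raster:

import numpy as np

bias_gid_raster = np.arange(12).reshape(3, 4)            # hypothetical gid raster
metric = np.full((*bias_gid_raster.shape, 1), np.nan, np.float32)

iloc = np.where(bias_gid_raster == 5)                    # spatial index of gid 5
iloc += (0, )                                            # append the single time slice
metric[iloc] = 1.23
print(metric[iloc])                                      # -> [1.23]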
