From 0d0716ef955bb30326467f0f9004bf92cf179f10 Mon Sep 17 00:00:00 2001 From: "Kenneth S. Hsu" Date: Wed, 1 May 2024 05:44:37 -0700 Subject: [PATCH] #510 (#511) * Debugging * Addressed tine A-DEC vs Y-DEC * Undoing the change * Removed debugger * Reversed the Y and A --- chainladder/core/base.py | 29 ++++++---- chainladder/core/correlation.py | 94 ++++++++++----------------------- chainladder/core/triangle.py | 31 +++++++---- 3 files changed, 68 insertions(+), 86 deletions(-) diff --git a/chainladder/core/base.py b/chainladder/core/base.py index 5ddf1af9..5b25750b 100644 --- a/chainladder/core/base.py +++ b/chainladder/core/base.py @@ -256,8 +256,7 @@ def _to_datetime(data, fields, period_end=False, format=None): def _development_lag(origin, valuation): """For tabular format, this will convert the origin/valuation difference to a development lag""" - return ((valuation - origin) / (365.25/12)).dt.round('1d').dt.days - + return ((valuation - origin) / (365.25 / 12)).dt.round("1d").dt.days @staticmethod def _get_grain(dates, trailing=False, kind="origin"): @@ -274,9 +273,19 @@ def _get_grain(dates, trailing=False, kind="origin"): months = dates.dt.month.unique() diffs = np.diff(np.sort(months)) if len(dates.unique()) == 1: - grain = "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A" + grain = ( + "A" + if version.Version(pd.__version__) >= version.Version("2.2.0") + else "Y" + ) + elif len(months) == 1: - grain = "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A" + grain = ( + "A" + if version.Version(pd.__version__) >= version.Version("2.2.0") + else "Y" + ) + elif np.all(diffs == 6): grain = "2Q" elif np.all(diffs == 3): @@ -402,7 +411,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return obj else: raise NotImplementedError() - + def _interchange_dataframe(self, data): """ Convert an object supporting the __dataframe__ protocol to a pandas DataFrame. @@ -420,12 +429,14 @@ def _interchange_dataframe(self, data): # Check if pandas version is greater than 1.5.2 if version.parse(pd.__version__) >= version.parse("1.5.2"): return pd.api.interchange.from_dataframe(data) - + else: # Raise an error prompting the user to upgrade pandas - raise NotImplementedError("Your version of pandas does not support the DataFrame interchange API. " - "Please upgrade pandas to a version greater than 1.5.2 to use this feature.") - + raise NotImplementedError( + "Your version of pandas does not support the DataFrame interchange API. " + "Please upgrade pandas to a version greater than 1.5.2 to use this feature." + ) + def __array_function__(self, func, types, args, kwargs): from chainladder.utils.utility_functions import concat diff --git a/chainladder/core/correlation.py b/chainladder/core/correlation.py index 01626106..a9bdfb37 100644 --- a/chainladder/core/correlation.py +++ b/chainladder/core/correlation.py @@ -8,11 +8,7 @@ from scipy.special import comb -from scipy.stats import ( - binom, - norm, - rankdata -) +from scipy.stats import binom, norm, rankdata from typing import TYPE_CHECKING @@ -52,11 +48,7 @@ class DevelopmentCorrelation: to be significant. """ - def __init__( - self, - triangle, - p_critical: float = 0.5 - ): + def __init__(self, triangle, p_critical: float = 0.5): self.p_critical = p_critical # Check that critical value is a probability @@ -69,19 +61,15 @@ def __init__( m1 = triangle.link_ratio # Rank link ratios by development period, assigning a score of 1 for the lowest - m1_val = xp.apply_along_axis( - func1d=rankdata, - axis=2, - arr=m1.values - ) * (m1.values * 0 + 1) + m1_val = xp.apply_along_axis(func1d=rankdata, axis=2, arr=m1.values) * ( + m1.values * 0 + 1 + ) # Remove the last element from each column, and then rank again m2 = triangle[triangle.valuation < triangle.valuation_date].link_ratio - m2.values = xp.apply_along_axis( - func1d=rankdata, - axis=2, - arr=m2.values - ) * (m2.values * 0 + 1) + m2.values = xp.apply_along_axis(func1d=rankdata, axis=2, arr=m2.values) * ( + m2.values * 0 + 1 + ) m1 = m2.copy() @@ -122,33 +110,20 @@ def __init__( self.t_variance = 2 / ((I - 2) * (I - 3)) # array of t values - self.t = pd.DataFrame( - self.t[0, 0, ...], - columns=k, - index=["T_k"] - ) + self.t = pd.DataFrame(self.t[0, 0, ...], columns=k, index=["T_k"]) # array of weights - self.weights = pd.DataFrame( - weight[0, 0, ...], - columns=k, - index=["I-k-1"] - ) + self.weights = pd.DataFrame(weight[0, 0, ...], columns=k, index=["I-k-1"]) # final big T self.t_expectation = pd.DataFrame( - t_expectation[..., 0, 0], - columns=triangle.vdims, - index=idx + t_expectation[..., 0, 0], columns=triangle.vdims, index=idx ) # table of Spearman's rank coefficients Tk, can be used to verify consistency with paper - self.corr = pd.concat([ - self.t, - self.weights - ]) + self.corr = pd.concat([self.t, self.weights]) - self.corr.columns.names = ['k'] + self.corr.columns.names = ["k"] # construct confidence interval based on selection of p_critical self.confidence_interval = ( @@ -198,18 +173,9 @@ class ValuationCorrelation: The variance value of Z. """ - def __init__( - self, - triangle: Triangle, - p_critical: float = 0.1, - total: bool = True - ): - - def pZlower( - z: int, - n: int, - p: float = 0.5 - ) -> float: + def __init__(self, triangle: Triangle, p_critical: float = 0.1, total: bool = True): + + def pZlower(z: int, n: int, p: float = 0.5) -> float: return min(1, 2 * binom.cdf(z, n, p)) self.p_critical = p_critical @@ -223,31 +189,27 @@ def pZlower( lr = triangle.link_ratio # Rank link ratios for each column - m1 = xp.apply_along_axis( - func1d=rankdata, - axis=2, - arr=lr.values) * (lr.values * 0 + 1) - - med = xp.nanmedian( - a=m1, - axis=2, - keepdims=True + m1 = xp.apply_along_axis(func1d=rankdata, axis=2, arr=lr.values) * ( + lr.values * 0 + 1 ) + med = xp.nanmedian(a=m1, axis=2, keepdims=True) + # print("med:\n", med) + m1large = (xp.nan_to_num(m1) > med) + (lr.values * 0) m1small = (xp.nan_to_num(m1) < med) + (lr.values * 0) m2large = triangle.link_ratio m2large.values = m1large m2small = triangle.link_ratio m2small.values = m1small - S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).set_backend('numpy').values) - L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).set_backend('numpy').values) + S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).set_backend("numpy").values) + L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).set_backend("numpy").values) z = xp.minimum(L, S) n = L + S m = xp.floor((n - 1) / 2) c = comb(n - 1, m) - EZ = (n / 2) - c * n / (2 ** n) - VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2 ** n) + EZ - EZ ** 2 + EZ = (n / 2) - c * n / (2**n) + VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2**n) + EZ - EZ**2 if not self.total: T = [] for i in range(0, xp.max(m1large.shape[2:]) + 1): @@ -296,9 +258,7 @@ def pZlower( ) -def validate_critical( - p_critical: float -) -> None: +def validate_critical(p_critical: float) -> None: """ Checks whether value passed to the p_critical parameter in ValuationCorrelation or DevelopmentCorrelation classes is a percentage, that is, between 0 and 1. @@ -311,4 +271,4 @@ def validate_critical( if 0 <= p_critical <= 1: pass else: - raise ValueError('p_critical must be between 0 and 1.') + raise ValueError("p_critical must be between 0 and 1.") diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index 9b6ad873..9499a6c0 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -6,6 +6,7 @@ import numpy as np import copy import warnings +from packaging import version from chainladder.core.base import TriangleBase from chainladder.utils.sparse import sp from chainladder.core.slice import VirtualColumns @@ -125,7 +126,7 @@ def __init__( return elif not isinstance(data, pd.DataFrame) and hasattr(data, "__dataframe__"): data = self._interchange_dataframe(data) - + index, columns, origin, development = self._input_validation( data, index, columns, origin, development ) @@ -276,7 +277,7 @@ def __init__( self.ddims = obj.ddims self.values = obj.values self.valuation_date = pd.Timestamp(options.ULT_VAL) - + @staticmethod def _split_ult(data, index, columns, origin, development): """Deal with triangles with ultimate values""" @@ -330,17 +331,25 @@ def origin(self): if self.is_pattern and len(self.odims) == 1: return pd.Series(["(All)"]) else: - freq = {"Y": "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A", - "S": "2Q", "H": "2Q"}.get( - self.origin_grain, self.origin_grain - ) + freq = { + "Y": ( + "A" + if version.Version(pd.__version__) >= version.Version("2.2.0") + else "Y" + ), + "S": "2Q", + "H": "2Q", + }.get(self.origin_grain, self.origin_grain) freq = freq if freq == "M" else freq + "-" + self.origin_close return pd.DatetimeIndex(self.odims, name="origin").to_period(freq=freq) @origin.setter def origin(self, value): self._len_check(self.origin, value) - freq = {"Y": "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A", "S": "2Q"}.get(self.origin_grain, self.origin_grain) + freq = { + "Y": "Y" if float(".".join(pd.__version__.split(".")[:-1])) < 2.2 else "A", + "S": "2Q", + }.get(self.origin_grain, self.origin_grain) freq = freq if freq == "M" else freq + "-" + self.origin_close value = pd.PeriodIndex(list(value), freq=freq) self.odims = value.to_timestamp().values @@ -693,9 +702,11 @@ def grain(self, grain="", trailing=False, inplace=False): obj.origin_close = origin_period_end d_start = pd.Period( obj.valuation[0], - freq=dgrain_old - if dgrain_old == "M" - else dgrain_old + obj.origin.freqstr[-4:], + freq=( + dgrain_old + if dgrain_old == "M" + else dgrain_old + obj.origin.freqstr[-4:] + ), ).to_timestamp(how="s") if len(obj.ddims) > 1 and obj.origin.to_timestamp(how="s")[0] != d_start: addl_ts = (