Commit 0d0716e
* Debugging

* Addressed the A-DEC vs Y-DEC

* Undoing the change

* Removed debugger

* Reversed the Y and A
kennethshsu authored May 1, 2024
1 parent 71a6ced commit 0d0716e
Showing 3 changed files with 68 additions and 86 deletions.
29 changes: 20 additions & 9 deletions chainladder/core/base.py
@@ -256,8 +256,7 @@ def _to_datetime(data, fields, period_end=False, format=None):
     def _development_lag(origin, valuation):
         """For tabular format, this will convert the origin/valuation
         difference to a development lag"""
-        return ((valuation - origin) / (365.25/12)).dt.round('1d').dt.days
-
+        return ((valuation - origin) / (365.25 / 12)).dt.round("1d").dt.days
 
     @staticmethod
     def _get_grain(dates, trailing=False, kind="origin"):
@@ -274,9 +273,19 @@ def _get_grain(dates, trailing=False, kind="origin"):
         months = dates.dt.month.unique()
         diffs = np.diff(np.sort(months))
         if len(dates.unique()) == 1:
-            grain = "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A"
+            grain = (
+                "A"
+                if version.Version(pd.__version__) >= version.Version("2.2.0")
+                else "Y"
+            )
+
         elif len(months) == 1:
-            grain = "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A"
+            grain = (
+                "A"
+                if version.Version(pd.__version__) >= version.Version("2.2.0")
+                else "Y"
+            )
+
         elif np.all(diffs == 6):
             grain = "2Q"
         elif np.all(diffs == 3):
@@ -402,7 +411,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
             return obj
         else:
             raise NotImplementedError()
-
+
     def _interchange_dataframe(self, data):
         """
         Convert an object supporting the __dataframe__ protocol to a pandas DataFrame.
@@ -420,12 +429,14 @@ def _interchange_dataframe(self, data):
         # Check if pandas version is greater than 1.5.2
         if version.parse(pd.__version__) >= version.parse("1.5.2"):
             return pd.api.interchange.from_dataframe(data)
 
         else:
             # Raise an error prompting the user to upgrade pandas
-            raise NotImplementedError("Your version of pandas does not support the DataFrame interchange API. "
-                                      "Please upgrade pandas to a version greater than 1.5.2 to use this feature.")
+            raise NotImplementedError(
+                "Your version of pandas does not support the DataFrame interchange API. "
+                "Please upgrade pandas to a version greater than 1.5.2 to use this feature."
+            )
 
     def __array_function__(self, func, types, args, kwargs):
         from chainladder.utils.utility_functions import concat
 
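A note on the version gate above: the old one-liner parsed `pd.__version__` by dropping the last dot-separated component and casting the remainder to `float`, which misorders two-digit minor releases and raises on pre-release tags, while `packaging.version` compares release segments correctly (the same `version.parse` comparison already gates the interchange path in `_interchange_dataframe`). A minimal sketch of the difference; the helper names `annual_alias` and `annual_alias_old` are illustrative, not part of chainladder:

```python
from packaging import version

def annual_alias(pandas_version: str) -> str:
    # The new check: proper version comparison via packaging.
    return (
        "A"
        if version.Version(pandas_version) >= version.Version("2.2.0")
        else "Y"
    )

def annual_alias_old(pandas_version: str) -> str:
    # The old check: drop the last version component and cast to float.
    return "Y" if float(".".join(pandas_version.split(".")[:-1])) < 2.2 else "A"

# The two agree on ordinary releases:
assert annual_alias("2.1.4") == annual_alias_old("2.1.4") == "Y"
assert annual_alias("2.2.0") == annual_alias_old("2.2.0") == "A"

# But a hypothetical pandas 2.10 parses as float("2.10") == 2.1 < 2.2:
assert annual_alias("2.10.0") == "A"
assert annual_alias_old("2.10.0") == "Y"

# And pre-release tags break float() outright ("2.2.0.dev0" -> "2.2.0"),
# while packaging orders them sensibly:
assert version.Version("2.2.0.dev0") < version.Version("2.2.0")
```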
94 changes: 27 additions & 67 deletions chainladder/core/correlation.py
@@ -8,11 +8,7 @@
 
 from scipy.special import comb
 
-from scipy.stats import (
-    binom,
-    norm,
-    rankdata
-)
+from scipy.stats import binom, norm, rankdata
 
 from typing import TYPE_CHECKING
 
@@ -52,11 +48,7 @@ class DevelopmentCorrelation:
     to be significant.
     """
 
-    def __init__(
-        self,
-        triangle,
-        p_critical: float = 0.5
-    ):
+    def __init__(self, triangle, p_critical: float = 0.5):
         self.p_critical = p_critical
 
         # Check that critical value is a probability
@@ -69,19 +61,15 @@ def __init__(
         m1 = triangle.link_ratio
 
         # Rank link ratios by development period, assigning a score of 1 for the lowest
-        m1_val = xp.apply_along_axis(
-            func1d=rankdata,
-            axis=2,
-            arr=m1.values
-        ) * (m1.values * 0 + 1)
+        m1_val = xp.apply_along_axis(func1d=rankdata, axis=2, arr=m1.values) * (
+            m1.values * 0 + 1
+        )
 
         # Remove the last element from each column, and then rank again
         m2 = triangle[triangle.valuation < triangle.valuation_date].link_ratio
-        m2.values = xp.apply_along_axis(
-            func1d=rankdata,
-            axis=2,
-            arr=m2.values
-        ) * (m2.values * 0 + 1)
+        m2.values = xp.apply_along_axis(func1d=rankdata, axis=2, arr=m2.values) * (
+            m2.values * 0 + 1
+        )
 
         m1 = m2.copy()
 
@@ -122,33 +110,20 @@ def __init__(
         self.t_variance = 2 / ((I - 2) * (I - 3))
 
         # array of t values
-        self.t = pd.DataFrame(
-            self.t[0, 0, ...],
-            columns=k,
-            index=["T_k"]
-        )
+        self.t = pd.DataFrame(self.t[0, 0, ...], columns=k, index=["T_k"])
 
         # array of weights
-        self.weights = pd.DataFrame(
-            weight[0, 0, ...],
-            columns=k,
-            index=["I-k-1"]
-        )
+        self.weights = pd.DataFrame(weight[0, 0, ...], columns=k, index=["I-k-1"])
 
         # final big T
         self.t_expectation = pd.DataFrame(
-            t_expectation[..., 0, 0],
-            columns=triangle.vdims,
-            index=idx
+            t_expectation[..., 0, 0], columns=triangle.vdims, index=idx
         )
 
         # table of Spearman's rank coefficients Tk, can be used to verify consistency with paper
-        self.corr = pd.concat([
-            self.t,
-            self.weights
-        ])
+        self.corr = pd.concat([self.t, self.weights])
 
-        self.corr.columns.names = ['k']
+        self.corr.columns.names = ["k"]
 
         # construct confidence interval based on selection of p_critical
         self.confidence_interval = (
@@ -198,18 +173,9 @@ class ValuationCorrelation:
         The variance value of Z.
     """
 
-    def __init__(
-        self,
-        triangle: Triangle,
-        p_critical: float = 0.1,
-        total: bool = True
-    ):
-
-        def pZlower(
-            z: int,
-            n: int,
-            p: float = 0.5
-        ) -> float:
+    def __init__(self, triangle: Triangle, p_critical: float = 0.1, total: bool = True):
+
+        def pZlower(z: int, n: int, p: float = 0.5) -> float:
             return min(1, 2 * binom.cdf(z, n, p))
 
         self.p_critical = p_critical
@@ -223,31 +189,27 @@ def pZlower(
         lr = triangle.link_ratio
 
         # Rank link ratios for each column
-        m1 = xp.apply_along_axis(
-            func1d=rankdata,
-            axis=2,
-            arr=lr.values) * (lr.values * 0 + 1)
-
-        med = xp.nanmedian(
-            a=m1,
-            axis=2,
-            keepdims=True
-        )
+        m1 = xp.apply_along_axis(func1d=rankdata, axis=2, arr=lr.values) * (
+            lr.values * 0 + 1
+        )
+
+        med = xp.nanmedian(a=m1, axis=2, keepdims=True)
+        # print("med:\n", med)
 
         m1large = (xp.nan_to_num(m1) > med) + (lr.values * 0)
         m1small = (xp.nan_to_num(m1) < med) + (lr.values * 0)
         m2large = triangle.link_ratio
         m2large.values = m1large
         m2small = triangle.link_ratio
         m2small.values = m1small
-        S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).set_backend('numpy').values)
-        L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).set_backend('numpy').values)
+        S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).set_backend("numpy").values)
+        L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).set_backend("numpy").values)
         z = xp.minimum(L, S)
         n = L + S
         m = xp.floor((n - 1) / 2)
         c = comb(n - 1, m)
-        EZ = (n / 2) - c * n / (2 ** n)
-        VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2 ** n) + EZ - EZ ** 2
+        EZ = (n / 2) - c * n / (2**n)
+        VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2**n) + EZ - EZ**2
         if not self.total:
             T = []
             for i in range(0, xp.max(m1large.shape[2:]) + 1):
@@ -296,9 +258,7 @@ def pZlower(
         )
 
 
-def validate_critical(
-    p_critical: float
-) -> None:
+def validate_critical(p_critical: float) -> None:
     """
     Checks whether value passed to the p_critical parameter in ValuationCorrelation or DevelopmentCorrelation
     classes is a percentage, that is, between 0 and 1.
@@ -311,4 +271,4 @@
     if 0 <= p_critical <= 1:
         pass
    else:
-        raise ValueError('p_critical must be between 0 and 1.')
+        raise ValueError("p_critical must be between 0 and 1.")
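For orientation on the reformatted math above: for each calendar period, `ValuationCorrelation` counts link ratios above (L) and below (S) their column medians, takes Z = min(L, S), and compares it against the closed-form E[Z] and Var[Z], with `pZlower` giving a two-sided binomial tail probability. A self-contained sketch of that statistic; the helper name `z_statistic` and the example counts are hypothetical, not part of the library:

```python
import numpy as np
from scipy.special import comb
from scipy.stats import binom

def z_statistic(L: int, S: int, p: float = 0.5):
    # Z is the smaller of the above-median (L) and below-median (S) counts.
    z = min(L, S)
    n = L + S
    m = np.floor((n - 1) / 2)
    c = comb(n - 1, m)
    # Closed-form moments, exactly as in the diff:
    EZ = n / 2 - c * n / (2**n)
    VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2**n) + EZ - EZ**2
    # Two-sided binomial tail, mirroring pZlower:
    p_value = min(1, 2 * binom.cdf(z, n, p))
    return z, EZ, VarZ, p_value

# E.g. a calendar period with 2 "large" and 7 "small" link ratios:
z, EZ, VarZ, p_value = z_statistic(L=2, S=7)
```

Under the test's logic, a tail probability smaller than `p_critical` is what flags a calendar-period effect.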
31 changes: 21 additions & 10 deletions chainladder/core/triangle.py
@@ -6,6 +6,7 @@
 import numpy as np
 import copy
 import warnings
+from packaging import version
 from chainladder.core.base import TriangleBase
 from chainladder.utils.sparse import sp
 from chainladder.core.slice import VirtualColumns
@@ -125,7 +126,7 @@ def __init__(
             return
         elif not isinstance(data, pd.DataFrame) and hasattr(data, "__dataframe__"):
             data = self._interchange_dataframe(data)
-
+
         index, columns, origin, development = self._input_validation(
             data, index, columns, origin, development
         )
@@ -276,7 +277,7 @@ def __init__(
             self.ddims = obj.ddims
             self.values = obj.values
             self.valuation_date = pd.Timestamp(options.ULT_VAL)
-
+
     @staticmethod
     def _split_ult(data, index, columns, origin, development):
         """Deal with triangles with ultimate values"""
@@ -330,17 +331,25 @@ def origin(self):
         if self.is_pattern and len(self.odims) == 1:
             return pd.Series(["(All)"])
         else:
-            freq = {"Y": "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A",
-                    "S": "2Q", "H": "2Q"}.get(
-                self.origin_grain, self.origin_grain
-            )
+            freq = {
+                "Y": (
+                    "A"
+                    if version.Version(pd.__version__) >= version.Version("2.2.0")
+                    else "Y"
+                ),
+                "S": "2Q",
+                "H": "2Q",
+            }.get(self.origin_grain, self.origin_grain)
             freq = freq if freq == "M" else freq + "-" + self.origin_close
             return pd.DatetimeIndex(self.odims, name="origin").to_period(freq=freq)
 
     @origin.setter
     def origin(self, value):
         self._len_check(self.origin, value)
-        freq = {"Y": "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A", "S": "2Q"}.get(self.origin_grain, self.origin_grain)
+        freq = {
+            "Y": "Y" if float(".".join(pd.__version__.split(".")[:-1])) < 2.2 else "A",
+            "S": "2Q",
+        }.get(self.origin_grain, self.origin_grain)
         freq = freq if freq == "M" else freq + "-" + self.origin_close
         value = pd.PeriodIndex(list(value), freq=freq)
         self.odims = value.to_timestamp().values
@@ -693,9 +702,11 @@ def grain(self, grain="", trailing=False, inplace=False):
             obj.origin_close = origin_period_end
             d_start = pd.Period(
                 obj.valuation[0],
-                freq=dgrain_old
-                if dgrain_old == "M"
-                else dgrain_old + obj.origin.freqstr[-4:],
+                freq=(
+                    dgrain_old
+                    if dgrain_old == "M"
+                    else dgrain_old + obj.origin.freqstr[-4:]
+                ),
             ).to_timestamp(how="s")
             if len(obj.ddims) > 1 and obj.origin.to_timestamp(how="s")[0] != d_start:
                 addl_ts = (
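To make the origin property's frequency mapping concrete: the triangle grain is mapped to a pandas alias (with the annual alias chosen by the same pandas 2.2 version gate as in base.py), and anything coarser than monthly is suffixed with the fiscal close month. A standalone sketch; `origin_freq` is an illustrative helper, not a library function:

```python
import pandas as pd
from packaging import version

def origin_freq(origin_grain: str, origin_close: str = "DEC") -> str:
    # Annual alias depends on the installed pandas, as in the diff.
    annual = (
        "A"
        if version.Version(pd.__version__) >= version.Version("2.2.0")
        else "Y"
    )
    # Semesters ("S"/"H") are treated as two-quarter periods.
    freq = {"Y": annual, "S": "2Q", "H": "2Q"}.get(origin_grain, origin_grain)
    # Monthly needs no anchor month; everything else gets the close month.
    return freq if freq == "M" else freq + "-" + origin_close

print(origin_freq("Y"))  # "A-DEC" on pandas >= 2.2, "Y-DEC" otherwise
print(origin_freq("Q"))  # "Q-DEC"
print(origin_freq("M"))  # "M"
```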
