From 0d0716ef955bb30326467f0f9004bf92cf179f10 Mon Sep 17 00:00:00 2001
From: "Kenneth S. Hsu" <kennethshsu@gmail.com>
Date: Wed, 1 May 2024 05:44:37 -0700
Subject: [PATCH] #510 (#511)

* Debugging

* Addressed the A-DEC vs Y-DEC frequency alias

* Undoing the change

* Removed debugger

* Reversed the Y and A
---
 chainladder/core/base.py        | 29 ++++++----
 chainladder/core/correlation.py | 94 ++++++++++-----------------------
 chainladder/core/triangle.py    | 31 +++++++----
 3 files changed, 68 insertions(+), 86 deletions(-)

diff --git a/chainladder/core/base.py b/chainladder/core/base.py
index 5ddf1af9..5b25750b 100644
--- a/chainladder/core/base.py
+++ b/chainladder/core/base.py
@@ -256,8 +256,7 @@ def _to_datetime(data, fields, period_end=False, format=None):
     def _development_lag(origin, valuation):
         """For tabular format, this will convert the origin/valuation
         difference to a development lag"""
-        return ((valuation - origin) / (365.25/12)).dt.round('1d').dt.days
-
+        return ((valuation - origin) / (365.25 / 12)).dt.round("1d").dt.days
 
     @staticmethod
     def _get_grain(dates, trailing=False, kind="origin"):
@@ -274,9 +273,19 @@ def _get_grain(dates, trailing=False, kind="origin"):
         months = dates.dt.month.unique()
         diffs = np.diff(np.sort(months))
         if len(dates.unique()) == 1:
-            grain = "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A"
+            grain = (
+                "A"
+                if version.Version(pd.__version__) >= version.Version("2.2.0")
+                else "Y"
+            )
+
         elif len(months) == 1:
-            grain = "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A"
+            grain = (
+                "A"
+                if version.Version(pd.__version__) >= version.Version("2.2.0")
+                else "Y"
+            )
+
         elif np.all(diffs == 6):
             grain = "2Q"
         elif np.all(diffs == 3):
@@ -402,7 +411,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
             return obj
         else:
             raise NotImplementedError()
-        
+
     def _interchange_dataframe(self, data):
         """
         Convert an object supporting the __dataframe__ protocol to a pandas DataFrame.
@@ -420,12 +429,14 @@ def _interchange_dataframe(self, data):
         # Check if pandas version is greater than 1.5.2
         if version.parse(pd.__version__) >= version.parse("1.5.2"):
             return pd.api.interchange.from_dataframe(data)
-        
+
         else:
             # Raise an error prompting the user to upgrade pandas
-            raise NotImplementedError("Your version of pandas does not support the DataFrame interchange API. "
-                                    "Please upgrade pandas to a version greater than 1.5.2 to use this feature.")
-            
+            raise NotImplementedError(
+                "Your version of pandas does not support the DataFrame interchange API. "
+                "Please upgrade pandas to a version greater than 1.5.2 to use this feature."
+            )
+
     def __array_function__(self, func, types, args, kwargs):
         from chainladder.utils.utility_functions import concat
 
diff --git a/chainladder/core/correlation.py b/chainladder/core/correlation.py
index 01626106..a9bdfb37 100644
--- a/chainladder/core/correlation.py
+++ b/chainladder/core/correlation.py
@@ -8,11 +8,7 @@
 
 from scipy.special import comb
 
-from scipy.stats import (
-    binom,
-    norm,
-    rankdata
-)
+from scipy.stats import binom, norm, rankdata
 
 from typing import TYPE_CHECKING
 
@@ -52,11 +48,7 @@ class DevelopmentCorrelation:
         to be significant.
     """
 
-    def __init__(
-            self,
-            triangle,
-            p_critical: float = 0.5
-    ):
+    def __init__(self, triangle, p_critical: float = 0.5):
         self.p_critical = p_critical
 
         # Check that critical value is a probability
@@ -69,19 +61,15 @@ def __init__(
         m1 = triangle.link_ratio
 
         # Rank link ratios by development period, assigning a score of 1 for the lowest
-        m1_val = xp.apply_along_axis(
-            func1d=rankdata,
-            axis=2,
-            arr=m1.values
-        ) * (m1.values * 0 + 1)
+        m1_val = xp.apply_along_axis(func1d=rankdata, axis=2, arr=m1.values) * (
+            m1.values * 0 + 1
+        )
 
         # Remove the last element from each column, and then rank again
         m2 = triangle[triangle.valuation < triangle.valuation_date].link_ratio
-        m2.values = xp.apply_along_axis(
-            func1d=rankdata,
-            axis=2,
-            arr=m2.values
-        ) * (m2.values * 0 + 1)
+        m2.values = xp.apply_along_axis(func1d=rankdata, axis=2, arr=m2.values) * (
+            m2.values * 0 + 1
+        )
 
         m1 = m2.copy()
 
@@ -122,33 +110,20 @@ def __init__(
         self.t_variance = 2 / ((I - 2) * (I - 3))
 
         # array of t values
-        self.t = pd.DataFrame(
-            self.t[0, 0, ...],
-            columns=k,
-            index=["T_k"]
-        )
+        self.t = pd.DataFrame(self.t[0, 0, ...], columns=k, index=["T_k"])
 
         # array of weights
-        self.weights = pd.DataFrame(
-            weight[0, 0, ...],
-            columns=k,
-            index=["I-k-1"]
-        )
+        self.weights = pd.DataFrame(weight[0, 0, ...], columns=k, index=["I-k-1"])
 
         # final big T
         self.t_expectation = pd.DataFrame(
-            t_expectation[..., 0, 0],
-            columns=triangle.vdims,
-            index=idx
+            t_expectation[..., 0, 0], columns=triangle.vdims, index=idx
         )
 
         # table of Spearman's rank coefficients Tk, can be used to verify consistency with paper
-        self.corr = pd.concat([
-            self.t,
-            self.weights
-        ])
+        self.corr = pd.concat([self.t, self.weights])
 
-        self.corr.columns.names = ['k']
+        self.corr.columns.names = ["k"]
 
         # construct confidence interval based on selection of p_critical
         self.confidence_interval = (
@@ -198,18 +173,9 @@ class ValuationCorrelation:
         The variance value of Z.
     """
 
-    def __init__(
-            self,
-            triangle: Triangle,
-            p_critical: float = 0.1,
-            total: bool = True
-    ):
-
-        def pZlower(
-            z: int,
-            n: int,
-            p: float = 0.5
-        ) -> float:
+    def __init__(self, triangle: Triangle, p_critical: float = 0.1, total: bool = True):
+
+        def pZlower(z: int, n: int, p: float = 0.5) -> float:
             return min(1, 2 * binom.cdf(z, n, p))
 
         self.p_critical = p_critical
@@ -223,31 +189,27 @@ def pZlower(
         lr = triangle.link_ratio
 
         # Rank link ratios for each column
-        m1 = xp.apply_along_axis(
-            func1d=rankdata,
-            axis=2,
-            arr=lr.values) * (lr.values * 0 + 1)
-
-        med = xp.nanmedian(
-            a=m1,
-            axis=2,
-            keepdims=True
+        m1 = xp.apply_along_axis(func1d=rankdata, axis=2, arr=lr.values) * (
+            lr.values * 0 + 1
         )
 
+        med = xp.nanmedian(a=m1, axis=2, keepdims=True)
+        # med is the median rank along axis 2; ranks are compared against it below
+
         m1large = (xp.nan_to_num(m1) > med) + (lr.values * 0)
         m1small = (xp.nan_to_num(m1) < med) + (lr.values * 0)
         m2large = triangle.link_ratio
         m2large.values = m1large
         m2small = triangle.link_ratio
         m2small.values = m1small
-        S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).set_backend('numpy').values)
-        L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).set_backend('numpy').values)
+        S = xp.nan_to_num(m2small.dev_to_val().sum(axis=2).set_backend("numpy").values)
+        L = xp.nan_to_num(m2large.dev_to_val().sum(axis=2).set_backend("numpy").values)
         z = xp.minimum(L, S)
         n = L + S
         m = xp.floor((n - 1) / 2)
         c = comb(n - 1, m)
-        EZ = (n / 2) - c * n / (2 ** n)
-        VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2 ** n) + EZ - EZ ** 2
+        EZ = (n / 2) - c * n / (2**n)
+        VarZ = n * (n - 1) / 4 - c * n * (n - 1) / (2**n) + EZ - EZ**2
         if not self.total:
             T = []
             for i in range(0, xp.max(m1large.shape[2:]) + 1):
@@ -296,9 +258,7 @@ def pZlower(
             )
 
 
-def validate_critical(
-        p_critical: float
-) -> None:
+def validate_critical(p_critical: float) -> None:
     """
     Checks whether value passed to the p_critical parameter in ValuationCorrelation or DevelopmentCorrelation
     classes is a percentage, that is, between 0 and 1.
@@ -311,4 +271,4 @@ def validate_critical(
     if 0 <= p_critical <= 1:
         pass
     else:
-        raise ValueError('p_critical must be between 0 and 1.')
+        raise ValueError("p_critical must be between 0 and 1.")
diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py
index 9b6ad873..9499a6c0 100644
--- a/chainladder/core/triangle.py
+++ b/chainladder/core/triangle.py
@@ -6,6 +6,7 @@
 import numpy as np
 import copy
 import warnings
+from packaging import version
 from chainladder.core.base import TriangleBase
 from chainladder.utils.sparse import sp
 from chainladder.core.slice import VirtualColumns
@@ -125,7 +126,7 @@ def __init__(
             return
         elif not isinstance(data, pd.DataFrame) and hasattr(data, "__dataframe__"):
             data = self._interchange_dataframe(data)
-        
+
         index, columns, origin, development = self._input_validation(
             data, index, columns, origin, development
         )
@@ -276,7 +277,7 @@ def __init__(
             self.ddims = obj.ddims
             self.values = obj.values
             self.valuation_date = pd.Timestamp(options.ULT_VAL)
-    
+
     @staticmethod
     def _split_ult(data, index, columns, origin, development):
         """Deal with triangles with ultimate values"""
@@ -330,17 +331,25 @@ def origin(self):
         if self.is_pattern and len(self.odims) == 1:
             return pd.Series(["(All)"])
         else:
-            freq = {"Y": "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A", 
-                    "S": "2Q", "H": "2Q"}.get(
-                self.origin_grain, self.origin_grain
-            )
+            freq = {
+                "Y": (
+                    "A"
+                    if version.Version(pd.__version__) >= version.Version("2.2.0")
+                    else "Y"
+                ),
+                "S": "2Q",
+                "H": "2Q",
+            }.get(self.origin_grain, self.origin_grain)
             freq = freq if freq == "M" else freq + "-" + self.origin_close
             return pd.DatetimeIndex(self.odims, name="origin").to_period(freq=freq)
 
     @origin.setter
     def origin(self, value):
         self._len_check(self.origin, value)
-        freq = {"Y": "Y" if float('.'.join(pd.__version__.split('.')[:-1])) < 2.2 else "A", "S": "2Q"}.get(self.origin_grain, self.origin_grain)
+        # Compare parsed versions: float("2.10") == 2.1 would misorder pandas 2.10.
+        # Same check as the origin getter, so both build the same period frequency.
+        annual = "A" if version.parse(pd.__version__) >= version.parse("2.2") else "Y"
+        freq = {"Y": annual, "S": "2Q"}.get(self.origin_grain, self.origin_grain)
         freq = freq if freq == "M" else freq + "-" + self.origin_close
         value = pd.PeriodIndex(list(value), freq=freq)
         self.odims = value.to_timestamp().values
@@ -693,9 +702,11 @@ def grain(self, grain="", trailing=False, inplace=False):
             obj.origin_close = origin_period_end
             d_start = pd.Period(
                 obj.valuation[0],
-                freq=dgrain_old
-                if dgrain_old == "M"
-                else dgrain_old + obj.origin.freqstr[-4:],
+                freq=(
+                    dgrain_old
+                    if dgrain_old == "M"
+                    else dgrain_old + obj.origin.freqstr[-4:]
+                ),
             ).to_timestamp(how="s")
             if len(obj.ddims) > 1 and obj.origin.to_timestamp(how="s")[0] != d_start:
                 addl_ts = (