From 6a89d413065a0ecb5109f8a0153d257b4066a6ad Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Thu, 16 May 2024 17:10:12 -0700 Subject: [PATCH 01/10] Added tests per #514 --- .../development/tests/test_development.py | 157 +++++++++++------- 1 file changed, 101 insertions(+), 56 deletions(-) diff --git a/chainladder/development/tests/test_development.py b/chainladder/development/tests/test_development.py index 19ad0f62..09ef29fc 100644 --- a/chainladder/development/tests/test_development.py +++ b/chainladder/development/tests/test_development.py @@ -119,6 +119,13 @@ def test_drophighlow(): ) assert np.all(lhs == rhs) + tri = cl.load_sample("prism")["Paid"].sum().grain("OYDQ") + no_drop = cl.Development().fit_transform(tri).cdf_.to_frame().values + drop_high = cl.Development(drop_high=True).fit_transform(tri).cdf_.to_frame().values + drop_low = cl.Development(drop_low=True).fit_transform(tri).cdf_.to_frame().values + assert (drop_low >= no_drop).all() + assert (no_drop >= drop_high).all() + def test_dropabovebelow(): raa = cl.load_sample("raa") @@ -173,91 +180,129 @@ def test_assymetric_development(atol): dev2 = cl.Development(n_periods=1, average="regression").fit(quarterly) assert xp.allclose(dev.ldf_.values, dev2.ldf_.values, atol=atol) + def test_hilo_multiple_indices(clrd): - tri = clrd.groupby('LOB')['CumPaidLoss'].sum() + tri = clrd.groupby("LOB")["CumPaidLoss"].sum() assert ( - cl.Development(n_periods=5).fit(tri).ldf_.loc['wkcomp'] == - cl.Development(n_periods=5).fit(tri.loc['wkcomp']).ldf_) + cl.Development(n_periods=5).fit(tri).ldf_.loc["wkcomp"] + == cl.Development(n_periods=5).fit(tri.loc["wkcomp"]).ldf_ + ) assert ( - cl.Development(drop_low=2).fit(tri).ldf_.loc['wkcomp'] == - cl.Development(drop_low=2).fit(tri.loc['wkcomp']).ldf_) + cl.Development(drop_low=2).fit(tri).ldf_.loc["wkcomp"] + == cl.Development(drop_low=2).fit(tri.loc["wkcomp"]).ldf_ + ) + def test_new_drop_1(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() - #n_periods - return compare_new_drop(cl.Development(n_periods = 4).fit(clrd),clrd) + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() + # n_periods + return compare_new_drop(cl.Development(n_periods=4).fit(clrd), clrd) + def test_new_drop_2(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() - #single drop and drop_valuation - return compare_new_drop(cl.Development(drop = ("1992",12),drop_valuation = 1993).fit(clrd),clrd) + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() + # single drop and drop_valuation + return compare_new_drop( + cl.Development(drop=("1992", 12), drop_valuation=1993).fit(clrd), clrd + ) + def test_new_drop_3(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() - #multiple drop and drop_valuation - return compare_new_drop(cl.Development(drop = [("1992",12),("1996",24)],drop_valuation = [1993,1995]).fit(clrd),clrd) + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() + # multiple drop and drop_valuation + return compare_new_drop( + cl.Development( + drop=[("1992", 12), ("1996", 24)], drop_valuation=[1993, 1995] + ).fit(clrd), + clrd, + ) + def test_new_drop_4(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() - #drop_hi/low without preserve - return compare_new_drop(cl.Development(drop_high = 1, drop_low = 1).fit(clrd),clrd) + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() + # drop_hi/low without preserve + return compare_new_drop(cl.Development(drop_high=1, drop_low=1).fit(clrd), clrd) + def test_new_drop_5(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() - #drop_hi/low without preserve - return compare_new_drop(cl.Development(drop_high = 1, drop_low = 1,preserve = 3).fit(clrd),clrd) + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() + # drop_hi/low without preserve + return compare_new_drop( + cl.Development(drop_high=1, drop_low=1, preserve=3).fit(clrd), clrd + ) + def test_new_drop_5a(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() - #drop_hi/low without preserve + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() + # drop_hi/low without preserve assert np.array_equal( - cl.Development(drop_high = 1, drop_low = 1,preserve = 3)._set_weight_func(clrd.age_to_age, clrd.age_to_age).values, - cl.Development(drop_high = True, drop_low = [True, True, True, True, True, True, True, True, True] ,preserve = 3)._set_weight_func(clrd.age_to_age).values, - True + cl.Development(drop_high=1, drop_low=1, preserve=3) + ._set_weight_func(clrd.age_to_age, clrd.age_to_age) + .values, + cl.Development( + drop_high=True, + drop_low=[True, True, True, True, True, True, True, True, True], + preserve=3, + ) + ._set_weight_func(clrd.age_to_age) + .values, + True, ) + def test_new_drop_6(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() - #drop_above/below without preserve - return compare_new_drop(cl.Development(drop_above = 1.01,drop_below = 0.95).fit(clrd),clrd) + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() + # drop_above/below without preserve + return compare_new_drop( + cl.Development(drop_above=1.01, drop_below=0.95).fit(clrd), clrd + ) + def test_new_drop_7(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() - #drop_above/below with preserve - return compare_new_drop(cl.Development(drop_above = 1.01,drop_below = 0.95,preserve=3).fit(clrd),clrd) + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() + # drop_above/below with preserve + return compare_new_drop( + cl.Development(drop_above=1.01, drop_below=0.95, preserve=3).fit(clrd), clrd + ) -def compare_new_drop(dev,tri): - assert ( - np.array_equal( - dev._set_weight_func(tri.age_to_age, tri.age_to_age).values, - dev.transform(tri).age_to_age.values*0+1, - True - ) + +def compare_new_drop(dev, tri): + assert np.array_equal( + dev._set_weight_func(tri.age_to_age, tri.age_to_age).values, + dev.transform(tri).age_to_age.values * 0 + 1, + True, ) + def test_4d_drop(clrd): - clrd = clrd.groupby("LOB").sum()[["CumPaidLoss","IncurLoss"]] + clrd = clrd.groupby("LOB").sum()[["CumPaidLoss", "IncurLoss"]] assert ( - cl.Development(n_periods = 4).fit_transform(clrd.iloc[0,0]).link_ratio == - cl.Development(n_periods = 4).fit_transform(clrd).link_ratio.iloc[0,0]) + cl.Development(n_periods=4).fit_transform(clrd.iloc[0, 0]).link_ratio + == cl.Development(n_periods=4).fit_transform(clrd).link_ratio.iloc[0, 0] + ) + def test_pipeline(clrd): - clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum() + clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum() dev1 = cl.Development( - n_periods = 7, - drop_valuation = 1995, - drop = ("1992",12), - drop_above = 1.05, - drop_below = .95, - drop_high = 1, - drop_low = 1 + n_periods=7, + drop_valuation=1995, + drop=("1992", 12), + drop_above=1.05, + drop_below=0.95, + drop_high=1, + drop_low=1, ).fit(clrd) - pipe = cl.Pipeline(steps=[ - ('n_periods', cl.Development(n_periods = 7)), - ('drop_valuation', cl.Development(drop_valuation = 1995)), - ('drop', cl.Development(drop = ("1992",12))), - ('drop_abovebelow', cl.Development(drop_above = 1.05, drop_below = .95)), - ('drop_hilo', cl.Development(drop_high = 1, drop_low = 1))] + pipe = cl.Pipeline( + steps=[ + ("n_periods", cl.Development(n_periods=7)), + ("drop_valuation", cl.Development(drop_valuation=1995)), + ("drop", cl.Development(drop=("1992", 12))), + ("drop_abovebelow", cl.Development(drop_above=1.05, drop_below=0.95)), + ("drop_hilo", cl.Development(drop_high=1, drop_low=1)), + ] ) dev2 = pipe.fit(X=clrd) - assert np.array_equal(dev1.w_v2_.values,dev2.named_steps.drop_hilo.w_v2_.values,True) \ No newline at end of file + assert np.array_equal( + dev1.w_v2_.values, dev2.named_steps.drop_hilo.w_v2_.values, True + ) From 22d5d8d9560ec83225b53ec2182a5b3501b1eea8 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Thu, 16 May 2024 17:14:21 -0700 Subject: [PATCH 02/10] Removed secondary_rank --- chainladder/development/base.py | 23 ++++++++--------- chainladder/development/development.py | 34 ++++++++++++++++++++------ 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/chainladder/development/base.py b/chainladder/development/base.py index a1a2107c..769583df 100644 --- a/chainladder/development/base.py +++ b/chainladder/development/base.py @@ -356,9 +356,10 @@ def _param_array_helper(self, size, param, default_value): param_array = param_array.astype(type(default_value)) return param_array.to_numpy() - def _set_weight_func(self, factor, secondary_rank=None): + def _set_weight_func(self, factor): w = (~np.isnan(factor.values)).astype(float) w = w * self._assign_n_periods_weight_func(factor) + if self.drop is not None: w = w * self._drop_func(factor) @@ -369,9 +370,11 @@ def _set_weight_func(self, factor, secondary_rank=None): w = w * self._drop_x_func(factor) if (self.drop_high is not None) | (self.drop_low is not None): - w = w * self._drop_n_func(factor * num_to_nan(w), secondary_rank) + w = w * self._drop_n_func(factor * num_to_nan(w)) + w_tri = factor.copy() w_tri.values = num_to_nan(w) + return w_tri def _assign_n_periods_weight_func(self, factor): @@ -517,18 +520,11 @@ def _drop_x_func(self, factor): return w.transpose((0, 1, 3, 2)).astype(float) # for drop_high and drop_low - def _drop_n_func(self, factor, secondary_rank=None): + def _drop_n_func(self, factor): # getting dimensions of factor for various manipulation factor_val = factor.values.copy() - # secondary rank is the optional triangle that breaks ties in factor - # the original use case is for dropping the link ratio of 1 with the lowest loss value - # (pass in a reverse rank of loss to drop link of ratio of 1 with the highest loss value) - # leaving to user to ensure that secondary rank is the same dimensions as factor - # also leaving to user to pick whether to trim head or tail - if secondary_rank is None: - sec_rank_val = factor_val.copy() - else: - sec_rank_val = secondary_rank.values.copy() + sec_rank_val = factor_val.copy() + factor_len = factor_val.shape[3] indices = factor_val.shape[0] columns = factor_val.shape[1] @@ -538,16 +534,17 @@ def _drop_n_func(self, factor, secondary_rank=None): drop_high_array[:, :, :] = self._param_array_helper( factor_len, self.drop_high, 0 )[None, None] + drop_low_array = np.zeros((indices, columns, factor_len)) drop_low_array[:, :, :] = self._param_array_helper( factor_len, self.drop_low, 0 )[None, None] + preserve_array = np.zeros((indices, columns, factor_len)) preserve_array[:, :, :] = self._param_array_helper( factor_len, self.preserve, self.preserve )[None, None] - # ranking factors by itself and secondary rank factor_ranks = np.lexsort((sec_rank_val, factor_val), axis=2).argsort(axis=2) # setting up starting weights diff --git a/chainladder/development/development.py b/chainladder/development/development.py index 48f5c6a2..f91d3d7f 100644 --- a/chainladder/development/development.py +++ b/chainladder/development/development.py @@ -119,27 +119,36 @@ def fit(self, X, y=None, sample_weight=None): from chainladder.utils.utility_functions import num_to_nan # Triangle must be cumulative and in "development" mode + obj = self._set_fit_groups(X).incr_to_cum().val_to_dev().copy() xp = obj.get_array_module() if self.fillna: tri_array = num_to_nan((obj + self.fillna).values) else: tri_array = num_to_nan(obj.values.copy()) - average_ = self._validate_assumption(X, self.average, axis=3)[... , :X.shape[3]-1] + average_ = self._validate_assumption(X, self.average, axis=3)[ + ..., : X.shape[3] - 1 + ] self.average_ = average_.flatten() - n_periods_ = self._validate_assumption(X, self.n_periods, axis=3)[... , :X.shape[3]-1] + n_periods_ = self._validate_assumption(X, self.n_periods, axis=3)[ + ..., : X.shape[3] - 1 + ] x, y = tri_array[..., :-1], tri_array[..., 1:] exponent = xp.array( - [{"regression": 0, "volume": 1, "simple": 2}[x] - for x in average_[0, 0, 0]] + [{"regression": 0, "volume": 1, "simple": 2}[x] for x in average_[0, 0, 0]] ) exponent = xp.nan_to_num(exponent * (y * 0 + 1)) link_ratio = y / x if hasattr(X, "w_v2_"): - self.w_v2_ = self._set_weight_func(obj.age_to_age * X.w_v2_,obj.iloc[...,:-1,:-1]) + self.w_v2_ = self._set_weight_func( + factor=obj.age_to_age * X.w_v2_, + ) else: - self.w_v2_ = self._set_weight_func(obj.age_to_age,obj.iloc[...,:-1,:-1]) + self.w_v2_ = self._set_weight_func( + factor=obj.age_to_age, + ) + self.w_ = self._assign_n_periods_weight( obj, n_periods_ ) * self._drop_adjustment(obj, link_ratio) @@ -164,7 +173,7 @@ def fit(self, X, y=None, sample_weight=None): self.sigma_ = self._param_property(obj, params, 1) self.std_err_ = self._param_property(obj, params, 2) resid = -obj.iloc[..., :-1] * self.ldf_.values + obj.iloc[..., 1:].values - std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_ ** 2).values) + std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_**2).values) resid = resid / num_to_nan(std) self.std_residuals_ = resid[resid.valuation < obj.valuation_date] return self @@ -184,7 +193,16 @@ def transform(self, X): """ X_new = X.copy() X_new.group_index = self._set_transform_groups(X_new) - triangles = ["std_err_", "ldf_", "sigma_","std_residuals_","average_", "w_", "sigma_interpolation","w_v2_"] + triangles = [ + "std_err_", + "ldf_", + "sigma_", + "std_residuals_", + "average_", + "w_", + "sigma_interpolation", + "w_v2_", + ] for item in triangles: setattr(X_new, item, getattr(self, item)) X_new._set_slicers() From 0181b699861aca044167e5154d1ad9ba18d0b979 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Thu, 16 May 2024 17:28:38 -0700 Subject: [PATCH 03/10] Addressed "Resampling with a PeriodIndex is deprecated. Cast index to DatetimeIndex before resampling instead." --- chainladder/core/triangle.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index c8852045..7a777a2d 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -666,10 +666,12 @@ def grain(self, grain="", trailing=False, inplace=False): "M": ["Y", "S", "Q", "M"], "S": ["S", "Y"], } + if ograin_new not in valid.get(ograin_old, []) or dgrain_new not in valid.get( dgrain_old, [] ): raise ValueError("New grain not compatible with existing grain") + if ( self.is_cumulative is None and dgrain_old != dgrain_new @@ -678,25 +680,33 @@ def grain(self, grain="", trailing=False, inplace=False): raise AttributeError( "The is_cumulative attribute must be set before using grain method." ) + if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new): raise ValueError("Origin grain must be coarser than development grain") + if self.is_full and not self.is_ultimate and not self.is_val_tri: warnings.warn("Triangle includes extraneous development lags") + obj = self.dev_to_val() + if ograin_new != ograin_old: freq = {"Y": "Y", "S": "2Q"}.get(ograin_new, ograin_new) + if trailing or (obj.origin.freqstr[-3:] != "DEC" and ograin_old != "M"): origin_period_end = self.origin[-1].strftime("%b").upper() else: origin_period_end = "DEC" + indices = ( - pd.Series(range(len(self.origin)), index=self.origin) + pd.Series(range(len(self.origin)), index=self.origin.to_timestamp()) .resample("-".join([freq, origin_period_end])) .indices ) + groups = pd.concat( [pd.Series([k] * len(v), index=v) for k, v in indices.items()], axis=0 ).values + obj = obj.groupby(groups, axis=2).sum() obj.origin_close = origin_period_end d_start = pd.Period( @@ -707,6 +717,7 @@ def grain(self, grain="", trailing=False, inplace=False): else dgrain_old + obj.origin.freqstr[-4:] ), ).to_timestamp(how="s") + if len(obj.ddims) > 1 and obj.origin.to_timestamp(how="s")[0] != d_start: addl_ts = ( pd.period_range(obj.odims[0], obj.valuation[0], freq=dgrain_old)[ @@ -719,11 +730,13 @@ def grain(self, grain="", trailing=False, inplace=False): addl.ddims = addl_ts obj = concat((addl, obj), axis=-1) obj.values = num_to_nan(obj.values) + if dgrain_old != dgrain_new and obj.shape[-1] > 1: step = self._dstep()[dgrain_old][dgrain_new] d = np.sort( len(obj.development) - np.arange(0, len(obj.development), step) - 1 ) + if obj.is_cumulative: obj = obj.iloc[..., d] else: @@ -731,11 +744,15 @@ def grain(self, grain="", trailing=False, inplace=False): d2 = [d[0]] * (d[0] + 1) + list(np.repeat(np.array(d[1:]), step)) obj = obj.groupby(d2, axis=3).sum() obj.ddims = ddims + obj.development_grain = dgrain_new + obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev() + if inplace: self = obj return self + return obj def trend( From cfa416711db31561240ce043d75a89aad5d5ab77 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Thu, 16 May 2024 17:29:09 -0700 Subject: [PATCH 04/10] Addressed "FutureWarning: 'Y-DEC' is deprecated and will be removed in a future version, please use 'YE-DEC' instead." --- chainladder/core/triangle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index 7a777a2d..adba912d 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -690,7 +690,7 @@ def grain(self, grain="", trailing=False, inplace=False): obj = self.dev_to_val() if ograin_new != ograin_old: - freq = {"Y": "Y", "S": "2Q"}.get(ograin_new, ograin_new) + freq = {"Y": "YE", "S": "2Q"}.get(ograin_new, ograin_new) if trailing or (obj.origin.freqstr[-3:] != "DEC" and ograin_old != "M"): origin_period_end = self.origin[-1].strftime("%b").upper() From eef91e67ad9150567374b2673dada2015863ba0e Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Thu, 16 May 2024 18:46:00 -0700 Subject: [PATCH 05/10] Undoing the changes --- chainladder/core/triangle.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index adba912d..89a4b02d 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -690,7 +690,7 @@ def grain(self, grain="", trailing=False, inplace=False): obj = self.dev_to_val() if ograin_new != ograin_old: - freq = {"Y": "YE", "S": "2Q"}.get(ograin_new, ograin_new) + freq = {"Y": "Y", "S": "2Q"}.get(ograin_new, ograin_new) if trailing or (obj.origin.freqstr[-3:] != "DEC" and ograin_old != "M"): origin_period_end = self.origin[-1].strftime("%b").upper() @@ -698,7 +698,7 @@ def grain(self, grain="", trailing=False, inplace=False): origin_period_end = "DEC" indices = ( - pd.Series(range(len(self.origin)), index=self.origin.to_timestamp()) + pd.Series(range(len(self.origin)), index=self.origin) .resample("-".join([freq, origin_period_end])) .indices ) @@ -708,6 +708,7 @@ def grain(self, grain="", trailing=False, inplace=False): ).values obj = obj.groupby(groups, axis=2).sum() + obj.origin_close = origin_period_end d_start = pd.Period( obj.valuation[0], From 7d60060917af63bdd05575b1ac64379f17ea195b Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Thu, 16 May 2024 18:46:14 -0700 Subject: [PATCH 06/10] secondary_rank is not used at all, no tests failed --- chainladder/development/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainladder/development/base.py b/chainladder/development/base.py index 769583df..637e48c1 100644 --- a/chainladder/development/base.py +++ b/chainladder/development/base.py @@ -356,7 +356,7 @@ def _param_array_helper(self, size, param, default_value): param_array = param_array.astype(type(default_value)) return param_array.to_numpy() - def _set_weight_func(self, factor): + def _set_weight_func(self, factor, secondary_rank=None): w = (~np.isnan(factor.values)).astype(float) w = w * self._assign_n_periods_weight_func(factor) From 1778de01292ee7e4bc1ac29106c0cd4f40191448 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 21 May 2024 13:44:21 -0700 Subject: [PATCH 07/10] Undo the changes from master --- chainladder/development/base.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/chainladder/development/base.py b/chainladder/development/base.py index 637e48c1..a1a2107c 100644 --- a/chainladder/development/base.py +++ b/chainladder/development/base.py @@ -359,7 +359,6 @@ def _param_array_helper(self, size, param, default_value): def _set_weight_func(self, factor, secondary_rank=None): w = (~np.isnan(factor.values)).astype(float) w = w * self._assign_n_periods_weight_func(factor) - if self.drop is not None: w = w * self._drop_func(factor) @@ -370,11 +369,9 @@ def _set_weight_func(self, factor, secondary_rank=None): w = w * self._drop_x_func(factor) if (self.drop_high is not None) | (self.drop_low is not None): - w = w * self._drop_n_func(factor * num_to_nan(w)) - + w = w * self._drop_n_func(factor * num_to_nan(w), secondary_rank) w_tri = factor.copy() w_tri.values = num_to_nan(w) - return w_tri def _assign_n_periods_weight_func(self, factor): @@ -520,11 +517,18 @@ def _drop_x_func(self, factor): return w.transpose((0, 1, 3, 2)).astype(float) # for drop_high and drop_low - def _drop_n_func(self, factor): + def _drop_n_func(self, factor, secondary_rank=None): # getting dimensions of factor for various manipulation factor_val = factor.values.copy() - sec_rank_val = factor_val.copy() - + # secondary rank is the optional triangle that breaks ties in factor + # the original use case is for dropping the link ratio of 1 with the lowest loss value + # (pass in a reverse rank of loss to drop link of ratio of 1 with the highest loss value) + # leaving to user to ensure that secondary rank is the same dimensions as factor + # also leaving to user to pick whether to trim head or tail + if secondary_rank is None: + sec_rank_val = factor_val.copy() + else: + sec_rank_val = secondary_rank.values.copy() factor_len = factor_val.shape[3] indices = factor_val.shape[0] columns = factor_val.shape[1] @@ -534,17 +538,16 @@ def _drop_n_func(self, factor): drop_high_array[:, :, :] = self._param_array_helper( factor_len, self.drop_high, 0 )[None, None] - drop_low_array = np.zeros((indices, columns, factor_len)) drop_low_array[:, :, :] = self._param_array_helper( factor_len, self.drop_low, 0 )[None, None] - preserve_array = np.zeros((indices, columns, factor_len)) preserve_array[:, :, :] = self._param_array_helper( factor_len, self.preserve, self.preserve )[None, None] + # ranking factors by itself and secondary rank factor_ranks = np.lexsort((sec_rank_val, factor_val), axis=2).argsort(axis=2) # setting up starting weights From f1fbb4363d9caba24132907bd04215f20cc7250b Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 21 May 2024 13:46:55 -0700 Subject: [PATCH 08/10] Addressing discussion on PR --- chainladder/development/development.py | 12 +++++++++++- .../development/tests/test_development.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/chainladder/development/development.py b/chainladder/development/development.py index f91d3d7f..1f49d321 100644 --- a/chainladder/development/development.py +++ b/chainladder/development/development.py @@ -119,13 +119,14 @@ def fit(self, X, y=None, sample_weight=None): from chainladder.utils.utility_functions import num_to_nan # Triangle must be cumulative and in "development" mode - obj = self._set_fit_groups(X).incr_to_cum().val_to_dev().copy() xp = obj.get_array_module() + if self.fillna: tri_array = num_to_nan((obj + self.fillna).values) else: tri_array = num_to_nan(obj.values.copy()) + average_ = self._validate_assumption(X, self.average, axis=3)[ ..., : X.shape[3] - 1 ] @@ -143,10 +144,12 @@ def fit(self, X, y=None, sample_weight=None): if hasattr(X, "w_v2_"): self.w_v2_ = self._set_weight_func( factor=obj.age_to_age * X.w_v2_, + # secondary_rank=obj.iloc[..., :-1, :-1] ) else: self.w_v2_ = self._set_weight_func( factor=obj.age_to_age, + # secondary_rank=obj.iloc[..., :-1, :-1] ) self.w_ = self._assign_n_periods_weight( @@ -154,6 +157,7 @@ def fit(self, X, y=None, sample_weight=None): ) * self._drop_adjustment(obj, link_ratio) w = num_to_nan(self.w_ / (x ** (exponent))) params = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(x, y, w) + if self.n_periods != 1: params = params.sigma_fill(self.sigma_interpolation) else: @@ -162,11 +166,13 @@ def fit(self, X, y=None, sample_weight=None): "of freedom to support calculation of all regression" " statistics. Only LDFs have been calculated." ) + params.std_err_ = xp.nan_to_num(params.std_err_) + xp.nan_to_num( (1 - xp.nan_to_num(params.std_err_ * 0 + 1)) * params.sigma_ / xp.swapaxes(xp.sqrt(x ** (2 - exponent))[..., 0:1, :], -1, -2) ) + params = xp.concatenate((params.slope_, params.sigma_, params.std_err_), 3) params = xp.swapaxes(params, 2, 3) self.ldf_ = self._param_property(obj, params, 0) @@ -176,6 +182,7 @@ def fit(self, X, y=None, sample_weight=None): std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_**2).values) resid = resid / num_to_nan(std) self.std_residuals_ = resid[resid.valuation < obj.valuation_date] + return self def transform(self, X): @@ -205,7 +212,9 @@ def transform(self, X): ] for item in triangles: setattr(X_new, item, getattr(self, item)) + X_new._set_slicers() + return X_new def _param_property(self, X, params, idx): @@ -220,4 +229,5 @@ def _param_property(self, X, params, idx): obj.is_cumulative = False obj.virtual_columns.columns = {} obj._set_slicers() + return obj diff --git a/chainladder/development/tests/test_development.py b/chainladder/development/tests/test_development.py index 09ef29fc..c1311c90 100644 --- a/chainladder/development/tests/test_development.py +++ b/chainladder/development/tests/test_development.py @@ -266,6 +266,24 @@ def test_new_drop_7(clrd): ) +def test_new_drop_8(): + tri = cl.load_sample("prism")["Paid"].sum().grain("OYDQ") + + try: + cl.Development(drop_high=False).fit_transform(tri) + except: + assert False + + assert ( + cl.Development(drop_high=True).fit(tri).cdf_.to_frame().fillna(0).values + == cl.Development(drop_high=1).fit(tri).cdf_.to_frame().fillna(0).values + ).all() + assert ( + cl.Development(drop_high=True).fit(tri).cdf_.to_frame().fillna(0).values + >= cl.Development(drop_high=2).fit(tri).cdf_.to_frame().fillna(0).values + ).all() + + def compare_new_drop(dev, tri): assert np.array_equal( dev._set_weight_func(tri.age_to_age, tri.age_to_age).values, From 813fef036897c4fbfee38139146aab0363f9ff7e Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 21 May 2024 13:52:43 -0700 Subject: [PATCH 09/10] Seperated tests --- chainladder/development/tests/test_development.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chainladder/development/tests/test_development.py b/chainladder/development/tests/test_development.py index c1311c90..825fae1e 100644 --- a/chainladder/development/tests/test_development.py +++ b/chainladder/development/tests/test_development.py @@ -274,6 +274,10 @@ def test_new_drop_8(): except: assert False + +def test_new_drop_9(): + tri = cl.load_sample("prism")["Paid"].sum().grain("OYDQ") + assert ( cl.Development(drop_high=True).fit(tri).cdf_.to_frame().fillna(0).values == cl.Development(drop_high=1).fit(tri).cdf_.to_frame().fillna(0).values From 1bc0567e6f6f24ffb1f1d5957d74d850795d1657 Mon Sep 17 00:00:00 2001 From: Kenneth Hsu Date: Tue, 21 May 2024 14:01:35 -0700 Subject: [PATCH 10/10] New tests --- .../development/tests/test_development.py | 68 +++++++++++++++++-- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/chainladder/development/tests/test_development.py b/chainladder/development/tests/test_development.py index 825fae1e..f8b481a0 100644 --- a/chainladder/development/tests/test_development.py +++ b/chainladder/development/tests/test_development.py @@ -1,5 +1,6 @@ import numpy as np import chainladder as cl +import pytest def test_full_slice(): @@ -278,14 +279,67 @@ def test_new_drop_8(): def test_new_drop_9(): tri = cl.load_sample("prism")["Paid"].sum().grain("OYDQ") + lhs = cl.Development(drop_high=True).fit(tri).cdf_.to_frame().fillna(0).values + rhs = cl.Development(drop_high=1).fit(tri).cdf_.to_frame().fillna(0).values + assert (lhs == rhs).all() + + +@pytest.mark.xfail +def test_new_drop_10(): + data = { + "valuation": [ + 1981, + 1982, + 1983, + 1984, + 1985, + 1982, + 1983, + 1984, + 1985, + ], + "origin": [ + 1981, + 1982, + 1983, + 1984, + 1985, + 1981, + 1982, + 1983, + 1984, + ], + "values": [ + 100, + 200, + 300, + 400, + 500, + 200, + 200, + 300, + 800, + ], + } + + tri = cl.Triangle( + pd.DataFrame(data), + origin="origin", + development="valuation", + columns=["values"], + cumulative=True, + ) + + assert np.round( + cl.Development(drop_high=1).fit(tri).cdf_.to_frame().values.flatten()[0], 4 + ) == (200 + 300 + 800) / (200 + 300 + 400) + assert ( - cl.Development(drop_high=True).fit(tri).cdf_.to_frame().fillna(0).values - == cl.Development(drop_high=1).fit(tri).cdf_.to_frame().fillna(0).values - ).all() - assert ( - cl.Development(drop_high=True).fit(tri).cdf_.to_frame().fillna(0).values - >= cl.Development(drop_high=2).fit(tri).cdf_.to_frame().fillna(0).values - ).all() + np.round( + cl.Development(drop_high=2).fit(tri).cdf_.to_frame().values.flatten()[0], 4 + ) + == 1.0000 + ) def compare_new_drop(dev, tri):