diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py
index c8852045..89a4b02d 100644
--- a/chainladder/core/triangle.py
+++ b/chainladder/core/triangle.py
@@ -666,10 +666,12 @@ def grain(self, grain="", trailing=False, inplace=False):
             "M": ["Y", "S", "Q", "M"],
             "S": ["S", "Y"],
         }
+
         if ograin_new not in valid.get(ograin_old, []) or dgrain_new not in valid.get(
             dgrain_old, []
         ):
             raise ValueError("New grain not compatible with existing grain")
+
         if (
             self.is_cumulative is None
             and dgrain_old != dgrain_new
@@ -678,26 +680,35 @@ def grain(self, grain="", trailing=False, inplace=False):
             raise AttributeError(
                 "The is_cumulative attribute must be set before using grain method."
             )
+
         if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new):
             raise ValueError("Origin grain must be coarser than development grain")
+
         if self.is_full and not self.is_ultimate and not self.is_val_tri:
             warnings.warn("Triangle includes extraneous development lags")
+
         obj = self.dev_to_val()
+
         if ograin_new != ograin_old:
             freq = {"Y": "Y", "S": "2Q"}.get(ograin_new, ograin_new)
+
             if trailing or (obj.origin.freqstr[-3:] != "DEC" and ograin_old != "M"):
                 origin_period_end = self.origin[-1].strftime("%b").upper()
             else:
                 origin_period_end = "DEC"
+
             indices = (
                 pd.Series(range(len(self.origin)), index=self.origin)
                 .resample("-".join([freq, origin_period_end]))
                 .indices
             )
+
             groups = pd.concat(
                 [pd.Series([k] * len(v), index=v) for k, v in indices.items()], axis=0
             ).values
+
             obj = obj.groupby(groups, axis=2).sum()
+
             obj.origin_close = origin_period_end
         d_start = pd.Period(
             obj.valuation[0],
@@ -707,6 +718,7 @@ def grain(self, grain="", trailing=False, inplace=False):
                 else dgrain_old + obj.origin.freqstr[-4:]
             ),
         ).to_timestamp(how="s")
+
         if len(obj.ddims) > 1 and obj.origin.to_timestamp(how="s")[0] != d_start:
             addl_ts = (
                 pd.period_range(obj.odims[0], obj.valuation[0], freq=dgrain_old)[
@@ -719,11 +731,13 @@ def grain(self, grain="", trailing=False, inplace=False):
             addl.ddims = addl_ts
             obj = concat((addl, obj), axis=-1)
             obj.values = num_to_nan(obj.values)
+
         if dgrain_old != dgrain_new and obj.shape[-1] > 1:
             step = self._dstep()[dgrain_old][dgrain_new]
             d = np.sort(
                 len(obj.development) - np.arange(0, len(obj.development), step) - 1
             )
+
             if obj.is_cumulative:
                 obj = obj.iloc[..., d]
             else:
@@ -731,11 +745,15 @@ def grain(self, grain="", trailing=False, inplace=False):
                 d2 = [d[0]] * (d[0] + 1) + list(np.repeat(np.array(d[1:]), step))
                 obj = obj.groupby(d2, axis=3).sum()
                 obj.ddims = ddims
+
         obj.development_grain = dgrain_new
+
         obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev()
+
         if inplace:
             self = obj
             return self
+
         return obj
 
     def trend(
diff --git a/chainladder/development/development.py b/chainladder/development/development.py
index 48f5c6a2..1f49d321 100644
--- a/chainladder/development/development.py
+++ b/chainladder/development/development.py
@@ -121,30 +121,43 @@ def fit(self, X, y=None, sample_weight=None):
         # Triangle must be cumulative and in "development" mode
         obj = self._set_fit_groups(X).incr_to_cum().val_to_dev().copy()
         xp = obj.get_array_module()
+
         if self.fillna:
             tri_array = num_to_nan((obj + self.fillna).values)
         else:
             tri_array = num_to_nan(obj.values.copy())
-        average_ = self._validate_assumption(X, self.average, axis=3)[... , :X.shape[3]-1]
+
+        average_ = self._validate_assumption(X, self.average, axis=3)[
+            ..., : X.shape[3] - 1
+        ]
         self.average_ = average_.flatten()
-        n_periods_ = self._validate_assumption(X, self.n_periods, axis=3)[...
-            , :X.shape[3]-1]
+        n_periods_ = self._validate_assumption(X, self.n_periods, axis=3)[
+            ..., : X.shape[3] - 1
+        ]
         x, y = tri_array[..., :-1], tri_array[..., 1:]
         exponent = xp.array(
-            [{"regression": 0, "volume": 1, "simple": 2}[x]
-             for x in average_[0, 0, 0]]
+            [{"regression": 0, "volume": 1, "simple": 2}[x] for x in average_[0, 0, 0]]
         )
         exponent = xp.nan_to_num(exponent * (y * 0 + 1))
         link_ratio = y / x
+
         if hasattr(X, "w_v2_"):
-            self.w_v2_ = self._set_weight_func(obj.age_to_age * X.w_v2_,obj.iloc[...,:-1,:-1])
+            self.w_v2_ = self._set_weight_func(
+                factor=obj.age_to_age * X.w_v2_,
+                # secondary_rank=obj.iloc[..., :-1, :-1]
+            )
         else:
-            self.w_v2_ = self._set_weight_func(obj.age_to_age,obj.iloc[...,:-1,:-1])
+            self.w_v2_ = self._set_weight_func(
+                factor=obj.age_to_age,
+                # secondary_rank=obj.iloc[..., :-1, :-1]
+            )
+
         self.w_ = self._assign_n_periods_weight(
             obj, n_periods_
         ) * self._drop_adjustment(obj, link_ratio)
         w = num_to_nan(self.w_ / (x ** (exponent)))
         params = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(x, y, w)
+
         if self.n_periods != 1:
             params = params.sigma_fill(self.sigma_interpolation)
         else:
@@ -153,20 +166,23 @@ def fit(self, X, y=None, sample_weight=None):
                 "of freedom to support calculation of all regression"
                 " statistics. Only LDFs have been calculated."
             )
+
         params.std_err_ = xp.nan_to_num(params.std_err_) + xp.nan_to_num(
             (1 - xp.nan_to_num(params.std_err_ * 0 + 1))
             * params.sigma_
             / xp.swapaxes(xp.sqrt(x ** (2 - exponent))[..., 0:1, :], -1, -2)
         )
+
         params = xp.concatenate((params.slope_, params.sigma_, params.std_err_), 3)
         params = xp.swapaxes(params, 2, 3)
         self.ldf_ = self._param_property(obj, params, 0)
         self.sigma_ = self._param_property(obj, params, 1)
         self.std_err_ = self._param_property(obj, params, 2)
         resid = -obj.iloc[..., :-1] * self.ldf_.values + obj.iloc[..., 1:].values
-        std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_ ** 2).values)
+        std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_**2).values)
         resid = resid / num_to_nan(std)
         self.std_residuals_ = resid[resid.valuation < obj.valuation_date]
+
         return self
 
     def transform(self, X):
@@ -184,10 +200,21 @@ def transform(self, X):
         """
         X_new = X.copy()
         X_new.group_index = self._set_transform_groups(X_new)
-        triangles = ["std_err_", "ldf_", "sigma_","std_residuals_","average_", "w_", "sigma_interpolation","w_v2_"]
+        triangles = [
+            "std_err_",
+            "ldf_",
+            "sigma_",
+            "std_residuals_",
+            "average_",
+            "w_",
+            "sigma_interpolation",
+            "w_v2_",
+        ]
         for item in triangles:
             setattr(X_new, item, getattr(self, item))
+
         X_new._set_slicers()
+
         return X_new
 
     def _param_property(self, X, params, idx):
@@ -202,4 +229,5 @@ def _param_property(self, X, params, idx):
         obj.is_cumulative = False
         obj.virtual_columns.columns = {}
         obj._set_slicers()
+
         return obj
diff --git a/chainladder/development/tests/test_development.py b/chainladder/development/tests/test_development.py
index 19ad0f62..f8b481a0 100644
--- a/chainladder/development/tests/test_development.py
+++ b/chainladder/development/tests/test_development.py
@@ -1,5 +1,7 @@
 import numpy as np
 import chainladder as cl
+import pandas as pd
+import pytest
 
 
 def test_full_slice():
@@ -119,6 +120,13 @@ def test_drophighlow():
     )
     assert np.all(lhs == rhs)
 
+    tri = cl.load_sample("prism")["Paid"].sum().grain("OYDQ")
+    no_drop = cl.Development().fit_transform(tri).cdf_.to_frame().values
+    drop_high = cl.Development(drop_high=True).fit_transform(tri).cdf_.to_frame().values
+    drop_low = cl.Development(drop_low=True).fit_transform(tri).cdf_.to_frame().values
+    assert (drop_low >= no_drop).all()
+    assert (no_drop >= drop_high).all()
+
 
 def test_dropabovebelow():
     raa = cl.load_sample("raa")
@@ -173,91 +181,204 @@ def test_assymetric_development(atol):
     dev2 = cl.Development(n_periods=1, average="regression").fit(quarterly)
     assert xp.allclose(dev.ldf_.values, dev2.ldf_.values, atol=atol)
 
+
 def test_hilo_multiple_indices(clrd):
-    tri = clrd.groupby('LOB')['CumPaidLoss'].sum()
+    tri = clrd.groupby("LOB")["CumPaidLoss"].sum()
     assert (
-        cl.Development(n_periods=5).fit(tri).ldf_.loc['wkcomp'] ==
-        cl.Development(n_periods=5).fit(tri.loc['wkcomp']).ldf_)
+        cl.Development(n_periods=5).fit(tri).ldf_.loc["wkcomp"]
+        == cl.Development(n_periods=5).fit(tri.loc["wkcomp"]).ldf_
+    )
     assert (
-        cl.Development(drop_low=2).fit(tri).ldf_.loc['wkcomp'] ==
-        cl.Development(drop_low=2).fit(tri.loc['wkcomp']).ldf_)
+        cl.Development(drop_low=2).fit(tri).ldf_.loc["wkcomp"]
+        == cl.Development(drop_low=2).fit(tri.loc["wkcomp"]).ldf_
+    )
+
 
 def test_new_drop_1(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
-    #n_periods
-    return compare_new_drop(cl.Development(n_periods = 4).fit(clrd),clrd)
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
+    # n_periods
+    return compare_new_drop(cl.Development(n_periods=4).fit(clrd), clrd)
+
 
 def test_new_drop_2(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
-    #single drop and drop_valuation
-    return compare_new_drop(cl.Development(drop = ("1992",12),drop_valuation = 1993).fit(clrd),clrd)
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
+    # single drop and drop_valuation
+    return compare_new_drop(
+        cl.Development(drop=("1992", 12), drop_valuation=1993).fit(clrd), clrd
+    )
+
 
 def test_new_drop_3(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
-    #multiple drop and drop_valuation
-    return compare_new_drop(cl.Development(drop = [("1992",12),("1996",24)],drop_valuation = [1993,1995]).fit(clrd),clrd)
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
+    # multiple drop and drop_valuation
+    return compare_new_drop(
+        cl.Development(
+            drop=[("1992", 12), ("1996", 24)], drop_valuation=[1993, 1995]
+        ).fit(clrd),
+        clrd,
+    )
+
 
 def test_new_drop_4(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
-    #drop_hi/low without preserve
-    return compare_new_drop(cl.Development(drop_high = 1, drop_low = 1).fit(clrd),clrd)
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
+    # drop_hi/low without preserve
+    return compare_new_drop(cl.Development(drop_high=1, drop_low=1).fit(clrd), clrd)
+
 
 def test_new_drop_5(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
-    #drop_hi/low without preserve
-    return compare_new_drop(cl.Development(drop_high = 1, drop_low = 1,preserve = 3).fit(clrd),clrd)
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
+    # drop_hi/low with preserve
+    return compare_new_drop(
+        cl.Development(drop_high=1, drop_low=1, preserve=3).fit(clrd), clrd
+    )
+
 
 def test_new_drop_5a(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
-    #drop_hi/low without preserve
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
+    # drop_hi/low with preserve
     assert np.array_equal(
-        cl.Development(drop_high = 1, drop_low = 1,preserve = 3)._set_weight_func(clrd.age_to_age, clrd.age_to_age).values,
-        cl.Development(drop_high = True, drop_low = [True, True, True, True, True, True, True, True, True] ,preserve = 3)._set_weight_func(clrd.age_to_age).values,
-        True
+        cl.Development(drop_high=1, drop_low=1, preserve=3)
+        ._set_weight_func(clrd.age_to_age, clrd.age_to_age)
+        .values,
+        cl.Development(
+            drop_high=True,
+            drop_low=[True, True, True, True, True, True, True, True, True],
+            preserve=3,
+        )
+        ._set_weight_func(clrd.age_to_age)
+        .values,
+        True,
     )
+
 
 def test_new_drop_6(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
-    #drop_above/below without preserve
-    return compare_new_drop(cl.Development(drop_above = 1.01,drop_below = 0.95).fit(clrd),clrd)
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
+    # drop_above/below without preserve
+    return compare_new_drop(
+        cl.Development(drop_above=1.01, drop_below=0.95).fit(clrd), clrd
+    )
+
 
 def test_new_drop_7(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
-    #drop_above/below with preserve
-    return compare_new_drop(cl.Development(drop_above = 1.01,drop_below = 0.95,preserve=3).fit(clrd),clrd)
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
+    # drop_above/below with preserve
+    return compare_new_drop(
+        cl.Development(drop_above=1.01, drop_below=0.95, preserve=3).fit(clrd), clrd
+    )
+
+
+def test_new_drop_8():
+    tri = cl.load_sample("prism")["Paid"].sum().grain("OYDQ")
+
+    try:
+        cl.Development(drop_high=False).fit_transform(tri)
+    except:
+        assert False
+
+
+def test_new_drop_9():
+    tri = cl.load_sample("prism")["Paid"].sum().grain("OYDQ")
+
+    lhs = cl.Development(drop_high=True).fit(tri).cdf_.to_frame().fillna(0).values
+    rhs = cl.Development(drop_high=1).fit(tri).cdf_.to_frame().fillna(0).values
+    assert (lhs == rhs).all()
+
+
+@pytest.mark.xfail
+def test_new_drop_10():
+    data = {
+        "valuation": [
+            1981,
+            1982,
+            1983,
+            1984,
+            1985,
+            1982,
+            1983,
+            1984,
+            1985,
+        ],
+        "origin": [
+            1981,
+            1982,
+            1983,
+            1984,
+            1985,
+            1981,
+            1982,
+            1983,
+            1984,
+        ],
+        "values": [
+            100,
+            200,
+            300,
+            400,
+            500,
+            200,
+            200,
+            300,
+            800,
+        ],
+    }
+
+    tri = cl.Triangle(
+        pd.DataFrame(data),
+        origin="origin",
+        development="valuation",
+        columns=["values"],
+        cumulative=True,
+    )
+
+    assert np.round(
+        cl.Development(drop_high=1).fit(tri).cdf_.to_frame().values.flatten()[0], 4
+    ) == (200 + 300 + 800) / (200 + 300 + 400)
 
-def compare_new_drop(dev,tri):
     assert (
-        np.array_equal(
-            dev._set_weight_func(tri.age_to_age, tri.age_to_age).values,
-            dev.transform(tri).age_to_age.values*0+1,
-            True
+        np.round(
+            cl.Development(drop_high=2).fit(tri).cdf_.to_frame().values.flatten()[0], 4
         )
+        == 1.0000
+    )
+
+
+def compare_new_drop(dev, tri):
+    assert np.array_equal(
+        dev._set_weight_func(tri.age_to_age, tri.age_to_age).values,
+        dev.transform(tri).age_to_age.values * 0 + 1,
+        True,
     )
+
 
 def test_4d_drop(clrd):
-    clrd = clrd.groupby("LOB").sum()[["CumPaidLoss","IncurLoss"]]
+    clrd = clrd.groupby("LOB").sum()[["CumPaidLoss", "IncurLoss"]]
     assert (
-        cl.Development(n_periods = 4).fit_transform(clrd.iloc[0,0]).link_ratio ==
-        cl.Development(n_periods = 4).fit_transform(clrd).link_ratio.iloc[0,0])
+        cl.Development(n_periods=4).fit_transform(clrd.iloc[0, 0]).link_ratio
+        == cl.Development(n_periods=4).fit_transform(clrd).link_ratio.iloc[0, 0]
+    )
+
 
 def test_pipeline(clrd):
-    clrd = clrd.groupby('LOB')[["IncurLoss","CumPaidLoss"]].sum()
+    clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
     dev1 = cl.Development(
-        n_periods = 7,
-        drop_valuation = 1995,
-        drop = ("1992",12),
-        drop_above = 1.05,
-        drop_below = .95,
-        drop_high = 1,
-        drop_low = 1
+        n_periods=7,
+        drop_valuation=1995,
+        drop=("1992", 12),
+        drop_above=1.05,
+        drop_below=0.95,
+        drop_high=1,
+        drop_low=1,
     ).fit(clrd)
-    pipe = cl.Pipeline(steps=[
-        ('n_periods', cl.Development(n_periods = 7)),
-        ('drop_valuation', cl.Development(drop_valuation = 1995)),
-        ('drop', cl.Development(drop = ("1992",12))),
-        ('drop_abovebelow', cl.Development(drop_above = 1.05, drop_below = .95)),
-        ('drop_hilo', cl.Development(drop_high = 1, drop_low = 1))]
+    pipe = cl.Pipeline(
+        steps=[
+            ("n_periods", cl.Development(n_periods=7)),
+            ("drop_valuation", cl.Development(drop_valuation=1995)),
+            ("drop", cl.Development(drop=("1992", 12))),
+            ("drop_abovebelow", cl.Development(drop_above=1.05, drop_below=0.95)),
+            ("drop_hilo", cl.Development(drop_high=1, drop_low=1)),
+        ]
     )
     dev2 = pipe.fit(X=clrd)
-    assert np.array_equal(dev1.w_v2_.values,dev2.named_steps.drop_hilo.w_v2_.values,True)
\ No newline at end of file
+    assert np.array_equal(
+        dev1.w_v2_.values, dev2.named_steps.drop_hilo.w_v2_.values, True
+    )
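Usage sketch (illustrative, not part of the patch): the behavior the new tests pin down. It assumes only what the tests themselves use: the "prism" sample at OYDQ grain and the Development estimator. Per test_new_drop_9, the boolean form of drop_high fits the same factors as drop_high=1, and per test_new_drop_5a a list of booleans is also accepted; tri.shape[3] - 1 is assumed to be the number of age-to-age columns.

import chainladder as cl

# Prism paid losses at origin-year / development-quarter grain, as in the tests.
tri = cl.load_sample("prism")["Paid"].sum().grain("OYDQ")

# Boolean form matches the integer form (see test_new_drop_9).
bool_cdf = cl.Development(drop_high=True).fit(tri).cdf_.to_frame().fillna(0).values
int_cdf = cl.Development(drop_high=1).fit(tri).cdf_.to_frame().fillna(0).values
assert (bool_cdf == int_cdf).all()

# One boolean flag per age-to-age column (see test_new_drop_5a).
dev = cl.Development(drop_low=[True] * (tri.shape[3] - 1)).fit(tri)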