From 6028007c73882c38fdc31c8355dfb3df22c642c9 Mon Sep 17 00:00:00 2001 From: John S Bogaardt Date: Sat, 30 May 2020 12:25:09 -0600 Subject: [PATCH] resolves #81 --- chainladder/core/slice.py | 51 +++++++- chainladder/core/tests/test_correlation.py | 4 +- chainladder/core/tests/test_triangle.py | 110 ++++++++++-------- .../development/tests/test_boostrap.py | 2 +- .../development/tests/test_constant.py | 8 +- .../development/tests/test_development.py | 18 +-- .../development/tests/test_incremental.py | 2 +- chainladder/development/tests/test_munich.py | 4 +- chainladder/methods/tests/test_benktander.py | 6 +- chainladder/methods/tests/test_capecod.py | 4 +- chainladder/methods/tests/test_mack.py | 8 +- chainladder/methods/tests/test_predict.py | 4 +- chainladder/tails/base.py | 16 ++- chainladder/tails/bondy.py | 2 +- chainladder/tails/tests/test_bondy.py | 2 +- chainladder/tails/tests/test_constant.py | 2 +- chainladder/tails/tests/test_exponential.py | 8 +- chainladder/workflow/tests/test_workflow.py | 2 +- 18 files changed, 160 insertions(+), 93 deletions(-) diff --git a/chainladder/core/slice.py b/chainladder/core/slice.py index ff2a6ee6..ebda5438 100644 --- a/chainladder/core/slice.py +++ b/chainladder/core/slice.py @@ -41,6 +41,14 @@ def _contig_slice(self, arr): return slice(max(arr), min_arr, step) return arr + def _update_sub_obj(self, obj): + sub_tris = [ + k for k in obj.__dict__.keys() + if getattr(obj, k).__class__.__name__ in ['Triangle', 'DataFrame']] + for sub_tri in sub_tris: + setattr(obj, sub_tri, getattr(obj, sub_tri).loc[obj.kdims, obj.vdims]) + return obj + class Location(_LocBase): ''' class to generate .loc[] functionality ''' @@ -50,14 +58,16 @@ def __getitem__(self, key): if type(key) == tuple and type(key[0]) == pd.Series: return self.obj[key[0]][key[1]] idx = self.obj._idx_table().loc[key] - return self.get_idx(self.obj._idx_table_format(idx)) + obj = self.get_idx(self.obj._idx_table_format(idx)) + return self._update_sub_obj(obj) class Ilocation(_LocBase): ''' class to generate .iloc[] functionality ''' def __getitem__(self, key): idx = self.obj._idx_table().iloc[key] - return self.get_idx(self.obj._idx_table_format(idx)) + obj = self.get_idx(self.obj._idx_table_format(idx)) + return self._update_sub_obj(obj) class TriangleSlicer: @@ -80,7 +90,7 @@ def _idx_table_format(self, idx): def _idx_table(self): ''' private method that generates a dataframe of triangle indices. - The dataframe is meant ot be sliced using pandas and the resultant + The dataframe is meant to be sliced using pandas and the resultant indices are then to be extracted from the Triangle object. ''' df = pd.DataFrame(list(self.kdims), columns=self.key_labels) @@ -95,19 +105,50 @@ def __getitem__(self, key): if type(key) is pd.DataFrame and 'development' in key.columns: return self._slice_development(key['development']) - elif type(key) is np.ndarray: + if type(key) is np.ndarray: # Presumes that if I have a 1D array, I will want to slice origin. if len(key) == np.prod(self.shape[-2:]) and self.shape[-1] > 1: return self._slice_valuation(key) return self._slice_origin(key) - elif type(key) is pd.Series: + # Does triangle have sub-triangles? 
+ sub_tris = [ + k for k in self.__dict__.keys() + if getattr(self, k).__class__.__name__ == 'Triangle'] + sub_dfs = [ + k for k in self.__dict__.keys() + if getattr(self, k).__class__.__name__ == 'DataFrame'] + # Dont mutate original subtriangle + if len(sub_tris) + len(sub_dfs) > 0: + self = copy.deepcopy(self) + if type(key) is pd.Series: + for sub_tri in sub_tris: + setattr( + self, sub_tri, + getattr(self, sub_tri).iloc[list(self.index[key].index)]) return self.iloc[list(self.index[key].index)] elif key in self.key_labels: # Boolean-indexing of a particular key + for sub_tri in sub_tris: + setattr( + self, sub_tri, getattr(self, sub_tri).index[key]) + for sub_df in sub_dfs: + setattr( + self, sub_df, getattr(self, sub_df)[key]) return self.index[key] else: idx = self._idx_table()[key] idx = self._idx_table_format(idx) + for sub_tri in sub_tris: + setattr( + self, sub_tri, + _LocBase(getattr(self, sub_tri)).get_idx(idx)) + for sub_df in sub_dfs: + if len(idx.columns) == 1: + setattr(self, sub_df, + getattr(self, sub_df).loc[idx.index][idx.columns[0]]) + else: + setattr(self, sub_df, + getattr(self, sub_df).loc[idx.index,idx.columns]) obj = _LocBase(self).get_idx(idx) return obj diff --git a/chainladder/core/tests/test_correlation.py b/chainladder/core/tests/test_correlation.py index 11b2ca50..d0dd39a3 100644 --- a/chainladder/core/tests/test_correlation.py +++ b/chainladder/core/tests/test_correlation.py @@ -15,13 +15,13 @@ def dev_corr_r(data, ci): return r('out<-dfCorTest({},ci={})'.format(data, ci)) def dev_corr_p(data, ci): - return cl.load_dataset(data).development_correlation(p_critical=ci) + return cl.load_sample(data).development_correlation(p_critical=ci) def val_corr_r(data, ci): return r('out<-cyEffTest({},ci={})'.format(data, ci)) def val_corr_p(data, ci): - return cl.load_dataset(data).valuation_correlation(p_critical=ci, total=True) + return cl.load_sample(data).valuation_correlation(p_critical=ci, total=True) data = ['RAA', 'GenIns', 'MW2014'] diff --git a/chainladder/core/tests/test_triangle.py b/chainladder/core/tests/test_triangle.py index bf4a4032..9461e8f5 100644 --- a/chainladder/core/tests/test_triangle.py +++ b/chainladder/core/tests/test_triangle.py @@ -4,8 +4,8 @@ from chainladder.utils.cupy import cp import copy -tri = cl.load_dataset('clrd') -qtr = cl.load_dataset('quarterly') +tri = cl.load_sample('clrd') +qtr = cl.load_sample('quarterly') # Test Triangle slicing def test_slice_by_boolean(): @@ -18,12 +18,12 @@ def test_slice_by_loc(): def test_slice_origin(): - assert cl.load_dataset('raa')[cl.load_dataset('raa').origin>'1985'].shape == \ + assert cl.load_sample('raa')[cl.load_sample('raa').origin>'1985'].shape == \ (1, 1, 5, 10) def test_slice_development(): - assert cl.load_dataset('raa')[cl.load_dataset('raa').development<72].shape == \ + assert cl.load_sample('raa')[cl.load_sample('raa').development<72].shape == \ (1, 1, 10, 5) @@ -32,25 +32,25 @@ def test_slice_by_loc_iloc(): def test_repr(): - tri = cl.load_dataset('raa') + tri = cl.load_sample('raa') np.testing.assert_array_equal(pd.read_html(tri._repr_html_())[0].set_index('Origin').values, tri.to_frame().values) def test_arithmetic_union(): - raa = cl.load_dataset('raa') + raa = cl.load_sample('raa') assert raa.shape == (raa-raa[raa.valuation<'1987']).shape def test_to_frame_unusual(): - a = cl.load_dataset('clrd').groupby(['LOB']).sum().latest_diagonal['CumPaidLoss'].to_frame().values - b = cl.load_dataset('clrd').latest_diagonal['CumPaidLoss'].groupby(['LOB']).sum().to_frame().values + a = 
cl.load_sample('clrd').groupby(['LOB']).sum().latest_diagonal['CumPaidLoss'].to_frame().values + b = cl.load_sample('clrd').latest_diagonal['CumPaidLoss'].groupby(['LOB']).sum().to_frame().values xp = cp.get_array_module(a) xp.testing.assert_array_equal(a, b) def test_link_ratio(): - tri = cl.load_dataset('RAA') + tri = cl.load_sample('RAA') xp = cp.get_array_module(tri.values) xp.testing.assert_allclose(tri.link_ratio.values*tri.values[:,:,:-1,:-1], tri.values[:,:,:-1,1:], atol=1e-5) @@ -93,18 +93,18 @@ def test_sum_of_diff_eq_diff_of_sum(): def test_append(): - assert cl.load_dataset('raa').append(cl.load_dataset('raa')).sum() == 2*cl.load_dataset('raa') + assert cl.load_sample('raa').append(cl.load_sample('raa')).sum() == 2*cl.load_sample('raa') def test_assign_existing_col(): - tri = cl.load_dataset('quarterly') + tri = cl.load_sample('quarterly') before = tri.shape tri['paid'] = 1/tri['paid'] assert tri.shape == before def test_arithmetic_across_keys(): - x = cl.load_dataset('auto') + x = cl.load_sample('auto') xp = cp.get_array_module(x.values) xp.testing.assert_array_equal((x.sum()-x.iloc[0]).values, x.iloc[1].values) @@ -138,10 +138,10 @@ def test_grain(): xp.testing.assert_array_equal(actual, expected) def test_off_cycle_val_date(): - assert cl.load_dataset('quarterly').valuation_date.strftime('%Y-%m-%d') == '2006-03-31' + assert cl.load_sample('quarterly').valuation_date.strftime('%Y-%m-%d') == '2006-03-31' def test_printer(): - print(cl.load_dataset('abc')) + print(cl.load_sample('abc')) def test_value_order(): @@ -152,53 +152,53 @@ def test_value_order(): def test_trend(): - assert abs((cl.load_dataset('abc').trend(0.05).trend((1/1.05)-1) - - cl.load_dataset('abc')).sum().sum()) < 1e-5 + assert abs((cl.load_sample('abc').trend(0.05).trend((1/1.05)-1) - + cl.load_sample('abc')).sum().sum()) < 1e-5 def test_arithmetic_1(): - x = cl.load_dataset('mortgage') + x = cl.load_sample('mortgage') np.testing.assert_array_equal(-(((x/x)+0)*x), -(+x)) def test_arithmetic_2(): - x = cl.load_dataset('mortgage') + x = cl.load_sample('mortgage') np.testing.assert_array_equal(1-(x/x), 0*x*0) def test_rtruediv(): - raa = cl.load_dataset('raa') + raa = cl.load_sample('raa') xp = cp.get_array_module(raa.values) assert xp.nansum(abs(((1/raa)*raa).values[0,0] - raa._nan_triangle()))< .00001 def test_shift(): - x = cl.load_dataset('quarterly').iloc[0,0] + x = cl.load_sample('quarterly').iloc[0,0] xp = cp.get_array_module(x.values) xp.testing.assert_array_equal(x[x.valuation<=x.valuation_date].values, x.values) def test_quantile_vs_median(): - clrd = cl.load_dataset('clrd') + clrd = cl.load_sample('clrd') xp = cp.get_array_module(clrd.values) xp.testing.assert_array_equal(clrd.quantile(.5)['CumPaidLoss'].values, clrd.median()['CumPaidLoss'].values) def test_grain_returns_valid_tri(): - tri = cl.load_dataset('quarterly') + tri = cl.load_sample('quarterly') assert tri.grain('OYDY').latest_diagonal == tri.latest_diagonal def test_base_minimum_exposure_triangle(): - raa = (cl.load_dataset('raa').latest_diagonal*0+50000).to_frame().reset_index() + raa = (cl.load_sample('raa').latest_diagonal*0+50000).to_frame().reset_index() raa['index'] = raa['index'].astype(str) cl.Triangle(raa, origin='index', - columns=list(cl.load_dataset('raa').columns)) + columns=list(cl.load_sample('raa').columns)) def test_origin_and_value_setters(): - raa = cl.load_dataset('raa') - raa2 = cl.load_dataset('raa') + raa = cl.load_sample('raa') + raa2 = cl.load_sample('raa') raa.columns = list(raa.columns) raa.origin = 
list(raa.origin) assert np.all((np.all(raa2.origin == raa.origin), @@ -207,70 +207,70 @@ def test_origin_and_value_setters(): np.all(raa2.vdims == raa.vdims))) def test_grain_increm_arg(): - tri = cl.load_dataset('quarterly')['incurred'] + tri = cl.load_sample('quarterly')['incurred'] tri_i = tri.cum_to_incr() np.testing.assert_array_equal(tri_i.grain('OYDY').incr_to_cum(), tri.grain('OYDY')) def test_valdev1(): - a = cl.load_dataset('quarterly').dev_to_val().val_to_dev().values - b = cl.load_dataset('quarterly').values + a = cl.load_sample('quarterly').dev_to_val().val_to_dev().values + b = cl.load_sample('quarterly').values xp = cp.get_array_module(a) xp.testing.assert_array_equal(a,b) def test_valdev2(): - a = cl.load_dataset('quarterly').dev_to_val().grain('OYDY').val_to_dev().values - b = cl.load_dataset('quarterly').grain('OYDY').values + a = cl.load_sample('quarterly').dev_to_val().grain('OYDY').val_to_dev().values + b = cl.load_sample('quarterly').grain('OYDY').values xp = cp.get_array_module(a) xp.testing.assert_array_equal(a,b) def test_valdev3(): - a = cl.load_dataset('quarterly').grain('OYDY').dev_to_val().val_to_dev().values - b = cl.load_dataset('quarterly').grain('OYDY').values + a = cl.load_sample('quarterly').grain('OYDY').dev_to_val().val_to_dev().values + b = cl.load_sample('quarterly').grain('OYDY').values xp = cp.get_array_module(a) xp.testing.assert_array_equal(a,b) #def test_valdev4(): # # Does not work with pandas 0.23, consider requiring only pandas>=0.24 -# raa = cl.load_dataset('raa') +# raa = cl.load_sample('raa') # np.testing.assert_array_equal(raa.dev_to_val()[raa.dev_to_val().development>='1989'].values, # raa[raa.valuation>='1989'].dev_to_val().values) def test_valdev5(): - raa = cl.load_dataset('raa') + raa = cl.load_sample('raa') xp = cp.get_array_module(raa.values) xp.testing.assert_array_equal(raa[raa.valuation>='1989'].latest_diagonal.values, raa.latest_diagonal.values) def test_valdev6(): - raa = cl.load_dataset('raa') + raa = cl.load_sample('raa') xp = cp.get_array_module(raa.values) xp.testing.assert_array_equal(raa.grain('OYDY').latest_diagonal.values, raa.latest_diagonal.grain('OYDY').values) def test_valdev7(): - tri = cl.load_dataset('quarterly') + tri = cl.load_sample('quarterly') xp = cp.get_array_module(tri.values) x = cl.Chainladder().fit(tri).full_expectation_ xp.testing.assert_array_equal(x.dev_to_val().val_to_dev().values, x.values) def test_reassignment(): - raa = cl.load_dataset('clrd') + raa = cl.load_sample('clrd') raa['values'] = raa['CumPaidLoss'] raa['values'] = raa['values'] + raa['CumPaidLoss'] def test_dropna(): - clrd = cl.load_dataset('clrd') + clrd = cl.load_sample('clrd') assert clrd.shape == clrd.dropna().shape assert clrd[clrd['LOB']=='wkcomp'].iloc[-5]['CumPaidLoss'].dropna().shape == (1,1,2,2) def test_commutative(): - tri = cl.load_dataset('quarterly') + tri = cl.load_sample('quarterly') xp = cp.get_array_module(tri.values) full = cl.Chainladder().fit(tri).full_expectation_ assert tri.grain('OYDY').val_to_dev() == tri.val_to_dev().grain('OYDY') @@ -282,13 +282,13 @@ def test_commutative(): xp.nan_to_num(full.val_to_dev().grain('OYDY').values), atol=1e-5) def test_broadcasting(): - t1 = cl.load_dataset('raa') + t1 = cl.load_sample('raa') t2 = tri assert t1.broadcast_axis('columns', t2.columns).shape[1] == t2.shape[1] assert t1.broadcast_axis('index', t2.index).shape[0] == t2.shape[0] def test_slicers_honor_order(): - clrd = cl.load_dataset('clrd').groupby('LOB').sum() + clrd = cl.load_sample('clrd').groupby('LOB').sum() 
assert clrd.iloc[[1,0], :].iloc[0, 1] == clrd.iloc[1, 1] #row assert clrd.iloc[[1,0], [1, 0]].iloc[0, 0] == clrd.iloc[1, 1] #col assert clrd.loc[:,['CumPaidLoss','IncurLoss']].iloc[0, 0] == clrd.iloc[0,1] @@ -296,26 +296,44 @@ def test_slicers_honor_order(): assert clrd.loc[clrd['LOB']=='comauto', ['CumPaidLoss', 'IncurLoss']] == clrd[clrd['LOB']=='comauto'].iloc[:, [1,0]] def test_exposure_tri(): - x = cl.load_dataset('auto') + x = cl.load_sample('auto') x= x[x.development==12] x = x['paid'].to_frame().T.unstack().reset_index() x.columns=['LOB', 'origin', 'paid'] x.origin = x.origin.astype(str) y = cl.Triangle(x, origin='origin', index='LOB', columns='paid') - x = cl.load_dataset('auto')['paid'] + x = cl.load_sample('auto')['paid'] x = x[x.development==12] assert x == y def test_jagged_1_add(): - raa = cl.load_dataset('raa') + raa = cl.load_sample('raa') raa1 = raa[raa.origin<='1984'] raa2 = raa[raa.origin>'1984'] assert raa2 + raa1 == raa assert raa2.dropna() + raa1.dropna() == raa def test_jagged_2_add(): - raa = cl.load_dataset('raa') + raa = cl.load_sample('raa') raa1 = raa[raa.development<=48] raa2 = raa[raa.development>48] assert raa2 + raa1 == raa assert raa2.dropna() + raa1.dropna() == raa + +def test_subtriangle_slice(): + triangle = cl.load_sample('clrd').groupby('LOB').sum()[['CumPaidLoss', 'IncurLoss']] + dev = cl.Development(average='simple').fit_transform(triangle) + tail = cl.TailCurve().fit_transform(dev) + + # Test dataframe commutive + assert tail.iloc[1].tail_ == tail.tail_.iloc[1] + assert tail.loc['comauto'].tail_ == tail.tail_.loc['comauto'] + assert tail.loc['comauto', 'CumPaidLoss'].tail_ == tail.tail_.loc['comauto', 'CumPaidLoss'] + assert tail[['IncurLoss', 'CumPaidLoss']].tail_ == tail.tail_[['IncurLoss', 'CumPaidLoss']] + assert tail.iloc[:3, 0].tail_ == tail.tail_.iloc[:3,0] + # Test triangle cummutative + assert tail.iloc[1].cdf_ == tail.cdf_.iloc[1] + assert tail.loc['comauto'].cdf_ == tail.cdf_.loc['comauto'] + assert tail.loc['comauto', 'CumPaidLoss'].cdf_ == tail.cdf_.loc['comauto', 'CumPaidLoss'] + assert tail[['IncurLoss', 'CumPaidLoss']].cdf_ == tail.cdf_[['IncurLoss', 'CumPaidLoss']] + assert tail.iloc[:3, 0].cdf_ == tail.cdf_.iloc[:3,0] diff --git a/chainladder/development/tests/test_boostrap.py b/chainladder/development/tests/test_boostrap.py index 12c11b36..486ea3bd 100644 --- a/chainladder/development/tests/test_boostrap.py +++ b/chainladder/development/tests/test_boostrap.py @@ -3,7 +3,7 @@ def test_bs_sample(): - tri = cl.load_dataset('raa') + tri = cl.load_sample('raa') a = cl.Development().fit(cl.BootstrapODPSample(n_sims=40000).fit_transform(tri).mean()).ldf_ b = cl.Development().fit_transform(tri).ldf_ assert np.all(abs(((a-b)/b).values)<.005) diff --git a/chainladder/development/tests/test_constant.py b/chainladder/development/tests/test_constant.py index dc4d9a3f..357925cc 100644 --- a/chainladder/development/tests/test_constant.py +++ b/chainladder/development/tests/test_constant.py @@ -3,20 +3,20 @@ from chainladder.utils.cupy import cp def test_constant_cdf(): - dev = cl.Development().fit(cl.load_dataset('raa')) + dev = cl.Development().fit(cl.load_sample('raa')) xp = cp.get_array_module(dev.ldf_.values) link_ratios = {(num+1)*12: item for num, item in enumerate(dev.ldf_.values[0,0,0,:])} dev_c = cl.DevelopmentConstant( - patterns=link_ratios, style='ldf').fit(cl.load_dataset('raa')) + patterns=link_ratios, style='ldf').fit(cl.load_sample('raa')) xp.testing.assert_allclose(dev.cdf_.values, dev_c.cdf_.values, atol=1e-5) def 
test_constant_ldf(): - dev = cl.Development().fit(cl.load_dataset('raa')) + dev = cl.Development().fit(cl.load_sample('raa')) xp = cp.get_array_module(dev.ldf_.values) link_ratios = {(num+1)*12: item for num, item in enumerate(dev.ldf_.values[0, 0, 0, :])} dev_c = cl.DevelopmentConstant( - patterns=link_ratios, style='ldf').fit(cl.load_dataset('raa')) + patterns=link_ratios, style='ldf').fit(cl.load_sample('raa')) xp.testing.assert_allclose(dev.ldf_.values, dev_c.ldf_.values, atol=1e-5) diff --git a/chainladder/development/tests/test_development.py b/chainladder/development/tests/test_development.py index 60af9715..24f93f70 100644 --- a/chainladder/development/tests/test_development.py +++ b/chainladder/development/tests/test_development.py @@ -19,7 +19,7 @@ def mack_r(data, alpha, est_sigma): def mack_p(data, average, est_sigma): - return cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data)) + return cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_sample(data)) data = ['RAA', 'GenIns', 'MW2014'] @@ -28,26 +28,26 @@ def mack_p(data, average, est_sigma): def test_full_slice(): - assert cl.Development().fit_transform(cl.load_dataset('GenIns')).ldf_ == \ - cl.Development(n_periods=1000).fit_transform(cl.load_dataset('GenIns')).ldf_ + assert cl.Development().fit_transform(cl.load_sample('GenIns')).ldf_ == \ + cl.Development(n_periods=1000).fit_transform(cl.load_sample('GenIns')).ldf_ def test_full_slice2(): - assert cl.Development().fit_transform(cl.load_dataset('GenIns')).ldf_ == \ - cl.Development(n_periods=[1000]*(cl.load_dataset('GenIns').shape[3]-1)).fit_transform(cl.load_dataset('GenIns')).ldf_ + assert cl.Development().fit_transform(cl.load_sample('GenIns')).ldf_ == \ + cl.Development(n_periods=[1000]*(cl.load_sample('GenIns').shape[3]-1)).fit_transform(cl.load_sample('GenIns')).ldf_ def test_drop1(): - raa = cl.load_dataset('raa') + raa = cl.load_sample('raa') assert cl.Development(drop=('1982', 12)).fit(raa).ldf_.values[0, 0, 0, 0] == \ cl.Development(drop_high=[True]+[False]*8).fit(raa).ldf_.values[0, 0, 0, 0] def test_drop2(): - raa = cl.load_dataset('raa') + raa = cl.load_sample('raa') assert cl.Development(drop_valuation='1981').fit(raa).ldf_.values[0, 0, 0, 0] == \ cl.Development(drop_low=[True]+[False]*8).fit(raa).ldf_.values[0, 0, 0, 0] def test_n_periods(): - d = cl.load_dataset('usauto')['incurred'] + d = cl.load_sample('usauto')['incurred'] xp = cp.get_array_module(d.values) return xp.all(xp.around(xp.unique( cl.Development(n_periods=3, average='volume').fit(d).ldf_.values, @@ -87,7 +87,7 @@ def test_mack_std_err(data, averages, est_sigma, atol): xp.testing.assert_allclose(r, p, atol=atol) def test_assymetric_development(): - quarterly = cl.load_dataset('quarterly')['paid'] + quarterly = cl.load_sample('quarterly')['paid'] xp = cp.get_array_module(quarterly.values) dev = cl.Development(n_periods=1, average='simple').fit(quarterly) dev2 = cl.Development(n_periods=1, average='regression').fit(quarterly) diff --git a/chainladder/development/tests/test_incremental.py b/chainladder/development/tests/test_incremental.py index 78aa953f..64d085b3 100644 --- a/chainladder/development/tests/test_incremental.py +++ b/chainladder/development/tests/test_incremental.py @@ -4,7 +4,7 @@ def test_schmidt(): - tri = cl.load_dataset('ia_sample') + tri = cl.load_sample('ia_sample') xp = cp.get_array_module(tri.values) ia = cl.IncrementalAdditive() answer = ia.fit_transform(tri.iloc[0, 0], diff --git 
a/chainladder/development/tests/test_munich.py b/chainladder/development/tests/test_munich.py index 161a4f9c..5ef1de4a 100644 --- a/chainladder/development/tests/test_munich.py +++ b/chainladder/development/tests/test_munich.py @@ -9,7 +9,7 @@ def test_mcl_paid(): df = r('MunichChainLadder(MCLpaid, MCLincurred)').rx('MCLPaid') - p = cl.MunichAdjustment(paid_to_incurred={'paid':'incurred'}).fit(cl.Development(sigma_interpolation='mack').fit_transform(cl.load_dataset('mcl'))).munich_full_triangle_[0,0,0,:,:] + p = cl.MunichAdjustment(paid_to_incurred={'paid':'incurred'}).fit(cl.Development(sigma_interpolation='mack').fit_transform(cl.load_sample('mcl'))).munich_full_triangle_[0,0,0,:,:] xp = cp.get_array_module(p) arr = xp.array(df[0]) xp.testing.assert_allclose(arr, p, atol=1e-5) @@ -17,7 +17,7 @@ def test_mcl_paid(): def test_mcl_incurred(): df = r('MunichChainLadder(MCLpaid, MCLincurred)').rx('MCLIncurred') - p = cl.MunichAdjustment(paid_to_incurred={'paid':'incurred'}).fit(cl.Development(sigma_interpolation='mack').fit_transform(cl.load_dataset('mcl'))).munich_full_triangle_[1,0,0,:,:] + p = cl.MunichAdjustment(paid_to_incurred={'paid':'incurred'}).fit(cl.Development(sigma_interpolation='mack').fit_transform(cl.load_sample('mcl'))).munich_full_triangle_[1,0,0,:,:] xp = cp.get_array_module(p) arr = xp.array(df[0]) xp.testing.assert_allclose(arr, p, atol=1e-5) diff --git a/chainladder/methods/tests/test_benktander.py b/chainladder/methods/tests/test_benktander.py index e20b175c..c119a4a3 100644 --- a/chainladder/methods/tests/test_benktander.py +++ b/chainladder/methods/tests/test_benktander.py @@ -14,7 +14,7 @@ def atol(): @pytest.mark.parametrize('data', data) def test_benktander_to_chainladder(data, atol): - tri = cl.load_dataset(data) + tri = cl.load_sample(data) a = cl.Chainladder().fit(tri).ibnr_ b = cl.Benktander(apriori=.8, n_iters=255).fit(tri, sample_weight=a).ibnr_ xp = cp.get_array_module(a.values) @@ -22,9 +22,9 @@ def test_benktander_to_chainladder(data, atol): def test_bf_eq_cl_when_using_cl_apriori(): - cl_ult = cl.Chainladder().fit(cl.load_dataset('quarterly')).ultimate_ + cl_ult = cl.Chainladder().fit(cl.load_sample('quarterly')).ultimate_ cl_ult.rename('development', ['apriori']) - bf_ult = cl.BornhuetterFerguson().fit(cl.load_dataset('quarterly'), + bf_ult = cl.BornhuetterFerguson().fit(cl.load_sample('quarterly'), sample_weight=cl_ult).ultimate_ xp = cp.get_array_module(cl_ult.values) xp.testing.assert_allclose(cl_ult.values, bf_ult.values, atol=1e-5) diff --git a/chainladder/methods/tests/test_capecod.py b/chainladder/methods/tests/test_capecod.py index d7a4f6e7..5ad486e0 100644 --- a/chainladder/methods/tests/test_capecod.py +++ b/chainladder/methods/tests/test_capecod.py @@ -1,8 +1,8 @@ import chainladder as cl def test_struhuss(): - X = cl.load_dataset('cc_sample')['loss'] + X = cl.load_sample('cc_sample')['loss'] X = cl.TailConstant(tail=1/0.85).fit_transform(cl.Development().fit_transform(X)) - sample_weight = cl.load_dataset('cc_sample')['exposure'].latest_diagonal + sample_weight = cl.load_sample('cc_sample')['exposure'].latest_diagonal ibnr = int(cl.CapeCod(trend=0.07, decay=0.75).fit(X, sample_weight=sample_weight).ibnr_.sum()) assert ibnr == 17052 diff --git a/chainladder/methods/tests/test_mack.py b/chainladder/methods/tests/test_mack.py index bc2562d3..6853494f 100644 --- a/chainladder/methods/tests/test_mack.py +++ b/chainladder/methods/tests/test_mack.py @@ -22,9 +22,9 @@ def mack_r(data, alpha, est_sigma, tail): def mack_p(data, average, est_sigma, 
tail): if tail: - return cl.MackChainladder().fit(cl.TailCurve(curve='exponential').fit_transform(cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data)))) + return cl.MackChainladder().fit(cl.TailCurve(curve='exponential').fit_transform(cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_sample(data)))) else: - return cl.MackChainladder().fit(cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data))) + return cl.MackChainladder().fit(cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_sample(data))) data = ['ABC', 'MW2008'] @@ -34,8 +34,8 @@ def mack_p(data, average, est_sigma, tail): def test_mack_to_triangle(): - assert cl.MackChainladder().fit(cl.TailConstant().fit_transform(cl.Development().fit_transform(cl.load_dataset('ABC')))).summary_ == \ - cl.MackChainladder().fit(cl.Development().fit_transform(cl.load_dataset('ABC'))).summary_ + assert cl.MackChainladder().fit(cl.TailConstant().fit_transform(cl.Development().fit_transform(cl.load_sample('ABC')))).summary_ == \ + cl.MackChainladder().fit(cl.Development().fit_transform(cl.load_sample('ABC'))).summary_ @pytest.mark.parametrize('data', data) diff --git a/chainladder/methods/tests/test_predict.py b/chainladder/methods/tests/test_predict.py index eec73c5b..b4fef399 100644 --- a/chainladder/methods/tests/test_predict.py +++ b/chainladder/methods/tests/test_predict.py @@ -1,5 +1,5 @@ import chainladder as cl -raa = cl.load_dataset('RAA') +raa = cl.load_sample('RAA') raa_1989 = raa[raa.valuation < raa.valuation_date] cl_ult = cl.Chainladder().fit(raa).ultimate_ # Chainladder Ultimate apriori = cl_ult*0+(float(cl_ult.sum())/10) # Mean Chainladder Ultimate @@ -21,7 +21,7 @@ def test_mack_predict(): mack.predict(raa) def test_bs_random_state_predict(): - tri = cl.load_dataset('clrd').groupby('LOB').sum().loc['wkcomp', ['CumPaidLoss', 'EarnedPremNet']] + tri = cl.load_sample('clrd').groupby('LOB').sum().loc['wkcomp', ['CumPaidLoss', 'EarnedPremNet']] X = cl.BootstrapODPSample(random_state=100).fit_transform(tri['CumPaidLoss']) bf = cl.BornhuetterFerguson(apriori=0.6, apriori_sigma=0.1, random_state=42).fit(X, sample_weight=tri['EarnedPremNet'].latest_diagonal) assert bf.predict(X, sample_weight=tri['EarnedPremNet'].latest_diagonal).ibnr_ == bf.ibnr_ diff --git a/chainladder/tails/base.py b/chainladder/tails/base.py index ec3a5aec..40d964cd 100644 --- a/chainladder/tails/base.py +++ b/chainladder/tails/base.py @@ -77,6 +77,8 @@ def transform(self, X): X_new.ldf_._set_slicers() X_new.cdf_._set_slicers() X_new.std_err_._set_slicers() + X_new._ave_period = self._ave_period + X_new.tail_ = TailBase._tail_(X_new) return X_new def fit_transform(self, X, y=None, sample_weight=None): @@ -159,11 +161,17 @@ def _get_tail_weighted_time_period(self, X): time_pd = (xp.log(tail-1)-reg.intercept_)/reg.slope_ return time_pd - @property - def tail_(self): + @staticmethod + def _tail_(self): df = self.cdf_[self.cdf_.development== self.cdf_.development.iloc[-1-self._ave_period[0]]] if np.all(df.values.min(axis=-1) == df.values.max(axis=-1)): - df = df.T.drop_duplicates() - df.index = self.cdf_._idx_table().index + idx = self.cdf_._idx_table() + df = df.T.drop_duplicates().T + df.index = idx.index + df.columns = idx.columns return df + + @property + def tail_(self): + return TailBase._tail_(self) diff --git a/chainladder/tails/bondy.py b/chainladder/tails/bondy.py index 5b6e36ec..fedc6c97 100644 --- 
a/chainladder/tails/bondy.py +++ b/chainladder/tails/bondy.py @@ -88,7 +88,7 @@ def fit(self, X, y=None, sample_weight=None): self.b_ = pd.DataFrame( self.b_[..., 0, 0], index=idx.index, columns=idx.columns) self.earliest_ldf_ = pd.DataFrame( - self.ldf_.values[..., initial, 0], index=idx.index, columns=idx.columns) + self.ldf_.values[..., 0, initial], index=idx.index, columns=idx.columns) return self def transform(self, X): diff --git a/chainladder/tails/tests/test_bondy.py b/chainladder/tails/tests/test_bondy.py index 12e6856d..264af7ca 100644 --- a/chainladder/tails/tests/test_bondy.py +++ b/chainladder/tails/tests/test_bondy.py @@ -1,6 +1,6 @@ import chainladder as cl def test_bondy1(): - tri = cl.load_dataset('tail_sample')['paid'] + tri = cl.load_sample('tail_sample')['paid'] dev = cl.Development(average='simple').fit_transform(tri) assert round(cl.TailBondy().fit(dev).cdf_.values[0,0,0,-2],3) == 1.028 diff --git a/chainladder/tails/tests/test_constant.py b/chainladder/tails/tests/test_constant.py index 4c4416e1..e3d10194 100644 --- a/chainladder/tails/tests/test_constant.py +++ b/chainladder/tails/tests/test_constant.py @@ -4,7 +4,7 @@ def test_constant_balances(): - raa = cl.load_dataset('quarterly') + raa = cl.load_sample('quarterly') xp = cp.get_array_module(raa.values) assert round(float(xp.prod(cl.TailConstant(1.05, decay=0.8) .fit(raa).ldf_.iloc[0, 1].values[0, 0, 0, -5:])),3) == 1.050 diff --git a/chainladder/tails/tests/test_exponential.py b/chainladder/tails/tests/test_exponential.py index d0a09f02..3139c056 100644 --- a/chainladder/tails/tests/test_exponential.py +++ b/chainladder/tails/tests/test_exponential.py @@ -16,10 +16,10 @@ def mack_r(data, alpha, est_sigma): def mack_p(data, average, est_sigma): - return cl.TailCurve(curve='exponential').fit_transform(cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data))) + return cl.TailCurve(curve='exponential').fit_transform(cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_sample(data))) def mack_p_no_tail(data, average, est_sigma): - return cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data)) + return cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_sample(data)) data = ['RAA', 'ABC', 'GenIns', 'MW2008', 'MW2014'] # M3IR5 in R fails silently on exponential tail. Python actually computes it. 
@@ -74,7 +74,7 @@ def test_tail_doesnt_mutate_std_err(data, averages, est_sigma): @pytest.mark.parametrize('averages', averages[0:1]) @pytest.mark.parametrize('est_sigma', est_sigma[0:1]) def test_tail_doesnt_mutate_ldf_(data, averages, est_sigma): - p = mack_p(data, averages[0], est_sigma[0]).ldf_.values[..., :len(cl.load_dataset(data).ddims)-1] + p = mack_p(data, averages[0], est_sigma[0]).ldf_.values[..., :len(cl.load_sample(data).ddims)-1] xp = cp.get_array_module(p) p_no_tail = mack_p_no_tail(data, averages[0], est_sigma[0]).ldf_.values xp.testing.assert_array_equal(p_no_tail, p) @@ -90,6 +90,6 @@ def test_tail_doesnt_mutate_sigma_(data, averages, est_sigma): xp.testing.assert_array_equal(p_no_tail, p) def test_fit_period(): - tri = cl.load_dataset('tail_sample') + tri = cl.load_sample('tail_sample') dev = cl.Development(average='simple').fit_transform(tri) assert round(cl.TailCurve(fit_period=slice(-6,None,None), extrap_periods=10).fit(dev).cdf_['paid'].values[0,0,0,-2],3) == 1.044 diff --git a/chainladder/workflow/tests/test_workflow.py b/chainladder/workflow/tests/test_workflow.py index 5c282ce5..eda28bc2 100644 --- a/chainladder/workflow/tests/test_workflow.py +++ b/chainladder/workflow/tests/test_workflow.py @@ -2,7 +2,7 @@ def test_grid(): # Load Data - clrd = cl.load_dataset('clrd') + clrd = cl.load_sample('clrd') medmal_paid = clrd.groupby('LOB').sum().loc['medmal']['CumPaidLoss'] medmal_prem = clrd.groupby('LOB').sum().loc['medmal']['EarnedPremDIR'].latest_diagonal medmal_prem.rename('development',['premium'])
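
For reference, a minimal sketch of the behaviour this patch is meant to guarantee, mirroring the new test_subtriangle_slice test above (it uses only the clrd sample and the Development/TailCurve estimators already exercised elsewhere in this diff):

    import chainladder as cl

    # Fit development patterns and a tail on a multi-index, two-column triangle.
    triangle = cl.load_sample('clrd').groupby('LOB').sum()[['CumPaidLoss', 'IncurLoss']]
    dev = cl.Development(average='simple').fit_transform(triangle)
    tail = cl.TailCurve().fit_transform(dev)

    # With the slice.py changes, slicing the transformed object also slices its
    # sub-triangles (e.g. cdf_) and sub-dataframes (e.g. tail_), so slicing and
    # attribute access commute:
    assert tail.loc['comauto'].tail_ == tail.tail_.loc['comauto']
    assert tail.loc['comauto'].cdf_ == tail.cdf_.loc['comauto']

The slicing of these sub-attributes is handled by _update_sub_obj for .loc/.iloc access and by the sub-triangle handling added to __getitem__ in chainladder/core/slice.py.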