Skip to content

Commit

Permalink
Allow full origin and development dates at triangle creation
Browse files Browse the repository at this point in the history
  • Loading branch information
jbogaardt committed Jul 26, 2020
1 parent 0b6c9d5 commit 94f0f55
Show file tree
Hide file tree
Showing 2 changed files with 344 additions and 278 deletions.
56 changes: 31 additions & 25 deletions chainladder/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, data=None, origin=None, development=None,
else:
self.array_backend = array_backend
if data is None:
' Instance with nothing set'
# Instance with nothing set - Useful for piecemeal triangle creation
return
if columns:
check = data[columns].dtypes
Expand All @@ -39,25 +39,17 @@ def __init__(self, data=None, origin=None, development=None,
# Sanitize inputs
index, columns, origin, development = self._str_to_list(
index, columns, origin, development)
key_gr = origin + self._flatten(development, index)
# Aggregate data

data_agg = data.groupby(key_gr).sum().reset_index().fillna(0)
if not index:
index = ['Total']
data_agg[index[0]] = 'Total'
for item in index:
if pd.api.types.is_numeric_dtype(data_agg[item]):
data_agg[item] = data_agg[item].astype(str)
# Initialize origin and development dates and grains
origin_date = TriangleBase._to_datetime(
data_agg, origin, format=origin_format)
data, origin, format=origin_format)
origin_date.name = 'origin'
self.origin_grain = TriangleBase._get_grain(origin_date)

origin_date = pd.PeriodIndex(origin_date, freq=self.origin_grain).to_timestamp()
m_cnt = {'Y': 12, 'Q': 3, 'M': 1}
if development:
development_date = TriangleBase._to_datetime(
data_agg, development, period_end=True,
data, development, period_end=True,
format=development_format)
self.development_grain = TriangleBase._get_grain(development_date)
col = 'development'
Expand All @@ -66,37 +58,51 @@ def __init__(self, data=None, origin=None, development=None,
pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain])
self.development_grain = self.origin_grain
col = None
development_date.name = 'development'

# Aggregate data

key_gr = [origin_date, development_date] + \
[data[item] for item in self._flatten(index)]
data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
if not index:
index = ['Total']
data_agg[index[0]] = 'Total'
for item in index:
if pd.api.types.is_numeric_dtype(data_agg[item]):
data_agg[item] = data_agg[item].astype(str)

# Prep the data for 4D Triangle
self.valuation_date = development_date.max()
origin_date = pd.PeriodIndex(origin_date, freq=self.origin_grain).to_timestamp()
self.valuation_date = data_agg['development'].max()
# Assign object properties
date_axes = self._get_date_axes(origin_date, development_date) # cartesian product
date_axes = self._get_date_axes(data_agg['origin'], data_agg['development']) # cartesian product
dev_lag_unique = TriangleBase._development_lag(date_axes['origin'], date_axes['development'])
dev_lag = TriangleBase._development_lag(pd.Series(origin_date), pd.Series(development_date))
dev_lag = TriangleBase._development_lag(data_agg['origin'], data_agg['development'])
dev = np.sort(dev_lag_unique.unique())
orig = np.sort(date_axes['origin'].unique())
key = data_agg[index].drop_duplicates().reset_index(drop=True)
dev = dict(zip(dev, range(len(dev))))
orig = dict(zip(orig, range(len(orig))))
kdims = {v:k for k, v in key.sum(axis=1).to_dict().items()}
orig_idx = origin_date.map(orig).values[None].T
orig_idx = data_agg['origin'].map(orig).values[None].T
if development:
dev_idx = dev_lag.map(dev).values[None].T
else:
dev_idx = (dev_lag*0).values[None].T
data_agg = data_agg[origin_date<=development_date]
orig_idx = orig_idx[origin_date<=development_date]
dev_idx = dev_idx[origin_date<=development_date]
if sum(origin_date>development_date) > 0:

data_agg = data_agg[data_agg['origin']<=data_agg['development']]
orig_idx = orig_idx[data_agg['origin']<=data_agg['development']]
dev_idx = dev_idx[data_agg['origin']<=data_agg['development']]
if sum(data_agg['origin']>data_agg['development']) > 0:
warnings.warn("Observations with development before origin start have been removed.")
key_idx = data_agg[index].sum(axis=1).map(kdims).values[None].T
val_idx = ((np.ones(len(data_agg))[None].T)*range(len(columns))).reshape((1,-1), order='F').T
coords = np.concatenate(tuple([np.concatenate((orig_idx, dev_idx), axis=1)]*len(columns)), axis=0)
coords = np.concatenate((np.concatenate(tuple([key_idx]*len(columns)), axis=0), val_idx, coords), axis=1)
coords = np.concatenate((np.concatenate(tuple([key_idx]*len(columns)), axis=0), val_idx, coords), axis=1)
amts = data_agg[columns].unstack()
amts.loc[amts==0] = np.nan
amts.loc[amts==0] = sp.nan
amts = amts.values.astype('float64')
values = sp(coords.T, amts, prune=True, fill_value=np.nan,
values = sp(coords.T, amts, prune=True, fill_value=sp.nan,
shape=(len(key), len(columns), len(orig),
len(dev) if development else 1))
self.kdims = np.array(key)
Expand Down
Loading

0 comments on commit 94f0f55

Please sign in to comment.