Skip to content

Commit

Permalink
resolves #129
Browse files Browse the repository at this point in the history
  • Loading branch information
jbogaardt committed Mar 1, 2021
1 parent 3dfb7d6 commit 79ea9a4
Show file tree
Hide file tree
Showing 19 changed files with 255 additions and 119 deletions.
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ Available Estimators
+------------------------------+------------------+-------------------------+-----------------------+-----------------------+
| `CaseOutstanding`_ | | | | |
+------------------------------+------------------+-------------------------+-----------------------+-----------------------+
| `TweedieGLM`_ | | | | |
+------------------------------+------------------+-------------------------+-----------------------+-----------------------+
| `DevelopmentML`_ | | | | |
+------------------------------+------------------+-------------------------+-----------------------+-----------------------+

Documentation
-------------
Expand Down Expand Up @@ -85,6 +89,8 @@ code documentation.
.. _VotingChainladder: https://chainladder-python.readthedocs.io/en/latest/modules/workflow.html#votingchainladder
.. _Trend: https://chainladder-python.readthedocs.io/en/latest/modules/adjustments.html#trend
.. _CaseOutstanding: https://chainladder-python.readthedocs.io/en/latest/modules/development.html#caseoutstanding
.. _TweedieGLM: https://chainladder-python.readthedocs.io/en/latest/modules/development.html#tweedieglm
.. _DevelopmentML: https://chainladder-python.readthedocs.io/en/latest/modules/development.html#developmentml
.. _Documentation: https://chainladder-python.readthedocs.io/en/latest/

Getting Started Tutorials
Expand Down
4 changes: 0 additions & 4 deletions chainladder/development/clark.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,6 @@ def solver(x):
obj._set_slicers()
self.ldf_ = obj
self.ldf_.valuation_date = pd.to_datetime(ULT_VAL)
self.sigma_ = self.ldf_ * 0 + 1
self.std_err_ = self.ldf_ * 0 + 1
rows = X.index.set_index(X.key_labels).index
self.omega_ = pd.DataFrame(params[..., 0, 0], index=rows, columns=X.vdims)
self.theta_ = pd.DataFrame(params[..., 0, 1], index=rows, columns=X.vdims)
Expand Down Expand Up @@ -237,8 +235,6 @@ def transform(self, X):
X_new = X.copy()
triangles = [
"ldf_",
"sigma_",
"std_err_",
"omega_",
"theta_",
"incremental_fits_",
Expand Down
4 changes: 1 addition & 3 deletions chainladder/development/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,6 @@ def fit(self, X, y=None, sample_weight=None):
self.ldf_.is_pattern = True
self.ldf_.is_cumulative = False
self.ldf_.valuation_date = pd.to_datetime(ULT_VAL)
self.sigma_ = self.ldf_ * 0 + 1
self.std_err_ = self.ldf_ * 0 + 1
return self

def transform(self, X):
Expand All @@ -97,7 +95,7 @@ def transform(self, X):
X_new : New triangle with transformed attributes.
"""
X_new = X.copy()
triangles = ["ldf_", "sigma_", "std_err_"]
triangles = ["ldf_"]
for item in triangles:
setattr(X_new, item, getattr(self, item))
X_new._set_slicers()
Expand Down
36 changes: 14 additions & 22 deletions chainladder/development/glm.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,35 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

import pandas as pd
import numpy as np
from patsy import dmatrix
from sklearn.base import BaseEstimator, TransformerMixin
from chainladder.development.base import DevelopmentBase
from chainladder.development.learning import DevelopmentML
from sklearn.linear_model import TweedieRegressor
from sklearn.pipeline import Pipeline


class PatsyFormula(BaseEstimator, TransformerMixin):
""" A sklearn-style wrapper for patsy formulas """
def __init__(self, formula=None):
self.formula = formula

def fit(self, X, y=None, sample_weight=None):
self.design_info_ = dmatrix(self.formula, X).design_info
return self

def transform(self, X):
return dmatrix(self.design_info_, X)
from chainladder.utils.utility_functions import PatsyFormula


class TweedieGLM(DevelopmentBase):
""" This estimator creates development patterns with a GLM using a Tweedie distribution.
The Tweedie family includes several of the more popular distributions including
the normal, ODP poisson, and gamma distributions. This class is a special case
of `DevelopmentML`. It restricts to just GLM using a TweedieRegressor and
provides an R-like formulation of the design matrix.
.. versionadded:: 0.8.1
Parameters
-----------
design_matrix : formula-like
A patsy formula describing the independent variables, X of the GLM
response : str, default None
Name of the response column.
response : str
Column name for the response variable of the GLM. If omitted, then the
first column of the Triangle will be used.
weight : str
Column name of any weight to use in the GLM. If none specified, then an
unweighted regression will be performed.
power : float, default=0
The power determines the underlying target distribution according
to the following table:
Expand Down Expand Up @@ -84,9 +74,10 @@ class TweedieGLM(DevelopmentBase):
"""

def __init__(self, design_matrix='C(development) + C(origin)',
response=None, power=1.0, alpha=1.0, link='log',
response=None, weight=None, power=1.0, alpha=1.0, link='log',
max_iter=100, tol=0.0001, warm_start=False, verbose=0):
self.response=response
self.weight=weight
self.design_matrix = design_matrix
self.power=power
self.alpha=alpha
Expand All @@ -104,7 +95,7 @@ def fit(self, X, y=None, sample_weight=None):
link=self.link, power=self.power, max_iter=self.max_iter,
tol=self.tol, warm_start=self.warm_start,
verbose=self.verbose, fit_intercept=False))]),
y_ml=response).fit(X)
y_ml=response, weight_ml=self.weight).fit(X)
return self

@property
Expand All @@ -119,7 +110,8 @@ def triangle_glm_(self):
def coef_(self):
return pd.Series(
self.model.estimator_ml.named_steps.model.coef_, name='coef_',
index=list(self.model.estimator_ml.named_steps.design_matrix.design_info_.column_name_indexes.keys())
index=list(self.model.estimator_ml.named_steps.design_matrix.
design_info_.column_name_indexes.keys())
).to_frame()

def transform(self, X):
Expand Down
5 changes: 3 additions & 2 deletions chainladder/development/incremental.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def fit(self, X, y=None, sample_weight=None):
X = X.copy()
if sample_weight.array_backend == "sparse":
sample_weight = sample_weight.set_backend("numpy")
else:
sample_weight = sample_weight.copy()
xp = X.get_array_module()
sample_weight.is_cumulative = False
obj = X.cum_to_incr() / sample_weight.values
Expand Down Expand Up @@ -141,7 +143,6 @@ def fit(self, X, y=None, sample_weight=None):
1/(1+future_trend)-1, axis='valuation', start=X.valuation_date,
end=self.incremental_.valuation_date)
self.ldf_ = obj.incr_to_cum().link_ratio
self.sigma_ = self.std_err_ = 0 * self.ldf_
return self

def transform(self, X):
Expand All @@ -158,6 +159,6 @@ def transform(self, X):
X_new : New triangle with transformed attributes.
"""
X_new = X.copy()
for item in ["incremental_", "ldf_", "sigma_", "std_err_"]:
for item in ["ldf_"]:
X_new.__dict__[item] = self.__dict__[item]
return X_new
83 changes: 50 additions & 33 deletions chainladder/development/learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,51 @@
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures
from sklearn.compose import ColumnTransformer
from chainladder.development.base import DevelopmentBase
from chainladder import ULT_VAL


class DevelopmentML(DevelopmentBase):
""" An Estimator that interfaces with machine learning (ML) tools that implement
the scikit-learn API.
The `DevelopmentML` estimator is used to generate ``ldf_`` patterns from
the data.
.. versionadded:: 0.8.1
Parameters
----------
estimator_ml : sklearn Estimator
Any sklearn compatible regression estimator, including Pipelines and
y_ml : list or str or sklearn_transformer
The response column(s) for the machine learning algorithm. It must be
present within the Triangle.
y_features :
autoregressive : tuple, (autoregressive_col_name, lag, source_col_name)
The subset of response column(s) to use as lagged features for the
Time Series aspects of the model. Predictions from one development period
get used as features in the next development period.
get used as features in the next development period. Lags should be negative
integers.
fit_incrementals :
Whether the response variable should be converted to an incremental basis
for fitting.
Attributes
----------
estimator_ml : Estimator
An sklearn-style estimator to predict development patterns
ldf_ : Triangle
The estimated loss development patterns.
cdf_ : Triangle
The estimated cumulative development patterns.
"""
def __init__(self, estimator_ml=None,
y_ml=None, y_features=False, fit_incrementals=True):
def __init__(self, estimator_ml=None, y_ml=None, autoregressive=False,
weight_ml=None, fit_incrementals=True):
self.estimator_ml=estimator_ml
self.y_ml=y_ml
self.y_features=y_features
self.weight_ml = weight_ml
self.autoregressive=autoregressive
self.fit_incrementals=fit_incrementals

def _get_y_names(self):
Expand Down Expand Up @@ -77,14 +86,16 @@ def y_ml_(self):
else:
return transformer

def _get_triangle_ml(self, df):
def _get_triangle_ml(self, df, preds=None):
""" Create fitted Triangle """
from chainladder.core import Triangle
preds = self.estimator_ml.predict(df)
if preds is None:
preds = self.estimator_ml.predict(df)
X_r = [df]
y_r = [preds]
dgrain = {'Y':12, 'Q':3, 'M': 1}[self.development_grain_]
latest_filter = df['origin']+(df['development']-dgrain)/dgrain
ograin = {'Y':1, 'Q':4, 'M': 12}[self.origin_grain_]
latest_filter = (df['origin']+1)*ograin+(df['development']-dgrain)/dgrain
latest_filter = latest_filter == latest_filter.max()
preds=pd.DataFrame(preds.copy())[latest_filter].values
out = df.loc[latest_filter].copy()
Expand All @@ -93,10 +104,12 @@ def _get_triangle_ml(self, df):
out['development'] = out['development'] + dgrain
if len(preds.shape) == 1:
preds = preds[:, None]
if self.y_features:
for num, col in enumerate(self.y_features):
if self.autoregressive:
for num, col in enumerate(self.autoregressive):
out[col[0]]=preds[:, num]
out = out[out['development']<=dev_lags.max()]
if len(out) == 0:
continue
X_r.append(out.copy())
preds = self.estimator_ml.predict(out)
y_r.append(preds.copy())
Expand All @@ -108,8 +121,26 @@ def _get_triangle_ml(self, df):
out['origin'] = out['origin'].map({v: k for k, v in self.origin_encoder_.items()})
out = out.merge(self.valuation_vector_, how='left', on=['origin', 'development'])
return Triangle(
out, origin='origin', development='valuation', index=self._key_labels, columns=self._get_y_names()).dropna()
out, origin='origin', development='valuation',
index=self._key_labels, columns=self._get_y_names(),
cumulative=not self.fit_incrementals).dropna()

def _prep_X_ml(self, X):
""" Preps Triangle data ahead of the pipeline

Converts the Triangle ``X`` into a long-format DataFrame whose ``origin``
column has been mapped through ``self.origin_encoder_``.

Parameters
----------
X : Triangle
    The Triangle to be converted.

Returns
-------
df : DataFrame
    ``X.incr_to_cum()`` rendered as a frame (minus its last column),
    left-merged with ``X.cum_to_incr()`` rendered as a frame, with
    missing values filled with 0.
"""
# Working copy on the basis selected by ``fit_incrementals``.
# NOTE(review): ``X_`` (including the lagged columns added below) is not
# referenced again after it is built -- the returned frame is derived
# from ``X`` directly. Confirm whether ``df`` was meant to use ``X_``.
if self.fit_incrementals:
X_ = X.cum_to_incr()
else:
X_ = X.copy()
if self.autoregressive:
# Each entry is indexed as i[0] (new column name), i[1] (shift amount)
# and i[2] (source column name).
for i in self.autoregressive:
lag = X[i[2]].shift(i[1])
# Keep only lagged cells valued at or before X's valuation date.
X_[i[0]] = lag[lag.valuation<=X.valuation_date]
# Frame of the cumulative triangle with its last column dropped; the
# remaining columns serve as the merge keys below.
# presumably the dropped column is the value column -- TODO confirm.
df_base = X.incr_to_cum().to_frame(keepdims=True).reset_index().iloc[:, :-1]
# Left-merge the incremental values onto those keys; rows with no match
# end up as 0 via fillna.
df = df_base.merge(
X.cum_to_incr().to_frame(keepdims=True).reset_index(), how='left',
on=list(df_base.columns)).fillna(0)
# Replace origin labels with their encoded values.
df['origin'] = df['origin'].map(self.origin_encoder_)
return df

def fit(self, X, y=None, sample_weight=None):
"""Fit the model with X.
Expand All @@ -129,10 +160,6 @@ def fit(self, X, y=None, sample_weight=None):
Returns the instance itself.
"""

if self.fit_incrementals:
X_ = X.cum_to_incr()
else:
X_ = X.copy()
self._columns = list(X.columns)
self._key_labels = X.key_labels
self.origin_grain_ = X.origin_grain
Expand All @@ -144,24 +171,16 @@ def fit(self, X, y=None, sample_weight=None):
X.valuation.values.reshape(X.shape[-2:], order='F'),
index=X.odims, columns=X.ddims).unstack().reset_index()
self.valuation_vector_.columns=['development', 'origin', 'valuation']
# response as a feature
if self.y_features:
for i in self.y_features:
lag = X[i[2]].shift(i[1])
X_[i[0]] = lag[lag.valuation<=X.valuation_date]

df = X_.to_frame(keepdims=True).reset_index().fillna(0)
df['origin'] = df['origin'].map(self.origin_encoder_)
self.df_ = df # Unncecessary, used for debugging

df = self._prep_X_ml(X)
self.df_ = df
# Fit model
self.estimator_ml.fit(df, self.y_ml_.fit_transform(df).squeeze())
#return self
self.triangle_ml_ = self._get_triangle_ml(df)
return self

@property
def ldf_(self):
from chainladder import ULT_VAL
ldf = self.triangle_ml_.incr_to_cum().link_ratio
ldf.valuation_date = pd.to_datetime(ULT_VAL)
return ldf
Expand All @@ -179,13 +198,11 @@ def transform(self, X):
-------
X_new : New triangle with transformed attributes.
"""

X_new = X.copy()
triangles = [
"ldf_",
]
for item in triangles:
setattr(X_new, item, getattr(self, item))
X_new.sigma_ = X_new.std_err_ = X_new.ldf_ * 0 + 1
X_ml = self._prep_X_ml(X)
y_ml=self.estimator_ml.predict(X_ml)
triangle_ml = self._get_triangle_ml(X_ml, y_ml)
X_new.ldf_ = triangle_ml.incr_to_cum().link_ratio
X_new.ldf_.valuation_date = pd.to_datetime(ULT_VAL)
X_new._set_slicers()
return X_new
3 changes: 1 addition & 2 deletions chainladder/development/outstanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ def fit(self, X, y=None, sample_weight=None):
dev.is_pattern=True
dev.is_cumulative=True
self.ldf_ = dev.cum_to_incr()
self.std_err_ = self.sigma_ = self.ldf_ * 0 + 1
return self

@property
Expand Down Expand Up @@ -142,7 +141,7 @@ def transform(self, X):
X_new : New triangle with transformed attributes.
"""
X_new = X.copy()
triangles = ["ldf_", "sigma_", "std_err_"]
triangles = ["ldf_"]
for item in triangles:
setattr(X_new, item, getattr(self, item))
X_new._set_slicers()
Expand Down
2 changes: 1 addition & 1 deletion chainladder/development/tests/test_incremental.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def test_schmidt():
answer = ia.fit_transform(
tri.iloc[0, 0], sample_weight=tri.iloc[0, 1].latest_diagonal
)
answer = answer.incremental_.incr_to_cum().values[0, 0, :, -1]
answer = ia.incremental_.incr_to_cum().values[0, 0, :, -1]
check = xp.array(
[
3483.0,
Expand Down
2 changes: 1 addition & 1 deletion chainladder/methods/mack.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def fit(self, X, y=None, sample_weight=None):
Returns the instance itself.
"""
super().fit(X, y, sample_weight)
if not ("average_" in self.X_ and "w_" in self.X_):
if "sigma_" not in self.X_:
raise ValueError("Triangle not compatible with MackChainladder")
# Caching full_triangle_ for fit as it is called a lot
self.X_._full_triangle_ = self.full_triangle_
Expand Down
Loading

0 comments on commit 79ea9a4

Please sign in to comment.