Skip to content

Commit

Permalink
index to allow numerics
Browse files: browse the repository at this point in the history
  • Loading branch information
jbogaardt committed Jul 9, 2020
1 parent 4386c1f commit d9202e5
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 12 deletions.
12 changes: 10 additions & 2 deletions chainladder/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,20 @@ def __init__(self, data=None, origin=None, development=None,
index, columns, origin, development = self._str_to_list(
index, columns, origin, development)
key_gr = origin + self._flatten(development, index)

# Aggregate data

data_agg = data.groupby(key_gr).sum().reset_index().fillna(0)
if not index:
index = ['Total']
data_agg[index[0]] = 'Total'
for item in index:
if pd.api.types.is_numeric_dtype(data_agg[item]):
data_agg[item] = data_agg[item].astype(str)
# Initialize origin and development dates and grains
origin_date = TriangleBase._to_datetime(
data_agg, origin, format=origin_format)
self.origin_grain = TriangleBase._get_grain(origin_date)

m_cnt = {'Y': 12, 'Q': 3, 'M': 1}
if development:
development_date = TriangleBase._to_datetime(
Expand Down Expand Up @@ -92,14 +96,18 @@ def __init__(self, data=None, origin=None, development=None,
amts = data_agg[columns].unstack().values.astype('float64')
values = sp(coords.T.astype('int32'), amts, shape=(len(key), len(columns), len(orig), len(dev) if development else 1), prune=True)
self.kdims = np.array(key)
self.key_labels = index
for num, item in enumerate(index):
if item in data.columns:
if pd.api.types.is_numeric_dtype(data[item]):
self.kdims[:, num] = self.kdims[:, num].astype(data[item].dtype)
self.odims = np.sort(date_axes['origin'].unique())
if development:
self.ddims = np.sort(dev_lag_unique.unique())
self.ddims = self.ddims*(m_cnt[self.development_grain])
else:
self.ddims = np.array([None])
self.vdims = np.array(columns)
self.key_labels = index
self._set_slicers()
# Create 4D Triangle
if self.array_backend == 'numpy':
Expand Down
11 changes: 7 additions & 4 deletions chainladder/development/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,13 +231,16 @@ def fit(self, X, y=None, sample_weight=None):
for i in [2, 1, 0]:
val = xp.repeat(val[None], tri_array.shape[i], axis=0)
val = xp.nan_to_num(val * (y * 0 + 1))
if xp == cp:
if xp in [cp, sp]:
link_ratio = y / x
else:
link_ratio = xp.divide(y, x, where=xp.nan_to_num(x) != 0)
self.w_ = xp.array(self._assign_n_periods_weight(X) *
self._drop_adjustment(X, link_ratio),
dtype='float16')
if xp == sp:
self.w_ = sp(self._assign_n_periods_weight(X) *
self._drop_adjustment(X, link_ratio))
else:
self.w_ = xp.array(self._assign_n_periods_weight(X) *
self._drop_adjustment(X, link_ratio))
w = self.w_ / (x**(val))
params = WeightedRegression(axis=2, thru_orig=True).fit(x, y, w)
if self.n_periods != 1:
Expand Down
4 changes: 3 additions & 1 deletion chainladder/utils/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
sp.diagonal = sparse.diagonal
sp.zeros = sparse.zeros
sp.testing.assert_array_equal = np.testing.assert_equal
sp.sqrt = np.sqrt
sp.log = np.log


def nan_to_num(a):
Expand Down Expand Up @@ -93,7 +95,7 @@ def swapaxes(a, axis1, axis2):
def repeat(a, repeats, axis):
"""Repeat elements of an array"""
r = []
for item in range(1, repeats+1):
for item in range(repeats):
coords = a.coords.copy()
coords[axis] = coords[axis]+item
r.append(coords)
Expand Down
15 changes: 10 additions & 5 deletions chainladder/utils/weighted_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import numpy as np
from chainladder.utils.cupy import cp
from chainladder.utils.sparse import sp
from sklearn.base import BaseEstimator

class WeightedRegression(BaseEstimator):
Expand Down Expand Up @@ -42,8 +43,9 @@ def _fit_OLS(self):
'''
w, x, y, axis = self.w.copy(), self.x.copy(), self.y.copy(), self.axis
xp = cp.get_array_module(x)
x[w == 0] = xp.nan
y[w == 0] = xp.nan
if xp != sp:
x[w == 0] = xp.nan
y[w == 0] = xp.nan
slope = (
(xp.nansum(w*x*y, axis)-xp.nansum(x*w, axis)*xp.nanmean(y, axis)) /
(xp.nansum(w*x*x, axis)-xp.nanmean(x, axis)*xp.nansum(w*x, axis)))
Expand All @@ -62,11 +64,13 @@ def _fit_OLS_thru_orig(self):
residual = (y-fitted_value)*xp.sqrt(w)
wss_residual = xp.nansum(residual**2, axis)
mse_denom = xp.nansum((y*0+1)*(w!=0), axis)-1
mse_denom[mse_denom == 0] = xp.nan
if xp != sp:
mse_denom[mse_denom == 0] = xp.nan
mse = wss_residual / mse_denom
std_err = xp.sqrt(mse/xp.nansum(w*x*x*(y*0+1), axis))
std_err = std_err[..., None]
std_err[std_err == 0] = xp.nan
if xp != sp:
std_err[std_err == 0] = xp.nan
coef = coef[..., None]
sigma = xp.sqrt(mse)[..., None]
self.slope_ = coef
Expand All @@ -92,7 +96,8 @@ def loglinear_interpolation(self, y):
''' Use Cases: generally for filling in last element of sigma_
'''
xp = cp.get_array_module(y)
y[y == 0] = xp.nan
if xp != sp:
y[y == 0] = xp.nan
ly = xp.log(y)
w = xp.nan_to_num(ly*0+1)
reg = WeightedRegression(self.axis, False).fit(None, ly, w)
Expand Down

0 comments on commit d9202e5

Please sign in to comment.