index to allow numerics

casact · Jul 9, 2020 · d9202e5 · d9202e5
1 parent 4386c1f
commit d9202e5
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 12 deletions.
diff --git a/chainladder/core/base.py b/chainladder/core/base.py
@@ -40,16 +40,20 @@ def __init__(self, data=None, origin=None, development=None,
         index, columns, origin, development = self._str_to_list(
             index, columns, origin, development)
         key_gr = origin + self._flatten(development, index)
-
         # Aggregate data
+
         data_agg = data.groupby(key_gr).sum().reset_index().fillna(0)
         if not index:
             index = ['Total']
             data_agg[index[0]] = 'Total'
+        for item in index:
+            if pd.api.types.is_numeric_dtype(data_agg[item]):
+                data_agg[item] = data_agg[item].astype(str)
         # Initialize origin and development dates and grains
         origin_date = TriangleBase._to_datetime(
             data_agg, origin, format=origin_format)
         self.origin_grain = TriangleBase._get_grain(origin_date)
+
         m_cnt = {'Y': 12, 'Q': 3, 'M': 1}
         if development:
             development_date = TriangleBase._to_datetime(
@@ -92,14 +96,18 @@ def __init__(self, data=None, origin=None, development=None,
         amts = data_agg[columns].unstack().values.astype('float64')
         values = sp(coords.T.astype('int32'), amts, shape=(len(key), len(columns), len(orig), len(dev) if development else 1), prune=True)
         self.kdims = np.array(key)
+        self.key_labels = index
+        for num, item in enumerate(index):
+            if item in data.columns:
+                if pd.api.types.is_numeric_dtype(data[item]):
+                    self.kdims[:, num] = self.kdims[:, num].astype(data[item].dtype)
         self.odims = np.sort(date_axes['origin'].unique())
         if development:
             self.ddims = np.sort(dev_lag_unique.unique())
             self.ddims = self.ddims*(m_cnt[self.development_grain])
         else:
             self.ddims = np.array([None])
         self.vdims = np.array(columns)
-        self.key_labels = index
         self._set_slicers()
         # Create 4D Triangle
         if self.array_backend == 'numpy':

diff --git a/chainladder/development/base.py b/chainladder/development/base.py
@@ -231,13 +231,16 @@ def fit(self, X, y=None, sample_weight=None):
         for i in [2, 1, 0]:
             val = xp.repeat(val[None], tri_array.shape[i], axis=0)
         val = xp.nan_to_num(val * (y * 0 + 1))
-        if xp == cp:
+        if xp in [cp, sp]:
             link_ratio = y / x
         else:
             link_ratio = xp.divide(y, x, where=xp.nan_to_num(x) != 0)
-        self.w_ = xp.array(self._assign_n_periods_weight(X) *
-                           self._drop_adjustment(X, link_ratio),
-                           dtype='float16')
+        if xp == sp:
+            self.w_ = sp(self._assign_n_periods_weight(X) *
+                         self._drop_adjustment(X, link_ratio))
+        else:
+            self.w_ = xp.array(self._assign_n_periods_weight(X) *
+                               self._drop_adjustment(X, link_ratio))
         w = self.w_ / (x**(val))
         params = WeightedRegression(axis=2, thru_orig=True).fit(x, y, w)
         if self.n_periods != 1:

diff --git a/chainladder/utils/sparse.py b/chainladder/utils/sparse.py
@@ -18,6 +18,8 @@
 sp.diagonal = sparse.diagonal
 sp.zeros = sparse.zeros
 sp.testing.assert_array_equal = np.testing.assert_equal
+sp.sqrt = np.sqrt
+sp.log = np.log
 
 
 def nan_to_num(a):
@@ -93,7 +95,7 @@ def swapaxes(a, axis1, axis2):
 def repeat(a, repeats, axis):
     """Repeat elements of an array"""
     r = []
-    for item in range(1, repeats+1):
+    for item in range(repeats):
         coords = a.coords.copy()
         coords[axis] = coords[axis]+item
         r.append(coords)

diff --git a/chainladder/utils/weighted_regression.py b/chainladder/utils/weighted_regression.py
@@ -3,6 +3,7 @@
 # file, You can obtain one at https://mozilla.org/MPL/2.0/.
 import numpy as np
 from chainladder.utils.cupy import cp
+from chainladder.utils.sparse import sp
 from sklearn.base import BaseEstimator
 
 class WeightedRegression(BaseEstimator):
@@ -42,8 +43,9 @@ def _fit_OLS(self):
         '''
         w, x, y, axis = self.w.copy(), self.x.copy(), self.y.copy(), self.axis
         xp = cp.get_array_module(x)
-        x[w == 0] = xp.nan
-        y[w == 0] = xp.nan
+        if xp != sp:
+            x[w == 0] = xp.nan
+            y[w == 0] = xp.nan
         slope = (
             (xp.nansum(w*x*y, axis)-xp.nansum(x*w, axis)*xp.nanmean(y, axis)) /
             (xp.nansum(w*x*x, axis)-xp.nanmean(x, axis)*xp.nansum(w*x, axis)))
@@ -62,11 +64,13 @@ def _fit_OLS_thru_orig(self):
         residual = (y-fitted_value)*xp.sqrt(w)
         wss_residual = xp.nansum(residual**2, axis)
         mse_denom = xp.nansum((y*0+1)*(w!=0), axis)-1
-        mse_denom[mse_denom == 0] = xp.nan
+        if xp != sp:
+            mse_denom[mse_denom == 0] = xp.nan
         mse = wss_residual / mse_denom
         std_err = xp.sqrt(mse/xp.nansum(w*x*x*(y*0+1), axis))
         std_err = std_err[..., None]
-        std_err[std_err == 0] = xp.nan
+        if xp != sp:
+            std_err[std_err == 0] = xp.nan
         coef = coef[..., None]
         sigma = xp.sqrt(mse)[..., None]
         self.slope_ = coef
@@ -92,7 +96,8 @@ def loglinear_interpolation(self, y):
         ''' Use Cases: generally for filling in last element of sigma_
         '''
         xp = cp.get_array_module(y)
-        y[y == 0] = xp.nan
+        if xp != sp:
+            y[y == 0] = xp.nan
         ly = xp.log(y)
         w = xp.nan_to_num(ly*0+1)
         reg = WeightedRegression(self.axis, False).fit(None, ly, w)