documentation and renamed keys to index and values to columns
jbogaardt committed Jan 23, 2019
1 parent baca117 commit fdbd175
Showing 55 changed files with 1,038 additions and 107 deletions.
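In practical terms this commit swaps the old Triangle vocabulary for pandas-style names throughout the API. A minimal orientation sketch of the mapping (the `df`/`tri` variable names are illustrative assumptions, not part of this commit):

import chainladder as cl

# Before this commit                          After this commit
# cl.Triangle(df, keys=..., values=...)   ->  cl.Triangle(df, index=..., columns=...)
# tri.keys                                ->  tri.index
# tri.values                              ->  tri.columns
# tri.rename(keys=..., values=...)        ->  tri.rename(index=..., columns=...)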
77 changes: 34 additions & 43 deletions chainladder/core/base.py
@@ -16,29 +16,29 @@ def wrapper(*args, **kwargs):
if X.triangle.ndim != 4:
raise ValueError('X.triangle must be a 4-dimensional array')
if len(X.kdims) != X.triangle.shape[0]:
raise ValueError('X.keys and X.triangle are misaligned')
raise ValueError('X.index and X.triangle are misaligned')
if len(X.vdims) != X.triangle.shape[1]:
print(X.vdims, X.shape)
raise ValueError('X.values and X.triangle are misaligned')
raise ValueError('X.columns and X.triangle are misaligned')
return X
return wrapper


class TriangleBase:
def __init__(self, data=None, origin=None, development=None,
values=None, keys=None):
columns=None, index=None):
# Sanitize Inputs
values = [values] if type(values) is str else values
columns = [columns] if type(columns) is str else columns
origin = [origin] if type(origin) is str else origin
if development is not None and type(development) is str:
development = [development]
key_gr = origin if not development else origin+development
if not keys:
keys = ['Total']
if not index:
index = ['Total']
data_agg = data.groupby(key_gr).sum().reset_index()
data_agg[keys[0]] = 'Total'
data_agg[index[0]] = 'Total'
else:
data_agg = data.groupby(key_gr+keys) \
data_agg = data.groupby(key_gr+index) \
.sum().reset_index()
# Convert origin/development to dates
origin_date = TriangleBase.to_datetime(data_agg, origin)
@@ -52,12 +52,11 @@ def __init__(self, data=None, origin=None, development=None,
development_date = origin_date
self.development_grain = self.origin_grain
col = None

# Prep the data for 4D Triangle
data_agg = self.get_axes(data_agg, keys, values,
data_agg = self.get_axes(data_agg, index, columns,
origin_date, development_date)
data_agg = pd.pivot_table(data_agg, index=keys+['origin'],
columns=col, values=values,
data_agg = pd.pivot_table(data_agg, index=index+['origin'],
columns=col, values=columns,
aggfunc='sum')
# Assign object properties
self.kdims = np.array(data_agg.index.droplevel(-1).unique())
@@ -70,7 +69,7 @@ def __init__(self, data=None, origin=None, development=None,
self.ddims = np.array([None])
self.vdims = np.array(data_agg.columns.unique())
self.valuation_date = development_date.max()
self.key_labels = keys
self.key_labels = index
self.iloc = TriangleBase.Ilocation(self)
self.loc = TriangleBase.Location(self)
# Create 4D Triangle
@@ -92,12 +91,12 @@ def shape(self):
return self.triangle.shape

@property
def keys(self):
def index(self):
return pd.DataFrame(list(self.kdims), columns=self.key_labels)

@property
def values(self):
return pd.Series(list(self.vdims), name='values').to_frame()
def columns(self):
return pd.Series(list(self.vdims), name='columns').to_frame()

@property
def origin(self):
@@ -109,14 +108,6 @@ def origin(self):
def development(self):
return pd.Series(list(self.ddims), name='development').to_frame()

@property
def columns(self):
return list(self.ddims)

@property
def index(self):
return list(self.odims)

@property
def latest_diagonal(self):
return self.get_latest_diagonal()
@@ -254,11 +245,11 @@ def trend(self, trend=0.0, axis=None):
obj.triangle = obj.triangle*trend
return obj

def rename(self, keys=None, values=None, origin=None, development=None):
if keys is not None:
self.kdims = [keys] if type(keys) is str else keys
if values is not None:
self.vdims = [values] if type(values) is str else values
def rename(self, index=None, columns=None, origin=None, development=None):
if index is not None:
self.kdims = [index] if type(index) is str else index
if columns is not None:
self.vdims = [columns] if type(columns) is str else columns
if origin is not None:
self.odims = [origin] if type(origin) is str else origin
if development is not None:
@@ -277,8 +268,8 @@ def __repr__(self):
'\nGrain: ' + 'O' + self.origin_grain + \
'D' + self.development_grain + \
'\nShape: ' + str(self.shape) + \
'\nKeys: ' + str(self.key_labels) + \
'\nValues: ' + str(list(self.vdims))
'\nindex: ' + str(self.key_labels) + \
'\ncolumns: ' + str(list(self.vdims))
return data

def _repr_html_(self):
@@ -306,7 +297,7 @@ def _repr_html_(self):
+ self.development_grain,
self.shape, self.key_labels, list(self.vdims)],
index=['Valuation:', 'Grain:', 'Shape',
'Keys:', "Values:"],
'index:', "columns:"],
name='Triangle Summary').to_frame()
pd.options.display.precision = 0
return data.to_html(max_rows=pd.options.display.max_rows,
@@ -329,7 +320,7 @@ def to_frame(self, *args, **kwargs):
if self.shape[:2] == (1, 1):
return self._repr_format()
else:
raise ValueError('len(keys) and len(values) must be 1.')
raise ValueError('len(index) and len(columns) must be 1.')

def to_clipboard(self, *args, **kwargs):
""" Passthrough of pandas functionality """
@@ -367,9 +358,9 @@ def _validate_arithmetic(self, other):
if type(other) not in [int, float, np.float64, np.int64]:
if len(self.vdims) != len(other.vdims):
raise ValueError('Triangles must have the same number of \
values')
columns')
if len(self.kdims) != len(other.kdims):
raise ValueError('Triangles must have the same number of keys')
raise ValueError('Triangles must have the same number of index')
if len(self.vdims) == 1:
other.vdims = np.array([None])
other = other.triangle
@@ -474,12 +465,12 @@ def groupby(self, by, *args, **kwargs):
class TriangleGroupBy:
def __init__(self, old_obj, by):
obj = copy.deepcopy(old_obj)
v1_len = len(obj.keys.index)
v1_len = len(obj.index.index)
if by != -1:
indices = obj.keys.groupby(by).indices
new_index = obj.keys.groupby(by).count().index
indices = obj.index.groupby(by).indices
new_index = obj.index.groupby(by).count().index
else:
indices = {'All': np.arange(len(obj.keys))}
indices = {'All': np.arange(len(obj.index))}
new_index = pd.Index(['All'], name='All')
groups = list(indices.values())
v2_len = len(groups)
@@ -573,10 +564,10 @@ def __getitem__(self, key):
if type(key) is np.ndarray:
return self._slice_origin(key)
if type(key) is pd.Series:
return self.iloc[list(self.keys[key].index)]
return self.iloc[list(self.index[key].index)]
if key in self.key_labels:
# Boolean-indexing of a particular key
return self.keys[key]
return self.index[key]
idx = self.idx_table()[key]
idx = self.idx_table_format(idx)
return TriangleBase.LocBase(self).get_idx(idx)
@@ -654,7 +645,7 @@ def complete_date_range(origin_date, development_date,
cart_prod = cart_prod[cart_prod['development'] >= cart_prod['origin']]
return cart_prod

def get_axes(self, data_agg, groupby, values,
def get_axes(self, data_agg, groupby, columns,
origin_date, development_date):
''' Preps axes for the 4D triangle
'''
@@ -667,7 +658,7 @@ def get_axes(self, data_agg, groupby, values,
all_axes.merge(data_agg, how='left',
left_on=['origin', 'development'] + groupby,
right_on=[origin_date, development_date] + groupby) \
.fillna(0)[['origin', 'development'] + groupby + values]
.fillna(0)[['origin', 'development'] + groupby + columns]
data_agg['development'] = \
TriangleBase.development_lag(data_agg['origin'],
data_agg['development'])
6 changes: 3 additions & 3 deletions chainladder/core/tests/test_triangle.py
@@ -13,7 +13,7 @@ def test_slice_by_boolean():


def test_slice_by_loc():
assert tri.loc['Aegis Grp'].loc['comauto'].keys.iloc[0, 0] == 'comauto'
assert tri.loc['Aegis Grp'].loc['comauto'].index.iloc[0, 0] == 'comauto'


def test_slice_origin():
@@ -27,7 +27,7 @@ def test_slice_development():


def test_slice_by_loc_iloc():
assert tri.groupby('LOB').sum().loc['comauto'].keys.iloc[0, 0] == 'comauto'
assert tri.groupby('LOB').sum().loc['comauto'].index.iloc[0, 0] == 'comauto'


def test_link_ratio():
@@ -109,7 +109,7 @@ def test_printer():


def test_value_order():
assert np.all(tri[['CumPaidLoss', 'BulkLoss']].values == tri[['BulkLoss', 'CumPaidLoss']].values)
assert np.all(tri[['CumPaidLoss', 'BulkLoss']].columns == tri[['BulkLoss', 'CumPaidLoss']].columns)


def test_trend():
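The updated tests exercise the renamed accessors on the bundled CLRD-style triangle. A hedged sketch of the same pattern, assuming the sample data loads through a `cl.load_dataset('clrd')` helper (an assumption, not shown in this diff):

import chainladder as cl

tri = cl.load_dataset('clrd')                     # assumed sample-data helper
tri.groupby('LOB').sum().loc['comauto'].index     # key levels, formerly `.keys`
tri[['CumPaidLoss', 'BulkLoss']].columns          # value levels, formerly `.values`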
20 changes: 10 additions & 10 deletions chainladder/core/triangle.py
@@ -10,7 +10,7 @@
class Triangle(TriangleBase):
"""
The core data structure of the chainladder package
Parameters
----------
data : DataFrame
@@ -22,18 +22,18 @@ class Triangle(TriangleBase):
development : str or list
A representation of the development/valuation periods of the triangle
that will map to the Development dimension
values : str or list
A representation of the keys of the triangle that will map to the
Keys dimension. If None, then a single 'Total' key will be generated.
keys : str or list or None
A representation of the keys of the triangle that will map to the
Keys dimension. If None, then a single 'Total' key will be generated.
columns : str or list
A representation of the numeric data of the triangle that will map to the
columns dimension. If None, then a single 'Total' key will be generated.
index : str or list or None
A representation of the index of the triangle that will map to the
index dimension. If None, then a single 'Total' key will be generated.
Attributes
----------
keys
Represents all available levels of the key dimension.
values
index
Represents all available levels of the index dimension.
columns
Represents all available levels of the value dimension.
origin
Represents all available levels of the origin dimension.
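Putting the documented parameters together, a hedged construction sketch; the DataFrame and its column names below are assumptions made up for illustration, not part of this commit:

import pandas as pd
import chainladder as cl

# Long-format loss data (illustrative only)
df = pd.DataFrame({
    'AccidentYear': [2017, 2017, 2018],
    'ValuationYear': [2017, 2018, 2018],
    'Line': ['Auto', 'Auto', 'Auto'],
    'Paid': [100.0, 150.0, 120.0]})

tri = cl.Triangle(df,
                  origin='AccidentYear',        # maps to the Origin dimension
                  development='ValuationYear',  # maps to the Development dimension
                  columns=['Paid'],             # numeric data (formerly `values`)
                  index=['Line'])               # grouping keys (formerly `keys`)
tri.shape   # 4D: (index, columns, origin, development)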
10 changes: 8 additions & 2 deletions chainladder/development/base.py
@@ -4,6 +4,7 @@
"""
import numpy as np
import copy
import warnings
from sklearn.base import BaseEstimator
from chainladder import WeightedRegression

@@ -66,8 +67,13 @@ def fit(self, X, y=None, sample_weight=None):
val = np.nan_to_num(val * (_y * 0 + 1))
_w = self._assign_n_periods_weight(X) / (_x**(val))
self.w_ = self._assign_n_periods_weight(X)
params = WeightedRegression(_w, _x, _y, axis=2, thru_orig=True) \
.fit().sigma_fill(self.sigma_interpolation)
params = WeightedRegression(_w, _x, _y, axis=2, thru_orig=True).fit()
if self.n_periods != 1:
params = params.sigma_fill(self.sigma_interpolation)
else:
warnings.warn('Setting n_periods=1 does not allow enough degrees of'
' freedom to support calculation of all regression '
'statistics. Only LDFs have been calculated.')
params.std_err_ = np.nan_to_num(params.std_err_) + \
np.nan_to_num((1-np.nan_to_num(params.std_err_*0+1)) *
params.sigma_/np.swapaxes(np.sqrt(_x**(2-val))[..., 0:1, :], -1, -2))
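The new branch means a single-period average still produces LDFs but skips the regression statistics. A hedged sketch of the behavior, assuming the estimator is exposed as `cl.Development` and that a `cl.load_dataset('raa')` helper is available (both assumptions, not shown in this diff):

import warnings
import chainladder as cl

tri = cl.load_dataset('raa')              # assumed sample triangle
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    dev = cl.Development(n_periods=1).fit(tri)
# Only the LDFs are computed; sigma/std_err interpolation is skipped and the
# warning explains the missing degrees of freedom.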
13 changes: 13 additions & 0 deletions chainladder/development/incremental.py
@@ -9,6 +9,19 @@


class IncrementalAdditive(BaseEstimator):
""" The Incremental Additive Method.
Parameters
----------
trend : float (default=0.0)
A multiplicative trend amount used to trend each development period to
a common level.
n_periods : integer, optional (default=-1)
number of origin periods to be used in the ldf average calculation. For
all origin periods, set n_periods=-1
"""
def __init__(self, trend=0.0, n_periods=-1):
self.trend = trend
self.n_periods = n_periods
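A hedged usage sketch of the newly documented parameters; the `tri` and `exposure` objects are placeholders (the incremental additive method is conventionally fit against an exposure measure), and neither is defined in this commit:

import chainladder as cl

# `tri`: assumed incremental loss Triangle; `exposure`: assumed matching
# premium/exposure Triangle. Both are illustrative placeholders.
ia = cl.IncrementalAdditive(trend=0.02, n_periods=-1)
ia.fit(tri, sample_weight=exposure)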
14 changes: 10 additions & 4 deletions chainladder/development/munich.py
@@ -10,10 +10,16 @@


class MunichAdjustment(BaseEstimator):
""" Munich Chainladder
TODO:
1. Create 'square' LDF as a triangle obj
2. Let it take both Development and Tail objects
"""Applies the Munich Chainladder adjustment to a set of paid/incurred
ldfs.
Parameters
----------
paid_to_incurred : dict
A dictionary representing the `values` of paid and incurred triangles
where `values` are an appropriate selection from :class:`Triangle`
`.values`, such as ``{'paid':'incurred'}``
"""
def __init__(self, paid_to_incurred={}):
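A hedged sketch of the documented `paid_to_incurred` mapping; the `tri` object and its 'paid'/'incurred' value labels are assumptions that would need to exist among the triangle's (newly renamed) columns:

import chainladder as cl

# `tri` is a placeholder for a Triangle whose columns include 'paid' and 'incurred'.
munich = cl.MunichAdjustment(paid_to_incurred={'paid': 'incurred'})
munich.fit(tri)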
29 changes: 28 additions & 1 deletion chainladder/tails/curve.py
@@ -8,7 +8,34 @@


class TailCurve(TailBase):
""" Curve Fit Class Documentation """
"""Allows for the entry of a constant tail factor to LDFs.
Parameters
----------
curve : str ('exponential', 'inverse_power')
The type of curve extrapolation you'd like to use
fit_period : slice
A slice object representing the range (by index) of ldfs to use in
the curve fit.
extrap_periods : int
The number of development periods from the attachment point to extrapolate
the fit.
errors : str ('raise' or 'ignore')
Whether to raise an error or ignore observations that violate the
distribution being fit. The most common violation is LDFs < 1.0, which will
not work with either the `exponential` or `inverse_power` fits.
Attributes
----------
ldf_
ldf_ with tail applied.
cdf_
cdf_ with tail applied.
sigma_
sigma_ with tail factor applied.
std_err_
std_err_ with tail factor applied
"""
def __init__(self, curve='exponential', fit_period=slice(None, None, None),
extrap_periods=100, errors='ignore'):
self.curve = curve
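A hedged sketch of the documented parameters; `dev` stands in for the output of an upstream development fit and is an assumption, not something defined in this diff:

import chainladder as cl

# `dev` is a placeholder assumed to carry ldf_/sigma_/std_err_ from a prior
# Development fit.
tail = cl.TailCurve(curve='inverse_power', fit_period=slice(-6, None),
                    extrap_periods=50, errors='ignore')
tail.fit(dev)
tail.ldf_   # development pattern with the extrapolated tail appended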