diff --git a/chainladder/core/__init__.py b/chainladder/core/__init__.py index ff716634..514d58cc 100644 --- a/chainladder/core/__init__.py +++ b/chainladder/core/__init__.py @@ -1,5 +1,6 @@ -from chainladder.core.triangle import Triangle # noqa (API import) +from chainladder.core.legacy import Triangle as LegacyTriangle # noqa (API import) +from chainladder.core.triangle import Triangle from chainladder.core.correlation import ( DevelopmentCorrelation, ValuationCorrelation, -) # noqa (API import) +) # noqa (API import) \ No newline at end of file diff --git a/chainladder/core/_slicing.py b/chainladder/core/_slicing.py new file mode 100644 index 00000000..8191a905 --- /dev/null +++ b/chainladder/core/_slicing.py @@ -0,0 +1,309 @@ +# Most of this file is taken from https://github.com/dask/dask/blob/master/dask/array/slicing.py +# See license at https://github.com/dask/dask/blob/master/LICENSE.txt + +import math +from collections.abc import Iterable +from numbers import Integral, Number + +import numpy as np + + +def normalize_index(idx, shape): + """Normalize slicing indexes + 1. Replaces ellipses with many full slices + 2. Adds full slices to end of index + 3. Checks bounding conditions + 4. Replaces numpy arrays with lists + 5. Posify's slices integers and lists + 6. Normalizes slices to canonical form + Examples + -------- + >>> normalize_index(1, (10,)) + (1,) + >>> normalize_index(-1, (10,)) + (9,) + >>> normalize_index([-1], (10,)) + (array([9]),) + >>> normalize_index(slice(-3, 10, 1), (10,)) + (slice(7, 10, 1),) + >>> normalize_index((Ellipsis, None), (10,)) + (slice(0, 10, 1), None) + """ + if not isinstance(idx, tuple): + idx = (idx,) + idx = replace_ellipsis(len(shape), idx) + n_sliced_dims = 0 + for i in idx: + if hasattr(i, "ndim") and i.ndim >= 1: + n_sliced_dims += i.ndim + elif i is None: + continue + else: + n_sliced_dims += 1 + idx += (slice(None),) * (len(shape) - n_sliced_dims) + if len([i for i in idx if i is not None]) > len(shape): + raise IndexError("Too many indices for array") + + none_shape = [] + i = 0 + for ind in idx: + if ind is not None: + none_shape.append(shape[i]) + i += 1 + else: + none_shape.append(None) + + for i, d in zip(idx, none_shape): + if d is not None: + check_index(i, d) + idx = tuple(map(sanitize_index, idx)) + idx = tuple(map(replace_none, idx, none_shape)) + idx = posify_index(none_shape, idx) + idx = tuple(map(clip_slice, idx, none_shape)) + return idx + + +def replace_ellipsis(n, index): + """Replace ... with slices, :, : ,: + >>> replace_ellipsis(4, (3, Ellipsis, 2)) + (3, slice(None, None, None), slice(None, None, None), 2) + >>> replace_ellipsis(2, (Ellipsis, None)) + (slice(None, None, None), slice(None, None, None), None) + """ + # Careful about using in or index because index may contain arrays + isellipsis = [i for i, ind in enumerate(index) if ind is Ellipsis] + if not isellipsis: + return index + elif len(isellipsis) > 1: + raise IndexError("an index can only have a single ellipsis ('...')") + else: + loc = isellipsis[0] + extra_dimensions = n - (len(index) - sum(i is None for i in index) - 1) + return ( + index[:loc] + (slice(None, None, None),) * extra_dimensions + index[loc + 1 :] + ) + + +def check_index(ind, dimension): + """Check validity of index for a given dimension + Examples + -------- + >>> check_index(3, 5) + >>> check_index(5, 5) + Traceback (most recent call last): + ... + IndexError: Index is not smaller than dimension 5 >= 5 + >>> check_index(6, 5) + Traceback (most recent call last): + ... 
+    IndexError: Index is not smaller than dimension 6 >= 5
+    >>> check_index(-1, 5)
+    >>> check_index(-6, 5)
+    Traceback (most recent call last):
+    ...
+    IndexError: Negative index is not greater than negative dimension -6 <= -5
+    >>> check_index([1, 2], 5)
+    >>> check_index([6, 3], 5)
+    Traceback (most recent call last):
+    ...
+    IndexError: Index out of bounds for dimension 5
+    >>> check_index(slice(0, 3), 5)
+    """
+    if isinstance(ind, Iterable):
+        x = np.asanyarray(ind)
+        if (
+            np.issubdtype(x.dtype, np.integer)
+            and ((x >= dimension) | (x < -dimension)).any()
+        ):
+            raise IndexError("Index out of bounds for dimension {:d}".format(dimension))
+        elif x.dtype == bool and len(x) != dimension:
+            raise IndexError(
+                "boolean index did not match indexed array; dimension is {:d} "
+                "but corresponding boolean dimension is {:d}".format(dimension, len(x))
+            )
+    elif isinstance(ind, slice):
+        return
+    elif not isinstance(ind, Integral):
+        raise IndexError(
+            "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and "
+            "integer or boolean arrays are valid indices"
+        )
+
+    elif ind >= dimension:
+        raise IndexError(
+            "Index is not smaller than dimension {:d} >= {:d}".format(ind, dimension)
+        )
+
+    elif ind < -dimension:
+        msg = "Negative index is not greater than negative dimension {:d} <= -{:d}"
+        raise IndexError(msg.format(ind, dimension))
+
+
+def sanitize_index(ind):
+    """Sanitize the elements for indexing along one axis
+    >>> sanitize_index([2, 3, 5])
+    array([2, 3, 5])
+    >>> sanitize_index([True, False, True, False])
+    array([0, 2])
+    >>> sanitize_index(np.array([1, 2, 3]))
+    array([1, 2, 3])
+    >>> sanitize_index(np.array([False, True, True]))
+    array([1, 2])
+    >>> type(sanitize_index(np.int32(0))) # doctest: +SKIP
+    <class 'int'>
+    >>> sanitize_index(0.5) # doctest: +SKIP
+    Traceback (most recent call last):
+    ...
+ IndexError: only integers, slices (`:`), ellipsis (`...`), + numpy.newaxis (`None`) and integer or boolean arrays are valid indices + """ + if ind is None: + return None + elif isinstance(ind, slice): + return slice( + _sanitize_index_element(ind.start), + _sanitize_index_element(ind.stop), + _sanitize_index_element(ind.step), + ) + elif isinstance(ind, Number): + return _sanitize_index_element(ind) + if not hasattr(ind, "dtype") and len(ind) == 0: + ind = np.array([], dtype=np.intp) + ind = np.asarray(ind) + if ind.dtype == np.bool_: + nonzero = np.nonzero(ind) + if len(nonzero) == 1: + # If a 1-element tuple, unwrap the element + nonzero = nonzero[0] + return np.asanyarray(nonzero) + elif np.issubdtype(ind.dtype, np.integer): + return ind + else: + raise IndexError( + "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and " + "integer or boolean arrays are valid indices" + ) + + +def _sanitize_index_element(ind): + """Sanitize a one-element index.""" + if ind is None: + return None + + return int(ind) + + +def posify_index(shape, ind): + """Flip negative indices around to positive ones + >>> posify_index(10, 3) + 3 + >>> posify_index(10, -3) + 7 + >>> posify_index(10, [3, -3]) + array([3, 7]) + >>> posify_index((10, 20), (3, -3)) + (3, 17) + >>> posify_index((10, 20), (3, [3, 4, -3])) # doctest: +NORMALIZE_WHITESPACE + (3, array([ 3, 4, 17])) + """ + if isinstance(ind, tuple): + return tuple(map(posify_index, shape, ind)) + if isinstance(ind, Integral): + if ind < 0 and not math.isnan(shape): + return ind + shape + else: + return ind + if isinstance(ind, (np.ndarray, list)) and not math.isnan(shape): + ind = np.asanyarray(ind) + return np.where(ind < 0, ind + shape, ind) + if isinstance(ind, slice): + start, stop, step = ind.start, ind.stop, ind.step + + if start < 0: + start += shape + + if not (0 > stop >= step) and stop < 0: + stop += shape + + return slice(start, stop, ind.step) + + return ind + + +def clip_slice(idx, dim): + """ + Clip slice to its effective size given the shape. + + Parameters + ---------- + idx : The index. + dim : The size along the corresponding dimension. + + Returns + ------- + idx : slice + + Examples + -------- + >>> clip_slice(slice(0, 20, 1), 10) + slice(0, 10, 1) + """ + if not isinstance(idx, slice): + return idx + + start, stop, step = idx.start, idx.stop, idx.step + + if step > 0: + start = max(start, 0) + stop = min(stop, dim) + + if start > stop: + start = stop + else: + start = min(start, dim - 1) + stop = max(stop, -1) + + if start < stop: + start = stop + + return slice(start, stop, step) + + +def replace_none(idx, dim): + """ + Normalize slices to canonical form, i.e. + replace ``None`` with the appropriate integers. 
+ + Parameters + ---------- + idx : slice or other index + dim : dimension length + + Examples + -------- + >>> replace_none(slice(None, None, None), 10) + slice(0, 10, 1) + """ + if not isinstance(idx, slice): + return idx + + start, stop, step = idx.start, idx.stop, idx.step + + if step is None: + step = 1 + + if step > 0: + if start is None: + start = 0 + + if stop is None: + stop = dim + else: + if start is None: + start = dim - 1 + + if stop is None: + stop = -1 + + return slice(start, stop, step) \ No newline at end of file diff --git a/chainladder/core/core.py b/chainladder/core/core.py new file mode 100644 index 00000000..8995dea0 --- /dev/null +++ b/chainladder/core/core.py @@ -0,0 +1,1125 @@ +import polars as pl +from ._slicing import normalize_index + +dcol = ( + (pl.col('__development__').dt.year() - + pl.col('__origin__').dt.year()) * 12 + + pl.col('__development__').dt.month() - + pl.col('__origin__').dt.month() + 1 +).cast(pl.UInt16).alias('__development__') + +vcol = pl.date( + year=(pl.col('__origin__').dt.year() + + (pl.col('__origin__').dt.month() + + pl.col('__development__') - 1) // 12 - + pl.when((pl.col('__origin__').dt.month() + + pl.col('__development__') - 1 + ) % 12 == 0) + .then(1).otherwise(0)), + month=(pl.col('__origin__').dt.month() + + pl.col('__development__') - 2) % 12 + 1, day=1 +).dt.month_end().alias('__development__') + + +class TriangleBase: + """ Triangle written exclusively in polars """ + def __init__( + self, data=None, index=None, origin=None, columns=None, + valuation=None, origin_format=None, valuation_format=None, + cumulative=None, pattern=False, trailing=False, lazy=False, + *args, **kwargs + ): + if data is None: + return + # Static attributes + self.columns = [columns] if type(columns) is str else columns + self.is_cumulative = cumulative + self.is_pattern = pattern + self.origin_close = 'DEC' + + index = index or [] + if valuation is None: + data = ( + data.with_columns( + TriangleBase._format_origin( + data, origin, origin_format + ).dt.truncate("1mo").alias('__origin__'), + TriangleBase._format_valuation( + data, origin, origin_format + ).dt.month_end().max().alias('__development__')) + .select( + pl.col(index + self.columns + + ['__origin__', '__development__']))) + else: + data = ( + data.with_columns( + TriangleBase._format_origin( + data, origin, origin_format + ).dt.truncate("1mo").alias('__origin__'), + TriangleBase._format_valuation( + data, valuation, valuation_format + ).dt.month_end().alias('__development__')) + .select( + pl.col(index + self.columns + + ['__origin__', '__development__']))) + if data.select('__development__').lazy().collect().n_unique() > 1: + data = data.select(pl.all().exclude('__development__'), dcol) + self.data = ( + data + .with_columns(pl.lit('Total').alias('Total') if not index else []) + .group_by(pl.all().exclude(columns)) # Needed for cum_to_incr/incr_to_cum + .agg(pl.col(columns).sum()) + .sort(index + ['__origin__', '__development__']) + ) + self.is_lazy = lazy + if not lazy: + self.data = self.data.lazy().collect() + + if trailing: + self.data = self.grain( + f'O{self.origin_grain}D{self.development_grain}', + trailing=True).data + self.properties = {} + + @staticmethod + def from_triangle(triangle): + obj = TriangleBase() + obj.data = triangle.data + obj.columns = triangle.columns + obj.is_cumulative = triangle.is_cumulative + obj.is_pattern = triangle.is_pattern + obj.origin_close = triangle.origin_close + obj.is_lazy = triangle.is_lazy + obj.properties = triangle.properties.copy() + return 
obj

+    @property
+    def key_labels(self):
+        if 'key_labels' not in self.properties.keys():
+            self.properties['key_labels'] = [
+                c for c in self.data.columns
+                if c not in self.columns +
+                ['__origin__', '__development__']]
+        return self.properties['key_labels']
+
+    @property
+    def shape(self):
+        # requires index, columns, origin, valuation, development
+        return (
+            self.index.lazy().select(pl.count()).collect()[0, 0],
+            len(self.columns),
+            len(self.origin),
+            len(self.valuation) if self.is_val_tri else len(self.development))
+
+    @property
+    def valuation_date(self):
+        # requires valuation
+        return self.valuation.max()
+
+    @property
+    def index(self):
+        if 'index' not in self.properties.keys():
+            self.properties['index'] = (
+                self.data.select(pl.col(self.key_labels)).unique().sort(pl.all())
+            )
+        return self.properties['index']
+
+    @property
+    def origin_grain(self):
+        if 'origin_grain' not in self.properties.keys():
+            months = self.date_matrix.select(
+                pl.col('__origin__').dt.month().sort().unique()
+            )['__origin__']
+            diffs = months.diff()[1:]
+            if len(months) == 1:
+                grain = "Y"
+            elif (diffs == 6).all():
+                grain = "2Q"
+            elif (diffs == 3).all():
+                grain = "Q"
+            else:
+                grain = "M"
+            self.properties['origin_grain'] = grain
+        return self.properties['origin_grain']
+
+    @property
+    def date_matrix(self):
+        if 'date_matrix' not in self.properties.keys():
+            if self.is_val_tri:
+                self.properties['date_matrix'] = (
+                    self.data
+                    .group_by('__origin__')
+                    .agg(pl.col('__development__').unique())
+                    .explode('__development__').select(
+                        pl.col('__origin__'),
+                        pl.col('__development__').alias('__valuation__'),
+                        dcol
+                    )).lazy().collect()
+            else:
+                self.properties['date_matrix'] = (
+                    self.data
+                    .group_by('__origin__')
+                    .agg(pl.col('__development__').unique())
+                    .explode('__development__')
+                    .with_columns(vcol.alias('__valuation__'))).lazy().collect()
+        return self.properties['date_matrix']
+
+    @property
+    def origin(self):
+        if 'origin' not in self.properties.keys():
+            self.properties['origin'] = pl.date_range(
+                start=self.date_matrix['__origin__'].min(),
+                end=self.date_matrix['__origin__'].max(),
+                interval={'Y': '12mo', 'M': '1mo',
+                          'Q': '3mo', '2Q': '6mo'}[self.origin_grain],
+                eager=True).alias('origin')
+        return self.properties['origin']
+
+    @property
+    def odims(self):
+        return pl.DataFrame({'odims': range(len(self.origin)), '__origin__': self.origin})
+
+    @property
+    def ddims(self):
+        values = self.valuation if self.is_val_tri else self.development
+        return pl.DataFrame({'ddims': range(len(values)), '__development__': values})
+
+    @property
+    def development(self):
+        if 'development' not in self.properties.keys():
+            interval = {'Y': 12, '2Q': 6, 'Q': 3, 'M': 1}[self.development_grain]
+            self.properties['development'] = pl.Series(
+                'development',
+                range(self.date_matrix['__development__'].min(),
+                      self.date_matrix['__development__'].max() + interval,
+                      interval)).cast(pl.UInt16)
+        return self.properties['development']
+
+    @property
+    def valuation(self):
+        if 'valuation' not in self.properties.keys():
+            interval = {'Y': '12mo', 'M': '1mo',
+                        'Q': '3mo', '2Q': '6mo'}[self.development_grain]
+            valuation_range = self.date_matrix.select(
+                pl.col('__valuation__').min().alias('vmin').dt.month_start(),
+                pl.col('__valuation__').max().alias('vmax'))
+            self.properties['valuation'] = pl.date_range(
+                start=valuation_range['vmin'][0],
+                end=valuation_range['vmax'][0],
+                interval=interval,
+                eager=True).dt.month_end().alias('valuation')
+        return self.properties['valuation']
+
+    @property
def is_full(self):
+        # date_matrix holds one row per populated (origin, valuation) cell
+        return (self.date_matrix.select(['__origin__', '__valuation__']).n_unique() ==
+                self.shape[2] * self.shape[3])
+
+    @property
+    def is_val_tri(self):
+        if 'is_val_tri' not in self.properties.keys():
+            self.properties['is_val_tri'] = dict(
+                zip(self.data.columns, self.data.dtypes)
+            )['__development__'] != pl.UInt16
+        return self.properties['is_val_tri']
+
+    @property
+    def development_grain(self):
+        if 'development_grain' not in self.properties.keys():
+            if len(self.date_matrix['__valuation__'].unique()) == 1:
+                grain = 'M'
+            else:
+                months = self.data.select(
+                    self.date_matrix['__valuation__']
+                    .dt.month().unique().sort().alias('__development__')
+                ).lazy().collect()['__development__']
+                diffs = months.diff()[1:]
+                if len(months) == 1:
+                    grain = "Y"
+                elif (diffs == 6).all():
+                    grain = "2Q"
+                elif (diffs == 3).all():
+                    grain = "Q"
+                else:
+                    grain = "M"
+            self.properties['development_grain'] = grain
+        return self.properties['development_grain']
+
+    @property
+    def latest_diagonal(self):
+        # requires valuation, valuation_date
+        triangle = self[self.valuation==self.valuation_date]
+        if not triangle.is_val_tri:
+            triangle.data = triangle.data.select(
+                pl.all().exclude('__development__'), vcol)
+            triangle.properties['is_val_tri'] = True
+            triangle.properties.pop('date_matrix', None)
+            triangle.properties.pop('valuation', None)
+            triangle.properties.pop('development_grain', None)
+            triangle.properties.pop('development', None)
+        return triangle
+
+    def val_to_dev(self):
+        if self.is_val_tri:
+            obj = TriangleBase.from_triangle(self)
+            obj.data = obj.data.select(pl.all().exclude('__development__'), dcol)
+            obj.properties['is_val_tri'] = False
+            return obj
+        else:
+            return self
+
+    def dev_to_val(self):
+        if not self.is_val_tri:
+            obj = TriangleBase.from_triangle(self)
+            obj.data = obj.data.select(pl.all().exclude('__development__'), vcol)
+            obj.properties['is_val_tri'] = True
+            return obj
+        else:
+            return self
+
+    def lazy(self, *args, **kwargs):
+        self.data = self.data.lazy(*args, **kwargs)
+        self.is_lazy = True
+        return self
+
+    def collect(self, *args, **kwargs):
+        self.data = self.data.collect(*args, **kwargs)
+        self.is_lazy = False
+        return self
+
+    @staticmethod
+    def _format_origin(data, column, format):
+        if data.select(column).dtypes[0] in ([pl.Date, pl.Datetime]):
+            return pl.col(column).cast(pl.Date).dt.month_start()
+        else:
+            for f in ['%Y%m', '%Y', format]:
+                c = (
+                    pl.col(column)
+                    .cast(pl.Utf8).str.to_date(format=f)
+                    .cast(pl.Date).dt.month_start())
+                try:
+                    data.head(10).select(c)
+                    return c
+                except Exception:
+                    pass
+
+    @staticmethod
+    def _format_valuation(data, column, format) -> pl.Expr:
+        if data.select(column).dtypes[0] in ([pl.Date, pl.Datetime]):
+            return pl.col(column).cast(pl.Date).dt.month_end()
+        else:
+            for f in ['%Y%m', '%Y', format]:
+                c = (
+                    pl.col(column)
+                    .cast(pl.Utf8).str.to_date(format=f)
+                    .cast(pl.Date).dt.month_start()
+                    .alias('__development__'))
+                try:
+                    data.head(10).select(c)
+                    break
+                except Exception:
+                    pass
+            if f == '%Y':
+                return (c.dt.offset_by('12mo')
+                        .dt.offset_by('-1d').dt.month_end())
+            else:
+                return c.dt.month_end()
+
+
+    def _agg(self, agg, axis=None, *args, **kwargs):
+        if axis is None:
+            if max(self.shape) == 1:
+                axis = 0
+            else:
+                axis = min([num for num, _ in enumerate(self.shape) if _ != 1])
+        else:
+            axis = self._get_axis(axis)
+        obj = TriangleBase.from_triangle(self)
+        if axis == 0:
+            obj.data = (
+                obj.data
+                .group_by(['__origin__', '__development__'])
+                .agg(getattr(pl.col(self.columns).fill_null(0), agg)(*args, **kwargs))
.with_columns(*[pl.lit('(All)').alias(c) for c in self.key_labels])
+            )
+            obj.properties.pop('index', None)
+            obj.properties.pop('key_labels', None)
+        elif axis == 1:
+            obj.data = self.data.select(
+                pl.col(self.key_labels + ['__origin__', '__development__']),
+                pl.sum_horizontal(self.columns).alias('0'))
+            obj.columns = ['0']
+        elif axis == 2:
+            obj.data = (
+                self.data
+                .group_by(self.key_labels + ['__development__'])
+                .agg(getattr(pl.col(self.columns).fill_null(0), agg)(*args, **kwargs))
+                .with_columns(pl.lit(self.origin.min()).alias('__origin__')))
+            obj.properties.pop('date_matrix', None)
+            obj.properties.pop('origin', None)
+            obj.properties.pop('origin_grain', None)
+        elif axis == 3:
+            obj.data = (
+                self.data
+                .group_by(self.key_labels + ['__origin__'])
+                .agg(getattr(pl.col(self.columns).fill_null(0), agg)(*args, **kwargs))
+                .with_columns(pl.lit(self.valuation_date).alias('__development__')))
+            obj.properties['is_val_tri'] = True
+            obj.properties.pop('date_matrix', None)
+            obj.properties.pop('development', None)
+            obj.properties.pop('development_grain', None)
+            obj.properties.pop('valuation', None)
+        else:
+            raise ValueError(f'axis {axis} is not supported')
+        return obj
+
+    def sum(self, axis=None):
+        return self._agg('sum', axis)
+
+    def mean(self, axis=None):
+        return self._agg('mean', axis)
+
+    def min(self, axis=None):
+        return self._agg('min', axis)
+
+    def max(self, axis=None):
+        return self._agg('max', axis)
+
+    def median(self, axis=None):
+        return self._agg('median', axis)
+
+    def std(self, axis=None):
+        return self._agg('std', axis)
+
+    def var(self, axis=None):
+        return self._agg('var', axis)
+
+    def product(self, axis=None):
+        return self._agg('product', axis)
+
+    def quantile(self, axis=None, q=0.5):
+        return self._agg('quantile', axis, quantile=q)
+
+    def _get_axis(self, axis):
+        ax = {
+            **{0: 0, 1: 1, 2: 2, 3: 3},
+            **{-1: 3, -2: 2, -3: 1, -4: 0},
+            **{"index": 0, "columns": 1, "origin": 2, "development": 3},
+        }
+        return ax.get(axis, 0)
+
+    def group_by(self, by, axis=0, *args, **kwargs):
+        """Group Triangle by index values.
+
+        Parameters
+        ----------
+        by: str or list
+            The index to group by
+
+        Returns
+        -------
+        PlTriangleGroupBy object
+        """
+        return PlTriangleGroupBy(self, by, axis)
+
+    def incr_to_cum(self, inplace=False):
+        """Method to convert an incremental triangle into a cumulative triangle.
+
+        Parameters
+        ----------
+        inplace: bool
+            Set to True will update the instance data attribute inplace
+
+        Returns
+        -------
+        Updated instance of triangle accumulated along the origin
+        """
+
+        if self.is_cumulative:
+            return self
+        triangle = TriangleBase.from_triangle(self)
+        if self.is_val_tri:
+            col = pl.col('__development__')
+        else:
+            col = vcol
+        expanded = (
+            self.data.lazy()
+            .select(pl.col(self.key_labels + ['__origin__']), col)
+            .group_by(self.key_labels + ['__origin__'])
+            .agg(pl.col('__development__').min())
+            .join(self.valuation.to_frame().lazy(), how='cross')
+            .filter(pl.col('__development__')<=pl.col('valuation'))
+            .drop('__development__').rename({'valuation':'__development__'}))
+        triangle.data = (
+            self.data.lazy()
+            .select(pl.all().exclude('__development__'), col)
+            .join(expanded, how='outer',
+                  left_on=self.key_labels + ['__origin__', '__development__'],
+                  right_on=self.key_labels + ['__origin__', '__development__'])
+            .sort(by=self.key_labels + ['__origin__', '__development__'])
+            .group_by(self.key_labels + ['__origin__'])
+            .agg(
+                pl.col('__development__'),
+                pl.col(self.columns).fill_null(pl.lit(0)).cumsum())
+            .explode(["__development__"] + self.columns))
+        if not self.is_lazy:
+            triangle.data = triangle.data.collect()
+        triangle.is_cumulative = True
+        triangle.properties['is_val_tri'] = True
+        triangle.properties.pop('date_matrix', None)
+        if self.is_val_tri:
+            triangle.properties.pop('valuation', None)
+            return triangle
+        else:
+            triangle.properties.pop('development', None)
+            return triangle.val_to_dev()
+
+    def cum_to_incr(self, filter_zeros=False):
+        """Method to convert a cumulative triangle into an incremental triangle.
+
+        Parameters
+        ----------
+        filter_zeros: bool
+            Set to True to drop rows whose incremental values are all zero
+
+        Returns
+        -------
+        Updated instance of triangle with incremental values
+        """
+        if not self.is_cumulative:
+            return self
+        else:
+            triangle = TriangleBase.from_triangle(self)
+            triangle.data = (
+                self.data.lazy()
+                .sort(self.key_labels + ['__origin__', '__development__'])
+                .group_by(self.key_labels + ['__origin__'])
+                .agg(
+                    pl.col('__development__'),
+                    pl.col(self.columns).diff().fill_null(pl.col(self.columns)))
+                .explode(["__development__"] + self.columns)
+                .filter(pl.any_horizontal(pl.col(self.columns) != 0) if filter_zeros else True)
+            )
+            triangle.is_cumulative = False
+            triangle.properties.pop('date_matrix', None)
+            if self.is_val_tri:
+                triangle.properties.pop('valuation', None)
+            else:
+                triangle.properties.pop('development', None)
+            if not self.is_lazy:
+                triangle.data = triangle.data.collect()
+            return triangle
+
+    @property
+    def link_ratio(self):
+        triangle = TriangleBase.from_triangle(self.incr_to_cum().val_to_dev())
+        interval = {'Y': 12, '2Q': 6, 'Q': 3, 'M': 1}[self.development_grain]
+        triangle.data = (
+            triangle.data.lazy()
+            .sort(['__origin__', '__development__'])
+            .group_by(self.key_labels + ['__origin__'])
+            .agg(
+                (pl.col('__development__') -
+                 pl.lit(interval)).cast(pl.UInt16).alias('__development__'),
+                (pl.when(pl.col(self.columns).pct_change().is_infinite())
+                 .then(pl.lit(None))
+                 .otherwise(pl.col(self.columns).pct_change()) + pl.lit(1.0)
+                 ).keep_name())
+            .explode(["__development__"] + self.columns)
+            .filter(~pl.any_horizontal(pl.col(self.columns).is_null())))
+        if not self.is_lazy:
+            triangle.data = triangle.data.collect()
+        triangle.is_pattern = True
+        triangle.is_cumulative = False
+        triangle.properties.pop('date_matrix', None)
+        if self.is_val_tri:
triangle.properties.pop('valuation', None)
+        else:
+            triangle.properties.pop('development', None)
+        return triangle
+
+
+    def grain(self, grain="", trailing=False, inplace=False):
+        """Changes the grain of a cumulative triangle.
+
+        Parameters
+        ----------
+        grain : str
+            The grain to which you want your triangle converted, specified as
+            'OXDY' where X and Y can take on values of ``['Y', 'S', 'Q', 'M'
+            ]`` For example, 'OYDY' for Origin Year/Development Year, 'OQDM'
+            for Origin quarter/Development Month, etc.
+        trailing : bool
+            For partial origin years/quarters, trailing will set the year/quarter
+            end to that of the latest available from the origin data.
+        inplace : bool
+            Whether to mutate the existing Triangle instance or return a new
+            one.
+
+        Returns
+        -------
+        Triangle
+        """
+        ograin_new = grain[1:2]
+        dgrain_new = grain[-1]
+        ograin_new = "S" if ograin_new == "H" else ograin_new
+        latest_month = self.valuation_date.month
+        grain_dict = {'Y': 12, 'S': 6, 'Q': 3, 'M': 1}
+        if trailing:
+            offset = grain_dict[ograin_new]
+            offset = str(-((offset - latest_month % offset) % offset)) + 'mo'
+            origin_close = (
+                pl.DataFrame().select(
+                    pl.date(2000, latest_month, 1).dt.strftime('%b').str.to_uppercase()
+                )[0, 0])
+        else:
+            origin_close = self.origin_close
+            offset = '0mo'
+        if self.is_val_tri:
+            triangle = TriangleBase.from_triangle(self)
+        else:
+            triangle = self.dev_to_val()
+        origin_map = (
+            self.origin
+            .to_frame().lazy()
+            .group_by_dynamic(
+                index_column=pl.col('origin'),
+                every=str(grain_dict[ograin_new]) + 'mo',
+                offset=offset)
+            .agg(pl.col('origin').alias('__origin__'))
+            .explode(pl.col('__origin__'))
+            .select(
+                pl.col('origin'),
+                pl.col('__origin__')))
+        data = (
+            triangle.data.lazy()
+            .join(origin_map, how='inner',
+                  left_on='__origin__',
+                  right_on='__origin__')
+            .drop('__origin__')
+            .rename({'origin': '__origin__'}))
+        self.origin_close = origin_close
+        development_map = (
+            self.valuation.dt.month_start().sort()
+            .to_frame().lazy()
+            .group_by_dynamic(
+                index_column=pl.col('valuation'),
+                every=str(grain_dict[dgrain_new]) + 'mo',
+                offset=offset)
+            .agg(pl.col('valuation').alias('__development__'))
+            .explode(pl.col('__development__'))
+            .select(
+                pl.col('valuation').dt.offset_by(str(grain_dict[dgrain_new])+'mo').dt.offset_by('-1d'),
+                pl.col('__development__').dt.month_end()))
+        if self.is_cumulative:
+            development_map = (
+                development_map
+                .group_by('valuation')
+                .agg(pl.col('__development__').max()))
+        data = (
+            data
+            .join(development_map, how='inner',
+                  left_on='__development__',
+                  right_on='__development__')
+            .drop('__development__')
+            .rename({'valuation': '__development__'})
+            .group_by(self.key_labels + ['__origin__', '__development__']).sum())
+        triangle.data = data
+        if not self.is_lazy:
+            triangle.data = triangle.data.collect()
+        triangle.properties.pop('date_matrix', None)
+        if self.origin_grain != ograin_new:
+            triangle.properties.pop('origin', None)
+            triangle.properties.pop('origin_grain', None)
+        if self.development_grain != dgrain_new:
+            triangle.properties.pop('development', None)
+            triangle.properties.pop('valuation', None)
+            triangle.properties.pop('development_grain', None)
+        if self.is_val_tri:
+            return triangle
+        else:
+            return triangle.val_to_dev()
+
+    def wide(self):
+        if self.shape[:2] == (1, 1):
+            return (
+                self.data
+                .with_columns(
+                    (pl.col('__development__').dt.strftime('%Y-%m')
+                     if self.is_val_tri else
pl.col('__development__')).alias('development'), + pl.col('__origin__').alias('origin'), + pl.col(self.columns)) + .sort('development').lazy().collect(streaming=True) + .pivot( + index='origin', + columns='development', + values=self.columns, + aggregate_function='first') + .sort('origin')) + else: + raise ValueError(f'Wide format expects shape of (1, 1), but got {self.shape[:2]}') + + def _get_idx(self, idx): + def _normalize_index(key): + key = normalize_index(key, self.shape) + l = [] + for n, i in enumerate(key): + if type(i) is slice: + start = i.start if i.start > 0 else None + stop = i.stop if i.stop > -1 else None + stop = None if stop == self.shape[n] else stop + step = None if start is None and stop is None else i.step + l.append(slice(start, stop, step)) + else: + l.append(i) + key = tuple(l) + return key + + def _contig_slice(arr): + """ Try to make a contiguous slicer from an array of indices """ + if type(arr) is slice: + return arr + if type(arr) in [int]: + arr = [arr] + if len(arr) == 1: + return slice(arr[0], arr[0] + 1) + if len(arr) == 0: + raise ValueError("Slice returns empty Triangle") + diff = pl.Series(arr).diff() + if max(diff) == min(diff): + step = max(diff) + else: + return arr + step = None if step == 1 else step + min_arr = None if min(arr) == 0 else min(arr) + max_arr = max(arr) + 1 + if step and step < 0: + min_arr, max_arr = max_arr - 1, min_arr - 1 if min_arr else min_arr + return slice(min_arr, max_arr, step) + + idx = _normalize_index(idx) + return (_contig_slice(idx[0]), _contig_slice(idx[1]), + _contig_slice(idx[2]), _contig_slice(idx[3])) + + def __getitem__(self, key): + """ Only returns polars expressions. """ + if type(key) is str: + key = [key] + if type(key) is tuple or type(key) is slice or type(key) is int: + s0, s1, s2, s3 = self._get_idx(key) + return ( + [pl.col(c).is_in(self.index[c]) for c in self.key_labels[s0]], + self.columns[s1], + [pl.col('__origin__').is_in(self.origin[s2])], + [pl.col('__development__').is_in(self.valuation[s3] if self.is_val_tri else self.development[s3])]) + elif type(key) is list: + return self.select(key) + elif type(key) is pl.Series: + triangle = TriangleBase.from_triangle(self) + triangle.properties.pop('date_matrix', None) + if key.name == 'valuation': + key = self.valuation.filter(key) + triangle.properties.pop('development', None) + triangle.properties['valuation'] = key + return triangle.filter(pl.col('__development__').is_in(key) if self.is_val_tri else vcol.is_in(key)) + elif key.name == 'development': + triangle.properties.pop('valuation', None) + key = self.development.filter(key) + triangle.properties['development'] = key + return triangle.filter(dcol.is_in(key) if self.is_val_tri else pl.col('__development__').is_in(key)) + elif key.name == 'origin': + key = self.origin.filter(key) + triangle.properties['origin'] = key + return triangle.filter(pl.col('__origin__').is_in(key)) + else: + raise NotImplementedError() + + def __setitem__(self, key, value): + """ Function for pandas style column setting """ + if type(value) is pl.Expr: + self.data = self.data.select(pl.all().exclude(key), value.alias(key)) + elif type(value) != type(self): + value = self._triangle_literal(value) + value.data = value.data.rename({'__value__': key}) + value.columns = [key] + self.data = ( + self.data.select(pl.all().exclude(key)) + .join(value.data.select([key, '__origin__', '__development__']), + how='left', on=['__origin__', '__development__'])) + else: + if len(set(self.key_labels) - set(value.key_labels)) > 0: + raise 
ValueError( + f"""Unable to assign triangle with unknown + key_labels {set(self.key_labels) - set(value.key_labels)}.""") + if len(value.columns) > 1: + raise ValueError( + f"""Unable to assign triangle with multiple column values. + Choose one of {value.columns}.""") + value = TriangleBase.from_triangle(value) + index_intersection = list(set(self.key_labels).intersection(set(value.key_labels))) + if len(value.key_labels) == 1: + index_intersection = [] + value.data = value.data.rename({value.columns[0]: key}) + value.columns = [key] + self.data = ( + self.data.lazy().select(pl.all().exclude(key)) + .join( + value.data.lazy().select( + index_intersection + value.columns + ['__origin__', '__development__']), + how='left', on=index_intersection + ['__origin__', '__development__']) + .rename({value.columns[0]: key}) + ) + self.columns = self.columns + [key] + if not self.is_lazy: + self.data = self.data.lazy().collect() + + @staticmethod + def _broadcast_axes(a, b): + def broadcast_index(a, b): + a = TriangleBase.from_triangle(a) + a.data = (b.index.lazy().join( + a.data.lazy().select( + pl.col(a.columns + ['__origin__', '__development__'])), + how='cross')) + return a, b + + def broadcast_columns(a, b): + a = TriangleBase.from_triangle(a) + a.data = (a.data.select( + pl.col(a.key_labels + ['__origin__', '__development__']), + *[pl.col(a.columns[0]).alias(col) for col in b.columns])) + a.columns = b.columns + return a, b + + def broadcast_origin(a, b): + a = TriangleBase.from_triangle(a) + a.data = a.data.drop('__origin__').join( + b.origin.alias('__origin__').to_frame().lazy(), + how='cross') + return a, b + + def broadcast_development(a, b): + a = TriangleBase.from_triangle(a) + a.data = a.data.drop('__development__').join( + (b.valuation if b.is_val_tri else b.development + ).alias('__development__').to_frame().lazy(), + how='cross') + return a, b + a.data = a.data.lazy() + b.data = b.data.lazy() + if a.shape[0] == 1 and b.shape[0] > 1: + a, b = broadcast_index(a, b) + if a.shape[0] > 1 and b.shape[0] == 1: + b, a = broadcast_index(b, a) + if a.shape[1] == 1 and b.shape[1] > 1: + a, b = broadcast_columns(a, b) + if a.shape[1] > 1 and b.shape[1] == 1: + b, a = broadcast_columns(b, a) + if a.shape[2] == 1 and b.shape[2] > 1: + a, b = broadcast_origin(a, b) + if a.shape[2] > 1 and b.shape[2] == 1: + b, a = broadcast_origin(b, a) + if a.shape[3] == 1 and b.shape[3] > 1: + a, b = broadcast_development(a, b) + if a.shape[3] > 1 and b.shape[3] == 1: + b, a = broadcast_development(b, a) + if not a.is_lazy: + a.data = a.data.lazy().collect() + if not b.is_lazy: + b.data = b.data.lazy().collect() + return a, b + + def head(self, n: 'int' = 5): + triangle = TriangleBase.from_triangle(self) + triangle.data = triangle.data.join( + self.index.head(n), + how='semi', + on=self.key_labels) + triangle.properties.pop('index', None) + return triangle + + def tail(self, n: 'int' = 5): + triangle = TriangleBase.from_triangle(self) + triangle.data = triangle.data.join( + self.index.tail(n), + how='semi', + on=self.key_labels) + triangle.properties.pop('index', None) + return triangle + + def filter(self, key, *args, **kwargs): + triangle = TriangleBase.from_triangle(self) + triangle.data = triangle.data.filter(key, *args, **kwargs) + return triangle + + def select(self, key, *args, **kwargs): + triangle = TriangleBase.from_triangle(self) + if type(key) is str: + key = [key] + if len(set(key).intersection(self.key_labels)) ==len(key): + triangle.data = triangle.data.select(pl.col(key + ['__origin__', 
'__development__'] + self.columns, *args, **kwargs))
+            triangle.properties['key_labels'] = key
+            triangle.properties.pop('index', None)
+        elif len(set(key).intersection(self.columns)) ==len(key):
+            triangle.data = triangle.data.select(pl.col(self.key_labels + ['__origin__', '__development__'] + key, *args, **kwargs))
+            triangle.columns = key
+        else:
+            raise NotImplementedError()
+        return triangle
+
+    def join(self, other, on, how, *args, **kwargs):
+        triangle = TriangleBase.from_triangle(self)
+        triangle.data = triangle.data.join(other, on, how, *args, **kwargs)
+        return triangle
+
+    def _compatibility_check(self, other):
+        if (self.is_val_tri == other.is_val_tri) or (self.shape[3] == 1 or other.shape[3] == 1):
+            join_index = list(set(self.key_labels).intersection(set(other.key_labels)))
+            union_index = self.key_labels + [k for k in other.key_labels if k not in self.key_labels]
+            destination_columns = self.columns
+            if len(set(self.columns) - set(other.columns)) == 0:
+                source_columns = list(zip(self.columns, [c + '_right' for c in self.columns]))
+            else:
+                source_columns = list(zip(self.columns, [c + '_right' for c in other.columns]))
+        else:
+            raise ValueError(
+                """Triangle arithmetic requires triangles to be broadcastable
+                or on the same lag basis (development or valuation)."""
+            )
+        return join_index, union_index, source_columns, destination_columns
+
+    def __arithmetic__(self, other, operation):
+        if type(other) != type(self):
+            other = self._triangle_literal(other)
+        valuation = max(self.valuation_date, other.valuation_date)
+        a, b = TriangleBase._broadcast_axes(self, other)
+        join_index, union_index, source_columns, destination_columns = \
+            a._compatibility_check(b)
+        a = TriangleBase.from_triangle(a)
+        if (not (a.is_lazy and b.is_lazy) and len(a.data) == len(b.data) and
+            (a.data.select(a.key_labels + ['__origin__', '__development__']) ==
+             b.data.select(b.key_labels + ['__origin__', '__development__'])).min().min(axis=1)[0]):
+            a.data = (
+                pl.concat(
+                    (a.data.lazy().collect(),
+                     b.data.lazy().collect()
+                     .rename({k: source_columns[num][1] for num, k in enumerate(b.columns)})
+                     .drop(b.key_labels + ['__origin__', '__development__'])), how='horizontal')
+                .lazy()
+                .select(
+                    pl.col(union_index + ['__origin__', '__development__']),
+                    *[(getattr(pl.col(source_columns[num][0]).fill_null(0), operation)(
+                        pl.col(source_columns[num][1]).fill_null(0))).alias(col)
+                      for num, col in enumerate(destination_columns)]))
+        else:
+            a.data = (
+                a.data.lazy()
+                .join(
+                    b.data.lazy()
+                    .rename({k: source_columns[num][1] for num, k in enumerate(b.columns)}),
+                    how='outer',
+                    on=join_index + ['__origin__', '__development__'])
+                .with_columns(
+                    pl.col('__development__').alias('__valuation__')
+                    if a.is_val_tri else vcol.alias('__valuation__'))
+                .filter(pl.col('__valuation__') <= valuation)
+                .select(
+                    pl.col(union_index + ['__origin__', '__development__']),
+                    *[(getattr(pl.col(source_columns[num][0]).fill_null(0), operation)(
+                        pl.col(source_columns[num][1]).fill_null(0))).alias(col)
+                      for num, col in enumerate(destination_columns)])
+                )
+        if not self.is_lazy:
+            a.data = a.data.collect()
+        a.properties = {}
+        return a
+
+    def _triangle_literal(self, value):
+        """ Purpose is to densely populate all origin/development entries whether they
+        exist in triangle data or not."""
+        other = TriangleBase.from_triangle(self)
+        other.data = (
+            self.origin.alias('__origin__').to_frame().lazy()
+            .join(
+                (self.valuation if self.is_val_tri
+                 else self.development).alias('__development__').to_frame().lazy(),
+                how='cross')
+            .filter((pl.col('__development__')
if self.is_val_tri
+                     else vcol) <= self.valuation_date)
+        ).with_columns(pl.lit("Total").alias('Total'),
+                       pl.lit(value).alias('__value__'))
+        other.columns = ['__value__']
+        if not self.is_lazy:
+            other.data = other.data.collect()
+        return other
+
+    def _triangle_unary(self, unary, *args):
+        triangle = TriangleBase.from_triangle(self)
+        triangle.data = (
+            triangle.data.select(
+                pl.col(self.key_labels + ['__origin__', '__development__']),
+                *[getattr(pl.col(c), unary)(*args).alias(c) for c in self.columns])
+        )
+        return triangle
+
+    def __add__(self, other):
+        return self.__arithmetic__(other, '__add__')
+
+    def __radd__(self, other):
+        return self.__add__(other)
+
+    def __sub__(self, other):
+        return self.__arithmetic__(other, '__sub__')
+
+    def __rsub__(self, other):
+        return -self + other
+
+    def __mul__(self, other):
+        return self.__arithmetic__(other, '__mul__')
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __truediv__(self, other):
+        return self.__arithmetic__(other, '__truediv__')
+
+    def __rtruediv__(self, other):
+        return (self ** -1) * other
+
+    def __neg__(self):
+        return self._triangle_unary('__neg__')
+
+    def __abs__(self):
+        return self._triangle_unary('__abs__')
+
+    def __pow__(self, n):
+        return self._triangle_unary('__pow__', n)
+
+    def __pos__(self):
+        return self._triangle_unary('__pos__')
+
+    def __round__(self, n=0):
+        return self._triangle_unary('round', n)
+
+    def __len__(self):
+        return self.shape[0]
+
+    def __contains__(self, value):
+        raise NotImplementedError()
+
+    def __lt__(self, value):
+        raise NotImplementedError()
+
+    def __le__(self, value):
+        raise NotImplementedError()
+
+    def copy(self):
+        return TriangleBase.from_triangle(self)
+
+    def to_frame(self, keepdims=False, implicit_axis=False, *args, **kwargs):
+        """ Converts a triangle to a polars DataFrame.
+        Parameters
+        ----------
+        keepdims : bool
+            If True, the triangle will be converted to a DataFrame with all
+            dimensions intact. The argument will force a consistent DataFrame
+            format regardless of whether any dimensions are of length 1.
+        implicit_axis : bool
+            When keepdims is True, this denotes whether to include the implicit
+            valuation axis in addition to the origin and development.
+        Returns
+        -------
+        polars DataFrame representation of the Triangle.
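+
+        Examples
+        --------
+        A minimal sketch; the frame and column names are purely illustrative:
+
+        >>> import polars as pl  # doctest: +SKIP
+        >>> df = pl.DataFrame({
+        ...     'origin': ['2020-01-01', '2020-01-01', '2021-01-01'],
+        ...     'valuation': ['2020-12-31', '2021-12-31', '2021-12-31'],
+        ...     'paid': [100.0, 150.0, 90.0]})  # doctest: +SKIP
+        >>> tri = TriangleBase(df, origin='origin', valuation='valuation',
+        ...                    columns=['paid'], cumulative=True)  # doctest: +SKIP
+        >>> tri.to_frame(keepdims=True)  # doctest: +SKIP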
+ """ + if self.shape[:2] == (1, 1) and not keepdims: + return self.wide() + if implicit_axis: + if self.is_val_tri: + return self.data.sort( + pl.col(self.key_labels + ['__origin__', '__development__'])).select( + pl.col(self.key_labels), + pl.col('__origin__').alias('origin'), + pl.col('__development__').alias('valuation'), + dcol.alias('development'), + pl.col(self.columns)) + else: + return self.data.sort( + pl.col(self.key_labels + ['__origin__', '__development__'])).select( + pl.col(self.key_labels), + pl.col('__origin__').alias('origin'), + pl.col('__development__').alias('development'), + vcol.alias('valuation'), + pl.col(self.columns)) + else: + return self.data.sort( + pl.col(self.key_labels + ['__origin__', '__development__'])).select( + pl.col(self.key_labels), + pl.col('__origin__').alias('origin'), + pl.col('__development__').alias('valuation' if self.is_val_tri else 'development'), + pl.col(self.columns)) + + def sort(self): + self.data = self.data.sort(self.key_labels + ['__origin__', '__development__']) + return self + + +class PlTriangleGroupBy: + def __init__(self, obj, by, axis=0, **kwargs): + self.obj = TriangleBase.from_triangle(obj) + self.axis = self.obj._get_axis(axis) + self.by = [by] if type(by) is str else by + if self.axis == 0: + self.groups = obj.data.group_by( + self.by + ['__origin__', '__development__']) + else: + raise NotImplementedError() + self.columns = self.obj.columns + + def __getitem__(self, key): + self.columns = [key] if type(key) is str else key + return self + + def _agg(self, agg, axis=1, *args, **kwargs): + axis = self.obj._get_axis(axis) + if axis == 0: + self.obj.data = self.groups.agg( + getattr(pl.col(self.columns), agg)(*args, **kwargs)) + self.obj.properties.pop('index', None) + self.obj.properties.pop('key_labels', None) + else: + raise ValueError(f'axis {axis} is not supported') + self.obj.columns = self.columns + return self.obj + + def sum(self, axis=0): + return self._agg('sum', axis) + + def mean(self, axis=0): + return self._agg('mean', axis) + + def min(self, axis=0): + return self._agg('min', axis) + + def max(self, axis=0): + return self._agg('max', axis) + + def median(self, axis=0): + return self._agg('median', axis) + + def std(self, axis=0): + return self._agg('std', axis) + + def var(self, axis=0): + return self._agg('var', axis) + + def product(self, axis=0): + return self._agg('product', axis) + + def quantile(self, axis=0, quantile=0.5): + return self._agg('quantile', axis, quantile=quantile) + diff --git a/chainladder/core/legacy.py b/chainladder/core/legacy.py new file mode 100644 index 00000000..12b0afca --- /dev/null +++ b/chainladder/core/legacy.py @@ -0,0 +1,944 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
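+
+# A minimal usage sketch for the Triangle class defined below; the frame and
+# column names here are purely illustrative:
+#
+#     import chainladder as cl
+#     tri = cl.Triangle(df, origin='AccidentDate', development='ValuationDate',
+#                       columns=['Paid'], index=['Line'], cumulative=True)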
+
+import pandas as pd
+import numpy as np
+import copy
+import warnings
+from chainladder.core.base import TriangleBase
+from chainladder.utils.sparse import sp
+from chainladder.core.slice import VirtualColumns
+from chainladder.core.correlation import DevelopmentCorrelation, ValuationCorrelation
+from chainladder.utils.utility_functions import concat, num_to_nan, num_to_value
+from chainladder import options
+
+try:
+    import dask.bag as db
+except ImportError:
+    db = None
+
+
+class Triangle(TriangleBase):
+    """
+    The core data structure of the chainladder package
+
+    Parameters
+    ----------
+    data: DataFrame
+        A single dataframe that contains columns representing all other
+        arguments to the Triangle constructor
+    origin: str or list
+        A representation of the accident, reporting or more generally the
+        origin period of the triangle that will map to the Origin dimension
+    development: str or list
+        A representation of the development/valuation periods of the triangle
+        that will map to the Development dimension
+    columns: str or list
+        A representation of the numeric data of the triangle that will map to
+        the columns dimension. If None, then a single 'Total' key will be
+        generated.
+    index: str or list or None
+        A representation of the index of the triangle that will map to the
+        index dimension. If None, then a single 'Total' key will be generated.
+    origin_format: optional str
+        A string representation of the date format of the origin arg. If
+        omitted then date format will be inferred by pandas.
+    development_format: optional str
+        A string representation of the date format of the development arg. If
+        omitted then date format will be inferred by pandas.
+    cumulative: bool
+        Whether the triangle is cumulative or incremental. This attribute is
+        required to use the ``grain`` and ``dev_to_val`` methods and will be
+        automatically set when invoking ``cum_to_incr`` or ``incr_to_cum`` methods.
+    trailing: bool
+        When partial origin periods are present, setting trailing to True will
+        ensure the most recent origin period is a full period and the oldest
+        origin is partial. If full origin periods are present in the data, then
+        trailing has no effect.
+
+    Attributes
+    ----------
+    index: Series
+        Represents all available levels of the index dimension.
+    columns: Series
+        Represents all available levels of the value dimension.
+    origin: DatetimeIndex
+        Represents all available levels of the origin dimension.
+    development: Series
+        Represents all available levels of the development dimension.
+    key_labels: list
+        Represents the ``index`` axis labels
+    virtual_columns: Series
+        Represents the subset of columns of the triangle that are virtual.
+    valuation: DatetimeIndex
+        Represents all valuation dates of each cell in the Triangle.
+    origin_grain: str
+        The grain of the origin vector ('Y', 'S', 'Q', 'M')
+    development_grain: str
+        The grain of the development vector ('Y', 'S', 'Q', 'M')
+    shape: tuple
+        The 4D shape of the triangle instance with axes corresponding to (index, columns, origin, development)
+    link_ratio, age_to_age
+        Displays age-to-age ratios for the triangle.
+ valuation_date : date + The latest valuation date of the data + loc: Triangle + pandas-style ``loc`` accessor + iloc: Triangle + pandas-style ``iloc`` accessor + latest_diagonal: Triangle + The latest diagonal of the triangle + is_cumulative: bool + Whether the triangle is cumulative or not + is_ultimate: bool + Whether the triangle has an ultimate valuation + is_full: bool + Whether lower half of Triangle has been filled in + is_val_tri: + Whether the triangle development period is expressed as valuation + periods. + values: array + 4D numpy array underlying the Triangle instance + T: Triangle + Transpose index and columns of object. Only available when Triangle is + convertible to DataFrame. + """ + + def __init__( + self, + data=None, + origin=None, + development=None, + columns=None, + index=None, + origin_format=None, + development_format=None, + cumulative=None, + array_backend=None, + pattern=False, + trailing=True, + *args, + **kwargs + ): + if data is None: + return + + index, columns, origin, development = self._input_validation( + data, index, columns, origin, development + ) + + # Handle any ultimate vectors in triangles separately + data, ult = self._split_ult(data, index, columns, origin, development) + # Conform origins and developments to datetimes and determine lowest grains + origin_date = self._to_datetime(data, origin, format=origin_format).rename( + "__origin__" + ) + self.origin_grain = self._get_grain( + origin_date, trailing=trailing, kind="origin" + ) + + development_date = self._set_development( + data, development, development_format, origin_date + ) + + self.development_grain = self._get_grain( + development_date, trailing=trailing, kind="development" + ) + + origin_date = origin_date.dt.to_period(self.origin_grain).dt.to_timestamp( + how="s" + ) + + development_date = development_date.dt.to_period( + self.development_grain + ).dt.to_timestamp(how="e") + + # Aggregate dates to the origin/development grains + data_agg = self._aggregate_data( + data, origin_date, development_date, index, columns + ) + + # Fill in missing periods with zeros + date_axes = self._get_date_axes( + data_agg["__origin__"], + data_agg["__development__"], + self.origin_grain, + self.development_grain, + ) + + # Deal with labels + if not index: + index = ["Total"] + data_agg[index[0]] = "Total" + + self.kdims, key_idx = self._set_kdims(data_agg, index) + self.vdims = np.array(columns) + self.odims, orig_idx = self._set_odims(data_agg, date_axes) + self.ddims, dev_idx = self._set_ddims(data_agg, date_axes) + + # Set remaining triangle properties + val_date = data_agg["__development__"].max() + val_date = val_date.compute() if hasattr(val_date, "compute") else val_date + self.key_labels = index + self.valuation_date = val_date + + if cumulative is None: + warnings.warn( + """ + The cumulative property of your triangle is not set. This may result in + undesirable behavior. 
In a future release this will result in an error."""
+            )
+
+        self.is_cumulative = cumulative
+        self.virtual_columns = VirtualColumns(self)
+        self.is_pattern = pattern
+
+        split = self.origin_grain.split("-")
+        self.origin_grain = {"A": "Y", "2Q": "S"}.get(split[0], split[0])
+
+        if len(split) == 1:
+            self.origin_close = "DEC"
+        else:
+            self.origin_close = split[1]
+
+        split = self.development_grain.split("-")
+        self.development_grain = {"A": "Y", "2Q": "S"}.get(split[0], split[0])
+        grain_sort = ["Y", "S", "Q", "M"]
+        self.development_grain = grain_sort[
+            max(
+                grain_sort.index(self.origin_grain),
+                grain_sort.index(self.development_grain),
+            )
+        ]
+
+        # Coerce malformed triangles to something more predictable
+        check_origin = (
+            pd.period_range(
+                start=self.odims.min(),
+                end=self.valuation_date,
+                freq=self.origin_grain.replace("S", "2Q"),
+            )
+            .to_timestamp()
+            .values
+        )
+
+        if (
+            len(check_origin) != len(self.odims)
+            and pd.to_datetime(options.ULT_VAL) != self.valuation_date
+            and not self.is_pattern
+        ):
+            self.odims = check_origin
+
+        # Set the Triangle values
+        coords, amts = self._set_values(data_agg, key_idx, columns, orig_idx, dev_idx)
+
+        self.values = num_to_nan(
+            sp(
+                coords,
+                amts,
+                prune=True,
+                has_duplicates=False,
+                sorted=True,
+                shape=(
+                    len(self.kdims),
+                    len(self.vdims),
+                    len(self.odims),
+                    len(self.ddims),
+                ),
+            )
+        )
+        # Deal with array backend
+        self.array_backend = "sparse"
+        if array_backend is None:
+            array_backend = options.ARRAY_BACKEND
+        if not options.AUTO_SPARSE or array_backend == "cupy":
+            self.set_backend(array_backend, inplace=True)
+        else:
+            self = self._auto_sparse()
+        self._set_slicers()
+        # Deal with special properties
+        if self.is_pattern:
+            obj = self.dropna()
+            self.odims = obj.odims
+            self.ddims = obj.ddims
+            self.values = obj.values
+        if ult:
+            obj = concat((self.dev_to_val().iloc[..., : len(ult.odims), :], ult), -1)
+            obj = obj.val_to_dev()
+            self.odims = obj.odims
+            self.ddims = obj.ddims
+            self.values = obj.values
+            self.valuation_date = pd.Timestamp(options.ULT_VAL)
+
+    @staticmethod
+    def _split_ult(data, index, columns, origin, development):
+        """Deal with triangles with ultimate values"""
+        ult = None
+        if (
+            development
+            and len(development) == 1
+            and data[development[0]].dtype == "<M8[ns]"
+        ):
+            u = data[data[development[0]] == options.ULT_VAL].copy()
+            if len(u) > 0 and len(u) != len(data):
+                ult = Triangle(
+                    u,
+                    origin=origin,
+                    development=development,
+                    columns=columns,
+                    index=index,
+                )
+                ult.ddims = pd.DatetimeIndex([options.ULT_VAL])
+                data = data[data[development[0]] != options.ULT_VAL]
+        return data, ult
+
+    @property
+    def index(self):
+        return pd.DataFrame(list(self.kdims), columns=self.key_labels)
+
+    @index.setter
+    def index(self, value):
+        self._len_check(self.index, value)
+        if type(value) is pd.DataFrame:
+            self.kdims = value.values
+            self.key_labels = list(value.columns)
+            self._set_slicers()
+        else:
+            raise TypeError("index must be a pandas DataFrame")
+
+    @property
+    def columns(self):
+        return pd.Index(self.vdims, name="columns")
+
+    @columns.setter
+    def columns(self, value):
+        self._len_check(self.columns, value)
+        self.vdims = [value] if type(value) is str else value
+        if type(self.vdims) is list:
+            self.vdims = np.array(self.vdims)
+        self._set_slicers()
+
+    @property
+    def origin(self):
+        if self.is_pattern and len(self.odims) == 1:
+            return pd.Series(["(All)"])
+        else:
+            freq = {"Y": "A", "S": "2Q", "H": "2Q"}.get(
+                self.origin_grain, self.origin_grain
+            )
+            freq = freq if freq == "M" else freq + "-" + self.origin_close
+            return pd.DatetimeIndex(self.odims,
name="origin").to_period(freq=freq) + + @origin.setter + def origin(self, value): + self._len_check(self.origin, value) + freq = {"Y": "A", "S": "2Q"}.get(self.origin_grain, self.origin_grain) + freq = freq if freq == "M" else freq + "-" + self.origin_close + value = pd.PeriodIndex(list(value), freq=freq) + self.odims = value.to_timestamp().values + + @property + def development(self): + ddims = self.ddims.copy() + if self.is_val_tri: + formats = {"Y": "%Y", "S": "%YQ%q", "Q": "%YQ%q", "M": "%Y-%m"} + ddims = ddims.to_period(freq=self.development_grain).strftime( + formats[self.development_grain] + ) + elif self.is_pattern: + offset = self._dstep()["M"][self.development_grain] + if self.is_ultimate: + ddims[-1] = ddims[-2] + offset + if self.is_cumulative: + ddims = ["{}-Ult".format(ddims[i]) for i in range(len(ddims))] + else: + ddims = [ + "{}-{}".format(ddims[i], ddims[i] + offset) + for i in range(len(ddims)) + ] + return pd.Series(list(ddims), name="development") + + @development.setter + def development(self, value): + self._len_check(self.development, value) + self.ddims = np.array([value] if type(value) is str else value) + + def set_index(self, value, inplace=False): + """Sets the index of the Triangle""" + if inplace: + self.index = value + return self + else: + new_obj = self.copy() + return new_obj.set_index(value=value, inplace=True) + + @property + def is_val_tri(self): + return type(self.ddims) == pd.DatetimeIndex + + @property + def is_full(self): + return self.nan_triangle.sum().sum() == np.prod(self.shape[-2:]) + + @property + def is_ultimate(self): + return sum(self.valuation >= options.ULT_VAL[:4]) > 0 + + @property + def latest_diagonal(self): + return self[self.valuation == self.valuation_date].sum("development") + + @property + def link_ratio(self): + if not self.is_pattern: + obj = (1 / self.iloc[..., :-1]) * self.iloc[..., 1:].values + if not obj.is_full: + obj = obj[obj.valuation < obj.valuation_date] + if hasattr(obj, "w_"): + w_ = obj.w_[..., : len(obj.odims), :] + obj = obj * w_ if obj.shape == w_.shape else obj + obj.is_pattern = True + obj.is_cumulative = False + obj.values = num_to_nan(obj.values) + return obj + else: + return self + + @property + def age_to_age(self): + return self.link_ratio + + def incr_to_cum(self, inplace=False): + """Method to convert an incremental triangle into a cumulative triangle. + + Parameters + ---------- + inplace: bool + Set to True will update the instance data attribute inplace + + Returns + ------- + Updated instance of triangle accumulated along the origin + """ + if inplace: + xp = self.get_array_module() + if not self.is_cumulative: + if self.is_pattern: + if hasattr(self, "is_additive"): + if self.is_additive: + values = xp.nan_to_num(self.values[..., ::-1]) + values = num_to_value(values, 0) + self.values = ( + xp.cumsum(values, -1)[..., ::-1] * self.nan_triangle + ) + else: + values = xp.nan_to_num(self.values[..., ::-1]) + values = num_to_value(values, 1) + values = xp.cumprod(values, -1)[..., ::-1] + self.values = values * self.nan_triangle + values = num_to_value(values, self.get_array_module(values).nan) + else: + if self.array_backend not in ["sparse", "dask"]: + self.values = ( + xp.cumsum(xp.nan_to_num(self.values), 3) + * self.nan_triangle[None, None, ...] 
+ ) + else: + values = xp.nan_to_num(self.values) + nan_triangle = xp.nan_to_num(self.nan_triangle) + l1 = lambda i: values[..., 0 : i + 1] + l2 = lambda i: l1(i) * nan_triangle[..., i : i + 1] + l3 = lambda i: l2(i).sum(3, keepdims=True) + if db: + bag = db.from_sequence(range(self.shape[-1])) + bag = bag.map(l3) + out = bag.compute(scheduler="threads") + else: + out = [l3(i) for i in range(self.shape[-1])] + self.values = xp.concatenate(out, axis=3) + self.values = num_to_nan(self.values) + self.is_cumulative = True + return self + else: + new_obj = self.copy() + return new_obj.incr_to_cum(inplace=True) + + def cum_to_incr(self, inplace=False): + """Method to convert a cumulative triangle into an incremental triangle. + + Parameters + ---------- + inplace: bool + Set to True will update the instance data attribute inplace + + Returns + ------- + Updated instance of the triangle with incremental values + """ + if inplace: + v = self.valuation_date + if self.is_cumulative or self.is_cumulative is None: + if self.is_pattern: + xp = self.get_array_module() + self.values = xp.nan_to_num(self.values) + values = num_to_value(self.values, 1) + diff = self.iloc[..., :-1] / self.iloc[..., 1:].values + self = concat( + ( + diff, + self.iloc[..., -1], + ), + axis=3, + ) + self.values = self.values * self.nan_triangle + else: + diff = self.iloc[..., 1:] - self.iloc[..., :-1].values + self = concat((self.iloc[..., 0], diff), axis=3) + self.is_cumulative = False + self.valuation_date = v + return self + else: + new_obj = self.copy() + return new_obj.cum_to_incr(inplace=True) + + def _dstep(self): + return { + "M": {"Y": 12, "S": 6, "Q": 3, "M": 1}, + "Q": {"Y": 4, "S": 2, "Q": 1}, + "S": {"Y": 2, "S": 1}, + "Y": {"Y": 1}, + } + + def _val_dev(self, sign, inplace=False): + backend = self.array_backend + obj = self.set_backend("sparse") + if not inplace: + obj.values = obj.values.copy() + scale = self._dstep()[obj.development_grain][obj.origin_grain] + offset = np.arange(obj.shape[-2]) * scale + min_slide = -offset.max() + if (obj.values.coords[-2] == np.arange(1)).all(): + # Unique edge case #239 + offset = offset[-1:] * sign + offset = offset[obj.values.coords[-2]] * sign # [0] + obj.values.coords[-1] = obj.values.coords[-1] + offset + ddims = obj.valuation[obj.valuation <= obj.valuation_date] + ddims = len(ddims.drop_duplicates()) + if ddims == 1 and sign == -1: + ddims = len(obj.odims) + if obj.values.density > 0 and obj.values.coords[-1].min() < 0: + obj.values.coords[-1] = obj.values.coords[-1] - min( + obj.values.coords[-1].min(), min_slide + ) + ddims = np.max([np.max(obj.values.coords[-1]) + 1, ddims]) + obj.values.shape = tuple(list(obj.shape[:-1]) + [ddims]) + if options.AUTO_SPARSE == False or backend == "cupy": + obj = obj.set_backend(backend) + else: + obj = obj._auto_sparse() + return obj + + def dev_to_val(self, inplace=False): + """Converts triangle from a development lag triangle to a valuation + triangle. + + Parameters + ---------- + inplace : bool + Whether to mutate the existing Triangle instance or return a new + one. + + Returns + ------- + Triangle + Updated instance of the triangle with valuation periods.
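+ + Examples + -------- + A minimal sketch, assuming the bundled ``raa`` sample triangle (cell + values omitted here): + + >>> import chainladder as cl + >>> raa = cl.load_sample("raa") + >>> raa.is_val_tri + False + >>> raa.dev_to_val().is_val_tri + True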
+ + """ + if self.is_val_tri: + if inplace: + return self + else: + return self.copy() + is_cumulative = self.is_cumulative + if self.is_full: + if is_cumulative: + obj = self.cum_to_incr(inplace=inplace) + else: + obj = self.copy() + if self.is_ultimate: + ultimate = obj.iloc[..., -1:] + obj = obj.iloc[..., :-1] + else: + obj = self + obj = obj._val_dev(1, inplace) + ddims = obj.valuation[obj.valuation <= obj.valuation_date] + obj.ddims = ddims.drop_duplicates().sort_values() + if self.is_full: + if self.is_ultimate: + ultimate.ddims = pd.DatetimeIndex(ultimate.valuation[0:1]) + obj = concat((obj, ultimate), -1) + if is_cumulative: + obj = obj.incr_to_cum(inplace=inplace) + return obj + + def val_to_dev(self, inplace=False): + """Converts triangle from a valuation triangle to a development lag + triangle. + + Parameters + ---------- + inplace : bool + Whether to mutate the existing Triangle instance or return a new + one. + + Returns + ------- + Updated instance of triangle with development lags + """ + if not self.is_val_tri: + if inplace: + return self + else: + return self.copy() + if self.is_ultimate and self.shape[-1] > 1: + ultimate = self.iloc[..., -1:] + ultimate.ddims = np.array([9999]) + obj = self.iloc[..., :-1]._val_dev(-1, inplace) + else: + obj = self.copy()._val_dev(-1, inplace) + val_0 = obj.valuation[0] + if self.ddims.shape[-1] == 1 and self.ddims[0] == self.valuation_date: + origin_0 = pd.to_datetime(obj.odims[-1]) + else: + origin_0 = pd.to_datetime(obj.odims[0]) + lag_0 = (val_0.year - origin_0.year) * 12 + val_0.month - origin_0.month + 1 + scale = self._dstep()["M"][obj.development_grain] + obj.ddims = np.arange(obj.values.shape[-1]) * scale + lag_0 + prune = obj[obj.origin == obj.origin.max()] + if self.is_ultimate and self.shape[-1] > 1: + obj = obj.iloc[..., : (prune.valuation <= prune.valuation_date).sum()] + obj = concat((obj, ultimate), -1) + return obj + + def grain(self, grain="", trailing=False, inplace=False): + """Changes the grain of a cumulative triangle. + + Parameters + ---------- + grain : str + The grain to which you want your triangle converted, specified as + 'OXDY' where X and Y can take on values of ``['Y', 'S', 'Q', 'M' + ]`` For example, 'OYDY' for Origin Year/Development Year, 'OQDM' + for Origin quarter/Development Month, etc. + trailing : bool + For partial origin years/quarters, trailing will set the year/quarter + end to that of the latest available from the origin data. + inplace : bool + Whether to mutate the existing Triangle instance or return a new + one. + + Returns + ------- + Triangle + """ + ograin_old, ograin_new = self.origin_grain, grain[1:2] + dgrain_old, dgrain_new = self.development_grain, grain[-1] + ograin_new = "S" if ograin_new == "H" else ograin_new + valid = { + "Y": ["Y"], + "Q": ["Q", "S", "Y"], + "M": ["Y", "S", "Q", "M"], + "S": ["S", "Y"], + } + if ograin_new not in valid.get(ograin_old, []) or dgrain_new not in valid.get( + dgrain_old, [] + ): + raise ValueError("New grain not compatible with existing grain") + if ( + self.is_cumulative is None + and dgrain_old != dgrain_new + and self.shape[-1] > 1 + ): + raise AttributeError( + "The is_cumulative attribute must be set before using grain method." 
+ ) + if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new): + raise ValueError("Origin grain must be coarser than development grain") + if self.is_full and not self.is_ultimate and not self.is_val_tri: + warnings.warn("Triangle includes extraneous development lags") + obj = self.dev_to_val() + if ograin_new != ograin_old: + freq = {"Y": "A", "S": "2Q"}.get(ograin_new, ograin_new) + if trailing or obj.origin.freqstr[-3:] != "DEC": + origin_period_end = self.origin[-1].strftime("%b").upper() + else: + origin_period_end = "DEC" + indices = ( + pd.Series(range(len(self.origin)), index=self.origin) + .resample("-".join([freq, origin_period_end])) + .indices + ) + groups = pd.concat( + [pd.Series([k] * len(v), index=v) for k, v in indices.items()], axis=0 + ).values + obj = obj.groupby(groups, axis=2).sum() + obj.origin_close = origin_period_end + d_start = pd.Period( + obj.valuation[0], + freq=dgrain_old if dgrain_old == 'M' else dgrain_old + obj.origin.freqstr[-4:] + ).to_timestamp(how='s') + if (len(obj.ddims) > 1 and obj.origin.to_timestamp(how='s')[0] != d_start): + addl_ts = ( + pd.period_range(obj.odims[0], obj.valuation[0], freq=dgrain_old)[:-1] + .to_timestamp() + .values + ) + addl = obj.iloc[..., -len(addl_ts) :] * 0 + addl.ddims = addl_ts + obj = concat((addl, obj), axis=-1) + obj.values = num_to_nan(obj.values) + if dgrain_old != dgrain_new and obj.shape[-1] > 1: + step = self._dstep()[dgrain_old][dgrain_new] + d = np.sort( + len(obj.development) - np.arange(0, len(obj.development), step) - 1 + ) + if obj.is_cumulative: + obj = obj.iloc[..., d] + else: + ddims = obj.ddims[d] + d2 = [d[0]] * (d[0] + 1) + list(np.repeat(np.array(d[1:]), step)) + obj = obj.groupby(d2, axis=3).sum() + obj.ddims = ddims + obj.development_grain = dgrain_new + obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev() + if inplace: + self = obj + return self + return obj + + def trend( + self, + trend=0.0, + axis="origin", + start=None, + end=None, + ultimate_lag=None, + **kwargs + ): + """Allows for the trending of a Triangle object along either a valuation + or origin axis. This method trends using days and assumes a year is + 365.25 days long. + + Parameters + ---------- + trend : float + The annual amount of the trend. Use 1/(1+trend)-1 to detrend. + axis : str (options: ['origin', 'valuation']) + The axis on which to apply the trend + start: date + The start date from which trend should be calculated. If None is + provided then the latest date of the triangle is used. + end: date + The end date to which the trend should be calculated. If None is + provided then the earliest period of the triangle is used. + ultimate_lag : int + If ultimate valuations are in the triangle, optionally set the overall + age (in months) of the ultimate to be some lag from the latest non-Ultimate + development + + Returns + ------- + Triangle + updated with multiplicative trend applied.
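+ + Examples + -------- + A minimal sketch, assuming the bundled ``raa`` sample triangle; the + second call detrends the first using the identity noted above: + + >>> import chainladder as cl + >>> raa = cl.load_sample("raa") + >>> trended = raa.trend(0.05, axis='origin') + >>> detrended = trended.trend(1 / 1.05 - 1, axis='origin')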
+ """ + if axis not in ["origin", "valuation", 2, -2]: + raise ValueError( + "Only origin and valuation axes are supported for trending" + ) + xp = self.get_array_module() + start = pd.to_datetime(start) if type(start) is str else start + start = self.valuation_date if start is None else start + end = pd.to_datetime(end) if type(end) is str else end + end = self.origin[0].to_timestamp() if end is None else end + if axis in ["origin", 2, -2]: + vector = pd.DatetimeIndex( + np.tile( + self.origin.to_timestamp(how="e").values, self.shape[-1] + ).flatten() + ) + else: + vector = self.valuation + lower, upper = (end, start) if end > start else (start, end) + vector = pd.DatetimeIndex( + np.maximum( + np.minimum(np.datetime64(lower), vector.values), np.datetime64(upper) + ) + ) + vector = ( + (start.year - vector.year) * 12 + (start.month - vector.month) + ).values.reshape(self.shape[-2:], order="f") + if self.is_ultimate and ultimate_lag is not None and vector.shape[-1] > 1: + vector[:, -1] = vector[:, -2] + ultimate_lag + trend = ( + xp.array((1 + trend) ** (vector / 12))[None, None, ...] * self.nan_triangle + ) + obj = self.copy() + obj.values = obj.values * trend + return obj + + def broadcast_axis(self, axis, value): + warnings.warn( + """ + Broadcast axis is deprecated in favor of broadcasting + using Triangle arithmetic.""" + ) + return self + + def copy(self): + X = Triangle() + X.__dict__.update(vars(self)) + X._set_slicers() + X.values = X.values.copy() + return X + + def development_correlation(self, p_critical=0.5): + """ + Mack (1997) test for correlations between subsequent development + factors. Results should be within confidence interval range + otherwise too much correlation + + Parameters + ---------- + p_critical: float (default=0.10) + Value between 0 and 1 representing the confidence level for the test. A + value of 0.1 implies 90% confidence. + Returns + ------- + DevelopmentCorrelation object with t, t_critical, t_expectation, + t_variance, and range attributes. + """ + return DevelopmentCorrelation(self, p_critical) + + def valuation_correlation(self, p_critical=0.1, total=False): + """ + Mack test for calendar year effect + A calendar period has impact across developments if the probability of + the number of small (or large) development factors in that period + occurring randomly is less than p_critical + + Parameters + ---------- + p_critical: float (default=0.10) + Value between 0 and 1 representing the confidence level for the test + total: + Whether to calculate valuation correlation in total across all + years (True) consistent with Mack 1993 or for each year separately + (False) consistent with Mack 1997. + Returns + ------- + ValuationCorrelation object with z, z_critical, z_expectation and + z_variance attributes. + + """ + return ValuationCorrelation(self, p_critical, total) + + def shift(self, periods=-1, axis=3): + """Shift elements along an axis by desired number of periods. + + Data that falls beyond the existing shape of the Triangle is eliminated + and new cells default to zero. + + Parameters + ---------- + periods : int + Number of periods to shift. Can be positive or negative. + axis : {2 or 'origin', 3 or 'development', None}, default 3 + Shift direction. 
+ + Returns + ------- + Triangle + updated with shifted elements + + """ + axis = self._get_axis(axis) + if axis < 2: + raise AttributeError( + "Lagging only supported for origin and development axes" + ) + if periods == 0: + return self + if periods > 0: + if axis == 3: + out = concat( + ( + self.iloc[..., 1:].rename("development", self.development[:-1]), + (self.iloc[..., -1:] * 0), + ), + axis=axis, + ) + else: + out = concat( + ( + self.iloc[..., 1:, :].rename("origin", self.origin[:-1]), + (self.iloc[..., -1:, :] * 0), + ), + axis=axis, + ) + else: + if axis == 3: + out = concat( + ( + (self.iloc[..., :1] * 0), + self.iloc[..., :-1].rename("development", self.development[1:]), + ), + axis=axis, + ) + else: + out = concat( + ( + (self.iloc[..., :1, :] * 0), + self.iloc[..., :-1, :].rename("origin", self.origin[1:]), + ), + axis=axis, + ) + if abs(periods) == 1: + return out + else: + return out.shift(periods - 1 if periods > 0 else periods + 1, axis) + + def sort_axis(self, axis): + """Method to sort a Triangle along a given axis + + Parameters + ---------- + axis : int or str + The axis for sorting + + Returns + ------- + Triangle + sorted along the given axis + """ + + axis = self._get_axis(axis) + if axis == 0: + return self.sort_index() + obj = self.copy() + if axis == 1: + sort = pd.Series(self.vdims).sort_values().index + if np.any(sort != pd.Series(self.vdims).index): + obj.values = obj.values[:, list(sort), ...] + obj.vdims = obj.vdims[list(sort)] + if axis == 2: + sort = pd.Series(self.odims).sort_values().index + if np.any(sort != pd.Series(self.odims).index): + obj.values = obj.values[..., list(sort), :] + obj.odims = obj.odims[list(sort)] + if axis == 3: + sort = self.development.sort_values().index + if np.any(sort != self.development.index): + obj.values = obj.values[..., list(sort)] + obj.ddims = obj.ddims[list(sort)] + return obj diff --git a/chainladder/core/tests/test_triangle.py b/chainladder/core/tests/test_triangle.py index 624c09c3..91539f94 100644 --- a/chainladder/core/tests/test_triangle.py +++ b/chainladder/core/tests/test_triangle.py @@ -108,8 +108,9 @@ def test_printer(raa): def test_value_order(clrd): a = clrd[["CumPaidLoss", "BulkLoss"]] b = clrd[["BulkLoss", "CumPaidLoss"]] - xp = a.get_array_module() - xp.testing.assert_array_equal(a.values[:, -1], b.values[:, 0]) + assert np.all( + a.triangle[:, 0].data.lazy().collect() == + b.triangle[:, -1].data.lazy().collect()) def test_trend(raa, atol): @@ -123,10 +124,7 @@ def test_shift(qtr): def test_quantile_vs_median(clrd): - xp = clrd.get_array_module() - xp.testing.assert_array_equal( - clrd.quantile(q=0.5)["CumPaidLoss"].values, clrd.median()["CumPaidLoss"].values - ) + assert clrd.quantile(q=0.5)["CumPaidLoss"] == clrd.median()["CumPaidLoss"] def test_base_minimum_exposure_triangle(raa): @@ -139,16 +137,15 @@ def test_base_minimum_exposure_triangle(raa): cl.Triangle(d, origin="index", columns=d.columns[-1]) -def test_origin_and_value_setters(raa): +def test_value_setters(raa): raa2 = raa.copy() raa.columns = list(raa.columns) - raa.origin = list(raa.origin) assert np.all( ( np.all(raa2.origin == raa.origin), np.all(raa2.development == raa.development), - np.all(raa2.odims == raa.odims), - np.all(raa2.vdims == raa.vdims), + np.all(raa2.columns == raa.columns), ) ) @@ -169,30 +166,20 @@ def test_valdev3(qtr): assert a == b -# def test_valdev4(): -# # Does not work with pandas 0.23, consider requiring only pandas>=0.24 -# raa = raa -#
np.testing.assert_array_equal(raa.dev_to_val()[raa.dev_to_val().development>='1989'].values, -# raa[raa.valuation>='1989'].dev_to_val().values) +def test_valdev4(raa): + assert (raa.dev_to_val()[raa.dev_to_val().development >= '1989'] == + raa[raa.valuation >= '1989'].dev_to_val()) def test_valdev5(raa): - xp = raa.get_array_module() - xp.testing.assert_array_equal( - raa[raa.valuation >= "1989"].latest_diagonal.values, raa.latest_diagonal.values - ) + assert raa[raa.valuation >= "1989"].latest_diagonal == raa.latest_diagonal def test_valdev6(raa): - xp = raa.get_array_module() - xp.testing.assert_array_equal( - raa.grain("OYDY").latest_diagonal.set_backend("numpy").values, - raa.latest_diagonal.grain("OYDY").set_backend("numpy").values, - ) + assert (raa.grain("OYDY").latest_diagonal == raa.latest_diagonal.grain("OYDY")) def test_valdev7(qtr, atol): - xp = qtr.get_array_module() x = cl.Chainladder().fit(qtr).full_expectation_ - assert xp.sum(x.dev_to_val().val_to_dev().values - x.values) < atol + assert np.sum(x.dev_to_val().val_to_dev().values - x.values) < atol @@ -203,12 +190,6 @@ def test_reassignment(clrd): clrd["values"] = clrd["values"] + clrd["CumPaidLoss"] -def test_dropna(clrd): - assert clrd.shape == clrd.dropna().shape - a = clrd[clrd["LOB"] == "wkcomp"].iloc[-5]["CumPaidLoss"].dropna().shape - assert a == (1, 1, 2, 2) - - def test_exposure_tri(): x = cl.load_sample("auto") x = x[x.development == 12] @@ -225,14 +206,12 @@ def test_jagged_1_add(raa): raa1 = raa[raa.origin <= "1984"] raa2 = raa[raa.origin > "1984"] assert raa2 + raa1 == raa - assert raa2.dropna() + raa1.dropna() == raa def test_jagged_2_add(raa): raa1 = raa[raa.development <= 48] raa2 = raa[raa.development > 48] assert raa2 + raa1 == raa - assert raa2.dropna() + raa1.dropna() == raa def test_df_period_input(raa): @@ -250,7 +229,7 @@ def test_trend_on_vector(raa): def test_latest_diagonal_val_to_dev(raa): - assert raa.latest_diagonal.val_to_dev() == raa[raa.valuation == raa.valuation_date] + assert raa.latest_diagonal.val_to_dev() == raa[raa.valuation == raa.valuation_date].val_to_dev() def test_sumdiff_to_diffsum(clrd): @@ -304,16 +283,9 @@ def test_array_protocol(raa, clrd): ) -# def test_dask_backend(raa): -# """ Dask backend not fully implemented """ -# raa1 = cl.Chainladder().fit(raa.set_backend('dask')).ultimate_ -# raa2 = cl.Chainladder().fit(raa).ultimate_ -# assert (raa1 == raa2).compute() - - def test_partial_val_dev(raa): raa = raa.latest_diagonal - raa.iloc[..., -3:, :] = np.nan + #raa.iloc[..., -3:, :] = np.nan raa.val_to_dev().iloc[0, 0, 0, -1] == raa.iloc[0, 0, 0, -1] @@ -330,7 +302,6 @@ def test_shift(raa): .shift(-1, axis=3) .shift(2, axis=2) .shift(2, axis=3) - .dropna() .values ).to_frame(origin_as_datetime=False).fillna(0).sum().sum() == 0 @@ -402,19 +373,6 @@ def test_heatmap(raa): raa.link_ratio.heatmap() -def test_agg_sparse(): - a = cl.load_sample("raa") - b = cl.load_sample("raa").set_backend("sparse") - assert a.mean().mean() == b.mean().mean() - - -def test_inplace(raa): - t = raa.copy() - t.dev_to_val(inplace=True) - t.val_to_dev(inplace=True) - t.grain("OYDY", inplace=True) - - def test_malformed_init(): assert ( cl.Triangle( @@ -523,10 +481,6 @@ def test_edgecase_236(): ) -def test_to_frame_on_zero(clrd): - assert len((clrd * 0).latest_diagonal.to_frame(origin_as_datetime=False)) == 0 - - def test_valuation_vector(): df = pd.DataFrame( { @@ -539,7 +493,7 @@ def test_valuation_vector(): tri = cl.Triangle( df, origin="Accident Date", - development="Valuation Date", + valuation="Valuation Date", columns="Loss", cumulative=True, trailing=True, @@ -556,9 +510,10
@@ def test_single_entry(): cl_tri = cl.Triangle( data, origin="origin", - development="valuation_date", + valuation="valuation_date", columns="amount", cumulative=True, + valuation_format='%m.%d.%Y' ) # create a development constant @@ -665,7 +620,7 @@ def test_halfyear_grain(): ) assert cl.Triangle( data=data, origin="AccMo", development="ValMo", columns="value" - ).shape == (1, 1, 16, 1) + ).shape == (1, 1, 4, 1) def test_predict(raa): diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index 12b0afca..b165cfc6 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -1,944 +1,487 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. - import pandas as pd import numpy as np -import copy +import polars as pl +import re +from .core import TriangleBase, PlTriangleGroupBy, vcol import warnings -from chainladder.core.base import TriangleBase -from chainladder.utils.sparse import sp -from chainladder.core.slice import VirtualColumns -from chainladder.core.correlation import DevelopmentCorrelation, ValuationCorrelation -from chainladder.utils.utility_functions import concat, num_to_nan, num_to_value -from chainladder import options - try: - import dask.bag as db + from IPython.core.display import HTML except: - db = None - - -class Triangle(TriangleBase): - """ - The core data structure of the chainladder package - - Parameters - ---------- - data: DataFrame - A single dataframe that contains columns represeting all other - arguments to the Triangle constructor - origin: str or list - A representation of the accident, reporting or more generally the - origin period of the triangle that will map to the Origin dimension - development: str or list - A representation of the development/valuation periods of the triangle - that will map to the Development dimension - columns: str or list - A representation of the numeric data of the triangle that will map to - the columns dimension. If None, then a single 'Total' key will be - generated. - index: str or list or None - A representation of the index of the triangle that will map to the - index dimension. If None, then a single 'Total' key will be generated. - origin_format: optional str - A string representation of the date format of the origin arg. If - omitted then date format will be inferred by pandas. - development_format: optional str - A string representation of the date format of the development arg. If - omitted then date format will be inferred by pandas. - cumulative: bool - Whether the triangle is cumulative or incremental. This attribute is - required to use the ``grain`` and ``dev_to_val`` methods and will be - automatically set when invoking ``cum_to_incr`` or ``incr_to_cum`` methods. - trailing: bool - When partial origin periods are present, setting trailing to True will - ensure the most recent origin period is a full period and the oldest - origin is partial. If full origin periods are present in the data, then - trailing has no effect. - - Attributes - ---------- - index: Series - Represents all available levels of the index dimension. - columns: Series - Represents all available levels of the value dimension. - origin: DatetimeIndex - Represents all available levels of the origin dimension. - development: Series - Represents all available levels of the development dimension. 
- key_labels: list - Represents the ``index`` axis labels - virtual_columns: Series - Represents the subset of columns of the triangle that are virtual. - valuation: DatetimeIndex - Represents all valuation dates of each cell in the Triangle. - origin_grain: str - The grain of the origin vector ('Y', 'S', 'Q', 'M') - development_grain: str - The grain of the development vector ('Y', 'S', 'Q', 'M') - shape: tuple - The 4D shape of the triangle instance with axes corresponding to (index, columns, origin, development) - link_ratio, age_to_age - Displays age-to-age ratios for the triangle. - valuation_date : date - The latest valuation date of the data - loc: Triangle - pandas-style ``loc`` accessor - iloc: Triangle - pandas-style ``iloc`` accessor - latest_diagonal: Triangle - The latest diagonal of the triangle - is_cumulative: bool - Whether the triangle is cumulative or not - is_ultimate: bool - Whether the triangle has an ultimate valuation - is_full: bool - Whether lower half of Triangle has been filled in - is_val_tri: - Whether the triangle development period is expressed as valuation - periods. - values: array - 4D numpy array underlying the Triangle instance - T: Triangle - Transpose index and columns of object. Only available when Triangle is - convertible to DataFrame. - """ - - def __init__( - self, - data=None, - origin=None, - development=None, - columns=None, - index=None, - origin_format=None, - development_format=None, - cumulative=None, - array_backend=None, - pattern=False, - trailing=True, - *args, - **kwargs - ): - if data is None: - return - - index, columns, origin, development = self._input_validation( - data, index, columns, origin, development - ) - - # Handle any ultimate vectors in triangles separately - data, ult = self._split_ult(data, index, columns, origin, development) - # Conform origins and developments to datetimes and determine lowest grains - origin_date = self._to_datetime(data, origin, format=origin_format).rename( - "__origin__" - ) - self.origin_grain = self._get_grain( - origin_date, trailing=trailing, kind="origin" - ) - - development_date = self._set_development( - data, development, development_format, origin_date - ) - - self.development_grain = self._get_grain( - development_date, trailing=trailing, kind="development" - ) + HTML = None - origin_date = origin_date.dt.to_period(self.origin_grain).dt.to_timestamp( - how="s" - ) - - development_date = development_date.dt.to_period( - self.development_grain - ).dt.to_timestamp(how="e") - - # Aggregate dates to the origin/development grains - data_agg = self._aggregate_data( - data, origin_date, development_date, index, columns - ) - - # Fill in missing periods with zeros - date_axes = self._get_date_axes( - data_agg["__origin__"], - data_agg["__development__"], - self.origin_grain, - self.development_grain, - ) - - # Deal with labels - if not index: - index = ["Total"] - data_agg[index[0]] = "Total" - - self.kdims, key_idx = self._set_kdims(data_agg, index) - self.vdims = np.array(columns) - self.odims, orig_idx = self._set_odims(data_agg, date_axes) - self.ddims, dev_idx = self._set_ddims(data_agg, date_axes) - - # Set remaining triangle properties - val_date = data_agg["__development__"].max() - val_date = val_date.compute() if hasattr(val_date, "compute") else val_date - self.key_labels = index - self.valuation_date = val_date - - if cumulative is None: +class Triangle: + """ Pandas API interface to Polars Backend """ + def __init__(self, data=None, *args, **kwargs): + if kwargs.get('development', 
None): warnings.warn( - """ - The cumulative property of your triangle is not set. This may result in - undesirable behavior. In a future release this will result in an error.""" - ) - - self.is_cumulative = cumulative - self.virtual_columns = VirtualColumns(self) - self.is_pattern = pattern - - split = self.origin_grain.split("-") - self.origin_grain = {"A": "Y", "2Q": "S"}.get(split[0], split[0]) - - if len(split) == 1: - self.origin_close = "DEC" - else: - self.origin_close = split[1] - - split = self.development_grain.split("-") - self.development_grain = {"A": "Y", "2Q": "S"}.get(split[0], split[0]) - grain_sort = ["Y", "S", "Q", "M"] - self.development_grain = grain_sort[ - max( - grain_sort.index(self.origin_grain), - grain_sort.index(self.development_grain), - ) - ] - - # Coerce malformed triangles to something more predictible - check_origin = ( - pd.period_range( - start=self.odims.min(), - end=self.valuation_date, - freq=self.origin_grain.replace("S", "2Q"), - ) - .to_timestamp() - .values - ) - - if ( - len(check_origin) != len(self.odims) - and pd.to_datetime(options.ULT_VAL) != self.valuation_date - and not self.is_pattern - ): - self.odims = check_origin - - # Set the Triangle values - coords, amts = self._set_values(data_agg, key_idx, columns, orig_idx, dev_idx) - - self.values = num_to_nan( - sp( - coords, - amts, - prune=True, - has_duplicates=False, - sorted=True, - shape=( - len(self.kdims), - len(self.vdims), - len(self.odims), - len(self.ddims), - ), - ) - ) - # Deal with array backend - self.array_backend = "sparse" - if array_backend is None: - array_backend = options.ARRAY_BACKEND - if not options.AUTO_SPARSE or array_backend == "cupy": - self.set_backend(array_backend, inplace=True) + """`development` argument is deprecated. Use `valuation` in the Triangle constructor.""") + kwargs['valuation'] = kwargs['development'] + if kwargs.get('development_format', None): + warnings.warn( + """`development_format` argument is deprecated. 
Use `valuation_format` in the Triangle constructor.""") + kwargs['valuation_format'] = kwargs['development_format'] + if data is None: + self.triangle = None + elif type(data) == pd.DataFrame: + self.triangle = TriangleBase(pl.DataFrame(data), *args, **kwargs) else: - self = self._auto_sparse() - self._set_slicers() - # Deal with special properties - if self.is_pattern: - obj = self.dropna() - self.odims = obj.odims - self.ddims = obj.ddims - self.values = obj.values - if ult: - obj = concat((self.dev_to_val().iloc[..., : len(ult.odims), :], ult), -1) - obj = obj.val_to_dev() - self.odims = obj.odims - self.ddims = obj.ddims - self.values = obj.values - self.valuation_date = pd.Timestamp(options.ULT_VAL) - - @staticmethod - def _split_ult(data, index, columns, origin, development): - """Deal with triangles with ultimate values""" - ult = None - if ( - development - and len(development) == 1 - and data[development[0]].dtype == "<M8[ns]" - ): - u = data[data[development[0]] == options.ULT_VAL] - if len(u) > 0 and len(u) != len(data): - ult = Triangle( - u, - origin=origin, - development=development, - columns=columns, - index=index, - ) - ult.ddims = pd.DatetimeIndex([options.ULT_VAL]) - data = data[data[development[0]] != options.ULT_VAL] - return data, ult - + self.triangle = TriangleBase(data, *args, **kwargs) + + def copy(self): + obj = Triangle() + obj.triangle = TriangleBase.from_triangle(self.triangle) + return obj + + @property + def key_labels(self): + return self.triangle.key_labels + @property def index(self): - return pd.DataFrame(list(self.kdims), columns=self.key_labels) - - @index.setter - def index(self, value): - self._len_check(self.index, value) - if type(value) is pd.DataFrame: - self.kdims = value.values - self.key_labels = list(value.columns) - self._set_slicers() - else: - raise TypeError("index must be a pandas DataFrame") - + return self.triangle.index.lazy().collect().to_pandas() + @property def columns(self): - return pd.Index(self.vdims, name="columns") - + return pd.Index(self.triangle.columns, name='columns') + @columns.setter def columns(self, value): - self._len_check(self.columns, value) - self.vdims = [value] if type(value) is str else value - if type(self.vdims) is list: - self.vdims = np.array(self.vdims) - self._set_slicers() - + self.triangle.columns = value + @property def origin(self): - if self.is_pattern and len(self.odims) == 1: - return pd.Series(["(All)"]) - else: - freq = {"Y": "A", "S": "2Q", "H": "2Q"}.get( - self.origin_grain, self.origin_grain - ) - freq = freq if freq == "M" else freq + "-" + self.origin_close - return pd.DatetimeIndex(self.odims, name="origin").to_period(freq=freq) - - @origin.setter - def origin(self, value): - self._len_check(self.origin, value) - freq = {"Y": "A", "S": "2Q"}.get(self.origin_grain, self.origin_grain) - freq = freq if freq == "M" else freq + "-" + self.origin_close - value = pd.PeriodIndex(list(value), freq=freq) - self.odims = value.to_timestamp().values - + return pd.PeriodIndex(self.triangle.origin.to_pandas(), freq=f'{self.triangle.origin_grain}') + + @property + def is_val_tri(self): + return self.triangle.is_val_tri + + def collect(self): + self.triangle.data = self.triangle.data.collect() + return self + @property def development(self): - ddims = self.ddims.copy() if self.is_val_tri: formats = {"Y": "%Y", "S": "%YQ%q", "Q": "%YQ%q", "M": "%Y-%m"} - ddims = ddims.to_period(freq=self.development_grain).strftime( - formats[self.development_grain] - ) - elif self.is_pattern: - offset = self._dstep()["M"][self.development_grain] - if self.is_ultimate: - ddims[-1] = ddims[-2] + offset
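+ # Valuation-indexed triangles are rendered with the grain-appropriate + # period format built above; development-lag triangles fall through to + # the backend's integer development ages below.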
- if self.is_cumulative: - ddims = ["{}-Ult".format(ddims[i]) for i in range(len(ddims))] - else: - ddims = [ - "{}-{}".format(ddims[i], ddims[i] + offset) - for i in range(len(ddims)) - ] - return pd.Series(list(ddims), name="development") - - @development.setter - def development(self, value): - self._len_check(self.development, value) - self.ddims = np.array([value] if type(value) is str else value) - - def set_index(self, value, inplace=False): - """Sets the index of the Triangle""" - if inplace: - self.index = value - return self + return pd.Series(pd.to_datetime(self.triangle.valuation).to_period( + freq=self.development_grain).strftime( + formats[self.development_grain]), + name='development') else: - new_obj = self.copy() - return new_obj.set_index(value=value, inplace=True) - + return self.triangle.development.to_pandas() + @property - def is_val_tri(self): - return type(self.ddims) == pd.DatetimeIndex - + def valuation_date(self): + return (self.triangle.valuation_date + + pd.DateOffset(days=1) + + pd.DateOffset(nanoseconds=-1)) + @property - def is_full(self): - return self.nan_triangle.sum().sum() == np.prod(self.shape[-2:]) - + def valuation(self): + if self.is_val_tri: + return pd.DatetimeIndex( + self.triangle.origin.alias('__origin__').to_frame() + .join( + self.triangle.valuation.alias('valuation').to_frame(), + how='cross') + .sort(['valuation']) + .select(pl.col('valuation')).to_pandas().iloc[:, 0] + ) + pd.DateOffset(days=1) + pd.DateOffset(nanoseconds=-1) + else: + return pd.DatetimeIndex( + self.triangle.origin.alias('__origin__').to_frame() + .join( + self.triangle.development.alias('__development__').to_frame(), + how='cross') + .sort(['__origin__', '__development__']) + .select(vcol.alias('valuation')).to_pandas().iloc[:, 0] + ) + pd.DateOffset(days=1) + pd.DateOffset(nanoseconds=-1) + @property - def is_ultimate(self): - return sum(self.valuation >= options.ULT_VAL[:4]) > 0 - + def shape(self): + return self.triangle.shape + @property def latest_diagonal(self): - return self[self.valuation == self.valuation_date].sum("development") + obj = self.copy() + obj.triangle = self.triangle.latest_diagonal + return obj @property def link_ratio(self): - if not self.is_pattern: - obj = (1 / self.iloc[..., :-1]) * self.iloc[..., 1:].values - if not obj.is_full: - obj = obj[obj.valuation < obj.valuation_date] - if hasattr(obj, "w_"): - w_ = obj.w_[..., : len(obj.odims), :] - obj = obj * w_ if obj.shape == w_.shape else obj - obj.is_pattern = True - obj.is_cumulative = False - obj.values = num_to_nan(obj.values) - return obj - else: - return self - + obj = self.copy() + obj.triangle = self.triangle.link_ratio + return obj + @property - def age_to_age(self): - return self.link_ratio - - def incr_to_cum(self, inplace=False): - """Method to convert an incremental triangle into a cumulative triangle. 
- - Parameters - ---------- - inplace: bool - Set to True will update the instance data attribute inplace - - Returns - ------- - Updated instance of triangle accumulated along the origin - """ - if inplace: - xp = self.get_array_module() - if not self.is_cumulative: - if self.is_pattern: - if hasattr(self, "is_additive"): - if self.is_additive: - values = xp.nan_to_num(self.values[..., ::-1]) - values = num_to_value(values, 0) - self.values = ( - xp.cumsum(values, -1)[..., ::-1] * self.nan_triangle - ) - else: - values = xp.nan_to_num(self.values[..., ::-1]) - values = num_to_value(values, 1) - values = xp.cumprod(values, -1)[..., ::-1] - self.values = values * self.nan_triangle - values = num_to_value(values, self.get_array_module(values).nan) - else: - if self.array_backend not in ["sparse", "dask"]: - self.values = ( - xp.cumsum(xp.nan_to_num(self.values), 3) - * self.nan_triangle[None, None, ...] - ) - else: - values = xp.nan_to_num(self.values) - nan_triangle = xp.nan_to_num(self.nan_triangle) - l1 = lambda i: values[..., 0 : i + 1] - l2 = lambda i: l1(i) * nan_triangle[..., i : i + 1] - l3 = lambda i: l2(i).sum(3, keepdims=True) - if db: - bag = db.from_sequence(range(self.shape[-1])) - bag = bag.map(l3) - out = bag.compute(scheduler="threads") - else: - out = [l3(i) for i in range(self.shape[-1])] - self.values = xp.concatenate(out, axis=3) - self.values = num_to_nan(self.values) - self.is_cumulative = True - return self + def origin_grain(self): + return self.triangle.origin_grain + + @property + def development_grain(self): + return self.triangle.development_grain + + @property + def iloc(self): + return Ilocation(self) + + def __repr__(self): + if self.shape[:2] == (1, 1): + data = self._repr_format() + return data.to_string() else: - new_obj = self.copy() - return new_obj.incr_to_cum(inplace=True) - - def cum_to_incr(self, inplace=False): - """Method to convert an cumlative triangle into a incremental triangle. 
- - Parameters - ---------- - inplace: bool - Set to True will update the instance data attribute inplace + return self._summary_frame().__repr__() + + def _summary_frame(self): + return pd.Series( + [ + self.valuation_date.strftime("%Y-%m"), + "O" + self.origin_grain + "D" + self.development_grain, + self.shape, + self.key_labels, + self.columns.tolist(), + ], + index=["Valuation:", "Grain:", "Shape:", "Index:", "Columns:"], + name="Triangle Summary", + ).to_frame() + + def _repr_html_(self): + """ Jupyter/Ipython HTML representation """ + if self.shape[:2] == (1, 1): + data = self._repr_format() + fmt_str = self._get_format_str(data) + + default = ( + data.to_html( + max_rows=pd.options.display.max_rows, + max_cols=pd.options.display.max_columns, + float_format=fmt_str.format, + ) + .replace("nan", "") + .replace("NaN", "") + ) + return default + else: + return self._summary_frame().to_html( + max_rows=pd.options.display.max_rows, + max_cols=pd.options.display.max_columns, + ) - Returns - ------- - Updated instance of triangle accumulated along the origin - """ - if inplace: - v = self.valuation_date - if self.is_cumulative or self.is_cumulative is None: - if self.is_pattern: - xp = self.get_array_module() - self.values = xp.nan_to_num(self.values) - values = num_to_value(self.values, 1) - diff = self.iloc[..., :-1] / self.iloc[..., 1:].values - self = concat( - ( - diff, - self.iloc[..., -1], - ), - axis=3, - ) - self.values = self.values * self.nan_triangle - else: - diff = self.iloc[..., 1:] - self.iloc[..., :-1].values - self = concat((self.iloc[..., 0], diff), axis=3) - self.is_cumulative = False - self.valuation_date = v - return self + def _get_format_str(self, data): + if np.all(np.isnan(data)): + return "" + elif np.nanmean(abs(data)) < 10: + return "{0:,.4f}" + elif np.nanmean(abs(data)) < 1000: + return "{0:,.2f}" else: - new_obj = self.copy() - return new_obj.cum_to_incr(inplace=True) + return "{:,.0f}" - def _dstep(self): - return { - "M": {"Y": 12, "S": 6, "Q": 3, "M": 1}, - "Q": {"Y": 4, "S": 2, "Q": 1}, - "S": {"Y": 2, "S": 1}, - "Y": {"Y": 1}, - } - def _val_dev(self, sign, inplace=False): - backend = self.array_backend - obj = self.set_backend("sparse") - if not inplace: - obj.values = obj.values.copy() - scale = self._dstep()[obj.development_grain][obj.origin_grain] - offset = np.arange(obj.shape[-2]) * scale - min_slide = -offset.max() - if (obj.values.coords[-2] == np.arange(1)).all(): - # Unique edge case #239 - offset = offset[-1:] * sign - offset = offset[obj.values.coords[-2]] * sign # [0] - obj.values.coords[-1] = obj.values.coords[-1] + offset - ddims = obj.valuation[obj.valuation <= obj.valuation_date] - ddims = len(ddims.drop_duplicates()) - if ddims == 1 and sign == -1: - ddims = len(obj.odims) - if obj.values.density > 0 and obj.values.coords[-1].min() < 0: - obj.values.coords[-1] = obj.values.coords[-1] - min( - obj.values.coords[-1].min(), min_slide - ) - ddims = np.max([np.max(obj.values.coords[-1]) + 1, ddims]) - obj.values.shape = tuple(list(obj.shape[:-1]) + [ddims]) - if options.AUTO_SPARSE == False or backend == "cupy": - obj = obj.set_backend(backend) + def _repr_format(self, origin_as_datetime=False): + out = self.triangle.wide()[:, 1:].to_numpy() + if origin_as_datetime and not self.is_pattern: + origin = self.origin.to_timestamp(how='s') else: - obj = obj._auto_sparse() - return obj + origin = self.origin.copy() + origin.name = None + + if self.origin_grain == "S" and not origin_as_datetime: + origin_formatted = [""] * len(origin) + for 
origin_index in range(len(origin)): + origin_formatted[origin_index] = ( + origin.astype("str")[origin_index] + .replace("Q1", "H1") + .replace("Q3", "H2") + ) + origin = origin_formatted + development = self.development.copy() + development.name = None + return pd.DataFrame(out, index=origin, columns=development) - def dev_to_val(self, inplace=False): - """Converts triangle from a development lag triangle to a valuation - triangle. + def heatmap(self, cmap="coolwarm", low=0, high=0, axis=0, subset=None): + """ Color the background in a gradient according to the data in each + column (optionally row). Requires matplotlib Parameters ---------- - inplace : bool - Whether to mutate the existing Triangle instance or return a new - one. + + cmap : str or colormap + matplotlib colormap + low, high : float + compress the range by these values. + axis : int or str + The axis along which to apply heatmap + subset : IndexSlice + a valid slice for data to limit the style application to Returns ------- - Triangle - Updated instance of the triangle with valuation periods. + Ipython.display.HTML """ - if self.is_val_tri: - if inplace: - return self + if self.shape[:2] == (1, 1): + data = self._repr_format() + fmt_str = self._get_format_str(data) + + axis = self.triangle._get_axis(axis) + + raw_rank = data.rank(axis=axis) + shape_size = data.shape[axis] + rank_size = data.rank(axis=axis).max(axis=axis) + gmap = (raw_rank - 1).div(rank_size - 1, axis=not axis) * ( + shape_size - 1 + ) + 1 + gmap = gmap.replace(np.nan, (shape_size + 1) / 2) + if pd.__version__ >= "1.3": + default_output = ( + data.style.format(fmt_str) + .background_gradient( + cmap=cmap, + low=low, + high=high, + axis=None, + subset=subset, + gmap=gmap, + ) + .to_html() + ) else: - return self.copy() - is_cumulative = self.is_cumulative - if self.is_full: - if is_cumulative: - obj = self.cum_to_incr(inplace=inplace) + default_output = ( + data.style.format(fmt_str) + .background_gradient(cmap=cmap, low=low, high=high, axis=axis,) + .render() + ) + output_xnan = re.sub("<td> nan</td>", "<td></td>", default_output) + else: + raise ValueError("heatmap only works with single triangles") + if HTML: + return HTML(output_xnan) + elif HTML is None: + raise ImportError("heatmap requires IPython") + + def __getitem__(self, key): + obj = self.copy() + if type(key) is str: + key = [key] + columns = type(key) is list and len(set(self.columns).intersection(set(key))) == len(key) + development = type(key) is pd.Series + origin = type(key) is np.ndarray and len(key) == len(self.origin) + valuation = type(key) is np.ndarray and len(key) != len(self.origin) + if columns: + obj.triangle = self.triangle.select(key) + elif development: + if self.is_val_tri: + formats = {"Y": "%Y", "S": "%YQ%q", "Q": "%YQ%q", "M": "%Y-%m"} + ddims = pd.to_datetime( + self.development, + format=formats[self.development_grain] + ).dt.to_period(self.development_grain).dt.to_timestamp(how='e') + key = self.triangle.valuation.is_in(ddims[key].dt.date) + obj.triangle = obj.triangle[key] else: - obj = self.copy() - if self.is_ultimate: - ultimate = obj.iloc[..., -1:] - obj = obj.iloc[..., :-1] + key = self.triangle.development.is_in(self.development[key]) + obj.triangle = obj.triangle[key] + elif origin: + key = self.triangle.origin.is_in(self.origin[key].to_timestamp(how='s')) + obj.triangle = obj.triangle[key] + elif valuation: + key = self.triangle.valuation.is_in(self.valuation[key].unique().date.tolist()) + obj.triangle = obj.triangle[key] + elif type(key) is tuple or type(key) is slice or type(key)
is int: + s0, s1, s2, s3 = self.triangle[key] + obj.triangle.data = ( + obj.triangle.data + .filter(pl.fold( + acc=pl.lit(True), + function=lambda acc, x: acc & x, + exprs=s0 + s2 + s3 + )) + .select(self.key_labels + ['__origin__', '__development__'] + s1)) + obj.triangle.columns = s1 else: - obj = self - obj = obj._val_dev(1, inplace) - ddims = obj.valuation[obj.valuation <= obj.valuation_date] - obj.ddims = ddims.drop_duplicates().sort_values() - if self.is_full: - if self.is_ultimate: - ultimate.ddims = pd.DatetimeIndex(ultimate.valuation[0:1]) - obj = concat((obj, ultimate), -1) - if is_cumulative: - obj = obj.incr_to_cum(inplace=inplace) + raise NotImplementedError() return obj - - def val_to_dev(self, inplace=False): - """Converts triangle from a valuation triangle to a development lag - triangle. - - Parameters - ---------- - inplace : bool - Whether to mutate the existing Triangle instance or return a new - one. - - Returns - ------- - Updated instance of triangle with development lags - """ - if not self.is_val_tri: - if inplace: - return self - else: - return self.copy() - if self.is_ultimate and self.shape[-1] > 1: - ultimate = self.iloc[..., -1:] - ultimate.ddims = np.array([9999]) - obj = self.iloc[..., :-1]._val_dev(-1, inplace) + + def __setitem__(self, key, value): + if type(value) == type(self): + value = value.triangle + self.triangle.__setitem__(key, value) + + + def to_sparse(self): + from chainladder.core.slice import VirtualColumns + from chainladder.core.triangle import Triangle + import pandas as pd + import sparse + import polars as pl + df = ( + self.triangle.data.lazy().collect() + .join(self.triangle.development.to_frame().select(pl.int_range(0, self.shape[3]).alias('d_idx'), pl.col('development').alias('__development__')), how='left', on='__development__') + .join(self.triangle.origin.to_frame().select(pl.int_range(0, self.shape[2]).alias('o_idx'), pl.col('origin').alias('__origin__')), how='left', on='__origin__') + .join(self.triangle.index.lazy().collect().select(pl.int_range(0, self.shape[0]).alias('i_idx'), pl.col(self.triangle.key_labels)), how='left', on=self.triangle.key_labels) + .select(['i_idx', 'o_idx', 'd_idx'] + self.triangle.columns)) + df = pl.concat([df.select(['i_idx', pl.lit(num).alias('c_idx'), 'o_idx', 'd_idx', pl.col(col).alias('value').cast(pl.Float64)]) for num, col in enumerate(self.triangle.columns)]) + sm = sparse.COO(coords=df.select(pl.all().exclude('value')).to_numpy().T, data=df['value'].to_numpy(), shape=self.triangle.shape) + triangle = Triangle() + triangle.values = sm + triangle.key_labels = self.triangle.key_labels + triangle.kdims = self.triangle.index.lazy().collect().to_numpy() + triangle.vdims = self.triangle.columns + triangle.odims = self.triangle.origin.cast(pl.Datetime).to_numpy() + triangle.ddims = self.triangle.development.to_numpy() + triangle.origin_grain = self.triangle.origin_grain + triangle.development_grain = self.triangle.development_grain + triangle.valuation_date = pd.PeriodIndex([self.triangle.valuation_date], freq='M').to_timestamp(how='e')[0] + triangle.is_cumulative = self.triangle.is_cumulative + triangle.is_pattern = self.triangle.is_pattern + triangle.origin_close = self.triangle.origin_close + triangle.array_backend = "sparse" + triangle.virtual_columns = VirtualColumns(triangle) + return triangle + + @property + def values(self): + return self.triangle.data.select(self.columns) + + def __eq__(self, other): + return (
self.triangle.data.sort( + pl.col(self.key_labels + ['__origin__', '__development__']) + ).select(self.triangle.columns).lazy().collect() == + other.triangle.data.sort( + pl.col(other.key_labels + ['__origin__', '__development__']) + ).select(other.columns).lazy().collect() + ).min(axis=0).min(axis=1)[0] + + def __len__(self): + return self.shape[0] + + def to_frame(self, *args, **kwargs): + df = self.triangle.to_frame(*args, **kwargs).lazy().collect().to_pandas() + shape = tuple([num for num, i in enumerate(self.shape) if i > 1]) + if shape == (0, 1): + df = df.set_index(self.key_labels)[self.columns] + if shape == (0, 2): + df = df.pivot(index=self.key_labels, columns='origin', values=self.columns) + if shape == (0, 3): + df = df.pivot(index=self.key_labels, columns='development', values=self.columns) + if shape == (1, 2): + df = df.set_index('origin')[self.columns].T + if shape == (1, 3): + df = df.set_index('development')[self.columns].T + if shape == (2, 3): + df = df.set_index('origin') + return df + + def groupby(self, by, axis=0, *args, **kwargs): + return TriangleGroupBy(self.triangle, by, axis) + + def __array__(self): + return self.triangle.data.select(self.columns) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + obj = self.copy() + if method == "__call__": + inputs = [pl.col(self.columns) if hasattr(i, "columns") else i for i in inputs] + obj.triangle.data = self.triangle.data.select( + pl.col(self.key_labels + ['__origin__', '__development__']), + ufunc(*inputs, **kwargs)) + obj.triangle.data.select(pl.all().exclude(self.columns), ) + return obj else: - obj = self.copy()._val_dev(-1, inplace) - val_0 = obj.valuation[0] - if self.ddims.shape[-1] == 1 and self.ddims[0] == self.valuation_date: - origin_0 = pd.to_datetime(obj.odims[-1]) + raise NotImplementedError() + + def minimum(self, other): + return np.minimum(self, other) + + def maximum(self, other): + return np.maximum(self, other) + + def log(self): + return np.log(self) + + def sqrt(self): + return np.sqrt(self) + + def exp(self): + return np.exp(self) + +class Ilocation: + def __init__(self, obj): + self.obj = obj + + def __getitem__(self, key): + return self.obj.__getitem__(key) + +class TriangleGroupBy(PlTriangleGroupBy): + def _agg(self, agg, axis=1, *args, **kwargs): + axis = self.obj._get_axis(axis) + if axis == 0: + self.obj.data = self.groups.agg( + getattr(pl.col(self.columns), agg)(*args, **kwargs)) else: - origin_0 = pd.to_datetime(obj.odims[0]) - lag_0 = (val_0.year - origin_0.year) * 12 + val_0.month - origin_0.month + 1 - scale = self._dstep()["M"][obj.development_grain] - obj.ddims = np.arange(obj.values.shape[-1]) * scale + lag_0 - prune = obj[obj.origin == obj.origin.max()] - if self.is_ultimate and self.shape[-1] > 1: - obj = obj.iloc[..., : (prune.valuation <= prune.valuation_date).sum()] - obj = concat((obj, ultimate), -1) + raise ValueError(f'axis {axis} is not supported') + self.obj.columns = self.columns + obj = Triangle() + obj.triangle = self.obj + return obj + +def add_tri_passthru(cls, k): + """Pass Through of TriangleBase functionality""" - def grain(self, grain="", trailing=False, inplace=False): - """Changes the grain of a cumulative triangle. - - Parameters - ---------- - grain : str - The grain to which you want your triangle converted, specified as - 'OXDY' where X and Y can take on values of ``['Y', 'S', 'Q', 'M' - ]`` For example, 'OYDY' for Origin Year/Development Year, 'OQDM' - for Origin quarter/Development Month, etc.
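+ # Note: each name handed to add_tri_passthru is resolved on TriangleBase + # and re-wrapped in this pandas-facing Triangle, so the Polars backend + # remains the single implementation of the computation.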
- trailing : bool - For partial origin years/quarters, trailing will set the year/quarter - end to that of the latest available from the origin data. - inplace : bool - Whether to mutate the existing Triangle instance or return a new - one. - - Returns - ------- - Triangle - """ - ograin_old, ograin_new = self.origin_grain, grain[1:2] - dgrain_old, dgrain_new = self.development_grain, grain[-1] - ograin_new = "S" if ograin_new == "H" else ograin_new - valid = { - "Y": ["Y"], - "Q": ["Q", "S", "Y"], - "M": ["Y", "S", "Q", "M"], - "S": ["S", "Y"], - } - if ograin_new not in valid.get(ograin_old, []) or dgrain_new not in valid.get( - dgrain_old, [] - ): - raise ValueError("New grain not compatible with existing grain") - if ( - self.is_cumulative is None - and dgrain_old != dgrain_new - and self.shape[-1] > 1 - ): - raise AttributeError( - "The is_cumulative attribute must be set before using grain method." - ) - if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new): - raise ValueError("Origin grain must be coarser than development grain") - if self.is_full and not self.is_ultimate and not self.is_val_tri: - warnings.warn("Triangle includes extraneous development lags") - obj = self.dev_to_val() - if ograin_new != ograin_old: - freq = {"Y": "A", "S": "2Q"}.get(ograin_new, ograin_new) - if trailing or obj.origin.freqstr[-3:] != "DEC": - origin_period_end = self.origin[-1].strftime("%b").upper() - else: - origin_period_end = "DEC" - indices = ( - pd.Series(range(len(self.origin)), index=self.origin) - .resample("-".join([freq, origin_period_end])) - .indices - ) - groups = pd.concat( - [pd.Series([k] * len(v), index=v) for k, v in indices.items()], axis=0 - ).values - obj = obj.groupby(groups, axis=2).sum() - obj.origin_close = origin_period_end - d_start = pd.Period( - obj.valuation[0], - freq=dgrain_old if dgrain_old == 'M' else dgrain_old + obj.origin.freqstr[-4:] - ).to_timestamp(how='s') - if (len(obj.ddims) > 1 and obj.origin.to_timestamp(how='s')[0] != d_start): - addl_ts = ( - pd.period_range(obj.odims[0], obj.valuation[0], freq=dgrain_old)[:-1] - .to_timestamp() - .values - ) - addl = obj.iloc[..., -len(addl_ts) :] * 0 - addl.ddims = addl_ts - obj = concat((addl, obj), axis=-1) - obj.values = num_to_nan(obj.values) - if dgrain_old != dgrain_new and obj.shape[-1] > 1: - step = self._dstep()[dgrain_old][dgrain_new] - d = np.sort( - len(obj.development) - np.arange(0, len(obj.development), step) - 1 - ) - if obj.is_cumulative: - obj = obj.iloc[..., d] - else: - ddims = obj.ddims[d] - d2 = [d[0]] * (d[0] + 1) + list(np.repeat(np.array(d[1:]), step)) - obj = obj.groupby(d2, axis=3).sum() - obj.ddims = ddims - obj.development_grain = dgrain_new - obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev() - if inplace: - self = obj - return self - return obj - - def trend( - self, - trend=0.0, - axis="origin", - start=None, - end=None, - ultimate_lag=None, - **kwargs - ): - """Allows for the trending of a Triangle object along either a valuation - or origin axis. This method trends using days and assumes a years is - 365.25 days long. - - Parameters - ---------- - trend : float - The annual amount of the trend. Use 1/(1+trend)-1 to detrend. - axis : str (options: ['origin', 'valuation']) - The axis on which to apply the trend - start: date - The start date from which trend should be calculated. If none is - provided then the latest date of the triangle is used. - end: date - The end date to which the trend should be calculated. 
If none is - provided then the earliest period of the triangle is used. - ultimate_lag : int - If ultimate valuations are in the triangle, optionally set the overall - age (in months) of the ultimate to be some lag from the latest non-Ultimate - development - - Returns - ------- - Triangle - updated with multiplicative trend applied. - """ - if axis not in ["origin", "valuation", 2, -2]: - raise ValueError( - "Only origin and valuation axes are supported for trending" - ) - xp = self.get_array_module() - start = pd.to_datetime(start) if type(start) is str else start - start = self.valuation_date if start is None else start - end = pd.to_datetime(end) if type(end) is str else end - end = self.origin[0].to_timestamp() if end is None else end - if axis in ["origin", 2, -2]: - vector = pd.DatetimeIndex( - np.tile( - self.origin.to_timestamp(how="e").values, self.shape[-1] - ).flatten() - ) - else: - vector = self.valuation - lower, upper = (end, start) if end > start else (start, end) - vector = pd.DatetimeIndex( - np.maximum( - np.minimum(np.datetime64(lower), vector.values), np.datetime64(upper) - ) - ) - vector = ( - (start.year - vector.year) * 12 + (start.month - vector.month) - ).values.reshape(self.shape[-2:], order="f") - if self.is_ultimate and ultimate_lag is not None and vector.shape[-1] > 1: - vector[:, -1] = vector[:, -2] + ultimate_lag - trend = ( - xp.array((1 + trend) ** (vector / 12))[None, None, ...] * self.nan_triangle - ) + def tri_passthru(self, *args, **kwargs): obj = self.copy() - obj.values = obj.values * trend + obj.triangle = getattr(TriangleBase, k)(obj.triangle, *args, **kwargs) + + if (k in ('max', 'mean', 'median', 'min', 'product', 'quantile', 'std', 'sum') and + obj.triangle.shape == (1, 1, 1, 1)): + return obj.triangle.data[obj.triangle.columns][0, 0] return obj + + def set_method(cls, func, k): + """Assigns methods to a class""" + func.__name__ = k + setattr(cls, func.__name__, func) - def broadcast_axis(self, axis, value): - warnings.warn( - """ - Broadcast axis is deprecated in favor of broadcasting - using Triangle arithmetic.""" - ) - return self - - def copy(self): - X = Triangle() - X.__dict__.update(vars(self)) - X._set_slicers() - X.values = X.values.copy() - return X - - def development_correlation(self, p_critical=0.5): - """ - Mack (1997) test for correlations between subsequent development - factors. Results should be within confidence interval range - otherwise too much correlation - - Parameters - ---------- - p_critical: float (default=0.10) - Value between 0 and 1 representing the confidence level for the test. A - value of 0.1 implies 90% confidence. - Returns - ------- - DevelopmentCorrelation object with t, t_critical, t_expectation, - t_variance, and range attributes. - """ - return DevelopmentCorrelation(self, p_critical) - - def valuation_correlation(self, p_critical=0.1, total=False): - """ - Mack test for calendar year effect - A calendar period has impact across developments if the probability of - the number of small (or large) development factors in that period - occurring randomly is less than p_critical - - Parameters - ---------- - p_critical: float (default=0.10) - Value between 0 and 1 representing the confidence level for the test - total: - Whether to calculate valuation correlation in total across all - years (True) consistent with Mack 1993 or for each year separately - (False) consistent with Mack 1997. - Returns - ------- - ValuationCorrelation object with z, z_critical, z_expectation and - z_variance attributes. 
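+ # Sketch of the intended behavior: passthru calls return Triangle + # objects, except aggregations whose result collapses to a + # (1, 1, 1, 1) shape, which tri_passthru above unwraps to a scalar.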
- - """ - return ValuationCorrelation(self, p_critical, total) - - def shift(self, periods=-1, axis=3): - """Shift elements along an axis by the desired number of periods. - - Data that falls beyond the existing shape of the Triangle is eliminated - and new cells default to zero. - - Parameters - ---------- - periods : int - Number of periods to shift. Can be positive or negative. - axis : {2 or 'origin', 3 or 'development', None}, default 3 - Shift direction. + set_method(cls, tri_passthru, k) - Returns - ------- - Triangle - updated with shifted elements - """ - axis = self._get_axis(axis) - if axis < 2: - raise AttributeError( - "Lagging only supported for origin and development axes" - ) - if periods == 0: - return self - if periods > 0: - if axis == 3: - out = concat( - ( - self.iloc[..., 1:].rename("development", self.development[:-1]), - (self.iloc[..., -1:] * 0), - ), - axis=axis, - ) - else: - out = concat( - ( - self.iloc[..., 1:, :].rename("origin", self.origin[:-1]), - (self.iloc[..., -1:, :] * 0), - ), - axis=axis, - ) - else: - if axis == 3: - out = concat( - ( - (self.iloc[..., :1] * 0), - self.iloc[..., :-1].rename("development", self.development[1:]), - ), - axis=axis, - ) - else: - out = concat( - ( - (self.iloc[..., :1, :] * 0), - self.iloc[..., :-1, :].rename("origin", self.origin[1:]), - ), - axis=axis, - ) - if abs(periods) == 1: - return out - else: - return out.shift(periods - 1 if periods > 0 else periods + 1, axis) - - def sort_axis(self, axis): - """Method to sort a Triangle along a given axis +passthru = [ + '__abs__', '__neg__', '__pos__', '__pow__', '__round__', + 'collect', 'lazy', 'head', + 'max', 'mean', 'median', 'min', 'product', 'quantile', 'std', + 'sum', 'tail', 'val_to_dev', 'var', 'dev_to_val', 'cum_to_incr', 'incr_to_cum', 'grain'] +for item in passthru: + add_tri_passthru(Triangle, item) - Parameters - ---------- - axis : int or str - The axis for sorting +def add_arithmetic_passthru(cls, k): + """Pass Through of TriangleBase functionality""" - Returns - ------- - Triangle - updated with sorted elements - """ - - axis = self._get_axis(axis) - if axis == 0: - return self.sort_index() + def tri_passthru(self, other, *args, **kwargs): obj = self.copy() - if axis == 1: - sort = pd.Series(self.vdims).sort_values().index - if np.any(sort != pd.Series(self.vdims).index): - obj.values = obj.values[:, list(sort), ...]
- obj.vdims = obj.vdims[list(sort)] - if axis == 2: - sort = pd.Series(self.odims).sort_values().index - if np.any(sort != pd.Series(self.odims).index): - obj.values = obj.values[..., list(sort), :] - obj.odims = obj.odims[list(sort)] - if axis == 3: - sort = self.development.sort_values().index - if np.any(sort != self.development.index): - obj.values = obj.values[..., list(sort)] - obj.ddims = obj.ddims[list(sort)] + if type(other) is type(self): + obj.triangle = getattr(self.triangle, k)(other.triangle, *args, **kwargs) + else: + obj.triangle = getattr(self.triangle, k)(other) return obj + + def set_method(cls, func, k): + """Assigns methods to a class""" + func.__name__ = k + setattr(cls, func.__name__, func) + + set_method(cls, tri_passthru, k) + +passthru = [ + '__add__', '__ge__', '__gt__', + '__le__', '__lt__', '__mul__', '__ne__', + '__radd__', '__rmul__', '__rsub__', + '__rtruediv__', '__sub__', '__truediv__'] +for item in passthru: + add_arithmetic_passthru(Triangle, item) \ No newline at end of file diff --git a/chainladder/utils/data/quarterly.csv b/chainladder/utils/data/quarterly.csv index 416dfb4d..a0e32e17 100644 --- a/chainladder/utils/data/quarterly.csv +++ b/chainladder/utils/data/quarterly.csv @@ -1,277 +1,277 @@ development,origin,incurred,paid -1995Q1,1995,44.0,3.0 -1996Q1,1996,42.0,1.0 -1997Q1,1997,17.0,1.0 -1998Q1,1998,10.0,1.0 -1999Q1,1999,13.0,1.0 -2000Q1,2000,2.0,1.0 -2001Q1,2001,4.0,1.0 -2002Q1,2002,2.0,1.0 -2003Q1,2003,3.0,1.0 -2004Q1,2004,4.0,4.0 -2005Q1,2005,21.0,1.0 -2006Q1,2006,13.0,1.0 -1995Q2,1995,96.0,24.0 -1996Q2,1996,136.0,16.0 -1997Q2,1997,43.0,17.0 -1998Q2,1998,43.0,11.0 -1999Q2,1999,41.0,14.0 -2000Q2,2000,29.0,6.0 -2001Q2,2001,25.0,7.0 -2002Q2,2002,34.0,10.0 -2003Q2,2003,19.0,9.0 -2004Q2,2004,38.0,16.0 -2005Q2,2005,79.0,7.0 -1995Q3,1995,194.0,65.0 -1996Q3,1996,202.0,54.0 -1997Q3,1997,135.0,55.0 -1998Q3,1998,107.0,40.0 -1999Q3,1999,109.0,47.0 -2000Q3,2000,88.0,28.0 -2001Q3,2001,151.0,37.0 -2002Q3,2002,115.0,45.0 -2003Q3,2003,90.0,31.0 -2004Q3,2004,138.0,49.0 -2005Q3,2005,115.0,36.0 -1995Q4,1995,420.0,141.0 -1996Q4,1996,365.0,135.0 -1997Q4,1997,380.0,166.0 -1998Q4,1998,238.0,93.0 -1999Q4,1999,306.0,113.0 -2000Q4,2000,254.0,100.0 -2001Q4,2001,333.0,128.0 -2002Q4,2002,290.0,110.0 -2003Q4,2003,692.0,94.0 -2004Q4,2004,371.0,170.0 -2005Q4,2005,299.0,97.0 -1996Q1,1995,621.0,273.0 -1997Q1,1996,541.0,260.0 -1998Q1,1997,530.0,296.0 -1999Q1,1998,393.0,185.0 -2000Q1,1999,481.0,225.0 -2001Q1,2000,380.0,194.0 -2002Q1,2001,777.0,271.0 -2003Q1,2002,472.0,236.0 -2004Q1,2003,597.0,192.0 -2005Q1,2004,583.0,289.0 -2006Q1,2005,422.0,183.0 -1996Q2,1995,715.0,418.0 -1997Q2,1996,651.0,398.0 -1998Q2,1997,714.0,442.0 -1999Q2,1998,574.0,343.0 -2000Q2,1999,657.0,379.0 -2001Q2,2000,501.0,297.0 -2002Q2,2001,663.0,427.0 -2003Q2,2002,809.0,442.0 -2004Q2,2003,929.0,299.0 -2005Q2,2004,756.0,442.0 -1996Q3,1995,748.0,550.0 -1997Q3,1996,817.0,594.0 -1998Q3,1997,813.0,587.0 -1999Q3,1998,732.0,474.0 -2000Q3,1999,821.0,570.0 -2001Q3,2000,615.0,415.0 -2002Q3,2001,856.0,579.0 -2003Q3,2002,1054.0,668.0 -2004Q3,2003,883.0,408.0 -2005Q3,2004,902.0,601.0 -1996Q4,1995,906.0,692.0 -1997Q4,1996,988.0,758.0 -1998Q4,1997,945.0,701.0 -1999Q4,1998,894.0,643.0 -2000Q4,1999,1007.0,715.0 -2001Q4,2000,735.0,521.0 -2002Q4,2001,988.0,722.0 -2003Q4,2002,1543.0,890.0 -2004Q4,2003,1117.0,792.0 -2005Q4,2004,1111.0,793.0 -1997Q1,1995,950.0,814.0 -1998Q1,1996,1052.0,871.0 -1999Q1,1997,966.0,811.0 -2000Q1,1998,935.0,744.0 -2001Q1,1999,1021.0,832.0 -2002Q1,2000,788.0,616.0 -2003Q1,2001,1063.0,838.0
-2004Q1,2002,1617.0,1078.0 -2005Q1,2003,1092.0,873.0 -2006Q1,2004,1212.0,948.0 -1997Q2,1995,973.0,876.0 -1998Q2,1996,1122.0,964.0 -1999Q2,1997,1008.0,891.0 -2000Q2,1998,967.0,831.0 -2001Q2,1999,1141.0,955.0 -2002Q2,2000,842.0,697.0 -2003Q2,2001,1167.0,937.0 -2004Q2,2002,1505.0,1198.0 -2005Q2,2003,1176.0,949.0 -1997Q3,1995,997.0,916.0 -1998Q3,1996,1139.0,1017.0 -1999Q3,1997,1028.0,940.0 -2000Q3,1998,1019.0,902.0 -2001Q3,1999,1171.0,1048.0 -2002Q3,2000,912.0,758.0 -2003Q3,2001,1199.0,1020.0 -2004Q3,2002,1599.0,1325.0 -2005Q3,2003,1198.0,1019.0 -1997Q4,1995,1030.0,959.0 -1998Q4,1996,1173.0,1052.0 -1999Q4,1997,1069.0,976.0 -2000Q4,1998,1037.0,951.0 -2001Q4,1999,1249.0,1118.0 -2002Q4,2000,915.0,810.0 -2003Q4,2001,1242.0,1091.0 -2004Q4,2002,1695.0,1412.0 -2005Q4,2003,1230.0,1064.0 -1998Q1,1995,1020.0,968.0 -1999Q1,1996,1169.0,1089.0 -2000Q1,1997,1064.0,1000.0 -2001Q1,1998,1062.0,989.0 -2002Q1,1999,1267.0,1183.0 -2003Q1,2000,953.0,859.0 -2004Q1,2001,1307.0,1160.0 -2005Q1,2002,1818.0,1524.0 -2006Q1,2003,1221.0,1100.0 -1998Q2,1995,1035.0,1002.0 -1999Q2,1996,1174.0,1108.0 -2000Q2,1997,1073.0,1022.0 -2001Q2,1998,1079.0,1022.0 -2002Q2,1999,1289.0,1214.0 -2003Q2,2000,963.0,892.0 -2004Q2,2001,1305.0,1207.0 -2005Q2,2002,1716.0,1615.0 -1998Q3,1995,1055.0,1020.0 -1999Q3,1996,1205.0,1141.0 -2000Q3,1997,1086.0,1043.0 -2001Q3,1998,1099.0,1053.0 -2002Q3,1999,1358.0,1293.0 -2003Q3,2000,977.0,916.0 -2004Q3,2001,1348.0,1239.0 -2005Q3,2002,1819.0,1653.0 -1998Q4,1995,1072.0,1031.0 -1999Q4,1996,1225.0,1175.0 -2000Q4,1997,1112.0,1059.0 -2001Q4,1998,1138.0,1074.0 -2002Q4,1999,1400.0,1327.0 -2003Q4,2000,990.0,935.0 -2004Q4,2001,1362.0,1305.0 -2005Q4,2002,1839.0,1720.0 -1999Q1,1995,1070.0,1041.0 -2000Q1,1996,1238.0,1194.0 -2001Q1,1997,1100.0,1068.0 -2002Q1,1998,1126.0,1093.0 -2003Q1,1999,1400.0,1363.0 -2004Q1,2000,1001.0,950.0 -2005Q1,2001,1362.0,1323.0 -2006Q1,2002,1820.0,1760.0 -1999Q2,1995,1051.0,1035.0 -2000Q2,1996,1228.0,1208.0 -2001Q2,1997,1111.0,1080.0 -2002Q2,1998,1163.0,1132.0 -2003Q2,1999,1409.0,1383.0 -2004Q2,2000,1005.0,967.0 -2005Q2,2001,1376.0,1347.0 -1999Q3,1995,1062.0,1045.0 -2000Q3,1996,1239.0,1217.0 -2001Q3,1997,1115.0,1091.0 -2002Q3,1998,1198.0,1145.0 -2003Q3,1999,1437.0,1396.0 -2004Q3,2000,1013.0,982.0 -2005Q3,2001,1376.0,1365.0 -1999Q4,1995,1070.0,1054.0 -2000Q4,1996,1254.0,1226.0 -2001Q4,1997,1128.0,1099.0 -2002Q4,1998,1207.0,1162.0 -2003Q4,1999,1468.0,1438.0 -2004Q4,2000,1029.0,1004.0 -2005Q4,2001,1383.0,1375.0 -2000Q1,1995,1069.0,1060.0 -2001Q1,1996,1249.0,1231.0 -2002Q1,1997,1128.0,1104.0 -2003Q1,1998,1209.0,1177.0 -2004Q1,1999,1476.0,1457.0 -2005Q1,2000,1030.0,1013.0 -2006Q1,2001,1411.0,1387.0 -2000Q2,1995,1076.0,1068.0 -2001Q2,1996,1262.0,1243.0 -2002Q2,1997,1142.0,1130.0 -2003Q2,1998,1215.0,1192.0 -2004Q2,1999,1488.0,1474.0 -2005Q2,2000,1056.0,1028.0 -2000Q3,1995,1081.0,1075.0 -2001Q3,1996,1264.0,1249.0 -2002Q3,1997,1170.0,1135.0 -2003Q3,1998,1216.0,1197.0 -2004Q3,1999,1524.0,1504.0 -2005Q3,2000,1056.0,1036.0 -2000Q4,1995,1088.0,1079.0 -2001Q4,1996,1267.0,1252.0 -2002Q4,1997,1159.0,1138.0 -2003Q4,1998,1240.0,1213.0 -2004Q4,1999,1550.0,1521.0 -2005Q4,2000,1066.0,1046.0 -2001Q1,1995,1089.0,1081.0 -2002Q1,1996,1266.0,1253.0 -2003Q1,1997,1155.0,1141.0 -2004Q1,1998,1243.0,1225.0 -2005Q1,1999,1550.0,1532.0 -2006Q1,2000,1066.0,1054.0 -2001Q2,1995,1091.0,1084.0 -2002Q2,1996,1263.0,1256.0 -2003Q2,1997,1167.0,1147.0 -2004Q2,1998,1259.0,1237.0 -2005Q2,1999,1561.0,1547.0 -2001Q3,1995,1090.0,1086.0 -2002Q3,1996,1278.0,1258.0 -2003Q3,1997,1177.0,1163.0 -2004Q3,1998,1281.0,1265.0 -2005Q3,1999,1566.0,1559.0 
-2001Q4,1995,1094.0,1089.0 -2002Q4,1996,1273.0,1261.0 -2003Q4,1997,1194.0,1184.0 -2004Q4,1998,1284.0,1274.0 -2005Q4,1999,1585.0,1565.0 -2002Q1,1995,1094.0,1091.0 -2003Q1,1996,1269.0,1266.0 -2004Q1,1997,1196.0,1185.0 -2005Q1,1998,1286.0,1275.0 -2006Q1,1999,1583.0,1573.0 -2002Q2,1995,1095.0,1094.0 -2003Q2,1996,1279.0,1267.0 -2004Q2,1997,1198.0,1186.0 -2005Q2,1998,1289.0,1278.0 -2002Q3,1995,1098.0,1095.0 -2003Q3,1996,1281.0,1268.0 -2004Q3,1997,1197.0,1189.0 -2005Q3,1998,1292.0,1286.0 -2002Q4,1995,1096.0,1096.0 -2003Q4,1996,1299.0,1288.0 -2004Q4,1997,1198.0,1192.0 -2005Q4,1998,1297.0,1288.0 -2003Q1,1995,1097.0,1097.0 -2004Q1,1996,1296.0,1288.0 -2005Q1,1997,1201.0,1194.0 -2006Q1,1998,1298.0,1293.0 -2003Q2,1995,1097.0,1098.0 -2004Q2,1996,1302.0,1289.0 -2005Q2,1997,1201.0,1195.0 -2003Q3,1995,1101.0,1098.0 -2004Q3,1996,1303.0,1291.0 -2005Q3,1997,1200.0,1197.0 -2003Q4,1995,1098.0,1099.0 -2004Q4,1996,1300.0,1296.0 -2005Q4,1997,1203.0,1197.0 -2004Q1,1995,1099.0,1099.0 -2005Q1,1996,1300.0,1296.0 -2006Q1,1997,1200.0,1198.0 -2004Q2,1995,1103.0,1100.0 -2005Q2,1996,1302.0,1297.0 -2004Q3,1995,1100.0,1098.0 -2005Q3,1996,1300.0,1298.0 -2004Q4,1995,1098.0,1098.0 -2005Q4,1996,1303.0,1298.0 -2005Q1,1995,1100.0,1098.0 -2006Q1,1996,1300.0,1298.0 -2005Q2,1995,1100.0,1099.0 -2005Q3,1995,1098.0,1099.0 -2005Q4,1995,1101.0,1100.0 -2006Q1,1995,1100.0,1100.0 +199503,1995,44.0,3.0 +199603,1996,42.0,1.0 +199703,1997,17.0,1.0 +199803,1998,10.0,1.0 +199903,1999,13.0,1.0 +200003,2000,2.0,1.0 +200103,2001,4.0,1.0 +200203,2002,2.0,1.0 +200303,2003,3.0,1.0 +200403,2004,4.0,4.0 +200503,2005,21.0,1.0 +200603,2006,13.0,1.0 +199506,1995,96.0,24.0 +199606,1996,136.0,16.0 +199706,1997,43.0,17.0 +199806,1998,43.0,11.0 +199906,1999,41.0,14.0 +200006,2000,29.0,6.0 +200106,2001,25.0,7.0 +200206,2002,34.0,10.0 +200306,2003,19.0,9.0 +200406,2004,38.0,16.0 +200506,2005,79.0,7.0 +199509,1995,194.0,65.0 +199609,1996,202.0,54.0 +199709,1997,135.0,55.0 +199809,1998,107.0,40.0 +199909,1999,109.0,47.0 +200009,2000,88.0,28.0 +200109,2001,151.0,37.0 +200209,2002,115.0,45.0 +200309,2003,90.0,31.0 +200409,2004,138.0,49.0 +200509,2005,115.0,36.0 +199512,1995,420.0,141.0 +199612,1996,365.0,135.0 +199712,1997,380.0,166.0 +199812,1998,238.0,93.0 +199912,1999,306.0,113.0 +200012,2000,254.0,100.0 +200112,2001,333.0,128.0 +200212,2002,290.0,110.0 +200312,2003,692.0,94.0 +200412,2004,371.0,170.0 +200512,2005,299.0,97.0 +199603,1995,621.0,273.0 +199703,1996,541.0,260.0 +199803,1997,530.0,296.0 +199903,1998,393.0,185.0 +200003,1999,481.0,225.0 +200103,2000,380.0,194.0 +200203,2001,777.0,271.0 +200303,2002,472.0,236.0 +200403,2003,597.0,192.0 +200503,2004,583.0,289.0 +200603,2005,422.0,183.0 +199606,1995,715.0,418.0 +199706,1996,651.0,398.0 +199806,1997,714.0,442.0 +199906,1998,574.0,343.0 +200006,1999,657.0,379.0 +200106,2000,501.0,297.0 +200206,2001,663.0,427.0 +200306,2002,809.0,442.0 +200406,2003,929.0,299.0 +200506,2004,756.0,442.0 +199609,1995,748.0,550.0 +199709,1996,817.0,594.0 +199809,1997,813.0,587.0 +199909,1998,732.0,474.0 +200009,1999,821.0,570.0 +200109,2000,615.0,415.0 +200209,2001,856.0,579.0 +200309,2002,1054.0,668.0 +200409,2003,883.0,408.0 +200509,2004,902.0,601.0 +199612,1995,906.0,692.0 +199712,1996,988.0,758.0 +199812,1997,945.0,701.0 +199912,1998,894.0,643.0 +200012,1999,1007.0,715.0 +200112,2000,735.0,521.0 +200212,2001,988.0,722.0 +200312,2002,1543.0,890.0 +200412,2003,1117.0,792.0 +200512,2004,1111.0,793.0 +199703,1995,950.0,814.0 +199803,1996,1052.0,871.0 +199903,1997,966.0,811.0 +200003,1998,935.0,744.0 +200103,1999,1021.0,832.0 
+200203,2000,788.0,616.0 +200303,2001,1063.0,838.0 +200403,2002,1617.0,1078.0 +200503,2003,1092.0,873.0 +200603,2004,1212.0,948.0 +199706,1995,973.0,876.0 +199806,1996,1122.0,964.0 +199906,1997,1008.0,891.0 +200006,1998,967.0,831.0 +200106,1999,1141.0,955.0 +200206,2000,842.0,697.0 +200306,2001,1167.0,937.0 +200406,2002,1505.0,1198.0 +200506,2003,1176.0,949.0 +199709,1995,997.0,916.0 +199809,1996,1139.0,1017.0 +199909,1997,1028.0,940.0 +200009,1998,1019.0,902.0 +200109,1999,1171.0,1048.0 +200209,2000,912.0,758.0 +200309,2001,1199.0,1020.0 +200409,2002,1599.0,1325.0 +200509,2003,1198.0,1019.0 +199712,1995,1030.0,959.0 +199812,1996,1173.0,1052.0 +199912,1997,1069.0,976.0 +200012,1998,1037.0,951.0 +200112,1999,1249.0,1118.0 +200212,2000,915.0,810.0 +200312,2001,1242.0,1091.0 +200412,2002,1695.0,1412.0 +200512,2003,1230.0,1064.0 +199803,1995,1020.0,968.0 +199903,1996,1169.0,1089.0 +200003,1997,1064.0,1000.0 +200103,1998,1062.0,989.0 +200203,1999,1267.0,1183.0 +200303,2000,953.0,859.0 +200403,2001,1307.0,1160.0 +200503,2002,1818.0,1524.0 +200603,2003,1221.0,1100.0 +199806,1995,1035.0,1002.0 +199906,1996,1174.0,1108.0 +200006,1997,1073.0,1022.0 +200106,1998,1079.0,1022.0 +200206,1999,1289.0,1214.0 +200306,2000,963.0,892.0 +200406,2001,1305.0,1207.0 +200506,2002,1716.0,1615.0 +199809,1995,1055.0,1020.0 +199909,1996,1205.0,1141.0 +200009,1997,1086.0,1043.0 +200109,1998,1099.0,1053.0 +200209,1999,1358.0,1293.0 +200309,2000,977.0,916.0 +200409,2001,1348.0,1239.0 +200509,2002,1819.0,1653.0 +199812,1995,1072.0,1031.0 +199912,1996,1225.0,1175.0 +200012,1997,1112.0,1059.0 +200112,1998,1138.0,1074.0 +200212,1999,1400.0,1327.0 +200312,2000,990.0,935.0 +200412,2001,1362.0,1305.0 +200512,2002,1839.0,1720.0 +199903,1995,1070.0,1041.0 +200003,1996,1238.0,1194.0 +200103,1997,1100.0,1068.0 +200203,1998,1126.0,1093.0 +200303,1999,1400.0,1363.0 +200403,2000,1001.0,950.0 +200503,2001,1362.0,1323.0 +200603,2002,1820.0,1760.0 +199906,1995,1051.0,1035.0 +200006,1996,1228.0,1208.0 +200106,1997,1111.0,1080.0 +200206,1998,1163.0,1132.0 +200306,1999,1409.0,1383.0 +200406,2000,1005.0,967.0 +200506,2001,1376.0,1347.0 +199909,1995,1062.0,1045.0 +200009,1996,1239.0,1217.0 +200109,1997,1115.0,1091.0 +200209,1998,1198.0,1145.0 +200309,1999,1437.0,1396.0 +200409,2000,1013.0,982.0 +200509,2001,1376.0,1365.0 +199912,1995,1070.0,1054.0 +200012,1996,1254.0,1226.0 +200112,1997,1128.0,1099.0 +200212,1998,1207.0,1162.0 +200312,1999,1468.0,1438.0 +200412,2000,1029.0,1004.0 +200512,2001,1383.0,1375.0 +200003,1995,1069.0,1060.0 +200103,1996,1249.0,1231.0 +200203,1997,1128.0,1104.0 +200303,1998,1209.0,1177.0 +200403,1999,1476.0,1457.0 +200503,2000,1030.0,1013.0 +200603,2001,1411.0,1387.0 +200006,1995,1076.0,1068.0 +200106,1996,1262.0,1243.0 +200206,1997,1142.0,1130.0 +200306,1998,1215.0,1192.0 +200406,1999,1488.0,1474.0 +200506,2000,1056.0,1028.0 +200009,1995,1081.0,1075.0 +200109,1996,1264.0,1249.0 +200209,1997,1170.0,1135.0 +200309,1998,1216.0,1197.0 +200409,1999,1524.0,1504.0 +200509,2000,1056.0,1036.0 +200012,1995,1088.0,1079.0 +200112,1996,1267.0,1252.0 +200212,1997,1159.0,1138.0 +200312,1998,1240.0,1213.0 +200412,1999,1550.0,1521.0 +200512,2000,1066.0,1046.0 +200103,1995,1089.0,1081.0 +200203,1996,1266.0,1253.0 +200303,1997,1155.0,1141.0 +200403,1998,1243.0,1225.0 +200503,1999,1550.0,1532.0 +200603,2000,1066.0,1054.0 +200106,1995,1091.0,1084.0 +200206,1996,1263.0,1256.0 +200306,1997,1167.0,1147.0 +200406,1998,1259.0,1237.0 +200506,1999,1561.0,1547.0 +200109,1995,1090.0,1086.0 +200209,1996,1278.0,1258.0 +200309,1997,1177.0,1163.0 
+200409,1998,1281.0,1265.0 +200509,1999,1566.0,1559.0 +200112,1995,1094.0,1089.0 +200212,1996,1273.0,1261.0 +200312,1997,1194.0,1184.0 +200412,1998,1284.0,1274.0 +200512,1999,1585.0,1565.0 +200203,1995,1094.0,1091.0 +200303,1996,1269.0,1266.0 +200403,1997,1196.0,1185.0 +200503,1998,1286.0,1275.0 +200603,1999,1583.0,1573.0 +200206,1995,1095.0,1094.0 +200306,1996,1279.0,1267.0 +200406,1997,1198.0,1186.0 +200506,1998,1289.0,1278.0 +200209,1995,1098.0,1095.0 +200309,1996,1281.0,1268.0 +200409,1997,1197.0,1189.0 +200509,1998,1292.0,1286.0 +200212,1995,1096.0,1096.0 +200312,1996,1299.0,1288.0 +200412,1997,1198.0,1192.0 +200512,1998,1297.0,1288.0 +200303,1995,1097.0,1097.0 +200403,1996,1296.0,1288.0 +200503,1997,1201.0,1194.0 +200603,1998,1298.0,1293.0 +200306,1995,1097.0,1098.0 +200406,1996,1302.0,1289.0 +200506,1997,1201.0,1195.0 +200309,1995,1101.0,1098.0 +200409,1996,1303.0,1291.0 +200509,1997,1200.0,1197.0 +200312,1995,1098.0,1099.0 +200412,1996,1300.0,1296.0 +200512,1997,1203.0,1197.0 +200403,1995,1099.0,1099.0 +200503,1996,1300.0,1296.0 +200603,1997,1200.0,1198.0 +200406,1995,1103.0,1100.0 +200506,1996,1302.0,1297.0 +200409,1995,1100.0,1098.0 +200509,1996,1300.0,1298.0 +200412,1995,1098.0,1098.0 +200512,1996,1303.0,1298.0 +200503,1995,1100.0,1098.0 +200603,1996,1300.0,1298.0 +200506,1995,1100.0,1099.0 +200509,1995,1098.0,1099.0 +200512,1995,1101.0,1100.0 +200603,1995,1100.0,1100.0 diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index 96e3410d..698d2d69 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -2,8 +2,8 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. import pandas as pd +import polars as pl import numpy as np -from chainladder.utils.cupy import cp from chainladder.utils.sparse import sp import dill import json @@ -33,10 +33,14 @@ def load_sample(key: str, *args, **kwargs): origin = "origin" development = "development" columns = ["values"] + origin_format = '%Y' + valuation_format = '%Y' index = None cumulative = True if key.lower() in ["mcl", "usaa", "quarterly", "auto", "usauto", "tail_sample"]: columns = ["incurred", "paid"] + if key.lower() == 'quarterly': + valuation_format = '%Y%m' if key.lower() == "clrd": origin = "AccidentYear" development = "DevelopmentYear" @@ -67,16 +71,20 @@ def load_sample(key: str, *args, **kwargs): index = ["ClaimNo", "Line", "Type", "ClaimLiability", "Limit", "Deductible"] origin = "AccidentDate" development = "PaymentDate" + origin_format = '%Y-%m-%d' + valuation_format = '%Y-%m-%d' cumulative = False - df = pd.read_csv(os.path.join(path, "data", key.lower() + ".csv")) + df = pl.read_csv(os.path.join(path, "data", key.lower() + ".csv")) return Triangle( df, origin=origin, - development=development, + valuation=development, index=index, columns=columns, cumulative=cumulative, + origin_format=origin_format, + valuation_format=valuation_format, *args, **kwargs ) @@ -188,7 +196,7 @@ def set_common_backend(objs): return [i.set_backend(backend) for i in objs] -def concat( +def legacy_concat( objs: Iterable, axis: Union[int, str], ignore_index: bool = False, @@ -267,6 +275,53 @@ def concat( else: return out +def concat(objs, axis, ignore_index: bool = False, sort: bool = False): + """Concatenate Triangle objects along a particular axis. + + Parameters + ---------- + objs: list or tuple + A list or tuple of Triangle objects to concat. 
All non-concat axes must + be identical and all elements of the concat axes must be unique. + axis: string or int + The axis to concatenate along. + ignore_index: bool, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, …, n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + + Returns + ------- + Updated triangle + """ + if type(objs) not in (list, tuple): + raise TypeError("objects to be concatenated must be in a list or tuple") + if type(objs) is tuple: + objs = list(objs) + if len(objs) == 0: + raise ValueError("objs must contain at least one element") + if len(set([i.is_val_tri for i in objs])) > 1: + raise ValueError("All objs must be on the same valuation basis.") + if any(obj.key_labels != objs[0].key_labels for obj in objs): + raise ValueError("All objs must have the same key_labels.") + axis = objs[0]._get_axis(axis) + triangle = TriangleBase.from_triangle(objs[0]) + if axis != 1: + if any(obj.columns != objs[0].columns for obj in objs): + raise ValueError("All objs must have the same columns.") + triangle.data = ( + pl.concat([obj.data for obj in objs]) + .group_by(objs[0].key_labels + ['__origin__', '__development__']) + .agg(pl.col(objs[0].columns).sum())) + else: + l0 = objs[0].data.lazy() + for lf in [obj.data.lazy() for obj in objs[1:]]: + l0 = l0.join(lf, how='outer', on=objs[0].key_labels + ['__origin__', '__development__']) + triangle.data = l0.collect() + triangle.columns = [col for obj in objs for col in obj.columns] + return triangle def num_to_value(arr, value): """Function that turns all zeros to a given value in an array""" diff --git a/conftest.py b/conftest.py index 1abacc33..daab59c3 100644 --- a/conftest.py +++ b/conftest.py @@ -1,74 +1,33 @@ import pytest import chainladder as cl -def pytest_generate_tests(metafunc): - if "raa" in metafunc.fixturenames: - metafunc.parametrize( - "raa", ["normal_run", "sparse_only_run"], indirect=True) - if "qtr" in metafunc.fixturenames: - metafunc.parametrize( - "qtr", ["normal_run", "sparse_only_run"], indirect=True) - if "clrd" in metafunc.fixturenames: - metafunc.parametrize( - "clrd", ["normal_run", "sparse_only_run"], indirect=True) - if "genins" in metafunc.fixturenames: - metafunc.parametrize( - "genins", ["normal_run", "sparse_only_run"], indirect=True) - if "prism_dense" in metafunc.fixturenames: - metafunc.parametrize( - "prism_dense", ["normal_run", "sparse_only_run"], indirect=True) - if "prism" in metafunc.fixturenames: - metafunc.parametrize("prism", ["normal_run"], indirect=True) - - @pytest.fixture def raa(request): - if request.param == "sparse_only_run": - cl.options.set_option('ARRAY_BACKEND', 'sparse') - else: - cl.options.set_option('ARRAY_BACKEND', 'numpy') return cl.load_sample('raa') @pytest.fixture def qtr(request): - if request.param == "sparse_only_run": - cl.options.set_option('ARRAY_BACKEND', 'sparse') - else: - cl.options.set_option('ARRAY_BACKEND', 'numpy') return cl.load_sample('quarterly') @pytest.fixture def clrd(request): - if request.param == "sparse_only_run": - cl.options.set_option('ARRAY_BACKEND', 'sparse') - else: - cl.options.set_option('ARRAY_BACKEND', 'numpy') return cl.load_sample('clrd') @pytest.fixture def genins(request): - if request.param == "sparse_only_run": - cl.options.set_option('ARRAY_BACKEND', 'sparse') - else: -
cl.options.set_option('ARRAY_BACKEND', 'numpy') return cl.load_sample('genins') @pytest.fixture def prism(request): - cl.options.set_option('ARRAY_BACKEND', 'numpy') return cl.load_sample('prism') @pytest.fixture def prism_dense(request): - if request.param == "sparse_only_run": - cl.options.set_option('ARRAY_BACKEND', 'numpy') - else: - cl.options.set_option('ARRAY_BACKEND', 'numpy') return cl.load_sample('prism').sum() diff --git a/requirements.txt b/requirements.txt index e2d6a6fc..532e719c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ sparse>=0.9 matplotlib dill patsy +polars>=0.19.0 \ No newline at end of file
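
For orientation, a short usage sketch of the method-passthru surface wired up above. This is a hedged illustration, not part of the diff: it assumes only what the new code defines (the polars-backed load_sample, the methods generated by add_tri_passthru, and the scalar unwrap in tri_passthru), and it assumes TriangleBase still accepts the legacy 'OYDY' grain string.

import chainladder as cl

# 'quarterly' now parses its development column with '%Y%m' (e.g. 199503),
# matching the rewritten quarterly.csv above
qtr = cl.load_sample('quarterly')

# grain, dev_to_val, cum_to_incr, sum, etc. are not hand-written methods;
# add_tri_passthru generates each one, delegating to the wrapped
# TriangleBase and returning a rewrapped Triangle
annual = qtr.grain('OYDY')
incremental = qtr.cum_to_incr()

# reductions such as sum()/mean() unwrap to a plain scalar once the wrapped
# triangle collapses to shape (1, 1, 1, 1), per the branch in tri_passthru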
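
The arithmetic dunders follow the same pattern through add_arithmetic_passthru: when the right operand is another Triangle, the wrapped TriangleBase objects are combined; anything else (a scalar, say) is handed through unchanged. Continuing the snippet above under the same assumptions:

doubled = qtr + qtr  # Triangle-with-Triangle: the right operand is unwrapped first
scaled = qtr * 0.5   # non-Triangle operands pass straight to the wrapped __mul__
net = doubled - qtr  # __sub__, __truediv__, and the comparisons are generated the same way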
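
Finally, a hedged sketch of the rewritten concat. The validation branches are exactly those coded above; the happy path assumes _get_axis resolves 'origin' as the docstring ("string or int") suggests. Note that for non-column axes the implementation stacks the underlying polars frames and then group-bys and sums, so overlapping cells along the concat axis are combined rather than rejected — real use wants unique elements along that axis, as the docstring requires.

import chainladder as cl

qtr = cl.load_sample('quarterly')

# validation paths from the implementation above:
# cl.concat(qtr, axis=2)  -> TypeError: objects to be concatenated must be in a list or tuple
# cl.concat([], axis=2)   -> ValueError: objs must contain at least one element

# axis 1 joins the underlying frames on key_labels plus
# __origin__/__development__; any other axis concatenates rows and sums
# duplicates, so this toy call doubles every overlapping cell
combined = cl.concat([qtr, qtr], axis='origin')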