Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
jbogaardt committed Sep 28, 2024
1 parent 7e5dd10 commit a0e6197
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 85 deletions.
97 changes: 54 additions & 43 deletions chainladder/core/dunders.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,73 +60,84 @@ def _compatibility_check(self, x, y):
return x, y

def _prep_index(self, x, y):
""" Preps index and column axes for arithmetic """
if x.kdims.shape[0] == 1 and y.kdims.shape[0] > 1:
# Broadcast x to y
x.kdims = y.kdims
x.key_labels = y.key_labels
return x, y
if x.kdims.shape[0] > 1 and y.kdims.shape[0] == 1:
# Broadcast y to x
y.kdims = x.kdims
y.key_labels = x.key_labels
return x, y
if x.kdims.shape[0] == y.kdims.shape[0] == 1 and x.key_labels != y.key_labels:
# Broadcast to the triangle with a larger multi-index
kdims = x.kdims if len(x.key_labels) > len(y.key_labels) else y.kdims
y.kdims = x.kdims = kdims
key_labels = x.key_labels if len(x.key_labels) > len(y.key_labels) else y.key_labels
y.key_labels = x.key_labels = key_labels
x.kdims = y.kdims = kdims
x.key_labels = y.key_labels = key_labels
return x, y
a, b = set(x.key_labels), set(y.key_labels)
common = a.intersection(b)
if common in [a, b] and (a != b or (a == b and x.kdims.shape[0] != y.kdims.shape[0])):
# If one triangle's index labels are a subset of the other triangle's index labels
x = x.groupby(list(common))
y = y.groupby(list(common))
return x, y
if common not in [a, b]:
raise ValueError('Index broadcasting is ambiguous between', str(a), 'and', str(b))
if (
x.key_labels == y.key_labels
and x.kdims.shape[0] == y.kdims.shape[0]
and y.kdims.shape[0] > 1
and not x.kdims is y.kdims
and not x.index.equals(y.index)
):
# Make sure identical but unsorted index labels work
x = x.sort_index()
try:
y = y.loc[x.index]
except:

# Use sets for faster operations
x_labels = set(x.key_labels)
y_labels = set(y.key_labels)
common = x_labels.intersection(y_labels)

if common == x_labels or common == y_labels:
if x_labels != y_labels or x.kdims.shape[0] != y.kdims.shape[0]:
x = x.groupby(list(common))
y = y.groupby(list(common))
elif x.kdims.shape[0] > 1 and not np.array_equal(x.kdims, y.kdims) and not x.index.equals(y.index):
x = x.sort_index()
try:
y = y.loc[x.index]
except:
x = x.groupby(list(common))
y = y.groupby(list(common))
return x, y

if common != x_labels and common != y_labels:
raise ValueError('Index broadcasting is ambiguous between ' + str(x_labels) + ' and ' + str(y_labels))

return x, y

def _prep_columns(self, x, y):
x_backend, y_backend = x.array_backend, y.array_backend

if len(x.columns) == 1 and len(y.columns) > 1:
x.vdims = y.vdims
elif len(y.columns) == 1 and len(x.columns) > 1:
y.vdims = x.vdims
elif len(y.columns) == 1 and len(x.columns) == 1 and x.columns != y.columns:
elif len(y.columns) == len(x.columns) == 1 and x.columns != y.columns:
y.vdims = x.vdims
elif x.shape[1] == y.shape[1] and np.all(x.columns == y.columns):
pass
elif x.shape[1] == y.shape[1] and np.array_equal(x.columns, y.columns):
return x, y
else:
col_union = list(x.columns) + [
item for item in y.columns if item not in x.columns
]
for item in [item for item in col_union if item not in x.columns]:
x[item] = 0
x = x[col_union]
for item in [item for item in col_union if item not in y.columns]:
y[item] = 0
y = y[col_union]
x, y = (
x.set_backend(x_backend, inplace=True),
y.set_backend(y_backend, inplace=True),
)
# Use sets for faster operations
x_cols = set(x.columns)
y_cols = set(y.columns)

# Find columns to add to each triangle
cols_to_add_to_x = y_cols - x_cols
cols_to_add_to_y = x_cols - y_cols

# Create new columns only if necessary
if cols_to_add_to_x:
new_x_cols = list(x.columns) + list(cols_to_add_to_x)
x = x.reindex(columns=new_x_cols, fill_value=0)

if cols_to_add_to_y:
new_y_cols = list(y.columns) + list(cols_to_add_to_y)
y = y.reindex(columns=new_y_cols, fill_value=0)

# Ensure both triangles have the same column order
final_cols = list(x_cols | y_cols)
x = x[final_cols]
y = y[final_cols]

# Reset backends only if they've changed
if x.array_backend != x_backend:
x = x.set_backend(x_backend, inplace=True)
if y.array_backend != y_backend:
y = y.set_backend(y_backend, inplace=True)

return x, y

def _prep_origin_development(self, obj, other):
Expand Down
37 changes: 2 additions & 35 deletions chainladder/core/tests/test_triangle.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import chainladder as cl
import pandas as pd
import polars as pl
import numpy as np
import copy
import pytest
import io
from datetime import datetime
Expand Down Expand Up @@ -746,9 +744,7 @@ def test_halfyear_development():
["2012-01-01", "2013-12-31", "incurred", 200.0],
]

df_polars = pl.DataFrame(data)
df_polars.columns = ["origin", "val_date", "idx", "value"]


assert (
type(
cl.Triangle(
Expand All @@ -760,33 +756,4 @@ def test_halfyear_development():
cumulative=True,
)
)
) == cl.Triangle

assert (
type(
cl.Triangle(
data=df_polars,
index="idx",
columns="value",
origin="origin",
development="val_date",
cumulative=True,
)
)
) == cl.Triangle

assert cl.Triangle(
data=pd.DataFrame(data, columns=["origin", "val_date", "idx", "value"]),
index="idx",
columns="value",
origin="origin",
development="val_date",
cumulative=True,
) == cl.Triangle(
data=df_polars,
index="idx",
columns="value",
origin="origin",
development="val_date",
cumulative=True,
)
) == cl.Triangle
6 changes: 3 additions & 3 deletions chainladder/development/learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(self, estimator_ml=None, y_ml=None, autoregressive=False,
self.y_ml=y_ml
self.weight_ml = weight_ml
self.autoregressive=autoregressive
self.fit_incrementals=fit_incrementals
self.fit_incrementals = fit_incrementals

def _get_y_names(self):
""" private function to get the response column name"""
Expand Down Expand Up @@ -153,7 +153,7 @@ def fit(self, X, y=None, sample_weight=None):
Parameters
----------
X : Triangle-like
Set of LDFs to which the munich adjustment will be applied.
Set of LDFs to which the estimator will be applied.
y : None
Ignored, use y_ml to set a response variable for the ML algorithm
sample_weight : None
Expand All @@ -180,7 +180,7 @@ def fit(self, X, y=None, sample_weight=None):
self.df_ = df
# Fit model
self.estimator_ml.fit(df, self.y_ml_.fit_transform(df).squeeze())
#return self
#return selffit_incrementals
self.triangle_ml_ = self._get_triangle_ml(df)
return self

Expand Down
2 changes: 1 addition & 1 deletion chainladder/workflow/voting.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def fit(self, X, y, sample_weight=None):
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_single_estimator)(
clone(clf), X, y,
sample_weight=sample_weight,
fit_params=dict(sample_weight=sample_weight),
message_clsname='VotingChainladder',
message=self._log_message(names[idx],
idx + 1, len(clfs))
Expand Down
4 changes: 1 addition & 3 deletions environment-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ dependencies:
- ipykernel

- pandas
- polars
- scikit-learn
- sparse
- numba
- dill
- patsy
- matplotlib
- matplotlib-base

# testing
- lxml
Expand Down

0 comments on commit a0e6197

Please sign in to comment.