Skip to content

Commit

Permalink
Suppress warnings (#515)
Browse files Browse the repository at this point in the history
* Suppress the many RuntimeWarnings raised when nanmean takes in all NaNs; this case is later taken care of by num_to_nan

* Began work on removing use of the deprecated infer_datetime_format argument

* Fixed the infer_datetime_format deprecation, since pandas will eventually ignore this argument
  • Loading branch information
kennethshsu authored May 13, 2024
1 parent cb9747b commit efe6f23
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 23 deletions.
7 changes: 6 additions & 1 deletion chainladder/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,25 +231,30 @@ def _to_datetime(data, fields, period_end=False, format=None):
target_field = data[fields].astype(str).apply(lambda x: "-".join(x), axis=1)
else:
target_field = data[fields].iloc[:, 0]

if hasattr(target_field, "dt"):
target = target_field
if type(target.iloc[0]) == pd.Period:
return target.dt.to_timestamp(how={1: "e", 0: "s"}[period_end])
else:
datetime_arg = target_field.unique()
format = [{"arg": datetime_arg, "format": format}] if format else []

date_inference_list = format + [
{"arg": datetime_arg, "format": "%Y%m"},
{"arg": datetime_arg, "format": "%Y"},
{"arg": datetime_arg, "infer_datetime_format": True},
{"arg": datetime_arg, "format": "%Y-%m-%d"},
{"arg": datetime_arg},
]
for item in date_inference_list:
try:
arr = dict(zip(datetime_arg, pd.to_datetime(**item)))
break
except:
pass

target = target_field.map(arr)

return target

@staticmethod
Expand Down
51 changes: 29 additions & 22 deletions chainladder/utils/weighted_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
import numpy as np
from chainladder.utils.sparse import sp
from sklearn.base import BaseEstimator
import warnings


class WeightedRegression(BaseEstimator):
""" Helper class that fits a system of regression equations
as a closed-form solution. This greatly speeds up
the implementation of the Mack stochastic properties.
"""Helper class that fits a system of regression equations
as a closed-form solution. This greatly speeds up
the implementation of the Mack stochastic properties.
"""

def __init__(self, axis=None, thru_orig=False, xp=None):
Expand Down Expand Up @@ -38,10 +39,10 @@ def fit(self, X, y=None, sample_weight=None):
return self

def _fit_OLS(self):
""" Given a set of w, x, y, and an axis, this Function
returns OLS slope and intercept.
TODO:
Make this work with n_periods = 1 without numpy warning.
"""Given a set of w, x, y, and an axis, this Function
returns OLS slope and intercept.
TODO:
Make this work with n_periods = 1 without numpy warning.
"""
from chainladder.utils.utility_functions import num_to_nan

Expand All @@ -54,14 +55,21 @@ def _fit_OLS(self):
w2 = w.copy()
w2 = sp(data=w2.data, coords=w2.coords, fill_value=sp.nan, shape=w2.shape)
x, y = x * w2, y * w2
slope = num_to_nan(
xp.nansum(w * x * y, axis) - xp.nansum(x * w, axis) * xp.nanmean(y, axis)
) / num_to_nan(
xp.nansum(w * x * x, axis) - xp.nanmean(x, axis) * xp.nansum(w * x, axis)
)
intercept = xp.nanmean(y, axis) - slope * xp.nanmean(x, axis)

with warnings.catch_warnings():
warnings.simplefilter("ignore", category=RuntimeWarning)
slope = num_to_nan(
xp.nansum(w * x * y, axis)
- xp.nansum(x * w, axis) * xp.nanmean(y, axis)
) / num_to_nan(
xp.nansum(w * x * x, axis)
- xp.nanmean(x, axis) * xp.nansum(w * x, axis)
)
intercept = xp.nanmean(y, axis) - slope * xp.nanmean(x, axis)

self.slope_ = slope[..., None]
self.intercept_ = intercept[..., None]

return self

def _fit_OLS_thru_orig(self):
Expand All @@ -74,7 +82,7 @@ def _fit_OLS_thru_orig(self):
fitted_value = xp.repeat(xp.expand_dims(coef, axis), x.shape[axis], axis)
fitted_value = fitted_value * x * (y * 0 + 1)
residual = (y - fitted_value) * xp.sqrt(w)
wss_residual = xp.nansum(residual ** 2, axis)
wss_residual = xp.nansum(residual**2, axis)
mse_denom = xp.nansum((y * 0 + 1) * (xp.nan_to_num(w) != 0), axis) - 1
mse_denom = num_to_nan(mse_denom)
mse = wss_residual / mse_denom
Expand All @@ -88,8 +96,8 @@ def _fit_OLS_thru_orig(self):
return self

def sigma_fill(self, interpolation):
""" This Function is designed to take an array of sigmas and does log-
linear extrapolation where n_obs=1 and sigma cannot be calculated.
"""This Function is designed to take an array of sigmas and does log-
linear extrapolation where n_obs=1 and sigma cannot be calculated.
"""
if interpolation == "log-linear":
self.sigma_ = self.loglinear_interpolation(self.sigma_)
Expand All @@ -102,13 +110,12 @@ def std_err_fill(self):
return self

def loglinear_interpolation(self, y):
""" Use Cases: generally for filling in last element of sigma_
"""
"""Use Cases: generally for filling in last element of sigma_"""
from chainladder.utils.utility_functions import num_to_nan

xp = self.xp
ly = y.copy()
ly = xp.log(xp.where(ly==0, 1e-320, ly))
ly = xp.log(xp.where(ly == 0, 1e-320, ly))
w = xp.nan_to_num(ly * 0 + 1)
reg = WeightedRegression(self.axis, False, xp=xp).fit(None, ly, w)
slope, intercept = reg.slope_, reg.intercept_
Expand All @@ -117,9 +124,9 @@ def loglinear_interpolation(self, y):
return num_to_nan(out)

def mack_interpolation(self, y):
""" Use Mack's approximation to fill last element of sigma_ which is the
same as loglinear extrapolation using the preceding two element to
the missing value. This function needs a recursive definition...
"""Use Mack's approximation to fill last element of sigma_ which is the
same as loglinear extrapolation using the preceding two element to
the missing value. This function needs a recursive definition...
"""
from chainladder.utils.utility_functions import num_to_nan

Expand Down

0 comments on commit efe6f23

Please sign in to comment.