From ed6641abdcdf9407fa7e6c57521c6e8f930d73ef Mon Sep 17 00:00:00 2001 From: John Bogaardt Date: Sat, 29 Feb 2020 19:19:31 -0700 Subject: [PATCH] added stochastic tutorial --- chainladder/__init__.py | 2 +- docs/tutorials/stochastic-tutorial.ipynb | 1854 ++++++++++++++++++++++ setup.py | 2 +- 3 files changed, 1856 insertions(+), 2 deletions(-) create mode 100644 docs/tutorials/stochastic-tutorial.ipynb diff --git a/chainladder/__init__.py b/chainladder/__init__.py index b033cd26..fb1f87c9 100644 --- a/chainladder/__init__.py +++ b/chainladder/__init__.py @@ -11,4 +11,4 @@ def array_backend(array_backend='numpy'): from chainladder.methods import * # noqa (API Import) from chainladder.workflow import * # noqa (API Import) -__version__ = '0.5.3' +__version__ = '0.5.4' diff --git a/docs/tutorials/stochastic-tutorial.ipynb b/docs/tutorials/stochastic-tutorial.ipynb new file mode 100644 index 00000000..ce3a52ba --- /dev/null +++ b/docs/tutorials/stochastic-tutorial.ipynb @@ -0,0 +1,1854 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stochastic methods\n", + "### Getting started\n", + "All exercises rely on chainladder v0.5.4 and later. Also, this notebook uses `statsmodels` for comparison purposes." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.5.4'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import chainladder as cl\n", + "import seaborn as sns\n", + "sns.set_style('whitegrid')\n", + "%matplotlib inline\n", + "cl.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MackChainladder Intro\n", + "\n", + "Like the basic `Chainladder` method, the `MackChainladder` is entirely specified by its development pattern selections. In fact, it is the basic `Chainladder` with a few extra features. 
Let's explore this a bit more with the Workers' Compensation industry triangle." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tri = cl.load_dataset('clrd').groupby('LOB').sum().loc['wkcomp', ['CumPaidLoss', 'EarnedPremNet']]\n", + "cl.Chainladder().fit(tri['CumPaidLoss']).ultimate_ == \\\n", + "cl.MackChainladder().fit(tri['CumPaidLoss']).ultimate_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create a Mack Model." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mack = cl.MackChainladder().fit(tri['CumPaidLoss'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MackChainladder has the following additional fitted features that the deterministic `Chainladder` does not.\n", + "\n", + "1. `full_std_err_`: The full standard error\n", + "2. `total_process_risk_`: The total process error\n", + "3. `total_parameter_risk_`: The total parameter error\n", + "4. `mack_std_err_`: The total prediction error by origin period\n", + "5. `total_mack_std_err_`: The total prediction error across all origin periods\n", + "\n", + "Notice these are all measures of uncertainty, but where do they come from? 
Let's start by examining the `link_ratios` underlying the triangle" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Origin1224
1988285,804638,532
1989307,720684,140
1990320,124757,479
1991347,417793,749
1992342,982781,402
1993342,385743,433
1994351,060750,392
1995343,841768,575
1996381,484736,040
" + ], + "text/plain": [ + " 12 24\n", + "1988 285804.0 638532.0\n", + "1989 307720.0 684140.0\n", + "1990 320124.0 757479.0\n", + "1991 347417.0 793749.0\n", + "1992 342982.0 781402.0\n", + "1993 342385.0 743433.0\n", + "1994 351060.0 750392.0\n", + "1995 343841.0 768575.0\n", + "1996 381484.0 736040.0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tri_first_lags = tri[tri.development<=24][tri.origin<'1997']['CumPaidLoss']\n", + "tri_first_lags" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A simple average link-ratio can be directly computed as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.2066789527531494" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tri_first_lags.link_ratio.to_frame().mean().values[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verifying that this ties to our `Development` object:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.2066789527531494" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cl.Development(average='simple').fit(tri['CumPaidLoss']).ldf_.to_frame().values[0, 0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A linear regression framework\n", + "\n", + "Mack noticed that this estimate for an LDF is really just a linear regression fit. For the case of the `simple` average, it is a weighted regression where the weight is set to $\\left (\\frac{1}{X} \\right )^{2}$.\n", + "\n", + "With the regression framework in hand, we get much more information about our LDF estimate than just the coefficient." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\jbogaard\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\scipy\\stats\\stats.py:1450: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=9\n", + " \"anyway, n=%i\" % int(n))\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
WLS Regression Results
Dep. Variable: y R-squared (uncentered): 0.997
Model: WLS Adj. R-squared (uncentered): 0.997
Method: Least Squares F-statistic: 2887.
Date: Sat, 29 Feb 2020 Prob (F-statistic): 1.60e-11
Time: 19:18:37 Log-Likelihood: -107.89
No. Observations: 9 AIC: 217.8
Df Residuals: 8 BIC: 218.0
Df Model: 1
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
x1 2.2067 0.041 53.735 0.000 2.112 2.301
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 7.448 Durbin-Watson: 1.177
Prob(Omnibus): 0.024 Jarque-Bera (JB): 2.533
Skew: -1.187 Prob(JB): 0.282
Kurtosis: 4.058 Cond. No. 1.00


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " WLS Regression Results \n", + "=======================================================================================\n", + "Dep. Variable: y R-squared (uncentered): 0.997\n", + "Model: WLS Adj. R-squared (uncentered): 0.997\n", + "Method: Least Squares F-statistic: 2887.\n", + "Date: Sat, 29 Feb 2020 Prob (F-statistic): 1.60e-11\n", + "Time: 19:18:37 Log-Likelihood: -107.89\n", + "No. Observations: 9 AIC: 217.8\n", + "Df Residuals: 8 BIC: 218.0\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "x1 2.2067 0.041 53.735 0.000 2.112 2.301\n", + "==============================================================================\n", + "Omnibus: 7.448 Durbin-Watson: 1.177\n", + "Prob(Omnibus): 0.024 Jarque-Bera (JB): 2.533\n", + "Skew: -1.187 Prob(JB): 0.282\n", + "Kurtosis: 4.058 Cond. No. 1.00\n", + "==============================================================================\n", + "\n", + "Warnings:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "\"\"\"" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import statsmodels.api as sm\n", + "import numpy as np\n", + "y = tri_first_lags.to_frame().values[:, 1]\n", + "X = tri_first_lags.to_frame().values[:, 0]\n", + "\n", + "model = sm.WLS(y, X, weights=(1/X)**2)\n", + "results = model.fit()\n", + "results.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By toggling the weights of our regression, we can handle the most common types of averaging used in picking loss development factors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Does this work for simple?\n", + "True\n", + "Does this work for volume-weighted average?\n", + "True\n", + "Does this work for regression average?\n", + "True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\users\\jbogaard\\documents\\bitbucket\\chainladder-python\\chainladder\\utils\\weighted_regression.py:49: RuntimeWarning: invalid value encountered in true_divide\n", + " (xp.nansum(w*x*x, axis)-xp.nanmean(x, axis)*xp.nansum(w*x, axis)))\n" + ] + } + ], + "source": [ + "print('Does this work for simple?')\n", + "print(round(cl.Development(average='simple').fit(tri_first_lags).ldf_.to_frame().values[0, 0], 8) == \\\n", + " round(sm.WLS(y, X, weights=(1/X)**2).fit().params[0],8))\n", + "print('Does this work for volume-weighted average?')\n", + "print(round(cl.Development(average='volume').fit(tri_first_lags).ldf_.to_frame().values[0, 0], 8) == \\\n", + " round(sm.WLS(y, X, weights=(1/X)).fit().params[0],8))\n", + "print('Does this work for regression average?')\n", + "print(round(cl.Development(average='regression').fit(tri_first_lags).ldf_.to_frame().values[0, 0], 8) == \\\n", + " round(sm.OLS(y, X).fit().params[0],8))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This regression framework is what the `Development` estimator uses to set development patterns. Although we discard the information in deterministic approaches, `Development` has two useful statistics for estimating reserve variability, both of which come from the regression framework. The statistics are `std_err_` and `sigma_` and they are used by the `MackChainladder` estimator to determine the prediction error of our reserves." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "dev = cl.Development(average='simple').fit(tri['CumPaidLoss'])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Origin12-2424-3636-4848-6060-7272-8484-9696-108108-120
(All)0.04110.01200.00510.00370.00330.00330.00420.00680.0032
" + ], + "text/plain": [ + " 12-24 24-36 36-48 48-60 60-72 72-84 84-96 96-108 108-120\n", + "(All) 0.041066 0.012024 0.005101 0.003734 0.003303 0.003337 0.00419 0.006831 0.003222" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dev.std_err_" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Origin12-2424-3636-4848-6060-7272-8484-9696-108108-120
(All)0.12320.03400.01350.00910.00740.00670.00730.00970.0032
" + ], + "text/plain": [ + " 12-24 24-36 36-48 48-60 60-72 72-84 84-96 96-108 108-120\n", + "(All) 0.123197 0.034009 0.013495 0.009146 0.007386 0.006673 0.007257 0.00966 0.003222" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dev.sigma_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since the regression framework is weighted, we can easily turn on/off any observation we want using the dropping capabilities of the `Development` estimator. Dropping link ratios not only affects the `ldf_` and `cdf_`, but also the `std_err_` and `sigma_` of the regression.\n", + "\n", + "Here we eliminate the 1988 valuation from our triangle, which is identical to eliminating the first observation from our 12-24 regression fit." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Does this work for dropping observations?\n", + "True\n" + ] + } + ], + "source": [ + "print('Does this work for dropping observations?')\n", + "print(round(cl.Development(average='volume', drop_valuation='1988') \\\n", + " .fit(tri['CumPaidLoss']).std_err_.to_frame().values[0, 0], 8) == \\\n", + " round(sm.WLS(y[1:], X[1:], weights=(1/X[1:])).fit().bse[0],8))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With `sigma_` and `std_err_` in hand, Mack goes on to develop recursive formulas to estimate `parameter_risk_` and `process_risk_`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Origin12243648607284961081209999
198800000000000
19890000000005,2515,251
1990000000009,52011,18311,183
199100000005,98411,62913,16113,161
19920000004,5887,46812,25213,64813,648
1993000004,0375,9818,18712,25913,50213,502
199400004,1635,9807,5559,50313,30214,50614,506
19950004,9216,7368,1379,44611,11814,50215,62015,620
1996008,82411,28912,89514,10115,19016,51319,14120,09020,090
1997014,49921,07524,74927,09328,65729,90731,16433,10333,89733,897
" + ], + "text/plain": [ + " 12 24 36 48 60 72 84 96 108 120 9999\n", + "1988 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n", + "1989 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 5251.180091 5251.180091\n", + "1990 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 9519.597764 11182.642025 11182.642025\n", + "1991 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 5983.826435 11629.056593 13161.494851 13161.494851\n", + "1992 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 4587.550645 7467.757613 12251.618351 13648.337451 13648.337451\n", + "1993 0.0 0.000000 0.000000 0.000000 0.000000 4036.584426 5980.685134 8186.842405 12258.771773 13502.229704 13502.229704\n", + "1994 0.0 0.000000 0.000000 0.000000 4162.542848 5980.464847 7554.693620 9503.083404 13302.246384 14505.513836 14505.513836\n", + "1995 0.0 0.000000 0.000000 4920.825234 6735.780467 8137.402853 9445.571257 11118.249482 14501.741876 15619.795959 15619.795959\n", + "1996 0.0 0.000000 8823.893815 11288.653535 12894.776869 14100.808340 15189.795391 16513.301328 19140.782034 20089.868162 20089.868162\n", + "1997 0.0 14499.310582 21075.422823 24748.584403 27093.408297 28657.082880 29907.337622 31164.059421 33102.891878 33896.767821 33896.767821" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mack.parameter_risk_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assumption of Independence\n", + "The Mack model makes a lot of assumptions about independence (i.e. covariance between random processes is 0). This means many of the Variance estimates in the `MackChainladder` model follow the form of $Var(A+B) = Var(A)+Var(B)$.\n", + "\n", + "Notice the square of `mack_std_err_` is simply the sum of the squares of `parameter_risk_` and `process_risk_`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameter risk and process risk are independent?\n", + "True\n" + ] + } + ], + "source": [ + "print('Parameter risk and process risk are independent?')\n", + "print(round(mack.mack_std_err_**2, 4) == round(mack.parameter_risk_**2 + mack.process_risk_**2, 4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This independence assumption applies to variance of each origin period." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Parameter and process risk across origin periods is independent?\n", + "True\n" + ] + } + ], + "source": [ + "print('Total Parameter and process risk across origin periods is independent?')\n", + "print(round(mack.total_process_risk_**2, 4) == round((mack.process_risk_**2).sum('origin'), 4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Independence is also assumed to apply to the overall standard error of reserves, `total_mack_std_err_`." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(mack.total_process_risk_**2 + mack.total_parameter_risk_**2).to_frame().values[0, -1] == \\\n", + "(mack.total_mack_std_err_**2).values[0,0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This over-reliance on independence is one of the weaknesses of the `MackChainladder` method. 
Nevertheless, if the data align with this assumption, then `total_mack_std_err_` is a reasonable esimator of reserve variability.\n", + "\n", + "### Mack Reserve Variability\n", + "The `mack_std_err_` at ultimate is the reserve variability at for each `origin`" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Origin9999
19880
19897,313
199017,838
199121,814
199223,847
199325,283
199428,465
199533,172
199650,244
199799,027
" + ], + "text/plain": [ + " 9999\n", + "1988 0.000000\n", + "1989 7312.634869\n", + "1990 17838.223062\n", + "1991 21813.683826\n", + "1992 23847.273221\n", + "1993 25282.602592\n", + "1994 28465.249566\n", + "1995 33171.832916\n", + "1996 50243.750958\n", + "1997 99026.911753" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mack.mack_std_err_[mack.mack_std_err_.development==mack.mack_std_err_.development.max()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are probably easier to see in the `summary_` of the `MackChainladder` model." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OriginLatestIBNRUltimateMack Std Err
19881,241,71501,241,7150
19891,308,70613,3211,322,0277,313
19901,394,67542,2101,436,88517,838
19911,414,74779,4091,494,15621,814
19921,328,801119,7091,448,51023,847
19931,187,581167,1921,354,77325,283
19941,114,842260,4011,375,24328,465
1995962,081402,4031,364,48433,172
1996736,040636,8341,372,87450,244
1997340,1321,056,3351,396,46799,027
" + ], + "text/plain": [ + " Latest IBNR Ultimate Mack Std Err\n", + "1988 1241715.0 0.000000e+00 1.241715e+06 0.000000\n", + "1989 1308706.0 1.332126e+04 1.322027e+06 7312.634869\n", + "1990 1394675.0 4.221037e+04 1.436885e+06 17838.223062\n", + "1991 1414747.0 7.940888e+04 1.494156e+06 21813.683826\n", + "1992 1328801.0 1.197087e+05 1.448510e+06 23847.273221\n", + "1993 1187581.0 1.671916e+05 1.354773e+06 25282.602592\n", + "1994 1114842.0 2.604007e+05 1.375243e+06 28465.249566\n", + "1995 962081.0 4.024025e+05 1.364484e+06 33171.832916\n", + "1996 736040.0 6.368335e+05 1.372874e+06 50243.750958\n", + "1997 340132.0 1.056335e+06 1.396467e+06 99026.911753" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mack.summary_" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_data = mack.summary_.to_frame()\n", + "g = plot_data[['Latest', 'IBNR']] \\\n", + " .plot(kind='bar', stacked=True,\n", + " yerr=pd.DataFrame({'latest': plot_data['Mack Std Err']*0,\n", + " 'IBNR': plot_data['Mack Std Err']}),\n", + " ylim=(0, None), title='Mack Chainladder Ultimate')\n", + "g.set_xlabel('Accident Year')\n", + "g.set_ylabel('Loss');" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "dist = pd.Series(np.random.normal(mack.ibnr_.sum(),\n", + " mack.total_mack_std_err_.values[0, 0], size=10000))\n", + "dist.plot(\n", + " kind='hist', bins=50,\n", + " title=f\"\"\"Normally distributed IBNR estimate with a mean of {'{:,}'.format(round(mack.ibnr_.sum(),0))[:-2]}\"\"\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ODP Bootstrap Model\n", + "\n", + "The `MackChainladder` focused on a regression framework for determining the variability of reserve estimates. An alternative approach is to use statistical bootstrapping or sampling from a triangle with replacement to simulate new triangles.\n", + "\n", + "Bootstrapping imposes less model constraints than the `MackChainladder` which allows for greater applicability in different scenarios. Sampling new triangles can be accomplished through the `BootstrapODPSample` estimator. This estimator will take a single triangle and simulate new ones from it.\n", + "\n", + "Notice how easy it is to simulate 10,000 new triangles from an existing triangle by accessing the `resampled_triangles_` attribute." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "samples = cl.BootstrapODPSample(n_sims=10000).fit(tri['CumPaidLoss']).resampled_triangles_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, we could use `BootstrapODPSample` to transform our triangle into a resampled set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "samples = cl.BootstrapODPSample(n_sims=10000).fit_transform(tri['CumPaidLoss'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The notion of the ODP Bootstrap is that as our simulations approach infinity, we should expect our mean simulation to converge on the basic `Chainladder` estimate of of reserves.\n", + "\n", + "Let's apply the basic chainladder to our original triangle and also to our simulated triangles to see whether this holds true." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Percentage difference in estimate using original triangle and BootstrapODPSample is -0.0\n" + ] + } + ], + "source": [ + "difference = round(1 - cl.Chainladder().fit(samples).ibnr_.sum('origin').mean() / \\\n", + " cl.Chainladder().fit(tri['CumPaidLoss']).ibnr_.sum())\n", + "print(f\"Percentage difference in estimate using original triangle and BootstrapODPSample is {difference}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using deterministic methods with Bootstrap samples\n", + "Our `samples` is just another triangle object with all the functionality of a regular triangle. This means we can apply any functionality we want to our `samples` including any deterministic methods we learned about previously." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Triangle Summary
Valuation:1997-12
Grain:OYDY
Shape:(10000, 1, 10, 10)
Index:[LOB]
Columns:[CumPaidLoss]
" + ], + "text/plain": [ + "Valuation: 1997-12\n", + "Grain: OYDY\n", + "Shape: (10000, 1, 10, 10)\n", + "Index: ['LOB']\n", + "Columns: ['CumPaidLoss']" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "samples" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(memory=None,\n", + " steps=[('dev',\n", + " Development(average='simple', drop=None, drop_high=None,\n", + " drop_low=None, drop_valuation=None, n_periods=-1,\n", + " sigma_interpolation='log-linear')),\n", + " ('tail', TailConstant(decay=0.5, tail=1.05))],\n", + " verbose=False)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = cl.Pipeline([\n", + " ('dev', cl.Development(average='simple')),\n", + " ('tail', cl.TailConstant(1.05))])\n", + "pipe.fit(samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now instead of a single `cdf_` vector, we have 10,000." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Triangle Summary
Valuation:1997-12
Grain:OYDY
Shape:(10000, 1, 10, 9)
Index:[LOB]
Columns:[CumPaidLoss]
" + ], + "text/plain": [ + "Valuation: 1997-12\n", + "Grain: OYDY\n", + "Shape: (10000, 1, 10, 9)\n", + "Index: ['LOB']\n", + "Columns: ['CumPaidLoss']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.named_steps.dev.cdf_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This allows us to look at the variability of any fitted property used in our prior tutorials." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12-24 LDF of original Triangle: 2.2067\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "orig_dev = cl.Development(average='simple').fit(tri['CumPaidLoss'])\n", + "resampled_ldf = pipe.named_steps.dev.ldf_\n", + "print(f\"12-24 LDF of original Triangle: {round(orig_dev.ldf_.values[0,0,0,0],4)}\")\n", + "pd.Series(resampled_ldf.values[:, 0, 0, 0]).plot(\n", + " kind='hist', bins=100,\n", + " title='Age 12-14 LDF distribution using Bootstrap');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Comparison between Bootstrap and Mack\n", + "We should even be able to approximate some of the Mack parameters calculated using the regression framework." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "mack_vs_bs = resampled_ldf[resampled_ldf.origin == resampled_ldf.origin.max()].std('index').to_frame().append(\n", + " orig_dev.std_err_.to_frame()).T\n", + "mack_vs_bs.columns = ['Mack', 'Bootstrap']\n", + "mack_vs_bs.plot(kind='bar', title='Mack Regression Framework LDF Std Err vs Bootstrap Simulated LDF Std Err');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "While the `MackChainladder` produces statistics about the mean and variance of reserve estimates, those have to be fit to a distribution using MLE, MoM, etc to see the range of outcomes of reserves. With `BootstrapODPSample` based fits, we can use the empirical distribution directly if we choose to." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "99%-ile of reserve estimate is 3,151,387.0\n" + ] + } + ], + "source": [ + "ibnr = cl.Chainladder().fit(samples).ibnr_.sum('origin')\n", + "ibnr_99 = ibnr.quantile(0.99).to_frame().values[0,0]\n", + "print(f\"99%-ile of reserve estimate is {'{:0,}'.format(round(ibnr_99,0))}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see how the `MackChainladder` reserve distribution compares to the `BootstrapODPSample` reserve distribution." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = ibnr.plot(kind='hist', bins=50, alpha=0.9, color='yellow').plot()\n", + "dist.plot(kind='hist', bins=50, alpha=0.3, color='blue', title='Mack vs Bootstrap Variability');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Expected loss methods with Bootstrap\n", + "\n", + "So far, we've only applied the multiplicative methods (i.e. basic chainladder) in a stochastic context. It is possible to use an expected loss method like the `BornhuetterFerguson`. \n", + "\n", + "To do this, we will need an exposure vector." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OriginEarnedPremNet
19881,691,130
19891,797,930
19901,880,315
19912,064,835
19922,189,448
19932,482,657
19942,594,787
19952,616,831
19962,420,655
19972,207,902
" + ], + "text/plain": [ + " EarnedPremNet\n", + "1988 2e+06\n", + "1989 2e+06\n", + "1990 2e+06\n", + "1991 2e+06\n", + "1992 2e+06\n", + "1993 2e+06\n", + "1994 3e+06\n", + "1995 3e+06\n", + "1996 2e+06\n", + "1997 2e+06" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tri['EarnedPremNet'].latest_diagonal" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We don't necessarily need to simulate new premiums, because they aren't stochastic, but we will need to align it with our simulated triangles. We do this by repeating our premium vector for each simulated triangle using the `broadcast_axis` method of the `Triangle` class. This method projects the axis of our samples onto our premium vector, repeating premium values until our shapes are aligned." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Triangle Summary
Valuation:1997-12
Grain:OYDY
Shape:(10000, 1, 10, 1)
Index:[LOB]
Columns:[EarnedPremNet]
" + ], + "text/plain": [ + "Valuation: 1997-12\n", + "Grain: OYDY\n", + "Shape: (10000, 1, 10, 1)\n", + "Index: ['LOB']\n", + "Columns: ['EarnedPremNet']" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prem_samples = tri['EarnedPremNet'].latest_diagonal.broadcast_axis('index', samples.index)\n", + "prem_samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `BornhuetterFerguson` method takes an apriori assumption which itself can be considered to be drawn from a Random process." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEFCAYAAAAMk/uQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGTZJREFUeJzt3X2UZHV95/H3zDjTLQqzGnxaV2Hx4UujDiwDoiIyi6hBJSSoiYsYAYNoMGhijCCISYSDGsSIGJEBHTkbjzGgRlEMxg3Ik4ItGEY7Xxdd3ZxkTQDlSeh2pqf3j3v7UjRd1beHvnWret6vc+ZM1X2qT1VX3W/9fr97b62YmZlBkiSAlW0HkCQNDouCJKliUZAkVSwKkqSKRUGSVLEoSJIqj2g7gJa/iNgd+BFwSzlpJXAv8JeZ+blymT8Hbs3Mi3ts53Tge5n5d/PMq9aPiBngcZl5+yIy7g+8MTPfHBH7ASdn5qvrrr89ImIV8HlgDDg3M8+bZ5nDgS8Br83Mv9nOx7kZ2JCZdy5inQuBz2bmP2zPY2p4WRTUL/dn5j6zdyJiN+AbETGdmZdm5uk1tnEI8IP5ZtRcv5dnAf+l3NZ3gEYLQunJwMuAR2XmdJdlfh/4a+APge0qCp2v+yLW+b3teSwNvxWevKamlS2FzZn56DnTjwJOysznRcSmcpmzI+LPgN8CfgXcARwDHAl8ALgN+CPgCOCxwNOAy4AndKw/A1wA7E/RKjktMy+LiGOAV2fmK8vHP4Zi5/8W4FpgLcU3908D52XmsyNiLfAxYB9gBrgceHdmbo2ISeD9wEuBJwEfzMyPz/P8DwL+AtipfE6nlY/3bSAoWlCvyswfzVlvD+D7wG7ABPCKzPxWOW8TcH+Z6/HAFeVruSUipoC/A/YGXgfcSNlyioj3AP8D2Ar8EHhrZv4sIq4Efg7sCXwceBVwHvBF4KPAgcAW4MfAsZl579znqeXBMQW16XvAczonRMRTgLcD+2fmfhQ7uwMy82PAd4B3ZuYXysV3ysxnZea75tn2jzNzX+Bo4NMR8bhuITLzX4DTgasz89g5s8+lKEzPAfaj2NH+cTlvBLg9M19AUVw+HBGjc57PrwGXAG/LzHXAG4D/CewKvJyyBTW3IJTeDHwlM/8D+CxFa6HTAcBLgL3KfyeU09cAX87MKFs9s1mOBQ6jeG3XAZuBTR3b+0Vm7pWZH+2Y9nxgA7B3Zq6nKArr5smqZcKioDbNAPfNmfavFMXiuxFxNnBzZn6xy/rX9Nj2
+QCZuZmiy+n525nxMIpWw0xmTpXbPaxj/uz4xncpisSj5qx/AMVYx7fLPN+naCVs6PWgETECHEvRaqH8/8iyaM7alJn3lrkupuiKmnV1l+fyqcz8ZXn/I8CLI2JNj3VuAaaBb0fE+4BLM/O6Xtk13CwKatP+PDD4DEBmbgMOpugyuoPi2/cHu6zfqwujs49+JUXXxwywomP6Gha2slyv8/7qjvv3l7lnl+ncPsCqOevPt435/DbwGOC8iPgJ8LlyO3/QsczWOdvsfM7zvTZzs6ykGFeczfyQdcrB6dnW0TTwNxHx+wtk1xCzKKgVEfFM4D3Ah+ZM35uiW2MiM88CPkxRPKDYCS60M511TLm9fYGnU/Tf3wY8OyJGI2I1Dx5M7rbtvwfeGhErym/vbwK+XjMDwPXAnhHx3DLPs4AXAVcusN5bgDMzc7fM3D0zd6foTjo+ImZbI78TESNll9UbgC8vsM2vAcd1rH8S8M2ypTGviHgl8A3gusz8U4oWyf7dltfw8+gj9csjy0MjAbYBk8ApmfmVzoUy83sR8TngOxFxL8U38ZPK2V8Czuro7uhlj4i4ieKb8Wsz8+cRcQVwFfDPwP8D/pEH+se/Bbw3Ij5PMY4w6ySKgdZbKFoWXwPOrPuky8Hd1wAfjYidyud+bGb+sByAf4iyMO4D/MacWRdTDFIfU96/j6LL5zEU4xafWiDORcBTgBsiYiVwK8VAdC+XU3Q7bS7/Hr8Ajl9gHQ0xjz6ShlDn0VptZ9HyYveRJKliS0GSVLGlIEmqWBQkSZWhO/ro5ptvnhkZGek6f2pqil7zB9EwZobhzD2MmWE4c5u5f+rkvu+++25fv3591zP7Zw1dURgZGWFsbKzr/ImJiZ7zB9EwZobhzD2MmWE4c5u5f+rkHh8f/2mdbdl9JEmqWBQkSRWLgiSpYlGQJFUsCpKkikVBklSxKEiSKhYFSVLFoiBJqlgUpEWa3DK9qOnSMBm6y1xIbRtdvYrdT/7KQ6b/5P2vaCGNtLRsKUiSKhYFSVLFoiB14RiBdkSOKUhdOHagHZEtBUlSxaIgSapYFLTDc+xAeoBjCtrhOXYgPcCWgiSpYlGQJFUsCpKkikVB6gMvoqdh0dhAc0TcBNxV3v0/wJnAJmAG2AycmJnbIuJ44ARgK3BGZl7WVCapLQ5ma1g0UhQiYhQgMzd0TPsScFpmXhkR5wNHRMT1wEnAfsAocE1EfD0zp5rIJUnqramWwt7AThFxRfkY7wbWA1eV8y8HXgpMA9eWRWAqIm4F1gE3dtvw1NQUExMTXR94cnKy5/xBNIyZYThzz5d5bGxsybbf7fXo9Rh1XsPl8loPumHMDEubu6micB9wNnAh8AyKIrAiM2fK+fcAa4FdeKCLqXN6VyMjIwt+wJbyQ94Pw5gZhjN305m3Z9t11vG17o9hzAz1co+Pj9faVlNF4YfArWUR+GFE3EHRUpi1M3AncHd5e+50SVILmjr66DjgQwAR8Z8pWgRXRMSGcv5hwNXADcBBETEaEWuBMYpBaElSC5pqKVwEbIqIayiONjoOuB3YGBFrgAngksycjohzKQrESuDUzJxsKJMkaQGNFIXM/BVw1DyzDp5n2Y3AxiZySMNqcss0o6tXVfdn+4vnTpeWmhfEk1rUbSfveQ1qi0VBapE7fw0aL3MhLREvWaHlwJaCtES6fesHv/lreNhSkCRVLAqSpIpFQTuMyS3TQ3kJA6mfHFPQsuNhntL2syho2XHnL20/u48kSRWLgiSpYlGQJFUsCpKkikVBklSxKEiSKhYFSVLFoiBJqlgUJEkVi4IkqWJRkCRVLArSMtDtV9/8NTgtlhfEk5YBLwKopWJLQZJUsShIkioWBUlSxaIgSapYFCRJFYuCJKliUZAkVSwK0hDxZDQ1rbGT1yLi8cA48BJgK7AJmAE2Aydm5raIOB44oZx/RmZe1lQeaTnwJDU1rZGWQkSsBj4B3F9OOgc4LTMPAlYAR0TEE4GT
gAOBlwFnRcRIE3kkSfU01X10NnA+8G/l/fXAVeXty4FDgecC12bmVGbeBdwKrGsojySphiXvPoqIY4DbMvPvI+KUcvKKzJwpb98DrAV2Ae7qWHV2ek9TU1NMTEx0nT85Odlz/iAaxszQfu6n7r4Hj3qkjcuFtPU3avv9sT2GMTMsbe4mxhSOA2Yi4lBgH+Bi4PEd83cG7gTuLm/Pnd7TyMgIY2NjXedPTEz0nD+IhjEzDEZu+9cX1tbfaBDeH4s1jJmhXu7x8fFa21ryopCZL5q9HRFXAm8G/iIiNmTmlcBhwD8CNwBnRsQoMAKMUQxCS5Ja0q9LZ78D2BgRa4AJ4JLMnI6Ic4GrKcY2Ts3MyT7lkSTNo9GikJkbOu4ePM/8jcDGJjNo+E1umWZ09aq2Y0g7BH9kRwPPY/Ol/vGMZklSxaIgSapYFCRJFYuCJKliUZAkVSwKkqSKRUGSVLEoSJIqFgVJUsWiIEmqWBQkSRWLgrSMTW6ZXtR0yQviScuYFxPUYtlSkCRVLAoaGHZpSO2z+0gDw64OqX21WgoR8YSmg0iS2le3pXBpRNwGXAR8NTO3NZhJktSSWi2FzHwh8G6K31m+LiLOjIg9Gk0mSeq7xQw0/xvwY+A+4NnARyLizxtJJUlqRd0xhc8B1wOPAY7OzCMy83Dg5U2Gk9QMT2pTN3XHFDYC12fmvRHxpI7pL2wgk6SGeaSXuqnbffQC4M/K2+dGxMkAmTnZSCpJUivqFoXfyMx3AGTma4DDm4skSWpL3aKwLSLWAETE6kWsJ0kaInXHFM4HNkfELcCewAebiyRJakutopCZF0XEl4A9gB9l5u3NxpIktaFWUYiIfYA3AaPlfTLzuCaDSZL6r2730SbgPOBfmosiSWpb3aLws8y8sO5GI2IVxbkNAUwDxwIrKIrLDLAZODEzt0XE8cAJwFbgjMy8rH58SdJSqlsUflKem3ATxU6dzLyix/KHl8scGBEbgHMoisJpmXllRJwPHBER1wMnAftRdE1dExFfz8yp7Xo2kqSHpW5RGKH41h/l/Rmga1HIzC9GxOw3/t2AfwdeAVxVTrsceClFK+LasghMRcStwDrgxsU8CUnS0qh79NGxEfFM4GnALRQXx1tona0R8Wngt4BXA6/MzJly9j3AWmAX4K6O1WandzU1NcXExETX+ZOTkz3nD6JhzAxLn3tsbGzJtqXtt1R/02F8Xw9jZlja3HWPPnorxc79sRTjAs8A3rrQepn5hoh4F/Bt4JEds3YG7gTuLm/Pnd7VyMhIz53HxMTE0O1chjEzDG9u9bZUf9NhfH8MY2aol3t8fLzWtuqemfxa4FDgzsz8CHBAr4Uj4vURcUp59z5gG/CdcnwB4DDgauAG4KCIGI2ItcAYxSC0JKkFdccUZovHbPfPQgPBnwc+FRHfBFYDbwcmgI3l5TImgEsyczoizqUoECuBU73IniS1p25R+AzwTWC3iPgq8MVeC2fmL4HfnmfWwfMsu5Hi8FVJUsvqDjSfFxHfoPjFtczMf2o2lpazyS3TjK5e1XYMSfOoO9B8esfdsYj4zcz0pzi1XfyBF2lw1e0++vfy/xXAvnjpbElalup2H32i835EXN5MHElSm+p2Hz2z4+6TgKc2E0eS1Ka63UedLYVJ4I8byCJJalnd7qP/3nQQSVL76nYffY/iEhSTlD+0QzHoPJOZezSUTZLUZ3WPIroOeF1m7gUcAVxD8VvNw3eREElSV3XHFPbKzOsBMvOWiHiqv3kgSctP3aJwZ0S8j+ICdi8EftpcJElSW+p2Hx1FcZnrXwd+DLyxsUSSpNbULQqTwC+A24EE/lNjiSRJralbFD5BccLaSymOQrq4sUSSpNbULQpPy8zTgcnM/DIL/GSmBMXVUCUNl7oDzY+IiF2BmYjYmeKX1KSevBqqNHzqFoVTgWsprnv0LeBtjSWSJLWmbvfRUzIzgKcBz87Mf2gwk6SWdOvyW+x0Da+6LYU3AX+dmbc1
GUZSu3p1+dkVuGOoWxRGIuImisNRtwFk5lGNpZIktaJnUYiI0zLzDOBdwJOBf+1LKklSKxZqKRwCnJGZV0XE/8rMQ/oRSpLUjoUGmld0uS1JWoYWKgozXW5LkpahhbqP1kfEdRSthL06bs9k5gsaTydJ6quFisK6vqSQJA2EnkUhM/3dBEnagdQ9o1mStAOwKEiSKhYFSVKl7mUuaouI1cAngd2BEeAM4AfAJorDWjcDJ2bmtog4HjgB2EpxktxlS51HklRfEy2Fo4E7MvMg4DDgPOAc4LRy2grgiIh4InAScCDwMuCsiBhpII8kqaYlbykAfwtc0nF/K7AeuKq8fznFz3pOA9dm5hQwFRG3UhwCe2MDmSRJNSx5UcjMewHKX2i7BDgNODszZ8+Ivofi5zx3Ae7qWHV2ek9TU1NMTEx0nT85Odlz/iAaxsywcO6xsbE+plEbJrdMM7p61UOm//L+qaF8Xw9jZlja3E20FIiIpwBfAP4qMz8TER/smL0zcCdwd3l77vSeRkZGeu5sJiYmhm5nNIyZYXhza+n0+v2F0dHRoXt/DOt7uk7u8fHxWtta8jGFiHgCcAXwrsz8ZDn5pojYUN4+DLgauAE4KCJGI2ItMEYxCC1JakkTLYV3A48B3hMR7ymnvQ04NyLWABPAJZk5HRHnUhSIlcCpmTnZQB5JUk1NjCm8jaIIzHXwPMtuBDYudQZJ0vbx5DVJUsWiIEmqWBT0sE1umW47gqQl0sghqdqx9DosUdJwsaUgaclNbpnuety8LcvBZktB0pLr1noEW5CDzpaCJKliUZAkVSwKkqSKRUGSVLEoSJIqFgVJUsWiIEmqWBRUy3wnHA3jj5FI6s2T11SLJyNJOwZbCpKkikVBklSxKEiSKhYFSX3V7SqpXj11MDjQLKmv/P2NwWZLQZJUsShIkioWBT2I/brSjs0xBT2I/b1qy+SWaUZXr6o9Xc2wKEgaCH4hGQx2H0mSKhYFSVLFoiBJqlgUJEkVi8IOykNPJc2nsaOPIuIA4AOZuSEing5sAmaAzcCJmbktIo4HTgC2Amdk5mVN5dGDeaSHpPk00lKIiD8BLgRGy0nnAKdl5kHACuCIiHgicBJwIPAy4KyIGGkijySpnqa6j34EHNlxfz1wVXn7cuBQ4LnAtZk5lZl3AbcC6xrKI0mqoZHuo8y8NCJ275i0IjNnytv3AGuBXYC7OpaZnd7T1NQUExMTXedPTk72nD+I2sjs7ytrmPTr8zGM+w9Y2tz9OqN5W8ftnYE7gbvL23On9zQyMtJzhzYxMTF0O7xhzCz1U78+H8P6WayTe3x8vNa2+nX00U0RsaG8fRhwNXADcFBEjEbEWmCMYhBaktSSfrUU3gFsjIg1wARwSWZOR8S5FAViJXBqZk72KY8kaR6NFYXM/AnwvPL2D4GD51lmI7CxqQySpMXx5DVJUsWiIGmgdTv73rPym+HvKUgaaJ5931+2FCRJFYuCJKliUVjm7HeVtBiOKSxz9sdKWgxbCsuELQJJS8GWwjJhi0DSUrClIEmqWBQkDSVPamuG3UeShpJdps2wpSBJqlgUhoxNY0lNsvtoyNhkltQkWwqSlhUHoB8eWwqSlhVb0w+PLQVJUsWiMKBs6kpLy26leuw+GlA2gaWl5WeqHlsKkqSKRUGSVLEoSJIqFoWWTW6ZZmxsrO0Y0g6rc6C587O4ow5AO9DcMge/pHb5GXwwWwqSpIpFoU921KaopOFi91Gf2ESVlofJLdOMrl5Ve/qwsSj00OuPvNzfGNKOrttnebl/wbMo9NDtjw/d3wDL/Q0j7SgW+1leLl8UWy8KEbES+Ctgb2AK+L3MvLXdVJK0OMvlC+EgDDT/JjCamc8HTgY+1NQDLeUFsRw4lvRw9NqHtLl/ab2lALwQ+BpAZn4rIvZr6oF6VfLFVvjl8q1AUrMWOzYB8M/v+/VFbWsprZiZmWn0ARYSERcCl2bm5eX9/wvskZlb51t+fHz8NuCn
fYwoScvBbuvXr3/cQgsNQkvhbmDnjvsruxUEgDpPSpK0fQZhTOFa4OUAEfE84JZ240jSjmsQWgpfAF4SEdcBK4BjW84jSTus1scUJEmDYxC6jyRJA8KiIEmqWBQkSZVBGGhetIUujRERr6I4O3oGuCAzL2wl6Bx1L+kRERcAP8/Mk/sc8SFqvNZ/BLwRuK2cdEJmZt+DzlEj9/7AORQHN/wMODozJ9vI2pGpa+aIeCLw2Y7F9wFOzszz+x60Q43X+XXAO4Bp4JOZ+fFWgs5RI/frgXcCdwGbMvOiVoLOIyIOAD6QmRvmTD8cOB3YSvFab9ye7Q9rS6HrpTEiYhXwfuBQ4PnAOyNi11ZSPtSCl/SIiBOA5/Q7WA8LZd4X+N3M3FD+a70glHq9R1YAG4FjM3P2jPrdWkn5YF0zZ+bPZl9j4BTguxTPoW0LvT/OpvgsHgi8IyIe0+d83fR6f+wKnAFsAA4GXhcRu7eQ8SEi4k+AC4HROdNXAx8GXkqR+U3lF4lFG9ai8KBLYwDVpTEycxoYy8y7gF+j+CZ4bxsh59E1N0BEPB94HvCJ/kfrqmdmYD1wSkRcExGn9DtcD71yPxO4A3h7RFwFPHZAitlCr/VsQfso8Jbyvd62hTL/E7CWYie2gqL1Pgh65d4DuDkzf56Z24AbKT6Xg+BHwJHzTB8Dbs3MX2Tmr4BrgIO25wGGtSjsQtGsmzUdEVVXWGZujYgjge8B3wS29DlfN11zR8STgD8FTmwhVy89X2uKLo03A4cAL4yIV/YzXA+9cu8KvICi++BQ4MUR8eI+55vPQq81wOHA9wekiMHCmTcD48D3gcsy885+huuhV+7/DTwrIp4QETsBLwYe1e+A88nMS5l/fzb3+dxDUYwXbViLwoKXxsjMzwNPBtYAv9vHbL30yv0aip3VVymas0dFxDH9jTevrpnLb61/mZm3l99OvgL8txYyzqfXa30HxbeqH2TmFopvjOv7HXAedS75cjRwQf8iLajX+2Md8ArgvwK7A4+PiNf0PeH8uubOzF8AfwhcCnySoqvu9r4nXJy5z2dnYLsK8LAWha6XxoiIXSLiqogYKZt+vwS2tRPzIbrmzsxzM3N92Wf8fuAzmbmpjZBz9LoMyS7A5oh4dFkgDqH4VjgIeuX+MfDoiHh6ef8gim+ybatzyZf1wHX9DLWAXpnvAu4H7i+7uv4DGJQxhV77kEdQdBe9iOIL5Z7l8oNsAnhGRDw2ItZQZL9+ezY0lGc0dxw5sI4HLo2xL/DozLwgIt5EcUTMFoo+zT8YhP7XhXJ3LHcMsOeAHX3U7bV+PXASxREc38jM97YWtkON3IdQFN8VwHWZ+bbWwpZqZH4c8PXM3KfFmA9SI/ObgeOAX1H0hx9ftipbVSP3eykGoyeBD2XmJa2FnaMc9P5sZj4vIo7igcyzRx+tpDj66GPbs/2hLAqSpGYMa/eRJKkBFgVJUsWiIEmqWBQkSRWLgiSpYlGQJFUsCpKkyv8HzHV/6S0rr2YAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Simulate aprioris using numpy\n", + "apriori_mu = 0.65\n", + "apriori_sigma = .10\n", + "aprioris = np.random.normal(apriori_mu, apriori_sigma, 10000)\n", + "pd.Series(aprioris).plot(kind='hist', bins=50, title='Distribution of Aprioris');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Recall that the `BornhuetterFerguson` apriori needs to be a constant, so we cannot use it directly. But we can exploit the fact that the premium vector and the apriori are multiplicative and embed our resampled apriori directly into our premium vector. We will modify the premium triangle using `numpy`." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Triangle Summary
Valuation:1997-12
Grain:OYDY
Shape:(10000, 1, 10, 1)
Index:[LOB]
Columns:[EarnedPremNet]
" + ], + "text/plain": [ + "Valuation: 1997-12\n", + "Grain: OYDY\n", + "Shape: (10000, 1, 10, 1)\n", + "Index: ['LOB']\n", + "Columns: ['EarnedPremNet']" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prem_samples.values = (prem_samples.values * aprioris.reshape(10000,-1)[..., np.newaxis, np.newaxis])\n", + "prem_samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now our premium vector is stochastic." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "prem_samples.sum('origin').plot(kind='hist', bins=50, title=\"Premium x Apriori\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With these components, fitting the `BornhuetterFerguson` or any other expected loss method is straight forward and looks just like its deterministic counterpart." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "bf = cl.BornhuetterFerguson().fit(samples, sample_weight=prem_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use our knowledge of `Triangle` manipulation to grab most things we would want out of our model" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# Grab completed triangle replacing simulated known data with actual known data\n", + "full_triangle = bf.full_triangle_ - bf.X_ + \\\n", + " tri['CumPaidLoss'].broadcast_axis('index', samples.index)\n", + "\n", + "# Limiting to the current year for plotting\n", + "current_year = full_triangle[full_triangle.origin==full_triangle.origin.max()].to_frame().T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As expected, plotting the expected development of our full triangle over time from the Bootstrap `BornhuetterFerguson` model fans out to greater uncertainty the farther we get from our valuation date." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the data\n", + "current_year.iloc[:, :200].reset_index(drop=True).plot(\n", + " color='green', legend=False, alpha=0.1,\n", + " title='Current Accident Year Expected Development Distribution', grid=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Recap\n", + "- The Mack method approaches stochastic reserving from a regression point of view
\n", + "- Bootstrap methods approach stochastic reserving from a simulation point of view
\n", + "- Where the assumptions of each model are not violated, they produce reasonably consistent estimates of reserve variability
\n", + "- Mack does impose more assumptions (i.e. constraints) on the reserve estimate making the Bootstrap approach more suitable in a broader set of applications
\n", + "- Both methods converge to their corresponding deterministic point estimates
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/setup.py b/setup.py index 029be813..ad076b6a 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ descr = "Chainladder Package - P&C Loss Reserving package " name = 'chainladder' url = 'https://github.com/casact/chainladder-python' -version='0.5.3' # Put this in __init__.py +version='0.5.4' # Put this in __init__.py data_path = '' setup(