diff --git a/README.md b/README.md index 8e86a3b..dd4da6e 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,11 @@ Each notebook corresponds to a chapter from the source material. Click on the "O Open In Colab +8. **Ch9. Specification and Data Issues** + + Open In Colab + + ## How to Use 1. Click on the "Open in Colab" badge next to the notebook you want to explore. diff --git a/markdown/Ch7. MRA - Qualitative Regressors.md b/markdown/Ch7. MRA - Qualitative Regressors.md index b7e8641..96821ce 100644 --- a/markdown/Ch7. MRA - Qualitative Regressors.md +++ b/markdown/Ch7. MRA - Qualitative Regressors.md @@ -20,11 +20,11 @@ jupyter: ``` ```python +import numpy as np # noqa import pandas as pd import statsmodels.api as sm import statsmodels.formula.api as smf import wooldridge as wool -import numpy as np ``` ## 7.1 Linear Regression with Dummy Variables as Regressors diff --git a/markdown/Ch9. Specification and Data Issues.md b/markdown/Ch9. Specification and Data Issues.md new file mode 100644 index 0000000..0b71315 --- /dev/null +++ b/markdown/Ch9. Specification and Data Issues.md @@ -0,0 +1,384 @@ +--- +jupyter: + jupytext: + formats: notebooks//ipynb,markdown//md,scripts//py + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.16.4 + kernelspec: + display_name: merino + language: python + name: python3 +--- + +# Ch9. Specification and Data Issues + +```python +%pip install matplotlib numpy pandas statsmodels wooldridge scipy -q +``` + +```python +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import statsmodels.api as sm +import statsmodels.formula.api as smf +import statsmodels.stats.outliers_influence as smo +import wooldridge as wool +from scipy import stats +``` + +## 9.1 Functional Form Misspecification + +### Example 9.2: Housing Price Equation + +```python +hprice1 = wool.data("hprice1") + +# original OLS: +reg = smf.ols(formula="price ~ lotsize + sqrft + bdrms", data=hprice1) +results = reg.fit() + +# regression for RESET test: +hprice1["fitted_sq"] = results.fittedvalues**2 +hprice1["fitted_cub"] = results.fittedvalues**3 +reg_reset = smf.ols( + formula="price ~ lotsize + sqrft + bdrms + fitted_sq + fitted_cub", + data=hprice1, +) +results_reset = reg_reset.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results_reset.params, 4), + "se": round(results_reset.bse, 4), + "t": round(results_reset.tvalues, 4), + "pval": round(results_reset.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +``` + +```python +# RESET test (H0: all coeffs including "fitted" are=0): +hypotheses = ["fitted_sq = 0", "fitted_cub = 0"] +ftest_man = results_reset.f_test(hypotheses) +fstat_man = ftest_man.statistic +fpval_man = ftest_man.pvalue + +print(f"fstat_man: {fstat_man}\n") +print(f"fpval_man: {fpval_man}\n") +``` + +```python +hprice1 = wool.data("hprice1") + +# original linear regression: +reg = smf.ols(formula="price ~ lotsize + sqrft + bdrms", data=hprice1) +results = reg.fit() + +# automated RESET test: +reset_output = smo.reset_ramsey(res=results, degree=3) +fstat_auto = reset_output.statistic +fpval_auto = reset_output.pvalue + +print(f"fstat_auto: {fstat_auto}\n") +print(f"fpval_auto: {fpval_auto}\n") +``` + +```python +hprice1 = wool.data("hprice1") + +# two alternative models: +reg1 = smf.ols(formula="price ~ lotsize + sqrft + bdrms", data=hprice1) +results1 = reg1.fit() + +reg2 = smf.ols( + formula="price ~ np.log(lotsize) +np.log(sqrft) + bdrms", + data=hprice1, +) +results2 = reg2.fit() + +# encompassing test of Davidson & MacKinnon: +# comprehensive model: +reg3 = smf.ols( + formula="price ~ lotsize + sqrft + bdrms + np.log(lotsize) + np.log(sqrft)", + data=hprice1, +) +results3 = reg3.fit() + +# model 1 vs. comprehensive model: +anovaResults1 = sm.stats.anova_lm(results1, results3) +print(f"anovaResults1: \n{anovaResults1}\n") +``` + +```python +# model 2 vs. comprehensive model: +anovaResults2 = sm.stats.anova_lm(results2, results3) +print(f"anovaResults2: \n{anovaResults2}\n") +``` + +## 9.2 Measurement Error + +```python +# set the random seed: +np.random.seed(1234567) + +# set sample size and number of simulations: +n = 1000 +r = 10000 + +# set true parameters (betas): +beta0 = 1 +beta1 = 0.5 + +# initialize arrays to store results later (b1 without ME, b1_me with ME): +b1 = np.empty(r) +b1_me = np.empty(r) + +# draw a sample of x, fixed over replications: +x = stats.norm.rvs(4, 1, size=n) + +# repeat r times: +for i in range(r): + # draw a sample of u: + u = stats.norm.rvs(0, 1, size=n) + + # draw a sample of ystar: + ystar = beta0 + beta1 * x + u + + # measurement error and mismeasured y: + e0 = stats.norm.rvs(0, 1, size=n) + y = ystar + e0 + df = pd.DataFrame({"ystar": ystar, "y": y, "x": x}) + + # regress ystar on x and store slope estimate at position i: + reg_star = smf.ols(formula="ystar ~ x", data=df) + results_star = reg_star.fit() + b1[i] = results_star.params["x"] + + # regress y on x and store slope estimate at position i: + reg_me = smf.ols(formula="y ~ x", data=df) + results_me = reg_me.fit() + b1_me[i] = results_me.params["x"] + +# mean with and without ME: +b1_mean = np.mean(b1) +b1_me_mean = np.mean(b1_me) +print(f"b1_mean: {b1_mean}\n") +print(f"b1_me_mean: {b1_me_mean}\n") +``` + +```python +# variance with and without ME: +b1_var = np.var(b1, ddof=1) +b1_me_var = np.var(b1_me, ddof=1) +print(f"b1_var: {b1_var}\n") +print(f"b1_me_var: {b1_me_var}\n") +``` + +```python +# set the random seed: +np.random.seed(1234567) + +# set sample size and number of simulations: +n = 1000 +r = 10000 + +# set true parameters (betas): +beta0 = 1 +beta1 = 0.5 + +# initialize b1 arrays to store results later: +b1 = np.empty(r) +b1_me = np.empty(r) + +# draw a sample of x, fixed over replications: +xstar = stats.norm.rvs(4, 1, size=n) + +# repeat r times: +for i in range(r): + # draw a sample of u: + u = stats.norm.rvs(0, 1, size=n) + + # draw a sample of y: + y = beta0 + beta1 * xstar + u + + # measurement error and mismeasured x: + e1 = stats.norm.rvs(0, 1, size=n) + x = xstar + e1 + df = pd.DataFrame({"y": y, "xstar": xstar, "x": x}) + + # regress y on xstar and store slope estimate at position i: + reg_star = smf.ols(formula="y ~ xstar", data=df) + results_star = reg_star.fit() + b1[i] = results_star.params["xstar"] + + # regress y on x and store slope estimate at position i: + reg_me = smf.ols(formula="y ~ x", data=df) + results_me = reg_me.fit() + b1_me[i] = results_me.params["x"] + +# mean with and without ME: +b1_mean = np.mean(b1) +b1_me_mean = np.mean(b1_me) +print(f"b1_mean: {b1_mean}\n") +print(f"b1_me_mean: {b1_me_mean}\n") +``` + +```python +# variance with and without ME: +b1_var = np.var(b1, ddof=1) +b1_me_var = np.var(b1_me, ddof=1) +print(f"b1_var: {b1_var}\n") +print(f"b1_me_var: {b1_me_var}\n") +``` + +## 9.3 Missing Data and Nonrandom Samples + +```python +# nan and inf handling in numpy: +x = np.array([-1, 0, 1, np.nan, np.inf, -np.inf]) +logx = np.log(x) +invx = np.array(1 / x) +ncdf = np.array(stats.norm.cdf(x)) +isnanx = np.isnan(x) + +results = pd.DataFrame( + {"x": x, "logx": logx, "invx": invx, "logx": logx, "ncdf": ncdf, "isnanx": isnanx}, +) +print(f"results: \n{results}\n") +``` + +```python +lawsch85 = wool.data("lawsch85") +lsat_pd = lawsch85["LSAT"] + +# create boolean indicator for missings: +missLSAT = lsat_pd.isna() + +# LSAT and indicator for Schools No. 120-129: +preview = pd.DataFrame({"lsat_pd": lsat_pd[119:129], "missLSAT": missLSAT[119:129]}) +print(f"preview: \n{preview}\n") +``` + +```python +# frequencies of indicator: +freq_missLSAT = pd.crosstab(missLSAT, columns="count") +print(f"freq_missLSAT: \n{freq_missLSAT}\n") +``` + +```python +# missings for all variables in data frame (counts): +miss_all = lawsch85.isna() +colsums = miss_all.sum(axis=0) +print(f"colsums: \n{colsums}\n") +``` + +```python +# computing amount of complete cases: +complete_cases = miss_all.sum(axis=1) == 0 +freq_complete_cases = pd.crosstab(complete_cases, columns="count") +print(f"freq_complete_cases: \n{freq_complete_cases}\n") +``` + +```python +lawsch85 = wool.data("lawsch85") + +# missings in numpy: +x_np = np.array(lawsch85["LSAT"]) +x_np_bar1 = np.mean(x_np) +x_np_bar2 = np.nanmean(x_np) +print(f"x_np_bar1: {x_np_bar1}\n") +print(f"x_np_bar2: {x_np_bar2}\n") +``` + +```python +# missings in pandas: +x_pd = lawsch85["LSAT"] +x_pd_bar1 = np.mean(x_pd) +x_pd_bar2 = np.nanmean(x_pd) +print(f"x_pd_bar1: {x_pd_bar1}\n") +print(f"x_pd_bar2: {x_pd_bar2}\n") +``` + +```python +# observations and variables: +print(f"lawsch85.shape: {lawsch85.shape}\n") +``` + +```python +# regression (missings are taken care of by default): +reg = smf.ols(formula="np.log(salary) ~ LSAT + cost + age", data=lawsch85) +results = reg.fit() +print(f"results.nobs: {results.nobs}\n") +``` + +## 9.4 Outlying Observations + +```python +rdchem = wool.data("rdchem") + +# OLS regression: +reg = smf.ols(formula="rdintens ~ sales + profmarg", data=rdchem) +results = reg.fit() + +# studentized residuals for all observations: +studres = results.get_influence().resid_studentized_external + +# display extreme values: +studres_max = np.max(studres) +studres_min = np.min(studres) +print(f"studres_max: {studres_max}\n") +print(f"studres_min: {studres_min}\n") +``` + +```python +# histogram (and overlayed density plot): +kde = sm.nonparametric.KDEUnivariate(studres) +kde.fit() + +plt.hist(studres, color="grey", density=True) +plt.plot(kde.support, kde.density, color="black", linewidth=2) +plt.ylabel("density") +plt.xlabel("studres") +``` + +## 9.5 Least Absolute Deviations (LAD) Estimation + +```python +rdchem = wool.data("rdchem") + +# OLS regression: +reg_ols = smf.ols(formula="rdintens ~ I(sales/1000) + profmarg", data=rdchem) +results_ols = reg_ols.fit() + +table_ols = pd.DataFrame( + { + "b": round(results_ols.params, 4), + "se": round(results_ols.bse, 4), + "t": round(results_ols.tvalues, 4), + "pval": round(results_ols.pvalues, 4), + }, +) +print(f"table_ols: \n{table_ols}\n") +``` + +```python +# LAD regression: +reg_lad = smf.quantreg(formula="rdintens ~ I(sales/1000) + profmarg", data=rdchem) +results_lad = reg_lad.fit(q=0.5) + +table_lad = pd.DataFrame( + { + "b": round(results_lad.params, 4), + "se": round(results_lad.bse, 4), + "t": round(results_lad.tvalues, 4), + "pval": round(results_lad.pvalues, 4), + }, +) +print(f"table_lad: \n{table_lad}\n") +``` diff --git a/myst.yml b/myst.yml index 84ff155..b50a29a 100644 --- a/myst.yml +++ b/myst.yml @@ -19,6 +19,7 @@ project: - file: notebooks/Ch6. MRA - Further Issues.ipynb - file: notebooks/Ch7. MRA - Qualitative Regressors.ipynb - file: notebooks/Ch8. Heteroskedasticity.ipynb + - file: notebooks/Ch9. Specification and Data Issues.ipynb site: template: book-theme diff --git a/notebooks/Ch7. MRA - Qualitative Regressors.ipynb b/notebooks/Ch7. MRA - Qualitative Regressors.ipynb index 99fab19..3a67a7f 100644 --- a/notebooks/Ch7. MRA - Qualitative Regressors.ipynb +++ b/notebooks/Ch7. MRA - Qualitative Regressors.ipynb @@ -31,11 +31,11 @@ "metadata": {}, "outputs": [], "source": [ + "import numpy as np # noqa\n", "import pandas as pd\n", "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf\n", - "import wooldridge as wool\n", - "import numpy as np" + "import wooldridge as wool" ] }, { diff --git a/notebooks/Ch9. Specification and Data Issues.ipynb b/notebooks/Ch9. Specification and Data Issues.ipynb new file mode 100644 index 0000000..3159a13 --- /dev/null +++ b/notebooks/Ch9. Specification and Data Issues.ipynb @@ -0,0 +1,868 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5d3db9d2", + "metadata": {}, + "source": [ + "# Ch9. Specification and Data Issues" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dbadb3c8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install matplotlib numpy pandas statsmodels wooldridge scipy -q" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ce9220f5", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import statsmodels.api as sm\n", + "import statsmodels.formula.api as smf\n", + "import statsmodels.stats.outliers_influence as smo\n", + "import wooldridge as wool\n", + "from scipy import stats" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.1 Functional Form Misspecification\n", + "\n", + "### Example 9.2: Housing Price Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept 166.0973 317.4325 0.5233 0.6022\n", + "lotsize 0.0002 0.0052 0.0295 0.9765\n", + "sqrft 0.0176 0.2993 0.0588 0.9532\n", + "bdrms 2.1749 33.8881 0.0642 0.9490\n", + "fitted_sq 0.0004 0.0071 0.0498 0.9604\n", + "fitted_cub 0.0000 0.0000 0.2358 0.8142\n", + "\n" + ] + } + ], + "source": [ + "hprice1 = wool.data(\"hprice1\")\n", + "\n", + "# original OLS:\n", + "reg = smf.ols(formula=\"price ~ lotsize + sqrft + bdrms\", data=hprice1)\n", + "results = reg.fit()\n", + "\n", + "# regression for RESET test:\n", + "hprice1[\"fitted_sq\"] = results.fittedvalues**2\n", + "hprice1[\"fitted_cub\"] = results.fittedvalues**3\n", + "reg_reset = smf.ols(\n", + " formula=\"price ~ lotsize + sqrft + bdrms + fitted_sq + fitted_cub\",\n", + " data=hprice1,\n", + ")\n", + "results_reset = reg_reset.fit()\n", + "\n", + "# print regression table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_reset.params, 4),\n", + " \"se\": round(results_reset.bse, 4),\n", + " \"t\": round(results_reset.tvalues, 4),\n", + " \"pval\": round(results_reset.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fstat_man: 4.668205534946464\n", + "\n", + "fpval_man: 0.012021711442908005\n", + "\n" + ] + } + ], + "source": [ + "# RESET test (H0: all coeffs including \"fitted\" are=0):\n", + "hypotheses = [\"fitted_sq = 0\", \"fitted_cub = 0\"]\n", + "ftest_man = results_reset.f_test(hypotheses)\n", + "fstat_man = ftest_man.statistic\n", + "fpval_man = ftest_man.pvalue\n", + "\n", + "print(f\"fstat_man: {fstat_man}\\n\")\n", + "print(f\"fpval_man: {fpval_man}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fstat_auto: 4.668205534948772\n", + "\n", + "fpval_auto: 0.012021711442883198\n", + "\n" + ] + } + ], + "source": [ + "hprice1 = wool.data(\"hprice1\")\n", + "\n", + "# original linear regression:\n", + "reg = smf.ols(formula=\"price ~ lotsize + sqrft + bdrms\", data=hprice1)\n", + "results = reg.fit()\n", + "\n", + "# automated RESET test:\n", + "reset_output = smo.reset_ramsey(res=results, degree=3)\n", + "fstat_auto = reset_output.statistic\n", + "fpval_auto = reset_output.pvalue\n", + "\n", + "print(f\"fstat_auto: {fstat_auto}\\n\")\n", + "print(f\"fpval_auto: {fpval_auto}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "anovaResults1: \n", + " df_resid ssr df_diff ss_diff F Pr(>F)\n", + "0 84.0 300723.805123 0.0 NaN NaN NaN\n", + "1 82.0 252340.364481 2.0 48383.440642 7.861291 0.000753\n", + "\n" + ] + } + ], + "source": [ + "hprice1 = wool.data(\"hprice1\")\n", + "\n", + "# two alternative models:\n", + "reg1 = smf.ols(formula=\"price ~ lotsize + sqrft + bdrms\", data=hprice1)\n", + "results1 = reg1.fit()\n", + "\n", + "reg2 = smf.ols(\n", + " formula=\"price ~ np.log(lotsize) +np.log(sqrft) + bdrms\",\n", + " data=hprice1,\n", + ")\n", + "results2 = reg2.fit()\n", + "\n", + "# encompassing test of Davidson & MacKinnon:\n", + "# comprehensive model:\n", + "reg3 = smf.ols(\n", + " formula=\"price ~ lotsize + sqrft + bdrms + np.log(lotsize) + np.log(sqrft)\",\n", + " data=hprice1,\n", + ")\n", + "results3 = reg3.fit()\n", + "\n", + "# model 1 vs. comprehensive model:\n", + "anovaResults1 = sm.stats.anova_lm(results1, results3)\n", + "print(f\"anovaResults1: \\n{anovaResults1}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "anovaResults2: \n", + " df_resid ssr df_diff ss_diff F Pr(>F)\n", + "0 84.0 295735.273607 0.0 NaN NaN NaN\n", + "1 82.0 252340.364481 2.0 43394.909126 7.05076 0.001494\n", + "\n" + ] + } + ], + "source": [ + "# model 2 vs. comprehensive model:\n", + "anovaResults2 = sm.stats.anova_lm(results2, results3)\n", + "print(f\"anovaResults2: \\n{anovaResults2}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.2 Measurement Error" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b1_mean: 0.5002159846382418\n", + "\n", + "b1_me_mean: 0.4999676458235338\n", + "\n" + ] + } + ], + "source": [ + "# set the random seed:\n", + "np.random.seed(1234567)\n", + "\n", + "# set sample size and number of simulations:\n", + "n = 1000\n", + "r = 10000\n", + "\n", + "# set true parameters (betas):\n", + "beta0 = 1\n", + "beta1 = 0.5\n", + "\n", + "# initialize arrays to store results later (b1 without ME, b1_me with ME):\n", + "b1 = np.empty(r)\n", + "b1_me = np.empty(r)\n", + "\n", + "# draw a sample of x, fixed over replications:\n", + "x = stats.norm.rvs(4, 1, size=n)\n", + "\n", + "# repeat r times:\n", + "for i in range(r):\n", + " # draw a sample of u:\n", + " u = stats.norm.rvs(0, 1, size=n)\n", + "\n", + " # draw a sample of ystar:\n", + " ystar = beta0 + beta1 * x + u\n", + "\n", + " # measurement error and mismeasured y:\n", + " e0 = stats.norm.rvs(0, 1, size=n)\n", + " y = ystar + e0\n", + " df = pd.DataFrame({\"ystar\": ystar, \"y\": y, \"x\": x})\n", + "\n", + " # regress ystar on x and store slope estimate at position i:\n", + " reg_star = smf.ols(formula=\"ystar ~ x\", data=df)\n", + " results_star = reg_star.fit()\n", + " b1[i] = results_star.params[\"x\"]\n", + "\n", + " # regress y on x and store slope estimate at position i:\n", + " reg_me = smf.ols(formula=\"y ~ x\", data=df)\n", + " results_me = reg_me.fit()\n", + " b1_me[i] = results_me.params[\"x\"]\n", + "\n", + "# mean with and without ME:\n", + "b1_mean = np.mean(b1)\n", + "b1_me_mean = np.mean(b1_me)\n", + "print(f\"b1_mean: {b1_mean}\\n\")\n", + "print(f\"b1_me_mean: {b1_me_mean}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b1_var: 0.0010335543409510668\n", + "\n", + "b1_me_var: 0.0020439380493408005\n", + "\n" + ] + } + ], + "source": [ + "# variance with and without ME:\n", + "b1_var = np.var(b1, ddof=1)\n", + "b1_me_var = np.var(b1_me, ddof=1)\n", + "print(f\"b1_var: {b1_var}\\n\")\n", + "print(f\"b1_me_var: {b1_me_var}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b1_mean: 0.5002159846382418\n", + "\n", + "b1_me_mean: 0.2445467197788616\n", + "\n" + ] + } + ], + "source": [ + "# set the random seed:\n", + "np.random.seed(1234567)\n", + "\n", + "# set sample size and number of simulations:\n", + "n = 1000\n", + "r = 10000\n", + "\n", + "# set true parameters (betas):\n", + "beta0 = 1\n", + "beta1 = 0.5\n", + "\n", + "# initialize b1 arrays to store results later:\n", + "b1 = np.empty(r)\n", + "b1_me = np.empty(r)\n", + "\n", + "# draw a sample of x, fixed over replications:\n", + "xstar = stats.norm.rvs(4, 1, size=n)\n", + "\n", + "# repeat r times:\n", + "for i in range(r):\n", + " # draw a sample of u:\n", + " u = stats.norm.rvs(0, 1, size=n)\n", + "\n", + " # draw a sample of y:\n", + " y = beta0 + beta1 * xstar + u\n", + "\n", + " # measurement error and mismeasured x:\n", + " e1 = stats.norm.rvs(0, 1, size=n)\n", + " x = xstar + e1\n", + " df = pd.DataFrame({\"y\": y, \"xstar\": xstar, \"x\": x})\n", + "\n", + " # regress y on xstar and store slope estimate at position i:\n", + " reg_star = smf.ols(formula=\"y ~ xstar\", data=df)\n", + " results_star = reg_star.fit()\n", + " b1[i] = results_star.params[\"xstar\"]\n", + "\n", + " # regress y on x and store slope estimate at position i:\n", + " reg_me = smf.ols(formula=\"y ~ x\", data=df)\n", + " results_me = reg_me.fit()\n", + " b1_me[i] = results_me.params[\"x\"]\n", + "\n", + "# mean with and without ME:\n", + "b1_mean = np.mean(b1)\n", + "b1_me_mean = np.mean(b1_me)\n", + "print(f\"b1_mean: {b1_mean}\\n\")\n", + "print(f\"b1_me_mean: {b1_me_mean}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b1_var: 0.0010335543409510668\n", + "\n", + "b1_me_var: 0.0005435611029837354\n", + "\n" + ] + } + ], + "source": [ + "# variance with and without ME:\n", + "b1_var = np.var(b1, ddof=1)\n", + "b1_me_var = np.var(b1_me, ddof=1)\n", + "print(f\"b1_var: {b1_var}\\n\")\n", + "print(f\"b1_me_var: {b1_me_var}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.3 Missing Data and Nonrandom Samples" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "results: \n", + " x logx invx ncdf isnanx\n", + "0 -1.0 NaN -1.0 0.158655 False\n", + "1 0.0 -inf inf 0.500000 False\n", + "2 1.0 0.0 1.0 0.841345 False\n", + "3 NaN NaN NaN NaN True\n", + "4 inf inf 0.0 1.000000 False\n", + "5 -inf NaN -0.0 0.000000 False\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_16428/3106953107.py:3: RuntimeWarning: divide by zero encountered in log\n", + " logx = np.log(x)\n", + "/tmp/ipykernel_16428/3106953107.py:3: RuntimeWarning: invalid value encountered in log\n", + " logx = np.log(x)\n", + "/tmp/ipykernel_16428/3106953107.py:4: RuntimeWarning: divide by zero encountered in divide\n", + " invx = np.array(1 / x)\n" + ] + } + ], + "source": [ + "# nan and inf handling in numpy:\n", + "x = np.array([-1, 0, 1, np.nan, np.inf, -np.inf])\n", + "logx = np.log(x)\n", + "invx = np.array(1 / x)\n", + "ncdf = np.array(stats.norm.cdf(x))\n", + "isnanx = np.isnan(x)\n", + "\n", + "results = pd.DataFrame(\n", + " {\"x\": x, \"logx\": logx, \"invx\": invx, \"logx\": logx, \"ncdf\": ncdf, \"isnanx\": isnanx},\n", + ")\n", + "print(f\"results: \\n{results}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "preview: \n", + " lsat_pd missLSAT\n", + "119 156.0 False\n", + "120 159.0 False\n", + "121 157.0 False\n", + "122 167.0 False\n", + "123 NaN True\n", + "124 158.0 False\n", + "125 155.0 False\n", + "126 157.0 False\n", + "127 NaN True\n", + "128 163.0 False\n", + "\n" + ] + } + ], + "source": [ + "lawsch85 = wool.data(\"lawsch85\")\n", + "lsat_pd = lawsch85[\"LSAT\"]\n", + "\n", + "# create boolean indicator for missings:\n", + "missLSAT = lsat_pd.isna()\n", + "\n", + "# LSAT and indicator for Schools No. 120-129:\n", + "preview = pd.DataFrame({\"lsat_pd\": lsat_pd[119:129], \"missLSAT\": missLSAT[119:129]})\n", + "print(f\"preview: \\n{preview}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "freq_missLSAT: \n", + "col_0 count\n", + "LSAT \n", + "False 150\n", + "True 6\n", + "\n" + ] + } + ], + "source": [ + "# frequencies of indicator:\n", + "freq_missLSAT = pd.crosstab(missLSAT, columns=\"count\")\n", + "print(f\"freq_missLSAT: \\n{freq_missLSAT}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "colsums: \n", + "rank 0\n", + "salary 8\n", + "cost 6\n", + "LSAT 6\n", + "GPA 7\n", + "libvol 1\n", + "faculty 4\n", + "age 45\n", + "clsize 3\n", + "north 0\n", + "south 0\n", + "east 0\n", + "west 0\n", + "lsalary 8\n", + "studfac 6\n", + "top10 0\n", + "r11_25 0\n", + "r26_40 0\n", + "r41_60 0\n", + "llibvol 1\n", + "lcost 6\n", + "dtype: int64\n", + "\n" + ] + } + ], + "source": [ + "# missings for all variables in data frame (counts):\n", + "miss_all = lawsch85.isna()\n", + "colsums = miss_all.sum(axis=0)\n", + "print(f\"colsums: \\n{colsums}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "freq_complete_cases: \n", + "col_0 count\n", + "row_0 \n", + "False 66\n", + "True 90\n", + "\n" + ] + } + ], + "source": [ + "# computing amount of complete cases:\n", + "complete_cases = miss_all.sum(axis=1) == 0\n", + "freq_complete_cases = pd.crosstab(complete_cases, columns=\"count\")\n", + "print(f\"freq_complete_cases: \\n{freq_complete_cases}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x_np_bar1: nan\n", + "\n", + "x_np_bar2: 158.29333333333332\n", + "\n" + ] + } + ], + "source": [ + "lawsch85 = wool.data(\"lawsch85\")\n", + "\n", + "# missings in numpy:\n", + "x_np = np.array(lawsch85[\"LSAT\"])\n", + "x_np_bar1 = np.mean(x_np)\n", + "x_np_bar2 = np.nanmean(x_np)\n", + "print(f\"x_np_bar1: {x_np_bar1}\\n\")\n", + "print(f\"x_np_bar2: {x_np_bar2}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x_pd_bar1: 158.29333333333332\n", + "\n", + "x_pd_bar2: 158.29333333333332\n", + "\n" + ] + } + ], + "source": [ + "# missings in pandas:\n", + "x_pd = lawsch85[\"LSAT\"]\n", + "x_pd_bar1 = np.mean(x_pd)\n", + "x_pd_bar2 = np.nanmean(x_pd)\n", + "print(f\"x_pd_bar1: {x_pd_bar1}\\n\")\n", + "print(f\"x_pd_bar2: {x_pd_bar2}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "lawsch85.shape: (156, 21)\n", + "\n" + ] + } + ], + "source": [ + "# observations and variables:\n", + "print(f\"lawsch85.shape: {lawsch85.shape}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "results.nobs: 95.0\n", + "\n" + ] + } + ], + "source": [ + "# regression (missings are taken care of by default):\n", + "reg = smf.ols(formula=\"np.log(salary) ~ LSAT + cost + age\", data=lawsch85)\n", + "results = reg.fit()\n", + "print(f\"results.nobs: {results.nobs}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.4 Outlying Observations" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "studres_max: 4.555033421514247\n", + "\n", + "studres_min: -1.8180393952811693\n", + "\n" + ] + } + ], + "source": [ + "rdchem = wool.data(\"rdchem\")\n", + "\n", + "# OLS regression:\n", + "reg = smf.ols(formula=\"rdintens ~ sales + profmarg\", data=rdchem)\n", + "results = reg.fit()\n", + "\n", + "# studentized residuals for all observations:\n", + "studres = results.get_influence().resid_studentized_external\n", + "\n", + "# display extreme values:\n", + "studres_max = np.max(studres)\n", + "studres_min = np.min(studres)\n", + "print(f\"studres_max: {studres_max}\\n\")\n", + "print(f\"studres_min: {studres_min}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'studres')" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGwCAYAAABVdURTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABR7klEQVR4nO3deVxVdf4/8NdlXxRkE0URyBWFRMCV3BVUdMRKKUtr0tJxadSp1JwZxCltWhysBstyGSv7uS8lLowLLrgioCDmSqCACLK6sN3z+4MvZ0BAWS587j339Xw8zqNzD+fe+ybE+/KzqiRJkkBERESkEAaiCyAiIiLSJIYbIiIiUhSGGyIiIlIUhhsiIiJSFIYbIiIiUhSGGyIiIlIUhhsiIiJSFCPRBTQ3tVqNtLQ0tGzZEiqVSnQ5REREVAeSJKGgoABOTk4wMHh624zehZu0tDQ4OzuLLoOIiIgaIDU1Fe3bt3/qPXoXblq2bAmg/H+OlZWV4GqIiIioLvLz8+Hs7Cx/jj+N3oWbiq4oKysrhhsiIiIdU5chJRxQTERERIrCcENERESKwnBDREREisJwQ0RERIoiPNyEh4fDzc0NZmZm8PHxwfHjx596f1FREZYsWQIXFxeYmpqiY8eOWLduXTNVS0RERNpO6GypzZs3Y968eQgPD4efnx++/fZbjB49GpcvX0aHDh1qfM6kSZNw9+5drF27Fp06dUJmZiZKS0ubuXIiIiLSVipJkiRRb963b194e3tj9erV8jV3d3cEBQVhxYoV1e7fv38/XnnlFdy8eRO2trYNes/8/HxYW1sjLy+PU8GJiIh0RH0+v4V1SxUXFyMmJgb+/v5Vrvv7+yM6OrrG5+zZswe+vr749NNP0a5dO3Tp0gXvvfceHj16VOv7FBUVIT8/v8pBREREyiWsWyorKwtlZWVwdHSsct3R0REZGRk1PufmzZs4ceIEzMzMsHPnTmRlZWHWrFm4f/9+reNuVqxYgdDQUI3XT0RERNpJ+IDiJ1calCSp1tUH1Wo1VCoVfvrpJ/Tp0wdjxozBypUrsWHDhlpbbxYvXoy8vDz5SE1N1fj3QERERNpDWMuNvb09DA0Nq7XSZGZmVmvNqdC2bVu0a9cO1tbW8jV3d3dIkoTbt2+jc+fO1Z5jamoKU1NTzRZPREREWktYy42JiQl8fHwQGRlZ5XpkZCQGDBhQ43P8/PyQlpaGwsJC+drVq1dhYGDwzB1CiYiISD8I7ZZasGABvv/+e6xbtw5JSUmYP38+UlJSMHPmTADlXUpTp06V7588eTLs7Ozwxz/+EZcvX8axY8fw/vvv46233oK5ubmob4OIiIi0iNB1boKDg5GdnY1ly5YhPT0dHh4eiIiIgIuLCwAgPT0dKSkp8v0tWrRAZGQk5s6dC19fX9jZ2WHSpEn46KOPRH0LREREpGWErnMjAte5ISIi0j31+fwW2nJD1BjaMsU/JCREdAlERFSJ8KngRERERJrEcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ0RERIrCcENERESKwnBDREREisJwQ9RAarUaOTk5SEtLE10KERFVwnBDVE9qtRqnTp3CypUrsWrVKrRr1w49evTArl27RJdGRERguCGql7KyMmzduhUHDhxAYWGhfP3y5cuYMGECFi5cCEmSBFZIREQMN0T1sH//fiQlJcmPO3bsiN69e8uPP/30U4SGhooojYiI/g/DDVEdXb16FefOnQMAGBoaYvLkyZgyZQrOnDmDf//731CpVACA0NBQREREiCyViEivMdwQ1UFpaSn27dsnPx49ejS6dOkCAFCpVJg1axY+/fRT+etvvfUW8vLymr1OIiLSgnATHh4ONzc3mJmZwcfHB8ePH6/13qNHj0KlUlU7rly50owVkz46f/48cnJyAACurq7w8fGpds9f/vIXBAYGAgDu3r2Ljz76qFlrJCKickLDzebNmzFv3jwsWbIEsbGxGDhwIEaPHo2UlJSnPu+3335Denq6fHTu3LmZKiZ9VFpaiujoaPlxQECA3AVVmUqlwtdffw0zMzMAwKpVq3D16tVmq5OIiMoJDTcrV67EtGnTMH36dLi7uyMsLAzOzs5YvXr1U5/XunVrtGnTRj4MDQ2bqWLSR0lJScjPzwcAdOnSBW3btq31XldXV7z//vsAgJKSEixcuLBZaiQiov8RFm6Ki4sRExMDf3//Ktf9/f2r/Cu5Jr169ULbtm0xfPhwHDly5Kn3FhUVIT8/v8pBVB+xsbHyef/+/Z95/8KFC+Hk5AQA2LVrFxITE5usNiIiqk5YuMnKykJZWRkcHR2rXHd0dERGRkaNz2nbti3WrFmD7du3Y8eOHejatSuGDx+OY8eO1fo+K1asgLW1tXw4Oztr9PsgZcvNzcXNmzcBADY2NnB1dX3mcywtLeXWGwBVBhoTEVHTEz6g+MmxC5Ik1TieAQC6du2Kt99+G97e3ujfvz/Cw8MRGBiIzz//vNbXX7x4MfLy8uQjNTVVo/WTssXHx8vnXl5etf7ZfNL06dNha2sLANi0aRN+//33JqmPiIiqExZu7O3tYWhoWK2VJjMzs1prztP069cP165dq/XrpqamsLKyqnIQ1YUkSYiLi5Mf9+zZs87PbdGiBebOnQugfEDyqlWrNF0eERHVQli4MTExgY+PDyIjI6tcj4yMxIABA+r8OrGxsU8d4EnUUBkZGfL0bzc3N7Rq1apez58zZw5MTU0BABs2bMCjR480XSIREdVAaLfUggUL8P3332PdunVISkrC/PnzkZKSgpkzZwIo71KaOnWqfH9YWBh27dqFa9euITExEYsXL8b27dsxZ84cUd8CKVjladzdu3ev9/Pt7e0RHBwMAMjJycGWLVs0VhsREdXOSOSbBwcHIzs7G8uWLUN6ejo8PDwQEREBFxcXAEB6enqVNW+Ki4vx3nvv4c6dOzA3N0ePHj2wd+9ejBkzRtS3QApWOdw0dC2lmTNnYuPGjQCA1atX44033tBIbUREVDuVpGdbGOfn58Pa2hp5eXkcf6PjmnKDysLCQnmguqOjI/70pz/Vem9ISEitX5MkCV5eXrh48SKA8m5ULy8vjdZKRKQP6vP5LXy2FJE2qtxqU7GHVEOoVCq5mxUA/vOf/zSqLiIiejaGG6IaaCrcAMArr7wCExMTAMDPP/+M0tLSRr0eERE9HcMN0RPUajVu3boFALCwsEC7du0a9Xo2NjYYO3YsgPINNf/73/82ukYiIqodww3REzIyMlBUVASgfK8oA4PG/5pMmTJFPv/hhx8a/XpERFQ7hhuiJ1ReTbhi5l5jjRkzRl6xeOfOnSgoKNDI6xIRUXUMN0RPSE5Ols/rspdUXZiYmMhr3jx69Ag7duzQyOsSEVF1DDdElajVanltJXNzczg4OGjstdk1RUTUPBhuiCrJzMyUt0lwcXHRyHibCv369UOnTp0AAIcPH8bdu3c19tpERPQ/DDdElTTFeJsKKpVK7pqSJIldU0RETYThhqiSytt9aGq8TWUTJ06Uz7du3arx1yciIoYboiru3LkDADA2Nkbr1q01/vrPP/+83DUVFRWFzMxMjb8HEZG+Y7gh+j+FhYXIzc0FALRt2xaGhoYafw+VSiW33qjVauzcuVPj70FEpO8Yboj+T0WrDQC0b9++yd6HXVNERE2L4Ybo/1QON43dcuFpvLy88NxzzwEAjh49inv37jXZexER6SOGG6L/01zhpnLXVFlZGXbt2tVk70VEpI8YbohQPjW7ItxYWlrC2tq6Sd+PXVNERE2H4YYIwP379/H48WMA5a02KpWqSd/P29tbnmp++PBhZGdnN+n7ERHpE4YbIjTfYOIK7JoiImo6DDdEANLT0+Xztm3bNst7vvzyy/I5VysmItIchhsiABkZGfJ5mzZtmuU9fX195YHL//3vf1FQUNAs70tEpHQMN6T3JEmSw02LFi3QsmXLZnlfAwMDBAUFAQCKi4uxb9++ZnlfIiKlY7ghvZefny/vBN5crTYVJkyYIJ9ztWIiIs1guCG9J6JLqsKgQYNgY2MDANi7dy+Kioqa9f2JiJSI4Yb0nshwY2xsjHHjxgEACgoKcPjw4WZ9fyIiJWK4Ib0nMtwAkMfdAOCUcCIiDWC4Ib1XEW6MjY1ha2vb7O8fEBAAc3NzAMDu3btRVlbW7DUQESkJww3ptcePHyMnJwcA4OjoCAOD5v+VsLCwQEBAAADg7t27OH36dLPXQESkJAw3pNcyMzPlc0dHR2F1cNYUEZHmMNyQXrt375583rp1a2F1jB07FoaGhgDKw40kScJqISLSdQw3pNcqt9w4ODgIq8PW1haDBw8GANy8eRMJCQnCaiEi0nUMN6TXtKXlBmDXFBGRpjDckF6raLkxNzeHpaWl0FoqTwlnuCEiajiGG9Jbjx49QmFhIYDyVhuVSiW0nvbt26N3794AgLi4ONy6dUtoPUREuorhhvSWtoy3qaxy1xQX9CMiahiGG9Jb2jTepgJXKyYiajyGG9Jb2thy4+7ujq5duwIATpw4USWAERFR3TDckN7SxpYb4H9dU2q1Gnv27BFcDRGR7mG4Ib1V0XJjYWEhfKZUZZwSTkTUOAw3pJcePnyIBw8eANCuVhsA8PX1Rbt27QAAkZGRKCgoEFwREZFuYbghvaSN420qGBgYYPz48QCA4uJi7Nu3T3BFRES6heGG9JK2jrepwCnhREQNx3BDekmbW24AYPDgwbCxsQEA7N27F8XFxYIrIiLSHQw3pJe0veXG2NgYY8eOBQDk5+fj8OHDgisiItIdDDekl7KysgCUz5SysLAQXE3NOGuKiKhhGG5I7zx+/FjeU8re3l5wNbXz9/eHmZkZAGD37t0oKysTXBERkW5guCG9k52dLZ/b2dkJrOTpLC0tERAQAAC4e/cuTp8+LbgiIiLdwHBDeqdyuNHmlhugatfU7t27BVZCRKQ7GG5I7+hKyw0AjB07FgYG5b+mO3fuhCRJgisiItJ+DDekdyoGEwPa33JjZ2eHQYMGAQCuX7+OpKQkwRUREWk/4eEmPDwcbm5uMDMzg4+PD44fP16n5508eRJGRkbw8vJq2gJJcSpablQqFVq1aiW2mDoICgqSz7mgHxHRswkNN5s3b8a8efOwZMkSxMbGYuDAgRg9ejRSUlKe+ry8vDxMnToVw4cPb6ZKSSnUarUcbmxsbGBkZCS4omer2IoBYLghIqoLoeFm5cqVmDZtGqZPnw53d3eEhYXB2dkZq1evfurzZsyYgcmTJ6N///7PfI+ioiLk5+dXOUh/FRQUoKSkBID2d0lVcHV1lVsoz507h9u3b4stiIhIywkLN8XFxYiJiYG/v3+V6/7+/oiOjq71eevXr8eNGzcQEhJSp/dZsWIFrK2t5cPZ2blRdZNuqzzeRtsHE1dWuWtqz5494gohItIBwsJNVlYWysrK4OjoWOW6o6MjMjIyanzOtWvXsGjRIvz000917k5YvHgx8vLy5CM1NbXRtZPu0qVp4JVx3A0RUd0JH3CgUqmqPJYkqdo1ACgrK8PkyZMRGhqKLl261Pn1TU1NYWpq2ug6SRl0aRp4Zc8//zxcXV2RnJyMI0eOIDc3VycGQxMRiSCs5cbe3h6GhobVWmkyMzOrteYA5WMlzp8/jzlz5sDIyAhGRkZYtmwZ4uPjYWRkxI0FqU50aRp4ZSqVSm69KS0tRUREhNiCiIi0mLBwY2JiAh8fH0RGRla5HhkZiQEDBlS738rKCpcuXUJcXJx8zJw5E127dkVcXBz69u3bXKWTDqtouTE1NYWlpaXgauqHXVNERHUjtFtqwYIFmDJlCnx9fdG/f3+sWbMGKSkpmDlzJoDy8TJ37tzBxo0bYWBgAA8PjyrPb926NczMzKpdJ6pJSUkJcnNzAZR3SdXU/anN/Pz8YGdnh+zsbOzbtw+PHz+WN9YkIqL/EToVPDg4GGFhYVi2bBm8vLxw7NgxREREwMXFBQCQnp7+zDVviOrq/v378rkudUlVMDIywrhx4wAAhYWF7IolIqqF8BWKZ82aheTkZBQVFSEmJkZeah4ANmzYgKNHj9b63KVLlyIuLq7piyRF0NVp4JWxa4qI6NmEhxui5lK55UZXw83IkSNhbm4OoHyX8LKyMsEVERFpH4Yb0htKCDcWFhYICAgAUD6z8MyZM4IrIiLSPgw3pDcqhxtbW1uBlTQOu6aIiJ6O4Yb0RkW4sbS01OmFHceOHQsDg/Jf3Z07d0KSJMEVERFpF4Yb0gvFxcUoKCgAoNutNkB5l1rFwPvr168jKSlJcEVERNqF4Yb0glK6pCqwa4qIqHYMN6QXlBZuxo8fL58z3BARVcVwQ3pBaeHG1dUVXl5eAIBz587h9u3bYgsiItIiDDekF5QWboCqXVN79uwRVwgRkZZhuCG9oPRww64pIqL/YbghvVARbszNzeUVfnXd888/D1dXVwDAkSNH5E1BiYj0HcMNKV5JSQny8/MB6O7KxDVRqVRy601paSkiIiLEFkREpCUYbkjxcnJy5HOldElVYNcUEVF1DDekeEocb1PBz89Pbo3at28fHj9+LLgiIiLxGG5I8ZQcboyMjDBu3DgAQGFhIQ4fPiy4IiIi8RhuSPGUHG4Adk0RET2J4YYUT+nhZuTIkfIMsN27d0OtVguuiIhILIYbUryKcGNmZqaYaeCVWVhYICAgAACQmZmJs2fPCq6IiEgshhtStNLSUuTl5QEob7VRqVSCK2oaFeNuAODXX38VWAkRkXgMN6Roubm5kCQJgDK7pCqMGTNGPme4ISJ9x3BDiqb08TYV2rRpg969ewMA4uPjkZqaKrgiIiJxGG5I0fQl3ADA2LFj5fO9e/cKrISISCyGG1K07Oxs+Vzp4abyuJtffvlFYCVERGIx3JCi6VPLjZeXF5ycnAAAhw4dwoMHDwRXREQkRoPCzdGjRzVcBlHTqAg3JiYmsLS0FFxN01KpVHLXVFFREVcrJiK91aBwM2rUKHTs2BEfffQRBy6S1iorK0Nubi4AZU8Dr6zyuBvOmiIifWXUkCelpaXhxx9/xIYNG7B06VIMHz4c06ZNQ1BQEExMTDRdI1GDNNc08NDQ0CZ77boKCQkBAAwfPhxmZmZ4/Pgxfv31V0iSpBehjoiosga13Nja2uLdd9/FhQsXcP78eXTt2hWzZ89G27Zt8e677yI+Pl7TdRLVmz6Nt6lgYWGB4cOHAyj/R0hsbKzgioiIml+jBxR7eXlh0aJFmD17Nh48eIB169bBx8cHAwcORGJioiZqJGoQfQw3ALumiIgaHG5KSkqwbds2jBkzBi4uLjhw4AC+/vpr3L17F7du3YKzszMmTpyoyVqJ6kVfw01gYKB8znBDRPqoQeFm7ty5aNu2LWbOnIkuXbogNjYWp06dwvTp02FpaQlnZ2d88sknuHLliqbrJaozfQ03zs7O6NmzJwDg3LlzyMjIEFwREVHzalC4uXz5Mr766iukpaUhLCwMHh4e1e5xcnLCkSNHGl0gUUNVhBsjIyO0bNlScDXNq3LXVEREhMBKiIiaX4PCTUhICCZOnFhtZlRpaSmOHTsGoPwDZfDgwY2vkKgB1Go1cnJyAOjPNPDKKocbrlZMRPqmQeFm6NChVZr8K+Tl5WHo0KGNLoqosfLy8qBWqwHoV5dUhT59+sDBwQEAEBkZicePHwuuiIio+TQo3NS2dkZ2drbiV4El3VDRagPoZ7gxMDCQBxY/ePAAUVFRgisiImo+9VrE78UXXwRQvsz7m2++CVNTU/lrZWVluHjxIgYMGKDZCokaQF8HE1c2duxYbNiwAUD5LuEBAQFiCyIiaib1armxtraGtbU1JElCy5Yt5cfW1tZo06YN3nnnHfz4449NVStRnVUONzY2NgIrEWfkyJEwNjYGAHm1YiIifVCvlpv169cDAFxdXfHee++xC4q0FltuACsrKwwcOBCHDx/GrVu3cOXKFbi7u4sui4ioyTV4thSDDWmzijE3BgYGsLa2FlyNOJUX9Nu7d6/ASoiImk+dW268vb1x6NAh2NjYoFevXk+dWnvhwgWNFEfUEJIkyS03NjY2MDBo9C4jOiswMBB/+ctfAJSHm/fee09wRURETa/O4Wb8+PHyAOKgoKCmqoeo0QoLC1FSUgJAf7ukKnTp0gUdO3bEjRs3cOLECeTl5el1SxYR6Yc6h5uQkJAaz4m0DQcT/49KpUJgYCC+/PJLlJaW4uDBg9zzjYgUr0Ht9ampqbh9+7b8+OzZs5g3bx7WrFmjscKIGoqDiaviuBsi0jcNCjeTJ0+W943KyMjAiBEjcPbsWXz44YdYtmyZRgskqi99X8DvSYMHD5YnAOzbt09euZmISKkaFG4SEhLQp08fAMCWLVvg6emJ6OhobNq0SV40jEgUttxUZWpqihEjRgAAMjMzcf78ecEVERE1rQaFm5KSEnlw8X//+1/84Q9/AAB069YN6enpmquOqAEqh5tWrVqJK0SLsGuKiPRJg8JNjx498M033+D48eOIjIzEqFGjAABpaWmws7PTaIFE9VURbqytrWFkVK91KhVrzJgx8jnDDREpXYPCzT//+U98++23GDJkCF599VX07NkTALBnzx65u4pIhIcPH8o7YLNL6n/atWuHXr16AQBiYmLYwkpEitagf9YOGTIEWVlZyM/PrzLV9p133oGFhYXGiiOqLw4mrl1gYCBiY2MBABEREZg2bZrgioiImkaDl241NDSstoaIq6srWrduXa/XCQ8Ph5ubG8zMzODj44Pjx4/Xeu+JEyfg5+cHOzs7mJubo1u3bvjXv/7VoPpJmTiYuHYcd0NE+qJB4ebu3buYMmUKnJycYGRkBENDwypHXW3evBnz5s3DkiVLEBsbi4EDB2L06NFISUmp8X5LS0vMmTMHx44dQ1JSEv7617/ir3/9K9fXIRkX8Ktd7969YW9vDwCIjIxEUVGR4IqIiJpGg7ql3nzzTaSkpOBvf/sb2rZt+9R9pp5m5cqVmDZtGqZPnw4ACAsLw4EDB7B69WqsWLGi2v29evWSxw0A5S1FO3bswPHjx/HOO+/U+B5FRUVV/hLPz89vUK2kG9hyUztDQ0OMHj0aP/zwAwoLC3H8+HF5ijgRkZI0KNycOHECx48fh5eXV4PfuLi4GDExMVi0aFGV6/7+/oiOjq7Ta8TGxiI6OhofffRRrfesWLECoaGhDa6TdEvlMTdsuakuMDAQP/zwA4DyrimGGyJSogZ1Szk7O0OSpEa9cVZWFsrKyuDo6FjluqOjIzIyMp763Pbt28PU1BS+vr6YPXu23PJTk8WLFyMvL08+UlNTG1U3abeKlpsWLVrIazHR/wQEBMhdxxx3Q0RK1aBwExYWhkWLFiE5ObnRBTzZpSVJ0jO7uY4fP47z58/jm2++QVhYGH7++eda7zU1NYWVlVWVg5SpuLgYhYWFANglVZtWrVrBz88PAHDt2jVcu3ZNcEVERJrXoG6p4OBgPHz4EB07doSFhQWMjY2rfL3yuIfa2Nvbw9DQsForTWZmZrXWnCe5ubkBADw9PXH37l0sXboUr776aj2/C1IaDiaum8DAQBw7dgxAeevNvHnzxBZERKRhDQo3YWFhjX5jExMT+Pj4IDIyEhMmTJCvR0ZGYvz48XV+HUmSOOuDAHCNm7oKDAzEwoULATDcEJEyNSjcvPHGGxp58wULFmDKlCnw9fVF//79sWbNGqSkpGDmzJkAysfL3LlzBxs3bgQA/Pvf/0aHDh3QrVs3AOUDmz///HPMnTtXI/WQbuNMqbrp3r07XFxc8PvvvyMqKgoFBQVo2bKl6LKIiDSmwRvv3LhxA+vXr8eNGzewatUqtG7dGvv374ezszN69OhRp9cIDg5GdnY2li1bhvT0dHh4eCAiIgIuLi4AgPT09Cpr3qjVaixevBi3bt2CkZEROnbsiE8++QQzZsxo6LdBCsJwUzcqlQqBgYEIDw9HSUkJIiMj8eKLL4oui4hIYxo0oDgqKgqenp44c+YMduzYIQ/ivHjxIkJCQur1WrNmzUJycjKKiooQExODQYMGyV/bsGEDjh49Kj+eO3cuEhIS8ODBA+Tl5eHChQv405/+BAODBi+0TArCMTd1N3bsWPmcs6aISGkalAoWLVqEjz76CJGRkTAxMZGvDx06FKdOndJYcUT1URFuzMzMuMfZMwwZMgTm5uYAyveZUqvVgisiItKcBoWbS5cuVRkEXMHBwQHZ2dmNLoqovkpLS+XVp9kl9Wzm5uYYPnw4ACAjI0PeUJOISAkaFG5atWqF9PT0atdjY2PRrl27RhdFVF+5ubnywpIMN3XDjTSJSKkaFG4mT56MhQsXIiMjAyqVCmq1GidPnsR7772HqVOnarpGomfiYOL6GzNmjHzOcENEStKgcPPxxx+jQ4cOaNeuHQoLC9G9e3cMHDgQAwYMwF//+ldN10j0TBxMXH8dOnSAp6cnAODcuXPIzMwUXBERkWY0KNwYGxvjp59+wrVr17Blyxb8+OOP+O233/DDDz/I+9YQNScu4NcwFV1TkiRh3759gqshItKMOq9zs2DBgqd+/fTp0/L5ypUrG14RUQOwW6phAgMD8cknnwAo75rS1AKdREQi1TncPDmbIiYmBmVlZejatSsA4OrVqzA0NISPj49mKySqg4pwY2xsjBYtWgiuRnf069cPNjY2yMnJwYEDB1BSUlJtrzgiIl1T526pI0eOyMe4ceMwZMgQ3L59GxcuXMCFCxeQmpqKoUOHVpmBQdQc1Gq13C1la2v7zF3l6X+MjIwwatQoAEB+fj5OnDghuCIiosZr0JibL774AitWrKgycNPGxgYfffQRvvjiC40VR1QXeXl58iJ0HExcf5wSTkRK06Bwk5+fj7t371a7npmZiYKCgkYXRVQflcfb2NnZCaxEN40aNUrewoThhoiUoEHhZsKECfjjH/+Ibdu24fbt27h9+za2bduGadOmcQM+anaVV8VmuKk/Ozs79O/fHwBw5coV3Lx5U3BFRESN06Bw88033yAwMBCvv/46XFxc4OLigtdeew2jR49GeHi4pmskeqrK4YYzpRqGXVNEpCQNCjcWFhYIDw9HdnY2YmNjceHCBdy/fx/h4eGwtLTUdI1ET8WWm8arHG5+/fVXgZUQETVenaeC18TS0hLPP/+8pmohapCKcGNiYsJp4A3k6emJDh06ICUlBUeOHEFeXh6sra1Fl0VE1CANarkh0halpaXIzc0FUN5qw2ngDaNSqTBhwgQAQElJCbumiEinMdyQTqu8Gzi7pBqnItwAwI4dOwRWQkTUOAw3pNM43kZzXnjhBTg4OAAA9u3bh4cPHwquiIioYRhuSKdxppTmGBoaYvz48QCAhw8f4uDBg4IrIiJqGIYb0mlsudGsyutUsWuKiHQVww3pNIYbzRo2bBisrKwAAL/88guKi4sFV0REVH8MN6TTKsKNhYUFzM3NBVej+0xNTTF27FgA5YO1jx49KrYgIqIGYLghnVVcXCzvZcZWG81h1xQR6TqGG9JZ3DCzaYwaNQpmZmYAgF27dqGsrExwRURE9cNwQzqLM6WahqWlJUaNGgUAuHv3LqKjowVXRERUP43afoFIJA4m/p/Q0FCNvl7llZ7/8pe/VNl76mlCQkI0WgcRUUOw5YZ0FsNN0+natSuMjMr/7XP58mV2TRGRTmG4IZ3FbqmmY2Zmhs6dOwMAHjx4gOTkZLEFERHVA8MN6ayKcGNlZQUTExPB1SiPh4eHfJ6QkCCwEiKi+mG4IZ10//59PHr0CAC7pJpKly5d5NB4+fJllJaWCq6IiKhuGG5IJ127dk0+Z5dU0zA2Nka3bt0AAEVFRbh+/brgioiI6obhhnTS1atX5XO23DQddk0RkS5iuCGdxHDTPJ577jl5W4vffvsNRUVFgisiIno2hhvSSQw3zcPIyAg9evQAAJSUlODy5cuCKyIiejaGG9JJSUlJAAADAwPY2NgIrkbZvLy85PO4uDhhdRAR1RXDDemcsrIyueXG1tYWhoaGgitStnbt2sHe3h4A8PvvvyMnJ0dwRURET8dwQzonOTlZHvtR8aFLTUelUqFnz57y4/j4eIHVEBE9G8MN6ZyKLikAcHBwEFiJ/ujZs6e831RcXBzUarXgioiIasdwQzqncrhhy03zsLKywnPPPQcAyM3NRUpKiuCKiIhqx3BDOufKlSvyOVtumk/lgcWxsbHiCiEiegaGG9I5lVtuOA28+XTr1g1mZmYAgMTERDx8+FBwRURENWO4IZ0iSZIcbqysrGBqaiq4Iv1hbGwst96UlpZyYDERaS2GG9Ipd+/eRW5uLgB2SYng4+Mjn58/fx6SJAmshoioZgw3pFMqj7fhYOLm5+DgAFdXVwBAdnY2kpOThdZDRFQThhvSKZwGLp6vr698fv78eYGVEBHVjOGGdAqngYvXrVs3WFhYACj/eRQUFAiuiIioKoYb0ikMN+IZGRnB29sbAKBWq9l6Q0Rah+GGdErFmBtbW1tYWloKrkZ/+fr6yisWnzt3DiUlJYIrIiL6H+HhJjw8HG5ubjAzM4OPjw+OHz9e6707duzAyJEj4eDgACsrK/Tv3x8HDhxoxmpJpIKCAty+fRsA4O7uLn+4UvNr1aoVunfvDgB4+PAhLl26JLgiIqL/ERpuNm/ejHnz5mHJkiWIjY3FwIEDMXr06FqXdj927BhGjhyJiIgIxMTEYOjQoRg3bhxXS9UTlWdKdevWTWAlBAD9+/eXz0+dOsVp4USkNYSGm5UrV2LatGmYPn063N3dERYWBmdnZ6xevbrG+8PCwvDBBx+gd+/e6Ny5M5YvX47OnTvjl19+aebKSYTLly/L5+7u7gIrIQBo37492rdvDwC4d+8ebt68KbgiIqJywsJNcXExYmJi4O/vX+W6v78/oqOj6/QaarUaBQUFsLW1rfWeoqIi5OfnVzlIN1Xu+vD09BRYCVWo3HpT199bIqKmJizcZGVloaysDI6OjlWuOzo6IiMjo06v8cUXX+DBgweYNGlSrfesWLEC1tbW8uHs7Nyoukkchhvt061bN1hbWwMAbty4wS5iItIKwgcUPzkoVJKkOg0U/fnnn7F06VJs3rwZrVu3rvW+xYsXIy8vTz5SU1MbXTOJkZCQAKB8plSbNm0EV0MAYGhoiAEDBsiPly9fLrAaIqJywsKNvb09DA0Nq7XSZGZmVmvNedLmzZsxbdo0bNmyBSNGjHjqvaamprCysqpykO65f/8+0tLSAJS32nCmlPbw9vaWp+Vv3769ylpEREQiCAs3JiYm8PHxQWRkZJXrkZGRVf4l+KSff/4Zb775JjZt2oTAwMCmLpO0BLuktJexsbH8OytJElasWCG4IiLSd0K7pRYsWIDvv/8e69atQ1JSEubPn4+UlBTMnDkTQHmX0tSpU+X7f/75Z0ydOhVffPEF+vXrh4yMDGRkZCAvL0/Ut0DNpHK48fDwEFgJ1cTX1xfm5uYAgE2bNnHmFBEJJTTcBAcHIywsDMuWLYOXlxeOHTuGiIgIuLi4AADS09OrrHnz7bfforS0FLNnz0bbtm3l489//rOob4GaScV4G4AtN9rI1NQU/fr1AwCUlZXhn//8p+CKiEifqSQ9W3krPz8f1tbWyMvL4/gbHeLn5ydPNa742YWGhgquiip79OgRwsPDUVBQABMTE9y4cUNeB4eIqLHq8/ktfLYU0bNIkiS33Li4uDCUailzc3PMmTMHQPk6Vhx7Q0SiMNyQ1ktJSZEXX+R4G+02f/58eebUd999h+TkZLEFEZFeYrghrcfxNrrDwcEB8+fPBwCUlJSw65CIhGC4Ia3HaeC65S9/+QtatWoFANi4cWOVDU+JiJoDww1pPYYb3dKqVSt88MEHAMr3f/v73/8uuCIi0jcMN6T1KsKNkZERunbtKrgaqot3331X3hZl69at3HOKiJoVww1ptaKiIrlbo2vXrjAxMRFcEdWFpaUllixZIj/+29/+JrAaItI3DDek1RITE1FSUgIA6NWrl+BqqD5mzJgBZ2dnAMDevXvldYqIiJoaww1ptQsXLsjn3t7eAiuh+jI1Na0y3ubDDz+Enq0ZSkSCMNyQVmO40W1vvPEGOnfuDACIiorCvn37BFdERPqA4Ya0WuVw4+XlJa4QahBjY2N8/PHH8uNFixahrKxMYEVEpA8YbkhrlZaWIj4+HgDQqVMnWFtbC66IGuLll19Gnz59AJTPfPvxxx8FV0RESsdwQ1rrt99+w+PHjwGwS0qXqVSqKruE/+1vf5N/rkRETYHhhrQWx9sox5AhQzBmzBgAQGpqKr766ivBFRGRkjHckNZiuFGWTz75BCqVCgCwfPly5OTkCK6IiJSK4Ya0VuVwwzVudJ+npyfeeOMNAEBubi5WrFghuCIiUiqGG9JKarVaXrK/Q4cOsLe3F1wRaUJoaChMTU0BAF9++SVSUlIEV0RESsRwQ1rpxo0bKCgoAMAuKSXp0KED3n33XQDlW2twU00iagoMN6SVON5GuRYvXgwbGxsAwMaNG6vs+k5EpAkMN6SVYmJi5HOGG2WxsbHBhx9+CACQJAmLFi0SXBERKQ3DDWmls2fPyuc+Pj4CK6GmMGfOHHlTzYiICBw9elRsQUSkKAw3pHVKS0tx7tw5AICLiwvatGkjuCLSNDMzM/zjH/+QH3/wwQfcVJOINIbhhrROYmIiHj58CADo27ev4Gqoqbz++uvw9PQEAJw7dw7btm0TXBERKQXDDWmd06dPy+f9+vUTWAk1JUNDwyrbMnz44YcoKSkRWBERKQXDDWkdhhv9MWrUKAwdOhQAcP36daxZs0ZwRUSkBEaiCyB6UkW4MTY25srEOiY0NLTez+nUqROOHDkCAFi4cCHu3LkjL/TXECEhIQ1+LhEpA1tuSKvk5OTgypUrAAAvLy+YmZkJroiaWrt27dCjRw8AwIMHDxAdHS24IiLSdQw3pFUqZkkB7JLSJ8OGDYOBQflfR9HR0fLq1EREDcFwQ1rlxIkT8jnDjf6ws7ODr68vAKCkpARRUVGCKyIiXcZwQ1rl+PHj8vnAgQMFVkLNbdCgQTAxMQFQvkJ1VlaW4IqISFcx3JDWKC4ulgcTu7i4yCvYkn5o0aIFBgwYAKB8W4b//ve/gisiIl3FcENa4/z583j8+DEAttroqwEDBqBly5YAgCtXruDWrVuCKyIiXcRwQ1qDXVJkYmKCYcOGyY8PHDgAtVotsCIi0kUMN6Q1KoebQYMGCayEROrZs6e8n1hGRgYuXrwouCIi0jUMN6QVysrK5JlSDg4O6Nq1q+CKSBQDAwMEBATIjw8dOoTi4mKBFRGRrmG4Ia1w8eJF5OXlAQBeeOEFqFQqwRWRSG5ubnLALSgo4MJ+RFQvDDekFQ4dOiSfVx5zQfpr5MiR8sJ+J0+eRH5+vuCKiEhXMNyQVqg87XfEiBECKyFtYW9vj969ewMoX9jv8OHDgisiIl3BcEPCFRUV4dixYwAAJycnjrch2eDBg+X9xeLi4pCeni64IiLSBQw3JNzp06fx6NEjAOWtNhxvQxUsLCyqzJyLiIjg1HAieiaGGxKOXVL0NH369IGtrS0AIDU1FXFxcWILIiKtx3BDwlUON8OHDxdYCWkjIyMjBAYGyo8jIyPx8OFDgRURkbZjuCGhsrKycPbsWQBA9+7d4eTkJLgi0kYdO3aEh4cHAODRo0eIjIwUXBERaTOGGxJq//798hiKMWPGCK6GtJm/v7+8a3hsbCxSUlIEV0RE2orhhoTau3evfD527FiBlZC2s7KyqrIG0q+//oqysjKBFRGRtmK4IWFKS0uxf/9+AECrVq0wYMAAwRWRtuvdu7e871RmZqa8hAARUWUMNyRMdHQ0cnNzAQABAQEwNjYWWxBpPUNDQ/zhD3+Qlws4duwY7ty5I7gqItI2DDckzK+//iqfs0uK6srJyUle+0aSJOzcuRMlJSWCqyIibcJwQ0JIkoQdO3YAKN8FetSoUYIrIl0yaNAgtG3bFkD5jLvKe5MREQkPN+Hh4XBzc4OZmRl8fHxw/PjxWu9NT0/H5MmT0bVrVxgYGGDevHnNVyhpVFxcHG7cuAEAGDJkCOzt7QVXRLrE0NAQEyZMgKGhIYDyVa4vX74suCoi0hZCw83mzZsxb948LFmyBLGxsRg4cCBGjx5d6xTPoqIiODg4YMmSJejZs2czV0uatHXrVvl84sSJAishXdW6dWsEBATIj3ft2oWsrCyBFRGRthAablauXIlp06Zh+vTpcHd3R1hYGJydnbF69eoa73d1dcWqVaswdepUWFtbN3O1pCmSJMnhxsDAAC+++KLgikhX9e7dG56engCA4uJibNmyBQ8ePBBcFRGJJizcFBcXIyYmBv7+/lWu+/v7Izo6WmPvU1RUhPz8/CoHiRUfH4/r168DKN/1uXXr1oIrIl2lUqkwbtw4ODg4ACifHv7KK6+gtLRUcGVEJJKwcJOVlYWysjI4OjpWue7o6IiMjAyNvc+KFStgbW0tH87Ozhp7bWqYn3/+WT5/+eWXBVZCSmBiYoLg4GCYmpoCKJ+FN3v2bEiSJLgyIhLFSHQBFetVVJAkqdq1xli8eDEWLFggP87Pz2fA0YDQ0NAGPU+tVsvdjgYGBkhOTm7waxFVsLe3R3BwMH788Ueo1WqsWbMGFy9eREBAgEb/PqmPkJAQIe9LRAJbbuzt7WFoaFitlSYzM7Naa05jmJqawsrKqspB4ty4cQMFBQUAgM6dO8PS0lJwRaQUzz33HIKCguTHp0+frrJ3GRHpD2HhxsTEBD4+PtV2942MjOQy/AoWHx8vn3t5eYkrhBTp+eefxx/+8Af58ZkzZ7B9+3Yu8kekZ4R2Sy1YsABTpkyBr68v+vfvjzVr1iAlJQUzZ84EUN6ldOfOHWzcuFF+TlxcHACgsLAQ9+7dQ1xcHExMTNC9e3cR3wLVw6NHj3DlyhUAgLm5OTp37iy4IlIib29vqFQq7NmzB5IkITExETk5OZg4cSJsbGxEl0dEzUBouAkODkZ2djaWLVuG9PR0eHh4ICIiAi4uLgDKF+17cs2bXr16yecxMTHYtGkTXFxckJyc3JylUwPEx8fLs1g8PT1hZCR8yBcpVK9evWBpaYmtW7eipKQEaWlp+PbbbzFq1Cj07NlT2DgcImoeKknPphTk5+fD2toaeXl5HH/TCPUdBKxWq/H111/j/v37AIBZs2ZxCjg1ufT0dGzZsgU5OTnyNVdXV4wdO7bJV8XmgGIizarP57fw7RdIP9y8eVMONm5ubgw21Czatm2LGTNmyAv9AUBycjL+/e9/Y/fu3VVCDxEpB/sFqFmcPXtWPu/du7fASkjfmJmZ4aWXXsLzzz+PvXv3Ijc3F5IkITY2FvHx8fDw8ECfPn3Qvn170aUSkYYw3FCTy8zMxNWrVwEAVlZW6Nq1q+CKSB917twZs2bNQnR0NE6dOoWioiKo1WpcvHgRFy9ehJOTE/r27YsePXpwPBiRjuNvMDW5EydOyOf9+vWTd3Imam4mJiYYMmQI+vbti1OnTuHcuXN49OgRACAtLQ07d+7EgQMH4OvrC19fX47LI9JRDDfUpHJycnDp0iUA5d0DPj4+gisiKl+KYNiwYRg4cCAuXbqEs2fPyguKPnz4EMeOHcOJEyfQvXt3vPDCC2jTpo3giomoPhhuqEmdOHFC3uOnX79+8v4/RNrA2NgY3t7e6NWrF1JTU3HmzBkkJSVBrVZDrVYjISEBCQkJcHd3x+DBgxlyiHQEww01maysLFy4cAFAeXdAnz59BFdEVDOVSoUOHTqgQ4cOyM/Px/nz53H+/Hk8fPgQAJCUlISkpCT07t0bw4YNg7m5ueCKiehpGG6oyRw+fFhutRkwYAAsLCwEV0T0bFZWVnKX1YULF3D8+HEUFhYCAM6dO4fLly9j9OjR8PDwEFwpEdWG69xQk7h9+zYuX74MALC0tET//v0FV0RUP8bGxujbty/+/Oc/Y8SIETA2NgYAPHjwANu2bcPu3btRXFwsuEoiqgnDDWmcWq1GRESE/Hjw4MEca0M6y9jYGC+88ALmzJkDd3d3+XpsbCzWrFmDzMxMgdURUU0YbkjjYmJikJaWBgBwcHDgDClSBGtrawQHB2PChAlyK05WVhbWrl2LW7duCa6OiCpjuCGNKigowKFDh+THgYGBXNeGFKVnz56YMWMGHB0dAQBFRUX44YcfEB8fL7gyIqrAcEMaI0kSfvnlFzx+/BhA+YeAq6ur2KKImoC9vT3eeustdO7cGUB5V+zOnTtx7tw5wZUREcBwQxoUFxcnb7NgaWkJf39/wRURNR1TU1O88sor8PX1la/t3buXAYdIC3AqOGlEVlYW9u3bJz8eN24cLC0tBVZE1PQMDQ0RGBgIMzMzeZuRvXv3QqVSCa6MSL+x5YYaraSkBFu3bpWnxXp5eaFbt26CqyJqHiqVCsOHD4efn5987ddff8W2bdsEVkWk3xhuqFEqxtncvXsXQPlYhNGjRwuuiqh5qVQqjBgxAgMGDJCvvfbaa4iKihJYFZH+YrihRjl+/DguXrwIoHw9kEmTJnFNG9JLKpUKI0eOhJeXFwCguLgY48ePR0JCgtjCiPQQww01WEJCAg4fPiw/DgoKQuvWrQVWRCSWSqXCuHHj5FlUeXl5GDduHO7duye4MiL9wnBDDfL7779j165d8uPhw4ejR48e4goi0hKGhoaYOHGiPIsqOTkZL730ErdqIGpGDDdUb+fPn8dPP/2E0tJSAOUDiF944QXBVRFpDxMTE+zevRtOTk4AyrtvZ82aJW8kS0RNi+GG6uXSpUsICAiQ/xXasWNHjB07llNfiZ7g5OSEXbt2wczMDACwdu1afPnll4KrItIPDDdUZ/Hx8Rg+fDju378PAHBxcUFwcDCMjLhcElFNevfujXXr1smPFyxYgMjISIEVEekHfipRnZw+fRqjR49Gbm4uAKBdu3aYPHkyTExMxBZGpOVeffVVJCQkYPny5VCr1QgODsa5c+fQsWNH0aXpldDQUNElAABCQkJEl6AX2HJDz3TkyBGMGDFCDjb9+/fH66+/zinfRHX0j3/8A+PGjQMA5OTkYPz48SgoKBBcFZFyMdzQU+3duxdjxozBgwcPAADDhg3DwYMHYW5uLrgyIt1hYGCAH3/8UV65OzExEW+88QbUarXgyoiUieGGavXTTz8hKChI3uV77Nix2Lt3L1q0aCG4MiLdY2Vlhd27d8Pa2hoAsHPnTnz00UeCqyJSJoYbqkaSJHzyySd4/fXX5enekyZNwo4dO+SZH0RUf126dMGmTZvk2YUhISHYvXu34KqIlIfhhqooKyvD7NmzsXjxYvnaO++8g02bNsHY2FhgZUTKMGbMGCxfvlx+/PrrryMxMVFgRUTKw3BDsocPH+LFF1/E6tWr5Wsff/wxvvnmGxgaGgqsjEhZFi5ciODgYABAYWEhgoKCkJOTI7gqIuVguCEAwL179zBs2DDs2bMHAGBkZISNGzfiww8/5AJ9RBqmUqmwbt06eZPN69ev45VXXkFZWZnYwogUguvcEK5du4YxY8bg+vXrAICWLVtix44dGDFihODKiJTLwsICu3btgq+vL7KysnDw4EEsXrwYn376qejSqJEkSUJGRgZ+//13pKWl4f79+8jPz0dpaSm+/PJLtGrVCm3atIG7uzu8vb3h7++PTp06iS5bURhu9NyRI0fw0ksvyU3iTk5OiIiIQM+ePQVXRqR8Li4u2Lp1K0aMGIGysjJ89tln8PLywuTJk0WXRg2Qnp6OuLg4XLlyBXl5eTXe8/DhQ9y/fx83b95EdHQ01q5dCwDo1KkTXnrpJUyfPp1BRwPYLaXHvvvuO/j7+8vBpkePHjh9+jSDDVEzGjJkCMLCwuTH06ZNQ0xMjLiCqF7UajUSExOxbt06fPvttzhz5kyNwcbCwgI2Njbo2LEjbG1tq339+vXr+Oc//4nOnTtjxIgROHjwIDdabQS23OihsrIyvP/++/jXv/4lXxs9ejT+3//7f7CyshJYGZF+mj17NmJjY7Fu3To8fvwY48aNw8mTJ+Hm5ia6NKqFJEm4evUqDh06hMzMzCpfMzAwgJubG7p06QJnZ2c4ODjIs00rtl8oLCxEYmIijh49iv379+PEiRPy0huHDh3CoUOH0K9fP4SGhsLf3795vzkFYLjRM/n5+Zg8eTL27t0rX5s3bx4+++wzboBJJIhKpUJ4eDiSkpJw6tQppKenw9/fHydOnICjo6Po8ugJqampOHjwIFJTU6tcd3BwQJ8+feDh4fHMVdxbtGiBvn37om/fvli4cCEyMzOxYcMGrFmzBjdu3ABQvqdfQEAARo0ahZUrV8Ld3b3JvielYbeUHvntt9/Qv39/OdgYGRnhm2++wb/+9S8GGyLBTE1NsWfPHnmLhuvXr2P06NG1jt2g5vfw4UPs2bMHa9eurRJsnJyc8Prrr2PWrFno3bt3g7anad26NT744ANcvXoVW7ZsgYeHh/y1/fv34/nnn8e8efPkPf7o6Rhu9MSOHTvQu3dvXL58GQDQqlUrHDhwADNmzBBcGRFVsLe3x4EDB9C+fXsAQGxsLMaPH49Hjx4Jrky/SZKE+Ph4fP3117hw4YJ83d7eHpMmTcLbb7+NTp06aWTZDAMDA0ycOBHx8fHYtGkTnJ2dAQClpaVYtWoVunbtio0bN3I8zjMw3ChcaWkpPvjgA7z00kvyLsQ9evTAmTNnMGzYMMHVEdGTOnTogIMHD8LOzg4AEBUVhbFjx8qb11LzysrKwsaNG7Fz5048fPgQAGBiYoJRo0bhT3/6E7p3794ka4EZGBjg1VdfxZUrV7B06VK5NSgzMxNvvPEGhgwZgoSEBI2/r1Iw3ChYWloaRo4cic8++0y+9uqrr+LMmTPo0qWLwMqI6Gnc3d0REREhb1J7+PBhjBo1Cvn5+YIr0x8lJSU4cuQIVq9ejVu3bsnX3d3dMWfOHPTr169ZVm63sLBASEgIrly5gpdeekm+fuzYMfTq1QsffPABCgsLm7wOXcNwo1Dbt2+Hp6cnjh49CqB8fM2qVavw008/wdLSUmxxRPRMffr0QWRkpLyL+IkTJzB8+HBkZGQIrkz5bt68iW+++QZRUVHyqtHW1taYPHkygoODhcwq7dChA7Zt24Z9+/ahY8eOAMpb5j/77DO4u7tj+/bt7KqqhOFGYQoKCvDWW2/h5Zdfxv379wGUD3Y7evQo3n33XW6lQKRD+vXrh8OHD8vropw/fx59+/bFpUuXBFemTPn5+di6dSs2btyI7OxsAOXdQ35+fpg9e7ZWtHiPGjUKCQkJWLp0KUxNTQEAt2/fxssvv1xlpXl9x3CjIHv27EGPHj2wfv16+drLL7+MS5cuwc/PT2BlRNRQ3t7eiIqKkgeWpqSkwM/PD7t27RJbmIKUlZUhOjoaX3/9dZUd2tu3b48ZM2Zg5MiRMDExEVhhVWZmZggJCUFCQgJGjRolX9+/fz88PDywdOlSPH78WGCF4jHcKEBqaiomTJiA8ePHy9MTW7RogfXr12PLli01roZJRLrDw8MDZ86cga+vL4DyFtoJEyZg7ty5ev8h1hgVC/F98803OHjwIIqLiwEA5ubm+MMf/oC33npLq9cZ6tSpEyIiIrB9+3Z5hl1RURFCQ0Ph4eGBffv2Ca5QHIYbHVZQUIDQ0FC4u7tX+VfcyJEjER8fjzfffJPdUEQK0bZtW0RFRWHixInyta+//hp9+/bF2bNnBVamm1JTU7F+/Xps2rQJ9+7dk6/7+Phg7ty58Pb2hoGB9n9EqlQqvPjii0hKSsL7778vr1l248YNjBkzBkFBQYiPjxdcZfPT/p8cVfPgwQOEhYWhU6dOWLp0qTxF1NHREZs2bcKBAwfw3HPPCa6SiDTNwsICmzdvxurVq+XxFhcvXkS/fv0wa9YseZ84qpkkSbh16xZ+/PFHrF27FikpKfLX2rVrh7fffhvjxo2DhYWFwCobpkWLFvj0008RFxeHQYMGydd3794NLy8vTJgwAbGxsQIrbF4MNzokPT0dS5cuRYcOHTB//nx5PxMjIyPMmTMHV65cwauvvsrWGiIFU6lUmDlzJs6dOwdPT08A5R/aq1evxnPPPYd//OMfnDL+hKKiIiQkJOC7777Df/7znyqDbu3s7DBp0iRMnz4d7dq1E1ilZvTo0QNHjx7Fxo0b0aZNG/n6rl274O3tjeHDh2P79u0oKSkRWGXTY7jRcsXFxfj1118xYcIEODs7IzQ0VJ4FBQDBwcFISkrCV199hVatWokrlIialaenJ2JiYvD555/Lyzvk5ubi73//O1xdXfH+++/j2rVrgqsUR5IkxMTEYO7cuXBycsK2bduQlpYmf93a2hpjx47FrFmzmmwhPlFUKhWmTJmCmzdvYtWqVXBycpK/dvjwYbz88stwcXHBkiVLEB8fr8gp5CpJid/VU+Tn58Pa2hp5eXlauwP23bt3cfToUezduxd79uyptreMoaEhJk+ejA8++KDK/iPNKTQ0VMj7EumKit2fm0NqaipCQkKwceNGeV2WCoMHD8bLL7+MoKAgedCpUj1+/Fj+uzMiIgI3b96sdk+bNm3g5+eH7t27N8sifE9qzj8XFR4/foy1a9ciLCysxqninTp1wksvvYSAgAD069evQXtjNYf6fH4LDzfh4eH47LPPkJ6ejh49eiAsLAwDBw6s9f6oqCgsWLAAiYmJcHJywgcffICZM2fW+f20Ldw8fPgQCQkJiI+PR2xsLKKiouT9n57Upk0bvPXWW3jnnXfg4uLSzJVWxXBD9HQiPsSuX7+Ojz/+GJs2bZJn/lTm4eGBIUOGYODAgejZsyc6deok5ANeE0pLS3Hz5k3Ex8fj9OnTOHXqFC5cuICioqJq95qZmaFz587o1asX3NzchLbSiPhzUUGtVuPQoUMIDw/Hnj17oFarq91jYmKCfv36YcCAAejZsyd69uyJzp07a8XmyjoTbjZv3owpU6YgPDwcfn5++Pbbb/H999/j8uXL6NChQ7X7b926BQ8PD7z99tuYMWMGTp48iVmzZuHnn3+usiz10zRVuHn06BHy8/NRXFyMoqIi+b+PHj3C/fv3cf/+fWRnZyM7OxupqalITk5GcnIybt++XeMfsApWVlYYN24cJk2ahNGjR8PY2FhjNTcGww3R04n8EMvKysKGDRuwZs2ap3ZNmZmZoUePHujUqRPat28PZ2dntG/fHk5OTrC2toa1tTVatWoFCwuLZgsExcXFyMvLQ15eHnJzc5GTk4O0tDTcuXNHPq5du4Zr1649ddyIkZERBg0ahFdeeQWTJk1CWFhYs9T/LCL/XFSWnp6OnTt3Ytu2bYiKinrq55CpqSlcXFzg6uoKFxcXtGvXDnZ2drC1tYWtrS1sbGxgZmYGExMTmJiYwNTUFCYmJrCxsdHoZ5bOhJu+ffvC29sbq1evlq+5u7sjKCgIK1asqHb/woULsWfPHiQlJcnXZs6cifj4eJw6dapO79lU4eaf//wnFi1a1OjXMTQ0hK+vL4YOHYqhQ4di8ODB8qwIbcJwQ/R02vAhJkkSEhISsGPHDvzyyy+IjY196odYbQwNDWFlZVXlA6zyYWxsXKdp02VlZSgpKUFxcXGNx8OHDxu1bk+nTp0wcOBABAYGYuTIkVX+jteWv7O04c/FkzIzM7F//35ERUUhKioKN27c0MjrRkZGYsSIERp5LaB+n9/C2pmKi4sRExNTLRD4+/sjOjq6xuecOnUK/v7+Va4FBARg7dq1KCkpqTEhFhUVVWmmrBi/ounZBA3JiDY2NnB1dUX37t3h6ekJDw8P9OzZs8oP7cn6tQUXDiN6Om2ZseTi4oL58+dj/vz5yMvLw5kzZ3DhwgUkJiYiMTGxTh9kZWVlWjXN3MTEBJ06dUKXLl3QtWtX+Pj4wNfXV95JvULln4G2/J2lLX8uKjMzM0NQUBCCgoIAAHfu3MHFixeRkJCAxMREJCUlISUlRd4Vva5KSko0+v1WvFadPm8lQe7cuSMBkE6ePFnl+scffyx16dKlxud07txZ+vjjj6tcO3nypARASktLq/E5ISEhEgAePHjw4MGDhwKO1NTUZ2YM4SOEnuzHlSTpqX27Nd1f0/UKixcvxoIFC+THarUa9+/fh52dnaKm/mlCfn4+nJ2dkZqaqhWDrak6/ox0A39O2o8/I+335M9IkiQUFBRUmdpeG2Hhxt7eHoaGhsjIyKhyPTMzs9a9PNq0aVPj/UZGRtWaIyuYmppWG7PC9WCezsrKir/sWo4/I93An5P2489I+1X+GVlbW9fpOcIW8TMxMYGPjw8iIyOrXI+MjMSAAQNqfE7//v2r3X/w4EH4+vpqzSwiIiIiEkvoCsULFizA999/j3Xr1iEpKQnz589HSkqKvG7N4sWLMXXqVPn+mTNn4vfff8eCBQuQlJSEdevWYe3atXjvvfdEfQtERESkZYSOuQkODkZ2djaWLVuG9PR0eHh4ICIiQl6gLj09vcrGZm5uboiIiMD8+fPx73//G05OTvjyyy/rvMYNPZ2pqSlCQkK0cuo5lePPSDfw56T9+DPSfo35GQlfoZiIiIhIk7hxJhERESkKww0REREpCsMNERERKQrDDRERESkKww3VKDk5GdOmTYObmxvMzc3RsWNHhISEoLi4WHRpei08PBxubm4wMzODj48Pjh8/Lrok+j8rVqxA79690bJlS7Ru3RpBQUH47bffRJdFT7FixQqoVCrMmzdPdClUyZ07d/D666/Dzs4OFhYW8PLyQkxMTL1eg+GGanTlyhWo1Wp8++23SExMxL/+9S988803+PDDD0WXprc2b96MefPmYcmSJYiNjcXAgQMxevToKsslkDhRUVGYPXs2Tp8+jcjISJSWlsLf3x8PHjwQXRrV4Ny5c1izZg2ef/550aVQJTk5OfDz84OxsTH27duHy5cv44svvqj3zgKcCk519tlnn2H16tW4efOm6FL0Ut++feHt7Y3Vq1fL19zd3REUFIQVK1YIrIxqcu/ePbRu3RpRUVEYNGiQ6HKoksLCQnh7eyM8PBwfffQRvLy8EBYWJrosArBo0SKcPHmy0a3SbLmhOsvLy4Otra3oMvRScXExYmJi4O/vX+W6v78/oqOjBVVFT5OXlwcA/J3RQrNnz0ZgYCBGjBghuhR6wp49e+Dr64uJEyeidevW6NWrF7777rt6vw7DDdXJjRs38NVXX8lbY1DzysrKQllZWbVNZR0dHattJkviSZKEBQsW4IUXXoCHh4focqiS//f//h8uXLjA1k4tdfPmTaxevRqdO3fGgQMHMHPmTLz77rvYuHFjvV6H4UbPLF26FCqV6qnH+fPnqzwnLS0No0aNwsSJEzF9+nRBlRMAqFSqKo8lSap2jcSbM2cOLl68iJ9//ll0KVRJamoq/vznP+PHH3+EmZmZ6HKoBmq1Gt7e3li+fDl69eqFGTNm4O23367SHV8XQveWouY3Z84cvPLKK0+9x9XVVT5PS0vD0KFD0b9/f6xZs6aJq6Pa2Nvbw9DQsForTWZmZrXWHBJr7ty52LNnD44dO4b27duLLocqiYmJQWZmJnx8fORrZWVlOHbsGL7++msUFRXB0NBQYIXUtm1bdO/evco1d3d3bN++vV6vw3CjZ+zt7WFvb1+ne+/cuYOhQ4fCx8cH69evh4EBG/pEMTExgY+PDyIjIzFhwgT5emRkJMaPHy+wMqogSRLmzp2LnTt34ujRo3BzcxNdEj1h+PDhuHTpUpVrf/zjH9GtWzcsXLiQwUYL+Pn5VVtC4erVq/KG2nXFcEM1SktLw5AhQ9ChQwd8/vnnuHfvnvy1Nm3aCKxMfy1YsABTpkyBr6+v3JKWkpLCcVBaYvbs2di0aRN2796Nli1byq1s1tbWMDc3F1wdAUDLli2rjYGytLSEnZ0dx0Zpifnz52PAgAFYvnw5Jk2ahLNnz2LNmjX17jlguKEaHTx4ENevX8f169erNa1z9QAxgoODkZ2djWXLliE9PR0eHh6IiIio979oqGlUjAkYMmRIlevr16/Hm2++2fwFEemg3r17Y+fOnVi8eDGWLVsGNzc3hIWF4bXXXqvX63CdGyIiIlIUDqIgIiIiRWG4ISIiIkVhuCEiIiJFYbghIiIiRWG4ISIiIkVhuCEiIiJFYbghIiIiRWG4ISIiIkVhuCEixdmwYQNatWolugwiEoThhoi0wptvvomgoCDRZRCRAjDcEJHeKSkpEV0CETUhhhsialbbtm2Dp6cnzM3NYWdnhxEjRuD999/Hf/7zH+zevRsqlQoqlQpHjx7F0aNHoVKpkJubKz8/Li4OKpUKycnJ8rUNGzagQ4cOsLCwwIQJE5CdnV3lPZcuXQovLy+sW7cOzz33HExNTSFJEvLy8vDOO++gdevWsLKywrBhwxAfHy8/Lz4+HkOHDkXLli1hZWUFHx8fnD9/vqn/FxFRI3FXcCJqNunp6Xj11Vfx6aefYsKECSgoKMDx48cxdepUpKSkID8/H+vXrwcA2NraIjo6+pmveebMGbz11ltYvnw5XnzxRezfvx8hISHV7rt+/Tq2bNmC7du3w9DQEAAQGBgIW1tbREREwNraGt9++y2GDx+Oq1evwtbWFq+99hp69eqF1atXw9DQEHFxcTA2Ntbs/xQi0jiGGyJqNunp6SgtLcWLL74IFxcXAICnpycAwNzcHEVFRWjTpk29XnPVqlUICAjAokWLAABdunRBdHQ09u/fX+W+4uJi/PDDD3BwcAAAHD58GJcuXUJmZiZMTU0BAJ9//jl27dqFbdu24Z133kFKSgref/99dOvWDQDQuXPnhn/zRNRs2C1FRM2mZ8+eGD58ODw9PTFx4kR89913yMnJadRrJiUloX///lWuPfkYAFxcXORgAwAxMTEoLCyEnZ0dWrRoIR+3bt3CjRs3AAALFizA9OnTMWLECHzyySfydSLSbgw3RNRsDA0NERkZiX379qF79+746quv0LVrV9y6davG+w0Myv+KkiRJvvbkYODKX3saS0vLKo/VajXatm2LuLi4Ksdvv/2G999/H0D5WJ3ExEQEBgbi8OHD6N69O3bu3Fnn75eIxGC3FBE1K5VKBT8/P/j5+eHvf/87XFxcsHPnTpiYmKCsrKzKvRUtLenp6bCxsQFQPqC4su7du+P06dNVrj35uCbe3t7IyMiAkZERXF1da72vS5cu6NKlC+bPn49XX30V69evx4QJE+rwnRKRKGy5IaJmc+bMGSxfvhznz59HSkoKduzYgXv37sHd3R2urq64ePEifvvtN2RlZaGkpASdOnWCs7Mzli5diqtXr2Lv3r344osvqrzmu+++i/379+PTTz/F1atX8fXXX1cbb1OTESNGoH///ggKCsKBAweQnJyM6Oho/PWvf8X58+fx6NEjzJkzB0ePHsXvv/+OkydP4ty5c3B3d2+q/z1EpCkSEVEzuXz5shQQECA5ODhIpqamUpcuXaSvvvpKkiRJyszMlEaOHCm1aNFCAiAdOXJEkiRJOnHihOTp6SmZmZlJAwcOlLZu3SoBkG7duiW/7tq1a6X27dtL5ubm0rhx46TPP/9csra2lr8eEhIi9ezZs1o9+fn50ty5cyUnJyfJ2NhYcnZ2ll577TUpJSVFKioqkl555RXJ2dlZMjExkZycnKQ5c+ZIjx49asL/Q0SkCSpJqmOHNREREZEOYLcUERERKQrDDRERESkKww0REREpCsMNERERKQrDDRERESkKww0REREpCsMNERERKQrDDRERESkKww0REREpCsMNERERKQrDDRERESnK/wf/T1acsdceLQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# histogram (and overlayed density plot):\n", + "kde = sm.nonparametric.KDEUnivariate(studres)\n", + "kde.fit()\n", + "\n", + "plt.hist(studres, color=\"grey\", density=True)\n", + "plt.plot(kde.support, kde.density, color=\"black\", linewidth=2)\n", + "plt.ylabel(\"density\")\n", + "plt.xlabel(\"studres\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.5 Least Absolute Deviations (LAD) Estimation" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_ols: \n", + " b se t pval\n", + "Intercept 2.6253 0.5855 4.4835 0.0001\n", + "I(sales / 1000) 0.0534 0.0441 1.2111 0.2356\n", + "profmarg 0.0446 0.0462 0.9661 0.3420\n", + "\n" + ] + } + ], + "source": [ + "rdchem = wool.data(\"rdchem\")\n", + "\n", + "# OLS regression:\n", + "reg_ols = smf.ols(formula=\"rdintens ~ I(sales/1000) + profmarg\", data=rdchem)\n", + "results_ols = reg_ols.fit()\n", + "\n", + "table_ols = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_ols.params, 4),\n", + " \"se\": round(results_ols.bse, 4),\n", + " \"t\": round(results_ols.tvalues, 4),\n", + " \"pval\": round(results_ols.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_ols: \\n{table_ols}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_lad: \n", + " b se t pval\n", + "Intercept 1.6231 0.7012 2.3148 0.0279\n", + "I(sales / 1000) 0.0186 0.0528 0.3529 0.7267\n", + "profmarg 0.1179 0.0553 2.1320 0.0416\n", + "\n" + ] + } + ], + "source": [ + "# LAD regression:\n", + "reg_lad = smf.quantreg(formula=\"rdintens ~ I(sales/1000) + profmarg\", data=rdchem)\n", + "results_lad = reg_lad.fit(q=0.5)\n", + "\n", + "table_lad = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_lad.params, 4),\n", + " \"se\": round(results_lad.bse, 4),\n", + " \"t\": round(results_lad.tvalues, 4),\n", + " \"pval\": round(results_lad.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_lad: \\n{table_lad}\\n\")" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "notebooks//ipynb,markdown//md,scripts//py" + }, + "kernelspec": { + "display_name": "merino", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts/Ch7. MRA - Qualitative Regressors.py b/scripts/Ch7. MRA - Qualitative Regressors.py index 3f4b471..3fd2d2f 100644 --- a/scripts/Ch7. MRA - Qualitative Regressors.py +++ b/scripts/Ch7. MRA - Qualitative Regressors.py @@ -17,11 +17,11 @@ # %pip install matplotlib numpy pandas statsmodels wooldridge -q +import numpy as np # noqa import pandas as pd import statsmodels.api as sm import statsmodels.formula.api as smf import wooldridge as wool -import numpy as np # ## 7.1 Linear Regression with Dummy Variables as Regressors # diff --git a/scripts/Ch9. Specification and Data Issues.py b/scripts/Ch9. Specification and Data Issues.py new file mode 100644 index 0000000..b73c94a --- /dev/null +++ b/scripts/Ch9. Specification and Data Issues.py @@ -0,0 +1,355 @@ +# --- +# jupyter: +# jupytext: +# formats: notebooks//ipynb,markdown//md,scripts//py +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: merino +# language: python +# name: python3 +# --- + +# # Ch9. Specification and Data Issues + +# %pip install matplotlib numpy pandas statsmodels wooldridge scipy -q + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import statsmodels.api as sm +import statsmodels.formula.api as smf +import statsmodels.stats.outliers_influence as smo +import wooldridge as wool +from scipy import stats + +# ## 9.1 Functional Form Misspecification +# +# ### Example 9.2: Housing Price Equation + +# + +hprice1 = wool.data("hprice1") + +# original OLS: +reg = smf.ols(formula="price ~ lotsize + sqrft + bdrms", data=hprice1) +results = reg.fit() + +# regression for RESET test: +hprice1["fitted_sq"] = results.fittedvalues**2 +hprice1["fitted_cub"] = results.fittedvalues**3 +reg_reset = smf.ols( + formula="price ~ lotsize + sqrft + bdrms + fitted_sq + fitted_cub", + data=hprice1, +) +results_reset = reg_reset.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results_reset.params, 4), + "se": round(results_reset.bse, 4), + "t": round(results_reset.tvalues, 4), + "pval": round(results_reset.pvalues, 4), + }, +) +print(f"table: \n{table}\n") + +# + +# RESET test (H0: all coeffs including "fitted" are=0): +hypotheses = ["fitted_sq = 0", "fitted_cub = 0"] +ftest_man = results_reset.f_test(hypotheses) +fstat_man = ftest_man.statistic +fpval_man = ftest_man.pvalue + +print(f"fstat_man: {fstat_man}\n") +print(f"fpval_man: {fpval_man}\n") + +# + +hprice1 = wool.data("hprice1") + +# original linear regression: +reg = smf.ols(formula="price ~ lotsize + sqrft + bdrms", data=hprice1) +results = reg.fit() + +# automated RESET test: +reset_output = smo.reset_ramsey(res=results, degree=3) +fstat_auto = reset_output.statistic +fpval_auto = reset_output.pvalue + +print(f"fstat_auto: {fstat_auto}\n") +print(f"fpval_auto: {fpval_auto}\n") + +# + +hprice1 = wool.data("hprice1") + +# two alternative models: +reg1 = smf.ols(formula="price ~ lotsize + sqrft + bdrms", data=hprice1) +results1 = reg1.fit() + +reg2 = smf.ols( + formula="price ~ np.log(lotsize) +np.log(sqrft) + bdrms", + data=hprice1, +) +results2 = reg2.fit() + +# encompassing test of Davidson & MacKinnon: +# comprehensive model: +reg3 = smf.ols( + formula="price ~ lotsize + sqrft + bdrms + np.log(lotsize) + np.log(sqrft)", + data=hprice1, +) +results3 = reg3.fit() + +# model 1 vs. comprehensive model: +anovaResults1 = sm.stats.anova_lm(results1, results3) +print(f"anovaResults1: \n{anovaResults1}\n") +# - + +# model 2 vs. comprehensive model: +anovaResults2 = sm.stats.anova_lm(results2, results3) +print(f"anovaResults2: \n{anovaResults2}\n") + +# ## 9.2 Measurement Error + +# + +# set the random seed: +np.random.seed(1234567) + +# set sample size and number of simulations: +n = 1000 +r = 10000 + +# set true parameters (betas): +beta0 = 1 +beta1 = 0.5 + +# initialize arrays to store results later (b1 without ME, b1_me with ME): +b1 = np.empty(r) +b1_me = np.empty(r) + +# draw a sample of x, fixed over replications: +x = stats.norm.rvs(4, 1, size=n) + +# repeat r times: +for i in range(r): + # draw a sample of u: + u = stats.norm.rvs(0, 1, size=n) + + # draw a sample of ystar: + ystar = beta0 + beta1 * x + u + + # measurement error and mismeasured y: + e0 = stats.norm.rvs(0, 1, size=n) + y = ystar + e0 + df = pd.DataFrame({"ystar": ystar, "y": y, "x": x}) + + # regress ystar on x and store slope estimate at position i: + reg_star = smf.ols(formula="ystar ~ x", data=df) + results_star = reg_star.fit() + b1[i] = results_star.params["x"] + + # regress y on x and store slope estimate at position i: + reg_me = smf.ols(formula="y ~ x", data=df) + results_me = reg_me.fit() + b1_me[i] = results_me.params["x"] + +# mean with and without ME: +b1_mean = np.mean(b1) +b1_me_mean = np.mean(b1_me) +print(f"b1_mean: {b1_mean}\n") +print(f"b1_me_mean: {b1_me_mean}\n") +# - + +# variance with and without ME: +b1_var = np.var(b1, ddof=1) +b1_me_var = np.var(b1_me, ddof=1) +print(f"b1_var: {b1_var}\n") +print(f"b1_me_var: {b1_me_var}\n") + +# + +# set the random seed: +np.random.seed(1234567) + +# set sample size and number of simulations: +n = 1000 +r = 10000 + +# set true parameters (betas): +beta0 = 1 +beta1 = 0.5 + +# initialize b1 arrays to store results later: +b1 = np.empty(r) +b1_me = np.empty(r) + +# draw a sample of x, fixed over replications: +xstar = stats.norm.rvs(4, 1, size=n) + +# repeat r times: +for i in range(r): + # draw a sample of u: + u = stats.norm.rvs(0, 1, size=n) + + # draw a sample of y: + y = beta0 + beta1 * xstar + u + + # measurement error and mismeasured x: + e1 = stats.norm.rvs(0, 1, size=n) + x = xstar + e1 + df = pd.DataFrame({"y": y, "xstar": xstar, "x": x}) + + # regress y on xstar and store slope estimate at position i: + reg_star = smf.ols(formula="y ~ xstar", data=df) + results_star = reg_star.fit() + b1[i] = results_star.params["xstar"] + + # regress y on x and store slope estimate at position i: + reg_me = smf.ols(formula="y ~ x", data=df) + results_me = reg_me.fit() + b1_me[i] = results_me.params["x"] + +# mean with and without ME: +b1_mean = np.mean(b1) +b1_me_mean = np.mean(b1_me) +print(f"b1_mean: {b1_mean}\n") +print(f"b1_me_mean: {b1_me_mean}\n") +# - + +# variance with and without ME: +b1_var = np.var(b1, ddof=1) +b1_me_var = np.var(b1_me, ddof=1) +print(f"b1_var: {b1_var}\n") +print(f"b1_me_var: {b1_me_var}\n") + +# ## 9.3 Missing Data and Nonrandom Samples + +# + +# nan and inf handling in numpy: +x = np.array([-1, 0, 1, np.nan, np.inf, -np.inf]) +logx = np.log(x) +invx = np.array(1 / x) +ncdf = np.array(stats.norm.cdf(x)) +isnanx = np.isnan(x) + +results = pd.DataFrame( + {"x": x, "logx": logx, "invx": invx, "logx": logx, "ncdf": ncdf, "isnanx": isnanx}, +) +print(f"results: \n{results}\n") + +# + +lawsch85 = wool.data("lawsch85") +lsat_pd = lawsch85["LSAT"] + +# create boolean indicator for missings: +missLSAT = lsat_pd.isna() + +# LSAT and indicator for Schools No. 120-129: +preview = pd.DataFrame({"lsat_pd": lsat_pd[119:129], "missLSAT": missLSAT[119:129]}) +print(f"preview: \n{preview}\n") +# - + +# frequencies of indicator: +freq_missLSAT = pd.crosstab(missLSAT, columns="count") +print(f"freq_missLSAT: \n{freq_missLSAT}\n") + +# missings for all variables in data frame (counts): +miss_all = lawsch85.isna() +colsums = miss_all.sum(axis=0) +print(f"colsums: \n{colsums}\n") + +# computing amount of complete cases: +complete_cases = miss_all.sum(axis=1) == 0 +freq_complete_cases = pd.crosstab(complete_cases, columns="count") +print(f"freq_complete_cases: \n{freq_complete_cases}\n") + +# + +lawsch85 = wool.data("lawsch85") + +# missings in numpy: +x_np = np.array(lawsch85["LSAT"]) +x_np_bar1 = np.mean(x_np) +x_np_bar2 = np.nanmean(x_np) +print(f"x_np_bar1: {x_np_bar1}\n") +print(f"x_np_bar2: {x_np_bar2}\n") +# - + +# missings in pandas: +x_pd = lawsch85["LSAT"] +x_pd_bar1 = np.mean(x_pd) +x_pd_bar2 = np.nanmean(x_pd) +print(f"x_pd_bar1: {x_pd_bar1}\n") +print(f"x_pd_bar2: {x_pd_bar2}\n") + +# observations and variables: +print(f"lawsch85.shape: {lawsch85.shape}\n") + +# regression (missings are taken care of by default): +reg = smf.ols(formula="np.log(salary) ~ LSAT + cost + age", data=lawsch85) +results = reg.fit() +print(f"results.nobs: {results.nobs}\n") + +# ## 9.4 Outlying Observations + +# + +rdchem = wool.data("rdchem") + +# OLS regression: +reg = smf.ols(formula="rdintens ~ sales + profmarg", data=rdchem) +results = reg.fit() + +# studentized residuals for all observations: +studres = results.get_influence().resid_studentized_external + +# display extreme values: +studres_max = np.max(studres) +studres_min = np.min(studres) +print(f"studres_max: {studres_max}\n") +print(f"studres_min: {studres_min}\n") + +# + +# histogram (and overlayed density plot): +kde = sm.nonparametric.KDEUnivariate(studres) +kde.fit() + +plt.hist(studres, color="grey", density=True) +plt.plot(kde.support, kde.density, color="black", linewidth=2) +plt.ylabel("density") +plt.xlabel("studres") +# - + +# ## 9.5 Least Absolute Deviations (LAD) Estimation + +# + +rdchem = wool.data("rdchem") + +# OLS regression: +reg_ols = smf.ols(formula="rdintens ~ I(sales/1000) + profmarg", data=rdchem) +results_ols = reg_ols.fit() + +table_ols = pd.DataFrame( + { + "b": round(results_ols.params, 4), + "se": round(results_ols.bse, 4), + "t": round(results_ols.tvalues, 4), + "pval": round(results_ols.pvalues, 4), + }, +) +print(f"table_ols: \n{table_ols}\n") + +# + +# LAD regression: +reg_lad = smf.quantreg(formula="rdintens ~ I(sales/1000) + profmarg", data=rdchem) +results_lad = reg_lad.fit(q=0.5) + +table_lad = pd.DataFrame( + { + "b": round(results_lad.params, 4), + "se": round(results_lad.bse, 4), + "t": round(results_lad.tvalues, 4), + "pval": round(results_lad.pvalues, 4), + }, +) +print(f"table_lad: \n{table_lad}\n")