diff --git a/README.md b/README.md index 82a112f..8e86a3b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # `merino`: Mastering Econometrics Regressions, Inference, and Numerical Optimization -![](resources/advanced.jhu.edu.svg 'JHU AAP') +![](resources/advanced.jhu.edu.svg "JHU AAP") ## Introduction @@ -16,36 +16,41 @@ These notebooks are based on the content from: Each notebook corresponds to a chapter from the source material. Click on the "Open in Colab" badge to view and interact with the notebook in Google Colab. -1. **Ch2. The Simple Regression Model** +1. **Ch2. The Simple Regression Model** Open In Colab -2. **Ch3. Multiple Regression Analysis: Estimation** +2. **Ch3. Multiple Regression Analysis: Estimation** Open In Colab -3. **Ch4. Multiple Regression Analysis: Inference** +3. **Ch4. Multiple Regression Analysis: Inference** Open In Colab -4. **Ch5. MRA - OLS Asymptotics** +4. **Ch5. MRA - OLS Asymptotics** Open In Colab -5. **Ch6. MRA - Further Issues** +5. **Ch6. MRA - Further Issues** Open In Colab -6. **Ch7. MRA - Qualitative Regressors** +6. **Ch7. MRA - Qualitative Regressors** Open In Colab +7. **Ch8. Heteroskedasticity** + + Open In Colab + + ## How to Use 1. Click on the "Open in Colab" badge next to the notebook you want to explore. diff --git a/markdown/Ch8. Heteroskedasticity.md b/markdown/Ch8. Heteroskedasticity.md new file mode 100644 index 0000000..6f12dc1 --- /dev/null +++ b/markdown/Ch8. Heteroskedasticity.md @@ -0,0 +1,385 @@ +--- +jupyter: + jupytext: + formats: notebooks//ipynb,markdown//md,scripts//py + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.16.4 + kernelspec: + display_name: merino + language: python + name: python3 +--- + +# 8. Heteroskedasticity + +```python +%pip install numpy pandas patsy statsmodels wooldridge -q +``` + +```python +import numpy as np +import pandas as pd +import patsy as pt +import statsmodels.api as sm +import statsmodels.formula.api as smf +import wooldridge as wool +``` + +## 8.1 Heteroskedasticity-Robust Inference + +### Example 8.2: Heteroskedasticity-Robust Inference + +```python +gpa3 = wool.data("gpa3") + +# define regression model: +reg = smf.ols( + formula="cumgpa ~ sat + hsperc + tothrs + female + black + white", + data=gpa3, + subset=(gpa3["spring"] == 1), +) + +# estimate default model (only for spring data): +results_default = reg.fit() + +table_default = pd.DataFrame( + { + "b": round(results_default.params, 5), + "se": round(results_default.bse, 5), + "t": round(results_default.tvalues, 5), + "pval": round(results_default.pvalues, 5), + }, +) +print(f"table_default: \n{table_default}\n") +``` + +```python +# estimate model with White SE (only for spring data): +results_white = reg.fit(cov_type="HC0") + +table_white = pd.DataFrame( + { + "b": round(results_white.params, 5), + "se": round(results_white.bse, 5), + "t": round(results_white.tvalues, 5), + "pval": round(results_white.pvalues, 5), + }, +) +print(f"table_white: \n{table_white}\n") +``` + +```python +# estimate model with refined White SE (only for spring data): +results_refined = reg.fit(cov_type="HC3") + +table_refined = pd.DataFrame( + { + "b": round(results_refined.params, 5), + "se": round(results_refined.bse, 5), + "t": round(results_refined.tvalues, 5), + "pval": round(results_refined.pvalues, 5), + }, +) +print(f"table_refined: \n{table_refined}\n") +``` + +```python +gpa3 = wool.data("gpa3") + +# definition of model and hypotheses: +reg = smf.ols( + formula="cumgpa ~ sat + hsperc + tothrs + female + black + white", + data=gpa3, + subset=(gpa3["spring"] == 1), +) +hypotheses = ["black = 0", "white = 0"] + +# F-Tests using different variance-covariance formulas: +# ususal VCOV: +results_default = reg.fit() +ftest_default = results_default.f_test(hypotheses) +fstat_default = ftest_default.statistic +fpval_default = ftest_default.pvalue +print(f"fstat_default: {fstat_default}\n") +print(f"fpval_default: {fpval_default}\n") +``` + +```python +# refined White VCOV: +results_hc3 = reg.fit(cov_type="HC3") +ftest_hc3 = results_hc3.f_test(hypotheses) +fstat_hc3 = ftest_hc3.statistic +fpval_hc3 = ftest_hc3.pvalue +print(f"fstat_hc3: {fstat_hc3}\n") +print(f"fpval_hc3: {fpval_hc3}\n") +``` + +```python +# classical White VCOV: +results_hc0 = reg.fit(cov_type="HC0") +ftest_hc0 = results_hc0.f_test(hypotheses) +fstat_hc0 = ftest_hc0.statistic +fpval_hc0 = ftest_hc0.pvalue +print(f"fstat_hc0: {fstat_hc0}\n") +print(f"fpval_hc0: {fpval_hc0}\n") +``` + +## 8.2 Heteroskedasticity Tests + +### Example 8.4: Heteroskedasticity in a Housing Price Equation + +```python +hprice1 = wool.data("hprice1") + +# estimate model: +reg = smf.ols(formula="price ~ lotsize + sqrft + bdrms", data=hprice1) +results = reg.fit() +table_results = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table_results: \n{table_results}\n") +``` + +```python +# automatic BP test (LM version): +y, X = pt.dmatrices( + "price ~ lotsize + sqrft + bdrms", + data=hprice1, + return_type="dataframe", +) +result_bp_lm = sm.stats.diagnostic.het_breuschpagan(results.resid, X) +bp_lm_statistic = result_bp_lm[0] +bp_lm_pval = result_bp_lm[1] +print(f"bp_lm_statistic: {bp_lm_statistic}\n") +print(f"bp_lm_pval: {bp_lm_pval}\n") +``` + +```python +# manual BP test (F version): +hprice1["resid_sq"] = results.resid**2 +reg_resid = smf.ols(formula="resid_sq ~ lotsize + sqrft + bdrms", data=hprice1) +results_resid = reg_resid.fit() +bp_F_statistic = results_resid.fvalue +bp_F_pval = results_resid.f_pvalue +print(f"bp_F_statistic: {bp_F_statistic}\n") +print(f"bp_F_pval: {bp_F_pval}\n") +``` + +### Example 8.5: BP and White test in the Log Housing Price Equation + +```python +hprice1 = wool.data("hprice1") + +# estimate model: +reg = smf.ols( + formula="np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms", + data=hprice1, +) +results = reg.fit() + +# BP test: +y, X_bp = pt.dmatrices( + "np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms", + data=hprice1, + return_type="dataframe", +) +result_bp = sm.stats.diagnostic.het_breuschpagan(results.resid, X_bp) +bp_statistic = result_bp[0] +bp_pval = result_bp[1] +print(f"bp_statistic: {bp_statistic}\n") +print(f"bp_pval: {bp_pval}\n") +``` + +```python +# White test: +X_wh = pd.DataFrame( + { + "const": 1, + "fitted_reg": results.fittedvalues, + "fitted_reg_sq": results.fittedvalues**2, + }, +) +result_white = sm.stats.diagnostic.het_breuschpagan(results.resid, X_wh) +white_statistic = result_white[0] +white_pval = result_white[1] +print(f"white_statistic: {white_statistic}\n") +print(f"white_pval: {white_pval}\n") +``` + +## 8.3 Weighted Least Squares + +### Example 8.6: Financial Wealth Equation + +```python +k401ksubs = wool.data("401ksubs") + +# subsetting data: +k401ksubs_sub = k401ksubs[k401ksubs["fsize"] == 1] + +# OLS (only for singles, i.e. 'fsize'==1): +reg_ols = smf.ols( + formula="nettfa ~ inc + I((age-25)**2) + male + e401k", + data=k401ksubs_sub, +) +results_ols = reg_ols.fit(cov_type="HC0") + +# print regression table: +table_ols = pd.DataFrame( + { + "b": round(results_ols.params, 4), + "se": round(results_ols.bse, 4), + "t": round(results_ols.tvalues, 4), + "pval": round(results_ols.pvalues, 4), + }, +) +print(f"table_ols: \n{table_ols}\n") +``` + +```python +# WLS: +wls_weight = list(1 / k401ksubs_sub["inc"]) +reg_wls = smf.wls( + formula="nettfa ~ inc + I((age-25)**2) + male + e401k", + weights=wls_weight, + data=k401ksubs_sub, +) +results_wls = reg_wls.fit() + +# print regression table: +table_wls = pd.DataFrame( + { + "b": round(results_wls.params, 4), + "se": round(results_wls.bse, 4), + "t": round(results_wls.tvalues, 4), + "pval": round(results_wls.pvalues, 4), + }, +) +print(f"table_wls: \n{table_wls}\n") +``` + +```python +k401ksubs = wool.data("401ksubs") + +# subsetting data: +k401ksubs_sub = k401ksubs[k401ksubs["fsize"] == 1] + +# WLS: +wls_weight = list(1 / k401ksubs_sub["inc"]) +reg_wls = smf.wls( + formula="nettfa ~ inc + I((age-25)**2) + male + e401k", + weights=wls_weight, + data=k401ksubs_sub, +) + +# non-robust (default) results: +results_wls = reg_wls.fit() +table_default = pd.DataFrame( + { + "b": round(results_wls.params, 4), + "se": round(results_wls.bse, 4), + "t": round(results_wls.tvalues, 4), + "pval": round(results_wls.pvalues, 4), + }, +) +print(f"table_default: \n{table_default}\n") +``` + +```python +# robust results (Refined White SE): +results_white = reg_wls.fit(cov_type="HC3") +table_white = pd.DataFrame( + { + "b": round(results_white.params, 4), + "se": round(results_white.bse, 4), + "t": round(results_white.tvalues, 4), + "pval": round(results_white.pvalues, 4), + }, +) +print(f"table_white: \n{table_white}\n") +``` + +### Example 8.7: Demand for Cigarettes + +```python +smoke = wool.data("smoke") + +# OLS: +reg_ols = smf.ols( + formula="cigs ~ np.log(income) + np.log(cigpric) +" + "educ + age + I(age**2) + restaurn", + data=smoke, +) +results_ols = reg_ols.fit() +table_ols = pd.DataFrame( + { + "b": round(results_ols.params, 4), + "se": round(results_ols.bse, 4), + "t": round(results_ols.tvalues, 4), + "pval": round(results_ols.pvalues, 4), + }, +) +print(f"table_ols: \n{table_ols}\n") +``` + +```python +# BP test: +y, X = pt.dmatrices( + "cigs ~ np.log(income) + np.log(cigpric) + educ +age + I(age**2) + restaurn", + data=smoke, + return_type="dataframe", +) +result_bp = sm.stats.diagnostic.het_breuschpagan(results_ols.resid, X) +bp_statistic = result_bp[0] +bp_pval = result_bp[1] +print(f"bp_statistic: {bp_statistic}\n") +print(f"bp_pval: {bp_pval}\n") +``` + +```python +# FGLS (estimation of the variance function): +smoke["logu2"] = np.log(results_ols.resid**2) +reg_fgls = smf.ols( + formula="logu2 ~ np.log(income) + np.log(cigpric) +" + "educ + age + I(age**2) + restaurn", + data=smoke, +) +results_fgls = reg_fgls.fit() +table_fgls = pd.DataFrame( + { + "b": round(results_fgls.params, 4), + "se": round(results_fgls.bse, 4), + "t": round(results_fgls.tvalues, 4), + "pval": round(results_fgls.pvalues, 4), + }, +) +print(f"table_fgls: \n{table_fgls}\n") +``` + +```python +# FGLS (WLS): +wls_weight = list(1 / np.exp(results_fgls.fittedvalues)) +reg_wls = smf.wls( + formula="cigs ~ np.log(income) + np.log(cigpric) +" + "educ + age + I(age**2) + restaurn", + weights=wls_weight, + data=smoke, +) +results_wls = reg_wls.fit() +table_wls = pd.DataFrame( + { + "b": round(results_wls.params, 4), + "se": round(results_wls.bse, 4), + "t": round(results_wls.tvalues, 4), + "pval": round(results_wls.pvalues, 4), + }, +) +print(f"table_wls: \n{table_wls}\n") +``` diff --git a/notebooks/Ch8. Heteroskedasticity.ipynb b/notebooks/Ch8. Heteroskedasticity.ipynb new file mode 100644 index 0000000..f26fbd1 --- /dev/null +++ b/notebooks/Ch8. Heteroskedasticity.ipynb @@ -0,0 +1,809 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "183d0f27", + "metadata": {}, + "source": [ + "# 8. Heteroskedasticity" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e8753173", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install numpy pandas patsy statsmodels wooldridge -q" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e75bf8a9", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import patsy as pt\n", + "import statsmodels.api as sm\n", + "import statsmodels.formula.api as smf\n", + "import wooldridge as wool" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8.1 Heteroskedasticity-Robust Inference\n", + "\n", + "### Example 8.2: Heteroskedasticity-Robust Inference" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_default: \n", + " b se t pval\n", + "Intercept 1.47006 0.22980 6.39706 0.00000\n", + "sat 0.00114 0.00018 6.38850 0.00000\n", + "hsperc -0.00857 0.00124 -6.90600 0.00000\n", + "tothrs 0.00250 0.00073 3.42551 0.00068\n", + "female 0.30343 0.05902 5.14117 0.00000\n", + "black -0.12828 0.14737 -0.87049 0.38462\n", + "white -0.05872 0.14099 -0.41650 0.67730\n", + "\n" + ] + } + ], + "source": [ + "gpa3 = wool.data(\"gpa3\")\n", + "\n", + "# define regression model:\n", + "reg = smf.ols(\n", + " formula=\"cumgpa ~ sat + hsperc + tothrs + female + black + white\",\n", + " data=gpa3,\n", + " subset=(gpa3[\"spring\"] == 1),\n", + ")\n", + "\n", + "# estimate default model (only for spring data):\n", + "results_default = reg.fit()\n", + "\n", + "table_default = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_default.params, 5),\n", + " \"se\": round(results_default.bse, 5),\n", + " \"t\": round(results_default.tvalues, 5),\n", + " \"pval\": round(results_default.pvalues, 5),\n", + " },\n", + ")\n", + "print(f\"table_default: \\n{table_default}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_white: \n", + " b se t pval\n", + "Intercept 1.47006 0.21856 6.72615 0.00000\n", + "sat 0.00114 0.00019 6.01360 0.00000\n", + "hsperc -0.00857 0.00140 -6.10008 0.00000\n", + "tothrs 0.00250 0.00073 3.41365 0.00064\n", + "female 0.30343 0.05857 5.18073 0.00000\n", + "black -0.12828 0.11810 -1.08627 0.27736\n", + "white -0.05872 0.11032 -0.53228 0.59453\n", + "\n" + ] + } + ], + "source": [ + "# estimate model with White SE (only for spring data):\n", + "results_white = reg.fit(cov_type=\"HC0\")\n", + "\n", + "table_white = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_white.params, 5),\n", + " \"se\": round(results_white.bse, 5),\n", + " \"t\": round(results_white.tvalues, 5),\n", + " \"pval\": round(results_white.pvalues, 5),\n", + " },\n", + ")\n", + "print(f\"table_white: \\n{table_white}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_refined: \n", + " b se t pval\n", + "Intercept 1.47006 0.22938 6.40885 0.00000\n", + "sat 0.00114 0.00020 5.84017 0.00000\n", + "hsperc -0.00857 0.00144 -5.93407 0.00000\n", + "tothrs 0.00250 0.00075 3.34177 0.00083\n", + "female 0.30343 0.06004 5.05388 0.00000\n", + "black -0.12828 0.12819 -1.00074 0.31695\n", + "white -0.05872 0.12044 -0.48758 0.62585\n", + "\n" + ] + } + ], + "source": [ + "# estimate model with refined White SE (only for spring data):\n", + "results_refined = reg.fit(cov_type=\"HC3\")\n", + "\n", + "table_refined = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_refined.params, 5),\n", + " \"se\": round(results_refined.bse, 5),\n", + " \"t\": round(results_refined.tvalues, 5),\n", + " \"pval\": round(results_refined.pvalues, 5),\n", + " },\n", + ")\n", + "print(f\"table_refined: \\n{table_refined}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fstat_default: 0.6796041956073422\n", + "\n", + "fpval_default: 0.5074683622584049\n", + "\n" + ] + } + ], + "source": [ + "gpa3 = wool.data(\"gpa3\")\n", + "\n", + "# definition of model and hypotheses:\n", + "reg = smf.ols(\n", + " formula=\"cumgpa ~ sat + hsperc + tothrs + female + black + white\",\n", + " data=gpa3,\n", + " subset=(gpa3[\"spring\"] == 1),\n", + ")\n", + "hypotheses = [\"black = 0\", \"white = 0\"]\n", + "\n", + "# F-Tests using different variance-covariance formulas:\n", + "# ususal VCOV:\n", + "results_default = reg.fit()\n", + "ftest_default = results_default.f_test(hypotheses)\n", + "fstat_default = ftest_default.statistic\n", + "fpval_default = ftest_default.pvalue\n", + "print(f\"fstat_default: {fstat_default}\\n\")\n", + "print(f\"fpval_default: {fpval_default}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fstat_hc3: 0.6724692957656673\n", + "\n", + "fpval_hc3: 0.5110883633440992\n", + "\n" + ] + } + ], + "source": [ + "# refined White VCOV:\n", + "results_hc3 = reg.fit(cov_type=\"HC3\")\n", + "ftest_hc3 = results_hc3.f_test(hypotheses)\n", + "fstat_hc3 = ftest_hc3.statistic\n", + "fpval_hc3 = ftest_hc3.pvalue\n", + "print(f\"fstat_hc3: {fstat_hc3}\\n\")\n", + "print(f\"fpval_hc3: {fpval_hc3}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fstat_hc0: 0.7477969818036264\n", + "\n", + "fpval_hc0: 0.4741442714738484\n", + "\n" + ] + } + ], + "source": [ + "# classical White VCOV:\n", + "results_hc0 = reg.fit(cov_type=\"HC0\")\n", + "ftest_hc0 = results_hc0.f_test(hypotheses)\n", + "fstat_hc0 = ftest_hc0.statistic\n", + "fpval_hc0 = ftest_hc0.pvalue\n", + "print(f\"fstat_hc0: {fstat_hc0}\\n\")\n", + "print(f\"fpval_hc0: {fpval_hc0}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8.2 Heteroskedasticity Tests\n", + "\n", + "### Example 8.4: Heteroskedasticity in a Housing Price Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_results: \n", + " b se t pval\n", + "Intercept -21.7703 29.4750 -0.7386 0.4622\n", + "lotsize 0.0021 0.0006 3.2201 0.0018\n", + "sqrft 0.1228 0.0132 9.2751 0.0000\n", + "bdrms 13.8525 9.0101 1.5374 0.1279\n", + "\n" + ] + } + ], + "source": [ + "hprice1 = wool.data(\"hprice1\")\n", + "\n", + "# estimate model:\n", + "reg = smf.ols(formula=\"price ~ lotsize + sqrft + bdrms\", data=hprice1)\n", + "results = reg.fit()\n", + "table_results = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_results: \\n{table_results}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bp_lm_statistic: 14.092385504350183\n", + "\n", + "bp_lm_pval: 0.0027820595556891643\n", + "\n" + ] + } + ], + "source": [ + "# automatic BP test (LM version):\n", + "y, X = pt.dmatrices(\n", + " \"price ~ lotsize + sqrft + bdrms\",\n", + " data=hprice1,\n", + " return_type=\"dataframe\",\n", + ")\n", + "result_bp_lm = sm.stats.diagnostic.het_breuschpagan(results.resid, X)\n", + "bp_lm_statistic = result_bp_lm[0]\n", + "bp_lm_pval = result_bp_lm[1]\n", + "print(f\"bp_lm_statistic: {bp_lm_statistic}\\n\")\n", + "print(f\"bp_lm_pval: {bp_lm_pval}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bp_F_statistic: 5.338919363241395\n", + "\n", + "bp_F_pval: 0.002047744420936124\n", + "\n" + ] + } + ], + "source": [ + "# manual BP test (F version):\n", + "hprice1[\"resid_sq\"] = results.resid**2\n", + "reg_resid = smf.ols(formula=\"resid_sq ~ lotsize + sqrft + bdrms\", data=hprice1)\n", + "results_resid = reg_resid.fit()\n", + "bp_F_statistic = results_resid.fvalue\n", + "bp_F_pval = results_resid.f_pvalue\n", + "print(f\"bp_F_statistic: {bp_F_statistic}\\n\")\n", + "print(f\"bp_F_pval: {bp_F_pval}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 8.5: BP and White test in the Log Housing Price Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bp_statistic: 4.223245741805276\n", + "\n", + "bp_pval: 0.23834482631492962\n", + "\n" + ] + } + ], + "source": [ + "hprice1 = wool.data(\"hprice1\")\n", + "\n", + "# estimate model:\n", + "reg = smf.ols(\n", + " formula=\"np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms\",\n", + " data=hprice1,\n", + ")\n", + "results = reg.fit()\n", + "\n", + "# BP test:\n", + "y, X_bp = pt.dmatrices(\n", + " \"np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms\",\n", + " data=hprice1,\n", + " return_type=\"dataframe\",\n", + ")\n", + "result_bp = sm.stats.diagnostic.het_breuschpagan(results.resid, X_bp)\n", + "bp_statistic = result_bp[0]\n", + "bp_pval = result_bp[1]\n", + "print(f\"bp_statistic: {bp_statistic}\\n\")\n", + "print(f\"bp_pval: {bp_pval}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "white_statistic: 3.4472865468746834\n", + "\n", + "white_pval: 0.17841494794136223\n", + "\n" + ] + } + ], + "source": [ + "# White test:\n", + "X_wh = pd.DataFrame(\n", + " {\n", + " \"const\": 1,\n", + " \"fitted_reg\": results.fittedvalues,\n", + " \"fitted_reg_sq\": results.fittedvalues**2,\n", + " },\n", + ")\n", + "result_white = sm.stats.diagnostic.het_breuschpagan(results.resid, X_wh)\n", + "white_statistic = result_white[0]\n", + "white_pval = result_white[1]\n", + "print(f\"white_statistic: {white_statistic}\\n\")\n", + "print(f\"white_pval: {white_pval}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8.3 Weighted Least Squares\n", + "\n", + "### Example 8.6: Financial Wealth Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_ols: \n", + " b se t pval\n", + "Intercept -20.9850 3.4909 -6.0114 0.0000\n", + "inc 0.7706 0.0994 7.7486 0.0000\n", + "I((age - 25) ** 2) 0.0251 0.0043 5.7912 0.0000\n", + "male 2.4779 2.0558 1.2053 0.2281\n", + "e401k 6.8862 2.2837 3.0153 0.0026\n", + "\n" + ] + } + ], + "source": [ + "k401ksubs = wool.data(\"401ksubs\")\n", + "\n", + "# subsetting data:\n", + "k401ksubs_sub = k401ksubs[k401ksubs[\"fsize\"] == 1]\n", + "\n", + "# OLS (only for singles, i.e. 'fsize'==1):\n", + "reg_ols = smf.ols(\n", + " formula=\"nettfa ~ inc + I((age-25)**2) + male + e401k\",\n", + " data=k401ksubs_sub,\n", + ")\n", + "results_ols = reg_ols.fit(cov_type=\"HC0\")\n", + "\n", + "# print regression table:\n", + "table_ols = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_ols.params, 4),\n", + " \"se\": round(results_ols.bse, 4),\n", + " \"t\": round(results_ols.tvalues, 4),\n", + " \"pval\": round(results_ols.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_ols: \\n{table_ols}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_wls: \n", + " b se t pval\n", + "Intercept -16.7025 1.9580 -8.5304 0.0000\n", + "inc 0.7404 0.0643 11.5140 0.0000\n", + "I((age - 25) ** 2) 0.0175 0.0019 9.0796 0.0000\n", + "male 1.8405 1.5636 1.1771 0.2393\n", + "e401k 5.1883 1.7034 3.0458 0.0024\n", + "\n" + ] + } + ], + "source": [ + "# WLS:\n", + "wls_weight = list(1 / k401ksubs_sub[\"inc\"])\n", + "reg_wls = smf.wls(\n", + " formula=\"nettfa ~ inc + I((age-25)**2) + male + e401k\",\n", + " weights=wls_weight,\n", + " data=k401ksubs_sub,\n", + ")\n", + "results_wls = reg_wls.fit()\n", + "\n", + "# print regression table:\n", + "table_wls = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_wls.params, 4),\n", + " \"se\": round(results_wls.bse, 4),\n", + " \"t\": round(results_wls.tvalues, 4),\n", + " \"pval\": round(results_wls.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_wls: \\n{table_wls}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_default: \n", + " b se t pval\n", + "Intercept -16.7025 1.9580 -8.5304 0.0000\n", + "inc 0.7404 0.0643 11.5140 0.0000\n", + "I((age - 25) ** 2) 0.0175 0.0019 9.0796 0.0000\n", + "male 1.8405 1.5636 1.1771 0.2393\n", + "e401k 5.1883 1.7034 3.0458 0.0024\n", + "\n" + ] + } + ], + "source": [ + "k401ksubs = wool.data(\"401ksubs\")\n", + "\n", + "# subsetting data:\n", + "k401ksubs_sub = k401ksubs[k401ksubs[\"fsize\"] == 1]\n", + "\n", + "# WLS:\n", + "wls_weight = list(1 / k401ksubs_sub[\"inc\"])\n", + "reg_wls = smf.wls(\n", + " formula=\"nettfa ~ inc + I((age-25)**2) + male + e401k\",\n", + " weights=wls_weight,\n", + " data=k401ksubs_sub,\n", + ")\n", + "\n", + "# non-robust (default) results:\n", + "results_wls = reg_wls.fit()\n", + "table_default = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_wls.params, 4),\n", + " \"se\": round(results_wls.bse, 4),\n", + " \"t\": round(results_wls.tvalues, 4),\n", + " \"pval\": round(results_wls.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_default: \\n{table_default}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_white: \n", + " b se t pval\n", + "Intercept -16.7025 2.2482 -7.4292 0.0000\n", + "inc 0.7404 0.0752 9.8403 0.0000\n", + "I((age - 25) ** 2) 0.0175 0.0026 6.7650 0.0000\n", + "male 1.8405 1.3132 1.4015 0.1611\n", + "e401k 5.1883 1.5743 3.2955 0.0010\n", + "\n" + ] + } + ], + "source": [ + "# robust results (Refined White SE):\n", + "results_white = reg_wls.fit(cov_type=\"HC3\")\n", + "table_white = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_white.params, 4),\n", + " \"se\": round(results_white.bse, 4),\n", + " \"t\": round(results_white.tvalues, 4),\n", + " \"pval\": round(results_white.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_white: \\n{table_white}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 8.7: Demand for Cigarettes" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_ols: \n", + " b se t pval\n", + "Intercept -3.6398 24.0787 -0.1512 0.8799\n", + "np.log(income) 0.8803 0.7278 1.2095 0.2268\n", + "np.log(cigpric) -0.7509 5.7733 -0.1301 0.8966\n", + "educ -0.5015 0.1671 -3.0016 0.0028\n", + "age 0.7707 0.1601 4.8132 0.0000\n", + "I(age ** 2) -0.0090 0.0017 -5.1765 0.0000\n", + "restaurn -2.8251 1.1118 -2.5410 0.0112\n", + "\n" + ] + } + ], + "source": [ + "smoke = wool.data(\"smoke\")\n", + "\n", + "# OLS:\n", + "reg_ols = smf.ols(\n", + " formula=\"cigs ~ np.log(income) + np.log(cigpric) +\"\n", + " \"educ + age + I(age**2) + restaurn\",\n", + " data=smoke,\n", + ")\n", + "results_ols = reg_ols.fit()\n", + "table_ols = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_ols.params, 4),\n", + " \"se\": round(results_ols.bse, 4),\n", + " \"t\": round(results_ols.tvalues, 4),\n", + " \"pval\": round(results_ols.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_ols: \\n{table_ols}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bp_statistic: 32.25841908120121\n", + "\n", + "bp_pval: 1.4557794830278942e-05\n", + "\n" + ] + } + ], + "source": [ + "# BP test:\n", + "y, X = pt.dmatrices(\n", + " \"cigs ~ np.log(income) + np.log(cigpric) + educ +age + I(age**2) + restaurn\",\n", + " data=smoke,\n", + " return_type=\"dataframe\",\n", + ")\n", + "result_bp = sm.stats.diagnostic.het_breuschpagan(results_ols.resid, X)\n", + "bp_statistic = result_bp[0]\n", + "bp_pval = result_bp[1]\n", + "print(f\"bp_statistic: {bp_statistic}\\n\")\n", + "print(f\"bp_pval: {bp_pval}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_fgls: \n", + " b se t pval\n", + "Intercept -1.9207 2.5630 -0.7494 0.4538\n", + "np.log(income) 0.2915 0.0775 3.7634 0.0002\n", + "np.log(cigpric) 0.1954 0.6145 0.3180 0.7506\n", + "educ -0.0797 0.0178 -4.4817 0.0000\n", + "age 0.2040 0.0170 11.9693 0.0000\n", + "I(age ** 2) -0.0024 0.0002 -12.8931 0.0000\n", + "restaurn -0.6270 0.1183 -5.2982 0.0000\n", + "\n" + ] + } + ], + "source": [ + "# FGLS (estimation of the variance function):\n", + "smoke[\"logu2\"] = np.log(results_ols.resid**2)\n", + "reg_fgls = smf.ols(\n", + " formula=\"logu2 ~ np.log(income) + np.log(cigpric) +\"\n", + " \"educ + age + I(age**2) + restaurn\",\n", + " data=smoke,\n", + ")\n", + "results_fgls = reg_fgls.fit()\n", + "table_fgls = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_fgls.params, 4),\n", + " \"se\": round(results_fgls.bse, 4),\n", + " \"t\": round(results_fgls.tvalues, 4),\n", + " \"pval\": round(results_fgls.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_fgls: \\n{table_fgls}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_wls: \n", + " b se t pval\n", + "Intercept 5.6355 17.8031 0.3165 0.7517\n", + "np.log(income) 1.2952 0.4370 2.9639 0.0031\n", + "np.log(cigpric) -2.9403 4.4601 -0.6592 0.5099\n", + "educ -0.4634 0.1202 -3.8570 0.0001\n", + "age 0.4819 0.0968 4.9784 0.0000\n", + "I(age ** 2) -0.0056 0.0009 -5.9897 0.0000\n", + "restaurn -3.4611 0.7955 -4.3508 0.0000\n", + "\n" + ] + } + ], + "source": [ + "# FGLS (WLS):\n", + "wls_weight = list(1 / np.exp(results_fgls.fittedvalues))\n", + "reg_wls = smf.wls(\n", + " formula=\"cigs ~ np.log(income) + np.log(cigpric) +\"\n", + " \"educ + age + I(age**2) + restaurn\",\n", + " weights=wls_weight,\n", + " data=smoke,\n", + ")\n", + "results_wls = reg_wls.fit()\n", + "table_wls = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_wls.params, 4),\n", + " \"se\": round(results_wls.bse, 4),\n", + " \"t\": round(results_wls.tvalues, 4),\n", + " \"pval\": round(results_wls.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_wls: \\n{table_wls}\\n\")" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "notebooks//ipynb,markdown//md,scripts//py" + }, + "kernelspec": { + "display_name": "merino", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pre-commit.sh b/pre-commit.sh new file mode 100644 index 0000000..6314a2a --- /dev/null +++ b/pre-commit.sh @@ -0,0 +1,4 @@ +jupytext --set-formats notebooks//ipynb,markdown//md,scripts//py notebooks/*.ipynb +ruff check --fix --select ALL notebooks/*.ipynb +ruff format notebooks/*.ipynb +jupytext --set-formats notebooks//ipynb,markdown//md,scripts//py notebooks/*.ipynb diff --git a/scripts/Ch8. Heteroskedasticity.py b/scripts/Ch8. Heteroskedasticity.py new file mode 100644 index 0000000..f10926d --- /dev/null +++ b/scripts/Ch8. Heteroskedasticity.py @@ -0,0 +1,358 @@ +# --- +# jupyter: +# jupytext: +# formats: notebooks//ipynb,markdown//md,scripts//py +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: merino +# language: python +# name: python3 +# --- + +# # 8. Heteroskedasticity + +# %pip install numpy pandas patsy statsmodels wooldridge -q + +import numpy as np +import pandas as pd +import patsy as pt +import statsmodels.api as sm +import statsmodels.formula.api as smf +import wooldridge as wool + +# ## 8.1 Heteroskedasticity-Robust Inference +# +# ### Example 8.2: Heteroskedasticity-Robust Inference + +# + +gpa3 = wool.data("gpa3") + +# define regression model: +reg = smf.ols( + formula="cumgpa ~ sat + hsperc + tothrs + female + black + white", + data=gpa3, + subset=(gpa3["spring"] == 1), +) + +# estimate default model (only for spring data): +results_default = reg.fit() + +table_default = pd.DataFrame( + { + "b": round(results_default.params, 5), + "se": round(results_default.bse, 5), + "t": round(results_default.tvalues, 5), + "pval": round(results_default.pvalues, 5), + }, +) +print(f"table_default: \n{table_default}\n") + +# + +# estimate model with White SE (only for spring data): +results_white = reg.fit(cov_type="HC0") + +table_white = pd.DataFrame( + { + "b": round(results_white.params, 5), + "se": round(results_white.bse, 5), + "t": round(results_white.tvalues, 5), + "pval": round(results_white.pvalues, 5), + }, +) +print(f"table_white: \n{table_white}\n") + +# + +# estimate model with refined White SE (only for spring data): +results_refined = reg.fit(cov_type="HC3") + +table_refined = pd.DataFrame( + { + "b": round(results_refined.params, 5), + "se": round(results_refined.bse, 5), + "t": round(results_refined.tvalues, 5), + "pval": round(results_refined.pvalues, 5), + }, +) +print(f"table_refined: \n{table_refined}\n") + +# + +gpa3 = wool.data("gpa3") + +# definition of model and hypotheses: +reg = smf.ols( + formula="cumgpa ~ sat + hsperc + tothrs + female + black + white", + data=gpa3, + subset=(gpa3["spring"] == 1), +) +hypotheses = ["black = 0", "white = 0"] + +# F-Tests using different variance-covariance formulas: +# ususal VCOV: +results_default = reg.fit() +ftest_default = results_default.f_test(hypotheses) +fstat_default = ftest_default.statistic +fpval_default = ftest_default.pvalue +print(f"fstat_default: {fstat_default}\n") +print(f"fpval_default: {fpval_default}\n") +# - + +# refined White VCOV: +results_hc3 = reg.fit(cov_type="HC3") +ftest_hc3 = results_hc3.f_test(hypotheses) +fstat_hc3 = ftest_hc3.statistic +fpval_hc3 = ftest_hc3.pvalue +print(f"fstat_hc3: {fstat_hc3}\n") +print(f"fpval_hc3: {fpval_hc3}\n") + +# classical White VCOV: +results_hc0 = reg.fit(cov_type="HC0") +ftest_hc0 = results_hc0.f_test(hypotheses) +fstat_hc0 = ftest_hc0.statistic +fpval_hc0 = ftest_hc0.pvalue +print(f"fstat_hc0: {fstat_hc0}\n") +print(f"fpval_hc0: {fpval_hc0}\n") + +# ## 8.2 Heteroskedasticity Tests +# +# ### Example 8.4: Heteroskedasticity in a Housing Price Equation + +# + +hprice1 = wool.data("hprice1") + +# estimate model: +reg = smf.ols(formula="price ~ lotsize + sqrft + bdrms", data=hprice1) +results = reg.fit() +table_results = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table_results: \n{table_results}\n") +# - + +# automatic BP test (LM version): +y, X = pt.dmatrices( + "price ~ lotsize + sqrft + bdrms", + data=hprice1, + return_type="dataframe", +) +result_bp_lm = sm.stats.diagnostic.het_breuschpagan(results.resid, X) +bp_lm_statistic = result_bp_lm[0] +bp_lm_pval = result_bp_lm[1] +print(f"bp_lm_statistic: {bp_lm_statistic}\n") +print(f"bp_lm_pval: {bp_lm_pval}\n") + +# manual BP test (F version): +hprice1["resid_sq"] = results.resid**2 +reg_resid = smf.ols(formula="resid_sq ~ lotsize + sqrft + bdrms", data=hprice1) +results_resid = reg_resid.fit() +bp_F_statistic = results_resid.fvalue +bp_F_pval = results_resid.f_pvalue +print(f"bp_F_statistic: {bp_F_statistic}\n") +print(f"bp_F_pval: {bp_F_pval}\n") + +# ### Example 8.5: BP and White test in the Log Housing Price Equation + +# + +hprice1 = wool.data("hprice1") + +# estimate model: +reg = smf.ols( + formula="np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms", + data=hprice1, +) +results = reg.fit() + +# BP test: +y, X_bp = pt.dmatrices( + "np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms", + data=hprice1, + return_type="dataframe", +) +result_bp = sm.stats.diagnostic.het_breuschpagan(results.resid, X_bp) +bp_statistic = result_bp[0] +bp_pval = result_bp[1] +print(f"bp_statistic: {bp_statistic}\n") +print(f"bp_pval: {bp_pval}\n") +# - + +# White test: +X_wh = pd.DataFrame( + { + "const": 1, + "fitted_reg": results.fittedvalues, + "fitted_reg_sq": results.fittedvalues**2, + }, +) +result_white = sm.stats.diagnostic.het_breuschpagan(results.resid, X_wh) +white_statistic = result_white[0] +white_pval = result_white[1] +print(f"white_statistic: {white_statistic}\n") +print(f"white_pval: {white_pval}\n") + +# ## 8.3 Weighted Least Squares +# +# ### Example 8.6: Financial Wealth Equation + +# + +k401ksubs = wool.data("401ksubs") + +# subsetting data: +k401ksubs_sub = k401ksubs[k401ksubs["fsize"] == 1] + +# OLS (only for singles, i.e. 'fsize'==1): +reg_ols = smf.ols( + formula="nettfa ~ inc + I((age-25)**2) + male + e401k", + data=k401ksubs_sub, +) +results_ols = reg_ols.fit(cov_type="HC0") + +# print regression table: +table_ols = pd.DataFrame( + { + "b": round(results_ols.params, 4), + "se": round(results_ols.bse, 4), + "t": round(results_ols.tvalues, 4), + "pval": round(results_ols.pvalues, 4), + }, +) +print(f"table_ols: \n{table_ols}\n") + +# + +# WLS: +wls_weight = list(1 / k401ksubs_sub["inc"]) +reg_wls = smf.wls( + formula="nettfa ~ inc + I((age-25)**2) + male + e401k", + weights=wls_weight, + data=k401ksubs_sub, +) +results_wls = reg_wls.fit() + +# print regression table: +table_wls = pd.DataFrame( + { + "b": round(results_wls.params, 4), + "se": round(results_wls.bse, 4), + "t": round(results_wls.tvalues, 4), + "pval": round(results_wls.pvalues, 4), + }, +) +print(f"table_wls: \n{table_wls}\n") + +# + +k401ksubs = wool.data("401ksubs") + +# subsetting data: +k401ksubs_sub = k401ksubs[k401ksubs["fsize"] == 1] + +# WLS: +wls_weight = list(1 / k401ksubs_sub["inc"]) +reg_wls = smf.wls( + formula="nettfa ~ inc + I((age-25)**2) + male + e401k", + weights=wls_weight, + data=k401ksubs_sub, +) + +# non-robust (default) results: +results_wls = reg_wls.fit() +table_default = pd.DataFrame( + { + "b": round(results_wls.params, 4), + "se": round(results_wls.bse, 4), + "t": round(results_wls.tvalues, 4), + "pval": round(results_wls.pvalues, 4), + }, +) +print(f"table_default: \n{table_default}\n") +# - + +# robust results (Refined White SE): +results_white = reg_wls.fit(cov_type="HC3") +table_white = pd.DataFrame( + { + "b": round(results_white.params, 4), + "se": round(results_white.bse, 4), + "t": round(results_white.tvalues, 4), + "pval": round(results_white.pvalues, 4), + }, +) +print(f"table_white: \n{table_white}\n") + +# ### Example 8.7: Demand for Cigarettes + +# + +smoke = wool.data("smoke") + +# OLS: +reg_ols = smf.ols( + formula="cigs ~ np.log(income) + np.log(cigpric) +" + "educ + age + I(age**2) + restaurn", + data=smoke, +) +results_ols = reg_ols.fit() +table_ols = pd.DataFrame( + { + "b": round(results_ols.params, 4), + "se": round(results_ols.bse, 4), + "t": round(results_ols.tvalues, 4), + "pval": round(results_ols.pvalues, 4), + }, +) +print(f"table_ols: \n{table_ols}\n") +# - + +# BP test: +y, X = pt.dmatrices( + "cigs ~ np.log(income) + np.log(cigpric) + educ +age + I(age**2) + restaurn", + data=smoke, + return_type="dataframe", +) +result_bp = sm.stats.diagnostic.het_breuschpagan(results_ols.resid, X) +bp_statistic = result_bp[0] +bp_pval = result_bp[1] +print(f"bp_statistic: {bp_statistic}\n") +print(f"bp_pval: {bp_pval}\n") + +# FGLS (estimation of the variance function): +smoke["logu2"] = np.log(results_ols.resid**2) +reg_fgls = smf.ols( + formula="logu2 ~ np.log(income) + np.log(cigpric) +" + "educ + age + I(age**2) + restaurn", + data=smoke, +) +results_fgls = reg_fgls.fit() +table_fgls = pd.DataFrame( + { + "b": round(results_fgls.params, 4), + "se": round(results_fgls.bse, 4), + "t": round(results_fgls.tvalues, 4), + "pval": round(results_fgls.pvalues, 4), + }, +) +print(f"table_fgls: \n{table_fgls}\n") + +# FGLS (WLS): +wls_weight = list(1 / np.exp(results_fgls.fittedvalues)) +reg_wls = smf.wls( + formula="cigs ~ np.log(income) + np.log(cigpric) +" + "educ + age + I(age**2) + restaurn", + weights=wls_weight, + data=smoke, +) +results_wls = reg_wls.fit() +table_wls = pd.DataFrame( + { + "b": round(results_wls.params, 4), + "se": round(results_wls.bse, 4), + "t": round(results_wls.tvalues, 4), + "pval": round(results_wls.pvalues, 4), + }, +) +print(f"table_wls: \n{table_wls}\n")