diff --git a/markdown/Ch10. Basic Regression Analysis with Time Series Data.md b/markdown/Ch10. Basic Regression Analysis with Time Series Data.md new file mode 100644 index 0000000..4b7c9dc --- /dev/null +++ b/markdown/Ch10. Basic Regression Analysis with Time Series Data.md @@ -0,0 +1,222 @@ +--- +jupyter: + jupytext: + formats: notebooks//ipynb,markdown//md,scripts//py + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.16.4 + kernelspec: + display_name: merino + language: python + name: python3 +--- + +# 10. Basic Regression Analysis with Time Series Data + +```python +%pip install matplotlib numpy pandas statsmodels wooldridge -q +``` + +```python +import matplotlib.pyplot as plt +import numpy as np # noqa +import pandas as pd +import statsmodels.formula.api as smf +import wooldridge as wool +``` + +## 10.1 Static Time Series Models + +$$ y_t = \beta_0 + \beta_1 z_{1t} + \beta_2 z_{2t} + \cdots + \beta_k z_{kt} + u_t $$ + +### Example 10.2 Effects of Inflation and Deficits on Interest Rates + +```python +intdef = wool.dataWoo("intdef") + +# linear regression of static model (Q function avoids conflicts with keywords): +reg = smf.ols(formula='i3 ~ Q("inf") + Q("def")', data=intdef) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +``` + +## 10.2 Time Series Data Types in Python + +### 10.2.1 Equispaced Time Series in Python + +```python +barium = wool.dataWoo("barium") +T = len(barium) + +# monthly time series starting Feb. 
1978: +barium.index = pd.date_range(start="1978-02", periods=T, freq="ME") +print(f'barium["chnimp"].head(): \n{barium["chnimp"].head()}\n') +``` + +```python +# plot chnimp (default: index on the x-axis): +plt.plot("chnimp", data=barium, color="black", linestyle="-") +plt.ylabel("chnimp") +plt.xlabel("time") +``` + +## 10.3 Other Time Series Models + +### 10.3.1 Finite Distributed Lag Models + +$$ y_t = \alpha_0 + \delta_0 z_t + \delta_1 z_{t-1} + \cdots + \delta_k z_{t-k} + u_t $$ + +### Example 10.4 Effects of Personal Exemption on Fertility Rates + +```python +fertil3 = wool.dataWoo("fertil3") +T = len(fertil3) + +# define yearly time series beginning in 1913: +fertil3.index = pd.date_range(start="1913", periods=T, freq="YE").year + +# add all lags of 'pe' up to order 2: +fertil3["pe_lag1"] = fertil3["pe"].shift(1) +fertil3["pe_lag2"] = fertil3["pe"].shift(2) + +# linear regression of model with lags: +reg = smf.ols(formula="gfr ~ pe + pe_lag1 + pe_lag2 + ww2 + pill", data=fertil3) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +``` + +### Example 10.4 (continued) + +```python +fertil3 = wool.dataWoo("fertil3") +T = len(fertil3) + +# define yearly time series beginning in 1913: +fertil3.index = pd.date_range(start="1913", periods=T, freq="YE").year + +# add all lags of 'pe' up to order 2: +fertil3["pe_lag1"] = fertil3["pe"].shift(1) +fertil3["pe_lag2"] = fertil3["pe"].shift(2) + +# linear regression of model with lags: +reg = smf.ols(formula="gfr ~ pe + pe_lag1 + pe_lag2 + ww2 + pill", data=fertil3) +results = reg.fit() + +# F test (H0: all pe coefficients are=0): +hypotheses1 = ["pe = 0", "pe_lag1 = 0", "pe_lag2 = 0"] +ftest1 = results.f_test(hypotheses1) +fstat1 = ftest1.statistic +fpval1 = ftest1.pvalue + +print(f"fstat1: {fstat1}\n") +print(f"fpval1: 
{fpval1}\n") +``` + +```python +# calculating the LRP: +b = results.params +b_pe_tot = b["pe"] + b["pe_lag1"] + b["pe_lag2"] +print(f"b_pe_tot: {b_pe_tot}\n") +``` + +```python +# F test (H0: LRP=0): +hypotheses2 = ["pe + pe_lag1 + pe_lag2 = 0"] +ftest2 = results.f_test(hypotheses2) +fstat2 = ftest2.statistic +fpval2 = ftest2.pvalue + +print(f"fstat2: {fstat2}\n") +print(f"fpval2: {fpval2}\n") +``` + +### 10.3.2 Trends + +### Example 10.7 Housing Investment and Prices + +```python +hseinv = wool.dataWoo("hseinv") + +# linear regression without time trend: +reg_wot = smf.ols(formula="np.log(invpc) ~ np.log(price)", data=hseinv) +results_wot = reg_wot.fit() + +# print regression table: +table_wot = pd.DataFrame( + { + "b": round(results_wot.params, 4), + "se": round(results_wot.bse, 4), + "t": round(results_wot.tvalues, 4), + "pval": round(results_wot.pvalues, 4), + }, +) +print(f"table_wot: \n{table_wot}\n") +``` + +```python +# linear regression with time trend (data set includes a time variable t): +reg_wt = smf.ols(formula="np.log(invpc) ~ np.log(price) + t", data=hseinv) +results_wt = reg_wt.fit() + +# print regression table: +table_wt = pd.DataFrame( + { + "b": round(results_wt.params, 4), + "se": round(results_wt.bse, 4), + "t": round(results_wt.tvalues, 4), + "pval": round(results_wt.pvalues, 4), + }, +) +print(f"table_wt: \n{table_wt}\n") +``` + +### 10.3.3 Seasonality + +### Example 10.11 Effects of Antidumping Filings + +```python +barium = wool.dataWoo("barium") + +# linear regression with seasonal effects: +reg = smf.ols( + formula="np.log(chnimp) ~ np.log(chempi) + np.log(gas) +" + "np.log(rtwex) + befile6 + affile6 + afdec6 +" + "feb + mar + apr + may + jun + jul +" + "aug + sep + oct + nov + dec", + data=barium, +) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: 
\n{table}\n") +``` diff --git a/markdown/Ch2. The Simple Regression Model.md b/markdown/Ch2. The Simple Regression Model.md index 75e94a4..8ffa373 100644 --- a/markdown/Ch2. The Simple Regression Model.md +++ b/markdown/Ch2. The Simple Regression Model.md @@ -75,6 +75,7 @@ b = results.params print(f"b: \n{b}") ``` + ```python def plot_regression(x, y, data, results, title): # scatter plot and fitted values: diff --git a/markdown/Ch6. MRA - Further Issues.md b/markdown/Ch6. MRA - Further Issues.md index 783a0df..e59d9dd 100644 --- a/markdown/Ch6. MRA - Further Issues.md +++ b/markdown/Ch6. MRA - Further Issues.md @@ -73,6 +73,9 @@ $$z_y = \frac{y - \bar{y}}{\text{sd}(y)} \qquad \text{and} \qquad z_{x_1} = \fr $$\text{price\_sc} = \beta_0 + \beta_1 \cdot \text{nox\_sc} + \beta_2 \cdot \text{crime\_sc} + \beta_3 \cdot \text{rooms\_sc} + \beta_4 \cdot \text{dist\_sc} + \beta_5 \cdot \text{stratio\_sc} + u$$ + + + ```python # define a function for the standardization: def scale(x): diff --git a/notebooks/Ch10. Basic Regression Analysis with Time Series Data.ipynb b/notebooks/Ch10. Basic Regression Analysis with Time Series Data.ipynb new file mode 100644 index 0000000..84d9e58 --- /dev/null +++ b/notebooks/Ch10. Basic Regression Analysis with Time Series Data.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "11b25d13", + "metadata": {}, + "source": [ + "# 10. 
Basic Regression Analysis with Time Series Data" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ad5c1922", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install matplotlib numpy pandas statsmodels wooldridge -q" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ba368558", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np # noqa\n", + "import pandas as pd\n", + "import statsmodels.formula.api as smf\n", + "import wooldridge as wool" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 10.1 Static Time Series Models\n", + "\n", + "$$ y_t = \\beta_0 + \\beta_1 z_{1t} + \\beta_2 z_{2t} + \\cdots + \\beta_k z_{kt} + u_t $$\n", + "\n", + "### Example 10.2 Effects of Inflation and Deficits on Interest Rates" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept 1.7333 0.4320 4.0125 0.0002\n", + "Q(\"inf\") 0.6059 0.0821 7.3765 0.0000\n", + "Q(\"def\") 0.5131 0.1184 4.3338 0.0001\n", + "\n" + ] + } + ], + "source": [ + "intdef = wool.dataWoo(\"intdef\")\n", + "\n", + "# linear regression of static model (Q function avoids conflicts with keywords):\n", + "reg = smf.ols(formula='i3 ~ Q(\"inf\") + Q(\"def\")', data=intdef)\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 10.2 Time Series Data 
Types in Python\n", + "\n", + "### 10.2.1 Equispaced Time Series in Python" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "barium[\"chnimp\"].head(): \n", + "1978-02-28 220.462006\n", + "1978-03-31 94.797997\n", + "1978-04-30 219.357498\n", + "1978-05-31 317.421509\n", + "1978-06-30 114.639000\n", + "Freq: ME, Name: chnimp, dtype: float64\n", + "\n" + ] + } + ], + "source": [ + "barium = wool.dataWoo(\"barium\")\n", + "T = len(barium)\n", + "\n", + "# monthly time series starting Feb. 1978:\n", + "barium.index = pd.date_range(start=\"1978-02\", periods=T, freq=\"ME\")\n", + "print(f'barium[\"chnimp\"].head(): \\n{barium[\"chnimp\"].head()}\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'time')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# plot chnimp (default: index on the x-axis):\n", + "plt.plot(\"chnimp\", data=barium, color=\"black\", linestyle=\"-\")\n", + "plt.ylabel(\"chnimp\")\n", + "plt.xlabel(\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 10.3 Other Time Series Models\n", + "\n", + "### 10.3.1 Finite Distributed Lag Models\n", + "\n", + "$$ y_t = \\alpha_0 + \\delta_0 z_t + \\delta_1 z_{t-1} + \\cdots + \\delta_k z_{t-k} + u_t $$\n", + "\n", + "### Example 10.4 Effects of Personal Exemption on Fertility Rates" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept 95.8705 3.2820 29.2114 0.0000\n", + "pe 0.0727 0.1255 0.5789 0.5647\n", + "pe_lag1 -0.0058 0.1557 -0.0371 0.9705\n", + "pe_lag2 0.0338 0.1263 0.2679 0.7896\n", + "ww2 -22.1265 10.7320 -2.0617 0.0433\n", + "pill -31.3050 3.9816 -7.8625 0.0000\n", + "\n" + ] + } + ], + "source": [ + "fertil3 = wool.dataWoo(\"fertil3\")\n", + "T = len(fertil3)\n", + "\n", + "# define yearly time series beginning in 1913:\n", + "fertil3.index = pd.date_range(start=\"1913\", periods=T, freq=\"YE\").year\n", + "\n", + "# add all lags of 'pe' up to order 2:\n", + "fertil3[\"pe_lag1\"] = fertil3[\"pe\"].shift(1)\n", + "fertil3[\"pe_lag2\"] = fertil3[\"pe\"].shift(2)\n", + "\n", + "# linear regression of model with lags:\n", + "reg = smf.ols(formula=\"gfr ~ pe + pe_lag1 + pe_lag2 + ww2 + pill\", data=fertil3)\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### Eample 10.4 (continued)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fstat1: 3.9729640469785323\n", + "\n", + "fpval1: 0.011652005303126576\n", + "\n" + ] + } + ], + "source": [ + "fertil3 = wool.dataWoo(\"fertil3\")\n", + "T = len(fertil3)\n", + "\n", + "# define yearly time series beginning in 1913:\n", + "fertil3.index = pd.date_range(start=\"1913\", periods=T, freq=\"YE\").year\n", + "\n", + "# add all lags of 'pe' up to order 2:\n", + "fertil3[\"pe_lag1\"] = fertil3[\"pe\"].shift(1)\n", + "fertil3[\"pe_lag2\"] = fertil3[\"pe\"].shift(2)\n", + "\n", + "# linear regression of model with lags:\n", + "reg = smf.ols(formula=\"gfr ~ pe + pe_lag1 + pe_lag2 + ww2 + pill\", data=fertil3)\n", + "results = reg.fit()\n", + "\n", + "# F test (H0: all pe coefficients are=0):\n", + "hypotheses1 = [\"pe = 0\", \"pe_lag1 = 0\", \"pe_lag2 = 0\"]\n", + "ftest1 = results.f_test(hypotheses1)\n", + "fstat1 = ftest1.statistic\n", + "fpval1 = ftest1.pvalue\n", + "\n", + "print(f\"fstat1: {fstat1}\\n\")\n", + "print(f\"fpval1: {fpval1}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b_pe_tot: 0.10071909027975486\n", + "\n" + ] + } + ], + "source": [ + "# calculating the LRP:\n", + "b = results.params\n", + "b_pe_tot = b[\"pe\"] + b[\"pe_lag1\"] + b[\"pe_lag2\"]\n", + "print(f\"b_pe_tot: {b_pe_tot}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fstat2: 11.421238467853499\n", + "\n", + "fpval2: 0.0012408438602971525\n", + "\n" + ] + } + ], + "source": [ + "# F test (H0: LRP=0):\n", + "hypotheses2 = [\"pe + pe_lag1 + pe_lag2 = 0\"]\n", + "ftest2 = results.f_test(hypotheses2)\n", + "fstat2 = 
ftest2.statistic\n", + "fpval2 = ftest2.pvalue\n", + "\n", + "print(f\"fstat2: {fstat2}\\n\")\n", + "print(f\"fpval2: {fpval2}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 10.3.2 Trends\n", + "\n", + "### Example 10.7 Housing Investment and Prices" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_wot: \n", + " b se t pval\n", + "Intercept -0.5502 0.0430 -12.7882 0.0000\n", + "np.log(price) 1.2409 0.3824 3.2450 0.0024\n", + "\n" + ] + } + ], + "source": [ + "hseinv = wool.dataWoo(\"hseinv\")\n", + "\n", + "# linear regression without time trend:\n", + "reg_wot = smf.ols(formula=\"np.log(invpc) ~ np.log(price)\", data=hseinv)\n", + "results_wot = reg_wot.fit()\n", + "\n", + "# print regression table:\n", + "table_wot = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_wot.params, 4),\n", + " \"se\": round(results_wot.bse, 4),\n", + " \"t\": round(results_wot.tvalues, 4),\n", + " \"pval\": round(results_wot.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_wot: \\n{table_wot}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_wt: \n", + " b se t pval\n", + "Intercept -0.9131 0.1356 -6.7328 0.0000\n", + "np.log(price) -0.3810 0.6788 -0.5612 0.5779\n", + "t 0.0098 0.0035 2.7984 0.0079\n", + "\n" + ] + } + ], + "source": [ + "# linear regression with time trend (data set includes a time variable t):\n", + "reg_wt = smf.ols(formula=\"np.log(invpc) ~ np.log(price) + t\", data=hseinv)\n", + "results_wt = reg_wt.fit()\n", + "\n", + "# print regression table:\n", + "table_wt = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_wt.params, 4),\n", + " \"se\": round(results_wt.bse, 4),\n", + " \"t\": round(results_wt.tvalues, 4),\n", + " \"pval\": round(results_wt.pvalues, 4),\n", + " },\n", + 
")\n", + "print(f\"table_wt: \\n{table_wt}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 10.3.3 Seasonality\n", + "\n", + "### Example 10.11 Effects of Antidumping Filings" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept 16.7792 32.4286 0.5174 0.6059\n", + "np.log(chempi) 3.2651 0.4929 6.6238 0.0000\n", + "np.log(gas) -1.2781 1.3890 -0.9202 0.3594\n", + "np.log(rtwex) 0.6630 0.4713 1.4068 0.1622\n", + "befile6 0.1397 0.2668 0.5236 0.6016\n", + "affile6 0.0126 0.2787 0.0453 0.9639\n", + "afdec6 -0.5213 0.3019 -1.7264 0.0870\n", + "feb -0.4177 0.3044 -1.3720 0.1728\n", + "mar 0.0591 0.2647 0.2231 0.8239\n", + "apr -0.4515 0.2684 -1.6822 0.0953\n", + "may 0.0333 0.2692 0.1237 0.9018\n", + "jun -0.2063 0.2693 -0.7663 0.4451\n", + "jul 0.0038 0.2788 0.0138 0.9890\n", + "aug -0.1571 0.2780 -0.5650 0.5732\n", + "sep -0.1342 0.2677 -0.5012 0.6172\n", + "oct 0.0517 0.2669 0.1937 0.8467\n", + "nov -0.2463 0.2628 -0.9370 0.3508\n", + "dec 0.1328 0.2714 0.4894 0.6255\n", + "\n" + ] + } + ], + "source": [ + "barium = wool.dataWoo(\"barium\")\n", + "\n", + "# linear regression with seasonal effects:\n", + "reg = smf.ols(\n", + " formula=\"np.log(chnimp) ~ np.log(chempi) + np.log(gas) +\"\n", + " \"np.log(rtwex) + befile6 + affile6 + afdec6 +\"\n", + " \"feb + mar + apr + may + jun + jul +\"\n", + " \"aug + sep + oct + nov + dec\",\n", + " data=barium,\n", + ")\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + } + ], + "metadata": { + "jupytext": { + "formats": 
"notebooks//ipynb,markdown//md,scripts//py" + }, + "kernelspec": { + "display_name": "merino", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/Ch2. The Simple Regression Model.ipynb b/notebooks/Ch2. The Simple Regression Model.ipynb index 94e19fc..535e54a 100644 --- a/notebooks/Ch2. The Simple Regression Model.ipynb +++ b/notebooks/Ch2. The Simple Regression Model.ipynb @@ -103,7 +103,9 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [ { "name": "stdout", diff --git a/notebooks/Ch6. MRA - Further Issues.ipynb b/notebooks/Ch6. MRA - Further Issues.ipynb index 9a4832c..909e053 100644 --- a/notebooks/Ch6. MRA - Further Issues.ipynb +++ b/notebooks/Ch6. MRA - Further Issues.ipynb @@ -109,7 +109,9 @@ "\n", "### Example 6.1: Effects of Pollution on Housing Prices\n", "\n", - "$$\\text{price\\_sc} = \\beta_0 + \\beta_1 \\cdot \\text{nox\\_sc} + \\beta_2 \\cdot \\text{crime\\_sc} + \\beta_3 \\cdot \\text{rooms\\_sc} + \\beta_4 \\cdot \\text{dist\\_sc} + \\beta_5 \\cdot \\text{stratio\\_sc} + u$$" + "$$\\text{price\\_sc} = \\beta_0 + \\beta_1 \\cdot \\text{nox\\_sc} + \\beta_2 \\cdot \\text{crime\\_sc} + \\beta_3 \\cdot \\text{rooms\\_sc} + \\beta_4 \\cdot \\text{dist\\_sc} + \\beta_5 \\cdot \\text{stratio\\_sc} + u$$\n", + "\n", + "\n" ] }, { diff --git a/scripts/Ch10. Basic Regression Analysis with Time Series Data.py b/scripts/Ch10. Basic Regression Analysis with Time Series Data.py new file mode 100644 index 0000000..ad9e34d --- /dev/null +++ b/scripts/Ch10. 
Basic Regression Analysis with Time Series Data.py @@ -0,0 +1,212 @@ +# --- +# jupyter: +# jupytext: +# formats: notebooks//ipynb,markdown//md,scripts//py +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: merino +# language: python +# name: python3 +# --- + +# # 10. Basic Regression Analysis with Time Series Data + +# %pip install matplotlib numpy pandas statsmodels wooldridge -q + +import matplotlib.pyplot as plt +import numpy as np # noqa +import pandas as pd +import statsmodels.formula.api as smf +import wooldridge as wool + +# ## 10.1 Static Time Series Models +# +# $$ y_t = \beta_0 + \beta_1 z_{1t} + \beta_2 z_{2t} + \cdots + \beta_k z_{kt} + u_t $$ +# +# ### Example 10.2 Effects of Inflation and Deficits on Interest Rates + +# + +intdef = wool.dataWoo("intdef") + +# linear regression of static model (Q function avoids conflicts with keywords): +reg = smf.ols(formula='i3 ~ Q("inf") + Q("def")', data=intdef) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +# - + +# ## 10.2 Time Series Data Types in Python +# +# ### 10.2.1 Equispaced Time Series in Python + +# + +barium = wool.dataWoo("barium") +T = len(barium) + +# monthly time series starting Feb. 
1978: +barium.index = pd.date_range(start="1978-02", periods=T, freq="ME") +print(f'barium["chnimp"].head(): \n{barium["chnimp"].head()}\n') +# - + +# plot chnimp (default: index on the x-axis): +plt.plot("chnimp", data=barium, color="black", linestyle="-") +plt.ylabel("chnimp") +plt.xlabel("time") + +# ## 10.3 Other Time Series Models +# +# ### 10.3.1 Finite Distributed Lag Models +# +# $$ y_t = \alpha_0 + \delta_0 z_t + \delta_1 z_{t-1} + \cdots + \delta_k z_{t-k} + u_t $$ +# +# ### Example 10.4 Effects of Personal Exemption on Fertility Rates + +# + +fertil3 = wool.dataWoo("fertil3") +T = len(fertil3) + +# define yearly time series beginning in 1913: +fertil3.index = pd.date_range(start="1913", periods=T, freq="YE").year + +# add all lags of 'pe' up to order 2: +fertil3["pe_lag1"] = fertil3["pe"].shift(1) +fertil3["pe_lag2"] = fertil3["pe"].shift(2) + +# linear regression of model with lags: +reg = smf.ols(formula="gfr ~ pe + pe_lag1 + pe_lag2 + ww2 + pill", data=fertil3) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +# - + +# ### Example 10.4 (continued) + +# + +fertil3 = wool.dataWoo("fertil3") +T = len(fertil3) + +# define yearly time series beginning in 1913: +fertil3.index = pd.date_range(start="1913", periods=T, freq="YE").year + +# add all lags of 'pe' up to order 2: +fertil3["pe_lag1"] = fertil3["pe"].shift(1) +fertil3["pe_lag2"] = fertil3["pe"].shift(2) + +# linear regression of model with lags: +reg = smf.ols(formula="gfr ~ pe + pe_lag1 + pe_lag2 + ww2 + pill", data=fertil3) +results = reg.fit() + +# F test (H0: all pe coefficients are=0): +hypotheses1 = ["pe = 0", "pe_lag1 = 0", "pe_lag2 = 0"] +ftest1 = results.f_test(hypotheses1) +fstat1 = ftest1.statistic +fpval1 = ftest1.pvalue + +print(f"fstat1: {fstat1}\n") +print(f"fpval1: {fpval1}\n") +# - + +# 
calculating the LRP: +b = results.params +b_pe_tot = b["pe"] + b["pe_lag1"] + b["pe_lag2"] +print(f"b_pe_tot: {b_pe_tot}\n") + +# + +# F test (H0: LRP=0): +hypotheses2 = ["pe + pe_lag1 + pe_lag2 = 0"] +ftest2 = results.f_test(hypotheses2) +fstat2 = ftest2.statistic +fpval2 = ftest2.pvalue + +print(f"fstat2: {fstat2}\n") +print(f"fpval2: {fpval2}\n") +# - + +# ### 10.3.2 Trends +# +# ### Example 10.7 Housing Investment and Prices + +# + +hseinv = wool.dataWoo("hseinv") + +# linear regression without time trend: +reg_wot = smf.ols(formula="np.log(invpc) ~ np.log(price)", data=hseinv) +results_wot = reg_wot.fit() + +# print regression table: +table_wot = pd.DataFrame( + { + "b": round(results_wot.params, 4), + "se": round(results_wot.bse, 4), + "t": round(results_wot.tvalues, 4), + "pval": round(results_wot.pvalues, 4), + }, +) +print(f"table_wot: \n{table_wot}\n") + +# + +# linear regression with time trend (data set includes a time variable t): +reg_wt = smf.ols(formula="np.log(invpc) ~ np.log(price) + t", data=hseinv) +results_wt = reg_wt.fit() + +# print regression table: +table_wt = pd.DataFrame( + { + "b": round(results_wt.params, 4), + "se": round(results_wt.bse, 4), + "t": round(results_wt.tvalues, 4), + "pval": round(results_wt.pvalues, 4), + }, +) +print(f"table_wt: \n{table_wt}\n") +# - + +# ### 10.3.3 Seasonality +# +# ### Example 10.11 Effects of Antidumping Filings + +# + +barium = wool.dataWoo("barium") + +# linear regression with seasonal effects: +reg = smf.ols( + formula="np.log(chnimp) ~ np.log(chempi) + np.log(gas) +" + "np.log(rtwex) + befile6 + affile6 + afdec6 +" + "feb + mar + apr + may + jun + jul +" + "aug + sep + oct + nov + dec", + data=barium, +) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") diff --git a/scripts/Ch2. 
The Simple Regression Model.py b/scripts/Ch2. The Simple Regression Model.py index c348f10..a9a7d2e 100644 --- a/scripts/Ch2. The Simple Regression Model.py +++ b/scripts/Ch2. The Simple Regression Model.py @@ -75,6 +75,7 @@ # - + def plot_regression(x, y, data, results, title): # scatter plot and fitted values: plt.plot(x, y, data=data, color="grey", marker="o", linestyle="") diff --git a/scripts/Ch6. MRA - Further Issues.py b/scripts/Ch6. MRA - Further Issues.py index bc19c3b..8852a21 100644 --- a/scripts/Ch6. MRA - Further Issues.py +++ b/scripts/Ch6. MRA - Further Issues.py @@ -70,6 +70,9 @@ # ### Example 6.1: Effects of Pollution on Housing Prices # # $$\text{price\_sc} = \beta_0 + \beta_1 \cdot \text{nox\_sc} + \beta_2 \cdot \text{crime\_sc} + \beta_3 \cdot \text{rooms\_sc} + \beta_4 \cdot \text{dist\_sc} + \beta_5 \cdot \text{stratio\_sc} + u$$ +# +# +# # + # define a function for the standardization: