Skip to content

Commit

Permalink
Merge pull request #122 from jdebacker/example
Browse files Browse the repository at this point in the history
Merging
rickecon authored Aug 26, 2024
2 parents 5e93add + 9ab32cc commit 42a4ff8
Showing 12 changed files with 70 additions and 86 deletions.
11 changes: 9 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -5,11 +5,17 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.12] - 2024-08-26 12:00:00

### Added

- Streamlined the `run_og_usa.py` script to make the example clearer, run faster, and save output in a common directory.

## [0.1.11] - 2024-07-26 12:00:00

### Added

- Adds a module to update Tax-Calculator growth factors using OG-USA simualtions.
- Adds a module to update Tax-Calculator growth factors using OG-USA simulations.


## [0.1.10] - 2024-06-10 12:00:00
@@ -124,7 +130,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Any earlier versions of OG-USA can be found in the [`OG-Core`](https://github.com/PSLmodels/OG-Core) repository [release history](https://github.com/PSLmodels/OG-Core/releases) from [v.0.6.4](https://github.com/PSLmodels/OG-Core/releases/tag/v0.6.4) (Jul. 20, 2021) or earlier.



[0.1.12]: https://github.com/PSLmodels/OG-USA/compare/v0.1.11...v0.1.12
[0.1.11]: https://github.com/PSLmodels/OG-USA/compare/v0.1.10...v0.1.11
[0.1.10]: https://github.com/PSLmodels/OG-USA/compare/v0.1.9...v0.1.10
[0.1.9]: https://github.com/PSLmodels/OG-USA/compare/v0.1.8...v0.1.9
[0.1.8]: https://github.com/PSLmodels/OG-USA/compare/v0.1.7...v0.1.8
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -13,7 +13,8 @@ dependencies:
- dask>=2.30.0
- dask-core>=2.30.0
- distributed>=2.30.1
- paramtools>=0.15.0
- "marshmallow<3.22" # to work around paramtools bug
- "paramtools>=0.18.2" # requires marshmallow>=3.0
- taxcalc>=3.0.0
- sphinx>=3.5.4
- sphinx-book-theme>=0.1.3
64 changes: 19 additions & 45 deletions examples/run_og_usa.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@
import os
import json
import time
import importlib.resources
import copy
from taxcalc import Calculator
import matplotlib.pyplot as plt
from ogusa.calibrate import Calibration
@@ -28,8 +30,9 @@ def main():

# Directories to save data
CUR_DIR = os.path.dirname(os.path.realpath(__file__))
base_dir = os.path.join(CUR_DIR, "OG-USA-Example", "OUTPUT_BASELINE")
reform_dir = os.path.join(CUR_DIR, "OG-USA-Example", "OUTPUT_REFORM")
save_dir = os.path.join(CUR_DIR, "OG-USA-Example")
base_dir = os.path.join(save_dir, "OUTPUT_BASELINE")
reform_dir = os.path.join(save_dir, "OUTPUT_REFORM")

"""
------------------------------------------------------------------------
@@ -44,22 +47,13 @@ def main():
output_base=base_dir,
)
# Update parameters for baseline from default json file
p.update_specifications(
json.load(
open(
os.path.join(
CUR_DIR, "..", "ogusa", "ogusa_default_parameters.json"
)
)
)
)
p.tax_func_type = "GS"
p.age_specific = False
with importlib.resources.open_text(
"ogusa", "ogusa_default_parameters.json"
) as file:
defaults = json.load(file)
p.update_specifications(defaults)
p.tax_func_type = "HSV"
c = Calibration(p, estimate_tax_functions=True, client=client)
# close and delete client bc cache is too large
client.close()
del client
client = Client(n_workers=num_workers, threads_per_worker=1)
d = c.get_dict()
# # additional parameters to change
updated_params = {
@@ -84,43 +78,23 @@ def main():
# In this example the 'reform' is a change to 2017 law (the
# baseline policy is tax law in 2018)
reform_url = (
"github://PSLmodels:examples@main/psl_examples/"
+ "taxcalc/2017_law.json"
"github://PSLmodels:Tax-Calculator@master/taxcalc/"
+ "reforms/2017_law.json"
)

ref = Calculator.read_json_param_objects(reform_url, None)
iit_reform = ref["policy"]

# create new Specifications object for reform simulation
p2 = Specifications(
baseline=False,
num_workers=num_workers,
baseline_dir=base_dir,
output_base=reform_dir,
)
# Update parameters for baseline from default json file
p2.update_specifications(
json.load(
open(
os.path.join(
CUR_DIR, "..", "ogusa", "ogusa_default_parameters.json"
)
)
)
)
p2.tax_func_type = "GS"
p2.age_specific = False
p2 = copy.deepcopy(p)
# Use calibration class to estimate reform tax functions from
# Tax-Calculator, specifying reform for Tax-Calculator in iit_reform
c2 = Calibration(
p2, iit_reform=iit_reform, estimate_tax_functions=True, client=client
)
# close and delete client bc cache is too large
client.close()
del client
client = Client(n_workers=num_workers, threads_per_worker=1)
# update tax function parameters in Specifications Object
d = c2.get_dict()
# # additional parameters to change
# additional parameters to change
updated_params = {
"cit_rate": [[0.35]],
"etr_params": d["etr_params"],
@@ -164,7 +138,7 @@ def main():
op.plot_all(
base_dir,
reform_dir,
os.path.join(CUR_DIR, "OG-USA_example_plots_tables"),
os.path.join(save_dir, "OG-USA_example_plots_tables"),
)
# Create CSV file with output
ot.tp_output_dump_table(
@@ -174,7 +148,7 @@ def main():
reform_tpi,
table_format="csv",
path=os.path.join(
CUR_DIR,
save_dir,
"OG-USA_example_plots_tables",
"macro_time_series_output.csv",
),
@@ -184,7 +158,7 @@ def main():
# save percentage change output to csv file
ans.to_csv(
os.path.join(
CUR_DIR, "OG-USA_example_plots_tables", "ogusa_example_output.csv"
save_dir, "OG-USA_example_plots_tables", "ogusa_example_output.csv"
)
)

2 changes: 1 addition & 1 deletion ogusa/__init__.py
Original file line number Diff line number Diff line change
@@ -11,4 +11,4 @@
from ogusa.utils import *
from ogusa.wealth import *

__version__ = "0.1.11"
__version__ = "0.1.12"
1 change: 0 additions & 1 deletion ogusa/calibrate.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,6 @@
from taxcalc import Records
from ogcore import txfunc, demographics
from ogcore.utils import safe_read_pickle, mkdirs
import pkg_resources


class Calibration:
7 changes: 4 additions & 3 deletions ogusa/get_micro_data.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@
import numpy as np
import os
import pickle
import pkg_resources
import importlib.metadata
from ogcore import utils
from ogusa.constants import DEFAULT_START_YEAR, TC_LAST_YEAR

@@ -183,7 +183,7 @@ def get_data(
del results

# Pull Tax-Calc version for reference
taxcalc_version = pkg_resources.get_distribution("taxcalc").version
taxcalc_version = importlib.metadata.version("taxcalc")

return micro_data_dict, taxcalc_version

@@ -263,7 +263,8 @@ def taxcalc_advance(
"total_tax_liab": calc1.array("combined"),
"payroll_tax_liab": calc1.array("payrolltax"),
"etr": (
(calc1.array("combined") - calc1.array("ubi")) / market_income
(calc1.array("combined") - calc1.array("ubi"))
/ np.maximum(market_income, 1)
),
"year": calc1.current_year * np.ones(length),
"weight": calc1.array("s006"),
4 changes: 3 additions & 1 deletion ogusa/macro_params.py
Original file line number Diff line number Diff line change
@@ -107,7 +107,9 @@ def get_macro_params():

# find g_y
macro_parameters["g_y"] = (
fred_data_q["GDP Per Capita"].pct_change(periods=4, freq="QE").mean()
fred_data_q["GDP Per Capita"]
.pct_change(periods=4, freq="QE", fill_method=None)
.mean()
)

# # estimate r_gov_shift and r_gov_scale
24 changes: 11 additions & 13 deletions ogusa/psid_data_setup.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@
# This is the case when a separate script is calling this function in
# this module
CURDIR = os.path.split(os.path.abspath(__file__))[0]
except:
except NameError:
# This is the case when a Jupyter notebook is calling this function
CURDIR = os.getcwd()
output_fldr = "io_files"
@@ -54,11 +54,13 @@ def prep_data(
# SRC sample families have 1968 family interview numbers less than 3000
raw_df = raw_df[raw_df["ID1968"] < 3000].copy()

raw_df["relation.head"][
(raw_df["year"] < 1983) & (raw_df["relation.head"] == 1)
raw_df.loc[
raw_df.index[(raw_df["year"] < 1983) & (raw_df["relation.head"] == 1)],
"relation.head",
] = 10
raw_df["relation.head"][
(raw_df["year"] < 1983) & (raw_df["relation.head"] == 2)
raw_df.loc[
raw_df.index[(raw_df["year"] < 1983) & (raw_df["relation.head"] == 2)],
"relation.head",
] = 20
head_df = raw_df.loc[
raw_df.index[
@@ -123,7 +125,7 @@ def prep_data(
# pull series of interest using pandas_datareader
fred_data = web.DataReader(["CPIAUCSL"], "fred", start, end)
# Make data annual by averaging over months in year
fred_data = fred_data.resample("A").mean()
fred_data = fred_data.resample("YE").mean()
fred_data["year_data"] = fred_data.index.year
psid_df2 = psid_df.merge(fred_data, how="left", on="year_data")
psid_df = psid_df2
@@ -275,15 +277,11 @@ def prep_data(
# Backfill and then forward fill variables that are constant over time
# within hhid
for item in PSID_CONSTANT_VARS:
rebalanced_data[item] = rebalanced_data.groupby("hh_id")[item].fillna(
method="bfill"
)
rebalanced_data[item] = rebalanced_data.groupby("hh_id")[item].fillna(
method="ffill"
)
rebalanced_data[item] = rebalanced_data.groupby("hh_id")[item].bfill()
rebalanced_data[item] = rebalanced_data.groupby("hh_id")[item].ffill()

### NOTE: we seem to get some cases where the marital status is not constant
# despite trying to set up the indentifcation of a household such that it
# despite trying to set up the identification of a household such that it
# has to be. Why this is happening needs to be checked.

# Fill in year by doing a cumulative counter within each hh_id and then
5 changes: 3 additions & 2 deletions ogusa/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd
import numpy as np
from scipy.stats import kde
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt
import requests
import urllib3
@@ -28,6 +28,7 @@ def read_cbo_forecast():
& (pd.isnull(df["Unnamed: 2"]))
)
]
# df.fillna(value=np.nan, inplace=True)
df.fillna(value="", inplace=True)
df["full_var_name"] = (
df["Unnamed: 0"] + df["Unnamed: 1"] + df["Unnamed: 2"]
@@ -203,7 +204,7 @@ def MVKDE(
k += 1

freq_mat = np.vstack((age_frequency, income_frequency)).T
density = kde.gaussian_kde(freq_mat.T, bw_method=bandwidth)
density = gaussian_kde(freq_mat.T, bw_method=bandwidth)
age_min, income_min = freq_mat.min(axis=0)
age_max, income_max = freq_mat.max(axis=0)
agei, incomei = np.mgrid[
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@

setuptools.setup(
name="ogusa",
version="0.1.11",
version="0.1.12",
author="Jason DeBacker and Richard W. Evans",
license="CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
description="USA calibration for OG-Core",
2 changes: 2 additions & 0 deletions tests/test_calibrate.py
Original file line number Diff line number Diff line change
@@ -30,6 +30,8 @@ def test_read_tax_func_estimate_error():
def test_read_tax_func_estimate():
p = ogcore.Specifications()
p.BW = 11
p.tax_func_type = "DEP"
p.start_year = 2021
tax_func_path = os.path.join(
CUR_PATH, "test_io_data", "TxFuncEst_policy.pkl"
)
31 changes: 15 additions & 16 deletions tests/test_get_micro_data.py
Original file line number Diff line number Diff line change
@@ -219,8 +219,6 @@ def test_get_calculator_puf_from_file():
def test_get_data(baseline, dask_client):
"""
Test of get_micro_data.get_data() function
Note that this test may fail if the Tax-Calculator is not v 3.2.2
"""
expected_data = utils.safe_read_pickle(
os.path.join(CUR_PATH, "test_io_data", "micro_data_dict_for_tests.pkl")
@@ -238,44 +236,45 @@ def test_get_data(baseline, dask_client):
test_data2 = {x: test_data[x] for x in keys}
for k, v in test_data2.items():
try:
assert_frame_equal(expected_data[k], v)
# check that columns are the same
assert set(expected_data[k].columns) == set(v.columns)
# check that test data contains at least one non-missing value
assert v.count().sum() > 0
except KeyError:
pass


def test_taxcalc_advance():
"""
Test of the get_micro_data.taxcalc_advance() function
Note that this test may fail if the Tax-Calculator is not v 3.2.1
"""
expected_dict = utils.safe_read_pickle(
os.path.join(CUR_PATH, "test_io_data", "tax_dict_for_tests.pkl")
)
test_dict = get_micro_data.taxcalc_advance(
2028, {}, {}, "cps", None, None, 2014, 2028
)
for k, v in test_dict.items():
assert np.allclose(expected_dict[k], v, equal_nan=True)
# check that keys are the same
assert set(expected_dict.keys()) == set(test_dict.keys())
for _, v in test_dict.items():
# check that test data returns some non-zero values
assert np.count_nonzero(v) > 0


@pytest.mark.local
def test_cap_inc_mtr():
"""
Test of the get_micro_data.cap_inc_mtr() function
Note that this test may fail if the Tax-Calculator is not v 3.2.1
"""
calc1 = get_micro_data.get_calculator(
calculator_start_year=2028, iit_reform={}, data="cps"
)
calc1.advance_to_year(2028)
expected = np.genfromtxt(
os.path.join(
CUR_PATH, "test_io_data", "mtr_combined_capinc_for_tests.csv"
),
delimiter=",",
)
test_data = get_micro_data.cap_inc_mtr(calc1)

assert np.allclose(expected, test_data, equal_nan=True)
# check that test data returns some non-zero values
assert np.count_nonzero(test_data) > 0
# assert mtrs < 1
assert test_data.max() < 1
# assert mtrs > -1
assert test_data.min() > -1

0 comments on commit 42a4ff8

Please sign in to comment.