Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial PE-based flat file #4

Merged
merged 17 commits into from
Feb 8, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update dependencies and pass test
nikhilwoodruff committed Feb 8, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 7d4f061d518f6432b901af05b52e934cd8dff5f0
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
**/*.h5
**/*.pyc
**/*.csv.gz
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -6,3 +6,6 @@ test:

# Reformat the tree with black, using a 79-character line length.
format:
black . -l 79

# Generate the flat file by running the creation script with python.
flat_file:
python initial_flat_file/create_flat_file.py
56 changes: 56 additions & 0 deletions initial_flat_file/create_flat_file.py
Original file line number Diff line number Diff line change
@@ -1 +1,57 @@
# This file should create tax_microdata.csv.gz in the root of the repo.

import taxcalc as tc
from policyengine_us import Microsimulation
from policyengine_us.model_api import *
import numpy as np
import pandas as pd


class TaxCalcVariableAlias(Variable):
    """Common base for the ``tc_``-prefixed alias variables in this file.

    Subclasses supply only a ``formula``; the metadata below is shared.
    ``create_flat_file`` exports every variable whose name starts with
    ``tc_`` as a column named after the remainder of the variable name.
    """

    # Values are stored as floats.
    value_type = float
    # Defined on the TaxUnit entity.
    entity = TaxUnit
    # Uses the YEAR definition period.
    definition_period = YEAR
    # Generic label shared by all aliases.
    label = "TaxCalc Variable Alias"


class tc_RECID(TaxCalcVariableAlias):
    """Alias that exposes ``tax_unit_id`` under the name ``tc_RECID``."""

    def formula(tax_unit, period, parameters):
        # Pass the tax unit identifier through unchanged.
        record_id = tax_unit("tax_unit_id", period)
        return record_id


class tc_MARS(TaxCalcVariableAlias):
    """Alias that translates ``filing_status`` names into numeric codes."""

    def formula(tax_unit, period, parameters):
        """Return the numeric filing-status code for each tax unit.

        The decoded ``filing_status`` strings are looked up in the code map
        below. A status with no entry in the map yields NaN rather than
        raising, so new filing-status names must be added here explicitly.

        Returns:
            A float NumPy array (matching the ``value_type`` declared on
            ``TaxCalcVariableAlias``), one element per tax unit.
        """
        mars_codes = {
            "SINGLE": 1,
            "JOINT": 2,
            "SEPARATE": 3,
            "HEAD_OF_HOUSEHOLD": 4,
            "WIDOW": 5,
        }
        status_names = tax_unit("filing_status", period).decode_to_str()
        codes = pd.Series(status_names).map(mars_codes)
        # Return a plain array instead of an index-carrying pandas Series so
        # the result does not depend on implicit Series-to-array conversion.
        return codes.to_numpy(dtype=float)


class taxcalc_extension(Reform):
    """Reform that registers the ``tc_`` alias variables defined in this file."""

    def apply(self):
        # Every alias that should be added to the tax-benefit system.
        alias_variables = (tc_RECID, tc_MARS)
        self.add_variables(*alias_variables)


def create_flat_file():
    """Compute every ``tc_`` variable and write them to ``tax_microdata.csv.gz``.

    A ``Microsimulation`` is built on the ``enhanced_cps_2023`` dataset with
    the ``taxcalc_extension`` reform applied. Each variable whose name starts
    with ``tc_`` is calculated and stored in a column named after the part of
    the variable name following that prefix. The result is written as a
    gzip-compressed CSV, without the index, to a path relative to the current
    working directory.
    """
    alias_prefix = "tc_"
    simulation = Microsimulation(
        dataset="enhanced_cps_2023", reform=taxcalc_extension
    )

    alias_names = [
        name
        for name in simulation.tax_benefit_system.variables
        if name.startswith(alias_prefix)
    ]

    flat_file = pd.DataFrame()
    for alias_name in alias_names:
        # Drop the prefix so the column carries the bare name.
        column_name = alias_name[len(alias_prefix):]
        flat_file[column_name] = simulation.calculate(alias_name)

    flat_file.to_csv("tax_microdata.csv.gz", compression="gzip", index=False)


# Build the flat file only when this module is executed as a script, so that
# importing create_flat_file (as the test does) has no side effects.
if __name__ == "__main__":
    create_flat_file()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -2,3 +2,4 @@ policyengine_us
taxcalc
pytest
black
paramtools
3 changes: 3 additions & 0 deletions tests/test_basic_flat_file.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
def test_flat_file_runs():
import taxcalc as tc
from initial_flat_file.create_flat_file import create_flat_file

create_flat_file()

input_data = tc.Records("tax_microdata.csv.gz")
policy = tc.Policy()