Skip to content

Commit

Permalink
Merge pull request #4 from PSLmodels/nikhilwoodruff/issue2
Browse files Browse the repository at this point in the history
Add initial PE-based flat file
  • Loading branch information
nikhilwoodruff authored Feb 8, 2024
2 parents f09ade1 + a32e01e commit fc30430
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Checks run against every pull request: formatting lint plus a container
# build.
name: Pull request

on: pull_request

jobs:
  # Fails if any Python file is not formatted with black at line length 79.
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v5
      - uses: psf/black@stable
        with:
          options: ". -l 79 --check"
  # Builds the Docker image. The Dockerfile runs `make install` and
  # `make test`, so a failing test suite fails this job. Nothing is pushed.
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Build container
        run: docker build . -t ghcr.io/pslmodels/tax-microdata-benchmarking

35 changes: 35 additions & 0 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Runs on every push to master: formatting lint, then build the container
# (which runs the test suite) and publish it to the GitHub Container Registry.
name: Push

on:
  push:
    branches:
      - master

jobs:
  # Fails if any Python file is not formatted with black at line length 79.
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v5
      - uses: psf/black@stable
        with:
          options: ". -l 79 --check"
  test:
    name: Test
    runs-on: ubuntu-latest
    # GITHUB_TOKEN needs package write access to push the image to ghcr.io;
    # declare it explicitly so the job does not depend on the repository's
    # default token permissions.
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      # The Dockerfile runs `make install` and `make test`, so the image is
      # only produced (and later pushed) when the tests pass. It is built
      # once; the previous second, identical build step was redundant.
      - name: Build container
        run: docker build . -t ghcr.io/pslmodels/tax-microdata-benchmarking
      - name: Log in to the Container registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Push container
        run: docker push ghcr.io/pslmodels/tax-microdata-benchmarking
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Binary .h5 data files (presumably downloaded/cached datasets — confirm).
**/*.h5
# Compiled Python bytecode.
**/*.pyc
# Gzipped CSV outputs such as the generated tax_microdata.csv.gz.
**/*.csv.gz
# Packaging metadata created by `pip install -e .`.
**/*.egg-info
5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Image that installs the package and runs its test suite at build time, so
# `docker build` fails whenever installation or the tests fail.
FROM python:3.9
WORKDIR /app
# Copy the whole repository (build context) into /app.
COPY . .
# `make install` runs `pip install -e .`
RUN make install
# `make test` runs `pytest .`
RUN make test
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# None of these targets produce a file of the same name; mark them phony so a
# stray file or directory called e.g. `test` cannot make them appear up to date.
.PHONY: install test format flat_file

# Install the package (and its dependencies) in editable mode.
install:
	pip install -e .

# Run the test suite.
test:
	pytest .

# Reformat the code base with black at line length 79.
format:
	black . -l 79

# Generate tax_microdata.csv.gz in the current directory.
flat_file:
	python tax_microdata_benchmarking/create_flat_file.py
14 changes: 14 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from setuptools import setup, find_packages

# Pinned model dependencies plus the tooling (pytest, black) that the
# Makefile targets invoke after `pip install -e .`.
REQUIREMENTS = [
    "policyengine_us==0.648.0",
    "taxcalc==3.4.1",
    "paramtools==0.18.1",
    "pytest",
    "black",
]

# Package metadata handed to setuptools.
PACKAGE_METADATA = dict(
    name="tax_microdata_benchmarking",
    version="0.1.0",
    packages=find_packages(),
    install_requires=REQUIREMENTS,
)

setup(**PACKAGE_METADATA)
90 changes: 90 additions & 0 deletions tax_microdata_benchmarking/create_flat_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Running this file creates tax_microdata.csv.gz in the current working
# directory; run it from the root of the repo so the file lands there.

import taxcalc as tc
from policyengine_us import Microsimulation
from policyengine_us.model_api import *
import numpy as np
import pandas as pd


# Common base for the tc_* alias variables declared below: a float-valued
# variable defined per tax unit with a yearly definition period.
class TaxCalcVariableAlias(Variable):
    value_type = float
    entity = TaxUnit
    definition_period = YEAR
    label = "TaxCalc Variable Alias"


# Alias exposing the tax unit's "tax_unit_id" under the name tc_RECID.
class tc_RECID(TaxCalcVariableAlias):
    def formula(tax_unit, period, parameters):
        # The identifier is passed through unchanged.
        record_id = tax_unit("tax_unit_id", period)
        return record_id


# Alias translating the tax unit's "filing_status" name into an integer code.
class tc_MARS(TaxCalcVariableAlias):
    def formula(tax_unit, period, parameters):
        # Integer code assigned to each filing-status name.
        status_codes = {
            "SINGLE": 1,
            "JOINT": 2,
            "SEPARATE": 3,
            "HEAD_OF_HOUSEHOLD": 4,
            "WIDOW": 5,
        }
        status_names = tax_unit("filing_status", period).decode_to_str()
        # Series.map looks each name up in the table; a name missing from
        # the table maps to NaN.
        as_series = pd.Series(status_names)
        return as_series.map(status_codes)


# Alias: "employment_income" of the tax unit's head, summed per tax unit.
class tc_e00200p(TaxCalcVariableAlias):
    def formula(tax_unit, period, parameters):
        members = tax_unit.members
        wages = members("employment_income", period)
        head_flag = members("is_tax_unit_head", period)
        # Multiplying by the flag zeroes out everyone who is not the head
        # before summing over the unit's members.
        head_wages = wages * head_flag
        return tax_unit.sum(head_wages)


# Alias: "employment_income" of the tax unit's spouse, summed per tax unit.
class tc_e00200s(TaxCalcVariableAlias):
    def formula(tax_unit, period, parameters):
        members = tax_unit.members
        wages = members("employment_income", period)
        spouse_flag = members("is_tax_unit_spouse", period)
        # Multiplying by the flag zeroes out everyone who is not the spouse
        # before summing over the unit's members.
        spouse_wages = wages * spouse_flag
        return tax_unit.sum(spouse_wages)


# Alias combining the head (tc_e00200p) and spouse (tc_e00200s) components,
# which are listed in `adds`.
class tc_e00200(TaxCalcVariableAlias):
    adds = ["tc_e00200p", "tc_e00200s"]


# Reform that registers every tc_* alias variable defined in this module.
class taxcalc_extension(Reform):
    def apply(self):
        alias_variables = (
            tc_RECID,
            tc_MARS,
            tc_e00200p,
            tc_e00200s,
            tc_e00200,
        )
        self.add_variables(*alias_variables)


def create_flat_file():
    """Compute every ``tc_*`` variable and write them to a gzipped CSV.

    Builds a ``Microsimulation`` on the ``enhanced_cps_2023`` dataset with
    the ``taxcalc_extension`` reform applied, calculates each variable whose
    name starts with ``tc_``, and stores it in a column named after the
    variable with that prefix removed. The table is written to
    ``tax_microdata.csv.gz`` relative to the current working directory.

    Returns:
        None. The only result is the file written to disk.
    """
    sim = Microsimulation(
        reform=taxcalc_extension, dataset="enhanced_cps_2023"
    )

    # Collect the names first so the variable registry is not being iterated
    # while calculations run.
    alias_names = [
        name
        for name in sim.tax_benefit_system.variables
        if name.startswith("tc_")
    ]
    df = pd.DataFrame(
        {
            # Drop the "tc_" prefix to get the output column name.
            name[3:]: sim.calculate(name).values.astype(np.float64)
            for name in alias_names
        }
    )

    # Extra quality-control checks to do with different data types, nothing
    # major. Columns are assigned with bracket indexing: attribute-style
    # assignment (df.col = ...) only updates a column that already exists and
    # otherwise just sets a Python attribute, silently dropping the data.
    df["e00200"] = df["e00200p"] + df["e00200s"]
    df["RECID"] = df["RECID"].astype(int)
    df["MARS"] = df["MARS"].astype(int)

    df.to_csv("tax_microdata.csv.gz", index=False, compression="gzip")


# Generate the flat file when this module is executed directly as a script.
if __name__ == "__main__":
    create_flat_file()
11 changes: 11 additions & 0 deletions tests/test_basic_flat_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
def test_flat_file_runs():
    """Smoke test: generate the flat file and run it through taxcalc.

    Passes as long as file creation, loading the file as ``tc.Records`` and
    ``calc_all()`` complete without raising; no output values are checked.
    """
    import taxcalc as tc
    from tax_microdata_benchmarking.create_flat_file import create_flat_file

    # Writes tax_microdata.csv.gz into the current working directory.
    create_flat_file()

    records = tc.Records("tax_microdata.csv.gz")
    calculator = tc.Calculator(records=records, policy=tc.Policy())
    calculator.calc_all()

0 comments on commit fc30430

Please sign in to comment.