# ==== .github/workflows/pr.yml (new file) ====
name: Pull request

on: pull_request

jobs:
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v5
      # Check-only formatting gate; line length matches `make format`.
      - uses: psf/black@stable
        with:
          options: ". -l 79 --check"
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      # The Dockerfile runs `make test`, so the image only builds if the
      # test suite passes.
      - name: Build container
        run: docker build . -t ghcr.io/pslmodels/tax-microdata-benchmarking

# ==== .github/workflows/push.yml (new file) ====
name: Push

on:
  push:
    branches:
      - master

jobs:
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v5
      # Check-only formatting gate; line length matches `make format`.
      - uses: psf/black@stable
        with:
          options: ". -l 79 --check"
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      # Build once: the Dockerfile runs `make test`, so this step doubles as
      # the test run. (The original workflow repeated this identical step
      # after the registry login.)
      - name: Build container
        run: docker build . -t ghcr.io/pslmodels/tax-microdata-benchmarking
      - name: Log in to the Container registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Push container
        run: docker push ghcr.io/pslmodels/tax-microdata-benchmarking

# ==== .gitignore (new file) ====
**/*.h5
**/*.pyc
**/*.csv.gz
**/*.egg-info

# ==== Dockerfile (new file) ====
FROM python:3.9
WORKDIR /app
COPY . .
# Install the package in editable mode, then run the test suite at build
# time so a failing test fails the image build.
RUN make install
RUN make test

# ==== Makefile (new file) ====
install:
	pip install -e .

test:
	pytest .

format:
	black . -l 79

# The script lives in the tax_microdata_benchmarking package in this change;
# the original target pointed at initial_flat_file/create_flat_file.py.
# NOTE(review): confirm no initial_flat_file/ directory is still in use.
flat_file:
	python tax_microdata_benchmarking/create_flat_file.py

# ==== setup.py (new file) ====
from setuptools import setup, find_packages

setup(
    name="tax_microdata_benchmarking",
    version="0.1.0",
    packages=find_packages(),
    install_requires=[
        "policyengine_us==0.648.0",
        "taxcalc==3.4.1",
        "paramtools==0.18.1",
        "pytest",
        "black",
    ],
)

# ==== tax_microdata_benchmarking/create_flat_file.py (new file) ====
# This file should create tax_microdata.csv.gz in the current working
# directory (the root of the repo when run via `make flat_file`).
import taxcalc as tc
from policyengine_us import Microsimulation
from policyengine_us.model_api import *
import numpy as np
import pandas as pd


class TaxCalcVariableAlias(Variable):
    """Shared settings for variables that mirror taxcalc input columns.

    Every alias is a yearly, float-valued, tax-unit-level variable. The
    subclasses below are named ``tc_<NAME>``; ``create_flat_file`` strips
    the ``tc_`` prefix to obtain the output column name.
    """

    label = "TaxCalc Variable Alias"
    definition_period = YEAR
    entity = TaxUnit
    value_type = float


class tc_RECID(TaxCalcVariableAlias):
    """Record identifier: the tax unit's ``tax_unit_id``."""

    def formula(tax_unit, period, parameters):
        return tax_unit("tax_unit_id", period)


class tc_MARS(TaxCalcVariableAlias):
    """Filing status encoded as an integer code from 1 to 5."""

    def formula(tax_unit, period, parameters):
        filing_status = tax_unit("filing_status", period).decode_to_str()
        CODE_MAP = {
            "SINGLE": 1,
            "JOINT": 2,
            "SEPARATE": 3,
            "HEAD_OF_HOUSEHOLD": 4,
            "WIDOW": 5,
        }
        # Statuses missing from CODE_MAP come out as NaN here;
        # create_flat_file rejects them before casting to int.
        return pd.Series(filing_status).map(CODE_MAP)


class tc_e00200p(TaxCalcVariableAlias):
    """Employment income of the tax unit head."""

    def formula(tax_unit, period, parameters):
        person = tax_unit.members
        employment_income = person("employment_income", period)
        is_tax_unit_head = person("is_tax_unit_head", period)
        # Multiplying by the head flag zeroes out everyone else before the
        # per-tax-unit sum.
        return tax_unit.sum(employment_income * is_tax_unit_head)


class tc_e00200s(TaxCalcVariableAlias):
    """Employment income of the tax unit spouse."""

    def formula(tax_unit, period, parameters):
        person = tax_unit.members
        employment_income = person("employment_income", period)
        is_tax_unit_spouse = person("is_tax_unit_spouse", period)
        # Multiplying by the spouse flag zeroes out everyone else before the
        # per-tax-unit sum.
        return tax_unit.sum(employment_income * is_tax_unit_spouse)


class tc_e00200(TaxCalcVariableAlias):
    """Total employment income, declared as head plus spouse components.

    NOTE(review): relies on the framework summing the variables listed in
    ``adds``; create_flat_file also recomputes this column explicitly.
    """

    adds = [
        "tc_e00200p",
        "tc_e00200s",
    ]


class taxcalc_extension(Reform):
    """Reform that registers all ``tc_*`` alias variables."""

    def apply(self):
        self.add_variables(
            tc_RECID,
            tc_MARS,
            tc_e00200p,
            tc_e00200s,
            tc_e00200,
        )


def create_flat_file(output_path="tax_microdata.csv.gz"):
    """Write the ``tc_*`` variables to a gzip-compressed CSV flat file.

    Runs a ``Microsimulation`` on the ``enhanced_cps_2023`` dataset with
    ``taxcalc_extension`` applied, calculates every variable whose name
    starts with ``tc_``, and stores each one as a column named without
    that prefix.

    Args:
        output_path: Destination of the CSV. A relative path is resolved
            against the current working directory. Defaults to
            ``tax_microdata.csv.gz``.

    Raises:
        ValueError: If any record has a filing status with no MARS code.
    """
    sim = Microsimulation(
        reform=taxcalc_extension, dataset="enhanced_cps_2023"
    )

    prefix = "tc_"
    df = pd.DataFrame(
        {
            name[len(prefix) :]: sim.calculate(name).values.astype(
                np.float64
            )
            for name in sim.tax_benefit_system.variables
            if name.startswith(prefix)
        }
    )

    # Extra quality-control checks to do with different data types, nothing
    # major. Item assignment is used on purpose: attribute assignment
    # (df.col = ...) silently creates a plain attribute instead of a column
    # when the column does not already exist.
    # Recomputed so the total is exactly the sum of its parts
    # (NOTE(review): taxcalc's Records loader is believed to check this).
    df["e00200"] = df["e00200p"] + df["e00200s"]

    if df["MARS"].isna().any():
        raise ValueError(
            "Some tax units have a filing status with no MARS code."
        )
    df["RECID"] = df["RECID"].astype(int)
    df["MARS"] = df["MARS"].astype(int)

    df.to_csv(output_path, index=False, compression="gzip")


if __name__ == "__main__":
    create_flat_file()


# ==== tests/test_basic_flat_file.py (new file) ====
def test_flat_file_runs():
    """Smoke test: the generated flat file loads and runs in taxcalc."""
    import taxcalc as tc
    from tax_microdata_benchmarking.create_flat_file import create_flat_file

    create_flat_file()

    input_data = tc.Records("tax_microdata.csv.gz")
    policy = tc.Policy()
    simulation = tc.Calculator(records=input_data, policy=policy)

    simulation.calc_all()