-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #47 from PSLmodels/dataset-update-3
Add tests on significant variables
- Loading branch information
Showing
7 changed files
with
259 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
DSI: 10691123.8 | ||
EIC: 42779552.39 | ||
FLPDYR: 421632002232.4199 | ||
MARS: 353242147.99 | ||
MIDR: 732548.9000000003 | ||
PT_SSTB_income: 0.0 | ||
PT_binc_w2_wages: 0.0 | ||
PT_ubia_property: 0.0 | ||
RECID: 30013006204532.71 | ||
XTOT: 369094112.43 | ||
a_lineno: 322311625.18 | ||
age_head: 9853008503.07 | ||
age_spouse: 3490884180.649999 | ||
agi_bin: 1074293553.8700001 | ||
blind_head: 4228589.040000001 | ||
blind_spouse: 729846.03 | ||
cmbtp: 114433481478.58578 | ||
data_source: 164759286.17999995 | ||
e00200: 10310664684015.027 | ||
e00200p: 7246367847990.525 | ||
e00200s: 3064296834644.767 | ||
e00300: 137183678413.40382 | ||
e00400: 94743506150.49107 | ||
e00600: 405990141519.5385 | ||
e00650: 285099022083.43726 | ||
e00700: 48771545399.6015 | ||
e00800: 19386714549.7095 | ||
e00900: 429539440385.2141 | ||
e00900p: 361522073167.05817 | ||
e00900s: 68017367420.70369 | ||
e01100: 7285729712.1093 | ||
e01200: -60216853854.91481 | ||
e01400: 552468020849.5068 | ||
e01500: 1641191913466.6348 | ||
e01700: 1000550811057.0841 | ||
e02000: 927149787725.4258 | ||
e02100: -6522807775.893997 | ||
e02100p: -6333314477.3977 | ||
e02100s: -189493275.0666 | ||
e02300: 23258519971.6033 | ||
e02400: 1491419721114.5906 | ||
e03150: 18515980459.5357 | ||
e03210: 15544627964.346104 | ||
e03220: 1637964491.1388001 | ||
e03230: 6926332334.3152 | ||
e03240: 14309653954.092802 | ||
e03270: 40914871477.34481 | ||
e03290: 4905336721.6764 | ||
e03300: 30203493523.927105 | ||
e03400: 930321104.7312 | ||
e03500: 11944679455.189001 | ||
e07240: 1632108958.2028 | ||
e07260: 2791734813.2793 | ||
e07300: 20917948400.8361 | ||
e07400: 3527062529.777301 | ||
e07600: 963701335.9661999 | ||
e09700: 37001598.5893 | ||
e09800: 21355072.916999996 | ||
e09900: 10709816534.181198 | ||
e11200: 3186861701.7149005 | ||
e17500: 235042375652.4932 | ||
e18400: 508102620626.47064 | ||
e18500: 364777444041.7581 | ||
e19200: 521982783400.69037 | ||
e19800: 272428333065.87024 | ||
e20100: 64814165943.684494 | ||
e20400: 213136239172.01752 | ||
e24515: 46065816558.2304 | ||
e24518: 12744333564.099298 | ||
e26270: 733636357457.6952 | ||
e27200: 11361348668.954098 | ||
e32800: 29847384812.3065 | ||
e58990: 4155217553.2594 | ||
e62900: 23309665135.4052 | ||
e87521: 49410862467.992905 | ||
e87530: 25810835097.957104 | ||
elderly_dependents: 40361.42 | ||
f2441: 15395770.6 | ||
f6251: 12829284.17 | ||
ffpos: 236413412.58999997 | ||
fips: 5720027978.5199995 | ||
g20500: 8881208151.5932 | ||
h_seq: 9532981967967.932 | ||
housing_ben: 0.0 | ||
k1bx14p: -30532461972.4528 | ||
k1bx14s: 2364706719.2499 | ||
mcaid_ben: 0.0 | ||
mcare_ben: 0.0 | ||
n1820: 18744271.849999998 | ||
n21: 266997611.39000002 | ||
n24: 77430155.88999997 | ||
nu06: 28512728.69 | ||
nu13: 65413305.05000002 | ||
nu18: 94116118.29 | ||
other_ben: 0.0 | ||
p08000: 209646233.93069997 | ||
p22250: -72875477887.86668 | ||
p23250: 1188459035726.539 | ||
pencon_p: 256173893908.90164 | ||
pencon_s: 115418682766.19753 | ||
s006: 385270311428.83453 | ||
snap_ben: 0.0 | ||
ssi_ben: 0.0 | ||
tanf_ben: 0.0 | ||
vet_ben: 0.0 | ||
wic_ben: 0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,85 @@ | ||
import os | ||
import pytest | ||
import yaml | ||
from pathlib import Path | ||
|
||
# Test mode comes from the TEST_MODE environment variable and defaults to
# "lite"; only the value "full" makes the flat-file build request reweighting.
test_mode = os.environ.get("TEST_MODE", "lite")

# Directory containing this test module and its YAML fixture.
FOLDER = Path(__file__).parent

# Reference totals, keyed by variable name, that the weighted flat-file
# totals are compared against.
with open(FOLDER / "tc_variable_totals.yaml", encoding="utf-8") as f:
    tc_variable_totals = yaml.safe_load(f)

# Variable metadata shipped with the package. It is loaded at module scope
# because both the exemption list and the parametrized test read it.
with open(
    FOLDER.parent
    / "tax_microdata_benchmarking"
    / "taxcalc_variable_metadata.yaml",
    encoding="utf-8",
) as f:
    taxcalc_variable_metadata = yaml.safe_load(f)
|
||
# Variables excluded from the totals comparison; each entry keeps the reason
# it was exempted.
EXEMPTED_VARIABLES = [
    "DSI",  # Issue here but deprioritized.
    "EIC",  # PUF-PE file almost certainly more correct by including CPS data
    "MIDR",  # Issue here but deprioritized.
    "RECID",  # No reason to compare.
    "a_lineno",  # No reason to compare.
    "agi_bin",  # No reason to compare.
    "blind_spouse",  # Issue here but deprioritized.
    "cmbtp",  # No reason to compare.
    "data_source",  # No reason to compare.
    "s006",  # No reason to compare.
    "h_seq",  # No reason to compare.
    "fips",  # No reason to compare.
    "ffpos",  # No reason to compare.
    "p23250",  # PE-PUF likely closer to truth than taxdata (needs triple check).
    "e01200",  # Unknown but deprioritized for now.
    "e17500",  # Unknown but deprioritized for now.
    "e18500",  # Unknown but deprioritized for now.
    "e02100",  # Farm income, unsure who's closer.
]

# Exempt any variable split between filer and spouse for now.
# NOTE(review): the suffix test matches every metadata name ending in "p" or
# "s", not only filer/spouse splits -- confirm that names such as
# "elderly_dependents" are meant to be skipped as well.
EXEMPTED_VARIABLES.extend(
    name
    for name in taxcalc_variable_metadata["read"]
    if name.endswith(("p", "s"))
)
|
||
|
||
def pytest_namespace():
    """Return a fresh mapping with ``flat_file`` set to ``None``.

    NOTE(review): the name matches pytest's old ``pytest_namespace`` hook,
    which pytest 4.0 removed. Nothing in this file calls the function, and
    ``test_flat_file_builds`` assigns ``pytest.flat_file`` directly --
    confirm whether this definition is still needed.
    """
    return dict(flat_file=None)
|
||
|
||
@pytest.mark.dependency()
def test_flat_file_builds():
    """Build the 2021 stacked flat file and share it with later tests.

    The result is stored on ``pytest.flat_file`` so that
    ``test_tc_variable_totals`` can read it without rebuilding. Reweighting
    is requested only when ``test_mode`` is ``"full"``.
    """
    # Imported inside the test so an import failure is reported as a failure
    # of this test rather than breaking collection of the whole module.
    from tax_microdata_benchmarking.create_flat_file import (
        create_stacked_flat_file,
    )

    # Build once and keep the result.
    pytest.flat_file = create_stacked_flat_file(
        2021, reweight=test_mode == "full"
    )
|
||
|
||
# Every variable with a stored reference total, minus the exempted ones; this
# list parametrizes test_tc_variable_totals.
variables_to_test = [
    name for name in tc_variable_totals if name not in EXEMPTED_VARIABLES
]
|
||
|
||
@pytest.mark.dependency(depends=["test_flat_file_builds"])
@pytest.mark.parametrize("variable", variables_to_test)
def test_tc_variable_totals(variable):
    """Compare one weighted flat-file total with its stored reference total.

    The weighted total is ``sum(flat_file[variable] * flat_file.s006)``, read
    from the flat file that ``test_flat_file_builds`` stored on
    ``pytest.flat_file``. The check passes when the total is within 45% of
    the reference in relative terms OR within 30bn in absolute terms, so a
    failure requires both gaps to be exceeded.

    Args:
        variable: Name of the variable to check; a key of both
            ``tc_variable_totals`` and the ``"read"`` metadata section.
    """
    # Tolerances: relative gap as a fraction, absolute gap in billions.
    max_relative_gap = 0.45
    max_absolute_gap_bn = 30

    description = taxcalc_variable_metadata["read"][variable].get("desc")
    flat_file = pytest.flat_file
    total = (flat_file[variable] * flat_file.s006).sum()
    expected = tc_variable_totals[variable]

    if expected == 0:
        # If the taxdata file has a zero total, we'll assume the PE file is
        # still correct. Returning here also avoids dividing by zero below.
        return

    relative_gap = total / expected - 1
    # More than 45% AND more than 30bn off taxdata is a failure.
    assert (
        abs(relative_gap) < max_relative_gap
        or abs(total / 1e9 - expected / 1e9) < max_absolute_gap_bn
    ), (
        f"{variable} ({description}) differs to tax-data by "
        f"{relative_gap:.1%} "
        f"({total/1e9:.1f}bn vs {expected/1e9:.1f}bn)"
    )