Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update from forked repo #11

Merged
merged 5 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions taxdata/puf/dcpentargetamt2011.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
,total,u26,26u35,35u45,45u55,55u60,60u65,65u75,75plus
total,220.5554,3.2668,25.0779,49.9194,70.3845,37.9728,23.3431,9.9572,0.6337
u5K,1.1581,0.0411,0.0841,0.1733,0.3118,0.1667,0.2209,0.1563,0.0040
5u10K,1.1304,0.0743,0.1418,0.1530,0.2795,0.1545,0.1692,0.1414,0.0166
10u15K,1.7084,0.1143,0.2178,0.2361,0.3626,0.3336,0.2293,0.1764,0.0384
15u20K,2.8057,0.2239,0.3736,0.4198,0.7278,0.3787,0.4233,0.2336,0.0249
20u25K,4.3694,0.2873,0.6468,0.6841,1.0589,0.6781,0.6144,0.3710,0.0287
25u30K,5.6387,0.2935,0.9197,1.0006,1.3722,1.1272,0.6375,0.2767,0.0112
30u40K,16.1258,0.5373,2.6527,3.0594,4.3521,2.7221,1.9665,0.7954,0.0402
40u50K,19.1377,0.4829,3.0644,4.0657,5.6255,3.0436,1.9112,0.8700,0.0743
50u75K,48.6375,0.7072,6.5134,10.8502,14.9928,8.2860,5.1546,2.0320,0.1013
75Ku0.1M,37.2251,0.2379,4.2673,9.0029,12.1241,6.3733,3.8528,1.3138,0.0529
0.1u0.2M,55.5740,0.2371,4.6679,13.9483,18.9280,9.8799,5.5415,2.2351,0.1361
0.2u0.5M,21.2800,0.0211,1.3460,5.0850,8.0092,3.7434,1.9665,1.0250,0.0838
0.5u1.0M,4.2527,0.0022,0.1305,0.9381,1.6156,0.8203,0.4980,0.2246,0.0063
1.0u1.5M,0.8252,0.0022,0.0276,0.1866,0.3378,0.1288,0.0812,0.0632,0.0063
1.5u2.0M,0.2767,0.0022,0.0071,0.0511,0.1204,0.0507,0.0297,0.0176,0.0063
2u5M,0.3171,0.0010,0.0124,0.0535,0.1321,0.0620,0.0354,0.0189,0.0017
5Mplus,0.0930,0.0013,0.0046,0.0117,0.0340,0.0240,0.0111,0.0058,0.0006
5u10M,0.0674,0.0010,0.0035,0.0088,0.0231,0.0194,0.0074,0.0038,0.0005
10u30M,0.0256,0.0003,0.0011,0.0029,0.0109,0.0046,0.0037,0.0020,0.0001
21 changes: 21 additions & 0 deletions taxdata/puf/dcpentargetcnt2011.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
,total,u26,26u35,35u45,45u55,55u60,60u65,65u75,75plus
total,46.9781,2.7796,8.9035,11.2926,12.9201,5.8450,3.5901,1.5212,0.1261
u5K,1.1298,0.2766,0.2415,0.1942,0.1904,0.0785,0.0679,0.0673,0.0135
5u10K,1.3069,0.3113,0.3088,0.2089,0.2054,0.0926,0.0908,0.0747,0.0145
10u15K,1.6370,0.3211,0.3539,0.2724,0.2990,0.1508,0.1208,0.0995,0.0195
15u20K,2.1262,0.3323,0.4603,0.3810,0.4441,0.1947,0.1833,0.1186,0.0119
20u25K,2.7738,0.3519,0.6175,0.5553,0.5872,0.3019,0.2284,0.1194,0.0122
25u30K,3.1692,0.2899,0.7167,0.6741,0.7544,0.3936,0.2300,0.1063,0.0041
30u40K,6.7748,0.3608,1.5822,1.5853,1.7121,0.8025,0.5353,0.1868,0.0098
40u50K,6.0192,0.2355,1.3411,1.4875,1.6533,0.7287,0.4167,0.1461,0.0103
50u75K,10.1723,0.2178,1.8331,2.6402,3.0594,1.3791,0.7824,0.2488,0.0114
75Ku0.1M,5.0471,0.0486,0.7485,1.3961,1.6301,0.7069,0.3885,0.1237,0.0046
0.1u0.2M,5.1803,0.0309,0.5872,1.4571,1.7622,0.7604,0.4110,0.1627,0.0087
0.2u0.5M,1.3446,0.0024,0.1010,0.3668,0.5043,0.2062,0.1064,0.0530,0.0046
0.5u1.0M,0.2197,0.0001,0.0085,0.0563,0.0855,0.0370,0.0213,0.0098,0.0003
1.0u1.5M,0.0416,0.0001,0.0016,0.0105,0.0174,0.0059,0.0037,0.0025,0.0003
1.5u2.0M,0.0141,0.0001,0.0005,0.0030,0.0062,0.0024,0.0013,0.0008,0.0003
2u5M,0.0170,0.0001,0.0008,0.0033,0.0072,0.0030,0.0017,0.0009,0.0001
5Mplus,0.0046,0.00006,0.0003,0.0007,0.0018,0.0008,0.0006,0.0003,0.00003
5u10M,0.0033,0.00004,0.0002,0.0005,0.0013,0.0006,0.0004,0.0002,0.00002
10u30M,0.0013,0.00002,0.0001,0.0002,0.0005,0.0002,0.0002,0.0001,0.00001
5 changes: 4 additions & 1 deletion taxdata/puf/finalprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,14 @@ def finalprep(data):
data = data.round(0).astype("int64")

# - Impute pension contributions:
data = impute_pension_contributions(data.copy())
data = impute_pension_contributions(data.copy(), max_flpdyr)

# - Rename 'filer' to 'data_source'
data = data.rename(columns={"filer": "data_source"})

# - Sort columns to ensure every PUF is the same
data.sort_index(axis=1, inplace=True)

return data


Expand Down
66 changes: 7 additions & 59 deletions taxdata/puf/impute_pencon.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,21 @@
import sys
import numpy as np
import pandas as pd
from pathlib import Path

if sys.version_info[0] < 3:
from StringIO import StringIO
else:
from io import StringIO


CURPATH = Path(__file__).resolve().parent
DUMP0 = False
DUMP1 = False
DUMP2 = False


def targets():
def targets(year):
"""
Return a DataFrame containing number of taxpayers & spouses with earnings
that make a pension contribution (in millions of people) and a DataFrame
Expand All @@ -68,62 +70,8 @@ def targets():
revised data specified here. Also, the top two wage groups (5u10M and
10u30M) are combined into a single group (5Mplus).
"""
cnt = """
, total, u26, 26u35, 35u45, 45u55, 55u60, 60u65, 65u75,75plus
total ,46.9781,2.7796,8.9035,11.2926,12.9201,5.8450,3.5901,1.5212,0.1261
u5K ,1.1298,0.2766,0.2415,0.1942,0.1904,0.0785,0.0679,0.0673,0.0135
5u10K ,1.3069,0.3113,0.3088,0.2089,0.2054,0.0926,0.0908,0.0747,0.0145
10u15K ,1.6370,0.3211,0.3539,0.2724,0.2990,0.1508,0.1208,0.0995,0.0195
15u20K ,2.1262,0.3323,0.4603,0.3810,0.4441,0.1947,0.1833,0.1186,0.0119
20u25K ,2.7738,0.3519,0.6175,0.5553,0.5872,0.3019,0.2284,0.1194,0.0122
25u30K ,3.1692,0.2899,0.7167,0.6741,0.7544,0.3936,0.2300,0.1063,0.0041
30u40K ,6.7748,0.3608,1.5822,1.5853,1.7121,0.8025,0.5353,0.1868,0.0098
40u50K ,6.0192,0.2355,1.3411,1.4875,1.6533,0.7287,0.4167,0.1461,0.0103
50u75K ,10.1723,0.2178,1.8331,2.6402,3.0594,1.3791,0.7824,0.2488,0.0114
75Ku0.1M,5.0471,0.0486,0.7485,1.3961,1.6301,0.7069,0.3885,0.1237,0.0046
0.1u0.2M,5.1803,0.0309,0.5872,1.4571,1.7622,0.7604,0.4110,0.1627,0.0087
0.2u0.5M,1.3446,0.0024,0.1010,0.3668,0.5043,0.2062,0.1064,0.0530,0.0046
0.5u1.0M,0.2197,0.0001,0.0085,0.0563,0.0855,0.0370,0.0213,0.0098,0.0003
1.0u1.5M,0.0416,0.0001,0.0016,0.0105,0.0174,0.0059,0.0037,0.0025,0.0003
1.5u2.0M,0.0141,0.0001,0.0005,0.0030,0.0062,0.0024,0.0013,0.0008,0.0003
2u5M ,0.0170,0.0001,0.0008,0.0033,0.0072,0.0030,0.0017,0.0009,0.0001
5Mplus ,0.0046,0.00006,0.0003,0.0007,0.0018,0.0008,0.0006,0.0003,0.00003
"""
"""
5u10M ,0.0033,0.00004,0.0002,0.0005,0.0013,0.0006,0.0004,0.0002,0.00002
10u30M ,0.0013,0.00002,0.0001,0.0002,0.0005,0.0002,0.0002,0.0001,0.00001
"""
amt = """
, total, u26, 26u35, 35u45, 45u55, 55u60, 60u65, 65u75,75plus
total ,220.5554,3.2668,25.0779,49.9194,70.3845,37.9728,23.3431,9.9572,0.6337
u5K ,1.1581,0.0411,0.0841,0.1733,0.3118,0.1667,0.2209,0.1563,0.0040
5u10K ,1.1304,0.0743,0.1418,0.1530,0.2795,0.1545,0.1692,0.1414,0.0166
10u15K ,1.7084,0.1143,0.2178,0.2361,0.3626,0.3336,0.2293,0.1764,0.0384
15u20K ,2.8057,0.2239,0.3736,0.4198,0.7278,0.3787,0.4233,0.2336,0.0249
20u25K ,4.3694,0.2873,0.6468,0.6841,1.0589,0.6781,0.6144,0.3710,0.0287
25u30K ,5.6387,0.2935,0.9197,1.0006,1.3722,1.1272,0.6375,0.2767,0.0112
30u40K ,16.1258,0.5373,2.6527,3.0594,4.3521,2.7221,1.9665,0.7954,0.0402
40u50K ,19.1377,0.4829,3.0644,4.0657,5.6255,3.0436,1.9112,0.8700,0.0743
50u75K ,48.6375,0.7072,6.5134,10.8502,14.9928,8.2860,5.1546,2.0320,0.1013
75Ku0.1M,37.2251,0.2379,4.2673,9.0029,12.1241,6.3733,3.8528,1.3138,0.0529
0.1u0.2M,55.5740,0.2371,4.6679,13.9483,18.9280,9.8799,5.5415,2.2351,0.1361
0.2u0.5M,21.2800,0.0211,1.3460,5.0850,8.0092,3.7434,1.9665,1.0250,0.0838
0.5u1.0M,4.2527,0.0022,0.1305,0.9381,1.6156,0.8203,0.4980,0.2246,0.0063
1.0u1.5M,0.8252,0.0022,0.0276,0.1866,0.3378,0.1288,0.0812,0.0632,0.0063
1.5u2.0M,0.2767,0.0022,0.0071,0.0511,0.1204,0.0507,0.0297,0.0176,0.0063
2u5M ,0.3171,0.0010,0.0124,0.0535,0.1321,0.0620,0.0354,0.0189,0.0017
5Mplus ,0.0930,0.0013,0.0046,0.0117,0.0340,0.0240,0.0111,0.0058,0.0006
"""
"""
5u10M ,0.0674,0.0010,0.0035,0.0088,0.0231,0.0194,0.0074,0.0038,0.0005
10u30M ,0.0256,0.0003,0.0011,0.0029,0.0109,0.0046,0.0037,0.0020,0.0001
"""
cnt_df = pd.read_csv(StringIO(cnt), index_col=0)
cnt_df.columns = [name.strip() for name in cnt_df.columns]
cnt_df.index = [name.strip() for name in cnt_df.index]
amt_df = pd.read_csv(StringIO(amt), index_col=0)
amt_df.columns = [name.strip() for name in amt_df.columns]
amt_df.index = [name.strip() for name in amt_df.index]
cnt_df = pd.read_csv(Path(CURPATH, f"dcpentargetcnt{year}.csv"), index_col=0)
amt_df = pd.read_csv(Path(CURPATH, f"dcpentargetamt{year}.csv"), index_col=0)
return cnt_df, amt_df


Expand Down Expand Up @@ -275,14 +223,14 @@ def impute(idata, target_cnt, target_amt):
# end of impute() function


def impute_pension_contributions(alldata):
def impute_pension_contributions(alldata, year):
"""
Main function in impute_pencon.py file.
Argument: puf.csv DataFrame just before imputation is done.
Returns: puf.csv DataFrame with imputed pension contribution amounts.
"""
# specify target DataFrames with total column and total row removed
target_cnt, target_amt = targets()
target_cnt, target_amt = targets(year)
target_cnt.drop(labels="total", axis="index", inplace=True)
target_cnt.drop(labels="total", axis="columns", inplace=True)
target_amt.drop(labels="total", axis="index", inplace=True)
Expand Down
Loading