Skip to content

Commit

Permalink
Merge pull request #435 from andersonfrailey/imputepenconcsv
Browse files Browse the repository at this point in the history
Move impute_pencon targets to csv
  • Loading branch information
andersonfrailey authored Sep 15, 2023
2 parents bab4bf8 + f23af3f commit 88e00d0
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 60 deletions.
21 changes: 21 additions & 0 deletions taxdata/puf/dcpentargetamt2011.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
,total,u26,26u35,35u45,45u55,55u60,60u65,65u75,75plus
total,220.5554,3.2668,25.0779,49.9194,70.3845,37.9728,23.3431,9.9572,0.6337
u5K,1.1581,0.0411,0.0841,0.1733,0.3118,0.1667,0.2209,0.1563,0.0040
5u10K,1.1304,0.0743,0.1418,0.1530,0.2795,0.1545,0.1692,0.1414,0.0166
10u15K,1.7084,0.1143,0.2178,0.2361,0.3626,0.3336,0.2293,0.1764,0.0384
15u20K,2.8057,0.2239,0.3736,0.4198,0.7278,0.3787,0.4233,0.2336,0.0249
20u25K,4.3694,0.2873,0.6468,0.6841,1.0589,0.6781,0.6144,0.3710,0.0287
25u30K,5.6387,0.2935,0.9197,1.0006,1.3722,1.1272,0.6375,0.2767,0.0112
30u40K,16.1258,0.5373,2.6527,3.0594,4.3521,2.7221,1.9665,0.7954,0.0402
40u50K,19.1377,0.4829,3.0644,4.0657,5.6255,3.0436,1.9112,0.8700,0.0743
50u75K,48.6375,0.7072,6.5134,10.8502,14.9928,8.2860,5.1546,2.0320,0.1013
75Ku0.1M,37.2251,0.2379,4.2673,9.0029,12.1241,6.3733,3.8528,1.3138,0.0529
0.1u0.2M,55.5740,0.2371,4.6679,13.9483,18.9280,9.8799,5.5415,2.2351,0.1361
0.2u0.5M,21.2800,0.0211,1.3460,5.0850,8.0092,3.7434,1.9665,1.0250,0.0838
0.5u1.0M,4.2527,0.0022,0.1305,0.9381,1.6156,0.8203,0.4980,0.2246,0.0063
1.0u1.5M,0.8252,0.0022,0.0276,0.1866,0.3378,0.1288,0.0812,0.0632,0.0063
1.5u2.0M,0.2767,0.0022,0.0071,0.0511,0.1204,0.0507,0.0297,0.0176,0.0063
2u5M,0.3171,0.0010,0.0124,0.0535,0.1321,0.0620,0.0354,0.0189,0.0017
5Mplus,0.0930,0.0013,0.0046,0.0117,0.0340,0.0240,0.0111,0.0058,0.0006
5u10M,0.0674,0.0010,0.0035,0.0088,0.0231,0.0194,0.0074,0.0038,0.0005
10u30M,0.0256,0.0003,0.0011,0.0029,0.0109,0.0046,0.0037,0.0020,0.0001
21 changes: 21 additions & 0 deletions taxdata/puf/dcpentargetcnt2011.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
,total,u26,26u35,35u45,45u55,55u60,60u65,65u75,75plus
total,46.9781,2.7796,8.9035,11.2926,12.9201,5.8450,3.5901,1.5212,0.1261
u5K,1.1298,0.2766,0.2415,0.1942,0.1904,0.0785,0.0679,0.0673,0.0135
5u10K,1.3069,0.3113,0.3088,0.2089,0.2054,0.0926,0.0908,0.0747,0.0145
10u15K,1.6370,0.3211,0.3539,0.2724,0.2990,0.1508,0.1208,0.0995,0.0195
15u20K,2.1262,0.3323,0.4603,0.3810,0.4441,0.1947,0.1833,0.1186,0.0119
20u25K,2.7738,0.3519,0.6175,0.5553,0.5872,0.3019,0.2284,0.1194,0.0122
25u30K,3.1692,0.2899,0.7167,0.6741,0.7544,0.3936,0.2300,0.1063,0.0041
30u40K,6.7748,0.3608,1.5822,1.5853,1.7121,0.8025,0.5353,0.1868,0.0098
40u50K,6.0192,0.2355,1.3411,1.4875,1.6533,0.7287,0.4167,0.1461,0.0103
50u75K,10.1723,0.2178,1.8331,2.6402,3.0594,1.3791,0.7824,0.2488,0.0114
75Ku0.1M,5.0471,0.0486,0.7485,1.3961,1.6301,0.7069,0.3885,0.1237,0.0046
0.1u0.2M,5.1803,0.0309,0.5872,1.4571,1.7622,0.7604,0.4110,0.1627,0.0087
0.2u0.5M,1.3446,0.0024,0.1010,0.3668,0.5043,0.2062,0.1064,0.0530,0.0046
0.5u1.0M,0.2197,0.0001,0.0085,0.0563,0.0855,0.0370,0.0213,0.0098,0.0003
1.0u1.5M,0.0416,0.0001,0.0016,0.0105,0.0174,0.0059,0.0037,0.0025,0.0003
1.5u2.0M,0.0141,0.0001,0.0005,0.0030,0.0062,0.0024,0.0013,0.0008,0.0003
2u5M,0.0170,0.0001,0.0008,0.0033,0.0072,0.0030,0.0017,0.0009,0.0001
5Mplus,0.0046,0.00006,0.0003,0.0007,0.0018,0.0008,0.0006,0.0003,0.00003
5u10M,0.0033,0.00004,0.0002,0.0005,0.0013,0.0006,0.0004,0.0002,0.00002
10u30M,0.0013,0.00002,0.0001,0.0002,0.0005,0.0002,0.0002,0.0001,0.00001
2 changes: 1 addition & 1 deletion taxdata/puf/finalprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def finalprep(data):
data = data.round(0).astype("int64")

# - Impute pension contributions:
data = impute_pension_contributions(data.copy())
data = impute_pension_contributions(data.copy(), max_flpdyr)

# - Rename 'filer' to 'data_source'
data = data.rename(columns={"filer": "data_source"})
Expand Down
66 changes: 7 additions & 59 deletions taxdata/puf/impute_pencon.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,21 @@
import sys
import numpy as np
import pandas as pd
from pathlib import Path

if sys.version_info[0] < 3:
from StringIO import StringIO
else:
from io import StringIO


# Directory containing this module; targets() joins it with the
# dcpentargetcnt{year}.csv / dcpentargetamt{year}.csv file names.
CURPATH = Path(__file__).resolve().parent
# NOTE(review): DUMP0-DUMP2 look like debug-output switches; the code that
# reads them is outside this hunk -- confirm before relying on that.
DUMP0 = False
DUMP1 = False
DUMP2 = False


def targets():
def targets(year):
"""
Return a DataFrame containing number of taxpayers & spouses with earnings
that make a pension contribution (in millions of people) and a DataFrame
Expand All @@ -68,62 +70,8 @@ def targets():
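revised data specified in the dcpentargetcnt{year}.csv and
dcpentargetamt{year}.csv files located next to this module. Also, the top
two wage groups (5u10M and 10u30M) are combined into a single group (5Mplus).
NOTE(review): the CSV files also contain separate 5u10M and 10u30M rows,
which the former inline tables left out of the parsed data; confirm whether
those rows should be dropped after loading.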
"""
cnt = """
, total, u26, 26u35, 35u45, 45u55, 55u60, 60u65, 65u75,75plus
total ,46.9781,2.7796,8.9035,11.2926,12.9201,5.8450,3.5901,1.5212,0.1261
u5K ,1.1298,0.2766,0.2415,0.1942,0.1904,0.0785,0.0679,0.0673,0.0135
5u10K ,1.3069,0.3113,0.3088,0.2089,0.2054,0.0926,0.0908,0.0747,0.0145
10u15K ,1.6370,0.3211,0.3539,0.2724,0.2990,0.1508,0.1208,0.0995,0.0195
15u20K ,2.1262,0.3323,0.4603,0.3810,0.4441,0.1947,0.1833,0.1186,0.0119
20u25K ,2.7738,0.3519,0.6175,0.5553,0.5872,0.3019,0.2284,0.1194,0.0122
25u30K ,3.1692,0.2899,0.7167,0.6741,0.7544,0.3936,0.2300,0.1063,0.0041
30u40K ,6.7748,0.3608,1.5822,1.5853,1.7121,0.8025,0.5353,0.1868,0.0098
40u50K ,6.0192,0.2355,1.3411,1.4875,1.6533,0.7287,0.4167,0.1461,0.0103
50u75K ,10.1723,0.2178,1.8331,2.6402,3.0594,1.3791,0.7824,0.2488,0.0114
75Ku0.1M,5.0471,0.0486,0.7485,1.3961,1.6301,0.7069,0.3885,0.1237,0.0046
0.1u0.2M,5.1803,0.0309,0.5872,1.4571,1.7622,0.7604,0.4110,0.1627,0.0087
0.2u0.5M,1.3446,0.0024,0.1010,0.3668,0.5043,0.2062,0.1064,0.0530,0.0046
0.5u1.0M,0.2197,0.0001,0.0085,0.0563,0.0855,0.0370,0.0213,0.0098,0.0003
1.0u1.5M,0.0416,0.0001,0.0016,0.0105,0.0174,0.0059,0.0037,0.0025,0.0003
1.5u2.0M,0.0141,0.0001,0.0005,0.0030,0.0062,0.0024,0.0013,0.0008,0.0003
2u5M ,0.0170,0.0001,0.0008,0.0033,0.0072,0.0030,0.0017,0.0009,0.0001
5Mplus ,0.0046,0.00006,0.0003,0.0007,0.0018,0.0008,0.0006,0.0003,0.00003
"""
"""
5u10M ,0.0033,0.00004,0.0002,0.0005,0.0013,0.0006,0.0004,0.0002,0.00002
10u30M ,0.0013,0.00002,0.0001,0.0002,0.0005,0.0002,0.0002,0.0001,0.00001
"""
amt = """
, total, u26, 26u35, 35u45, 45u55, 55u60, 60u65, 65u75,75plus
total ,220.5554,3.2668,25.0779,49.9194,70.3845,37.9728,23.3431,9.9572,0.6337
u5K ,1.1581,0.0411,0.0841,0.1733,0.3118,0.1667,0.2209,0.1563,0.0040
5u10K ,1.1304,0.0743,0.1418,0.1530,0.2795,0.1545,0.1692,0.1414,0.0166
10u15K ,1.7084,0.1143,0.2178,0.2361,0.3626,0.3336,0.2293,0.1764,0.0384
15u20K ,2.8057,0.2239,0.3736,0.4198,0.7278,0.3787,0.4233,0.2336,0.0249
20u25K ,4.3694,0.2873,0.6468,0.6841,1.0589,0.6781,0.6144,0.3710,0.0287
25u30K ,5.6387,0.2935,0.9197,1.0006,1.3722,1.1272,0.6375,0.2767,0.0112
30u40K ,16.1258,0.5373,2.6527,3.0594,4.3521,2.7221,1.9665,0.7954,0.0402
40u50K ,19.1377,0.4829,3.0644,4.0657,5.6255,3.0436,1.9112,0.8700,0.0743
50u75K ,48.6375,0.7072,6.5134,10.8502,14.9928,8.2860,5.1546,2.0320,0.1013
75Ku0.1M,37.2251,0.2379,4.2673,9.0029,12.1241,6.3733,3.8528,1.3138,0.0529
0.1u0.2M,55.5740,0.2371,4.6679,13.9483,18.9280,9.8799,5.5415,2.2351,0.1361
0.2u0.5M,21.2800,0.0211,1.3460,5.0850,8.0092,3.7434,1.9665,1.0250,0.0838
0.5u1.0M,4.2527,0.0022,0.1305,0.9381,1.6156,0.8203,0.4980,0.2246,0.0063
1.0u1.5M,0.8252,0.0022,0.0276,0.1866,0.3378,0.1288,0.0812,0.0632,0.0063
1.5u2.0M,0.2767,0.0022,0.0071,0.0511,0.1204,0.0507,0.0297,0.0176,0.0063
2u5M ,0.3171,0.0010,0.0124,0.0535,0.1321,0.0620,0.0354,0.0189,0.0017
5Mplus ,0.0930,0.0013,0.0046,0.0117,0.0340,0.0240,0.0111,0.0058,0.0006
"""
"""
5u10M ,0.0674,0.0010,0.0035,0.0088,0.0231,0.0194,0.0074,0.0038,0.0005
10u30M ,0.0256,0.0003,0.0011,0.0029,0.0109,0.0046,0.0037,0.0020,0.0001
"""
cnt_df = pd.read_csv(StringIO(cnt), index_col=0)
cnt_df.columns = [name.strip() for name in cnt_df.columns]
cnt_df.index = [name.strip() for name in cnt_df.index]
amt_df = pd.read_csv(StringIO(amt), index_col=0)
amt_df.columns = [name.strip() for name in amt_df.columns]
amt_df.index = [name.strip() for name in amt_df.index]
cnt_df = pd.read_csv(Path(CURPATH, f"dcpentargetcnt{year}.csv"), index_col=0)
amt_df = pd.read_csv(Path(CURPATH, f"dcpentargetamt{year}.csv"), index_col=0)
return cnt_df, amt_df


Expand Down Expand Up @@ -275,14 +223,14 @@ def impute(idata, target_cnt, target_amt):
# end of impute() function


def impute_pension_contributions(alldata):
def impute_pension_contributions(alldata, year):
"""
Main function in impute_pencon.py file.
Arguments: puf.csv DataFrame just before imputation is done, and the year
that selects the dcpentargetcnt{year}.csv / dcpentargetamt{year}.csv files.
Returns: puf.csv DataFrame with imputed pension contribution amounts.
"""
# specify target DataFrames with total column and total row removed
target_cnt, target_amt = targets()
target_cnt, target_amt = targets(year)
target_cnt.drop(labels="total", axis="index", inplace=True)
target_cnt.drop(labels="total", axis="columns", inplace=True)
target_amt.drop(labels="total", axis="index", inplace=True)
Expand Down

0 comments on commit 88e00d0

Please sign in to comment.