From 13c885125a5923f05bb2873a0802cd3b807b9b26 Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Wed, 13 Sep 2023 10:27:39 -0400 Subject: [PATCH 1/3] move impute pencon targets to csv --- taxdata/puf/dcpentargetamt2011.csv | 21 ++++++++++ taxdata/puf/dcpentargetcnt2011.csv | 21 ++++++++++ taxdata/puf/finalprep.py | 2 +- taxdata/puf/impute_pencon.py | 66 ++++-------------------------- 4 files changed, 50 insertions(+), 60 deletions(-) create mode 100644 taxdata/puf/dcpentargetamt2011.csv create mode 100644 taxdata/puf/dcpentargetcnt2011.csv diff --git a/taxdata/puf/dcpentargetamt2011.csv b/taxdata/puf/dcpentargetamt2011.csv new file mode 100644 index 00000000..8b03f4bd --- /dev/null +++ b/taxdata/puf/dcpentargetamt2011.csv @@ -0,0 +1,21 @@ +,total,u26,26u35,35u45,45u55,55u60,60u65,65u75,75plus +total,220.5554,3.2668,25.0779,49.9194,70.3845,37.9728,23.3431,9.9572,0.6337 +u5K,1.1581,0.0411,0.0841,0.1733,0.3118,0.1667,0.2209,0.1563,0.0040 +5u10K,1.1304,0.0743,0.1418,0.1530,0.2795,0.1545,0.1692,0.1414,0.0166 +10u15K,1.7084,0.1143,0.2178,0.2361,0.3626,0.3336,0.2293,0.1764,0.0384 +15u20K,2.8057,0.2239,0.3736,0.4198,0.7278,0.3787,0.4233,0.2336,0.0249 +20u25K,4.3694,0.2873,0.6468,0.6841,1.0589,0.6781,0.6144,0.3710,0.0287 +25u30K,5.6387,0.2935,0.9197,1.0006,1.3722,1.1272,0.6375,0.2767,0.0112 +30u40K,16.1258,0.5373,2.6527,3.0594,4.3521,2.7221,1.9665,0.7954,0.0402 +40u50K,19.1377,0.4829,3.0644,4.0657,5.6255,3.0436,1.9112,0.8700,0.0743 +50u75K,48.6375,0.7072,6.5134,10.8502,14.9928,8.2860,5.1546,2.0320,0.1013 +75Ku0.1M,37.2251,0.2379,4.2673,9.0029,12.1241,6.3733,3.8528,1.3138,0.0529 +0.1u0.2M,55.5740,0.2371,4.6679,13.9483,18.9280,9.8799,5.5415,2.2351,0.1361 +0.2u0.5M,21.2800,0.0211,1.3460,5.0850,8.0092,3.7434,1.9665,1.0250,0.0838 +0.5u1.0M,4.2527,0.0022,0.1305,0.9381,1.6156,0.8203,0.4980,0.2246,0.0063 +1.0u1.5M,0.8252,0.0022,0.0276,0.1866,0.3378,0.1288,0.0812,0.0632,0.0063 +1.5u2.0M,0.2767,0.0022,0.0071,0.0511,0.1204,0.0507,0.0297,0.0176,0.0063 +2u5M,0.3171,0.0010,0.0124,0.0535,0.1321,0.0620,0.0354,0.0189,0.0017 +5Mplus,0.0930,0.0013,0.0046,0.0117,0.0340,0.0240,0.0111,0.0058,0.0006 +5u10M,0.0674,0.0010,0.0035,0.0088,0.0231,0.0194,0.0074,0.0038,0.0005 +10u30M,0.0256,0.0003,0.0011,0.0029,0.0109,0.0046,0.0037,0.0020,0.0001 \ No newline at end of file diff --git a/taxdata/puf/dcpentargetcnt2011.csv b/taxdata/puf/dcpentargetcnt2011.csv new file mode 100644 index 00000000..28750636 --- /dev/null +++ b/taxdata/puf/dcpentargetcnt2011.csv @@ -0,0 +1,21 @@ +,total,u26,26u35,35u45,45u55,55u60,60u65,65u75,75plus +total,46.9781,2.7796,8.9035,11.2926,12.9201,5.8450,3.5901,1.5212,0.1261 +u5K,1.1298,0.2766,0.2415,0.1942,0.1904,0.0785,0.0679,0.0673,0.0135 +5u10K,1.3069,0.3113,0.3088,0.2089,0.2054,0.0926,0.0908,0.0747,0.0145 +10u15K,1.6370,0.3211,0.3539,0.2724,0.2990,0.1508,0.1208,0.0995,0.0195 +15u20K,2.1262,0.3323,0.4603,0.3810,0.4441,0.1947,0.1833,0.1186,0.0119 +20u25K,2.7738,0.3519,0.6175,0.5553,0.5872,0.3019,0.2284,0.1194,0.0122 +25u30K,3.1692,0.2899,0.7167,0.6741,0.7544,0.3936,0.2300,0.1063,0.0041 +30u40K,6.7748,0.3608,1.5822,1.5853,1.7121,0.8025,0.5353,0.1868,0.0098 +40u50K,6.0192,0.2355,1.3411,1.4875,1.6533,0.7287,0.4167,0.1461,0.0103 +50u75K,10.1723,0.2178,1.8331,2.6402,3.0594,1.3791,0.7824,0.2488,0.0114 +75Ku0.1M,5.0471,0.0486,0.7485,1.3961,1.6301,0.7069,0.3885,0.1237,0.0046 +0.1u0.2M,5.1803,0.0309,0.5872,1.4571,1.7622,0.7604,0.4110,0.1627,0.0087 +0.2u0.5M,1.3446,0.0024,0.1010,0.3668,0.5043,0.2062,0.1064,0.0530,0.0046 +0.5u1.0M,0.2197,0.0001,0.0085,0.0563,0.0855,0.0370,0.0213,0.0098,0.0003 +1.0u1.5M,0.0416,0.0001,0.0016,0.0105,0.0174,0.0059,0.0037,0.0025,0.0003 +1.5u2.0M,0.0141,0.0001,0.0005,0.0030,0.0062,0.0024,0.0013,0.0008,0.0003 +2u5M,0.0170,0.0001,0.0008,0.0033,0.0072,0.0030,0.0017,0.0009,0.0001 +5Mplus,0.0046,0.00006,0.0003,0.0007,0.0018,0.0008,0.0006,0.0003,0.00003 +5u10M,0.0033,0.00004,0.0002,0.0005,0.0013,0.0006,0.0004,0.0002,0.00002 +10u30M,0.0013,0.00002,0.0001,0.0002,0.0005,0.0002,0.0002,0.0001,0.00001 \ No newline at end of file diff --git a/taxdata/puf/finalprep.py b/taxdata/puf/finalprep.py index aa41ec92..cc143af1 100644 --- a/taxdata/puf/finalprep.py +++ b/taxdata/puf/finalprep.py @@ -69,7 +69,7 @@ def finalprep(data): data = data.round(0).astype("int64") # - Impute pension contributions: - data = impute_pension_contributions(data.copy()) + data = impute_pension_contributions(data.copy(), max_flpdyr) # - Rename 'filer' to 'data_source' data = data.rename(columns={"filer": "data_source"}) diff --git a/taxdata/puf/impute_pencon.py b/taxdata/puf/impute_pencon.py index 35c1c0c4..e6a0deeb 100644 --- a/taxdata/puf/impute_pencon.py +++ b/taxdata/puf/impute_pencon.py @@ -42,6 +42,7 @@ import sys import numpy as np import pandas as pd +from pathlib import Path if sys.version_info[0] < 3: from StringIO import StringIO @@ -49,12 +50,13 @@ from io import StringIO +CURPATH = Path(__file__).resolve().parent DUMP0 = False DUMP1 = False DUMP2 = False -def targets(): +def targets(year): """ Return a DataFrame containing number of taxpayers & spouses with earnings that make a pension contribution (in millions of people) and a DataFrame @@ -68,62 +70,8 @@ def targets(): revised data specified here. Also, the top two wage groups (5u10M and 10u30M) are combined into a single group (5Mplus). """ - cnt = """ - , total, u26, 26u35, 35u45, 45u55, 55u60, 60u65, 65u75,75plus -total ,46.9781,2.7796,8.9035,11.2926,12.9201,5.8450,3.5901,1.5212,0.1261 -u5K ,1.1298,0.2766,0.2415,0.1942,0.1904,0.0785,0.0679,0.0673,0.0135 -5u10K ,1.3069,0.3113,0.3088,0.2089,0.2054,0.0926,0.0908,0.0747,0.0145 -10u15K ,1.6370,0.3211,0.3539,0.2724,0.2990,0.1508,0.1208,0.0995,0.0195 -15u20K ,2.1262,0.3323,0.4603,0.3810,0.4441,0.1947,0.1833,0.1186,0.0119 -20u25K ,2.7738,0.3519,0.6175,0.5553,0.5872,0.3019,0.2284,0.1194,0.0122 -25u30K ,3.1692,0.2899,0.7167,0.6741,0.7544,0.3936,0.2300,0.1063,0.0041 -30u40K ,6.7748,0.3608,1.5822,1.5853,1.7121,0.8025,0.5353,0.1868,0.0098 -40u50K ,6.0192,0.2355,1.3411,1.4875,1.6533,0.7287,0.4167,0.1461,0.0103 -50u75K ,10.1723,0.2178,1.8331,2.6402,3.0594,1.3791,0.7824,0.2488,0.0114 -75Ku0.1M,5.0471,0.0486,0.7485,1.3961,1.6301,0.7069,0.3885,0.1237,0.0046 -0.1u0.2M,5.1803,0.0309,0.5872,1.4571,1.7622,0.7604,0.4110,0.1627,0.0087 -0.2u0.5M,1.3446,0.0024,0.1010,0.3668,0.5043,0.2062,0.1064,0.0530,0.0046 -0.5u1.0M,0.2197,0.0001,0.0085,0.0563,0.0855,0.0370,0.0213,0.0098,0.0003 -1.0u1.5M,0.0416,0.0001,0.0016,0.0105,0.0174,0.0059,0.0037,0.0025,0.0003 -1.5u2.0M,0.0141,0.0001,0.0005,0.0030,0.0062,0.0024,0.0013,0.0008,0.0003 -2u5M ,0.0170,0.0001,0.0008,0.0033,0.0072,0.0030,0.0017,0.0009,0.0001 -5Mplus ,0.0046,0.00006,0.0003,0.0007,0.0018,0.0008,0.0006,0.0003,0.00003 -""" - """ -5u10M ,0.0033,0.00004,0.0002,0.0005,0.0013,0.0006,0.0004,0.0002,0.00002 -10u30M ,0.0013,0.00002,0.0001,0.0002,0.0005,0.0002,0.0002,0.0001,0.00001 - """ - amt = """ - , total, u26, 26u35, 35u45, 45u55, 55u60, 60u65, 65u75,75plus -total ,220.5554,3.2668,25.0779,49.9194,70.3845,37.9728,23.3431,9.9572,0.6337 -u5K ,1.1581,0.0411,0.0841,0.1733,0.3118,0.1667,0.2209,0.1563,0.0040 -5u10K ,1.1304,0.0743,0.1418,0.1530,0.2795,0.1545,0.1692,0.1414,0.0166 -10u15K ,1.7084,0.1143,0.2178,0.2361,0.3626,0.3336,0.2293,0.1764,0.0384 -15u20K ,2.8057,0.2239,0.3736,0.4198,0.7278,0.3787,0.4233,0.2336,0.0249 -20u25K ,4.3694,0.2873,0.6468,0.6841,1.0589,0.6781,0.6144,0.3710,0.0287 -25u30K ,5.6387,0.2935,0.9197,1.0006,1.3722,1.1272,0.6375,0.2767,0.0112 -30u40K ,16.1258,0.5373,2.6527,3.0594,4.3521,2.7221,1.9665,0.7954,0.0402 -40u50K ,19.1377,0.4829,3.0644,4.0657,5.6255,3.0436,1.9112,0.8700,0.0743 -50u75K ,48.6375,0.7072,6.5134,10.8502,14.9928,8.2860,5.1546,2.0320,0.1013 -75Ku0.1M,37.2251,0.2379,4.2673,9.0029,12.1241,6.3733,3.8528,1.3138,0.0529 -0.1u0.2M,55.5740,0.2371,4.6679,13.9483,18.9280,9.8799,5.5415,2.2351,0.1361 -0.2u0.5M,21.2800,0.0211,1.3460,5.0850,8.0092,3.7434,1.9665,1.0250,0.0838 -0.5u1.0M,4.2527,0.0022,0.1305,0.9381,1.6156,0.8203,0.4980,0.2246,0.0063 -1.0u1.5M,0.8252,0.0022,0.0276,0.1866,0.3378,0.1288,0.0812,0.0632,0.0063 -1.5u2.0M,0.2767,0.0022,0.0071,0.0511,0.1204,0.0507,0.0297,0.0176,0.0063 -2u5M ,0.3171,0.0010,0.0124,0.0535,0.1321,0.0620,0.0354,0.0189,0.0017 -5Mplus ,0.0930,0.0013,0.0046,0.0117,0.0340,0.0240,0.0111,0.0058,0.0006 -""" - """ -5u10M ,0.0674,0.0010,0.0035,0.0088,0.0231,0.0194,0.0074,0.0038,0.0005 -10u30M ,0.0256,0.0003,0.0011,0.0029,0.0109,0.0046,0.0037,0.0020,0.0001 - """ - cnt_df = pd.read_csv(StringIO(cnt), index_col=0) - cnt_df.columns = [name.strip() for name in cnt_df.columns] - cnt_df.index = [name.strip() for name in cnt_df.index] - amt_df = pd.read_csv(StringIO(amt), index_col=0) - amt_df.columns = [name.strip() for name in amt_df.columns] - amt_df.index = [name.strip() for name in amt_df.index] + cnt_df = pd.read_csv(Path(CURPATH, f'dcpentargetcnt{year}.csv'), index_col=0) + amt_df = pd.read_csv(Path(CURPATH, f'dcpentargetamt{year}.csv'), index_col=0) return cnt_df, amt_df @@ -275,14 +223,14 @@ def impute(idata, target_cnt, target_amt): # end of impute() function -def impute_pension_contributions(alldata): +def impute_pension_contributions(alldata, year): """ Main function in impute_pencon.py file. Argument: puf.csv DataFrame just before imputation is done. Returns: puf.csv DataFrame with imputed pension contribution amounts. """ # specify target DataFrames with total column and total row removed - target_cnt, target_amt = targets() + target_cnt, target_amt = targets(year) target_cnt.drop(labels="total", axis="index", inplace=True) target_cnt.drop(labels="total", axis="columns", inplace=True) target_amt.drop(labels="total", axis="index", inplace=True) From f23af3fa4d018d442e6cfc2b3eddaa1557150abb Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Wed, 13 Sep 2023 11:06:02 -0400 Subject: [PATCH 2/3] change to double quotes --- taxdata/puf/impute_pencon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taxdata/puf/impute_pencon.py b/taxdata/puf/impute_pencon.py index e6a0deeb..40cbdea4 100644 --- a/taxdata/puf/impute_pencon.py +++ b/taxdata/puf/impute_pencon.py @@ -70,8 +70,8 @@ def targets(year): revised data specified here. Also, the top two wage groups (5u10M and 10u30M) are combined into a single group (5Mplus). """ - cnt_df = pd.read_csv(Path(CURPATH, f'dcpentargetcnt{year}.csv'), index_col=0) - amt_df = pd.read_csv(Path(CURPATH, f'dcpentargetamt{year}.csv'), index_col=0) + cnt_df = pd.read_csv(Path(CURPATH, f"dcpentargetcnt{year}.csv"), index_col=0) + amt_df = pd.read_csv(Path(CURPATH, f"dcpentargetamt{year}.csv"), index_col=0) return cnt_df, amt_df From 697761ccad75aceb00a12e028863719d7437c44d Mon Sep 17 00:00:00 2001 From: andersonfrailey Date: Wed, 13 Sep 2023 17:01:41 -0400 Subject: [PATCH 3/3] Sort PUF columns --- taxdata/puf/finalprep.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/taxdata/puf/finalprep.py b/taxdata/puf/finalprep.py index aa41ec92..35fb1806 100644 --- a/taxdata/puf/finalprep.py +++ b/taxdata/puf/finalprep.py @@ -74,6 +74,9 @@ def finalprep(data): # - Rename 'filer' to 'data_source' data = data.rename(columns={"filer": "data_source"}) + # - Sort columns to ensure every PUF is the same + data.sort_index(axis=1, inplace=True) + return data