Merge pull request #387 from andersonfrailey/zeroweights

Fix zero weights issue
PSLmodels · Jun 10, 2021 · f395298 · f395298
2 parents 75fc0f2 + dc7a93d
commit f395298
Show file tree

Hide file tree

Showing 9 changed files with 32 additions and 372 deletions.
diff --git a/Makefile b/Makefile
@@ -87,7 +87,8 @@ puf-files: data/cps-matched-puf.csv \
 
 data/cps-matched-puf.csv: taxdata/puf/finalprep.py \
                   taxdata/puf/impute_itmexp.py \
-                  taxdata/puf/impute_pencon.py
+                  taxdata/puf/impute_pencon.py\
+                  createpuf.py
 	python createpuf.py
 # Above recipe also makes data/puf.csv
 

diff --git a/Manifest.toml b/Manifest.toml
diff --git a/Project.toml b/Project.toml
diff --git a/README.md b/README.md
@@ -59,7 +59,8 @@ To run the scripts that produce `puf.csv` and `cps.csv.gz`, activate the
 
 `Julia` must also be installed to solve for the PUF and CPS weights. You 
 can download `Julia` from their [website](https://julialang.org/downloads/)
-or by using `homebrew`.
+or by using `homebrew`. After installing Julia, you will also need to install
+these three Julia packages: `JuMP`, `Cbc`, and `NPZ`.
 
 Data-Preparation Documentation and Workflow
 -------------------------------------------

diff --git a/createpuf.py b/createpuf.py
@@ -96,7 +96,6 @@ def dataprep(data):
 print("Prepping PUF")
 puf2011 = pd.read_csv(Path(DATA_PATH, "puf2011.csv"))
 raw_puf = puf.preppuf(puf2011, PUF_YEAR)
-# raw_puf.to_csv(Path(DATA_PATH, "raw_puf.csv"), index=False)
 
 # rename CPS file to match PUF
 print("Prepping CPS")
@@ -118,12 +117,6 @@ def dataprep(data):
 raw_cps["e19800"] = raw_cps["charitable"] * cash
 raw_cps["e20100"] = raw_cps["charitable"] * non_cash
 
-# cap number of dependents in CPS to line up with PUF
-# raw_cps["depne"] = np.where(
-#     raw_cps["mars"] == 2,
-#     np.minimum(5, raw_cps["depne"]),
-#     np.minimum(3, raw_cps["depne"]),
-# )
 raw_cps = dataprep(raw_cps)
 raw_puf = dataprep(raw_puf)
 raw_cps["recid"] = range(1, len(raw_cps.index) + 1)
@@ -166,6 +159,7 @@ def dataprep(data):
 data.drop(list(data.filter(regex=".*_cps")), axis=1, inplace=True)
 # add back non-filers
 print("Adding non-filers")
+nonfilers.rename(columns={"s006": "matched_weight"}, inplace=True)
 data = pd.concat([data, nonfilers], sort=False, ignore_index=True)
 data = data.fillna(0.0)
 data.reset_index(inplace=True)

diff --git a/puf_stage2/puf_weights.csv.gz b/puf_stage2/puf_weights.csv.gz