diff --git a/.gitignore b/.gitignore index d27f11f..402e230 100644 --- a/.gitignore +++ b/.gitignore @@ -1,15 +1,16 @@ **/*.h5 -**/*.pyc +**/*.npy +**/*.csv **/*.csv.zip **/*.csv.gz -**/*.csv +**/*.pyc **/*.egg-info **/_build/ **/*tfevents* +tmd/storage/output/cached_files tmd/storage/output/tax_expenditures !tmd/storage/input/*.csv !tmd/areas/targets/*.csv -tmd/areas/weights/*.log **demographics_2015.csv **puf_2015.csv *.DS_STORE \ No newline at end of file diff --git a/Makefile b/Makefile index d909153..f73f7d9 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ install: .PHONY=clean clean: - rm -f tmd/storage/output/tmd* + rm -f tmd/storage/output/tmd* tmd/storage/output/cached_files tmd/storage/output/tmd.csv.gz: \ setup.py \ @@ -36,10 +36,17 @@ tmd/storage/output/tmd_weights.csv.gz: \ tmd/create_taxcalc_sampling_weights.py python tmd/create_taxcalc_sampling_weights.py +tmd/storage/output/cached_files: \ + tmd/storage/output/tmd.csv.gz \ + tmd/storage/output/tmd_growfactors.csv \ + tmd/storage/output/tmd_weights.csv.gz + python tmd/create_taxcalc_cached_files.py + .PHONY=tmd_files tmd_files: tmd/storage/output/tmd.csv.gz \ tmd/storage/output/tmd_growfactors.csv \ - tmd/storage/output/tmd_weights.csv.gz + tmd/storage/output/tmd_weights.csv.gz \ + tmd/storage/output/cached_files .PHONY=test test: tmd_files diff --git a/tmd/areas/create_area_weights.py b/tmd/areas/create_area_weights.py index dee7422..01c5166 100644 --- a/tmd/areas/create_area_weights.py +++ b/tmd/areas/create_area_weights.py @@ -30,8 +30,8 @@ GFFILE_PATH = STORAGE_FOLDER / "output" / "tmd_growfactors.csv" POPFILE_PATH = STORAGE_FOLDER / "input" / "cbo_population_forecast.yaml" -# taxcalc calculated variable cache files: -TAXCALC_AGI_CACHE = AREAS_FOLDER / "cache_agi.npy" +# Tax-Calculator calculated variable cache files: +TAXCALC_AGI_CACHE = STORAGE_FOLDER / "output" / "cached_c00100.npy" PARAMS = {} @@ -166,26 +166,12 @@ def valid_area(area: str): return all_ok -def 
all_taxcalc_variables(write_cache): +def all_taxcalc_variables(): """ Return all read and needed calc Tax-Calculator variables in pd.DataFrame. """ vdf = pd.read_csv(INFILE_PATH) - if TAXCALC_AGI_CACHE.exists(): - vdf["c00100"] = np.load(TAXCALC_AGI_CACHE) - else: - input_data = tc.Records.tmd_constructor( - data_path=INFILE_PATH, - weights_path=WTFILE_PATH, - growfactors_path=GFFILE_PATH, - exact_calculations=True, - ) - sim = tc.Calculator(records=input_data, policy=tc.Policy()) - sim.calc_all() - agi = sim.array("c00100") - vdf["c00100"] = agi - if write_cache: - np.save(TAXCALC_AGI_CACHE, agi, allow_pickle=False) + vdf["c00100"] = np.load(TAXCALC_AGI_CACHE) assert np.all(vdf.s006 > 0), "Not all weights are positive" return vdf @@ -432,7 +418,6 @@ def create_area_weights_file( area: str, write_log: bool = True, write_file: bool = True, - write_cache: bool = True, ): """ Create Tax-Calculator-style weights file for FIRST_YEAR through LAST_YEAR @@ -501,7 +486,7 @@ def create_area_weights_file( out.write(f"USING CUSTOMIZED PARAMETERS IN {pfile}\n") # construct variable matrix and target array and weights_scale - vdf = all_taxcalc_variables(write_cache) + vdf = all_taxcalc_variables() target_matrix, target_array, weights_scale = prepared_data(area, vdf) wght_us = np.array(vdf.s006 * weights_scale) out.write("INITIAL WEIGHTS STATISTICS:\n") @@ -672,6 +657,5 @@ def create_area_weights_file( area_code, write_log=False, write_file=True, - write_cache=False, ) sys.exit(RCODE) diff --git a/tmd/areas/make_all.py b/tmd/areas/make_all.py index 8cbc690..e14cc83 100644 --- a/tmd/areas/make_all.py +++ b/tmd/areas/make_all.py @@ -11,7 +11,6 @@ from tmd.areas.create_area_weights import ( valid_area, create_area_weights_file, - TAXCALC_AGI_CACHE, ) from tmd.areas import AREAS_FOLDER from tmd.storage import STORAGE_FOLDER @@ -87,7 +86,6 @@ def create_area_weights(area: str): area, write_log=True, write_file=True, - write_cache=True, ) time1 = time.time() print(f"... 
{area} exectime(secs)= {(time1 - time0):.1f}") @@ -101,7 +99,6 @@ def make_all_areas(num_workers): Call create_area_weights(area) for each out-of-date or non-existent area weights file for which there is an area targets file. """ - TAXCALC_AGI_CACHE.unlink(missing_ok=True) todo_areas = to_do_areas() # show processing plan if todo_areas: @@ -119,7 +116,6 @@ def make_all_areas(num_workers): # process each target file for which the weights file is not up-to-date with Pool(num_workers) as pool: pool.map(create_area_weights, todo_areas) - TAXCALC_AGI_CACHE.unlink(missing_ok=True) return 0 diff --git a/tmd/create_taxcalc_cached_files.py b/tmd/create_taxcalc_cached_files.py new file mode 100644 index 0000000..ed13fbb --- /dev/null +++ b/tmd/create_taxcalc_cached_files.py @@ -0,0 +1,46 @@ +""" +Generate tmd/storage/output/cached_*.npy files for TAX_YEAR. +""" + +import numpy as np +import taxcalc as tc +from tmd.storage import STORAGE_FOLDER, CACHED_TAXCALC_VARIABLES + +TAX_YEAR = 2021 + +INFILE_PATH = STORAGE_FOLDER / "output" / "tmd.csv.gz" +WTFILE_PATH = STORAGE_FOLDER / "output" / "tmd_weights.csv.gz" +GFFILE_PATH = STORAGE_FOLDER / "output" / "tmd_growfactors.csv" + + +def create_cached_files(): + """ + Create a Numpy binary file containing TAX_YEAR values + for each variable in the CACHED_TAXCALC_VARIABLES list. 
+ """ + # calculate all Tax-Calculator variables for TAX_YEAR + pol = tc.Policy() + rec = tc.Records.tmd_constructor( + data_path=INFILE_PATH, + weights_path=WTFILE_PATH, + growfactors_path=GFFILE_PATH, + exact_calculations=True, + ) + calc = tc.Calculator(policy=pol, records=rec) + calc.advance_to_year(TAX_YEAR) + calc.calc_all() + + # write each variable in CACHED_TAXCALC_VARIABLES list to a binary file + for vname in CACHED_TAXCALC_VARIABLES: + varray = calc.array(vname) + fpath = STORAGE_FOLDER / "output" / f"cached_{vname}.npy" + np.save(fpath, varray, allow_pickle=False) + fpath = STORAGE_FOLDER / "output" / "cached_files" + with open(fpath, "w", encoding="utf-8") as cfiles: + cfiles.write(" ") # provides timestamp for Makefile + + return 0 + + +if __name__ == "__main__": + create_cached_files() diff --git a/tmd/storage/__init__.py b/tmd/storage/__init__.py index 111a973..21c7019 100644 --- a/tmd/storage/__init__.py +++ b/tmd/storage/__init__.py @@ -1,3 +1,8 @@ from pathlib import Path STORAGE_FOLDER = Path(__file__).parent + +CACHED_TAXCALC_VARIABLES = [ + "c00100", # AGI + "iitax", # individual income tax liability (including refundable credits) +]