diff --git a/cps_stage2/cps_weights.csv.gz b/cps_stage2/cps_weights.csv.gz index 9b1f004b..fd2f5183 100644 Binary files a/cps_stage2/cps_weights.csv.gz and b/cps_stage2/cps_weights.csv.gz differ diff --git a/cps_stage2/solver.jl b/cps_stage2/solver.jl index 2e3c3692..2da28e54 100644 --- a/cps_stage2/solver.jl +++ b/cps_stage2/solver.jl @@ -1,50 +1,54 @@ -using NPZ, JuMP, Cbc, Tulip +using JuMP, NPZ, Tulip +using Printf +using Statistics +using LinearAlgebra function Solve_func(year, tol) - println("\nSolving weights for $year ...\n") - - solver = "Tulip" # Tulip, Cbc - Tulip_max_iter = 100 # 100 default, 500 seems good enough - - println("Using solver: ", solver) - if solver == "Tulip" - model = Model(Tulip.Optimizer) - set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations - set_optimizer_attribute(model, "IPM_IterationsLimit", Tulip_max_iter) # default 100 - elseif solver == "Cbc" - model = Model(Cbc.Optimizer) - set_optimizer_attribute(model, "logLevel", 1) - # I have not figured out option to limit iterations - else - println("ERROR! Solver must be Tulip or Cbc.") - end + println("\nSolving weights for $year ...\n\n") array = npzread(string(year, "_input.npz")) + # ddir = "/home/donboyd/Documents/python_projects/taxdata/puf_stage2/" + # array = npzread(string(ddir, year, "_input.npz")) + A1 = array["A1"] A2 = array["A2"] b = array["b"] N = size(A1)[2] - @variable(model, r[1:N] >= 0) - @variable(model, s[1:N] >= 0) + # scaling: determine a scaling vector with one value per constraint + # - the goal is to keep coefficients reasonably near 1.0 + # - multiply each row of A1 and A2 by its specific scaling constant + # - multiply each element of the b target vector by its scaling constant + # - current approach: choose scale factors so that the sum of absolute values in each row of + # A1 and of A2 will equal the total number of records / 1000; maybe we can improve on this - @objective(model, Min, sum(r[i] + s[i] for i in 1:N)) + scale = (N / 1000.) ./ sum(abs.(A1), dims=2) - # bound on top by tolerance - @constraint(model, [i in 1:N], r[i] + s[i] <= tol) + A1s = scale .* A1 + A2s = scale .* A2 + bs = scale .* b - # Ax = b - @constraint(model, [i in 1:length(b)], sum(A1[i,j] * r[j] + A2[i,j] * s[j] - for j in 1:N) == b[i]) + model = Model(Tulip.Optimizer) + set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations + set_optimizer_attribute(model, "IPM_IterationsLimit", 100) # default 100 seems to be enough + + # r and s must each fall between 0 and the tolerance + @variable(model, 0 <= r[1:N] <= tol) + @variable(model, 0 <= s[1:N] <= tol) + + @objective(model, Min, sum(r[i] + s[i] for i in 1:N)) + # Ax = b - use the scaled matrices and vector + @constraint(model, [i in 1:length(bs)], sum(A1s[i,j] * r[j] + A2s[i,j] * s[j] + for j in 1:N) == bs[i]) optimize!(model) + println("Termination status: ", termination_status(model)) - println("Objective: ", objective_value(model)) - println("\nSolver used was: ", solver_name(model), "\n") + @printf "Objective = %.4f\n" objective_value(model) r_vec = value.(r) s_vec = value.(s) @@ -53,9 +57,25 @@ function Solve_func(year, tol) println("\n") -end + # quick checks on results + + # Did we satisfy constraints? + rs = r_vec - s_vec + b_calc = sum(rs' .* A1, dims=2) + check = vec(b_calc) ./ b + q = (0, .1, .25, .5, .75, .9, 1) + println("Quantiles used below: ", q) + println("\nQuantiles of ratio of calculated targets to intended targets: ") + println(quantile!(check, q)) + + # Are the ratios of new weights to old weights in bounds (within tolerances)? + x = 1.0 .+ r_vec - s_vec # note the .+ + println("\nQuantiles of ratio of new weight to initial weight: ") + println(quantile!(x, q)) + +end year_list = [x for x in 2014:2030] tol = 0.70 diff --git a/history/reports/taxdata_report_2021-06-10.pdf b/history/reports/taxdata_report_2021-06-10.pdf index 622b6bc5..3d2ce01f 100644 Binary files a/history/reports/taxdata_report_2021-06-10.pdf and b/history/reports/taxdata_report_2021-06-10.pdf differ diff --git a/puf_stage2/puf_weights.csv.gz b/puf_stage2/puf_weights.csv.gz index 380819ee..25068893 100644 Binary files a/puf_stage2/puf_weights.csv.gz and b/puf_stage2/puf_weights.csv.gz differ diff --git a/puf_stage2/solver.jl b/puf_stage2/solver.jl index fe4f3667..423b2372 100644 --- a/puf_stage2/solver.jl +++ b/puf_stage2/solver.jl @@ -1,24 +1,11 @@ -using NPZ, JuMP, Cbc, Tulip +using JuMP, NPZ, Tulip +using Printf +using Statistics +using LinearAlgebra function Solve_func(year, tol) - println("\nSolving weights for $year ...\n") - - solver = "Tulip" # Tulip, Cbc - Tulip_max_iter = 100 # 100 default, 500 seems good enough - - println("Using solver: ", solver) - if solver == "Tulip" - model = Model(Tulip.Optimizer) - set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations - set_optimizer_attribute(model, "IPM_IterationsLimit", Tulip_max_iter) # default 100 - elseif solver == "Cbc" - model = Model(Cbc.Optimizer) - set_optimizer_attribute(model, "logLevel", 1) - # I have not figured out option to limit iterations - else - println("ERROR! Solver must be Tulip or Cbc.") - end + println("\nSolving weights for $year ...\n\n") array = npzread(string(year, "_input.npz")) @@ -28,23 +15,37 @@ function Solve_func(year, tol) N = size(A1)[2] - @variable(model, r[1:N] >= 0) - @variable(model, s[1:N] >= 0) + # scaling: determine a scaling vector with one value per constraint + # - the goal is to keep coefficients reasonably near 1.0 + # - multiply each row of A1 and A2 by its specific scaling constant + # - multiply each element of the b target vector by its scaling constant + # - current approach: choose scale factors so that the sum of absolute values in each row of + # A1 and of A2 will equal the total number of records / 1000; maybe we can improve on this - @objective(model, Min, sum(r[i] + s[i] for i in 1:N)) + scale = (N / 1000.) ./ sum(abs.(A1), dims=2) - # bound on top by tolerance - @constraint(model, [i in 1:N], r[i] + s[i] <= tol) + A1s = scale .* A1 + A2s = scale .* A2 + bs = scale .* b - # Ax = b - @constraint(model, [i in 1:length(b)], sum(A1[i,j] * r[j] + A2[i,j] * s[j] - for j in 1:N) == b[i]) + model = Model(Tulip.Optimizer) + set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations + set_optimizer_attribute(model, "IPM_IterationsLimit", 100) # default 100 seems to be enough + # r and s must each fall between 0 and the tolerance + @variable(model, 0 <= r[1:N] <= tol) + @variable(model, 0 <= s[1:N] <= tol) + + @objective(model, Min, sum(r[i] + s[i] for i in 1:N)) + + # Ax = b - use the scaled matrices and vector + @constraint(model, [i in 1:length(bs)], sum(A1s[i,j] * r[j] + A2s[i,j] * s[j] + for j in 1:N) == bs[i]) optimize!(model) + println("Termination status: ", termination_status(model)) - println("Objective: ", objective_value(model)) - println("\nSolver used was: ", solver_name(model), "\n") + @printf "Objective = %.4f\n" objective_value(model) r_vec = value.(r) s_vec = value.(s) @@ -53,14 +54,32 @@ function Solve_func(year, tol) println("\n") -end + # quick checks on results + + # Did we satisfy constraints? + rs = r_vec - s_vec + b_calc = sum(rs' .* A1, dims=2) + check = vec(b_calc) ./ b + + q = (0, .1, .25, .5, .75, .9, 1) + println("Quantiles used below: ", q) + println("\nQuantiles of ratio of calculated targets to intended targets: ") + println(quantile!(check, q)) + + # Are the ratios of new weights to old weights in bounds (within tolerances)? + x = 1.0 .+ r_vec - s_vec # note the .+ + println("\nQuantiles of ratio of new weight to initial weight: ") + println(quantile!(x, q)) + +end year_list = [x for x in 2012:2030] -tol_list = [0.40, 0.38, 0.35, 0.33, 0.30, 0.45, 0.45, - 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, - 0.45, 0.45, 0.45, 0.45, 0.45] +tol_list = [0.40, 0.38, 0.35, 0.33, 0.30, + 0.45, 0.45, 0.45, 0.45, 0.45, + 0.45, 0.45, 0.45, 0.45, 0.45, + 0.45, 0.45, 0.45, 0.45] # Run solver function for all years and tolerances (in order) for i in zip(year_list, tol_list) diff --git a/puf_stage3/puf_ratios.csv b/puf_stage3/puf_ratios.csv index a27c99e5..18778d97 100644 --- a/puf_stage3/puf_ratios.csv +++ b/puf_stage3/puf_ratios.csv @@ -1,21 +1,21 @@ agi_bin,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 INT2011,1.0259,0.5597,0.9448,0.9681,0.9728,0.9464,0.8390,0.8997,0.9713,0.9239,0.9342,0.9413,0.9497,0.9510,0.9693,0.9702,0.9569,1.0123,1.7014 -INT2012,0.7775,0.9454,0.8433,0.8158,0.7151,0.8094,0.7962,0.7931,0.8480,0.9069,0.9137,0.9169,1.0346,1.0436,1.0129,1.1436,1.0985,1.2358,1.3450 -INT2013,1.1323,0.7667,0.7821,0.7777,0.8937,0.8712,0.9556,0.9062,0.8346,0.8594,0.9739,1.0302,1.0488,1.0501,1.0890,0.9632,1.0451,0.9671,1.1390 -INT2014,0.9111,0.8672,0.8492,0.7748,0.8437,0.8796,0.9731,0.8831,0.8364,1.0106,0.8445,1.0516,1.0274,0.9292,1.0611,1.1524,1.0613,1.0733,1.0909 -INT2015,0.9812,0.9511,0.9321,0.9456,0.9535,0.9255,0.9365,0.9307,0.9464,0.9697,0.9888,1.0162,0.9974,0.9906,1.0240,0.9827,1.0220,1.1450,1.1673 -INT2016,0.9850,1.0086,1.0456,1.0315,1.0448,1.0332,1.0138,1.0056,1.0025,1.0178,0.9484,0.9532,0.9745,1.0132,1.0722,1.0130,1.0812,1.0796,0.9902 -INT2017,0.9920,0.9161,0.8971,0.8912,0.8948,0.9130,0.9175,0.9379,0.9610,0.9767,1.0163,1.0690,1.0326,0.9887,0.9393,0.9756,0.9563,1.0538,1.1932 -INT2018,0.9988,0.9757,0.9678,0.9657,0.9753,0.9719,0.9760,0.9775,0.9747,0.9992,0.9927,1.0042,1.0130,1.0068,1.0187,0.9898,1.0192,1.0862,1.0026 -INT2019,0.9963,0.9766,0.9767,0.9928,0.9782,0.9818,0.9775,0.9860,0.9825,0.9937,0.9910,1.0106,1.0036,1.0066,0.9995,1.0011,1.0255,1.0309,1.0190 -INT2020,1.0046,0.9803,0.9750,0.9753,0.9860,0.9891,0.9834,0.9847,0.9921,0.9968,0.9971,1.0082,1.0056,1.0049,1.0071,0.9993,1.0141,1.0093,1.0097 -INT2021,0.9958,0.9800,0.9761,0.9783,0.9805,0.9849,0.9894,0.9853,0.9937,0.9970,1.0049,1.0005,1.0052,1.0115,1.0045,1.0033,1.0287,1.0093,1.0040 -INT2022,1.0051,0.9810,0.9811,0.9778,0.9748,0.9823,0.9827,0.9904,0.9925,1.0059,1.0013,1.0030,1.0038,1.0070,1.0046,1.0062,1.0126,0.9978,1.0125 -INT2023,1.0002,0.9816,0.9773,0.9792,0.9879,0.9886,0.9863,1.0144,0.9958,0.9948,1.0026,1.0021,1.0025,1.0049,1.0013,1.0087,1.0237,1.0039,0.9928 -INT2024,1.0001,0.9846,0.9811,0.9782,0.9709,0.9790,0.9894,1.0085,0.9891,0.9979,0.9986,0.9989,1.0048,1.0079,1.0111,1.0082,1.0236,1.0129,1.0079 -INT2025,1.0089,0.9930,0.9766,0.9769,0.9849,0.9824,0.9980,0.9963,0.9944,1.0019,0.9990,0.9957,1.0058,1.0080,1.0131,1.0152,1.0234,0.9943,0.9916 -INT2026,1.0099,0.9841,0.9805,0.9799,0.9857,0.9894,0.9987,0.9944,0.9946,0.9936,0.9917,0.9943,1.0057,1.0143,1.0376,1.0247,1.0210,0.9983,0.9936 -INT2027,1.0104,0.9895,0.9828,0.9791,0.9895,0.9922,0.9856,0.9913,0.9927,0.9932,0.9991,0.9917,1.0081,1.0188,1.0194,1.0277,1.0244,0.9935,0.9947 -INT2028,1.0066,0.9973,0.9750,0.9846,0.9869,0.9868,1.0183,1.0002,0.9901,0.9958,0.9908,0.9919,1.0060,1.0172,1.0240,1.0230,1.0234,0.9959,0.9917 -INT2029,1.0122,0.9941,0.9809,0.9800,0.9824,0.9847,0.9951,0.9852,0.9938,0.9924,0.9953,0.9920,1.0043,1.0288,1.0279,1.0488,1.0228,1.0005,0.9924 -INT2030,1.0292,1.0010,0.9819,0.9844,0.9818,0.9930,0.9820,0.9801,0.9845,0.9904,0.9834,0.9922,1.0052,1.0358,1.0396,1.0429,1.0298,0.9958,0.9922 +INT2012,0.7778,0.9454,0.8433,0.8157,0.7153,0.8101,0.7961,0.7940,0.8481,0.9063,0.9130,0.9182,1.0343,1.0435,1.0129,1.1436,1.0975,1.2339,1.3445 +INT2013,1.1325,0.7670,0.7821,0.7778,0.8935,0.8699,0.9558,0.9045,0.8342,0.8596,0.9746,1.0271,1.0498,1.0505,1.0891,0.9632,1.0448,0.9690,1.1441 +INT2014,0.9106,0.8669,0.8492,0.7737,0.8431,0.8802,0.9729,0.8840,0.8368,1.0108,0.8443,1.0534,1.0274,0.9291,1.0609,1.1524,1.0626,1.0728,1.0862 +INT2015,0.9813,0.9511,0.9323,0.9470,0.9543,0.9246,0.9367,0.9315,0.9463,0.9699,0.9891,1.0163,0.9966,0.9906,1.0241,0.9827,1.0221,1.1437,1.1672 +INT2016,0.9843,1.0086,1.0453,1.0314,1.0447,1.0342,1.0116,1.0025,1.0025,1.0175,0.9484,0.9531,0.9752,1.0131,1.0723,1.0130,1.0804,1.0810,0.9935 +INT2017,0.9929,0.9161,0.8978,0.8911,0.8936,0.9131,0.9193,0.9401,0.9611,0.9766,1.0158,1.0694,1.0315,0.9881,0.9403,0.9756,0.9565,1.0514,1.1945 +INT2018,0.9993,0.9758,0.9681,0.9665,0.9766,0.9721,0.9766,0.9770,0.9751,0.9991,0.9931,1.0037,1.0140,1.0066,1.0183,0.9897,1.0193,1.0882,0.9982 +INT2019,0.9980,0.9765,0.9762,0.9919,0.9792,0.9812,0.9770,0.9863,0.9817,0.9947,0.9912,1.0110,1.0024,1.0074,0.9980,1.0006,1.0246,1.0290,1.0196 +INT2020,1.0019,0.9803,0.9743,0.9754,0.9840,0.9864,0.9831,0.9832,0.9931,0.9966,0.9961,1.0100,1.0059,1.0053,1.0076,0.9998,1.0137,1.0108,1.0110 +INT2021,0.9955,0.9801,0.9764,0.9784,0.9829,0.9883,0.9897,0.9869,0.9922,0.9966,1.0065,0.9979,1.0064,1.0113,1.0046,1.0033,1.0297,1.0091,1.0024 +INT2022,1.0062,0.9808,0.9811,0.9780,0.9734,0.9820,0.9829,0.9905,0.9935,1.0054,1.0020,1.0042,1.0030,1.0050,1.0046,1.0063,1.0131,0.9980,1.0111 +INT2023,0.9986,0.9817,0.9773,0.9788,0.9894,0.9889,0.9858,1.0148,0.9943,0.9951,1.0021,1.0026,1.0029,1.0064,1.0005,1.0085,1.0218,1.0031,0.9942 +INT2024,1.0010,0.9843,0.9811,0.9788,0.9695,0.9788,0.9896,1.0154,0.9902,0.9971,0.9982,0.9975,1.0039,1.0080,1.0115,1.0085,1.0235,1.0136,1.0079 +INT2025,1.0090,0.9935,0.9766,0.9773,0.9851,0.9824,1.0010,0.9910,0.9948,1.0028,0.9996,0.9955,1.0063,1.0074,1.0128,1.0135,1.0242,0.9942,0.9915 +INT2026,1.0098,0.9839,0.9811,0.9792,0.9866,0.9897,0.9952,0.9932,0.9948,0.9942,0.9894,0.9948,1.0066,1.0147,1.0383,1.0261,1.0210,0.9985,0.9936 +INT2027,1.0105,0.9894,0.9829,0.9790,0.9893,0.9917,0.9862,0.9903,0.9927,0.9931,1.0005,0.9919,1.0071,1.0188,1.0188,1.0278,1.0252,0.9940,0.9946 +INT2028,1.0064,0.9973,0.9730,0.9848,0.9872,0.9868,1.0216,1.0003,0.9897,0.9956,0.9913,0.9918,1.0060,1.0168,1.0237,1.0224,1.0231,0.9958,0.9918 +INT2029,1.0129,0.9941,0.9822,0.9798,0.9815,0.9847,0.9922,0.9852,0.9928,0.9919,0.9956,0.9916,1.0049,1.0295,1.0289,1.0500,1.0227,1.0009,0.9925 +INT2030,1.0287,1.0011,0.9819,0.9846,0.9817,0.9930,0.9818,0.9787,0.9853,0.9912,0.9831,0.9924,1.0050,1.0358,1.0395,1.0421,1.0303,0.9955,0.9921