diff --git a/cps_stage2/cps_weights.csv.gz b/cps_stage2/cps_weights.csv.gz index 516a80c9..fd2f5183 100644 Binary files a/cps_stage2/cps_weights.csv.gz and b/cps_stage2/cps_weights.csv.gz differ diff --git a/cps_stage2/solver.jl b/cps_stage2/solver.jl index 8e5c903f..2da28e54 100644 --- a/cps_stage2/solver.jl +++ b/cps_stage2/solver.jl @@ -1,34 +1,54 @@ -using JuMP, Cbc, NPZ +using JuMP, NPZ, Tulip +using Printf +using Statistics +using LinearAlgebra function Solve_func(year, tol) - println("Solving weights for $year ...\n\n") + println("\nSolving weights for $year ...\n\n") array = npzread(string(year, "_input.npz")) + # ddir = "/home/donboyd/Documents/python_projects/taxdata/puf_stage2/" + # array = npzread(string(ddir, year, "_input.npz")) + A1 = array["A1"] A2 = array["A2"] b = array["b"] - model = Model(Cbc.Optimizer) - set_optimizer_attribute(model, "logLevel", 1) N = size(A1)[2] - @variable(model, r[1:N] >= 0) - @variable(model, s[1:N] >= 0) + # scaling: determine a scaling vector with one value per constraint + # - the goal is to keep coefficients reasonably near 1.0 + # - multiply each row of A1 and A2 by its specific scaling constant + # - multiply each element of the b target vector by its scaling constant + # - current approach: choose scale factors so that the sum of absolute values in each row of + # A1 and of A2 will equal the total number of records / 1000; maybe we can improve on this - @objective(model, Min, sum(r[i] + s[i] for i in 1:N)) + scale = (N / 1000.) ./ sum(abs.(A1), dims=2) + + A1s = scale .* A1 + A2s = scale .* A2 + bs = scale .* b - # bound on top by tolerance - @constraint(model, [i in 1:N], r[i] + s[i] <= tol) + model = Model(Tulip.Optimizer) + set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations + set_optimizer_attribute(model, "IPM_IterationsLimit", 100) # default 100 seems to be enough - # Ax = b - @constraint(model, [i in 1:length(b)], sum(A1[i,j] * r[j] + A2[i,j] * s[j] - for j in 1:N) == b[i]) + # r and s must each fall between 0 and the tolerance + @variable(model, 0 <= r[1:N] <= tol) + @variable(model, 0 <= s[1:N] <= tol) + @objective(model, Min, sum(r[i] + s[i] for i in 1:N)) + + # Ax = b - use the scaled matrices and vector + @constraint(model, [i in 1:length(bs)], sum(A1s[i,j] * r[j] + A2s[i,j] * s[j] + for j in 1:N) == bs[i]) optimize!(model) - termination_status(model) + + println("Termination status: ", termination_status(model)) + @printf "Objective = %.4f\n" objective_value(model) r_vec = value.(r) s_vec = value.(s) @@ -37,9 +57,25 @@ function Solve_func(year, tol) println("\n") -end + # quick checks on results + + # Did we satisfy constraints? + rs = r_vec - s_vec + b_calc = sum(rs' .* A1, dims=2) + check = vec(b_calc) ./ b + + q = (0, .1, .25, .5, .75, .9, 1) + println("Quantiles used below: ", q) + println("\nQuantiles of ratio of calculated targets to intended targets: ") + println(quantile!(check, q)) + # Are the ratios of new weights to old weights in bounds (within tolerances)? + x = 1.0 .+ r_vec - s_vec # note the .+ + println("\nQuantiles of ratio of new weight to initial weight: ") + println(quantile!(x, q)) + +end year_list = [x for x in 2014:2030] tol = 0.70 @@ -47,4 +83,4 @@ tol = 0.70 # Run solver function for all years and tolerances (in order) for i in year_list Solve_func(i, tol) -end +end \ No newline at end of file diff --git a/history/reports/taxdata_report_2021-06-10.pdf b/history/reports/taxdata_report_2021-06-10.pdf new file mode 100644 index 00000000..3d2ce01f Binary files /dev/null and b/history/reports/taxdata_report_2021-06-10.pdf differ diff --git a/puf_stage2/puf_weights.csv.gz b/puf_stage2/puf_weights.csv.gz index 2040eb82..25068893 100644 Binary files a/puf_stage2/puf_weights.csv.gz and b/puf_stage2/puf_weights.csv.gz differ diff --git a/puf_stage2/solver.jl b/puf_stage2/solver.jl index 07edd9bb..423b2372 100644 --- a/puf_stage2/solver.jl +++ b/puf_stage2/solver.jl @@ -1,8 +1,11 @@ -using JuMP, Cbc, NPZ +using JuMP, NPZ, Tulip +using Printf +using Statistics +using LinearAlgebra function Solve_func(year, tol) - println("Solving weights for $year ...\n\n") + println("\nSolving weights for $year ...\n\n") array = npzread(string(year, "_input.npz")) @@ -10,25 +13,39 @@ function Solve_func(year, tol) A2 = array["A2"] b = array["b"] - model = Model(Cbc.Optimizer) - set_optimizer_attribute(model, "logLevel", 1) N = size(A1)[2] - @variable(model, r[1:N] >= 0) - @variable(model, s[1:N] >= 0) + # scaling: determine a scaling vector with one value per constraint + # - the goal is to keep coefficients reasonably near 1.0 + # - multiply each row of A1 and A2 by its specific scaling constant + # - multiply each element of the b target vector by its scaling constant + # - current approach: choose scale factors so that the sum of absolute values in each row of + # A1 and of A2 will equal the total number of records / 1000; maybe we can improve on this - @objective(model, Min, sum(r[i] + s[i] for i in 1:N)) + scale = (N / 1000.) ./ sum(abs.(A1), dims=2) + + A1s = scale .* A1 + A2s = scale .* A2 + bs = scale .* b + + model = Model(Tulip.Optimizer) + set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations + set_optimizer_attribute(model, "IPM_IterationsLimit", 100) # default 100 seems to be enough - # bound on top by tolerance - @constraint(model, [i in 1:N], r[i] + s[i] <= tol) + # r and s must each fall between 0 and the tolerance + @variable(model, 0 <= r[1:N] <= tol) + @variable(model, 0 <= s[1:N] <= tol) - # Ax = b - @constraint(model, [i in 1:length(b)], sum(A1[i,j] * r[j] + A2[i,j] * s[j] - for j in 1:N) == b[i]) + @objective(model, Min, sum(r[i] + s[i] for i in 1:N)) + # Ax = b - use the scaled matrices and vector + @constraint(model, [i in 1:length(bs)], sum(A1s[i,j] * r[j] + A2s[i,j] * s[j] + for j in 1:N) == bs[i]) optimize!(model) - termination_status(model) + + println("Termination status: ", termination_status(model)) + @printf "Objective = %.4f\n" objective_value(model) r_vec = value.(r) s_vec = value.(s) @@ -37,14 +54,32 @@ function Solve_func(year, tol) println("\n") -end + # quick checks on results + # Did we satisfy constraints? + rs = r_vec - s_vec + b_calc = sum(rs' .* A1, dims=2) + check = vec(b_calc) ./ b + + q = (0, .1, .25, .5, .75, .9, 1) + println("Quantiles used below: ", q) + + println("\nQuantiles of ratio of calculated targets to intended targets: ") + println(quantile!(check, q)) + + # Are the ratios of new weights to old weights in bounds (within tolerances)? + x = 1.0 .+ r_vec - s_vec # note the .+ + println("\nQuantiles of ratio of new weight to initial weight: ") + println(quantile!(x, q)) + +end year_list = [x for x in 2012:2030] -tol_list = [0.40, 0.38, 0.35, 0.33, 0.30, 0.45, 0.45, - 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, - 0.45, 0.45, 0.45, 0.45, 0.45] +tol_list = [0.40, 0.38, 0.35, 0.33, 0.30, + 0.45, 0.45, 0.45, 0.45, 0.45, + 0.45, 0.45, 0.45, 0.45, 0.45, + 0.45, 0.45, 0.45, 0.45] # Run solver function for all years and tolerances (in order) for i in zip(year_list, tol_list) diff --git a/puf_stage3/puf_ratios.csv b/puf_stage3/puf_ratios.csv index 044bd80e..18778d97 100644 --- a/puf_stage3/puf_ratios.csv +++ b/puf_stage3/puf_ratios.csv @@ -5,17 +5,17 @@ INT2013,1.1325,0.7670,0.7821,0.7778,0.8935,0.8699,0.9558,0.9045,0.8342,0.8596,0. INT2014,0.9106,0.8669,0.8492,0.7737,0.8431,0.8802,0.9729,0.8840,0.8368,1.0108,0.8443,1.0534,1.0274,0.9291,1.0609,1.1524,1.0626,1.0728,1.0862 INT2015,0.9813,0.9511,0.9323,0.9470,0.9543,0.9246,0.9367,0.9315,0.9463,0.9699,0.9891,1.0163,0.9966,0.9906,1.0241,0.9827,1.0221,1.1437,1.1672 INT2016,0.9843,1.0086,1.0453,1.0314,1.0447,1.0342,1.0116,1.0025,1.0025,1.0175,0.9484,0.9531,0.9752,1.0131,1.0723,1.0130,1.0804,1.0810,0.9935 -INT2017,0.9929,0.9161,0.8978,0.8911,0.8936,0.9131,0.9193,0.9401,0.9611,0.9766,1.0158,1.0694,1.0315,0.9881,0.9403,0.9756,0.9565,1.0515,1.1945 +INT2017,0.9929,0.9161,0.8978,0.8911,0.8936,0.9131,0.9193,0.9401,0.9611,0.9766,1.0158,1.0694,1.0315,0.9881,0.9403,0.9756,0.9565,1.0514,1.1945 INT2018,0.9993,0.9758,0.9681,0.9665,0.9766,0.9721,0.9766,0.9770,0.9751,0.9991,0.9931,1.0037,1.0140,1.0066,1.0183,0.9897,1.0193,1.0882,0.9982 INT2019,0.9980,0.9765,0.9762,0.9919,0.9792,0.9812,0.9770,0.9863,0.9817,0.9947,0.9912,1.0110,1.0024,1.0074,0.9980,1.0006,1.0246,1.0290,1.0196 INT2020,1.0019,0.9803,0.9743,0.9754,0.9840,0.9864,0.9831,0.9832,0.9931,0.9966,0.9961,1.0100,1.0059,1.0053,1.0076,0.9998,1.0137,1.0108,1.0110 -INT2021,0.9955,0.9801,0.9764,0.9784,0.9829,0.9883,0.9897,0.9869,0.9922,0.9966,1.0064,0.9979,1.0065,1.0113,1.0046,1.0033,1.0297,1.0091,1.0024 +INT2021,0.9955,0.9801,0.9764,0.9784,0.9829,0.9883,0.9897,0.9869,0.9922,0.9966,1.0065,0.9979,1.0064,1.0113,1.0046,1.0033,1.0297,1.0091,1.0024 INT2022,1.0062,0.9808,0.9811,0.9780,0.9734,0.9820,0.9829,0.9905,0.9935,1.0054,1.0020,1.0042,1.0030,1.0050,1.0046,1.0063,1.0131,0.9980,1.0111 INT2023,0.9986,0.9817,0.9773,0.9788,0.9894,0.9889,0.9858,1.0148,0.9943,0.9951,1.0021,1.0026,1.0029,1.0064,1.0005,1.0085,1.0218,1.0031,0.9942 INT2024,1.0010,0.9843,0.9811,0.9788,0.9695,0.9788,0.9896,1.0154,0.9902,0.9971,0.9982,0.9975,1.0039,1.0080,1.0115,1.0085,1.0235,1.0136,1.0079 INT2025,1.0090,0.9935,0.9766,0.9773,0.9851,0.9824,1.0010,0.9910,0.9948,1.0028,0.9996,0.9955,1.0063,1.0074,1.0128,1.0135,1.0242,0.9942,0.9915 INT2026,1.0098,0.9839,0.9811,0.9792,0.9866,0.9897,0.9952,0.9932,0.9948,0.9942,0.9894,0.9948,1.0066,1.0147,1.0383,1.0261,1.0210,0.9985,0.9936 -INT2027,1.0105,0.9894,0.9830,0.9790,0.9893,0.9917,0.9862,0.9903,0.9927,0.9931,1.0005,0.9919,1.0071,1.0188,1.0188,1.0278,1.0252,0.9940,0.9946 -INT2028,1.0064,0.9973,0.9730,0.9848,0.9872,0.9868,1.0216,1.0003,0.9897,0.9957,0.9913,0.9918,1.0060,1.0168,1.0237,1.0224,1.0231,0.9958,0.9918 -INT2029,1.0129,0.9941,0.9822,0.9797,0.9815,0.9847,0.9922,0.9852,0.9928,0.9919,0.9956,0.9916,1.0049,1.0295,1.0289,1.0500,1.0227,1.0009,0.9925 -INT2030,1.0287,1.0011,0.9819,0.9846,0.9817,0.9930,0.9818,0.9787,0.9853,0.9912,0.9831,0.9924,1.0050,1.0358,1.0394,1.0421,1.0303,0.9955,0.9921 +INT2027,1.0105,0.9894,0.9829,0.9790,0.9893,0.9917,0.9862,0.9903,0.9927,0.9931,1.0005,0.9919,1.0071,1.0188,1.0188,1.0278,1.0252,0.9940,0.9946 +INT2028,1.0064,0.9973,0.9730,0.9848,0.9872,0.9868,1.0216,1.0003,0.9897,0.9956,0.9913,0.9918,1.0060,1.0168,1.0237,1.0224,1.0231,0.9958,0.9918 +INT2029,1.0129,0.9941,0.9822,0.9798,0.9815,0.9847,0.9922,0.9852,0.9928,0.9919,0.9956,0.9916,1.0049,1.0295,1.0289,1.0500,1.0227,1.0009,0.9925 +INT2030,1.0287,1.0011,0.9819,0.9846,0.9817,0.9930,0.9818,0.9787,0.9853,0.9912,0.9831,0.9924,1.0050,1.0358,1.0395,1.0421,1.0303,0.9955,0.9921