Skip to content

Commit

Permalink
code clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
andersonfrailey committed Jun 10, 2021
1 parent 06b1690 commit 7350b56
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 80 deletions.
Binary file modified cps_stage2/cps_weights.csv.gz
Binary file not shown.
78 changes: 49 additions & 29 deletions cps_stage2/solver.jl
Original file line number Diff line number Diff line change
@@ -1,50 +1,54 @@
using NPZ, JuMP, Cbc, Tulip
using JuMP, NPZ, Tulip
using Printf
using Statistics
using LinearAlgebra

function Solve_func(year, tol)

println("\nSolving weights for $year ...\n")

solver = "Tulip" # Tulip, Cbc
Tulip_max_iter = 100 # 100 default, 500 seems good enough

println("Using solver: ", solver)
if solver == "Tulip"
model = Model(Tulip.Optimizer)
set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations
set_optimizer_attribute(model, "IPM_IterationsLimit", Tulip_max_iter) # default 100
elseif solver == "Cbc"
model = Model(Cbc.Optimizer)
set_optimizer_attribute(model, "logLevel", 1)
# I have not figured out option to limit iterations
else
println("ERROR! Solver must be Tulip or Cbc.")
end
println("\nSolving weights for $year ...\n\n")

array = npzread(string(year, "_input.npz"))

# ddir = "/home/donboyd/Documents/python_projects/taxdata/puf_stage2/"
# array = npzread(string(ddir, year, "_input.npz"))

A1 = array["A1"]
A2 = array["A2"]
b = array["b"]

N = size(A1)[2]

@variable(model, r[1:N] >= 0)
@variable(model, s[1:N] >= 0)
# scaling: determine a scaling vector with one value per constraint
# - the goal is to keep coefficients reasonably near 1.0
# - multiply each row of A1 and A2 by its specific scaling constant
# - multiply each element of the b target vector by its scaling constant
# - current approach: choose scale factors so that the sum of absolute values in each row of
# A1 and of A2 will equal the total number of records / 1000; maybe we can improve on this

@objective(model, Min, sum(r[i] + s[i] for i in 1:N))
scale = (N / 1000.) ./ sum(abs.(A1), dims=2)

# bound on top by tolerance
@constraint(model, [i in 1:N], r[i] + s[i] <= tol)
A1s = scale .* A1
A2s = scale .* A2
bs = scale .* b

# Ax = b
@constraint(model, [i in 1:length(b)], sum(A1[i,j] * r[j] + A2[i,j] * s[j]
for j in 1:N) == b[i])
model = Model(Tulip.Optimizer)
set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations
set_optimizer_attribute(model, "IPM_IterationsLimit", 100) # default 100 seems to be enough

# r and s must each fall between 0 and the tolerance
@variable(model, 0 <= r[1:N] <= tol)
@variable(model, 0 <= s[1:N] <= tol)

@objective(model, Min, sum(r[i] + s[i] for i in 1:N))

# Ax = b - use the scaled matrices and vector
@constraint(model, [i in 1:length(bs)], sum(A1s[i,j] * r[j] + A2s[i,j] * s[j]
for j in 1:N) == bs[i])

optimize!(model)

println("Termination status: ", termination_status(model))
println("Objective: ", objective_value(model))
println("\nSolver used was: ", solver_name(model), "\n")
@printf "Objective = %.4f\n" objective_value(model)

r_vec = value.(r)
s_vec = value.(s)
Expand All @@ -53,9 +57,25 @@ function Solve_func(year, tol)

println("\n")

end
# quick checks on results

# Did we satisfy constraints?
rs = r_vec - s_vec
b_calc = sum(rs' .* A1, dims=2)
check = vec(b_calc) ./ b

q = (0, .1, .25, .5, .75, .9, 1)
println("Quantiles used below: ", q)

println("\nQuantiles of ratio of calculated targets to intended targets: ")
println(quantile!(check, q))

# Are the ratios of new weights to old weights in bounds (within tolerances)?
x = 1.0 .+ r_vec - s_vec # note the .+
println("\nQuantiles of ratio of new weight to initial weight: ")
println(quantile!(x, q))

end

year_list = [x for x in 2014:2030]
tol = 0.70
Expand Down
Binary file modified history/reports/taxdata_report_2021-06-10.pdf
Binary file not shown.
Binary file modified puf_stage2/puf_weights.csv.gz
Binary file not shown.
83 changes: 51 additions & 32 deletions puf_stage2/solver.jl
Original file line number Diff line number Diff line change
@@ -1,24 +1,11 @@
using NPZ, JuMP, Cbc, Tulip
using JuMP, NPZ, Tulip
using Printf
using Statistics
using LinearAlgebra

function Solve_func(year, tol)

println("\nSolving weights for $year ...\n")

solver = "Tulip" # Tulip, Cbc
Tulip_max_iter = 100 # 100 default, 500 seems good enough

println("Using solver: ", solver)
if solver == "Tulip"
model = Model(Tulip.Optimizer)
set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations
set_optimizer_attribute(model, "IPM_IterationsLimit", Tulip_max_iter) # default 100
elseif solver == "Cbc"
model = Model(Cbc.Optimizer)
set_optimizer_attribute(model, "logLevel", 1)
# I have not figured out option to limit iterations
else
println("ERROR! Solver must be Tulip or Cbc.")
end
println("\nSolving weights for $year ...\n\n")

array = npzread(string(year, "_input.npz"))

Expand All @@ -28,23 +15,37 @@ function Solve_func(year, tol)

N = size(A1)[2]

@variable(model, r[1:N] >= 0)
@variable(model, s[1:N] >= 0)
# scaling: determine a scaling vector with one value per constraint
# - the goal is to keep coefficients reasonably near 1.0
# - multiply each row of A1 and A2 by its specific scaling constant
# - multiply each element of the b target vector by its scaling constant
# - current approach: choose scale factors so that the sum of absolute values in each row of
# A1 and of A2 will equal the total number of records / 1000; maybe we can improve on this

@objective(model, Min, sum(r[i] + s[i] for i in 1:N))
scale = (N / 1000.) ./ sum(abs.(A1), dims=2)

# bound on top by tolerance
@constraint(model, [i in 1:N], r[i] + s[i] <= tol)
A1s = scale .* A1
A2s = scale .* A2
bs = scale .* b

# Ax = b
@constraint(model, [i in 1:length(b)], sum(A1[i,j] * r[j] + A2[i,j] * s[j]
for j in 1:N) == b[i])
model = Model(Tulip.Optimizer)
set_optimizer_attribute(model, "OutputLevel", 1) # 0=disable output (default), 1=show iterations
set_optimizer_attribute(model, "IPM_IterationsLimit", 100) # default 100 seems to be enough

# r and s must each fall between 0 and the tolerance
@variable(model, 0 <= r[1:N] <= tol)
@variable(model, 0 <= s[1:N] <= tol)

@objective(model, Min, sum(r[i] + s[i] for i in 1:N))

# Ax = b - use the scaled matrices and vector
@constraint(model, [i in 1:length(bs)], sum(A1s[i,j] * r[j] + A2s[i,j] * s[j]
for j in 1:N) == bs[i])

optimize!(model)

println("Termination status: ", termination_status(model))
println("Objective: ", objective_value(model))
println("\nSolver used was: ", solver_name(model), "\n")
@printf "Objective = %.4f\n" objective_value(model)

r_vec = value.(r)
s_vec = value.(s)
Expand All @@ -53,14 +54,32 @@ function Solve_func(year, tol)

println("\n")

end
# quick checks on results

# Did we satisfy constraints?
rs = r_vec - s_vec
b_calc = sum(rs' .* A1, dims=2)
check = vec(b_calc) ./ b

q = (0, .1, .25, .5, .75, .9, 1)
println("Quantiles used below: ", q)

println("\nQuantiles of ratio of calculated targets to intended targets: ")
println(quantile!(check, q))

# Are the ratios of new weights to old weights in bounds (within tolerances)?
x = 1.0 .+ r_vec - s_vec # note the .+
println("\nQuantiles of ratio of new weight to initial weight: ")
println(quantile!(x, q))

end


year_list = [x for x in 2012:2030]
tol_list = [0.40, 0.38, 0.35, 0.33, 0.30, 0.45, 0.45,
0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45,
0.45, 0.45, 0.45, 0.45, 0.45]
tol_list = [0.40, 0.38, 0.35, 0.33, 0.30,
0.45, 0.45, 0.45, 0.45, 0.45,
0.45, 0.45, 0.45, 0.45, 0.45,
0.45, 0.45, 0.45, 0.45]

# Run solver function for all years and tolerances (in order)
for i in zip(year_list, tol_list)
Expand Down
38 changes: 19 additions & 19 deletions puf_stage3/puf_ratios.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
agi_bin,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
INT2011,1.0259,0.5597,0.9448,0.9681,0.9728,0.9464,0.8390,0.8997,0.9713,0.9239,0.9342,0.9413,0.9497,0.9510,0.9693,0.9702,0.9569,1.0123,1.7014
INT2012,0.7775,0.9454,0.8433,0.8158,0.7151,0.8094,0.7962,0.7931,0.8480,0.9069,0.9137,0.9169,1.0346,1.0436,1.0129,1.1436,1.0985,1.2358,1.3450
INT2013,1.1323,0.7667,0.7821,0.7777,0.8937,0.8712,0.9556,0.9062,0.8346,0.8594,0.9739,1.0302,1.0488,1.0501,1.0890,0.9632,1.0451,0.9671,1.1390
INT2014,0.9111,0.8672,0.8492,0.7748,0.8437,0.8796,0.9731,0.8831,0.8364,1.0106,0.8445,1.0516,1.0274,0.9292,1.0611,1.1524,1.0613,1.0733,1.0909
INT2015,0.9812,0.9511,0.9321,0.9456,0.9535,0.9255,0.9365,0.9307,0.9464,0.9697,0.9888,1.0162,0.9974,0.9906,1.0240,0.9827,1.0220,1.1450,1.1673
INT2016,0.9850,1.0086,1.0456,1.0315,1.0448,1.0332,1.0138,1.0056,1.0025,1.0178,0.9484,0.9532,0.9745,1.0132,1.0722,1.0130,1.0812,1.0796,0.9902
INT2017,0.9920,0.9161,0.8971,0.8912,0.8948,0.9130,0.9175,0.9379,0.9610,0.9767,1.0163,1.0690,1.0326,0.9887,0.9393,0.9756,0.9563,1.0538,1.1932
INT2018,0.9988,0.9757,0.9678,0.9657,0.9753,0.9719,0.9760,0.9775,0.9747,0.9992,0.9927,1.0042,1.0130,1.0068,1.0187,0.9898,1.0192,1.0862,1.0026
INT2019,0.9963,0.9766,0.9767,0.9928,0.9782,0.9818,0.9775,0.9860,0.9825,0.9937,0.9910,1.0106,1.0036,1.0066,0.9995,1.0011,1.0255,1.0309,1.0190
INT2020,1.0046,0.9803,0.9750,0.9753,0.9860,0.9891,0.9834,0.9847,0.9921,0.9968,0.9971,1.0082,1.0056,1.0049,1.0071,0.9993,1.0141,1.0093,1.0097
INT2021,0.9958,0.9800,0.9761,0.9783,0.9805,0.9849,0.9894,0.9853,0.9937,0.9970,1.0049,1.0005,1.0052,1.0115,1.0045,1.0033,1.0287,1.0093,1.0040
INT2022,1.0051,0.9810,0.9811,0.9778,0.9748,0.9823,0.9827,0.9904,0.9925,1.0059,1.0013,1.0030,1.0038,1.0070,1.0046,1.0062,1.0126,0.9978,1.0125
INT2023,1.0002,0.9816,0.9773,0.9792,0.9879,0.9886,0.9863,1.0144,0.9958,0.9948,1.0026,1.0021,1.0025,1.0049,1.0013,1.0087,1.0237,1.0039,0.9928
INT2024,1.0001,0.9846,0.9811,0.9782,0.9709,0.9790,0.9894,1.0085,0.9891,0.9979,0.9986,0.9989,1.0048,1.0079,1.0111,1.0082,1.0236,1.0129,1.0079
INT2025,1.0089,0.9930,0.9766,0.9769,0.9849,0.9824,0.9980,0.9963,0.9944,1.0019,0.9990,0.9957,1.0058,1.0080,1.0131,1.0152,1.0234,0.9943,0.9916
INT2026,1.0099,0.9841,0.9805,0.9799,0.9857,0.9894,0.9987,0.9944,0.9946,0.9936,0.9917,0.9943,1.0057,1.0143,1.0376,1.0247,1.0210,0.9983,0.9936
INT2027,1.0104,0.9895,0.9828,0.9791,0.9895,0.9922,0.9856,0.9913,0.9927,0.9932,0.9991,0.9917,1.0081,1.0188,1.0194,1.0277,1.0244,0.9935,0.9947
INT2028,1.0066,0.9973,0.9750,0.9846,0.9869,0.9868,1.0183,1.0002,0.9901,0.9958,0.9908,0.9919,1.0060,1.0172,1.0240,1.0230,1.0234,0.9959,0.9917
INT2029,1.0122,0.9941,0.9809,0.9800,0.9824,0.9847,0.9951,0.9852,0.9938,0.9924,0.9953,0.9920,1.0043,1.0288,1.0279,1.0488,1.0228,1.0005,0.9924
INT2030,1.0292,1.0010,0.9819,0.9844,0.9818,0.9930,0.9820,0.9801,0.9845,0.9904,0.9834,0.9922,1.0052,1.0358,1.0396,1.0429,1.0298,0.9958,0.9922
INT2012,0.7778,0.9454,0.8433,0.8157,0.7153,0.8101,0.7961,0.7940,0.8481,0.9063,0.9130,0.9182,1.0343,1.0435,1.0129,1.1436,1.0975,1.2339,1.3445
INT2013,1.1325,0.7670,0.7821,0.7778,0.8935,0.8699,0.9558,0.9045,0.8342,0.8596,0.9746,1.0271,1.0498,1.0505,1.0891,0.9632,1.0448,0.9690,1.1441
INT2014,0.9106,0.8669,0.8492,0.7737,0.8431,0.8802,0.9729,0.8840,0.8368,1.0108,0.8443,1.0534,1.0274,0.9291,1.0609,1.1524,1.0626,1.0728,1.0862
INT2015,0.9813,0.9511,0.9323,0.9470,0.9543,0.9246,0.9367,0.9315,0.9463,0.9699,0.9891,1.0163,0.9966,0.9906,1.0241,0.9827,1.0221,1.1437,1.1672
INT2016,0.9843,1.0086,1.0453,1.0314,1.0447,1.0342,1.0116,1.0025,1.0025,1.0175,0.9484,0.9531,0.9752,1.0131,1.0723,1.0130,1.0804,1.0810,0.9935
INT2017,0.9929,0.9161,0.8978,0.8911,0.8936,0.9131,0.9193,0.9401,0.9611,0.9766,1.0158,1.0694,1.0315,0.9881,0.9403,0.9756,0.9565,1.0514,1.1945
INT2018,0.9993,0.9758,0.9681,0.9665,0.9766,0.9721,0.9766,0.9770,0.9751,0.9991,0.9931,1.0037,1.0140,1.0066,1.0183,0.9897,1.0193,1.0882,0.9982
INT2019,0.9980,0.9765,0.9762,0.9919,0.9792,0.9812,0.9770,0.9863,0.9817,0.9947,0.9912,1.0110,1.0024,1.0074,0.9980,1.0006,1.0246,1.0290,1.0196
INT2020,1.0019,0.9803,0.9743,0.9754,0.9840,0.9864,0.9831,0.9832,0.9931,0.9966,0.9961,1.0100,1.0059,1.0053,1.0076,0.9998,1.0137,1.0108,1.0110
INT2021,0.9955,0.9801,0.9764,0.9784,0.9829,0.9883,0.9897,0.9869,0.9922,0.9966,1.0065,0.9979,1.0064,1.0113,1.0046,1.0033,1.0297,1.0091,1.0024
INT2022,1.0062,0.9808,0.9811,0.9780,0.9734,0.9820,0.9829,0.9905,0.9935,1.0054,1.0020,1.0042,1.0030,1.0050,1.0046,1.0063,1.0131,0.9980,1.0111
INT2023,0.9986,0.9817,0.9773,0.9788,0.9894,0.9889,0.9858,1.0148,0.9943,0.9951,1.0021,1.0026,1.0029,1.0064,1.0005,1.0085,1.0218,1.0031,0.9942
INT2024,1.0010,0.9843,0.9811,0.9788,0.9695,0.9788,0.9896,1.0154,0.9902,0.9971,0.9982,0.9975,1.0039,1.0080,1.0115,1.0085,1.0235,1.0136,1.0079
INT2025,1.0090,0.9935,0.9766,0.9773,0.9851,0.9824,1.0010,0.9910,0.9948,1.0028,0.9996,0.9955,1.0063,1.0074,1.0128,1.0135,1.0242,0.9942,0.9915
INT2026,1.0098,0.9839,0.9811,0.9792,0.9866,0.9897,0.9952,0.9932,0.9948,0.9942,0.9894,0.9948,1.0066,1.0147,1.0383,1.0261,1.0210,0.9985,0.9936
INT2027,1.0105,0.9894,0.9829,0.9790,0.9893,0.9917,0.9862,0.9903,0.9927,0.9931,1.0005,0.9919,1.0071,1.0188,1.0188,1.0278,1.0252,0.9940,0.9946
INT2028,1.0064,0.9973,0.9730,0.9848,0.9872,0.9868,1.0216,1.0003,0.9897,0.9956,0.9913,0.9918,1.0060,1.0168,1.0237,1.0224,1.0231,0.9958,0.9918
INT2029,1.0129,0.9941,0.9822,0.9798,0.9815,0.9847,0.9922,0.9852,0.9928,0.9919,0.9956,0.9916,1.0049,1.0295,1.0289,1.0500,1.0227,1.0009,0.9925
INT2030,1.0287,1.0011,0.9819,0.9846,0.9817,0.9930,0.9818,0.9787,0.9853,0.9912,0.9831,0.9924,1.0050,1.0358,1.0395,1.0421,1.0303,0.9955,0.9921

0 comments on commit 7350b56

Please sign in to comment.