code clean up

PSLmodels · Jun 10, 2021 · 7350b56 · 7350b56
1 parent 06b1690
commit 7350b56
Show file tree

Hide file tree

Showing 6 changed files with 119 additions and 80 deletions.
diff --git a/cps_stage2/cps_weights.csv.gz b/cps_stage2/cps_weights.csv.gz
diff --git a/cps_stage2/solver.jl b/cps_stage2/solver.jl
@@ -1,50 +1,54 @@
-using NPZ, JuMP, Cbc, Tulip
+using JuMP, NPZ, Tulip
+using Printf
+using Statistics
+using LinearAlgebra
 
 function Solve_func(year, tol)
 
-	println("\nSolving weights for $year ...\n")
-
-	solver = "Tulip"  # Tulip, Cbc
-	Tulip_max_iter = 100  # 100 default, 500 seems good enough
-
-	println("Using solver: ", solver)
-	if solver == "Tulip"
-		model = Model(Tulip.Optimizer)
-		set_optimizer_attribute(model, "OutputLevel", 1)  # 0=disable output (default), 1=show iterations
-		set_optimizer_attribute(model, "IPM_IterationsLimit", Tulip_max_iter)  # default 100
-	elseif solver == "Cbc"
-		model = Model(Cbc.Optimizer)
-		set_optimizer_attribute(model, "logLevel", 1)
-		# I have not figured out option to limit iterations
-	else
-		println("ERROR! Solver must be Tulip or Cbc.")
-	end
+	println("\nSolving weights for $year ...\n\n")
 
 	array = npzread(string(year, "_input.npz"))
 
+	# ddir = "/home/donboyd/Documents/python_projects/taxdata/puf_stage2/"
+	# array = npzread(string(ddir, year, "_input.npz"))
+
 	A1 = array["A1"]
 	A2 = array["A2"]
 	b = array["b"]
 
 	N = size(A1)[2]
 
-	@variable(model, r[1:N] >= 0)
-	@variable(model, s[1:N] >= 0)
+        # scaling: determine a scaling vector with one value per constraint
+	#  - the goal is to keep coefficients reasonably near 1.0
+	#  - multiply each row of A1 and A2 by its specific scaling constant
+	#  - multiply each element of the b target vector by its scaling constant
+	#  - current approach: choose scale factors so that the sum of absolute values in each row of
+	#    A1 and of A2 will equal the total number of records / 1000; maybe we can improve on this
 
-	@objective(model, Min, sum(r[i] + s[i] for i in 1:N))
+	scale = (N / 1000.) ./ sum(abs.(A1), dims=2)
 
-	# bound on top by tolerance
-	@constraint(model, [i in 1:N], r[i] + s[i] <= tol)
+        A1s = scale .* A1
+	A2s = scale .* A2
+	bs = scale .* b
 
-	# Ax = b
-	@constraint(model, [i in 1:length(b)], sum(A1[i,j] * r[j] + A2[i,j] * s[j]
-		                          for j in 1:N) == b[i])
+	model = Model(Tulip.Optimizer)
+	set_optimizer_attribute(model, "OutputLevel", 1)  # 0=disable output (default), 1=show iterations
+	set_optimizer_attribute(model, "IPM_IterationsLimit", 100)  # default 100 seems to be enough
+
+	# r and s must each fall between 0 and the tolerance
+	@variable(model, 0 <= r[1:N] <= tol)
+	@variable(model, 0 <= s[1:N] <= tol)
+
+	@objective(model, Min, sum(r[i] + s[i] for i in 1:N))
 
+	# Ax = b  - use the scaled matrices and vector
+	@constraint(model, [i in 1:length(bs)], sum(A1s[i,j] * r[j] + A2s[i,j] * s[j]
+		                          for j in 1:N) == bs[i])
 
 	optimize!(model)
+
 	println("Termination status: ", termination_status(model))
-	println("Objective: ", objective_value(model))
-	println("\nSolver used was: ", solver_name(model), "\n")
+	@printf "Objective = %.4f\n" objective_value(model)
 
 	r_vec = value.(r)
 	s_vec = value.(s)
@@ -53,9 +57,25 @@ function Solve_func(year, tol)
 
 	println("\n")
 
-end
+	# quick checks on results
+
+	# Did we satisfy constraints?
+	rs = r_vec - s_vec
+	b_calc = sum(rs' .* A1, dims=2)
+	check = vec(b_calc) ./ b
 
+	q = (0, .1, .25, .5, .75, .9, 1)
+	println("Quantiles used below: ", q)
 
+	println("\nQuantiles of ratio of calculated targets to intended targets: ")
+	println(quantile!(check, q))
+
+	# Are the ratios of new weights to old weights in bounds (within tolerances)?
+	x = 1.0 .+ r_vec - s_vec  # note the .+
+	println("\nQuantiles of ratio of new weight to initial weight: ")
+	println(quantile!(x, q))
+
+end
 
 year_list = [x for x in 2014:2030]
 tol = 0.70

diff --git a/history/reports/taxdata_report_2021-06-10.pdf b/history/reports/taxdata_report_2021-06-10.pdf
diff --git a/puf_stage2/puf_weights.csv.gz b/puf_stage2/puf_weights.csv.gz
diff --git a/puf_stage2/solver.jl b/puf_stage2/solver.jl
@@ -1,24 +1,11 @@
-using NPZ, JuMP, Cbc, Tulip
+using JuMP, NPZ, Tulip
+using Printf
+using Statistics
+using LinearAlgebra
 
 function Solve_func(year, tol)
 
-	println("\nSolving weights for $year ...\n")
-
-	solver = "Tulip"  # Tulip, Cbc
-	Tulip_max_iter = 100  # 100 default, 500 seems good enough
-
-	println("Using solver: ", solver)
-	if solver == "Tulip"
-		model = Model(Tulip.Optimizer)
-		set_optimizer_attribute(model, "OutputLevel", 1)  # 0=disable output (default), 1=show iterations
-		set_optimizer_attribute(model, "IPM_IterationsLimit", Tulip_max_iter)  # default 100
-	elseif solver == "Cbc"
-		model = Model(Cbc.Optimizer)
-		set_optimizer_attribute(model, "logLevel", 1)
-		# I have not figured out option to limit iterations
-	else
-		println("ERROR! Solver must be Tulip or Cbc.")
-	end
+	println("\nSolving weights for $year ...\n\n")
 
 	array = npzread(string(year, "_input.npz"))
 
@@ -28,23 +15,37 @@ function Solve_func(year, tol)
 
 	N = size(A1)[2]
 
-	@variable(model, r[1:N] >= 0)
-	@variable(model, s[1:N] >= 0)
+        # scaling: determine a scaling vector with one value per constraint
+	#  - the goal is to keep coefficients reasonably near 1.0
+	#  - multiply each row of A1 and A2 by its specific scaling constant
+	#  - multiply each element of the b target vector by its scaling constant
+	#  - current approach: choose scale factors so that the sum of absolute values in each row of
+	#    A1 and of A2 will equal the total number of records / 1000; maybe we can improve on this
 
-	@objective(model, Min, sum(r[i] + s[i] for i in 1:N))
+	scale = (N / 1000.) ./ sum(abs.(A1), dims=2)
 
-	# bound on top by tolerance
-	@constraint(model, [i in 1:N], r[i] + s[i] <= tol)
+        A1s = scale .* A1
+	A2s = scale .* A2
+	bs = scale .* b
 
-	# Ax = b
-	@constraint(model, [i in 1:length(b)], sum(A1[i,j] * r[j] + A2[i,j] * s[j]
-		                          for j in 1:N) == b[i])
+	model = Model(Tulip.Optimizer)
+	set_optimizer_attribute(model, "OutputLevel", 1)  # 0=disable output (default), 1=show iterations
+	set_optimizer_attribute(model, "IPM_IterationsLimit", 100)  # default 100 seems to be enough
 
+	# r and s must each fall between 0 and the tolerance
+	@variable(model, 0 <= r[1:N] <= tol)
+	@variable(model, 0 <= s[1:N] <= tol)
+
+	@objective(model, Min, sum(r[i] + s[i] for i in 1:N))
+
+	# Ax = b  - use the scaled matrices and vector
+	@constraint(model, [i in 1:length(bs)], sum(A1s[i,j] * r[j] + A2s[i,j] * s[j]
+		                          for j in 1:N) == bs[i])
 
 	optimize!(model)
+
 	println("Termination status: ", termination_status(model))
-	println("Objective: ", objective_value(model))
-	println("\nSolver used was: ", solver_name(model), "\n")
+	@printf "Objective = %.4f\n" objective_value(model)
 
 	r_vec = value.(r)
 	s_vec = value.(s)
@@ -53,14 +54,32 @@ function Solve_func(year, tol)
 
 	println("\n")
 
-end
+	# quick checks on results
+
+	# Did we satisfy constraints?
+	rs = r_vec - s_vec
+	b_calc = sum(rs' .* A1, dims=2)
+	check = vec(b_calc) ./ b
+
+	q = (0, .1, .25, .5, .75, .9, 1)
+	println("Quantiles used below: ", q)
 
+	println("\nQuantiles of ratio of calculated targets to intended targets: ")
+	println(quantile!(check, q))
+
+	# Are the ratios of new weights to old weights in bounds (within tolerances)?
+	x = 1.0 .+ r_vec - s_vec  # note the .+
+	println("\nQuantiles of ratio of new weight to initial weight: ")
+	println(quantile!(x, q))
+
+end
 
 
 year_list = [x for x in 2012:2030]
-tol_list = [0.40, 0.38, 0.35, 0.33, 0.30, 0.45, 0.45,
-			0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45,
-			0.45, 0.45, 0.45, 0.45, 0.45]
+tol_list = [0.40, 0.38, 0.35, 0.33, 0.30,
+ 			0.45, 0.45, 0.45, 0.45, 0.45,
+			0.45, 0.45, 0.45, 0.45,	0.45,
+			0.45, 0.45, 0.45, 0.45]
 
 # Run solver function for all years and tolerances (in order)
 for i in zip(year_list, tol_list)

diff --git a/puf_stage3/puf_ratios.csv b/puf_stage3/puf_ratios.csv
@@ -1,21 +1,21 @@
 agi_bin,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
 INT2011,1.0259,0.5597,0.9448,0.9681,0.9728,0.9464,0.8390,0.8997,0.9713,0.9239,0.9342,0.9413,0.9497,0.9510,0.9693,0.9702,0.9569,1.0123,1.7014
-INT2012,0.7775,0.9454,0.8433,0.8158,0.7151,0.8094,0.7962,0.7931,0.8480,0.9069,0.9137,0.9169,1.0346,1.0436,1.0129,1.1436,1.0985,1.2358,1.3450
-INT2013,1.1323,0.7667,0.7821,0.7777,0.8937,0.8712,0.9556,0.9062,0.8346,0.8594,0.9739,1.0302,1.0488,1.0501,1.0890,0.9632,1.0451,0.9671,1.1390
-INT2014,0.9111,0.8672,0.8492,0.7748,0.8437,0.8796,0.9731,0.8831,0.8364,1.0106,0.8445,1.0516,1.0274,0.9292,1.0611,1.1524,1.0613,1.0733,1.0909
-INT2015,0.9812,0.9511,0.9321,0.9456,0.9535,0.9255,0.9365,0.9307,0.9464,0.9697,0.9888,1.0162,0.9974,0.9906,1.0240,0.9827,1.0220,1.1450,1.1673
-INT2016,0.9850,1.0086,1.0456,1.0315,1.0448,1.0332,1.0138,1.0056,1.0025,1.0178,0.9484,0.9532,0.9745,1.0132,1.0722,1.0130,1.0812,1.0796,0.9902
-INT2017,0.9920,0.9161,0.8971,0.8912,0.8948,0.9130,0.9175,0.9379,0.9610,0.9767,1.0163,1.0690,1.0326,0.9887,0.9393,0.9756,0.9563,1.0538,1.1932
-INT2018,0.9988,0.9757,0.9678,0.9657,0.9753,0.9719,0.9760,0.9775,0.9747,0.9992,0.9927,1.0042,1.0130,1.0068,1.0187,0.9898,1.0192,1.0862,1.0026
-INT2019,0.9963,0.9766,0.9767,0.9928,0.9782,0.9818,0.9775,0.9860,0.9825,0.9937,0.9910,1.0106,1.0036,1.0066,0.9995,1.0011,1.0255,1.0309,1.0190
-INT2020,1.0046,0.9803,0.9750,0.9753,0.9860,0.9891,0.9834,0.9847,0.9921,0.9968,0.9971,1.0082,1.0056,1.0049,1.0071,0.9993,1.0141,1.0093,1.0097
-INT2021,0.9958,0.9800,0.9761,0.9783,0.9805,0.9849,0.9894,0.9853,0.9937,0.9970,1.0049,1.0005,1.0052,1.0115,1.0045,1.0033,1.0287,1.0093,1.0040
-INT2022,1.0051,0.9810,0.9811,0.9778,0.9748,0.9823,0.9827,0.9904,0.9925,1.0059,1.0013,1.0030,1.0038,1.0070,1.0046,1.0062,1.0126,0.9978,1.0125
-INT2023,1.0002,0.9816,0.9773,0.9792,0.9879,0.9886,0.9863,1.0144,0.9958,0.9948,1.0026,1.0021,1.0025,1.0049,1.0013,1.0087,1.0237,1.0039,0.9928
-INT2024,1.0001,0.9846,0.9811,0.9782,0.9709,0.9790,0.9894,1.0085,0.9891,0.9979,0.9986,0.9989,1.0048,1.0079,1.0111,1.0082,1.0236,1.0129,1.0079
-INT2025,1.0089,0.9930,0.9766,0.9769,0.9849,0.9824,0.9980,0.9963,0.9944,1.0019,0.9990,0.9957,1.0058,1.0080,1.0131,1.0152,1.0234,0.9943,0.9916
-INT2026,1.0099,0.9841,0.9805,0.9799,0.9857,0.9894,0.9987,0.9944,0.9946,0.9936,0.9917,0.9943,1.0057,1.0143,1.0376,1.0247,1.0210,0.9983,0.9936
-INT2027,1.0104,0.9895,0.9828,0.9791,0.9895,0.9922,0.9856,0.9913,0.9927,0.9932,0.9991,0.9917,1.0081,1.0188,1.0194,1.0277,1.0244,0.9935,0.9947
-INT2028,1.0066,0.9973,0.9750,0.9846,0.9869,0.9868,1.0183,1.0002,0.9901,0.9958,0.9908,0.9919,1.0060,1.0172,1.0240,1.0230,1.0234,0.9959,0.9917
-INT2029,1.0122,0.9941,0.9809,0.9800,0.9824,0.9847,0.9951,0.9852,0.9938,0.9924,0.9953,0.9920,1.0043,1.0288,1.0279,1.0488,1.0228,1.0005,0.9924
-INT2030,1.0292,1.0010,0.9819,0.9844,0.9818,0.9930,0.9820,0.9801,0.9845,0.9904,0.9834,0.9922,1.0052,1.0358,1.0396,1.0429,1.0298,0.9958,0.9922
+INT2012,0.7778,0.9454,0.8433,0.8157,0.7153,0.8101,0.7961,0.7940,0.8481,0.9063,0.9130,0.9182,1.0343,1.0435,1.0129,1.1436,1.0975,1.2339,1.3445
+INT2013,1.1325,0.7670,0.7821,0.7778,0.8935,0.8699,0.9558,0.9045,0.8342,0.8596,0.9746,1.0271,1.0498,1.0505,1.0891,0.9632,1.0448,0.9690,1.1441
+INT2014,0.9106,0.8669,0.8492,0.7737,0.8431,0.8802,0.9729,0.8840,0.8368,1.0108,0.8443,1.0534,1.0274,0.9291,1.0609,1.1524,1.0626,1.0728,1.0862
+INT2015,0.9813,0.9511,0.9323,0.9470,0.9543,0.9246,0.9367,0.9315,0.9463,0.9699,0.9891,1.0163,0.9966,0.9906,1.0241,0.9827,1.0221,1.1437,1.1672
+INT2016,0.9843,1.0086,1.0453,1.0314,1.0447,1.0342,1.0116,1.0025,1.0025,1.0175,0.9484,0.9531,0.9752,1.0131,1.0723,1.0130,1.0804,1.0810,0.9935
+INT2017,0.9929,0.9161,0.8978,0.8911,0.8936,0.9131,0.9193,0.9401,0.9611,0.9766,1.0158,1.0694,1.0315,0.9881,0.9403,0.9756,0.9565,1.0514,1.1945
+INT2018,0.9993,0.9758,0.9681,0.9665,0.9766,0.9721,0.9766,0.9770,0.9751,0.9991,0.9931,1.0037,1.0140,1.0066,1.0183,0.9897,1.0193,1.0882,0.9982
+INT2019,0.9980,0.9765,0.9762,0.9919,0.9792,0.9812,0.9770,0.9863,0.9817,0.9947,0.9912,1.0110,1.0024,1.0074,0.9980,1.0006,1.0246,1.0290,1.0196
+INT2020,1.0019,0.9803,0.9743,0.9754,0.9840,0.9864,0.9831,0.9832,0.9931,0.9966,0.9961,1.0100,1.0059,1.0053,1.0076,0.9998,1.0137,1.0108,1.0110
+INT2021,0.9955,0.9801,0.9764,0.9784,0.9829,0.9883,0.9897,0.9869,0.9922,0.9966,1.0065,0.9979,1.0064,1.0113,1.0046,1.0033,1.0297,1.0091,1.0024
+INT2022,1.0062,0.9808,0.9811,0.9780,0.9734,0.9820,0.9829,0.9905,0.9935,1.0054,1.0020,1.0042,1.0030,1.0050,1.0046,1.0063,1.0131,0.9980,1.0111
+INT2023,0.9986,0.9817,0.9773,0.9788,0.9894,0.9889,0.9858,1.0148,0.9943,0.9951,1.0021,1.0026,1.0029,1.0064,1.0005,1.0085,1.0218,1.0031,0.9942
+INT2024,1.0010,0.9843,0.9811,0.9788,0.9695,0.9788,0.9896,1.0154,0.9902,0.9971,0.9982,0.9975,1.0039,1.0080,1.0115,1.0085,1.0235,1.0136,1.0079
+INT2025,1.0090,0.9935,0.9766,0.9773,0.9851,0.9824,1.0010,0.9910,0.9948,1.0028,0.9996,0.9955,1.0063,1.0074,1.0128,1.0135,1.0242,0.9942,0.9915
+INT2026,1.0098,0.9839,0.9811,0.9792,0.9866,0.9897,0.9952,0.9932,0.9948,0.9942,0.9894,0.9948,1.0066,1.0147,1.0383,1.0261,1.0210,0.9985,0.9936
+INT2027,1.0105,0.9894,0.9829,0.9790,0.9893,0.9917,0.9862,0.9903,0.9927,0.9931,1.0005,0.9919,1.0071,1.0188,1.0188,1.0278,1.0252,0.9940,0.9946
+INT2028,1.0064,0.9973,0.9730,0.9848,0.9872,0.9868,1.0216,1.0003,0.9897,0.9956,0.9913,0.9918,1.0060,1.0168,1.0237,1.0224,1.0231,0.9958,0.9918
+INT2029,1.0129,0.9941,0.9822,0.9798,0.9815,0.9847,0.9922,0.9852,0.9928,0.9919,0.9956,0.9916,1.0049,1.0295,1.0289,1.0500,1.0227,1.0009,0.9925
+INT2030,1.0287,1.0011,0.9819,0.9846,0.9817,0.9930,0.9818,0.9787,0.9853,0.9912,0.9831,0.9924,1.0050,1.0358,1.0395,1.0421,1.0303,0.9955,0.9921