From 7b9d83f6631cd4268628b105208245c1292c55ee Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Thu, 14 Jan 2021 22:12:43 +0100 Subject: [PATCH 1/6] minor improving of the benchmark script and larger instance test in benchmarking --- src/FrankWolfe.jl | 12 ++++++------ test/benchmark.jl | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index 2778c7dc2..6cb52f6cc 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -24,19 +24,19 @@ function benchmarkOracles(f,grad,lmo,n;k=100,T=Float64) sv = n*sizeof(T)/1024/1024 println("\nSize of single vector ($T): $sv MB\n") to = TimerOutput() - @showprogress 1 "Testing f..." for i in 1:k + @showprogress 1 "Testing f... " for i in 1:k x = rand(n) @timeit to "f" temp = f(x) end - @showprogress 1 "Testing grad..." for i in 1:k + @showprogress 1 "Testing grad... " for i in 1:k x = rand(n) @timeit to "grad" temp = grad(x) end - @showprogress 1 "Testing lmo..." for i in 1:k + @showprogress 1 "Testing lmo... " for i in 1:k x = rand(n) @timeit to "lmo" temp = compute_extreme_point(lmo, x) end - @showprogress 1 "Testing dual gap..." for i in 1:k + @showprogress 1 "Testing dual gap... " for i in 1:k @timeit to "dual gap" begin x = rand(n) gradient = grad(x) @@ -44,14 +44,14 @@ function benchmarkOracles(f,grad,lmo,n;k=100,T=Float64) dualGap = dot(x, gradient) - dot(v, gradient) end end - @showprogress 1 "Testing update... (emph: blas)" for i in 1:k + @showprogress 1 "Testing update... (emph: blas) " for i in 1:k x = rand(n) gradient = grad(x) v = compute_extreme_point(lmo, gradient) gamma = 1/2 @timeit to "update (blas)" @emphasis(blas, x = (1-gamma) * x + gamma * v) end - @showprogress 1 "Testing update... (emph: memory)" for i in 1:k + @showprogress 1 "Testing update... (emph: memory) " for i in 1:k x = rand(n) gradient = grad(x) v = compute_extreme_point(lmo, gradient) diff --git a/test/benchmark.jl b/test/benchmark.jl index a9d6cfe11..473d82761 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -2,7 +2,7 @@ import FrankWolfe import LinearAlgebra -n = Int(1e5); +n = Int(1e7); xpi = rand(n); total = sum(xpi); From b9769bfb8546746715f3a75122fe383273225986 Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Thu, 14 Jan 2021 23:01:35 +0100 Subject: [PATCH 2/6] - benchmarking performance via broadcasting --- src/FrankWolfe.jl | 8 ++++---- test/benchmark.jl | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index 6cb52f6cc..813830e48 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -37,10 +37,10 @@ function benchmarkOracles(f,grad,lmo,n;k=100,T=Float64) @timeit to "lmo" temp = compute_extreme_point(lmo, x) end @showprogress 1 "Testing dual gap... " for i in 1:k - @timeit to "dual gap" begin - x = rand(n) - gradient = grad(x) - v = compute_extreme_point(lmo, gradient) + x = rand(n) + gradient = grad(x) + v = compute_extreme_point(lmo, gradient) + @timeit to "dual gap" begin dualGap = dot(x, gradient) - dot(v, gradient) end end diff --git a/test/benchmark.jl b/test/benchmark.jl index 473d82761..1d6ab63f8 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -2,7 +2,7 @@ import FrankWolfe import LinearAlgebra -n = Int(1e7); +n = Int(1e6); xpi = rand(n); total = sum(xpi); @@ -13,11 +13,11 @@ const grad(x) = 2 * (x-xp) function cf(x,xp) - return LinearAlgebra.norm(x-xp)^2 + return @. LinearAlgebra.norm(x-xp)^2 end -function cgrad(x,xp) - return 2 * (x-xp) +function cgrad(x,xp) + return @. 2 * (x-xp) end lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1); @@ -26,3 +26,4 @@ x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); FrankWolfe.benchmarkOracles(f,grad,lmo_prob,n;k=100,T=Float64) FrankWolfe.benchmarkOracles(x -> cf(x,xp),x -> cgrad(x,xp),lmo_prob,n;k=100,T=Float64) + From a76f65695e36c22876d7087b02f3b967973fc99a Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Fri, 15 Jan 2021 00:48:15 +0100 Subject: [PATCH 3/6] added dual prices and reduced costs for probability simplex and unit simplex --- src/simplex_oracles.jl | 44 ++++++++++++++++++++++----------------- test/lmo.jl | 10 +++++++++ test/runtests.jl | 47 +++++++++++++++++++++--------------------- 3 files changed, 59 insertions(+), 42 deletions(-) diff --git a/src/simplex_oracles.jl b/src/simplex_oracles.jl index 98c5beb53..48dc6a358 100644 --- a/src/simplex_oracles.jl +++ b/src/simplex_oracles.jl @@ -26,16 +26,22 @@ function compute_extreme_point(lmo::UnitSimplexOracle{T}, direction) where {T} return MaybeHotVector(zero(T), idx, length(direction)) end -function unitSimplexLMO(grad;r=1) - n = length(grad) - v = zeros(n) - aux = argmin(grad) - if grad[aux] < 0.0 - v[aux] = 1.0 - end - return v*r +""" +Dual costs for a given primal solution to form a primal dual pair +for scaled probability simplex. +Returns two vectors. The first one is the dual costs associated with the constraints +and the second is the reduced costs for the variables. +""" +function compute_dual_solution(lmo::UnitSimplexOracle{T}, direction, primalSolution) where {T} + idx = argmax(primalSolution) + critical = min(direction[idx],0) + lambda = [ critical ] + mu = direction .- lambda + return lambda, mu end + + """ ProbabilitySimplexOracle(right_side) @@ -60,15 +66,15 @@ function compute_extreme_point(lmo::ProbabilitySimplexOracle{T}, direction) wher return MaybeHotVector(lmo.right_side, idx, length(direction)) end - -# simple probabilitySimplexLMO -# TODO: -# - not optimized - -function probabilitySimplexLMO(grad;r=1) - n = length(grad) - v = zeros(n) - aux = argmin(grad) - v[aux] = 1.0 - return v*r +""" +Dual costs for a given primal solution to form a primal dual pair +for scaled probability simplex. +Returns two vectors. The first one is the dual costs associated with the constraints +and the second is the reduced costs for the variables. +""" +function compute_dual_solution(lmo::ProbabilitySimplexOracle{T}, direction, primalSolution) where {T} + idx = argmax(primalSolution) + lambda = [ direction[idx] ] + mu = direction .- lambda + return lambda, mu end diff --git a/test/lmo.jl b/test/lmo.jl index c475cd41a..5988c0880 100644 --- a/test/lmo.jl +++ b/test/lmo.jl @@ -40,6 +40,11 @@ end @test res_point_prob[j] == res_point_unit[j] == 0 end end + # computing dual solutions and testing complementarity + dual, redC = FrankWolfe.compute_dual_solution(lmo_prob, direction, res_point_prob) + @test sum((redC .* res_point_prob )) + (dual[1] * (rhs - sum(res_point_prob))) == 0 + dual, redC = FrankWolfe.compute_dual_solution(lmo_unit, direction, res_point_unit) + @test sum((redC .* res_point_unit )) + (dual[1] * (rhs - sum(res_point_unit))) == 0 end @testset "Choosing least-degrading direction" for idx in 1:n # all directions worsening, must pick idx @@ -55,6 +60,11 @@ end @test res_point_prob[j] == 0 end end + # computing dual solutions and testing complementarity + dual, redC = FrankWolfe.compute_dual_solution(lmo_prob, direction, res_point_prob) + @test sum((redC .* res_point_prob )) + (dual[1] * (rhs - sum(res_point_prob))) == 0 + dual, redC = FrankWolfe.compute_dual_solution(lmo_unit, direction, res_point_unit) + @test sum((redC .* res_point_unit )) + (dual[1] * (rhs - sum(res_point_unit))) == 0 end end diff --git a/test/runtests.jl b/test/runtests.jl index 30a68f58d..66d7c124d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -45,32 +45,33 @@ end xp = xpi ./ total; f(x) = LinearAlgebra.norm(x-xp)^2 grad(x) = 2 * (x-xp) + @testset "Using sparse structure" begin + lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0); + x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); - lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0); - x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); + x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, + stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas); + + @test x !== nothing - x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, - stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas); - - @test x !== nothing + x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, + stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory); + + @test x !== nothing + end + @testset "Using dense structure" begin + lmo_prob = FrankWolfe.L1ballDense{Float64}(1); + x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); - x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, - stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory); - - @test x !== nothing - - lmo_prob = FrankWolfe.L1ballDense{Float64}(1); - x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); - - x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, - stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas); - - @test x !== nothing - - x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, - stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory); - - @test x !== nothing + x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, + stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas); + + @test x !== nothing + x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, + stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory); + + @test x !== nothing + end end end From e30ae444881d7c7be0e77c827c98c747a1c5bd46 Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Fri, 15 Jan 2021 00:52:03 +0100 Subject: [PATCH 4/6] Update src/simplex_oracles.jl --- src/simplex_oracles.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simplex_oracles.jl b/src/simplex_oracles.jl index 48dc6a358..8acc926cd 100644 --- a/src/simplex_oracles.jl +++ b/src/simplex_oracles.jl @@ -28,7 +28,7 @@ end """ Dual costs for a given primal solution to form a primal dual pair -for scaled probability simplex. +for scaled unit simplex. Returns two vectors. The first one is the dual costs associated with the constraints and the second is the reduced costs for the variables. """ From f99d1727c4f51d9e6495610e669206e890f3ee73 Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Fri, 15 Jan 2021 01:19:22 +0100 Subject: [PATCH 5/6] added a comment to resolve missing types in benchmarkOracles --- src/FrankWolfe.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index 0319b2001..a28959121 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -20,6 +20,8 @@ include("utils.jl") # critical components ############################################################## +# TODO: add actual use of T for the rand(n) + function benchmarkOracles(f,grad,lmo,n;k=100,T=Float64) sv = n*sizeof(T)/1024/1024 println("\nSize of single vector ($T): $sv MB\n") From 310a1a6e30023823ad5ddd697d63f42c10d86ebb Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Fri, 15 Jan 2021 10:58:18 +0100 Subject: [PATCH 6/6] rational "support" and multiprecision test --- src/FrankWolfe.jl | 20 +++++++++++----- src/defs.jl | 2 +- test/runtests.jl | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 7 deletions(-) diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index a28959121..9c7f533a9 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -85,7 +85,7 @@ end # Vanilla FW ############################################################## -function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, stepLim=20, +function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, gamma0 = 0, stepLim=20, epsilon=1e-7, maxIt=10000, printIt=1000, trajectory=false, verbose=false,lsTol=1e-7,emph::Emph = blas) function headerPrint(data) @printf("\n───────────────────────────────────────────────────────────────────────────────────\n") @@ -114,9 +114,14 @@ function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, stepLim=20 println("WARNING: Lipschitz constant not set. Prepare to blow up spectacularly.") end + if stepSize === fixed && gamma0 == 0 + println("WARNING: gamma0 not set. We are not going to move a single bit.") + end + if verbose println("\nVanilla Frank-Wolfe Algorithm.") - println("EMPHASIS: $emph STEPSIZE: $stepSize EPSILON: $epsilon MAXIT: $maxIt") + numType = eltype(x0) + println("EMPHASIS: $emph STEPSIZE: $stepSize EPSILON: $epsilon MAXIT: $maxIt TYPE: $numType") headers = ["Type", "Iteration", "Primal", "Dual", "Dual Gap","Time"] headerPrint(headers) end @@ -140,7 +145,7 @@ function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, stepLim=20 end if stepSize === agnostic - gamma = 2/(2+t) + gamma = 2 // (2+t) elseif stepSize === goldenratio nothing, gamma = segmentSearch(f,grad,x,v,lsTol=lsTol) elseif stepSize === backtracking @@ -149,9 +154,11 @@ function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, stepLim=20 gamma = 1 / sqrt(t+1) elseif stepSize === shortstep gamma = dualGap / (L * norm(x-v)^2 ) + elseif stepSize === fixed + gamma = gamma0 end - @emphasis(emph, x = (1-gamma) * x + gamma * v) + @emphasis(emph, x = (1 - gamma) * x + gamma * v) if mod(t,printIt) == 0 && verbose tt = "FW" @@ -217,7 +224,8 @@ end if verbose println("\nLazified Conditional Gradients (Frank-Wolfe + Lazification).") - println("EMPHASIS: $emph STEPSIZE: $stepSize EPSILON: $epsilon MAXIT: $maxIt PHIFACTOR: $phiFactor") + numType = eltype(x0) + println("EMPHASIS: $emph STEPSIZE: $stepSize EPSILON: $epsilon MAXIT: $maxIt PHIFACTOR: $phiFactor TYPE: $numType") headers = ["Type", "Iteration", "Primal", "Dual", "Dual Gap","Time", "Cache Size"] headerPrint(headers) end @@ -268,7 +276,7 @@ while t <= maxIt && dualGap >= max(epsilon,eps()) gamma = dualGap / (L * norm(x-v)^2 ) end - @emphasis(emph, x = (1.0 -gamma) * x + gamma * v) + @emphasis(emph, x = (1 - gamma) * x + gamma * v) if mod(t,printIt) == 0 && verbose tt = "FW" diff --git a/src/defs.jl b/src/defs.jl index 609d70e67..25d8b21af 100644 --- a/src/defs.jl +++ b/src/defs.jl @@ -1,2 +1,2 @@ -@enum LSMethod agnostic=1 backtracking=2 goldenratio=3 nonconvex=4 shortstep=5 +@enum LSMethod agnostic=1 backtracking=2 goldenratio=3 nonconvex=4 shortstep=5 fixed=6 @enum Emph blas=1 memory=2 diff --git a/test/runtests.jl b/test/runtests.jl index 66d7c124d..8170b6f5f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -74,4 +74,64 @@ end @test x !== nothing end end + @testset "Testing rational variant" begin + rhs = 1 + n = 100 + k = 1000 + + xpi = rand(n); + total = sum(xpi); + xp = xpi ./ total; + + f(x) = LinearAlgebra.norm(x-xp)^2 + grad(x) = 2* (x-xp); + + lmo = FrankWolfe.ProbabilitySimplexOracle{Rational{BigInt}}(rhs); + direction = rand(n) + x0 = FrankWolfe.compute_extreme_point(lmo, direction); + + @time x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo,x0,maxIt=k, + stepSize=FrankWolfe.agnostic,printIt=k/10,emph=FrankWolfe.blas,verbose=true); + + @test eltype(x0) == Rational{BigInt} + + @time x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo,x0,maxIt=k, + stepSize=FrankWolfe.agnostic,printIt=k/10,emph=FrankWolfe.memory,verbose=true); + @test eltype(x0) == Rational{BigInt} + + end + @testset "Multi-precision tests" begin + rhs = 1 + n = 100 + k = 1000 + + xp = zeros(n) + + L = 2 + bound = 2 * L * 2 / (k + 2) + + f(x) = LinearAlgebra.norm(x-xp)^2 + grad(x) = 2* (x-xp); + testTypes = [Float16, Float32, Float64, BigFloat, Rational{BigInt}] + + @testset "Multi-precision test for $T" for T in testTypes + println("\nTesting precision for type: ", T) + lmo = FrankWolfe.ProbabilitySimplexOracle{T}(rhs); + direction = rand(n) + x0 = FrankWolfe.compute_extreme_point(lmo, direction); + + @time x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo,x0,maxIt=k, + stepSize=FrankWolfe.agnostic,printIt=k/10,emph=FrankWolfe.blas,verbose=true); + + @test eltype(x0) == T + @test primal - 1//n <= bound + + @time x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo,x0,maxIt=k, + stepSize=FrankWolfe.agnostic,printIt=k/10,emph=FrankWolfe.memory,verbose=true); + + @test eltype(x0) == T + @test primal - 1//n <= bound + end + end + end