diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index 4808d6433..9c7f533a9 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -20,38 +20,40 @@ include("utils.jl") # critical components ############################################################## +# TODO: add actual use of T for the rand(n) + function benchmarkOracles(f,grad,lmo,n;k=100,T=Float64) sv = n*sizeof(T)/1024/1024 println("\nSize of single vector ($T): $sv MB\n") to = TimerOutput() - @showprogress 1 "Testing f..." for i in 1:k + @showprogress 1 "Testing f... " for i in 1:k x = rand(n) @timeit to "f" temp = f(x) end - @showprogress 1 "Testing grad..." for i in 1:k + @showprogress 1 "Testing grad... " for i in 1:k x = rand(n) @timeit to "grad" temp = grad(x) end - @showprogress 1 "Testing lmo..." for i in 1:k + @showprogress 1 "Testing lmo... " for i in 1:k x = rand(n) @timeit to "lmo" temp = compute_extreme_point(lmo, x) end - @showprogress 1 "Testing dual gap..." for i in 1:k - @timeit to "dual gap" begin - x = rand(n) - gradient = grad(x) - v = compute_extreme_point(lmo, gradient) + @showprogress 1 "Testing dual gap... " for i in 1:k + x = rand(n) + gradient = grad(x) + v = compute_extreme_point(lmo, gradient) + @timeit to "dual gap" begin dualGap = dot(x, gradient) - dot(v, gradient) end end - @showprogress 1 "Testing update... (emph: blas)" for i in 1:k + @showprogress 1 "Testing update... (emph: blas) " for i in 1:k x = rand(n) gradient = grad(x) v = compute_extreme_point(lmo, gradient) gamma = 1/2 @timeit to "update (blas)" @emphasis(blas, x = (1-gamma) * x + gamma * v) end - @showprogress 1 "Testing update... (emph: memory)" for i in 1:k + @showprogress 1 "Testing update... (emph: memory) " for i in 1:k x = rand(n) gradient = grad(x) v = compute_extreme_point(lmo, gradient) @@ -83,7 +85,7 @@ end # Vanilla FW ############################################################## -function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, stepLim=20, +function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, gamma0 = 0, stepLim=20, epsilon=1e-7, maxIt=10000, printIt=1000, trajectory=false, verbose=false,lsTol=1e-7,emph::Emph = blas) function headerPrint(data) @printf("\n───────────────────────────────────────────────────────────────────────────────────\n") @@ -112,9 +114,14 @@ function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, stepLim=20 println("WARNING: Lipschitz constant not set. Prepare to blow up spectacularly.") end + if stepSize === fixed && gamma0 == 0 + println("WARNING: gamma0 not set. We are not going to move a single bit.") + end + if verbose println("\nVanilla Frank-Wolfe Algorithm.") - println("EMPHASIS: $emph STEPSIZE: $stepSize EPSILON: $epsilon MAXIT: $maxIt") + numType = eltype(x0) + println("EMPHASIS: $emph STEPSIZE: $stepSize EPSILON: $epsilon MAXIT: $maxIt TYPE: $numType") headers = ["Type", "Iteration", "Primal", "Dual", "Dual Gap","Time"] headerPrint(headers) end @@ -138,7 +145,7 @@ function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, stepLim=20 end if stepSize === agnostic - gamma = 2/(2+t) + gamma = 2 // (2+t) elseif stepSize === goldenratio nothing, gamma = segmentSearch(f,grad,x,v,lsTol=lsTol) elseif stepSize === backtracking @@ -147,9 +154,11 @@ function fw(f, grad, lmo, x0; stepSize::LSMethod = agnostic, L = Inf, stepLim=20 gamma = 1 / sqrt(t+1) elseif stepSize === shortstep gamma = dualGap / (L * norm(x-v)^2 ) + elseif stepSize === fixed + gamma = gamma0 end - @emphasis(emph, x = (1-gamma) * x + gamma * v) + @emphasis(emph, x = (1 - gamma) * x + gamma * v) if mod(t,printIt) == 0 && verbose tt = "FW" @@ -215,7 +224,8 @@ end if verbose println("\nLazified Conditional Gradients (Frank-Wolfe + Lazification).") - println("EMPHASIS: $emph STEPSIZE: $stepSize EPSILON: $epsilon MAXIT: $maxIt PHIFACTOR: $phiFactor") + numType = eltype(x0) + println("EMPHASIS: $emph STEPSIZE: $stepSize EPSILON: $epsilon MAXIT: $maxIt PHIFACTOR: $phiFactor TYPE: $numType") headers = ["Type", "Iteration", "Primal", "Dual", "Dual Gap","Time", "Cache Size"] headerPrint(headers) end @@ -266,7 +276,7 @@ while t <= maxIt && dualGap >= max(epsilon,eps()) gamma = dualGap / (L * norm(x-v)^2 ) end - @emphasis(emph, x = (1.0 -gamma) * x + gamma * v) + @emphasis(emph, x = (1 - gamma) * x + gamma * v) if mod(t,printIt) == 0 && verbose tt = "FW" diff --git a/src/defs.jl b/src/defs.jl index 609d70e67..25d8b21af 100644 --- a/src/defs.jl +++ b/src/defs.jl @@ -1,2 +1,2 @@ -@enum LSMethod agnostic=1 backtracking=2 goldenratio=3 nonconvex=4 shortstep=5 +@enum LSMethod agnostic=1 backtracking=2 goldenratio=3 nonconvex=4 shortstep=5 fixed=6 @enum Emph blas=1 memory=2 diff --git a/src/simplex_oracles.jl b/src/simplex_oracles.jl index 98c5beb53..8acc926cd 100644 --- a/src/simplex_oracles.jl +++ b/src/simplex_oracles.jl @@ -26,16 +26,22 @@ function compute_extreme_point(lmo::UnitSimplexOracle{T}, direction) where {T} return MaybeHotVector(zero(T), idx, length(direction)) end -function unitSimplexLMO(grad;r=1) - n = length(grad) - v = zeros(n) - aux = argmin(grad) - if grad[aux] < 0.0 - v[aux] = 1.0 - end - return v*r +""" +Dual costs for a given primal solution to form a primal dual pair +for scaled unit simplex. +Returns two vectors. The first one is the dual costs associated with the constraints +and the second is the reduced costs for the variables. +""" +function compute_dual_solution(lmo::UnitSimplexOracle{T}, direction, primalSolution) where {T} + idx = argmax(primalSolution) + critical = min(direction[idx],0) + lambda = [ critical ] + mu = direction .- lambda + return lambda, mu end + + """ ProbabilitySimplexOracle(right_side) @@ -60,15 +66,15 @@ function compute_extreme_point(lmo::ProbabilitySimplexOracle{T}, direction) wher return MaybeHotVector(lmo.right_side, idx, length(direction)) end - -# simple probabilitySimplexLMO -# TODO: -# - not optimized - -function probabilitySimplexLMO(grad;r=1) - n = length(grad) - v = zeros(n) - aux = argmin(grad) - v[aux] = 1.0 - return v*r +""" +Dual costs for a given primal solution to form a primal dual pair +for scaled probability simplex. +Returns two vectors. The first one is the dual costs associated with the constraints +and the second is the reduced costs for the variables. +""" +function compute_dual_solution(lmo::ProbabilitySimplexOracle{T}, direction, primalSolution) where {T} + idx = argmax(primalSolution) + lambda = [ direction[idx] ] + mu = direction .- lambda + return lambda, mu end diff --git a/test/benchmark.jl b/test/benchmark.jl index a9d6cfe11..1d6ab63f8 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -2,7 +2,7 @@ import FrankWolfe import LinearAlgebra -n = Int(1e5); +n = Int(1e6); xpi = rand(n); total = sum(xpi); @@ -13,11 +13,11 @@ const grad(x) = 2 * (x-xp) function cf(x,xp) - return LinearAlgebra.norm(x-xp)^2 + return @. LinearAlgebra.norm(x-xp)^2 end -function cgrad(x,xp) - return 2 * (x-xp) +function cgrad(x,xp) + return @. 2 * (x-xp) end lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1); @@ -26,3 +26,4 @@ x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); FrankWolfe.benchmarkOracles(f,grad,lmo_prob,n;k=100,T=Float64) FrankWolfe.benchmarkOracles(x -> cf(x,xp),x -> cgrad(x,xp),lmo_prob,n;k=100,T=Float64) + diff --git a/test/lmo.jl b/test/lmo.jl index c475cd41a..5988c0880 100644 --- a/test/lmo.jl +++ b/test/lmo.jl @@ -40,6 +40,11 @@ end @test res_point_prob[j] == res_point_unit[j] == 0 end end + # computing dual solutions and testing complementarity + dual, redC = FrankWolfe.compute_dual_solution(lmo_prob, direction, res_point_prob) + @test sum((redC .* res_point_prob )) + (dual[1] * (rhs - sum(res_point_prob))) == 0 + dual, redC = FrankWolfe.compute_dual_solution(lmo_unit, direction, res_point_unit) + @test sum((redC .* res_point_unit )) + (dual[1] * (rhs - sum(res_point_unit))) == 0 end @testset "Choosing least-degrading direction" for idx in 1:n # all directions worsening, must pick idx @@ -55,6 +60,11 @@ end @test res_point_prob[j] == 0 end end + # computing dual solutions and testing complementarity + dual, redC = FrankWolfe.compute_dual_solution(lmo_prob, direction, res_point_prob) + @test sum((redC .* res_point_prob )) + (dual[1] * (rhs - sum(res_point_prob))) == 0 + dual, redC = FrankWolfe.compute_dual_solution(lmo_unit, direction, res_point_unit) + @test sum((redC .* res_point_unit )) + (dual[1] * (rhs - sum(res_point_unit))) == 0 end end diff --git a/test/runtests.jl b/test/runtests.jl index 30a68f58d..8170b6f5f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -45,32 +45,93 @@ end xp = xpi ./ total; f(x) = LinearAlgebra.norm(x-xp)^2 grad(x) = 2 * (x-xp) + @testset "Using sparse structure" begin + lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0); + x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); - lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0); - x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); + x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, + stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas); + + @test x !== nothing - x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, - stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas); - - @test x !== nothing + x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, + stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory); + + @test x !== nothing + end + @testset "Using dense structure" begin + lmo_prob = FrankWolfe.L1ballDense{Float64}(1); + x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); + + x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, + stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas); + + @test x !== nothing - x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, - stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory); + x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, + stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory); + + @test x !== nothing + end + end + @testset "Testing rational variant" begin + rhs = 1 + n = 100 + k = 1000 + + xpi = rand(n); + total = sum(xpi); + xp = xpi ./ total; + + f(x) = LinearAlgebra.norm(x-xp)^2 + grad(x) = 2* (x-xp); - @test x !== nothing + lmo = FrankWolfe.ProbabilitySimplexOracle{Rational{BigInt}}(rhs); + direction = rand(n) + x0 = FrankWolfe.compute_extreme_point(lmo, direction); + + @time x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo,x0,maxIt=k, + stepSize=FrankWolfe.agnostic,printIt=k/10,emph=FrankWolfe.blas,verbose=true); + + @test eltype(x0) == Rational{BigInt} - lmo_prob = FrankWolfe.L1ballDense{Float64}(1); - x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n)); + @time x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo,x0,maxIt=k, + stepSize=FrankWolfe.agnostic,printIt=k/10,emph=FrankWolfe.memory,verbose=true); + @test eltype(x0) == Rational{BigInt} + + end + @testset "Multi-precision tests" begin + rhs = 1 + n = 100 + k = 1000 - x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, - stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas); + xp = zeros(n) - @test x !== nothing + L = 2 + bound = 2 * L * 2 / (k + 2) - x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k, - stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory); - - @test x !== nothing + f(x) = LinearAlgebra.norm(x-xp)^2 + grad(x) = 2* (x-xp); + testTypes = [Float16, Float32, Float64, BigFloat, Rational{BigInt}] + @testset "Multi-precision test for $T" for T in testTypes + println("\nTesting precision for type: ", T) + lmo = FrankWolfe.ProbabilitySimplexOracle{T}(rhs); + direction = rand(n) + x0 = FrankWolfe.compute_extreme_point(lmo, direction); + + @time x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo,x0,maxIt=k, + stepSize=FrankWolfe.agnostic,printIt=k/10,emph=FrankWolfe.blas,verbose=true); + + @test eltype(x0) == T + @test primal - 1//n <= bound + + @time x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo,x0,maxIt=k, + stepSize=FrankWolfe.agnostic,printIt=k/10,emph=FrankWolfe.memory,verbose=true); + + @test eltype(x0) == T + @test primal - 1//n <= bound + end end + end