Added dual prices for the probability simplex and the unit simplex #15

Merged Jan 15, 2021 (7 commits)
20 changes: 10 additions & 10 deletions src/FrankWolfe.jl
@@ -24,34 +24,34 @@ function benchmarkOracles(f,grad,lmo,n;k=100,T=Float64)
    sv = n*sizeof(T)/1024/1024
    println("\nSize of single vector ($T): $sv MB\n")
    to = TimerOutput()
-   @showprogress 1 "Testing f..." for i in 1:k
+   @showprogress 1 "Testing f... " for i in 1:k
        x = rand(n)
        @timeit to "f" temp = f(x)
    end
-   @showprogress 1 "Testing grad..." for i in 1:k
+   @showprogress 1 "Testing grad... " for i in 1:k
        x = rand(n)
        @timeit to "grad" temp = grad(x)
    end
-   @showprogress 1 "Testing lmo..." for i in 1:k
+   @showprogress 1 "Testing lmo... " for i in 1:k
        x = rand(n)
        @timeit to "lmo" temp = compute_extreme_point(lmo, x)
    end
-   @showprogress 1 "Testing dual gap..." for i in 1:k
-       @timeit to "dual gap" begin
-           x = rand(n)
-           gradient = grad(x)
-           v = compute_extreme_point(lmo, gradient)
+   @showprogress 1 "Testing dual gap... " for i in 1:k
+       x = rand(n)
+       gradient = grad(x)
+       v = compute_extreme_point(lmo, gradient)
+       @timeit to "dual gap" begin
            dualGap = dot(x, gradient) - dot(v, gradient)
        end
    end
-   @showprogress 1 "Testing update... (emph: blas)" for i in 1:k
+   @showprogress 1 "Testing update... (emph: blas) " for i in 1:k
        x = rand(n)
        gradient = grad(x)
        v = compute_extreme_point(lmo, gradient)
        gamma = 1/2
        @timeit to "update (blas)" @emphasis(blas, x = (1-gamma) * x + gamma * v)
    end
-   @showprogress 1 "Testing update... (emph: memory)" for i in 1:k
+   @showprogress 1 "Testing update... (emph: memory) " for i in 1:k
        x = rand(n)
        gradient = grad(x)
        v = compute_extreme_point(lmo, gradient)
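The dual-gap hunk above hoists the `rand`, `grad`, and `compute_extreme_point` calls out of the `@timeit` block, so only the gap computation itself is attributed to the timer. A minimal sketch of the difference (`expensive_setup` is a hypothetical stand-in for that setup work):

```julia
using TimerOutputs

to = TimerOutput()
expensive_setup() = sum(rand(10^6))    # hypothetical stand-in for rand/grad/lmo

# Old pattern: the setup cost pollutes the "dual gap" timing.
@timeit to "gap (setup inside)" begin
    s = expensive_setup()
    gap = s - 1.0
end

# New pattern: setup hoisted out; the timer measures only the subtraction.
s = expensive_setup()
@timeit to "gap (setup outside)" begin
    gap = s - 1.0
end

show(to)
```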
44 changes: 25 additions & 19 deletions src/simplex_oracles.jl
@@ -26,16 +26,22 @@ function compute_extreme_point(lmo::UnitSimplexOracle{T}, direction) where {T}
    return MaybeHotVector(zero(T), idx, length(direction))
end

-function unitSimplexLMO(grad;r=1)
-    n = length(grad)
-    v = zeros(n)
-    aux = argmin(grad)
-    if grad[aux] < 0.0
-        v[aux] = 1.0
-    end
-    return v*r
-end
+"""
+Dual costs for a given primal solution, forming a primal-dual pair
+for the scaled unit simplex.
+Returns two vectors: the first holds the dual cost associated with the
+constraint, the second the reduced costs of the variables.
+"""
+function compute_dual_solution(lmo::UnitSimplexOracle{T}, direction, primalSolution) where {T}
+    idx = argmax(primalSolution)
+    critical = min(direction[idx], 0)
+    lambda = [critical]
+    mu = direction .- lambda
+    return lambda, mu
+end
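A usage sketch for the new oracle (not part of the diff; it assumes a `UnitSimplexOracle(τ)` constructor taking the radius, mirroring `ProbabilitySimplexOracle` below):

```julia
import FrankWolfe

d = [0.5, -2.0, 1.0]                              # direction (gradient at x)
lmo = FrankWolfe.UnitSimplexOracle(1.0)           # assumed constructor: radius τ = 1
x = FrankWolfe.compute_extreme_point(lmo, d)      # all mass on index 2, since d[2] < 0
lambda, mu = FrankWolfe.compute_dual_solution(lmo, d, x)
# lambda == [-2.0] (the critical cost, clipped at 0); mu == [2.5, 0.0, 3.0] >= 0
# complementary slackness: dot(mu, x) + lambda[1] * (1.0 - sum(x)) == 0
```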



"""
ProbabilitySimplexOracle(right_side)

Expand All @@ -60,15 +66,15 @@ function compute_extreme_point(lmo::ProbabilitySimplexOracle{T}, direction) wher
return MaybeHotVector(lmo.right_side, idx, length(direction))
end


-# simple probabilitySimplexLMO
-# TODO:
-# - not optimized
-
-function probabilitySimplexLMO(grad;r=1)
-    n = length(grad)
-    v = zeros(n)
-    aux = argmin(grad)
-    v[aux] = 1.0
-    return v*r
-end
+"""
+Dual costs for a given primal solution, forming a primal-dual pair
+for the scaled probability simplex.
+Returns two vectors: the first holds the dual cost associated with the
+constraint, the second the reduced costs of the variables.
+"""
+function compute_dual_solution(lmo::ProbabilitySimplexOracle{T}, direction, primalSolution) where {T}
+    idx = argmax(primalSolution)
+    lambda = [direction[idx]]
+    mu = direction .- lambda
+    return lambda, mu
+end
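And the analogous sketch for the probability simplex (again not part of the diff):

```julia
import FrankWolfe

d = [0.5, -2.0, 1.0]
lmo = FrankWolfe.ProbabilitySimplexOracle(1.0)
x = FrankWolfe.compute_extreme_point(lmo, d)      # vertex e_2, the cheapest coordinate
lambda, mu = FrankWolfe.compute_dual_solution(lmo, d, x)
# lambda == [-2.0]; mu == [2.5, 0.0, 3.0] >= 0 with dot(mu, x) == 0,
# and dot(d, x) == 1.0 * lambda[1], so the primal-dual pair is optimal
```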
9 changes: 5 additions & 4 deletions test/benchmark.jl
@@ -2,7 +2,7 @@ import FrankWolfe
import LinearAlgebra


-n = Int(1e5);
+n = Int(1e6);

xpi = rand(n);
total = sum(xpi);
@@ -13,11 +13,11 @@ const grad(x) = 2 * (x-xp)


function cf(x,xp)
-    return LinearAlgebra.norm(x-xp)^2
+    return @. LinearAlgebra.norm(x-xp)^2
Member: Not sure the broadcast will be needed here, x and xp should have the same dimensions, right? x - xp works for vectors x and xp; it will only error if one of them is a scalar.

Member Author: Good point. It saves half the memory though: 126 MB vs. 63 MB.

Member: Ok, curious. Well, let's leave it as-is then.

Member Author: I am not sure yet that I understand where the grossly different allocations come from with and without broadcasting. See the numbers below.

end

 function cgrad(x,xp)
-    return 2 * (x-xp)
+    return @. 2 * (x-xp)
Member: Same as above, I think.

Member Author (@pokutta, Jan 15, 2021): Same here: broadcasting basically halves the memory allocations.

Without broadcast:

 Size of single vector (Float64): 0.762939453125 MB

 ─────────────────────────────────────────────────────────────────────────────
                                       Time                   Allocations
                               ──────────────────────   ───────────────────────
 Section              ncalls     time   %tot     avg     alloc   %tot      avg
 ─────────────────────────────────────────────────────────────────────────────
 caching 100 points      100    1.65s  95.6%  16.5ms   7.68GiB  95.4%  78.6MiB
 lmo                     100   21.1ms  1.22%   211μs     0.00B  0.00%    0.00B
 update (blas)           100   18.3ms  1.06%   183μs    153MiB  1.85%  1.53MiB
 grad                    100   15.6ms  0.90%   156μs    153MiB  1.85%  1.53MiB
 f                       100   12.5ms  0.72%   125μs   76.3MiB  0.93%   781KiB
 update (memory)         100   6.46ms  0.37%  64.6μs     0.00B  0.00%    0.00B
 dual gap                100   2.11ms  0.12%  21.1μs     0.00B  0.00%    0.00B
 ─────────────────────────────────────────────────────────────────────────────

With broadcasting:

 Size of single vector (Float64): 0.762939453125 MB

 ─────────────────────────────────────────────────────────────────────────────
                                       Time                   Allocations
                               ──────────────────────   ───────────────────────
       Tot / % measured:            2.08s / 89.4%           8.57GiB / 92.2%

 Section              ncalls     time   %tot     avg     alloc   %tot      avg
 ─────────────────────────────────────────────────────────────────────────────
 caching 100 points      100    1.79s  96.4%  17.9ms   7.60GiB  96.2%  77.8MiB
 lmo                     100   20.1ms  1.08%   201μs     0.00B  0.00%    0.00B
 update (blas)           100   19.6ms  1.05%   196μs    153MiB  1.89%  1.53MiB
 f                       100   9.52ms  0.51%  95.2μs   76.3MiB  0.94%   781KiB
 grad                    100   9.19ms  0.50%  91.9μs   76.3MiB  0.94%   781KiB
 update (memory)         100   6.46ms  0.35%  64.6μs     0.00B  0.00%    0.00B
 dual gap                100   2.01ms  0.11%  20.1μs     0.00B  0.00%    0.00B
 ─────────────────────────────────────────────────────────────────────────────

Member Author: Done for the gradient here as an example.
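A plausible account of the halved allocations (a sketch, assuming `BenchmarkTools` is available): without `@.`, the expression `2 * (x - xp)` materializes the temporary `x - xp` and then a second array for the product, while the fused broadcast allocates only the result.

```julia
using BenchmarkTools

n = 10^5
x, xp = rand(n), rand(n)

unfused(x, xp) = 2 * (x - xp)    # two allocations: the difference, then the scaled copy
fused(x, xp) = @. 2 * (x - xp)   # one fused loop, one allocation

@btime unfused($x, $xp);         # roughly twice the memory of the fused version
@btime fused($x, $xp);
```

One caveat the reviewer's question hints at: `@.` dots every call in the expression, so `@. LinearAlgebra.norm(x-xp)^2` expands to `LinearAlgebra.norm.(x .- xp) .^ 2`, a vector of elementwise squared magnitudes rather than the scalar squared norm; `sum(abs2, x - xp)` would keep the scalar while still avoiding one temporary.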

end

lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1);
@@ -26,3 +26,4 @@ x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n));
 FrankWolfe.benchmarkOracles(f,grad,lmo_prob,n;k=100,T=Float64)
 
 FrankWolfe.benchmarkOracles(x -> cf(x,xp),x -> cgrad(x,xp),lmo_prob,n;k=100,T=Float64)
+
10 changes: 10 additions & 0 deletions test/lmo.jl
@@ -40,6 +40,11 @@ end
            @test res_point_prob[j] == res_point_unit[j] == 0
        end
    end
+   # computing dual solutions and testing complementarity
+   dual, redC = FrankWolfe.compute_dual_solution(lmo_prob, direction, res_point_prob)
+   @test sum(redC .* res_point_prob) + dual[1] * (rhs - sum(res_point_prob)) == 0
+   dual, redC = FrankWolfe.compute_dual_solution(lmo_unit, direction, res_point_unit)
+   @test sum(redC .* res_point_unit) + dual[1] * (rhs - sum(res_point_unit)) == 0
end
@testset "Choosing least-degrading direction" for idx in 1:n
# all directions worsening, must pick idx
@@ -55,6 +60,11 @@ end
            @test res_point_prob[j] == 0
        end
    end
+   # computing dual solutions and testing complementarity
+   dual, redC = FrankWolfe.compute_dual_solution(lmo_prob, direction, res_point_prob)
+   @test sum(redC .* res_point_prob) + dual[1] * (rhs - sum(res_point_prob)) == 0
+   dual, redC = FrankWolfe.compute_dual_solution(lmo_unit, direction, res_point_unit)
+   @test sum(redC .* res_point_unit) + dual[1] * (rhs - sum(res_point_unit)) == 0
end
end

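The identity asserted in the new tests is standard LP complementary slackness. For the scaled unit simplex, the LMO solves the LP below (with $\tau$ = `rhs`, $\lambda$ = `dual[1]`, and $\mu$ = `redC`); a sketch of the optimality conditions:

```latex
\begin{aligned}
&\text{primal:} && \min_{x}\; d^\top x \quad \text{s.t. } \mathbf{1}^\top x \le \tau,\; x \ge 0,\\
&\text{dual:}   && \max_{\lambda \le 0}\; \tau\lambda \quad \text{s.t. } \mu := d - \lambda\mathbf{1} \ge 0,\\
&\text{slackness:} && \mu^\top x = 0 \quad\text{and}\quad \lambda\,(\tau - \mathbf{1}^\top x) = 0.
\end{aligned}
```

Summing the two slackness conditions gives exactly the quantity the tests require to vanish, `sum(redC .* x) + dual[1] * (rhs - sum(x))`. For the probability simplex the constraint holds with equality, so the second term is zero automatically and $\lambda$ is unrestricted in sign.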
47 changes: 24 additions & 23 deletions test/runtests.jl
@@ -45,32 +45,33 @@ end
 xp = xpi ./ total;
 f(x) = LinearAlgebra.norm(x-xp)^2
 grad(x) = 2 * (x-xp)
-lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0);
-x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n));
-
-x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k,
-    stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas);
-
-@test x !== nothing
-
-x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k,
-    stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory);
-
-@test x !== nothing
-
-lmo_prob = FrankWolfe.L1ballDense{Float64}(1);
-x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n));
-
-x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k,
-    stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas);
-
-@test x !== nothing
-
-x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k,
-    stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory);
-
-@test x !== nothing
+@testset "Using sparse structure" begin
+    lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0);
+    x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n));
+
+    x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k,
+        stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas);
+
+    @test x !== nothing
+
+    x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k,
+        stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory);
+
+    @test x !== nothing
+end
+@testset "Using dense structure" begin
+    lmo_prob = FrankWolfe.L1ballDense{Float64}(1);
+    x0 = FrankWolfe.compute_extreme_point(lmo_prob, zeros(n));
+
+    x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k,
+        stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.blas);
+
+    @test x !== nothing
+
+    x, v, primal, dualGap, trajectory = FrankWolfe.fw(f,grad,lmo_prob,x0,maxIt=k,
+        stepSize=FrankWolfe.backtracking,printIt=k/10,verbose=true,emph=FrankWolfe.memory);
+
+    @test x !== nothing
+end
end
end