Integrate with POI to improve UX (#262)
* [WIP] Integrate with POI to improve UX

* add missing import

* temp change to proj toml

* format

* simplify method setting to use model constructor

* add possible fix to scalarize bridge error

* add pkg to project

* format

* improvements

* remove jump wrapper

* clean tests

* fix readme

* use intermediary API

* format

* Apply suggestions from code review

Co-authored-by: Benoît Legat <[email protected]>

* add suggestion

* use Parameter set

* todo was fixed

* format

* update docs for newer Flux

* format

* kwargs

* remove diff model

* suggestions

* format

* fix examples

---------

Co-authored-by: Benoît Legat <[email protected]>
joaquimg and blegat authored Jan 31, 2025
1 parent d09d1b1 commit 6dd82d6
Showing 14 changed files with 1,513 additions and 69 deletions.
4 changes: 3 additions & 1 deletion Project.toml
@@ -12,6 +12,7 @@ LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
MathOptSetDistances = "3b969827-a86c-476c-9527-bb6f1a8fbad5"
ParametricOptInterface = "0ce4ce61-57bf-432b-a095-efac525d185e"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[compat]
@@ -21,5 +22,6 @@ IterativeSolvers = "0.9"
JuMP = "1"
LazyArrays = "0.21, 0.22, 1"
MathOptInterface = "1.18"
MathOptSetDistances = "0.2.7"
MathOptSetDistances = "0.2.9"
ParametricOptInterface = "0.9.0"
julia = "1.6"
71 changes: 70 additions & 1 deletion README.md
@@ -31,7 +31,76 @@ examples, tutorials, and an API reference.

## Use with JuMP

Use DiffOpt with JuMP by following this brief example:
### DiffOpt-JuMP API with `Parameters`

```julia
using JuMP, DiffOpt, HiGHS

model = Model(
() -> DiffOpt.diff_optimizer(
HiGHS.Optimizer;
with_parametric_opt_interface = true,
),
)
set_silent(model)

p_val = 4.0
pc_val = 2.0
@variable(model, x)
@variable(model, p in Parameter(p_val))
@variable(model, pc in Parameter(pc_val))
@constraint(model, cons, pc * x >= 3 * p)
@objective(model, Min, 2x)
optimize!(model)
@show value(x) == 3 * p_val / pc_val

# the function is
# x(p, pc) = 3p / pc
# hence,
# dx/dp = 3 / pc
# dx/dpc = -3p / pc^2

# First, try forward mode AD

# differentiate w.r.t. p
direction_p = 3.0
MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(p), Parameter(direction_p))
DiffOpt.forward_differentiate!(model)
@show MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) == direction_p * 3 / pc_val

# update p and pc
p_val = 2.0
pc_val = 6.0
set_parameter_value(p, p_val)
set_parameter_value(pc, pc_val)
# re-optimize
optimize!(model)
# check solution
@show value(x) == 3 * p_val / pc_val

# stop differentiating with respect to p
DiffOpt.empty_input_sensitivities!(model)
# differentiate w.r.t. pc
direction_pc = 10.0
MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(pc), Parameter(direction_pc))
DiffOpt.forward_differentiate!(model)
@show abs(MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) -
-direction_pc * 3 * p_val / pc_val^2) < 1e-5

# it is always good practice to clear previously set sensitivities
DiffOpt.empty_input_sensitivities!(model)
# Now, reverse mode AD
direction_x = 10.0
MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, direction_x)
DiffOpt.reverse_differentiate!(model)
@show MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(p)) == MOI.Parameter(direction_x * 3 / pc_val)
@show abs(MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(pc)).value -
-direction_x * 3 * p_val / pc_val^2) < 1e-5
```

### Low-level DiffOpt-JuMP API

A brief example:

```julia
using JuMP, DiffOpt, HiGHS
# ... (rest of the example not shown in this view)
```
29 changes: 11 additions & 18 deletions docs/src/examples/custom-relu.jl
@@ -32,7 +32,7 @@ function matrix_relu(
@variable(model, x[1:layer_size, 1:batch_size] >= 0)
@objective(model, Min, x[:]'x[:] - 2y[:]'x[:])
optimize!(model)
return value.(x)
return Float32.(value.(x))
end

# Define the reverse differentiation rule, for the function we defined above.
@@ -42,9 +42,9 @@ function ChainRulesCore.rrule(::typeof(matrix_relu), y::Matrix{T}) where {T}
function pullback_matrix_relu(dl_dx)
## some value from the backpropagation (e.g., loss) is denoted by `l`
## so `dl_dy` is the derivative of `l` wrt `y`
x = model[:x] # load decision variable `x` into scope
dl_dy = zeros(T, size(dl_dx))
dl_dq = zeros(T, size(dl_dx))
x = model[:x]::Matrix{JuMP.VariableRef} # load decision variable `x` into scope
dl_dy = zeros(T, size(x))
dl_dq = zeros(T, size(x))
## set sensitivities
MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x[:], dl_dx[:])
## compute grad
@@ -76,13 +76,13 @@ m = Flux.Chain(

N = 1000 # batch size
## Preprocessing train data
imgs = MLDatasets.MNIST.traintensor(1:N)
labels = MLDatasets.MNIST.trainlabels(1:N)
imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:N]
labels = MLDatasets.MNIST(; split = :train).targets[1:N]
train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) # stack images
train_Y = Flux.onehotbatch(labels, 0:9);
## Preprocessing test data
test_imgs = MLDatasets.MNIST.testtensor(1:N)
test_labels = MLDatasets.MNIST.testlabels(1:N)
test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:N]
test_labels = MLDatasets.MNIST(; split = :test).targets[1:N];
test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), N))
test_Y = Flux.onehotbatch(test_labels, 0:9);

@@ -97,19 +97,12 @@ dataset = repeated((train_X, train_Y), epochs);
# ## Network training

# training loss function, Flux optimizer
custom_loss(x, y) = Flux.crossentropy(m(x), y)
opt = Flux.Adam()
evalcb = () -> @show(custom_loss(train_X, train_Y))
custom_loss(m, x, y) = Flux.crossentropy(m(x), y)
opt = Flux.setup(Flux.Adam(), m)

# Train to optimize network parameters

@time Flux.train!(
custom_loss,
Flux.params(m),
dataset,
opt,
cb = Flux.throttle(evalcb, 5),
);
@time Flux.train!(custom_loss, m, dataset, opt);

# Although our custom implementation takes time, it is able to reach similar
# accuracy as the usual ReLU function implementation.
29 changes: 11 additions & 18 deletions docs/src/examples/polyhedral_project.jl
@@ -54,7 +54,7 @@ function (polytope::Polytope{N})(
)
@objective(model, Min, dot(x - y, x - y))
optimize!(model)
return JuMP.value.(x)
return Float32.(JuMP.value.(x))
end

# The `@functor` macro from Flux implements auxiliary functions for collecting the parameters of
@@ -75,12 +75,12 @@ function ChainRulesCore.rrule(
model = direct_model(DiffOpt.diff_optimizer(Ipopt.Optimizer))
xv = polytope(y; model = model)
function pullback_matrix_projection(dl_dx)
layer_size, batch_size = size(dl_dx)
dl_dx = ChainRulesCore.unthunk(dl_dx)
## `dl_dy` is the derivative of `l` wrt `y`
x = model[:x]
x = model[:x]::Matrix{JuMP.VariableRef}
layer_size, batch_size = size(x)
## grad wrt input parameters
dl_dy = zeros(size(dl_dx))
dl_dy = zeros(size(x))
## grad wrt layer parameters
dl_dw = zero.(polytope.w)
dl_db = zero(polytope.b)
@@ -122,13 +122,13 @@ m = Flux.Chain(

M = 500 # batch size
## Preprocessing train data
imgs = MLDatasets.MNIST.traintensor(1:M)
labels = MLDatasets.MNIST.trainlabels(1:M);
imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:M]
labels = MLDatasets.MNIST(; split = :train).targets[1:M]
train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) # stack images
train_Y = Flux.onehotbatch(labels, 0:9);
## Preprocessing test data
test_imgs = MLDatasets.MNIST.testtensor(1:M)
test_labels = MLDatasets.MNIST.testlabels(1:M)
test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:M]
test_labels = MLDatasets.MNIST(; split = :test).targets[1:M]
test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M))
test_Y = Flux.onehotbatch(test_labels, 0:9);

@@ -142,19 +142,12 @@ dataset = repeated((train_X, train_Y), epochs);
# ## Network training

# training loss function, Flux optimizer
custom_loss(x, y) = Flux.crossentropy(m(x), y)
opt = Flux.ADAM()
evalcb = () -> @show(custom_loss(train_X, train_Y))
custom_loss(m, x, y) = Flux.crossentropy(m(x), y)
opt = Flux.setup(Flux.Adam(), m)

# Train to optimize network parameters

@time Flux.train!(
custom_loss,
Flux.params(m),
dataset,
opt,
cb = Flux.throttle(evalcb, 5),
);
@time Flux.train!(custom_loss, m, dataset, opt);

# Although our custom implementation takes time, it is able to reach similar
# accuracy as the usual ReLU function implementation.
21 changes: 8 additions & 13 deletions docs/src/manual.md
@@ -4,9 +4,9 @@
As of now, this package only works for optimization models that can be written either in convex conic form or convex quadratic form.


## Supported objectives & constraints - scheme 1
## Supported objectives & constraints - `QuadraticProgram` backend

For `QPTH`/`OPTNET` style backend, the package supports following `Function-in-Set` constraints:
For the `QuadraticProgram` backend, the package supports the following `Function-in-Set` constraints:

| MOI Function | MOI Set |
|:-------|:---------------|
@@ -26,9 +26,9 @@ and the following objective types:
| `ScalarQuadraticFunction` |


## Supported objectives & constraints - scheme 2
## Supported objectives & constraints - `ConicProgram` backend

For `DiffCP`/`CVXPY` style backend, the package supports following `Function-in-Set` constraints:
For the `ConicProgram` backend, the package supports the following `Function-in-Set` constraints:

| MOI Function | MOI Set |
|:-------|:---------------|
@@ -50,19 +50,16 @@ and the following objective types:
| `VariableIndex` |
| `ScalarAffineFunction` |

Other conic sets such as `RotatedSecondOrderCone` and `PositiveSemidefiniteConeSquare` are supported through bridges.

## Creating a differentiable optimizer

## Creating a differentiable MOI optimizer

You can create a differentiable optimizer over an existing MOI solver by using the `diff_optimizer` utility.
```@docs
diff_optimizer
```
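
As a quick, minimal sketch (assuming the `HiGHS` solver and mirroring the README example above), the wrapped optimizer is then used like any other JuMP optimizer:

```julia
using JuMP, DiffOpt, HiGHS

# Wrap an existing MOI optimizer in a differentiable one.
model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
set_silent(model)
@variable(model, x >= 1)
@objective(model, Min, 2x)
optimize!(model)
# `model` now also accepts DiffOpt's Forward*/Reverse* attributes and the
# forward_differentiate!/reverse_differentiate! calls shown in the README.
```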

## Adding new sets and constraints

The DiffOpt `Optimizer` behaves similarly to other MOI Optimizers
and implements the `MOI.AbstractOptimizer` API.

## Projections on cone sets

DiffOpt requires taking projections and finding projection gradients of vectors while computing the jacobians. For this purpose, we use [MathOptSetDistances.jl](https://github.com/matbesancon/MathOptSetDistances.jl), which is a dedicated package for computing set distances, projections and projection gradients.
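
For intuition, a minimal sketch of such a projection computation (the vector and set below are illustrative only; `projection_on_set` and `projection_gradient_on_set` are MathOptSetDistances.jl's projection API):

```julia
import MathOptInterface as MOI
import MathOptSetDistances as MOSD

v = [-1.0, 2.0, -3.0]
d = MOSD.DefaultDistance()
# Euclidean projection of `v` onto the nonnegative orthant (elementwise max with 0).
proj = MOSD.projection_on_set(d, v, MOI.Nonnegatives(length(v)))
# Jacobian of that projection with respect to `v` (a diagonal 0/1 matrix here),
# the kind of building block needed when assembling Jacobians of the solution map.
dproj = MOSD.projection_gradient_on_set(d, v, MOI.Nonnegatives(length(v)))
```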
@@ -104,6 +101,4 @@ In the light of above, DiffOpt differentiates program variables ``x``, ``s``, ``
- OptNet: Differentiable Optimization as a Layer in Neural Networks

### Backward Pass vector
One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), *OptNet: Differentiable Optimization as a Layer in Neural Networks*. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a *backward pass vector*, often used in backprop in machine learning/automatic differentiation. This is what happens in scheme 1 of `DiffOpt` backend.

But, for the conic system (scheme 2), we provide perturbations in conic data (`dA`, `db`, `dc`) to compute perturbations (`dx`, `dy`, `dz`) in input variables. Unlike the quadratic case, these perturbations are actual derivatives, not the product with a backward pass vector. This is an important distinction between the two schemes of differential optimization.
One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), *OptNet: Differentiable Optimization as a Layer in Neural Networks*. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a *backward pass vector*, often used in backpropagation in machine learning/automatic differentiation. This is what happens in `DiffOpt` backends.
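
In symbols (our notation, not the package's): if ``x(\theta)`` is the optimal solution as a function of the program data ``\theta`` and ``\bar{x} = \partial l / \partial x`` is the backward pass vector, reverse differentiation returns the vector-Jacobian product

```math
\frac{\partial l}{\partial \theta} = \left(\frac{\partial x}{\partial \theta}\right)^\top \bar{x}
```

rather than materializing the full Jacobian ``\partial x / \partial \theta``.
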
5 changes: 5 additions & 0 deletions src/DiffOpt.jl
@@ -12,12 +12,14 @@ import LazyArrays
import LinearAlgebra
import MathOptInterface as MOI
import MathOptSetDistances as MOSD
import ParametricOptInterface as POI
import SparseArrays

include("utils.jl")
include("product_of_sets.jl")
include("diff_opt.jl")
include("moi_wrapper.jl")
include("parameters.jl")
include("jump_moi_overloads.jl")

include("copy_dual.jl")
@@ -40,4 +42,7 @@ end

export diff_optimizer

# TODO
# add precompilation statements

end # module
19 changes: 19 additions & 0 deletions src/bridges.jl
@@ -43,6 +43,25 @@ function MOI.get(
MOI.get(model, attr, bridge.vector_constraint),
)[1]
end

function MOI.set(
model::MOI.ModelLike,
attr::ForwardConstraintFunction,
bridge::MOI.Bridges.Constraint.ScalarizeBridge,
value,
)
MOI.set.(model, attr, bridge.scalar_constraints, value)
return
end

function MOI.get(
model::MOI.ModelLike,
attr::ReverseConstraintFunction,
bridge::MOI.Bridges.Constraint.ScalarizeBridge,
)
return _vectorize(MOI.get.(model, attr, bridge.scalar_constraints))
end

function MOI.get(
model::MOI.ModelLike,
attr::DiffOpt.ReverseConstraintFunction,
11 changes: 11 additions & 0 deletions src/diff_opt.jl
@@ -58,6 +58,17 @@ The output solution differentials can be queried with the attribute
"""
function forward_differentiate! end

"""
empty_input_sensitivities!(model::MOI.ModelLike)
Empty the input sensitivities of the model.
Sets to zero all the sensitivities set by the user with methods such as:
- `MOI.set(model, DiffOpt.ReverseVariablePrimal(), variable_index, value)`
- `MOI.set(model, DiffOpt.ForwardObjectiveFunction(), expression)`
- `MOI.set(model, DiffOpt.ForwardConstraintFunction(), index, expression)`
"""
function empty_input_sensitivities! end

"""
ForwardObjectiveFunction <: MOI.AbstractModelAttribute
(Diffs for the remaining changed files are not shown in this view.)
