diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b52714ec..826a0648 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 + - uses: actions/cache@v3 env: cache-name: cache-artifacts with: diff --git a/Project.toml b/Project.toml index 42b60eeb..a3689608 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DiffOpt" uuid = "930fe3bc-9c6b-11ea-2d94-6184641e85e7" -authors = ["Akshay Sharma", "Mathieu Besançon", "Joaquim Dias Garcia", "Benoît Legat", "Oscar Dowson"] -version = "0.4.3" +authors = ["Akshay Sharma", "Mathieu Besançon", "Joaquim Dias Garcia", "Benoît Legat", "Oscar Dowson", "Andrew Rosemberg"] +version = "0.5.0" [deps] BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" diff --git a/docs/src/index.md b/docs/src/index.md index 0eb47b63..aece6932 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,10 +1,10 @@ # DiffOpt.jl -[DiffOpt.jl](https://github.com/jump-dev/DiffOpt.jl) is a package for differentiating convex optimization program ([JuMP.jl](https://github.com/jump-dev/JuMP.jl) or [MathOptInterface.jl](https://github.com/jump-dev/MathOptInterface.jl) models) with respect to program parameters. Note that this package does not contain any solver. +[DiffOpt.jl](https://github.com/jump-dev/DiffOpt.jl) is a package for differentiating convex and non-convex optimization programs ([JuMP.jl](https://github.com/jump-dev/JuMP.jl) or [MathOptInterface.jl](https://github.com/jump-dev/MathOptInterface.jl) models) with respect to program parameters. Note that this package does not contain any solver. This package has two major backends, available via the `reverse_differentiate!` and `forward_differentiate!` methods, to differentiate models (quadratic or conic) with optimal solutions. !!! note - Currently supports *linear programs* (LP), *convex quadratic programs* (QP) and *convex conic programs* (SDP, SOCP, exponential cone constraints only). + Currently supports *linear programs* (LP), *convex quadratic programs* (QP), *convex conic programs* (SDP, SOCP, exponential cone constraints only), and *general nonlinear programs* (NLP). ## Installation @@ -16,8 +16,8 @@ DiffOpt can be installed through the Julia package manager: ## Why are Differentiable optimization problems important? -Differentiable optimization is a promising field of convex optimization and has many potential applications in game theory, control theory and machine learning (specifically deep learning - refer [this video](https://www.youtube.com/watch?v=NrcaNnEXkT8) for more). -Recent work has shown how to differentiate specific subclasses of convex optimization problems. But several applications remain unexplored (refer section 8 of this [really good thesis](https://github.com/bamos/thesis)). With the help of automatic differentiation, differentiable optimization can have a significant impact on creating end-to-end differentiable systems to model neural networks, stochastic processes, or a game. +Differentiable optimization is a promising field of constrained optimization and has many potential applications in game theory, control theory, and machine learning (specifically deep learning - see [this video](https://www.youtube.com/watch?v=NrcaNnEXkT8) for more). +Recent work has shown how to differentiate specific subclasses of constrained optimization problems.
But several applications remain unexplored (refer section 8 of this [really good thesis](https://github.com/bamos/thesis)). With the help of automatic differentiation, differentiable optimization can have a significant impact on creating end-to-end differentiable systems to model neural networks, stochastic processes, or a game. ## Contributing diff --git a/docs/src/manual.md b/docs/src/manual.md index 614c4be8..07341b9d 100644 --- a/docs/src/manual.md +++ b/docs/src/manual.md @@ -1,10 +1,6 @@ # Manual -!!! note - As of now, this package only works for optimization models that can be written either in convex conic form or convex quadratic form. - - -## Supported objectives & constraints - `QuadraticProgram` backend +## Supported objectives & constraints - scheme 1 For `QuadraticProgram` backend, the package supports following `Function-in-Set` constraints: @@ -52,6 +48,33 @@ and the following objective types: Other conic sets such as `RotatedSecondOrderCone` and `PositiveSemidefiniteConeSquare` are supported through bridges. +## Supported objectives & constraints - `NonlinearProgram` backend + +For the `NonlinearProgram` backend, the package supports following `Function-in-Set` constraints: + +| MOI Function | MOI Set | +|:-------|:---------------| +| `VariableIndex` | `GreaterThan` | +| `VariableIndex` | `LessThan` | +| `VariableIndex` | `EqualTo` | +| `ScalarAffineFunction` | `GreaterThan` | +| `ScalarAffineFunction` | `LessThan` | +| `ScalarAffineFunction` | `EqualTo` | +| `ScalarQuadraticFunction` | `GreaterThan` | +| `ScalarQuadraticFunction` | `LessThan` | +| `ScalarQuadraticFunction` | `EqualTo` | +| `ScalarNonlinearFunction` | `GreaterThan` | +| `ScalarNonlinearFunction` | `LessThan` | +| `ScalarNonlinearFunction` | `EqualTo` | + +and the following objective types: + +| MOI Function | +|:-------:| +| `VariableIndex` | +| `ScalarAffineFunction` | +| `ScalarQuadraticFunction` | +| `ScalarNonlinearFunction` | ## Creating a differentiable MOI optimizer @@ -68,7 +91,7 @@ DiffOpt requires taking projections and finding projection gradients of vectors ## Conic problem formulation !!! note - As of now, the package is using `SCS` geometric form for affine expressions in cones. + As of now, when defining a conic or convex quadratic problem, the package is using `SCS` geometric form for affine expressions in cones. Consider a convex conic optimization problem in its primal (P) and dual (D) forms: ```math diff --git a/docs/src/reference.md b/docs/src/reference.md index 4688643a..9bc2fd82 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -4,5 +4,5 @@ ``` ```@autodocs -Modules = [DiffOpt, DiffOpt.QuadraticProgram, DiffOpt.ConicProgram] +Modules = [DiffOpt, DiffOpt.QuadraticProgram, DiffOpt.ConicProgram, DiffOpt.NonLinearProgram] ``` diff --git a/docs/src/usage.md b/docs/src/usage.md index 2ad2aa5a..ef6445c7 100644 --- a/docs/src/usage.md +++ b/docs/src/usage.md @@ -56,3 +56,65 @@ MOI.set(model, DiffOpt.ForwardObjectiveFunction(), ones(2) ⋅ x) DiffOpt.forward_differentiate!(model) grad_x = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), x) ``` + +3. To differentiate a general nonlinear program, have to use the API for Parameterized JuMP models. 
For example, consider the following nonlinear program: + +```julia +using JuMP, DiffOpt, Ipopt + +model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) +set_silent(model) + +p_val = 4.0 +pc_val = 2.0 +@variable(model, x) +@variable(model, p in Parameter(p_val)) +@variable(model, pc in Parameter(pc_val)) +@constraint(model, cons, pc * x >= 3 * p) +@objective(model, Min, x^4) +optimize!(model) +@show value(x) == 3 * p_val / pc_val + +# the function is +# x(p, pc) = 3p / pc +# hence, +# dx/dp = 3 / pc +# dx/dpc = -3p / pc^2 + +# First, try forward mode AD + +# differentiate w.r.t. p +direction_p = 3.0 +MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(p), Parameter(direction_p)) +DiffOpt.forward_differentiate!(model) +@show MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) == direction_p * 3 / pc_val + +# update p and pc +p_val = 2.0 +pc_val = 6.0 +set_parameter_value(p, p_val) +set_parameter_value(pc, pc_val) +# re-optimize +optimize!(model) +# check solution +@show value(x) ≈ 3 * p_val / pc_val + +# stop differentiating with respect to p +DiffOpt.empty_input_sensitivities!(model) +# differentiate w.r.t. pc +direction_pc = 10.0 +MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(pc), Parameter(direction_pc)) +DiffOpt.forward_differentiate!(model) +@show abs(MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) - + -direction_pc * 3 * p_val / pc_val^2) < 1e-5 + +# it is always good practice to clear previously set sensitivities +DiffOpt.empty_input_sensitivities!(model) +# Now, reverse mode AD +direction_x = 10.0 +MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, direction_x) +DiffOpt.reverse_differentiate!(model) +@show MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(p)) == MOI.Parameter(direction_x * 3 / pc_val) +@show abs(MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(pc)).value - + -direction_x * 3 * p_val / pc_val^2) < 1e-5 +``` \ No newline at end of file diff --git a/src/DiffOpt.jl b/src/DiffOpt.jl index 71dd1a00..0dce78d9 100644 --- a/src/DiffOpt.jl +++ b/src/DiffOpt.jl @@ -27,6 +27,7 @@ include("bridges.jl") include("QuadraticProgram/QuadraticProgram.jl") include("ConicProgram/ConicProgram.jl") +include("NonLinearProgram/NonLinearProgram.jl") """ add_all_model_constructors(model) @@ -37,6 +38,13 @@ Add all constructors of [`AbstractModel`](@ref) defined in this package to function add_all_model_constructors(model) add_model_constructor(model, QuadraticProgram.Model) add_model_constructor(model, ConicProgram.Model) + add_model_constructor(model, NonLinearProgram.Model) + return +end + +function add_default_factorization(model) + model.input_cache.factorization = + NonLinearProgram._lu_with_inertia_correction return end diff --git a/src/NonLinearProgram/NonLinearProgram.jl b/src/NonLinearProgram/NonLinearProgram.jl new file mode 100644 index 00000000..93f52a10 --- /dev/null +++ b/src/NonLinearProgram/NonLinearProgram.jl @@ -0,0 +1,606 @@ +# Copyright (c) 2025: Andrew Rosemberg and contributors +# +# Use of this source code is governed by an MIT-style license that can be found +# in the LICENSE.md file or at https://opensource.org/licenses/MIT.
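The `add_default_factorization` helper added to `src/DiffOpt.jl` above stores `NonLinearProgram._lu_with_inertia_correction` in `input_cache.factorization`; `nlp_utilities.jl` later calls this hook as `factorization(M, model)` and back-solves the result with `ldiv!`. As a minimal sketch, assuming only that calling convention (the name `plain_lu_factorization` is illustrative and not part of the package), a user-supplied alternative could look like:

```julia
import SparseArrays

# Sketch only: any callable `(M, model)` that returns a factorization supporting
# `ldiv!`, or `nothing` on failure, can be stored in `input_cache.factorization`.
function plain_lu_factorization(M::SparseArrays.SparseMatrixCSC, model)
    K = SparseArrays.lu(M; check = false)  # plain LU, no inertia correction
    return K.status == 0 ? K : nothing     # `nothing` => DiffOpt falls back to zero sensitivities
end
```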
+ +module NonLinearProgram + +import DiffOpt +import JuMP +import MathOptInterface as MOI +using SparseArrays +using LinearAlgebra + +Base.@kwdef struct Cache + primal_vars::Vector{MOI.VariableIndex} # Sorted primal variables + dual_mapping::Vector{Int} # Unified mapping for constraints and bounds + params::Vector{MOI.VariableIndex} # VariableRefs for parameters + index_duals::Vector{Int} # Indices for dual variables + leq_locations::Vector{Int} # Locations of <= constraints + geq_locations::Vector{Int} # Locations of >= constraints + has_up::Vector{Int} # Variables with upper bounds + has_low::Vector{Int} # Variables with lower bounds + evaluator::MOI.Nonlinear.Evaluator # Evaluator for the NLP + cons::Vector{MOI.Nonlinear.ConstraintIndex} # Constraints index for the NLP +end + +Base.@kwdef struct ForwCache + primal_Δs::Dict{MOI.VariableIndex,Float64} # Sensitivity for primal variables (indexed by VariableIndex) + dual_Δs::Vector{Float64} # Sensitivity for constraints and bounds (indexed by ConstraintIndex) +end + +Base.@kwdef struct ReverseCache + Δp::Vector{Float64} # Sensitivity for parameters +end + +# Define the form of the NLP +mutable struct Form <: MOI.ModelLike + model::MOI.Nonlinear.Model + num_variables::Int + num_constraints::Int + sense::MOI.OptimizationSense + list_of_constraint::MOI.Utilities.DoubleDicts.IndexDoubleDict + var2param::Dict{MOI.VariableIndex,MOI.Nonlinear.ParameterIndex} + var2ci::Dict{MOI.VariableIndex,MOI.ConstraintIndex} + upper_bounds::Dict{Int,Float64} + lower_bounds::Dict{Int,Float64} + constraint_upper_bounds::Dict{Int,MOI.ConstraintIndex} + constraint_lower_bounds::Dict{Int,MOI.ConstraintIndex} + constraints_2_nlp_index::Dict{ + MOI.ConstraintIndex, + MOI.Nonlinear.ConstraintIndex, + } + nlp_index_2_constraint::Dict{ + MOI.Nonlinear.ConstraintIndex, + MOI.ConstraintIndex, + } + leq_values::Dict{MOI.ConstraintIndex,Float64} + geq_values::Dict{MOI.ConstraintIndex,Float64} +end + +function Form() + return Form( + MOI.Nonlinear.Model(), + 0, + 0, + MOI.MIN_SENSE, + MOI.Utilities.DoubleDicts.IndexDoubleDict(), + Dict{MOI.VariableIndex,MOI.Nonlinear.ParameterIndex}(), + Dict{MOI.VariableIndex,MOI.ConstraintIndex}(), + Dict{Int,Float64}(), + Dict{Int,Float64}(), + Dict{Int,MOI.ConstraintIndex}(), + Dict{Int,MOI.ConstraintIndex}(), + Dict{MOI.ConstraintIndex,MOI.Nonlinear.ConstraintIndex}(), + Dict{MOI.Nonlinear.ConstraintIndex,MOI.ConstraintIndex}(), + Dict{MOI.ConstraintIndex,Float64}(), + Dict{MOI.ConstraintIndex,Float64}(), + ) +end + +function MOI.is_valid(model::Form, ref::MOI.VariableIndex) + return ref.value <= model.num_variables +end + +function MOI.is_valid(model::Form, ref::MOI.ConstraintIndex) + return ref.value <= model.num_constraints +end + +function MOI.add_variable(form::Form) + form.num_variables += 1 + return MOI.VariableIndex(form.num_variables) +end + +function MOI.add_variables(form::Form, n) + idxs = Vector{MOI.VariableIndex}(undef, n) + for i in 1:n + idxs[i] = MOI.add_variable(form) + end + return idxs +end + +function MOI.supports(form::Form, attribute, val) + return MOI.supports(form.model, attribute, val) +end + +function MOI.supports_constraint( + ::Form, + ::Type{F}, + ::Type{S}, +) where { + F<:Union{ + MOI.ScalarNonlinearFunction, + MOI.ScalarQuadraticFunction{Float64}, + MOI.ScalarAffineFunction{Float64}, + MOI.VariableIndex, + }, + S<:Union{ + MOI.GreaterThan{Float64}, + MOI.LessThan{Float64}, + # MOI.Interval{Float64}, + MOI.EqualTo{Float64}, + MOI.Parameter{Float64}, + }, +} + return true +end + +function _add_leq_geq( + 
form::Form, + idx::MOI.ConstraintIndex, + set::MOI.GreaterThan, +) + form.geq_values[idx] = set.lower + return +end + +function _add_leq_geq(form::Form, idx::MOI.ConstraintIndex, set::MOI.LessThan) + form.leq_values[idx] = set.upper + return +end + +function _add_leq_geq(::Form, ::MOI.ConstraintIndex, ::MOI.EqualTo) + return +end + +function MOI.add_constraint( + form::Form, + func::F, + set::S, +) where { + F<:Union{ + MOI.ScalarNonlinearFunction, + MOI.ScalarQuadraticFunction{Float64}, + MOI.ScalarAffineFunction{Float64}, + }, + S<:Union{ + MOI.GreaterThan{Float64}, + MOI.LessThan{Float64}, + # MOI.Interval{Float64}, + MOI.EqualTo{Float64}, + }, +} + form.num_constraints += 1 + idx_nlp = MOI.Nonlinear.add_constraint(form.model, func, set) + idx = MOI.ConstraintIndex{F,S}(form.num_constraints) + _add_leq_geq(form, idx, set) + form.list_of_constraint[idx] = idx + form.constraints_2_nlp_index[idx] = idx_nlp + form.nlp_index_2_constraint[idx_nlp] = idx + return idx +end + +function MOI.add_constraint( + form::Form, + func::F, + set::S, +) where {F<:MOI.VariableIndex,S<:MOI.EqualTo} + form.num_constraints += 1 + idx_nlp = MOI.Nonlinear.add_constraint(form.model, func, set) + idx = MOI.ConstraintIndex{F,S}(form.num_constraints) + _add_leq_geq(form, idx, set) + form.list_of_constraint[idx] = idx + form.constraints_2_nlp_index[idx] = idx_nlp + form.nlp_index_2_constraint[idx_nlp] = idx + return idx +end + +function MOI.add_constraint( + form::Form, + idx::F, + set::S, +) where {F<:MOI.VariableIndex,S<:MOI.Parameter{Float64}} + form.num_constraints += 1 + p = MOI.Nonlinear.add_parameter(form.model, set.value) + form.var2param[idx] = p + idx_ci = MOI.ConstraintIndex{F,S}(form.num_constraints) + form.var2ci[idx] = idx_ci + return idx_ci +end + +function MOI.add_constraint( + form::Form, + var_idx::F, + set::S, +) where {F<:MOI.VariableIndex,S<:MOI.GreaterThan} + form.num_constraints += 1 + form.lower_bounds[var_idx.value] = set.lower + idx = MOI.ConstraintIndex{F,S}(form.num_constraints) + form.list_of_constraint[idx] = idx + form.constraint_lower_bounds[var_idx.value] = idx + return idx +end + +function MOI.add_constraint( + form::Form, + var_idx::F, + set::S, +) where {F<:MOI.VariableIndex,S<:MOI.LessThan} + form.num_constraints += 1 + form.upper_bounds[var_idx.value] = set.upper + idx = MOI.ConstraintIndex{F,S}(form.num_constraints) + form.list_of_constraint[idx] = idx + form.constraint_upper_bounds[var_idx.value] = idx + return idx +end + +function MOI.get(form::Form, ::MOI.ListOfConstraintTypesPresent) + return collect( + MOI.Utilities.DoubleDicts.outer_keys(form.list_of_constraint), + ) +end + +function MOI.get(form::Form, ::MOI.NumberOfConstraints{F,S}) where {F,S} + return length(form.list_of_constraint[F, S]) +end + +function MOI.get(::Form, ::MOI.ConstraintPrimalStart) + return +end + +function MOI.supports(::Form, ::MOI.ObjectiveSense) + return true +end + +function MOI.supports(::Form, ::MOI.ObjectiveFunction) + return true +end + +function MOI.set(form::Form, ::MOI.ObjectiveSense, sense::MOI.OptimizationSense) + form.sense = sense + return +end + +function MOI.get(form::Form, ::MOI.ObjectiveSense) + return form.sense +end + +function MOI.set( + form::Form, + ::MOI.ObjectiveFunction, + func, #::MOI.ScalarNonlinearFunction +) + MOI.Nonlinear.set_objective(form.model, func) + return +end + +""" + DiffOpt.NonLinearProgram.Model <: DiffOpt.AbstractModel + +Model to differentiate nonlinear programs. 
+ +Supports forward and reverse differentiation, caching sensitivity data +for primal variables, constraints, and bounds, excluding slack variables. +""" +mutable struct Model <: DiffOpt.AbstractModel + model::Form + cache::Union{Nothing,Cache} # Cache for evaluator and mappings + forw_grad_cache::Union{Nothing,ForwCache} # Cache for forward sensitivity results + back_grad_cache::Union{Nothing,ReverseCache} # Cache for reverse sensitivity results + diff_time::Float64 + input_cache::DiffOpt.InputCache + x::Vector{Float64} + y::Vector{Float64} + s::Vector{Float64} +end + +function Model() + return Model( + Form(), + nothing, + nothing, + nothing, + NaN, + DiffOpt.InputCache(), + [], + [], + [], + ) +end + +objective_sense(form::Form) = form.sense +objective_sense(model::Model) = objective_sense(model.model) + +function MOI.set( + model::Model, + ::MOI.ConstraintPrimalStart, + ci::MOI.ConstraintIndex, + value, +) + MOI.throw_if_not_valid(model, ci) + return DiffOpt._enlarge_set(model.s, ci.value, value) +end + +function MOI.supports( + ::Model, + ::MOI.ConstraintDualStart, + ::Type{MOI.ConstraintIndex{MOI.VariableIndex,S}}, +) where { + S<:Union{ + MOI.GreaterThan{Float64}, + MOI.LessThan{Float64}, + MOI.EqualTo{Float64}, + MOI.Interval{Float64}, + }, +} + return true +end + +function MOI.set( + model::Model, + ::MOI.ConstraintDualStart, + ci::MOI.ConstraintIndex, + value, +) + MOI.throw_if_not_valid(model, ci) + return DiffOpt._enlarge_set(model.y, ci.value, value) +end + +function MOI.set( + model::Model, + ::MOI.VariablePrimalStart, + vi::MOI.VariableIndex, + value, +) + MOI.throw_if_not_valid(model, vi) + return DiffOpt._enlarge_set(model.x, vi.value, value) +end + +function MOI.is_empty(model::Model) + return model.cache === nothing +end + +function MOI.empty!(model::Model) + model.cache = nothing + model.forw_grad_cache = nothing + model.back_grad_cache = nothing + model.diff_time = NaN + return +end + +include("nlp_utilities.jl") + +""" + function _lu_with_inertia_correction( + M::SparseArrays.SparseMatrixCSC, # Jacobian of KKT system + model::Model, # Model to extract number of variables and constraints + st::T = 1e-6, # Step size for inertia correction + max_corrections::Int = 50, # Maximum number of corrections + ) where T<:Real + +Lu-factorization with inertia correction. If no inertia correction is needed, it only performs the LU +factorization. +""" +function _lu_with_inertia_correction( + M::SparseArrays.SparseMatrixCSC, # Jacobian of KKT system + model::Model, # Model to extract number of variables and constraints + st::T = 1e-6, # Step size for inertia correction + max_corrections::Int = 50, # Maximum number of corrections +) where {T<:Real} + num_w = + _get_num_primal_vars(model) + + length(model.cache.leq_locations) + + length(model.cache.geq_locations) + num_cons = _get_num_constraints(model) + # Factorization + K = SparseArrays.lu(M; check = false) + # Inertia correction + status = K.status + if status == 1 + @info "Inertia correction needed. + Attempting correction by adding diagonal matrix with positive values for the Jacobian of the stationary equations + and negative values for the Jacobian of the constraints." + num_c = 0 + diag_mat = ones(size(M, 1)) + diag_mat[num_w+1:num_w+num_cons] .= -1 + diag_mat = SparseArrays.spdiagm(diag_mat) + while status == 1 && num_c < max_corrections + M = M + st * diag_mat + K = lu(M; check = false) + status = K.status + num_c += 1 + end + if status != 0 + @warn "Inertia correction failed." 
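+            # If the correction loop exhausts `max_corrections`, give up and return `nothing`; `_compute_derivatives_no_relax` treats a `nothing` factorization as a failure and falls back to zero sensitivities.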
+ return nothing + end + end + return K +end + +_all_variables(form::Form) = MOI.VariableIndex.(1:form.num_variables) +_all_variables(model::Model) = _all_variables(model.model) +_all_params(form::Form) = collect(keys(form.var2param)) +_all_params(model::Model) = _all_params(model.model) +_all_primal_vars(form::Form) = setdiff(_all_variables(form), _all_params(form)) +_all_primal_vars(model::Model) = _all_primal_vars(model.model) + +_get_num_constraints(form::Form) = length(form.constraints_2_nlp_index) +_get_num_constraints(model::Model) = _get_num_constraints(model.model) +_get_num_primal_vars(form::Form) = length(_all_primal_vars(form)) +_get_num_primal_vars(model::Model) = _get_num_primal_vars(model.model) +_get_num_params(form::Form) = length(_all_params(form)) +_get_num_params(model::Model) = _get_num_params(model.model) + +function _cache_evaluator!(model::Model) + form = model.model + # Retrieve and sort primal variables by NLP index + params = sort(_all_params(form); by = x -> x.value) + primal_vars = sort(_all_primal_vars(form); by = x -> x.value) + num_primal = length(primal_vars) + + # Create evaluator and constraints + evaluator = _create_evaluator(form) + num_constraints = _get_num_constraints(form) + # Analyze constraints and bounds + leq_locations, geq_locations = _find_inequalities(form) + num_leq = length(leq_locations) + num_geq = length(geq_locations) + has_up = findall(i -> haskey(form.upper_bounds, i.value), primal_vars) + has_low = findall(i -> haskey(form.lower_bounds, i.value), primal_vars) + num_low = length(has_low) + num_up = length(has_up) + + # Create unified dual mapping from constraint index to NLP index + dual_mapping = Vector{Int}(undef, form.num_constraints) + for (ci, cni) in form.constraints_2_nlp_index + dual_mapping[ci.value] = cni.value + end + + # Add bounds to dual mapping + offset = num_constraints + for (i, var_idx) in enumerate(primal_vars[has_low]) + # offset + i + dual_mapping[form.constraint_lower_bounds[var_idx.value].value] = + offset + i + end + offset += num_low + for (i, var_idx) in enumerate(primal_vars[has_up]) + # offset + i + dual_mapping[form.constraint_upper_bounds[var_idx.value].value] = + offset + i + end + + num_slacks = num_leq + num_geq + num_w = num_primal + num_slacks + # Create index for dual variables + index_duals = [ + num_w+1:num_w+num_constraints + num_w+num_constraints+1:num_w+num_constraints+num_low + num_w+num_constraints+num_low+num_geq+1:num_w+num_constraints+num_low+num_geq+num_up + ] + cons = sort(collect(keys(form.nlp_index_2_constraint)); by = x -> x.value) + + model.cache = Cache(; + primal_vars = primal_vars, + dual_mapping = dual_mapping, + params = params, + index_duals = index_duals, + leq_locations = leq_locations, + geq_locations = geq_locations, + has_up = has_up, + has_low = has_low, + evaluator = evaluator, + cons = cons, + ) + return model.cache +end + +function DiffOpt.forward_differentiate!(model::Model; tol = 1e-6) + model.diff_time = @elapsed begin + cache = _cache_evaluator!(model) + form = model.model + # Fetch parameter sensitivities + Δp = zeros(length(cache.params)) + for (i, var_idx) in enumerate(cache.params) + ky = form.var2ci[var_idx] + if haskey(model.input_cache.dp, ky) # only for set sensitivities + Δp[i] = model.input_cache.dp[ky] + end + end + + # Compute Jacobian + Δs = _compute_sensitivity(model; tol = tol) + + # Extract primal and dual sensitivities + primal_Δs = Δs[1:length(model.cache.primal_vars), :] * Δp # Exclude slacks + dual_Δs = Δs[cache.index_duals, :] * Δp # Includes 
constraints and bounds + + model.forw_grad_cache = ForwCache(; + primal_Δs = Dict(model.cache.primal_vars .=> primal_Δs), + dual_Δs = dual_Δs, + ) + end + return nothing +end + +function DiffOpt.reverse_differentiate!(model::Model; tol = 1e-6) + model.diff_time = @elapsed begin + cache = _cache_evaluator!(model) + form = model.model + + # Compute Jacobian + Δs = _compute_sensitivity(model; tol = tol) + num_primal = length(cache.primal_vars) + # Fetch primal sensitivities + Δx = zeros(num_primal) + for (i, var_idx) in enumerate(cache.primal_vars) + if haskey(model.input_cache.dx, var_idx) + Δx[i] = model.input_cache.dx[var_idx] + end + end + # Fetch dual sensitivities + num_constraints = length(cache.cons) + num_up = length(cache.has_up) + num_low = length(cache.has_low) + Δdual = zeros(num_constraints + num_up + num_low) + for (i, ci) in enumerate(cache.cons) + idx = form.nlp_index_2_constraint[ci] + if haskey(model.input_cache.dy, idx) + Δdual[i] = model.input_cache.dy[idx] + end + end + for (i, var_idx) in enumerate(cache.primal_vars[cache.has_low]) + idx = form.constraint_lower_bounds[var_idx.value].value + if haskey(model.input_cache.dy, idx) + Δdual[num_constraints+i] = model.input_cache.dy[idx] + end + end + for (i, var_idx) in enumerate(cache.primal_vars[cache.has_up]) + idx = form.constraint_upper_bounds[var_idx.value].value + if haskey(model.input_cache.dy, idx) + Δdual[num_constraints+num_low+i] = model.input_cache.dy[idx] + end + end + # Extract Parameter sensitivities + Δw = zeros(size(Δs, 1)) + Δw[1:num_primal] = Δx + Δw[cache.index_duals] = Δdual + Δp = Δs' * Δw + + # Order by ConstraintIndex + varorder = + sort(collect(keys(form.var2ci)); by = x -> form.var2ci[x].value) + Δp = [Δp[form.var2param[var_idx].value] for var_idx in varorder] + + model.back_grad_cache = ReverseCache(; Δp = Δp) + end + return nothing +end + +function MOI.get( + model::Model, + ::DiffOpt.ForwardVariablePrimal, + vi::MOI.VariableIndex, +) + if model.forw_grad_cache === nothing + error("Forward differentiation has not been performed yet.") + end + return model.forw_grad_cache.primal_Δs[vi] +end + +function MOI.get( + model::Model, + ::DiffOpt.ForwardConstraintDual, + ci::MOI.ConstraintIndex, +) + if model.forw_grad_cache === nothing + error("Forward differentiation has not been performed yet.") + end + try + idx = model.cache.dual_mapping[ci.value] + return model.forw_grad_cache.dual_Δs[idx] + catch + error("ConstraintIndex not found in dual mapping.") + end +end + +function MOI.get( + model::Model, + ::DiffOpt.ReverseConstraintSet, + ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}, +) where {T} + return MOI.Parameter{T}(model.back_grad_cache.Δp[ci.value]) +end + +end # module NonLinearProgram diff --git a/src/NonLinearProgram/nlp_utilities.jl b/src/NonLinearProgram/nlp_utilities.jl new file mode 100644 index 00000000..96c334f2 --- /dev/null +++ b/src/NonLinearProgram/nlp_utilities.jl @@ -0,0 +1,498 @@ +# Copyright (c) 2025: Andrew Rosemberg and contributors +#= +The code in this file related to calculating hessians and jacobians is based on the +JuMP Tutorial for Querying Hessians: +https://github.com/jump-dev/JuMP.jl/blob/301d46e81cb66c74c6e22cd89fb89ced740f157b/docs/src/tutorials/nonlinear/querying_hessians.jl#L67-L72 + +Use of this source code is governed by an MIT-style license that can be found +in the LICENSE.md file or at https://opensource.org/licenses/MIT. +=# + +""" + _fill_off_diagonal(H) + +Filling the off-diagonal elements of a sparse matrix to make it symmetric. 
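+The Hessian values from `MOI.eval_hessian_lagrangian` cover only one triangle, so the matrix is completed as `H + H'` and the diagonal entries are halved to avoid counting them twice.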
+""" +function _fill_off_diagonal(H::SparseMatrixCSC) + ret = H + H' + row_vals = SparseArrays.rowvals(ret) + non_zeros = SparseArrays.nonzeros(ret) + for col in 1:size(ret, 2) + for i in SparseArrays.nzrange(ret, col) + if col == row_vals[i] + non_zeros[i] /= 2 + end + end + end + return ret +end + +""" + _compute_optimal_hessian(evaluator::MOI.Nonlinear.Evaluator, rows::Vector{JuMP.ConstraintRef}, x::Vector{JuMP.VariableRef}) + +Compute the Hessian of the Lagrangian calculated at the optimal solution. +""" +function _compute_optimal_hessian( + model::Model, + rows::Vector{MOI.Nonlinear.ConstraintIndex}, +) + _sense_multiplier = _sense_mult(model) + evaluator = model.cache.evaluator + y = [model.y[model.model.nlp_index_2_constraint[row].value] for row in rows] + hessian_sparsity = MOI.hessian_lagrangian_structure(evaluator) + I = [i for (i, _) in hessian_sparsity] + J = [j for (_, j) in hessian_sparsity] + V = zeros(length(hessian_sparsity)) + # The signals are being adjusted to match the Ipopt convention (inner.mult_g) + # but we don't know if we need to adjust the objective function multiplier + MOI.eval_hessian_lagrangian( + evaluator, + V, + model.x, + 1.0, + -_sense_multiplier * y, + ) + num_vars = length(model.x) + H = SparseArrays.sparse(I, J, V, num_vars, num_vars) + return _fill_off_diagonal(H) +end + +""" + _compute_optimal_jacobian(evaluator::MOI.Nonlinear.Evaluator, rows::Vector{JuMP.ConstraintRef}, x::Vector{JuMP.VariableRef}) + +Compute the Jacobian of the constraints calculated at the optimal solution. +""" +function _compute_optimal_jacobian( + model::Model, + rows::Vector{MOI.Nonlinear.ConstraintIndex}, +) + evaluator = model.cache.evaluator + jacobian_sparsity = MOI.jacobian_structure(evaluator) + I = [i for (i, _) in jacobian_sparsity] + J = [j for (_, j) in jacobian_sparsity] + V = zeros(length(jacobian_sparsity)) + MOI.eval_constraint_jacobian(evaluator, V, model.x) + A = SparseArrays.sparse(I, J, V, length(rows), length(model.x)) + return A +end + +""" + _compute_optimal_hess_jac(evaluator::MOI.Nonlinear.Evaluator, rows::Vector{JuMP.ConstraintRef}, x::Vector{JuMP.VariableRef}) + +Compute the Hessian of the Lagrangian and Jacobian of the constraints calculated at the optimal solution. +""" +function _compute_optimal_hess_jac( + model::Model, + rows::Vector{MOI.Nonlinear.ConstraintIndex}, +) + hessian = _compute_optimal_hessian(model, rows) + jacobian = _compute_optimal_jacobian(model, rows) + + return hessian, jacobian +end + +""" + _create_evaluator(form::Form) + +Create the evaluator for the NLP. +""" +function _create_evaluator(form::Form) + nlp = form.model + backend = MOI.Nonlinear.SparseReverseMode() + evaluator = MOI.Nonlinear.Evaluator( + nlp, + backend, + MOI.VariableIndex.(1:form.num_variables), + ) + MOI.initialize(evaluator, [:Hess, :Jac]) + return evaluator +end + +""" + _is_less_inequality(con::MOI.ConstraintIndex{F, S}) where {F, S} + +Check if the constraint is a less than inequality. +""" +function _is_less_inequality( + ::MOI.ConstraintIndex{F,S}, +) where { + F<:Union{ + MOI.ScalarNonlinearFunction, + MOI.ScalarQuadraticFunction{Float64}, + MOI.ScalarAffineFunction{Float64}, + }, + S<:MOI.LessThan, +} + return true +end + +function _is_less_inequality(::MOI.ConstraintIndex{F,S}) where {F,S} + return false +end + +function _is_greater_inequality(::MOI.ConstraintIndex{F,S}) where {F,S} + return false +end + +""" + _is_greater_inequality(con::MOI.ConstraintIndex{F, S}) where {F, S} + +Check if the constraint is a greater than inequality. 
+""" +function _is_greater_inequality( + ::MOI.ConstraintIndex{F,S}, +) where { + F<:Union{ + MOI.ScalarNonlinearFunction, + MOI.ScalarQuadraticFunction{Float64}, + MOI.ScalarAffineFunction{Float64}, + }, + S<:MOI.GreaterThan, +} + return true +end + +""" + _find_inequalities(cons::Vector{JuMP.ConstraintRef}) + +Find the indices of the inequality constraints. +""" +function _find_inequalities(model::Form) + num_cons = length(model.list_of_constraint) + leq_locations = zeros(num_cons) + geq_locations = zeros(num_cons) + for con in keys(model.list_of_constraint) + if _is_less_inequality(con) + leq_locations[model.constraints_2_nlp_index[con].value] = true + end + if _is_greater_inequality(con) + geq_locations[model.constraints_2_nlp_index[con].value] = true + end + end + return findall(x -> x == 1, leq_locations), + findall(x -> x == 1, geq_locations) +end + +""" + _compute_solution_and_bounds(model::Model; tol=1e-6) + +Compute the solution and bounds of the primal variables. +""" +function _compute_solution_and_bounds(model::Model; tol = 1e-6) + _sense_multiplier = _sense_mult(model) + num_vars = _get_num_primal_vars(model) + form = model.model + leq_locations = model.cache.leq_locations + geq_locations = model.cache.geq_locations + ineq_locations = vcat(geq_locations, leq_locations) + num_leq = length(leq_locations) + num_geq = length(geq_locations) + num_ineq = num_leq + num_geq + has_up = model.cache.has_up + has_low = model.cache.has_low + primal_vars = model.cache.primal_vars + cons = model.cache.cons + # MOI constraint indices of leq and geq constraints + model_cons_leq = + [form.nlp_index_2_constraint[con] for con in cons[leq_locations]] + model_cons_geq = + [form.nlp_index_2_constraint[con] for con in cons[geq_locations]] + # Value of the slack variables + # c(x) - b - su = 0, su <= 0 + s_leq = + [model.s[con.value] for con in model_cons_leq] - [form.leq_values[con] for con in model_cons_leq] + # c(x) - b - sl = 0, sl >= 0 + s_geq = + [model.s[con.value] for con in model_cons_geq] - [form.geq_values[con] for con in model_cons_geq] + primal_idx = [i.value for i in model.cache.primal_vars] + X = [model.x[primal_idx]; s_geq; s_leq] # Primal, Slack with Lower Bounds, Slack with Upper Bounds + + # Value and dual of the lower bounds + V_L = spzeros(num_vars + num_ineq) + X_L = spzeros(num_vars + num_ineq) + for (i, j) in enumerate(has_low) + # Dual of the lower bounds of the primal variables + V_L[j] = + model.y[form.constraint_lower_bounds[primal_vars[j].value].value] * + _sense_multiplier + if _sense_multiplier == 1.0 + @assert V_L[j] >= -tol "Dual of lower bound must be positive: $i $(V_L[i])" + else + @assert V_L[j] <= tol "Dual of lower bound must be negative: $i $(V_L[i])" + end + # Lower bounds of the primal variables + X_L[j] = form.lower_bounds[primal_vars[j].value] + end + for (i, con) in enumerate(cons[geq_locations]) + # Dual of the lower bounds of the slack variables + # By convention jump dual will allways be positive for geq constraints + # but for ipopt it will be positive if min problem and negative if max problem + V_L[num_vars+i] = + model.y[form.nlp_index_2_constraint[con].value] * _sense_multiplier + # + if _sense_multiplier == 1.0 + @assert V_L[num_vars+i] >= -tol "Dual of geq constraint must be positive: $i $(V_L[num_vars+i])" + else + @assert V_L[num_vars+i] <= tol "Dual of geq constraint must be negative: $i $(V_L[num_vars+i])" + end + end + # value and dual of the upper bounds + V_U = spzeros(num_vars + num_ineq) + X_U = spzeros(num_vars + num_ineq) + for (i, j) 
in enumerate(has_up) + # Dual of the upper bounds of the primal variables + V_U[j] = + model.y[form.constraint_upper_bounds[primal_vars[j].value].value] * + (-_sense_multiplier) + if _sense_multiplier == 1.0 + @assert V_U[j] >= -tol "Dual of upper bound must be positive: $i $(V_U[i])" + else + @assert V_U[j] <= tol "Dual of upper bound must be negative: $i $(V_U[i])" + end + # Upper bounds of the primal variables + X_U[j] = form.upper_bounds[primal_vars[j].value] + end + for (i, con) in enumerate(cons[leq_locations]) + # Dual of the upper bounds of the slack variables + # By convention jump dual will allways be negative for leq constraints + # but for ipopt it will be positive if min problem and negative if max problem + V_U[num_vars+num_geq+i] = + model.y[form.nlp_index_2_constraint[con].value] * + (-_sense_multiplier) + if _sense_multiplier == 1.0 + @assert V_U[num_vars+num_geq+i] >= -tol "Dual of leq constraint must be positive: $i $(V_U[num_vars+i])" + else + @assert V_U[num_vars+num_geq+i] <= tol "Dual of leq constraint must be negative: $i $(V_U[num_vars+i])" + end + end + return X, # Primal and slack solution + V_L, # Dual of the lower bounds + X_L, # Lower bounds + V_U, # Dual of the upper bounds + X_U, # Upper bounds + leq_locations, # Indices of the leq constraints wrt the nlp constraints + geq_locations, # Indices of the geq constraints wrt the nlp constraints + ineq_locations, # Indices of the ineq constraints wrt the nlp constraints + vcat(has_up, collect(num_vars+num_geq+1:num_vars+num_geq+num_leq)), # Indices of variables with upper bounds (both primal and slack) + vcat(has_low, collect(num_vars+1:num_vars+num_geq)), # Indices of variables with lower bounds (both primal and slack) + cons # Vector of the nlp constraints +end + +""" + _build_sensitivity_matrices(model::Model, cons::Vector{MOI.Nonlinear.ConstraintIndex}, _X::AbstractVector, _V_L::AbstractVector, _X_L::AbstractVector, _V_U::AbstractVector, _X_U::AbstractVector, leq_locations::Vector{Z}, geq_locations::Vector{Z}, ineq_locations::Vector{Z}, has_up::Vector{Z}, has_low::Vector{Z}) + +Build the M (KKT Jacobian w.r.t. solution) and N (KKT Jacobian w.r.t. parameters) matrices for the sensitivity analysis. 
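+The parameter sensitivities solve the linearized KKT system `M * ∂s = -N`, where `∂s = [∂x; ∂λ; ∂ν_L; ∂ν_U]` stacks the sensitivities of the primal and slack variables, the constraint duals, and the bound duals (see `_compute_derivatives_no_relax`).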
+""" +function _build_sensitivity_matrices( + model::Model, + cons::Vector{MOI.Nonlinear.ConstraintIndex}, + _X::AbstractVector, + _V_L::AbstractVector, + _X_L::AbstractVector, + _V_U::AbstractVector, + _X_U::AbstractVector, + leq_locations::Vector{Z}, + geq_locations::Vector{Z}, + ineq_locations::Vector{Z}, + has_up::Vector{Z}, + has_low::Vector{Z}, +) where {Z<:Integer} + # Setting + num_vars = _get_num_primal_vars(model) + num_parms = _get_num_params(model) + num_cons = _get_num_constraints(model) + num_ineq = length(ineq_locations) + num_low = length(has_low) + num_up = length(has_up) + + # Primal solution + X_lb = spzeros(num_low, num_low) + X_ub = spzeros(num_up, num_up) + V_L = spzeros(num_low, num_vars + num_ineq) + V_U = spzeros(num_up, num_vars + num_ineq) + I_L = spzeros(num_vars + num_ineq, num_low) + I_U = spzeros(num_vars + num_ineq, num_up) + + # value and dual of the lower bounds + for (i, j) in enumerate(has_low) + V_L[i, j] = _V_L[j] + X_lb[i, i] = _X[j] - _X_L[j] + I_L[j, i] = -1 + end + # value and dual of the upper bounds + for (i, j) in enumerate(has_up) + V_U[i, j] = _V_U[j] + X_ub[i, i] = _X_U[j] - _X[j] + I_U[j, i] = 1 + end + + # Function Derivatives + hessian, jacobian = _compute_optimal_hess_jac(model, cons) + primal_idx = [i.value for i in model.cache.primal_vars] + params_idx = [i.value for i in model.cache.params] + # Hessian of the lagrangian wrt the primal variables + W = spzeros(num_vars + num_ineq, num_vars + num_ineq) + W[1:num_vars, 1:num_vars] = hessian[primal_idx, primal_idx] + # Jacobian of the constraints + A = spzeros(num_cons, num_vars + num_ineq) + # A is the Jacobian of: c(x) = b and c(x) <= b and c(x) >= b, possibly all mixed up. + # Each of the will be re-written as: + # c(x) - b = 0 + # c(x) - b - su = 0, su <= 0 + # c(x) - b - sl = 0, sl >= 0 + # Jacobian of the constraints wrt the primal variables + A[:, 1:num_vars] = jacobian[:, primal_idx] + # Jacobian of the constraints wrt the slack variables + for (i, j) in enumerate(geq_locations) + A[j, num_vars+i] = -1 + end + for (i, j) in enumerate(leq_locations) + A[j, num_vars+length(geq_locations)+i] = -1 + end + # Partial second derivative of the lagrangian wrt primal solution and parameters + ∇ₓₚL = spzeros(num_vars + num_ineq, num_parms) + ∇ₓₚL[1:num_vars, :] = hessian[primal_idx, params_idx] + # Partial derivative of the equality constraintswith wrt parameters + ∇ₚC = jacobian[:, params_idx] + + # M matrix - KKT Jacobian w.r.t. primal and dual solution + # Based on the implicit function diferentiation method used in sIpopt to derive sensitivities + # Ref: sIPOPT paper https://optimization-online.org/wp-content/uploads/2011/04/3008.pdf. 
+ # M = [ + # [W A' -I I]; + # [A 0 0 0]; + # [V_L 0 (X - X_L) 0] + # [V_U 0 0 0 (X_U - X)] + # ] + len_w = num_vars + num_ineq + M = spzeros( + len_w + num_cons + num_low + num_up, + len_w + num_cons + num_low + num_up, + ) + + M[1:len_w, 1:len_w] = W + M[1:len_w, len_w+1:len_w+num_cons] = A' + M[len_w+1:len_w+num_cons, 1:len_w] = A + M[1:len_w, len_w+num_cons+1:len_w+num_cons+num_low] = I_L + M[len_w+num_cons+1:len_w+num_cons+num_low, 1:len_w] = V_L + M[ + len_w+num_cons+1:len_w+num_cons+num_low, + len_w+num_cons+1:len_w+num_cons+num_low, + ] = X_lb + M[len_w+num_cons+num_low+1:len_w+num_cons+num_low+num_up, 1:len_w] = V_U + M[ + len_w+num_cons+num_low+1:len_w+num_cons+num_low+num_up, + len_w+num_cons+num_low+1:len_w+num_cons+num_low+num_up, + ] = X_ub + M[1:len_w, len_w+num_cons+num_low+1:end] = I_U + + # N matrix + # N = [∇ₓₚL ; ∇ₚC; zeros(num_low + num_up, num_parms)] + N = spzeros(len_w + num_cons + num_low + num_up, num_parms) + N[1:len_w, :] = ∇ₓₚL + N[len_w+1:len_w+num_cons, :] = ∇ₚC + + return M, N +end + +""" + _compute_derivatives_no_relax(model::Model, cons::Vector{MOI.Nonlinear.ConstraintIndex}, + _X::AbstractVector, _V_L::AbstractVector, _X_L::AbstractVector, _V_U::AbstractVector, _X_U::AbstractVector, leq_locations::Vector{Z}, geq_locations::Vector{Z}, ineq_locations::Vector{Z}, + has_up::Vector{Z}, has_low::Vector{Z} + ) + +Compute the derivatives of the solution w.r.t. the parameters without accounting for active set changes. +""" +function _compute_derivatives_no_relax( + model::Model, + cons::Vector{MOI.Nonlinear.ConstraintIndex}, + _X::AbstractVector, + _V_L::AbstractVector, + _X_L::AbstractVector, + _V_U::AbstractVector, + _X_U::AbstractVector, + leq_locations::Vector{Z}, + geq_locations::Vector{Z}, + ineq_locations::Vector{Z}, + has_up::Vector{Z}, + has_low::Vector{Z}; +) where {Z<:Integer} + M, N = _build_sensitivity_matrices( + model, + cons, + _X, + _V_L, + _X_L, + _V_U, + _X_U, + leq_locations, + geq_locations, + ineq_locations, + has_up, + has_low, + ) + + # Sensitivity of the solution (primal-dual_constraints-dual_bounds) w.r.t. the parameters + K = model.input_cache.factorization(M, model) + if isnothing(K) + return zeros(size(M, 1), size(N, 2)), K, N + end + ∂s = zeros(size(M, 1), size(N, 2)) + # ∂s = - (K \ N) # Sensitivity + ldiv!(∂s, K, N) + ∂s = -∂s # multiply by -1 since we used ∂s as an auxilary variable to calculate K \ N + + return ∂s, K, N +end + +_sense_mult(model::Model) = objective_sense(model) == MOI.MIN_SENSE ? 1.0 : -1.0 + +""" + _compute_sensitivity(model::Model; tol=1e-6) + +Compute the sensitivity of the solution given sensitivity of the parameters (Δp). 
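+Returns the full Jacobian `∂s/∂p`, with signs adjusted to the JuMP dual convention; `forward_differentiate!` multiplies it by the parameter perturbation `Δp`, and `reverse_differentiate!` multiplies its transpose by the primal/dual seed vector.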
+""" +function _compute_sensitivity(model::Model; tol = 1e-6) + # Solution and bounds + X, + V_L, + X_L, + V_U, + X_U, + leq_locations, + geq_locations, + ineq_locations, + has_up, + has_low, + cons = _compute_solution_and_bounds(model; tol = tol) + # Compute derivatives + # ∂s = [∂x; ∂λ; ∂ν_L; ∂ν_U] + ∂s, K, N = _compute_derivatives_no_relax( + model, + cons, + X, + V_L, + X_L, + V_U, + X_U, + leq_locations, + geq_locations, + ineq_locations, + has_up, + has_low, + ) + ## Adjust signs based on JuMP convention + num_vars = _get_num_primal_vars(model) + num_cons = _get_num_constraints(model) + num_ineq = length(ineq_locations) + num_w = num_vars + num_ineq + num_lower = length(has_low) + _sense_multiplier = _sense_mult(model) + # Duals + ∂s[num_w+1:num_w+num_cons, :] *= -_sense_multiplier + # Dual bounds lower + ∂s[num_w+num_cons+1:num_w+num_cons+num_lower, :] *= _sense_multiplier + # Dual bounds upper + ∂s[num_w+num_cons+num_lower+1:end, :] *= -_sense_multiplier + return ∂s +end diff --git a/src/diff_opt.jl b/src/diff_opt.jl index c91ab9e8..146cc8ea 100644 --- a/src/diff_opt.jl +++ b/src/diff_opt.jl @@ -13,6 +13,9 @@ const MOIDD = MOI.Utilities.DoubleDicts Base.@kwdef mutable struct InputCache dx::Dict{MOI.VariableIndex,Float64} = Dict{MOI.VariableIndex,Float64}()# dz for QP + dp::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}() # Specifically for NonLinearProgram + dy::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}() + # Dual sensitivity currently only works for NonLinearProgram # ds # dy #= [d\lambda, d\nu] for QP # FIXME Would it be possible to have a DoubleDict where the value depends @@ -25,10 +28,13 @@ Base.@kwdef mutable struct InputCache vector_constraints::MOIDD.DoubleDict{MOI.VectorAffineFunction{Float64}} = MOIDD.DoubleDict{MOI.VectorAffineFunction{Float64}}() # also includes G for QPs objective::Union{Nothing,MOI.AbstractScalarFunction} = nothing + factorization::Union{Nothing,Function} = nothing end function Base.empty!(cache::InputCache) empty!(cache.dx) + empty!(cache.dp) + empty!(cache.dy) empty!(cache.scalar_constraints) empty!(cache.vector_constraints) cache.objective = nothing @@ -87,6 +93,28 @@ where `x` and `y` are the relevant `MOI.VariableIndex`. """ struct ForwardObjectiveFunction <: MOI.AbstractModelAttribute end +""" + MFactorization <: MOI.AbstractModelAttribute + +A `MOI.AbstractModelAttribute` to set which factorization function to use for the +implict function diferentiation needed to compute the sensitivities for +`NonLinearProgram` models. 
+ +The function will be called with the following signature: +```julia +function factorization(M::SparseMatrixCSC{T DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + @variable(model, p ∈ MOI.Parameter(1.0)) + @variable(model, p2 ∈ MOI.Parameter(2.0)) + @variable(model, p3 ∈ MOI.Parameter(100.0)) + @variable(model, x[i = 1:2], start = -i) + @constraint(model, g_1, x[1]^2 <= p) + @constraint(model, g_2, p * (x[1] + x[2])^2 <= p2) + if ismin + @objective(model, Min, (1 - x[1])^2 + p3 * (x[2] - x[1]^2)^2) + else + @objective(model, Max, -(1 - x[1])^2 - p3 * (x[2] - x[1]^2)^2) + end + + return model, x, [g_1; g_2], [p; p2; p3] +end + +################################################ +#= +From sIpopt paper: https://optimization-online.org/2011/04/3008/ +=# +################################################ + +function create_nonlinear_jump_model_sipopt(; ismin = true) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + @variable(model, p1 ∈ MOI.Parameter(4.5)) + @variable(model, p2 ∈ MOI.Parameter(1.0)) + @variable(model, x[i = 1:3] >= 0, start = -i) + @constraint(model, g_1, 6 * x[1] + 3 * x[2] + 2 * x[3] - p1 == 0) + @constraint(model, g_2, p2 * x[1] + x[2] - x[3] - 1 == 0) + if ismin + @objective(model, Min, x[1]^2 + x[2]^2 + x[3]^2) + else + @objective(model, Max, -x[1]^2 - x[2]^2 - x[3]^2) + end + return model, x, [g_1; g_2], [p1; p2] +end + +################################################ +#= +Simple Problems +=# +################################################ + +function create_jump_model_1(p_val = [1.5]) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p ∈ MOI.Parameter(p_val[1])) + + # Variables + @variable(model, x) + + # Constraints + @constraint(model, con1, x >= p) + @constraint(model, con2, x >= 2) + @objective(model, Min, x^2) + + return model, [x], [con1; con2], [p] +end + +function create_jump_model_2(p_val = [1.5]) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p ∈ MOI.Parameter(p_val[1])) + + # Variables + @variable(model, x >= 2.0) + + # Constraints + @constraint(model, con1, x >= p) + @objective(model, Min, x^2) + + return model, [x], [con1], [p] +end + +function create_jump_model_3(p_val = [-1.5]) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p ∈ MOI.Parameter(p_val[1])) + + # Variables + @variable(model, x) + + # Constraints + @constraint(model, con1, x <= p) + @constraint(model, con2, x <= -2) + @objective(model, Min, -x) + + return model, [x], [con1; con2], [p] +end + +function create_jump_model_4(p_val = [1.5]) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p ∈ MOI.Parameter(p_val[1])) + + # Variables + @variable(model, x) + + # Constraints + @constraint(model, con1, x <= p) + @constraint(model, con2, x <= 2) + @objective(model, Max, x) + + return model, [x], [con1; con2], [p] +end + +function create_jump_model_5(p_val = [1.5]) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p ∈ MOI.Parameter(p_val[1])) + + # Variables + @variable(model, x) + + # Constraints + @constraint(model, con1, x >= p) + @constraint(model, con2, x >= 2) + @objective(model, Max, -x) + + return model, [x], [con1; con2], [p] +end + +# Softmax model +h(y) = -sum(y .* log.(y)) +softmax(x) = exp.(x) / 
sum(exp.(x)) +function create_jump_model_6(p_a = collect(1.0:0.1:2.0)) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, x[i = 1:length(p_a)] ∈ MOI.Parameter.(p_a)) + + # Variables + @variable(model, y[1:length(p_a)] >= 0.0) + + # Constraints + @constraint(model, con1, sum(y) == 1) + @constraint(model, con2[i = 1:length(x)], y[i] <= 1) + + # Objective + @objective(model, Max, dot(x, y) + h(y)) + + return model, y, [con1; con2], x +end + +function create_jump_model_7(p_val = [1.5], g = sin) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p ∈ MOI.Parameter(p_val[1])) + + # Variables + @variable(model, x) + + # Constraints + @constraint(model, con1, x * g(p) == 1) + @objective(model, Min, 0) + + return model, [x], [con1], [p] +end + +################################################ +#= +Non Linear Problems +=# +################################################ + +function create_nonlinear_jump_model_1(p_val = [1.0; 2.0; 100]; ismin = true) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p[i = 1:3] ∈ MOI.Parameter.(p_val)) + + # Variables + @variable(model, x) + @variable(model, y) + + # Constraints + @constraint(model, con1, y >= p[1] * sin(x)) # NLP Constraint + @constraint(model, con2, x + y == p[1]) + @constraint(model, con3, p[2] * x >= 0.1) + if ismin + @objective(model, Min, (1 - x)^2 + p[3] * (y - x^2)^2) # NLP Objective + else + @objective(model, Max, -(1 - x)^2 - p[3] * (y - x^2)^2) # NLP Objective + end + + return model, [x; y], [con1; con2; con3], p +end + +function create_nonlinear_jump_model_2(p_val = [3.0; 2.0; 10]; ismin = true) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p[i = 1:3] ∈ MOI.Parameter.(p_val)) + + # Variables + @variable(model, x <= 10) + @variable(model, y) + + # Constraints + @constraint(model, con1, y >= p[1] * sin(x)) # NLP Constraint + @constraint(model, con2, x + y == p[1]) + @constraint(model, con3, p[2] * x >= 0.1) + if ismin + @objective(model, Min, (1 - x)^2 + p[3] * (y - x^2)^2) # NLP Objective + else + @objective(model, Max, -(1 - x)^2 - p[3] * (y - x^2)^2) # NLP Objective + end + + return model, [x; y], [con1; con2; con3], p +end + +function create_nonlinear_jump_model_3(p_val = [3.0; 2.0; 10]; ismin = true) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p[i = 1:3] ∈ MOI.Parameter.(p_val)) + + # Variables + @variable(model, x <= 10) + @variable(model, y) + + # Constraints + @constraint(model, con1, y >= p[1] * sin(x)) # NLP Constraint + @constraint(model, con2, x + y == p[1]) + @constraint(model, con3, p[2] * x >= 0.1) + if ismin + @objective(model, Min, (1 - x)^2 + p[3] * (y - x^2)^2) # NLP Objective + else + @objective(model, Max, -(1 - x)^2 - p[3] * (y - x^2)^2) # NLP Objective + end + return model, [x; y], [con1; con2; con3], p +end + +function create_nonlinear_jump_model_4(p_val = [1.0; 2.0; 100]; ismin = true) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p[i = 1:3] ∈ MOI.Parameter.(p_val)) + + # Variables + @variable(model, x) + @variable(model, y) + + # Constraints + @constraint(model, con0, x == p[1] - 0.5) + @constraint(model, con1, y >= p[1] * sin(x)) # NLP Constraint + @constraint(model, con2, x + y == p[1]) + 
@constraint(model, con3, p[2] * x >= 0.1) + if ismin + @objective(model, Min, (1 - x)^2 + p[3] * (y - x^2)^2) # NLP Objective + else + @objective(model, Max, -(1 - x)^2 - p[3] * (y - x^2)^2) # NLP Objective + end + + return model, [x; y], [con1; con2; con3], p +end + +function create_nonlinear_jump_model_5(p_val = [1.0; 2.0; 100]; ismin = true) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p[i = 1:3] ∈ MOI.Parameter.(p_val)) + + # Variables + @variable(model, x) + @variable(model, y) + + # Constraints + fix(x, 0.5) + con0 = JuMP.FixRef(x) + @constraint(model, con1, y >= p[1] * sin(x)) # NLP Constraint + @constraint(model, con2, x + y == p[1]) + @constraint(model, con3, p[2] * x >= 0.1) + if ismin + @objective(model, Min, (1 - x)^2 + p[3] * (y - x^2)^2) # NLP Objective + else + @objective(model, Max, -(1 - x)^2 - p[3] * (y - x^2)^2) # NLP Objective + end + + return model, [x; y], [con0; con1; con2; con3], p +end + +function create_nonlinear_jump_model_6(p_val = [100.0; 200.0]; ismin = true) + model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + set_silent(model) + + # Parameters + @variable(model, p[i = 1:2] ∈ MOI.Parameter.(p_val)) + + # Variables + @variable(model, x[i = 1:2]) + @variable(model, z) # >= 2.0) + @variable(model, w) # <= 3.0) + # @variable(model, f[1:2]) + + # Constraints + @constraint( + model, + con1, + x[2] - 0.0001 * x[1]^2 - 0.2 * z^2 - 0.3 * w^2 >= p[1] + 1 + ) + @constraint( + model, + con2, + x[1] + 0.001 * x[2]^2 + 0.5 * w^2 + 0.4 * z^2 <= 10 * p[1] + 2 + ) + @constraint(model, con3, z^2 + w^2 == 13) + if ismin + @objective(model, Min, x[2] - x[1] + z - w) + else + @objective(model, Max, -x[2] + x[1] - z + w) + end + + return model, [x; z; w], [con2; con3], p +end diff --git a/test/nlp_program.jl b/test/nlp_program.jl new file mode 100644 index 00000000..9009fb50 --- /dev/null +++ b/test/nlp_program.jl @@ -0,0 +1,740 @@ +module TestNLPProgram + +using DiffOpt +using JuMP +using Ipopt +using Test +using FiniteDiff +import DelimitedFiles +using SparseArrays + +include(joinpath(@__DIR__, "data/nlp_problems.jl")) + +function runtests() + for name in names(@__MODULE__; all = true) + if startswith("$name", "test_") + @testset "$(name)" begin + getfield(@__MODULE__, name)() + end + end + end + return +end + +################################################ +#= +# Test JuMP Hessian and Jacobian + +From JuMP Tutorial for Querying Hessians: +https://github.com/jump-dev/JuMP.jl/blob/301d46e81cb66c74c6e22cd89fb89ced740f157b/docs/src/tutorials/nonlinear/querying_hessians.jl#L67-L72 +=# +################################################ + +function analytic_hessian(x, σ, μ, p) + g_1_H = [2.0 0.0; 0.0 0.0] + g_2_H = p[1] * [2.0 2.0; 2.0 2.0] + f_H = zeros(2, 2) + f_H[1, 1] = 2.0 + p[3] * 12.0 * x[1]^2 - p[3] * 4.0 * x[2] + f_H[1, 2] = f_H[2, 1] = -p[3] * 4.0 * x[1] + f_H[2, 2] = p[3] * 2.0 + return σ * f_H + μ' * [g_1_H, g_2_H] +end + +function analytic_jacobian(x, p) + g_1_J = [ + 2.0 * x[1], # ∂g_1/∂x_1 + 0.0, # ∂g_1/∂x_2 + -1.0, # ∂g_1/∂p_1 + 0.0, # ∂g_1/∂p_2 + 0.0, # ∂g_1/∂p_3 + ] + g_2_J = [ + p[1] * 2.0 * (x[1] + x[2]), # ∂g_2/∂x_1 + 2.0 * (x[1] + x[2]), # ∂g_2/∂x_2 + (x[1] + x[2])^2, # ∂g_2/∂p_1 + -1.0, # ∂g_2/∂p_2 + 0.0, # ∂g_2/∂p_3 + ] + return hcat(g_2_J, g_1_J)'[:, :] +end + +function _test_create_evaluator(nlp_model) + @testset "Create Evaluator" begin + cache = DiffOpt.NonLinearProgram._cache_evaluator!(nlp_model) + @test cache.evaluator isa MOI.Nonlinear.Evaluator + @test cache.cons isa 
Vector{MOI.Nonlinear.ConstraintIndex} + end +end + +function test_compute_optimal_hess_jacobian() + @testset "Compute Optimal Hessian and Jacobian" begin + # Model + model, x, cons, params = create_nonlinear_jump_model() + # Optimize + optimize!(model) + @assert is_solved_and_feasible(model) + # Create evaluator + nlp_model = DiffOpt._diff(model.moi_backend.optimizer.model).model + _test_create_evaluator(nlp_model) + cons = nlp_model.cache.cons + y = [ + nlp_model.y[nlp_model.model.nlp_index_2_constraint[row].value] + for row in cons + ] + hessian, jacobian = + DiffOpt.NonLinearProgram._compute_optimal_hess_jac(nlp_model, cons) + # Check Hessian + primal_idx = [i.value for i in nlp_model.cache.primal_vars] + params_idx = [i.value for i in nlp_model.cache.params] + @test all( + isapprox( + hessian[primal_idx, primal_idx], + analytic_hessian( + nlp_model.x[primal_idx], + 1.0, + -y, + nlp_model.x[params_idx], + ); + atol = 1, + ), + ) + # Check Jacobian + @test all( + isapprox( + jacobian[:, [primal_idx; params_idx]], + analytic_jacobian( + nlp_model.x[primal_idx], + nlp_model.x[params_idx], + ), + ), + ) + end +end + +################################################ +#= +# Test Sensitivity through analytical +=# +################################################ + +function test_analytical_simple(; P = 2) # Number of parameters + @testset "Bounds Bounds" begin + m = Model( + () -> DiffOpt.diff_optimizer( + Ipopt.Optimizer; + with_parametric_opt_interface = false, + ), + ) + + @variable(m, 0 ≤ x[1:P] ≤ 1) + @variable(m, p[1:P] ∈ Parameter.(0.5)) + + @constraint(m, x .≥ p) + + @objective(m, Min, sum(x)) + + optimize!(m) + @assert is_solved_and_feasible(m) + + # Set pertubations + Δp = [0.1 for _ in 1:P] + MOI.set.( + m, + DiffOpt.ForwardConstraintSet(), + ParameterRef.(p), + Parameter.(Δp), + ) + + # Compute derivatives + DiffOpt.forward_differentiate!(m) + + # Test sensitivities + @test all( + isapprox( + [ + MOI.get(m, DiffOpt.ForwardVariablePrimal(), x[i]) for + i in 1:P + ], + [0.1 for _ in 1:P]; + atol = 1e-8, + ), + ) + end + @testset "Bounds as RHS constraints" begin + m = Model( + () -> DiffOpt.diff_optimizer( + Ipopt.Optimizer; + with_parametric_opt_interface = false, + ), + ) + + @variable(m, x[1:P]) + @constraint(m, x .≥ 0) + @constraint(m, x .≤ 1) + @variable(m, p[1:P] ∈ Parameter.(0.5)) + + @constraint(m, x .≥ p) + + @objective(m, Min, sum(x)) + + optimize!(m) + @assert is_solved_and_feasible(m) + + # Set pertubations + Δp = [0.1 for _ in 1:P] + MOI.set.( + m, + DiffOpt.ForwardConstraintSet(), + ParameterRef.(p), + Parameter.(Δp), + ) + + # Compute derivatives + DiffOpt.forward_differentiate!(m) + + # Test sensitivities + @test all( + isapprox( + [ + MOI.get(m, DiffOpt.ForwardVariablePrimal(), x[i]) for + i in 1:P + ], + [0.1 for _ in 1:P]; + atol = 1e-8, + ), + ) + end + @testset "Bounds as Mixed constraints" begin + m = Model( + () -> DiffOpt.diff_optimizer( + Ipopt.Optimizer; + with_parametric_opt_interface = false, + ), + ) + + @variable(m, x[1:P]) + @constraint(m, 0 .≤ x) + @constraint(m, x .≤ 1) + @variable(m, p[1:P] ∈ Parameter.(0.5)) + + @constraint(m, x .≥ p) + + @objective(m, Min, sum(x)) + + optimize!(m) + @assert is_solved_and_feasible(m) + + # Set pertubations + Δp = [0.1 for _ in 1:P] + MOI.set.( + m, + DiffOpt.ForwardConstraintSet(), + ParameterRef.(p), + Parameter.(Δp), + ) + + # Compute derivatives + DiffOpt.forward_differentiate!(m) + + # Test sensitivities + @test all( + isapprox( + [ + MOI.get(m, DiffOpt.ForwardVariablePrimal(), x[i]) for + i in 1:P + ], + [0.1 
+
+################################################
+#=
+# Test Sensitivity through analytical
+=#
+################################################
+
+function test_analytical_simple(; P = 2) # Number of parameters
+    @testset "Variable bounds" begin
+        m = Model(
+            () -> DiffOpt.diff_optimizer(
+                Ipopt.Optimizer;
+                with_parametric_opt_interface = false,
+            ),
+        )
+
+        @variable(m, 0 ≤ x[1:P] ≤ 1)
+        @variable(m, p[1:P] ∈ Parameter.(0.5))
+
+        @constraint(m, x .≥ p)
+
+        @objective(m, Min, sum(x))
+
+        optimize!(m)
+        @assert is_solved_and_feasible(m)
+
+        # Set perturbations
+        Δp = [0.1 for _ in 1:P]
+        MOI.set.(
+            m,
+            DiffOpt.ForwardConstraintSet(),
+            ParameterRef.(p),
+            Parameter.(Δp),
+        )
+
+        # Compute derivatives
+        DiffOpt.forward_differentiate!(m)
+
+        # Test sensitivities
+        @test all(
+            isapprox(
+                [
+                    MOI.get(m, DiffOpt.ForwardVariablePrimal(), x[i]) for
+                    i in 1:P
+                ],
+                [0.1 for _ in 1:P];
+                atol = 1e-8,
+            ),
+        )
+    end
+    @testset "Bounds as RHS constraints" begin
+        m = Model(
+            () -> DiffOpt.diff_optimizer(
+                Ipopt.Optimizer;
+                with_parametric_opt_interface = false,
+            ),
+        )
+
+        @variable(m, x[1:P])
+        @constraint(m, x .≥ 0)
+        @constraint(m, x .≤ 1)
+        @variable(m, p[1:P] ∈ Parameter.(0.5))
+
+        @constraint(m, x .≥ p)
+
+        @objective(m, Min, sum(x))
+
+        optimize!(m)
+        @assert is_solved_and_feasible(m)
+
+        # Set perturbations
+        Δp = [0.1 for _ in 1:P]
+        MOI.set.(
+            m,
+            DiffOpt.ForwardConstraintSet(),
+            ParameterRef.(p),
+            Parameter.(Δp),
+        )
+
+        # Compute derivatives
+        DiffOpt.forward_differentiate!(m)
+
+        # Test sensitivities
+        @test all(
+            isapprox(
+                [
+                    MOI.get(m, DiffOpt.ForwardVariablePrimal(), x[i]) for
+                    i in 1:P
+                ],
+                [0.1 for _ in 1:P];
+                atol = 1e-8,
+            ),
+        )
+    end
+    @testset "Bounds as Mixed constraints" begin
+        m = Model(
+            () -> DiffOpt.diff_optimizer(
+                Ipopt.Optimizer;
+                with_parametric_opt_interface = false,
+            ),
+        )
+
+        @variable(m, x[1:P])
+        @constraint(m, 0 .≤ x)
+        @constraint(m, x .≤ 1)
+        @variable(m, p[1:P] ∈ Parameter.(0.5))
+
+        @constraint(m, x .≥ p)
+
+        @objective(m, Min, sum(x))
+
+        optimize!(m)
+        @assert is_solved_and_feasible(m)
+
+        # Set perturbations
+        Δp = [0.1 for _ in 1:P]
+        MOI.set.(
+            m,
+            DiffOpt.ForwardConstraintSet(),
+            ParameterRef.(p),
+            Parameter.(Δp),
+        )
+
+        # Compute derivatives
+        DiffOpt.forward_differentiate!(m)
+
+        # Test sensitivities
+        @test all(
+            isapprox(
+                [
+                    MOI.get(m, DiffOpt.ForwardVariablePrimal(), x[i]) for
+                    i in 1:P
+                ],
+                [0.1 for _ in 1:P];
+                atol = 1e-8,
+            ),
+        )
+    end
+    @testset "Bounds as LHS constraints" begin
+        m = Model(
+            () -> DiffOpt.diff_optimizer(
+                Ipopt.Optimizer;
+                with_parametric_opt_interface = false,
+            ),
+        )
+
+        @variable(m, x[1:P])
+        @constraint(m, 0 .≤ x)
+        @constraint(m, 1 .≥ x)
+        @variable(m, p[1:P] ∈ Parameter.(0.5))
+
+        @constraint(m, x .≥ p)
+
+        @objective(m, Min, sum(x))
+
+        optimize!(m)
+        @assert is_solved_and_feasible(m)
+
+        # Set perturbations
+        Δp = [0.1 for _ in 1:P]
+        MOI.set.(
+            m,
+            DiffOpt.ForwardConstraintSet(),
+            ParameterRef.(p),
+            Parameter.(Δp),
+        )
+
+        # Compute derivatives
+        DiffOpt.forward_differentiate!(m)
+
+        # Test sensitivities
+        @test all(
+            isapprox(
+                [
+                    MOI.get(m, DiffOpt.ForwardVariablePrimal(), x[i]) for
+                    i in 1:P
+                ],
+                [0.1 for _ in 1:P];
+                atol = 1e-8,
+            ),
+        )
+    end
+end
+
+# f(x, p) = 0
+# x = g(p)
+# ∂x/∂p = ∂g/∂p
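The three comment lines above are the rationale behind the hand-computed expectations collected in `DICT_PROBLEMS_Analytical_no_cc` below: writing the active constraint/KKT system as `f(x, p) = 0` with a locally unique solution map `x = g(p)`, the implicit function theorem gives, assuming `∂f/∂x` is invertible at the solution,

```math
\frac{\partial g}{\partial p} = -\left(\frac{\partial f}{\partial x}\right)^{-1} \frac{\partial f}{\partial p}
```

and the `Δx`, `Δy`, `Δvu`, `Δvl` entries of each problem record that directional derivative for the given `Δp`.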
+
+DICT_PROBLEMS_Analytical_no_cc = Dict(
+    "geq no impact" => (
+        p_a = [1.5],
+        Δp = [0.2],
+        Δx = [0.0],
+        Δy = [0.0; 0.0],
+        Δvu = [],
+        Δvl = [],
+        model_generator = create_jump_model_1,
+    ),
+    "geq impact" => (
+        p_a = [2.1],
+        Δp = [0.2],
+        Δx = [0.2],
+        Δy = [0.4; 0.0],
+        Δvu = [],
+        Δvl = [],
+        model_generator = create_jump_model_1,
+    ),
+    "geq bound impact" => (
+        p_a = [2.1],
+        Δp = [0.2],
+        Δx = [0.2],
+        Δy = [0.4],
+        Δvu = [],
+        Δvl = [0.0],
+        model_generator = create_jump_model_2,
+    ),
+    "leq no impact" => (
+        p_a = [-1.5],
+        Δp = [-0.2],
+        Δx = [0.0],
+        Δy = [0.0; 0.0],
+        Δvu = [],
+        Δvl = [],
+        model_generator = create_jump_model_3,
+    ),
+    "leq impact" => (
+        p_a = [-2.1],
+        Δp = [-0.2],
+        Δx = [-0.2],
+        Δy = [0.0; 0.0],
+        Δvu = [],
+        Δvl = [],
+        model_generator = create_jump_model_3,
+    ),
+    "leq no impact max" => (
+        p_a = [2.1],
+        Δp = [0.2],
+        Δx = [0.0],
+        Δy = [0.0; 0.0],
+        Δvu = [],
+        Δvl = [],
+        model_generator = create_jump_model_4,
+    ),
+    "leq impact max" => (
+        p_a = [1.5],
+        Δp = [0.2],
+        Δx = [0.2],
+        Δy = [0.0; 0.0],
+        Δvu = [],
+        Δvl = [],
+        model_generator = create_jump_model_4,
+    ),
+    "geq no impact max" => (
+        p_a = [1.5],
+        Δp = [0.2],
+        Δx = [0.0],
+        Δy = [0.0; 0.0],
+        Δvu = [],
+        Δvl = [],
+        model_generator = create_jump_model_5,
+    ),
+    "geq impact max" => (
+        p_a = [2.1],
+        Δp = [0.2],
+        Δx = [0.2],
+        Δy = [0.0; 0.0],
+        Δvu = [],
+        Δvl = [],
+        model_generator = create_jump_model_5,
+    ),
+)
+
+function test_compute_derivatives_Analytical(;
+    DICT_PROBLEMS = DICT_PROBLEMS_Analytical_no_cc,
+)
+    @testset "Compute Derivatives Analytical: $problem_name" for (
+        problem_name,
+        (p_a, Δp, Δx, Δy, Δvu, Δvl, model_generator),
+    ) in DICT_PROBLEMS
+        # OPT Problem
+        model, primal_vars, cons, params = model_generator()
+        set_parameter_value.(params, p_a)
+        optimize!(model)
+        @assert is_solved_and_feasible(model)
+        # Set perturbations
+        MOI.set.(
+            model,
+            DiffOpt.ForwardConstraintSet(),
+            ParameterRef.(params),
+            Parameter.(Δp),
+        )
+        # Compute derivatives
+        DiffOpt.forward_differentiate!(model)
+        # Test sensitivities primal_vars
+        if !isempty(Δx)
+            @test all(
+                isapprox.(
+                    [
+                        MOI.get(model, DiffOpt.ForwardVariablePrimal(), var) for
+                        var in primal_vars
+                    ],
+                    Δx;
+                    atol = 1e-4,
+                ),
+            )
+        end
+        # Test sensitivities cons
+        if !isempty(Δy)
+            @test all(
+                isapprox.(
+                    [
+                        MOI.get(model, DiffOpt.ForwardConstraintDual(), con) for
+                        con in cons
+                    ],
+                    Δy;
+                    atol = 1e-4,
+                ),
+            )
+        end
+        # Test sensitivities dual vars
+        if !isempty(Δvu)
+            primal_vars_upper = [v for v in primal_vars if has_upper_bound(v)]
+            @test all(
+                isapprox.(
+                    [
+                        MOI.get(
+                            model,
+                            DiffOpt.ForwardConstraintDual(),
+                            UpperBoundRef(var),
+                        ) for var in primal_vars_upper
+                    ],
+                    Δvu;
+                    atol = 1e-4,
+                ),
+            )
+        end
+        if !isempty(Δvl)
+            primal_vars_lower = [v for v in primal_vars if has_lower_bound(v)]
+            @test all(
+                isapprox.(
+                    [
+                        MOI.get(
+                            model,
+                            DiffOpt.ForwardConstraintDual(),
+                            LowerBoundRef(var),
+                        ) for var in primal_vars_lower
+                    ],
+                    Δvl;
+                    atol = 1e-4,
+                ),
+            )
+        end
+    end
+end
+
+################################################
+#=
+# Test Sensitivity through finite differences
+=#
+################################################
+
+function stack_solution(model, p_a, params, primal_vars, cons)
+    set_parameter_value.(params, p_a)
+    optimize!(model)
+    @assert is_solved_and_feasible(model)
+    return [value.(primal_vars); dual.(cons)]
+end
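`stack_solution` re-solves the model at a given parameter vector and stacks the primal solution on top of the constraint duals, so the finite-difference test below checks the forward-mode output against a directional derivative of that stacked solution map:

```math
s(p) = \begin{bmatrix} x^\star(p) \\ \lambda^\star(p) \end{bmatrix},
\qquad
\Delta s \approx \frac{\partial s}{\partial p}\,\Delta p
```

where the Jacobian is estimated with `FiniteDiff.finite_difference_jacobian` and the result is compared entrywise against `ForwardVariablePrimal` and `ForwardConstraintDual` with `atol = 1e-4`.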
+
+DICT_PROBLEMS_no_cc = Dict(
+    "QP_sIpopt" => (
+        p_a = [4.5; 1.0],
+        Δp = [0.001; 0.0],
+        model_generator = create_nonlinear_jump_model_sipopt,
+    ),
+    "NLP_1" => (
+        p_a = [3.0; 2.0; 200],
+        Δp = [0.001; 0.0; 0.0],
+        model_generator = create_nonlinear_jump_model_1,
+    ),
+    "NLP_1_2" => (
+        p_a = [3.0; 2.0; 200],
+        Δp = [0.0; 0.001; 0.0],
+        model_generator = create_nonlinear_jump_model_1,
+    ),
+    "NLP_1_3" => (
+        p_a = [3.0; 2.0; 200],
+        Δp = [0.0; 0.0; 0.001],
+        model_generator = create_nonlinear_jump_model_1,
+    ),
+    "NLP_1_4" => (
+        p_a = [3.0; 2.0; 200],
+        Δp = [0.1; 0.5; 0.5],
+        model_generator = create_nonlinear_jump_model_1,
+    ),
+    "NLP_1_5" => (
+        p_a = [3.0; 2.0; 200],
+        Δp = [0.5; -0.5; 0.1],
+        model_generator = create_nonlinear_jump_model_1,
+    ),
+    "NLP_2" => (
+        p_a = [3.0; 2.0; 10],
+        Δp = [0.01; 0.0; 0.0],
+        model_generator = create_nonlinear_jump_model_2,
+    ),
+    "NLP_2_2" => (
+        p_a = [3.0; 2.0; 10],
+        Δp = [-0.1; 0.0; 0.0],
+        model_generator = create_nonlinear_jump_model_2,
+    ),
+    "NLP_3" => (
+        p_a = [3.0; 2.0; 10],
+        Δp = [0.001; 0.0; 0.0],
+        model_generator = create_nonlinear_jump_model_3,
+    ),
+    "NLP_3_2" => (
+        p_a = [3.0; 2.0; 10],
+        Δp = [0.0; 0.001; 0.0],
+        model_generator = create_nonlinear_jump_model_3,
+    ),
+    "NLP_3_3" => (
+        p_a = [3.0; 2.0; 10],
+        Δp = [0.0; 0.0; 0.001],
+        model_generator = create_nonlinear_jump_model_3,
+    ),
+    "NLP_3_4" => (
+        p_a = [3.0; 2.0; 10],
+        Δp = [0.5; 0.001; 0.5],
+        model_generator = create_nonlinear_jump_model_3,
+    ),
+    "NLP_3_5" => (
+        p_a = [3.0; 2.0; 10],
+        Δp = [0.1; 0.3; 0.1],
+        model_generator = create_nonlinear_jump_model_3,
+    ),
+    "NLP_3_6" => (
+        p_a = [3.0; 2.0; 10],
+        Δp = [0.1; 0.2; -0.5],
+        model_generator = create_nonlinear_jump_model_3,
+    ),
+    "NLP_4" => (
+        p_a = [1.0; 2.0; 100],
+        Δp = [0.001; 0.0; 0.0],
+        model_generator = create_nonlinear_jump_model_4,
+    ),
+    "NLP_5" => (
+        p_a = [1.0; 2.0; 100],
+        Δp = [0.0; 0.001; 0.0],
+        model_generator = create_nonlinear_jump_model_5,
+    ),
+    "NLP_6" => (
+        p_a = [100.0; 200.0],
+        Δp = [0.2; 0.5],
+        model_generator = create_nonlinear_jump_model_6,
+    ),
+)
+
+function test_compute_derivatives_Finite_Diff(;
+    DICT_PROBLEMS = DICT_PROBLEMS_no_cc,
+)
+    @testset "Compute Derivatives FiniteDiff: $problem_name" for (
+        problem_name,
+        (p_a, Δp, model_generator),
+    ) in DICT_PROBLEMS,
+        ismin in [true, false]
+        # OPT Problem
+        model, primal_vars, cons, params = model_generator(; ismin = ismin)
+        set_parameter_value.(params, p_a)
+        optimize!(model)
+        @assert is_solved_and_feasible(model)
+        # Set perturbations
+        MOI.set.(
+            model,
+            DiffOpt.ForwardConstraintSet(),
+            ParameterRef.(params),
+            Parameter.(Δp),
+        )
+        # Compute derivatives
+        DiffOpt.forward_differentiate!(model)
+        Δx = [
+            MOI.get(model, DiffOpt.ForwardVariablePrimal(), var) for
+            var in primal_vars
+        ]
+        Δy = [
+            MOI.get(model, DiffOpt.ForwardConstraintDual(), con) for con in cons
+        ]
+        # Compute derivatives using finite differences
+        ∂s_fd =
+            FiniteDiff.finite_difference_jacobian(
+                (p) -> stack_solution(model, p, params, primal_vars, cons),
+                p_a,
+            ) * Δp
+        # Test sensitivities primal_vars
+        @test all(isapprox.(Δx, ∂s_fd[1:length(primal_vars)]; atol = 1e-4))
+        # Test sensitivities cons
+        @test all(isapprox.(Δy, ∂s_fd[length(primal_vars)+1:end]; atol = 1e-4))
+    end
+end
+
+################################################
+#=
+# Test Sensitivity through Reverse Mode
+=#
+################################################
+
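The reverse-mode test below follows the usual adjoint convention: every primal variable is seeded via `DiffOpt.ReverseVariablePrimal()`, `reverse_differentiate!` propagates that seed back through the optimality conditions, and the parameter sensitivities are then read from `DiffOpt.ReverseConstraintSet()` on each `ParameterRef`, which is what gets compared against the reference gradients `dh` and `db`. A minimal sketch of that pattern on a made-up single-variable model (the model and the expected value are purely illustrative, not part of the test suite):

```julia
using JuMP, DiffOpt, Ipopt
import MathOptInterface as MOI

model = JuMP.Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
set_silent(model)
@variable(model, x)
@variable(model, p in MOI.Parameter(1.0))
@constraint(model, con, x >= p)       # binds at the optimum, so x*(p) = p
@objective(model, Min, x^2 + x)
optimize!(model)

# Seed dℓ/dx = 1 and pull the sensitivity back to the parameter.
MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, 1.0)
DiffOpt.reverse_differentiate!(model)
dp = MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(p)).value
# With the constraint active, x*(p) = p, so dp should be ≈ 1.0.
```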
+# Copied from test/jump.jl and adapted for the NLP interface
+function test_differentiating_non_trivial_convex_qp_jump()
+    nz = 10
+    nineq_le = 25
+    neq = 10
+    # read matrices from files
+    names = ["P", "q", "G", "h", "A", "b"]
+    matrices = []
+    for name in names
+        filename = joinpath(@__DIR__, "data", "$name.txt")
+        push!(matrices, DelimitedFiles.readdlm(filename, ' ', Float64, '\n'))
+    end
+    Q, q, G, h, A, b = matrices
+    q = vec(q)
+    h = vec(h)
+    b = vec(b)
+    model = JuMP.Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, MOI.Silent(), true)
+    @variable(model, x[1:nz])
+    @variable(model, p_le[1:nineq_le] ∈ MOI.Parameter.(0.0))
+    @variable(model, p_eq[1:neq] ∈ MOI.Parameter.(0.0))
+    @objective(model, Min, x' * Q * x + q' * x)
+    @constraint(model, c_le, G * x .<= h + p_le)
+    @constraint(model, c_eq, A * x .== b + p_eq)
+    optimize!(model)
+    MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x, 1.0)
+    # compute gradients
+    DiffOpt.reverse_differentiate!(model)
+    # read gradients from files
+    param_names = ["dP", "dq", "dG", "dh", "dA", "db"]
+    grads_actual = []
+    for name in param_names
+        filename = joinpath(@__DIR__, "data", "$(name).txt")
+        push!(
+            grads_actual,
+            DelimitedFiles.readdlm(filename, ' ', Float64, '\n'),
+        )
+    end
+    dh = grads_actual[4]
+    db = grads_actual[6]
+
+    for (i, ci) in enumerate(c_le)
+        @test -dh[i] ≈
+              -MOI.get(
+            model,
+            DiffOpt.ReverseConstraintSet(),
+            ParameterRef(p_le[i]),
+        ).value atol = 1e-2 rtol = 1e-2
+    end
+    for (i, ci) in enumerate(c_eq)
+        @test -db[i] ≈
+              -MOI.get(
+            model,
+            DiffOpt.ReverseConstraintSet(),
+            ParameterRef(p_eq[i]),
+        ).value atol = 1e-2 rtol = 1e-2
+    end
+
+    return
+end
+
+################################################
+#=
+# Test Changing Factorization routine
+=#
+################################################
+
+function test_changing_factorization()
+    P = 2
+    m = Model(
+        () -> DiffOpt.diff_optimizer(
+            Ipopt.Optimizer;
+            with_parametric_opt_interface = false,
+        ),
+    )
+
+    @variable(m, x[1:P])
+    @constraint(m, x .≥ 0)
+    @constraint(m, x .≤ 1)
+    @variable(m, p[1:P] ∈ Parameter.(0.5))
+
+    @constraint(m, x .≥ p)
+
+    @objective(m, Min, sum(x))
+
+    optimize!(m)
+    @assert is_solved_and_feasible(m)
+
+    # Set perturbations
+    Δp = [0.1 for _ in 1:P]
+    MOI.set.(
+        m,
+        DiffOpt.ForwardConstraintSet(),
+        ParameterRef.(p),
+        Parameter.(Δp),
+    )
+
+    # wrong type
+    @test_throws MethodError MOI.set(m, DiffOpt.MFactorization(), 2)
+
+    # correct type but wrong number of arguments
+    MOI.set(m, DiffOpt.MFactorization(), SparseArrays.lu)
+
+    @test_throws MethodError DiffOpt.forward_differentiate!(m)
+
+    # correct type and correct number of arguments
+    MOI.set(m, DiffOpt.MFactorization(), (M, model) -> SparseArrays.lu(M))
+
+    # Compute derivatives
+    DiffOpt.forward_differentiate!(m)
+
+    # Test sensitivities
+    @test all(
+        isapprox(
+            [MOI.get(m, DiffOpt.ForwardVariablePrimal(), x[i]) for i in 1:P],
+            [0.1 for _ in 1:P];
+            atol = 1e-8,
+        ),
+    )
+end
+
+end # module
+
+TestNLPProgram.runtests()
diff --git a/test/parameters.jl b/test/parameters.jl
index dd9b1e66..b38f84c0 100644
--- a/test/parameters.jl
+++ b/test/parameters.jl
@@ -609,7 +609,7 @@ function test_quadratic_objective_qp()
     return
 end
 
-function test_diff_errors()
+function test_diff_errors_POI()
     model = Model(
         () -> DiffOpt.diff_optimizer(
             HiGHS.Optimizer;
@@ -671,6 +671,56 @@ function test_diff_errors()
     return
 end
 
+function test_diff_errors()
+    model = Model(
+        () -> DiffOpt.diff_optimizer(
+            HiGHS.Optimizer;
+            with_parametric_opt_interface = false,
+        ),
+    )
+    set_silent(model)
+    @variable(model, x)
+    @variable(model, p in Parameter(3.0))
+    @constraint(model, cons, x >= 3 * p)
+    @objective(model, Min, 2x)
+    optimize!(model)
+    @test value(x) ≈ 9
+
+    @test_throws ErrorException MOI.set(
+        model,
+        DiffOpt.ForwardConstraintSet(),
+        ParameterRef(x),
+        Parameter(1.0),
+    )
+    # Not easily tested because hard to check if variable is a parameter
+    # @test_throws ErrorException MOI.set(
+    #     model,
+    #     DiffOpt.ReverseVariablePrimal(),
+    #     p,
+    #     1,
+    # )
+    @test_throws ErrorException MOI.get(
+        model,
+        DiffOpt.ForwardVariablePrimal(),
+        p,
+    )
+    @test_throws ErrorException MOI.get(
+        model,
+        DiffOpt.ReverseConstraintSet(),
+        ParameterRef(x),
+    )
+    @test_throws ErrorException MOI.get(
+        model,
+        DiffOpt.ReverseObjectiveFunction(),
+    )
+    @test_throws ErrorException MOI.get(
+        model,
+        DiffOpt.ReverseConstraintFunction(),
+        cons,
+    )
+    return
+end
+
 function is_empty(cache::DiffOpt.InputCache)
     return isempty(cache.dx) &&
            isempty(cache.scalar_constraints) &&