[docs] add Automatic differentiation of user-defined operators tutorial (#3713)

Showing 6 changed files with 335 additions and 4 deletions.

@@ -0,0 +1,322 @@
# Copyright (c) 2024 Oscar Dowson and contributors #src
# #src
# Permission is hereby granted, free of charge, to any person obtaining a copy #src
# of this software and associated documentation files (the "Software"), to deal #src
# in the Software without restriction, including without limitation the rights #src
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #src
# copies of the Software, and to permit persons to whom the Software is #src
# furnished to do so, subject to the following conditions: #src
# #src
# The above copyright notice and this permission notice shall be included in all #src
# copies or substantial portions of the Software. #src
# #src
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #src
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #src
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #src
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #src
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #src
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #src
# SOFTWARE. #src

# # Automatic differentiation of user-defined operators

# The purpose of this tutorial is to demonstrate how to apply automatic
# differentiation to [User-defined operators](@ref jump_user_defined_operators).

# !!! tip
#     This tutorial is for advanced users. As an alternative, consider using
#     [Function tracing](@ref) instead of creating an operator, and if an
#     operator is necessary, consider using the default of
#     `@operator(model, op_f, N, f)` instead of passing explicit
#     [Gradients and Hessians](@ref).

# This tutorial uses the following packages:

using JuMP
import Enzyme
import ForwardDiff
import Ipopt
import Test

# As a simple example, we consider the Rosenbrock function:

f(x::T...) where {T} = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2

# Here's the value at a random point:

x = rand(2)

#-

f(x...)

# ## Analytic derivative

# If expressions are simple enough, you can provide analytic functions for the
# gradient and Hessian.

# ### Gradient

# The Rosenbrock function has the gradient vector:
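
# ```math
# \nabla f(x) = \begin{bmatrix}
#     400 x_1^3 - 400 x_1 x_2 + 2 x_1 - 2 \\
#     200 (x_2 - x_1^2)
# \end{bmatrix}
# ```

# which we implement in Julia as: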

function analytic_∇f(g::AbstractVector, x...)
    g[1] = 400 * x[1]^3 - 400 * x[1] * x[2] + 2 * x[1] - 2
    g[2] = 200 * (x[2] - x[1]^2)
    return
end

# Let's evaluate it at the same vector `x`:

analytic_g = zeros(2)
analytic_∇f(analytic_g, x...)
analytic_g

# ### Hessian

# The Hessian matrix is:
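
# ```math
# \nabla^2 f(x) = \begin{bmatrix}
#     1200 x_1^2 - 400 x_2 + 2 & -400 x_1 \\
#     -400 x_1                 & 200
# \end{bmatrix}
# ```

# which we implement in Julia as: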

function analytic_∇²f(H::AbstractMatrix, x...)
    H[1, 1] = 1200 * x[1]^2 - 400 * x[2] + 2
    ## H[1, 2] = -400 * x[1] <-- not needed because Hessian is symmetric
    H[2, 1] = -400 * x[1]
    H[2, 2] = 200.0
    return
end

# Note that because the Hessian is symmetric, JuMP requires that we fill in
# only the lower triangle.

analytic_H = zeros(2, 2)
analytic_∇²f(analytic_H, x...)
analytic_H

# ### JuMP example

# Putting our analytic functions together, we get:

function analytic_rosenbrock()
    model = Model(Ipopt.Optimizer)
    set_silent(model)
    @variable(model, x[1:2])
    @operator(model, op_rosenbrock, 2, f, analytic_∇f, analytic_∇²f)
    @objective(model, Min, op_rosenbrock(x[1], x[2]))
    optimize!(model)
    Test.@test is_solved_and_feasible(model)
    return value.(x)
end

analytic_rosenbrock()

# ## ForwardDiff

# Instead of analytic functions, you can use [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl)
# to compute derivatives.

# !!! info
#     If you do not specify a gradient or Hessian, JuMP will use ForwardDiff.jl
#     to compute derivatives by default. We provide this section as a worked
#     example of what is going on under the hood.
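
# As a sketch of what this default looks like, the following function solves
# the same problem without passing explicit derivatives. (The name
# `default_rosenbrock` is ours for this example; it is not part of the JuMP
# API.)

function default_rosenbrock()
    model = Model(Ipopt.Optimizer)
    set_silent(model)
    @variable(model, x[1:2])
    ## No gradient or Hessian arguments: JuMP differentiates `f` using
    ## ForwardDiff.jl.
    @operator(model, op_rosenbrock, 2, f)
    @objective(model, Min, op_rosenbrock(x[1], x[2]))
    optimize!(model)
    Test.@test is_solved_and_feasible(model)
    return value.(x)
end

default_rosenbrock()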

# ### Pros and cons

# The main benefit of ForwardDiff is that it is simple, robust, and works with
# a broad range of Julia syntax.

# The main downside is that `f` must be a function that accepts arguments of
# `x::Real...`. See [Common mistakes when writing a user-defined operator](@ref)
# for more details.
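
# For example, a method restricted to `Float64` cannot be differentiated by
# ForwardDiff, because ForwardDiff evaluates the function with its own
# `ForwardDiff.Dual` number type, which is a subtype of `Real` but not of
# `Float64`. Here is a hypothetical sketch (`restricted_f` is not part of the
# original tutorial):

restricted_f(x::Float64...) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
## Calling the following line would throw a `MethodError`, because no method
## of `restricted_f` accepts `ForwardDiff.Dual` arguments:
## ForwardDiff.gradient(y -> restricted_f(y...), [0.5, 0.5])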

# ### Gradient

# The gradient can be computed using `ForwardDiff.gradient!`. Note that
# ForwardDiff expects a single `Vector{T}` argument, but we receive `x` as a
# tuple, so we need `y -> f(y...)` and `collect(x)` to get things in the right
# format.

function fdiff_∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N}
    ForwardDiff.gradient!(g, y -> f(y...), collect(x))
    return
end

# Let's check that we find the analytic solution:

fdiff_g = zeros(2)
fdiff_∇f(fdiff_g, x...)
Test.@test ≈(analytic_g, fdiff_g)

# ### Hessian

# The Hessian is a bit more complicated, but the code to implement it is:

function fdiff_∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N}
    h = ForwardDiff.hessian(y -> f(y...), collect(x))
    for i in 1:N, j in 1:i
        H[i, j] = h[i, j]
    end
    return
end

# Let's check that we find the analytic solution:

fdiff_H = zeros(2, 2)
fdiff_∇²f(fdiff_H, x...)
Test.@test ≈(analytic_H, fdiff_H)

# ### JuMP example

# The code for computing the gradient and Hessian using ForwardDiff can be
# re-used for many operators. Thus, it is helpful to encapsulate it into a
# function:

"""
    fdiff_derivatives(f::Function) -> Tuple{Function,Function}

Return a tuple of functions that evaluate the gradient and Hessian of `f`
using ForwardDiff.jl.
"""
function fdiff_derivatives(f::Function)
    function ∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N}
        ForwardDiff.gradient!(g, y -> f(y...), collect(x))
        return
    end
    function ∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N}
        h = ForwardDiff.hessian(y -> f(y...), collect(x))
        for i in 1:N, j in 1:i
            H[i, j] = h[i, j]
        end
        return
    end
    return ∇f, ∇²f
end

# Here's an example using `fdiff_derivatives`:

function fdiff_rosenbrock()
    model = Model(Ipopt.Optimizer)
    set_silent(model)
    @variable(model, x[1:2])
    @operator(model, op_rosenbrock, 2, f, fdiff_derivatives(f)...)
    @objective(model, Min, op_rosenbrock(x[1], x[2]))
    optimize!(model)
    Test.@test is_solved_and_feasible(model)
    return value.(x)
end

fdiff_rosenbrock()

# ## Enzyme

# Another library for automatic differentiation in Julia is [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl).

# ### Pros and cons

# The main benefit of Enzyme is that it can produce fast derivatives for
# functions with many input arguments.

# The main downsides are that it may throw unusual errors if your code uses an
# unsupported feature of Julia, and that it may have large compile times.

# !!! warning
#     The JuMP developers cannot help you debug error messages related to
#     Enzyme. If the operator works, it works. If not, we suggest you try a
#     different automatic differentiation library. See [juliadiff.org](https://juliadiff.org/)
#     for details.

# ### Gradient

# The gradient can be computed using `Enzyme.autodiff` with the
# `Enzyme.Reverse` mode. We need to wrap `x` in `Enzyme.Active` to indicate
# that we want to compute the derivatives with respect to these arguments.

function enzyme_∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N}
    g .= Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Active.(x)...)[1]
    return
end

# Let's check that we find the analytic solution:

enzyme_g = zeros(2)
enzyme_∇f(enzyme_g, x...)
Test.@test ≈(analytic_g, enzyme_g)

# ### Hessian

# We can compute the Hessian in Enzyme using forward-over-reverse automatic
# differentiation.

# The code to implement the Hessian in Enzyme is complicated, so we will not
# explain it in detail; see the [Enzyme documentation](https://enzymead.github.io/Enzyme.jl/v0.11.20/generated/autodiff/#Vector-forward-over-reverse).

function enzyme_∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N}
    ## direction(i) returns a tuple with a `1` in the `i`'th entry and `0`
    ## otherwise
    direction(i) = ntuple(j -> Enzyme.Active(T(i == j)), N)
    ## As the inner function, compute the gradient using Reverse mode
    ∇f_deferred(x...) = Enzyme.autodiff_deferred(Enzyme.Reverse, f, x...)[1]
    ## For the outer autodiff, use Forward mode.
    hess = Enzyme.autodiff(
        Enzyme.Forward,
        ∇f_deferred,
        ## Compute multiple evaluations of Forward mode, each time using `x`
        ## but initializing with a different direction.
        Enzyme.BatchDuplicated.(Enzyme.Active.(x), ntuple(direction, N))...,
    )[1]
    ## Unpack Enzyme's `hess` data structure into the matrix `H` expected by
    ## JuMP.
    for j in 1:N, i in 1:j
        H[j, i] = hess[j][i]
    end
    return
end

# Let's check that we find the analytic solution:

enzyme_H = zeros(2, 2)
enzyme_∇²f(enzyme_H, x...)
Test.@test ≈(analytic_H, enzyme_H)

# ### JuMP example

# The code for computing the gradient and Hessian using Enzyme can be re-used
# for many operators. Thus, it is helpful to encapsulate it into a function:

"""
    enzyme_derivatives(f::Function) -> Tuple{Function,Function}

Return a tuple of functions that evaluate the gradient and Hessian of `f`
using Enzyme.jl.
"""
function enzyme_derivatives(f::Function)
    function ∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N}
        g .= Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Active.(x)...)[1]
        return
    end
    function ∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N}
        direction(i) = ntuple(j -> Enzyme.Active(T(i == j)), N)
        ∇f_deferred(x...) = Enzyme.autodiff_deferred(Enzyme.Reverse, f, x...)[1]
        hess = Enzyme.autodiff(
            Enzyme.Forward,
            ∇f_deferred,
            Enzyme.BatchDuplicated.(Enzyme.Active.(x), ntuple(direction, N))...,
        )[1]
        for j in 1:N, i in 1:j
            H[j, i] = hess[j][i]
        end
        return
    end
    return ∇f, ∇²f
end

# Here's an example using `enzyme_derivatives`:

function enzyme_rosenbrock()
    model = Model(Ipopt.Optimizer)
    set_silent(model)
    @variable(model, x[1:2])
    @operator(model, op_rosenbrock, 2, f, enzyme_derivatives(f)...)
    @objective(model, Min, op_rosenbrock(x[1], x[2]))
    optimize!(model)
    Test.@test is_solved_and_feasible(model)
    return value.(x)
end

enzyme_rosenbrock()

@@ -128,6 +128,7 @@ Dualization
EAGO
[Ee]mbotech
[Ee]cos
Enzyme
Geomstats
Geoopt
GLPK