Skip to content

Commit

Permalink
Breaking changes and improvements (#11)
Browse files Browse the repository at this point in the history
* breaking changes and improvements

* sparsity fixes

* fix the fix

* fix sparse FD implementation

* fix empty hessian

* test model forwarddiffy

* test sparsity

* lower bound NonconvexCore
  • Loading branch information
mohamed82008 authored Jun 14, 2022
1 parent 309c0c5 commit a224f24
Show file tree
Hide file tree
Showing 10 changed files with 276 additions and 110 deletions.
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "NonconvexUtils"
uuid = "c48e48a2-1f5e-44ff-8799-c8e168d11d1b"
authors = ["Mohamed Tarek <mohamed82008@gmail.com> and contributors"]
version = "0.2.1"
version = "0.3.0"

[deps]
AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d"
Expand All @@ -24,7 +24,7 @@ ForwardDiff = "0.10"
IterativeSolvers = "0.8, 0.9"
LinearMaps = "3"
MacroTools = "0.5"
NonconvexCore = "1"
NonconvexCore = "1.0.8"
SparseDiffTools = "1.24"
Symbolics = "4.6"
Zygote = "0.5, 0.6"
Expand Down
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,21 @@

Useful hacks for use in Nonconvex.jl.

## Hack #1: `AbstractDiffFunction` and `ForwardDiffFunction`
## Hack #1: `abstractdiffy` and `forwarddiffy`

[`Nonconvex.jl`](https://github.com/JuliaNonconvex/Nonconvex.jl) uses [`Zygote.jl`](https://github.com/FluxML/Zygote.jl) for automatic differentiation (AD). In order to force the use of another AD package for a function `f`, one can specify any AD `backend` from [`AbstractDifferentiation.jl`](https://github.com/JuliaDiff/AbstractDifferentiation.jl) in the following way:
```julia
g = AbstractDiffFunction(f, backend)
g = abstractdiffy(f, backend, x...)
```
where `x...` refers to some sample inputs to `f`.

If you want to use [`ForwardDiff.jl`](https://github.com/JuliaDiff/ForwardDiff.jl) to differentiate the function `f`, you can also use
```julia
g = ForwardDiffFunction(f)
g = forwarddiffy(f, x...)
```
which is short for:
```julia
AbstractDiffFunction(f, AbstractDifferentiation.ForwardDiffBackend())
g = abstractdiffy(f, AD.ForwardDiffBackend(), x...)
```

## Hack #2: `TraceFunction`
Expand Down
6 changes: 2 additions & 4 deletions src/NonconvexUtils.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
module NonconvexUtils

export ForwardDiffFunction,
AbstractDiffFunction,
export forwarddiffy,
abstractdiffy,
AD,
TraceFunction,
CustomGradFunction,
LazyJacobian,
CustomHessianFunction,
ImplicitFunction,
SymbolicFunction,
SparseForwardDiffFunction,
sparsify,
symbolify

Expand Down
59 changes: 59 additions & 0 deletions src/abstractdiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,62 @@ function ChainRulesCore.frule(
return v, ∇ * Δx
end
@ForwardDiff_frule (f::AbstractDiffFunction)(x::AbstractVector{<:ForwardDiff.Dual})

function tovecfunc(f, x...)
    # Build a vector-input/vector-output view of `f`, which may accept and
    # return arbitrarily structured values. Returns a triple:
    #   (flat function, flat starting point, output unflattener).
    xvec, raw_unflatten = flatten(x)
    unflatten_in = NonconvexCore.Unflatten(x, raw_unflatten)
    out = f(x...)
    flat_out = NonconvexCore.maybeflatten(out)
    # should be addressed in maybeflatten: a Real output needs no unflattening
    unflatten_out = out isa Real ? identity : NonconvexCore.Unflatten(out, flat_out[2])
    flat_f = v -> NonconvexCore.maybeflatten(f(unflatten_in(v)...))[1]
    return flat_f, float.(xvec), unflatten_out
end

# does not assume vector input and output
function forwarddiffy(f_or_m, x...)
    # Shorthand for `abstractdiffy` with the ForwardDiff backend.
    return abstractdiffy(f_or_m, AD.ForwardDiffBackend(), x...)
end
function abstractdiffy(f, backend, x...)
    # Wrap `f` so that its derivatives are computed by `backend` on a
    # flattened copy of the inputs; the result is unflattened back to the
    # shape of `f`'s output. `x...` are sample inputs used to learn shapes.
    flat_f, _, unflatten_out = tovecfunc(f, x...)
    wrapped = AbstractDiffFunction(flat_f, backend)
    return (args...,) -> unflatten_out(wrapped(flatten(args)[1]))
end
function abstractdiffy(model::NonconvexCore.AbstractModel, backend; objective = true, ineq_constraints = true, eq_constraints = true, sd_constraints = true)
    # Rebuild `model` with each selected function (objective and/or
    # constraint sets) wrapped by `abstractdiffy`, forcing derivatives to be
    # computed through `backend`. Flags, right-hand sides and dimensions of
    # the wrapped functions are carried over unchanged.
    x0 = getmin(model)
    obj = if objective
        NonconvexCore.Objective(abstractdiffy(model.objective, backend, x0), flags = model.objective.flags)
    else
        model.objective
    end
    ineq = if ineq_constraints
        fs = model.ineq_constraints.fs
        length(fs) == 0 ? NonconvexCore.VectorOfFunctions(NonconvexCore.IneqConstraint[]) :
            NonconvexCore.VectorOfFunctions(map(c -> NonconvexCore.IneqConstraint(abstractdiffy(c, backend, x0), c.rhs, c.dim, c.flags), fs))
    else
        model.ineq_constraints
    end
    eq = if eq_constraints
        fs = model.eq_constraints.fs
        length(fs) == 0 ? NonconvexCore.VectorOfFunctions(NonconvexCore.EqConstraint[]) :
            NonconvexCore.VectorOfFunctions(map(c -> NonconvexCore.EqConstraint(abstractdiffy(c, backend, x0), c.rhs, c.dim, c.flags), fs))
    else
        model.eq_constraints
    end
    sd = if sd_constraints
        fs = model.sd_constraints.fs
        length(fs) == 0 ? NonconvexCore.VectorOfFunctions(NonconvexCore.SDConstraint[]) :
            NonconvexCore.VectorOfFunctions(map(c -> NonconvexCore.SDConstraint(abstractdiffy(c, backend, x0), c.dim), fs))
    else
        model.sd_constraints
    end
    # Preserve the concrete model type; only Model and DictModel are supported.
    if model isa NonconvexCore.Model
        ModelT = NonconvexCore.Model
    elseif model isa NonconvexCore.DictModel
        ModelT = NonconvexCore.DictModel
    else
        throw("Unsupported model type.")
    end
    return ModelT(obj, eq, ineq, sd, model.box_min, model.box_max, model.init, model.integer)
end
3 changes: 2 additions & 1 deletion src/custom.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ end
(to::CustomHessianFunction)(x) = to.f(x)
function ChainRulesCore.rrule(f::CustomHessianFunction, x)
    # Reverse rule: differentiate through the user-supplied gradient `f.g`,
    # using `f.h` as its (custom) derivative.
    # Diff residue left a stale `return f(x), Δ -> (NoTangent(), g(x) * Δ)`
    # here which shadowed the updated code below; it has been removed. The
    # gradient is evaluated eagerly so the pullback closure does not
    # recompute it on every call.
    g = CustomGradFunction(f.g, f.h)
    G = g(x)
    return f(x), Δ -> (NoTangent(), G * Δ)
end
function ChainRulesCore.frule(
(_, Δx), f::CustomHessianFunction, x::AbstractVector,
Expand Down
128 changes: 85 additions & 43 deletions src/sparse_forwarddiff.jl
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
struct SparseForwardDiffFunction{F, F!, Y, J, JP, JC, JJ, JJ!, G, H, HP, HC} <: Function
struct SparseForwardDiffFunction{F, F!, Y, J, JP, JC, JJ, JJ!, G, HH1, HP, HC, HH2} <: Function
f::F
f!::F!
y::Y
jac::J
jac_pattern::JP
jac_colors::JC
vecJ::JJ
J::JJ
vecJ!::JJ!
G::G
hess::H
vecJ::G
hess::HH1
hess_pattern::HP
hess_colors::HC
H::HH2
end

function SparseForwardDiffFunction(f, x::AbstractVector; hessian = false, jac_pattern = nothing, hess_pattern = nothing)
val = f(x)
_f = val isa Real ? x -> [f(x)] : f
_f = x -> _sparsevec(f(x))
f! = (y, x) -> begin
v = f(x)
y .= v
return y
end
y = val isa Real ? [val] : copy(val)
y = _sparsevec(val)
jac_pattern = jac_pattern === nothing ? Symbolics.jacobian_sparsity(f!, y, x) : jac_pattern
if nnz(jac_pattern) > 0
jac = float.(jac_pattern)
Expand All @@ -33,93 +34,134 @@ function SparseForwardDiffFunction(f, x::AbstractVector; hessian = false, jac_pa
end
vecJ! = (G, x) -> begin
_jac = SparseDiffTools.forwarddiff_color_jacobian(_f, x, colorvec = jac_colors)
G .= vec(Array(_jac))
G .= _sparsevec(_jac)
return G
end
G = vec(Array(jac))
vecJ = x -> copy(vecJ!(G, x))
J = x -> begin
xT = eltype(x)
_jac = SparseDiffTools.forwarddiff_color_jacobian(_f, x, colorvec = jac_colors, sparsity = jac, jac_prototype = xT.(jac))
return copy(_jac)
end
if hessian
hess_pattern = hess_pattern === nothing ? Symbolics.jacobian_sparsity(vecJ!, G, x) : hess_pattern
if nnz(hess_pattern) > 0
hess = float.(hess_pattern)
hess_colors = SparseDiffTools.matrix_colors(hess)
_J = x -> _sparsevec(J(x))
H = x -> begin
_hess = SparseDiffTools.forwarddiff_color_jacobian(_J, x, colorvec = hess_colors, sparsity = hess_pattern, jac_prototype = hess)
return copy(_hess)
end
else
T = eltype(G)
hess = sparse(Int[], Int[], T[])
hess = sparse(Int[], Int[], T[], length(x), length(x))
hess_colors = Int[]
H = x -> hess
end
else
hess = nothing
hess_colors = nothing
H = nothing
end
return SparseForwardDiffFunction(f, f!, y, jac, jac_pattern, jac_colors, J, vecJ!, G, hess, hess_pattern, hess_colors, H)
end

# Flatten a value into a vector for sparse-jacobian plumbing:
# scalars become 1-element vectors, dense arrays are copied column-major,
# and sparse matrices become sparse vectors that keep only stored entries.
_sparsevec(x::Real) = [x]
_sparsevec(x::Vector) = copy(vec(x))
_sparsevec(x::Matrix) = copy(vec(x))
function _sparsevec(x::SparseMatrixCSC)
    # Map every stored entry of `x` to its column-major linear index and
    # build a sparse vector from those (index, value) pairs.
    # Fixes diff residue: a stale `return SparseForwardDiffFunction(...)`
    # line (from the old constructor) had been left inside this body,
    # referencing names not in scope here.
    m = size(x, 1)
    linear_inds = zeros(Int, length(x.nzval))
    k = 1
    for colind in 1:length(x.colptr)-1
        for ind in x.colptr[colind]:x.colptr[colind+1]-1
            linear_inds[k] = x.rowval[ind] + (colind - 1) * m
            k += 1
        end
    end
    # NOTE(review): the result's length is max(linear_inds), not m*n, so
    # trailing structural zeros are dropped — confirm downstream users only
    # consume the stored entries.
    return sparsevec(linear_inds, copy(x.nzval))
end

(f::SparseForwardDiffFunction)(x) = f.f(x)
function ChainRulesCore.rrule(f::SparseForwardDiffFunction, x::AbstractVector)
if f.vecJ === nothing
return f(x), _ -> (NoTangent(), NoTangent())
if f.H === nothing
J = f.J
else
vecjac = SparseForwardDiffFunction(f.vecJ, f.vecJ!, f.G, f.hess, f.hess_pattern, f.hess_colors, nothing, nothing, nothing, nothing, nothing, nothing)
val = f(x)
J = SparseForwardDiffFunction(f.J, f.vecJ!, f.vecJ, f.hess, f.hess_pattern, f.hess_colors, f.H, nothing, nothing, nothing, nothing, nothing, nothing)
end
val = f(x)
jac = J(x)
return val, Δ -> begin
if val isa Real
jac = reshape(vecjac(x), 1, length(x))
(NoTangent(), sparse(vec(jac' * Δ)))
else
jac = reshape(vecjac(x), length(val), length(x))
end
return val, Δ -> begin
(NoTangent(), jac' *isa Real ? Δ : vec(Δ)))
(NoTangent(), jac' * sparse(vec(Δ)))
end
end
end
function ChainRulesCore.frule((_, Δx), f::SparseForwardDiffFunction, x::AbstractVector)
if f.vecJ === nothing
val = f(x)
return val, zero(val)
if f.H === nothing
J = f.J
else
vecjac = SparseForwardDiffFunction(f.vecJ, f.vecJ!, f.G, f.hess, f.hess_pattern, f.hess_colors, nothing, nothing, nothing, nothing, nothing, nothing)
val = f(x)
if val isa Real
jac = reshape(vecjac(x), 1, length(x))
else
jac = reshape(vecjac(x), length(val), length(x))
end
Δy = jac * (Δx isa Real ? Δx : vec(Δx))
return val, (val isa Real) ? only(Δy) : reshape(Δy, size(val))
J = SparseForwardDiffFunction(f.J, f.vecJ!, f.vecJ, f.hess, f.hess_pattern, f.hess_colors, f.H, nothing, nothing, nothing, nothing, nothing, nothing)
end
val = f(x)
jac = J(x)
if val isa Real
Δy = only(jac * Δx)
elseif val isa AbstractVector
Δy = jac * sparse(vec(Δx))
else
Δy = reshape(jac * sparse(vec(Δx)), size(val)...)
end
return val, Δy
end
@ForwardDiff_frule (f::SparseForwardDiffFunction)(x::AbstractVector{<:ForwardDiff.Dual})

function sparsify(model::NonconvexCore.AbstractModel; objective = true, ineq_constraints = true, eq_constraints = true, kwargs...)
vmodel, v, _ = NonconvexCore.tovecmodel(model)
function sparsify(f, x...; kwargs...)
    # Wrap `f` (sample inputs `x...`) so that its derivatives are computed
    # with sparse forward-mode AD on a flattened input vector, unflattening
    # the result back to the shape of `f`'s output.
    # `tovecfunc` is defined in the abstractdiff.jl file.
    flat_f, x0vec, unflatten_out = tovecfunc(f, x...)
    sparse_f = SparseForwardDiffFunction(flat_f, x0vec; kwargs...)
    # NOTE(review): unlike `abstractdiffy`, the returned closure takes a
    # single argument — confirm multi-argument callers go through `tovecfunc`.
    return v -> unflatten_out(sparse_f(flatten(v)[1]))
end

function sparsify(model::NonconvexCore.AbstractModel; objective = true, ineq_constraints = true, eq_constraints = true, sd_constraints = true, kwargs...)
    # Rebuild `model` with each selected function (objective and/or
    # constraint sets) wrapped by `sparsify`, so derivatives use sparse
    # forward-mode AD. `kwargs...` are forwarded to
    # `SparseForwardDiffFunction` (e.g. `hessian`, `jac_pattern`).
    # Fixes diff residue: a stale duplicate return using
    # `model.sd_constraints` instead of the wrapped `sd` had been left
    # before the correct return; it has been removed.
    x = getmin(model)
    if objective
        obj = NonconvexCore.Objective(sparsify(model.objective, x; kwargs...), flags = model.objective.flags)
    else
        obj = model.objective
    end
    if ineq_constraints
        ineq = length(model.ineq_constraints.fs) != 0 ? NonconvexCore.VectorOfFunctions(map(model.ineq_constraints.fs) do c
            return NonconvexCore.IneqConstraint(sparsify(c, x; kwargs...), c.rhs, c.dim, c.flags)
        end) : NonconvexCore.VectorOfFunctions(NonconvexCore.IneqConstraint[])
    else
        ineq = model.ineq_constraints
    end
    if eq_constraints
        eq = length(model.eq_constraints.fs) != 0 ? NonconvexCore.VectorOfFunctions(map(model.eq_constraints.fs) do c
            return NonconvexCore.EqConstraint(sparsify(c, x; kwargs...), c.rhs, c.dim, c.flags)
        end) : NonconvexCore.VectorOfFunctions(NonconvexCore.EqConstraint[])
    else
        eq = model.eq_constraints
    end
    if sd_constraints
        sd = length(model.sd_constraints.fs) != 0 ? NonconvexCore.VectorOfFunctions(map(model.sd_constraints.fs) do c
            return NonconvexCore.SDConstraint(sparsify(c, x; kwargs...), c.dim)
        end) : NonconvexCore.VectorOfFunctions(NonconvexCore.SDConstraint[])
    else
        sd = model.sd_constraints
    end
    # Preserve the concrete model type; only Model and DictModel are supported.
    if model isa NonconvexCore.Model
        ModelT = NonconvexCore.Model
    elseif model isa NonconvexCore.DictModel
        ModelT = NonconvexCore.DictModel
    else
        throw("Unsupported model type.")
    end
    return ModelT(obj, eq, ineq, sd, model.box_min, model.box_max, model.init, model.integer)
end
Loading

2 comments on commit a224f24

@mohamed82008
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/62369

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.0 -m "<description of version>" a224f244668da62e3bd1231b944b159d0eb76ae0
git push origin v0.3.0

Please sign in to comment.