Skip to content

Commit

Permalink
Breaking changes and improvements (#11)
Browse files Browse the repository at this point in the history
* breaking changes and improvements

* sparsity fixes

* fix the fix

* fix sparse FD implementation

* fix empty hessian

* test model forwarddiffy

* test sparsity

* lower bound NonconvexCore
  • Loading branch information
mohamed82008 authored Jun 14, 2022
1 parent 309c0c5 commit a224f24
Show file tree
Hide file tree
Showing 10 changed files with 276 additions and 110 deletions.
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "NonconvexUtils"
uuid = "c48e48a2-1f5e-44ff-8799-c8e168d11d1b"
authors = ["Mohamed Tarek <mohamed82008@gmail.com> and contributors"]
version = "0.2.1"
version = "0.3.0"

[deps]
AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d"
Expand All @@ -24,7 +24,7 @@ ForwardDiff = "0.10"
IterativeSolvers = "0.8, 0.9"
LinearMaps = "3"
MacroTools = "0.5"
NonconvexCore = "1"
NonconvexCore = "1.0.8"
SparseDiffTools = "1.24"
Symbolics = "4.6"
Zygote = "0.5, 0.6"
Expand Down
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,21 @@

Useful hacks for use in Nonconvex.jl.

## Hack #1: `AbstractDiffFunction` and `ForwardDiffFunction`
## Hack #1: `abstractdiffy` and `forwarddiffy`

[`Nonconvex.jl`](https://github.com/JuliaNonconvex/Nonconvex.jl) uses [`Zygote.jl`](https://github.com/FluxML/Zygote.jl) for automatic differentiation (AD). In order to force the use of another AD package for a function `f`, one can specify any AD `backend` from [`AbstractDifferentiation.jl`](https://github.com/JuliaDiff/AbstractDifferentiation.jl) in the following way:
```julia
g = AbstractDiffFunction(f, backend)
g = abstractdiffy(f, backend, x...)
```
where `x...` refers to some sample inputs to `f`.

If you want to use [`ForwardDiff.jl`](https://github.com/JuliaDiff/ForwardDiff.jl) to differentiate the function `f`, you can also use
```julia
g = ForwardDiffFunction(f)
g = forwarddiffy(f, x...)
```
which is short for:
```julia
AbstractDiffFunction(f, AbstractDifferentiation.ForwardDiffBackend())
g = abstractdiffy(f, AD.ForwardDiffBackend(), x...)
```

## Hack #2: `TraceFunction`
Expand Down
6 changes: 2 additions & 4 deletions src/NonconvexUtils.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
module NonconvexUtils

export ForwardDiffFunction,
AbstractDiffFunction,
export forwarddiffy,
abstractdiffy,
AD,
TraceFunction,
CustomGradFunction,
LazyJacobian,
CustomHessianFunction,
ImplicitFunction,
SymbolicFunction,
SparseForwardDiffFunction,
sparsify,
symbolify

Expand Down
59 changes: 59 additions & 0 deletions src/abstractdiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,62 @@ function ChainRulesCore.frule(
return v, ∇ * Δx
end
@ForwardDiff_frule (f::AbstractDiffFunction)(x::AbstractVector{<:ForwardDiff.Dual})

function tovecfunc(f, x...)
    # Build a vector-input/vector-output view of `f`, which may accept and
    # return arbitrarily structured values. Returns a triple:
    #   (flat function, flat starting point, output unflattener).
    xvec, raw_unflatten = flatten(x)
    unflatten_in = NonconvexCore.Unflatten(x, raw_unflatten)
    out = f(x...)
    flat_out = NonconvexCore.maybeflatten(out)
    # should be addressed in maybeflatten: a Real output needs no unflattening
    unflatten_out = out isa Real ? identity : NonconvexCore.Unflatten(out, flat_out[2])
    flat_f = v -> NonconvexCore.maybeflatten(f(unflatten_in(v)...))[1]
    return flat_f, float.(xvec), unflatten_out
end

# does not assume vector input and output
function forwarddiffy(f_or_m, x...)
    # Shorthand for `abstractdiffy` with the ForwardDiff backend.
    return abstractdiffy(f_or_m, AD.ForwardDiffBackend(), x...)
end
function abstractdiffy(f, backend, x...)
    # Wrap `f` so that its derivatives are computed by `backend` on a
    # flattened copy of the inputs; the result is unflattened back to the
    # shape of `f`'s output. `x...` are sample inputs used to learn shapes.
    flat_f, _, unflatten_out = tovecfunc(f, x...)
    wrapped = AbstractDiffFunction(flat_f, backend)
    return (args...,) -> unflatten_out(wrapped(flatten(args)[1]))
end
function abstractdiffy(model::NonconvexCore.AbstractModel, backend; objective = true, ineq_constraints = true, eq_constraints = true, sd_constraints = true)
    # Rebuild `model` with each selected function (objective and/or
    # constraint sets) wrapped by `abstractdiffy`, forcing derivatives to be
    # computed through `backend`. Flags, right-hand sides and dimensions of
    # the wrapped functions are carried over unchanged.
    x0 = getmin(model)
    obj = if objective
        NonconvexCore.Objective(abstractdiffy(model.objective, backend, x0), flags = model.objective.flags)
    else
        model.objective
    end
    ineq = if ineq_constraints
        fs = model.ineq_constraints.fs
        length(fs) == 0 ? NonconvexCore.VectorOfFunctions(NonconvexCore.IneqConstraint[]) :
            NonconvexCore.VectorOfFunctions(map(c -> NonconvexCore.IneqConstraint(abstractdiffy(c, backend, x0), c.rhs, c.dim, c.flags), fs))
    else
        model.ineq_constraints
    end
    eq = if eq_constraints
        fs = model.eq_constraints.fs
        length(fs) == 0 ? NonconvexCore.VectorOfFunctions(NonconvexCore.EqConstraint[]) :
            NonconvexCore.VectorOfFunctions(map(c -> NonconvexCore.EqConstraint(abstractdiffy(c, backend, x0), c.rhs, c.dim, c.flags), fs))
    else
        model.eq_constraints
    end
    sd = if sd_constraints
        fs = model.sd_constraints.fs
        length(fs) == 0 ? NonconvexCore.VectorOfFunctions(NonconvexCore.SDConstraint[]) :
            NonconvexCore.VectorOfFunctions(map(c -> NonconvexCore.SDConstraint(abstractdiffy(c, backend, x0), c.dim), fs))
    else
        model.sd_constraints
    end
    # Preserve the concrete model type; only Model and DictModel are supported.
    if model isa NonconvexCore.Model
        ModelT = NonconvexCore.Model
    elseif model isa NonconvexCore.DictModel
        ModelT = NonconvexCore.DictModel
    else
        throw("Unsupported model type.")
    end
    return ModelT(obj, eq, ineq, sd, model.box_min, model.box_max, model.init, model.integer)
end
3 changes: 2 additions & 1 deletion src/custom.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ end
(to::CustomHessianFunction)(x) = to.f(x)
function ChainRulesCore.rrule(f::CustomHessianFunction, x)
    # Reverse rule: differentiate through the user-supplied gradient `f.g`,
    # using `f.h` as its (custom) derivative.
    # Diff residue left a stale `return f(x), Δ -> (NoTangent(), g(x) * Δ)`
    # here which shadowed the updated code below; it has been removed. The
    # gradient is evaluated eagerly so the pullback closure does not
    # recompute it on every call.
    g = CustomGradFunction(f.g, f.h)
    G = g(x)
    return f(x), Δ -> (NoTangent(), G * Δ)
end
function ChainRulesCore.frule(
(_, Δx), f::CustomHessianFunction, x::AbstractVector,
Expand Down
128 changes: 85 additions & 43 deletions src/sparse_forwarddiff.jl
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
struct SparseForwardDiffFunction{F, F!, Y, J, JP, JC, JJ, JJ!, G, H, HP, HC} <: Function
struct SparseForwardDiffFunction{F, F!, Y, J, JP, JC, JJ, JJ!, G, HH1, HP, HC, HH2} <: Function
f::F
f!::F!
y::Y
jac::J
jac_pattern::JP
jac_colors::JC
vecJ::JJ
J::JJ
vecJ!::JJ!
G::G
hess::H
vecJ::G
hess::HH1
hess_pattern::HP
hess_colors::HC
H::HH2
end

function SparseForwardDiffFunction(f, x::AbstractVector; hessian = false, jac_pattern = nothing, hess_pattern = nothing)
val = f(x)
_f = val isa Real ? x -> [f(x)] : f
_f = x -> _sparsevec(f(x))
f! = (y, x) -> begin
v = f(x)
y .= v
return y
end
y = val isa Real ? [val] : copy(val)
y = _sparsevec(val)
jac_pattern = jac_pattern === nothing ? Symbolics.jacobian_sparsity(f!, y, x) : jac_pattern
if nnz(jac_pattern) > 0
jac = float.(jac_pattern)
Expand All @@ -33,93 +34,134 @@ function SparseForwardDiffFunction(f, x::AbstractVector; hessian = false, jac_pa
end
vecJ! = (G, x) -> begin
_jac = SparseDiffTools.forwarddiff_color_jacobian(_f, x, colorvec = jac_colors)
G .= vec(Array(_jac))
G .= _sparsevec(_jac)
return G
end
G = vec(Array(jac))
vecJ = x -> copy(vecJ!(G, x))
J = x -> begin
xT = eltype(x)
_jac = SparseDiffTools.forwarddiff_color_jacobian(_f, x, colorvec = jac_colors, sparsity = jac, jac_prototype = xT.(jac))
return copy(_jac)
end
if hessian
hess_pattern = hess_pattern === nothing ? Symbolics.jacobian_sparsity(vecJ!, G, x) : hess_pattern
if nnz(hess_pattern) > 0
hess = float.(hess_pattern)
hess_colors = SparseDiffTools.matrix_colors(hess)
_J = x -> _sparsevec(J(x))
H = x -> begin
_hess = SparseDiffTools.forwarddiff_color_jacobian(_J, x, colorvec = hess_colors, sparsity = hess_pattern, jac_prototype = hess)
return copy(_hess)
end
else
T = eltype(G)
hess = sparse(Int[], Int[], T[])
hess = sparse(Int[], Int[], T[], length(x), length(x))
hess_colors = Int[]
H = x -> hess
end
else
hess = nothing
hess_colors = nothing
H = nothing
end
return SparseForwardDiffFunction(f, f!, y, jac, jac_pattern, jac_colors, J, vecJ!, G, hess, hess_pattern, hess_colors, H)
end

# Flatten a value into a vector for sparse-jacobian plumbing:
# scalars become 1-element vectors, dense arrays are copied column-major,
# and sparse matrices become sparse vectors that keep only stored entries.
_sparsevec(x::Real) = [x]
_sparsevec(x::Vector) = copy(vec(x))
_sparsevec(x::Matrix) = copy(vec(x))
function _sparsevec(x::SparseMatrixCSC)
    # Map every stored entry of `x` to its column-major linear index and
    # build a sparse vector from those (index, value) pairs.
    # Fixes diff residue: a stale `return SparseForwardDiffFunction(...)`
    # line (from the old constructor) had been left inside this body,
    # referencing names not in scope here.
    m = size(x, 1)
    linear_inds = zeros(Int, length(x.nzval))
    k = 1
    for colind in 1:length(x.colptr)-1
        for ind in x.colptr[colind]:x.colptr[colind+1]-1
            linear_inds[k] = x.rowval[ind] + (colind - 1) * m
            k += 1
        end
    end
    # NOTE(review): the result's length is max(linear_inds), not m*n, so
    # trailing structural zeros are dropped — confirm downstream users only
    # consume the stored entries.
    return sparsevec(linear_inds, copy(x.nzval))
end

(f::SparseForwardDiffFunction)(x) = f.f(x)
function ChainRulesCore.rrule(f::SparseForwardDiffFunction, x::AbstractVector)
if f.vecJ === nothing
return f(x), _ -> (NoTangent(), NoTangent())
if f.H === nothing
J = f.J
else
vecjac = SparseForwardDiffFunction(f.vecJ, f.vecJ!, f.G, f.hess, f.hess_pattern, f.hess_colors, nothing, nothing, nothing, nothing, nothing, nothing)
val = f(x)
J = SparseForwardDiffFunction(f.J, f.vecJ!, f.vecJ, f.hess, f.hess_pattern, f.hess_colors, f.H, nothing, nothing, nothing, nothing, nothing, nothing)
end
val = f(x)
jac = J(x)
return val, Δ -> begin
if val isa Real
jac = reshape(vecjac(x), 1, length(x))
(NoTangent(), sparse(vec(jac' * Δ)))
else
jac = reshape(vecjac(x), length(val), length(x))
end
return val, Δ -> begin
(NoTangent(), jac' *isa Real ? Δ : vec(Δ)))
(NoTangent(), jac' * sparse(vec(Δ)))
end
end
end
function ChainRulesCore.frule((_, Δx), f::SparseForwardDiffFunction, x::AbstractVector)
if f.vecJ === nothing
val = f(x)
return val, zero(val)
if f.H === nothing
J = f.J
else
vecjac = SparseForwardDiffFunction(f.vecJ, f.vecJ!, f.G, f.hess, f.hess_pattern, f.hess_colors, nothing, nothing, nothing, nothing, nothing, nothing)
val = f(x)
if val isa Real
jac = reshape(vecjac(x), 1, length(x))
else
jac = reshape(vecjac(x), length(val), length(x))
end
Δy = jac * (Δx isa Real ? Δx : vec(Δx))
return val, (val isa Real) ? only(Δy) : reshape(Δy, size(val))
J = SparseForwardDiffFunction(f.J, f.vecJ!, f.vecJ, f.hess, f.hess_pattern, f.hess_colors, f.H, nothing, nothing, nothing, nothing, nothing, nothing)
end
val = f(x)
jac = J(x)
if val isa Real
Δy = only(jac * Δx)
elseif val isa AbstractVector
Δy = jac * sparse(vec(Δx))
else
Δy = reshape(jac * sparse(vec(Δx)), size(val)...)
end
return val, Δy
end
@ForwardDiff_frule (f::SparseForwardDiffFunction)(x::AbstractVector{<:ForwardDiff.Dual})

function sparsify(model::NonconvexCore.AbstractModel; objective = true, ineq_constraints = true, eq_constraints = true, kwargs...)
vmodel, v, _ = NonconvexCore.tovecmodel(model)
function sparsify(f, x...; kwargs...)
    # Wrap `f` (sample inputs `x...`) so that its derivatives are computed
    # with sparse forward-mode AD on a flattened input vector, unflattening
    # the result back to the shape of `f`'s output.
    # `tovecfunc` is defined in the abstractdiff.jl file.
    flat_f, x0vec, unflatten_out = tovecfunc(f, x...)
    sparse_f = SparseForwardDiffFunction(flat_f, x0vec; kwargs...)
    # NOTE(review): unlike `abstractdiffy`, the returned closure takes a
    # single argument — confirm multi-argument callers go through `tovecfunc`.
    return v -> unflatten_out(sparse_f(flatten(v)[1]))
end

function sparsify(model::NonconvexCore.AbstractModel; objective = true, ineq_constraints = true, eq_constraints = true, sd_constraints = true, kwargs...)
    # Rebuild `model` with each selected function (objective and/or
    # constraint sets) wrapped by `sparsify`, so derivatives use sparse
    # forward-mode AD. `kwargs...` are forwarded to
    # `SparseForwardDiffFunction` (e.g. `hessian`, `jac_pattern`).
    # Fixes diff residue: a stale duplicate return using
    # `model.sd_constraints` instead of the wrapped `sd` had been left
    # before the correct return; it has been removed.
    x = getmin(model)
    if objective
        obj = NonconvexCore.Objective(sparsify(model.objective, x; kwargs...), flags = model.objective.flags)
    else
        obj = model.objective
    end
    if ineq_constraints
        ineq = length(model.ineq_constraints.fs) != 0 ? NonconvexCore.VectorOfFunctions(map(model.ineq_constraints.fs) do c
            return NonconvexCore.IneqConstraint(sparsify(c, x; kwargs...), c.rhs, c.dim, c.flags)
        end) : NonconvexCore.VectorOfFunctions(NonconvexCore.IneqConstraint[])
    else
        ineq = model.ineq_constraints
    end
    if eq_constraints
        eq = length(model.eq_constraints.fs) != 0 ? NonconvexCore.VectorOfFunctions(map(model.eq_constraints.fs) do c
            return NonconvexCore.EqConstraint(sparsify(c, x; kwargs...), c.rhs, c.dim, c.flags)
        end) : NonconvexCore.VectorOfFunctions(NonconvexCore.EqConstraint[])
    else
        eq = model.eq_constraints
    end
    if sd_constraints
        sd = length(model.sd_constraints.fs) != 0 ? NonconvexCore.VectorOfFunctions(map(model.sd_constraints.fs) do c
            return NonconvexCore.SDConstraint(sparsify(c, x; kwargs...), c.dim)
        end) : NonconvexCore.VectorOfFunctions(NonconvexCore.SDConstraint[])
    else
        sd = model.sd_constraints
    end
    # Preserve the concrete model type; only Model and DictModel are supported.
    if model isa NonconvexCore.Model
        ModelT = NonconvexCore.Model
    elseif model isa NonconvexCore.DictModel
        ModelT = NonconvexCore.DictModel
    else
        throw("Unsupported model type.")
    end
    return ModelT(obj, eq, ineq, sd, model.box_min, model.box_max, model.init, model.integer)
end
Loading

2 comments on commit a224f24

@mohamed82008
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/62369

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.0 -m "<description of version>" a224f244668da62e3bd1231b944b159d0eb76ae0
git push origin v0.3.0

Please sign in to comment.