From 679ce71abfaa64c6eba524fc46710a7b02161b94 Mon Sep 17 00:00:00 2001
From: Jarrett Revels
Date: Mon, 1 Feb 2016 16:14:45 -0500
Subject: [PATCH] giant rewrite for Julia v0.5

Major Changes:

- Drop Vector storage for Partials. This reduces indirection in code that
  previously had to handle multiple container types. I have yet to see a case
  where Tuple storage wasn't faster, since Vector storage incurs GC overhead.
- Consolidate all ForwardDiffNumber types into the new type DiffNumber, which
  is structured like a GradientNumber. HessianNumber and TensorNumber have
  been replaced by the ability to nest DiffNumbers (a short worked example
  appears below, after DiffNumber's promotion rules). This allows for Tuple
  storage of higher-order partial components and cuts out a lot of code.
- API functions have been replaced by macros to allow type-stable passage of
  constant kwargs. This includes some small breaking changes. For example, the
  target function is ALWAYS the first argument to the new API functions (to
  maintain consistency with other higher-order mutation functions in Base,
  like `map!`). Additionally, some kwarg names have changed.
- Experimental multithreading support can be enabled for some API functions
  by passing `multithread = true`.
- The caching layer has been simplified and is now thread-safe.
---
 src/DiffNumber.jl            | 316 +++++++++++++++++++++++++++++
 src/ForwardDiff.jl           | 104 ++--------
 src/ForwardDiffNumber.jl     | 179 -----------------
 src/GradientNumber.jl        | 213 --------------------
 src/HessianNumber.jl         | 277 --------------------------
 src/Partials.jl              | 201 +++++++------------
 src/TensorNumber.jl          | 343 --------------------------------
 src/api.jl                   |  64 ++++++
 src/api/ForwardDiffResult.jl | 172 ----------------
 src/api/api.jl               |  33 ---
 src/api/cache.jl             | 110 ----------
 src/api/deprecated.jl        |  33 ---
 src/api/derivative.jl        |  46 -----
 src/api/gradient.jl          | 132 ------------
 src/api/hessian.jl           | 225 ---------------------
 src/api/jacobian.jl          | 173 ----------------
 src/api/tensor.jl            | 109 ----------
 src/cache.jl                 |  41 ++++
 src/derivative.jl            |  78 ++++++++
 src/gradient.jl              | 197 ++++++++++++++++++
 src/jacobian.jl              |  44 ++++
 test/DiffNumberTest.jl       | 375 +++++++++++++++++++++++++++++++++++
 test/GradientTest.jl         |  29 +++
 test/PartialsTest.jl         | 112 +++++++++++
 test/runtests.jl             |  38 +---
 test/test_behaviors.jl       | 109 ----------
 test/test_deprecated.jl      |  80 --------
 test/test_derivatives.jl     |  93 ---------
 test/test_gradients.jl       | 338 -------------------------------
 test/test_hessians.jl        | 329 ------------------------------
 test/test_jacobians.jl       | 126 ------------
 test/test_tensors.jl         | 346 --------------------------------
 32 files changed, 1354 insertions(+), 3711 deletions(-)
 create mode 100644 src/DiffNumber.jl
 delete mode 100644 src/ForwardDiffNumber.jl
 delete mode 100644 src/GradientNumber.jl
 delete mode 100644 src/HessianNumber.jl
 delete mode 100644 src/TensorNumber.jl
 create mode 100644 src/api.jl
 delete mode 100644 src/api/ForwardDiffResult.jl
 delete mode 100644 src/api/api.jl
 delete mode 100644 src/api/cache.jl
 delete mode 100644 src/api/deprecated.jl
 delete mode 100644 src/api/derivative.jl
 delete mode 100644 src/api/gradient.jl
 delete mode 100644 src/api/hessian.jl
 delete mode 100644 src/api/jacobian.jl
 delete mode 100644 src/api/tensor.jl
 create mode 100644 src/cache.jl
 create mode 100644 src/derivative.jl
 create mode 100644 src/gradient.jl
 create mode 100644 src/jacobian.jl
 create mode 100644 test/DiffNumberTest.jl
 create mode 100644 test/GradientTest.jl
 create mode 100644 test/PartialsTest.jl
 delete mode 100644 test/test_behaviors.jl
 delete mode 100644 test/test_deprecated.jl
 delete mode 100644
test/test_derivatives.jl delete mode 100644 test/test_gradients.jl delete mode 100644 test/test_hessians.jl delete mode 100644 test/test_jacobians.jl delete mode 100644 test/test_tensors.jl diff --git a/src/DiffNumber.jl b/src/DiffNumber.jl new file mode 100644 index 00000000..7c8d7659 --- /dev/null +++ b/src/DiffNumber.jl @@ -0,0 +1,316 @@ +############## +# DiffNumber # +############## + +immutable DiffNumber{N,T<:Real} <: Real + value::T + partials::Partials{N,T} +end + +################ +# Constructors # +################ + +DiffNumber{N,T}(value::T, partials::Partials{N,T}) = DiffNumber{N,T}(value, partials) + +function DiffNumber{N,A,B}(value::A, partials::Partials{N,B}) + T = promote_type(A, B) + return DiffNumber(convert(T, value), convert(Partials{N,T}, partials)) +end + +DiffNumber(value::Real, partials::Tuple) = DiffNumber(value, Partials(partials)) +DiffNumber(value::Real, partials::Tuple{}) = DiffNumber(value, Partials{0,typeof(value)}(partials)) +DiffNumber(value::Real, partials::Real...) = DiffNumber(value, partials) + +############################## +# Utility/Accessor Functions # +############################## + +@inline value(x::Real) = x +@inline value(n::DiffNumber) = n.value + +@inline partials(x::Real) = Partials{0,typeof(x)}(tuple()) +@inline partials(n::DiffNumber) = n.partials +@inline partials(n::DiffNumber, i) = n.partials[i] +@inline partials(n::DiffNumber, i, j) = partials(n, i).partials[j] +@inline partials(n::DiffNumber, i, j, k) = partials(n, i, j).partials[k] + +@inline npartials{N}(::DiffNumber{N}) = N +@inline npartials{N,T}(::Type{DiffNumber{N,T}}) = N + +@inline numtype{N,T}(::DiffNumber{N,T}) = T +@inline numtype{N,T}(::Type{DiffNumber{N,T}}) = T + +##################### +# Generic Functions # +##################### + +macro ambiguous(ex) + def = ex.head == :macrocall ? 
ex.args[2] : ex + f = def.args[1].args[1].args[1] + return quote + $(f)(a::DiffNumber, b::DiffNumber) = error("npartials($(typeof(a))) != npartials($(typeof(b)))") + $(esc(ex)) + end +end + +Base.copy(n::DiffNumber) = n + +Base.eps(n::DiffNumber) = eps(value(n)) +Base.eps{F<:DiffNumber}(::Type{F}) = eps(numtype(F)) + +Base.floor{T<:Real}(::Type{T}, n::DiffNumber) = floor(T, value(n)) +Base.ceil{T<:Real}(::Type{T}, n::DiffNumber) = ceil(T, value(n)) +Base.trunc{T<:Real}(::Type{T}, n::DiffNumber) = trunc(T, value(n)) +Base.round{T<:Real}(::Type{T}, n::DiffNumber) = round(T, value(n)) + +const FDNUM_HASH = hash(DiffNumber) + +Base.hash(n::DiffNumber) = hash(value(n)) +Base.hash(n::DiffNumber, hsh::UInt64) = hash(value(n), hsh) + +function Base.read{N,T}(io::IO, ::Type{DiffNumber{N,T}}) + value = read(io, T) + partials = read(io, Partials{N,T}) + return DiffNumber{N,T}(value, partials) +end + +function Base.write(io::IO, n::DiffNumber) + write(io, value(n)) + write(io, partials(n)) +end + +@inline Base.zero(n::DiffNumber) = zero(typeof(n)) +@inline Base.zero{N,T}(::Type{DiffNumber{N,T}}) = DiffNumber(zero(T), zero(Partials{N,T})) + +@inline Base.one(n::DiffNumber) = one(typeof(n)) +@inline Base.one{N,T}(::Type{DiffNumber{N,T}}) = DiffNumber(one(T), zero(Partials{N,T})) + +@inline Base.rand(n::DiffNumber) = rand(typeof(n)) +@inline Base.rand{N,T}(::Type{DiffNumber{N,T}}) = DiffNumber(rand(T), zero(Partials{N,T})) +@inline Base.rand(rng::AbstractRNG, n::DiffNumber) = rand(rng, typeof(n)) +@inline Base.rand{N,T}(rng::AbstractRNG, ::Type{DiffNumber{N,T}}) = DiffNumber(rand(rng, T), zero(Partials{N,T})) + +# Predicates # +#------------# + +isconstant(n::DiffNumber) = iszero(partials(n)) + +@ambiguous Base.isequal{N}(a::DiffNumber{N}, b::DiffNumber{N}) = isequal(value(a), value(b)) +@ambiguous Base.(:(==)){N}(a::DiffNumber{N}, b::DiffNumber{N}) = value(a) == value(b) +@ambiguous Base.isless{N}(a::DiffNumber{N}, b::DiffNumber{N}) = value(a) < value(b) +@ambiguous Base.(:<){N}(a::DiffNumber{N}, b::DiffNumber{N}) = isless(a, b) +@ambiguous Base.(:(<=)){N}(a::DiffNumber{N}, b::DiffNumber{N}) = <=(value(a), value(b)) + +for T in (AbstractFloat, Irrational, Real) + Base.isequal(n::DiffNumber, x::T) = isequal(value(n), x) + Base.isequal(x::T, n::DiffNumber) = isequal(n, x) + + Base.(:(==))(n::DiffNumber, x::T) = (value(n) == x) + Base.(:(==))(x::T, n::DiffNumber) = ==(n, x) + + Base.isless(n::DiffNumber, x::T) = value(n) < x + Base.isless(x::T, n::DiffNumber) = x < value(n) + + Base.(:<)(n::DiffNumber, x::T) = isless(n, x) + Base.(:<)(x::T, n::DiffNumber) = isless(x, n) + + Base.(:(<=))(n::DiffNumber, x::T) = <=(value(n), x) + Base.(:(<=))(x::T, n::DiffNumber) = <=(x, value(n)) +end + +Base.isnan(n::DiffNumber) = isnan(value(n)) +Base.isfinite(n::DiffNumber) = isfinite(value(n)) +Base.isinf(n::DiffNumber) = isinf(value(n)) +Base.isreal(n::DiffNumber) = isreal(value(n)) +Base.isinteger(n::DiffNumber) = isinteger(value(n)) +Base.iseven(n::DiffNumber) = iseven(value(n)) +Base.isodd(n::DiffNumber) = isodd(value(n)) + +######################## +# Promotion/Conversion # +######################## + +Base.promote_rule{N1,N2,A<:Real,B<:Real}(D1::Type{DiffNumber{N1,A}}, D2::Type{DiffNumber{N2,B}}) = error("can't promote $(D1) and $(D2)") +Base.promote_rule{N,A<:Real,B<:Real}(::Type{DiffNumber{N,A}}, ::Type{DiffNumber{N,B}}) = DiffNumber{N,promote_type(A, B)} +Base.promote_rule{N,T<:Real}(::Type{DiffNumber{N,T}}, ::Type{BigFloat}) = DiffNumber{N,promote_type(T, BigFloat)} 
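# --- A minimal sketch (not part of the patch) of the nesting referenced in the
# commit message, using only constructors, accessors, and `^` defined in this
# file; the variable names `inner`, `outer`, and `y` are hypothetical. Nesting
# one DiffNumber inside another is what replaces HessianNumber: the inner
# perturbation carries the first derivative, the outer one the second.

inner = DiffNumber(2.0, (1.0,))           # 2 + ε₁, seeds d/dx
outer = DiffNumber(inner, (one(inner),))  # (2 + ε₁) + ε₂, seeds a second d/dx
y = outer^4                               # evaluate f(x) = x^4 at x = 2

value(value(y))        # 16.0 == f(2)
partials(value(y), 1)  # 32.0 == f'(2)  = 4x^3 at x = 2
partials(y, 1, 1)      # 48.0 == f''(2) = 12x^2 at x = 2

# --- end sketch -------------------------------------------------------------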
+Base.promote_rule{N,T<:Real}(::Type{BigFloat}, ::Type{DiffNumber{N,T}}) = DiffNumber{N,promote_type(BigFloat, T)} +Base.promote_rule{N,T<:Real}(::Type{DiffNumber{N,T}}, ::Type{Bool}) = DiffNumber{N,promote_type(T, Bool)} +Base.promote_rule{N,T<:Real}(::Type{Bool}, ::Type{DiffNumber{N,T}}) = DiffNumber{N,promote_type(Bool, T)} +Base.promote_rule{N,T<:Real,s}(::Type{DiffNumber{N,T}}, ::Type{Irrational{s}}) = DiffNumber{N,promote_type(T, Irrational{s})} +Base.promote_rule{N,s,T<:Real}(::Type{Irrational{s}}, ::Type{DiffNumber{N,T}}) = DiffNumber{N,promote_type(Irrational{s}, T)} +Base.promote_rule{N,A<:Real,B<:Real}(::Type{DiffNumber{N,A}}, ::Type{B}) = DiffNumber{N,promote_type(A, B)} +Base.promote_rule{N,A<:Real,B<:Real}(::Type{A}, ::Type{DiffNumber{N,B}}) = DiffNumber{N,promote_type(A, B)} + +Base.convert(::Type{DiffNumber}, n::DiffNumber) = n +Base.convert{N1,N2,T<:Real}(D::Type{DiffNumber{N1,T}}, n::DiffNumber{N2}) = error("can't convert $(typeof(n)) to $(D)") +Base.convert{N,T<:Real}(::Type{DiffNumber{N,T}}, n::DiffNumber{N}) = DiffNumber(convert(T, value(n)), convert(Partials{N,T}, partials(n))) +Base.convert{N,T<:Real}(::Type{DiffNumber{N,T}}, n::DiffNumber{N,T}) = n +Base.convert{N,T<:Real}(::Type{DiffNumber{N,T}}, x::Real) = DiffNumber(convert(T, x), zero(Partials{N,T})) +Base.convert(::Type{DiffNumber}, x::Real) = DiffNumber(x) + +Base.promote_array_type{D<:DiffNumber, A<:AbstractFloat}(F, ::Type{D}, ::Type{A}) = D + +Base.float{N,T}(n::DiffNumber{N,T}) = DiffNumber{N,promote_type(T, Float16)}(n) + +######## +# Math # +######## + +# Addition/Subtraction # +#----------------------# + +@ambiguous @inline Base.(:+){N}(n1::DiffNumber{N}, n2::DiffNumber{N}) = DiffNumber(value(n1) + value(n2), partials(n1) + partials(n2)) +@inline Base.(:+)(n::DiffNumber, x::Real) = DiffNumber(value(n) + x, partials(n)) +@inline Base.(:+)(x::Real, n::DiffNumber) = n + x + +@ambiguous @inline Base.(:-){N}(n1::DiffNumber{N}, n2::DiffNumber{N}) = DiffNumber(value(n1) - value(n2), partials(n1) - partials(n2)) +@inline Base.(:-)(n::DiffNumber, x::Real) = DiffNumber(value(n) - x, partials(n)) +@inline Base.(:-)(x::Real, n::DiffNumber) = DiffNumber(x - value(n), -(partials(n))) +@inline Base.(:-)(n::DiffNumber) = DiffNumber(-(value(n)), -(partials(n))) + +# Multiplication # +#----------------# + +@inline Base.(:*)(n::DiffNumber, x::Bool) = x ? n : (signbit(value(n))==0 ? 
zero(n) : -zero(n)) +@inline Base.(:*)(x::Bool, n::DiffNumber) = n * x + +@ambiguous @inline function Base.(:*){N}(n1::DiffNumber{N}, n2::DiffNumber{N}) + v1, v2 = value(n1), value(n2) + return DiffNumber(v1 * v2, _mul_partials(partials(n1), partials(n2), v2, v1)) +end + +@inline Base.(:*)(n::DiffNumber, x::Real) = DiffNumber(value(n) * x, partials(n) * x) +@inline Base.(:*)(x::Real, n::DiffNumber) = n * x + +# Division # +#----------# + +@ambiguous @inline function Base.(:/){N}(n1::DiffNumber{N}, n2::DiffNumber{N}) + v1, v2 = value(n1), value(n2) + return DiffNumber(v1 / v2, _div_partials(partials(n1), partials(n2), v1, v2)) +end + +@inline function Base.(:/)(x::Real, n::DiffNumber) + v = value(n) + divv = x / v + return DiffNumber(divv, -(divv / v) * partials(n)) +end + +@inline Base.(:/)(n::DiffNumber, x::Real) = DiffNumber(value(n) / x, partials(n) / x) + +# Exponentiation # +#----------------# + +for f in (:(Base.(:^)), :(NaNMath.pow)) + + @eval begin + @ambiguous @inline function ($f){N}(n1::DiffNumber{N}, n2::DiffNumber{N}) + if iszero(partials(n2)) + return $(f)(n1, value(n2)) + else + v1, v2 = value(n1), value(n2) + expv = ($f)(v1, v2) + powval = v2 * ($f)(v1, v2 - 1) + logval = expv * log(v1) + new_partials = _mul_partials(partials(n1), partials(n2), powval, logval) + return DiffNumber(expv, new_partials) + end + end + + @inline ($f)(::Base.Irrational{:e}, n::DiffNumber) = exp(n) + end + + for T in (:Integer, :Rational, :Real) + @eval begin + @inline function ($f)(n::DiffNumber, x::$(T)) + v = value(n) + expv = ($f)(v, x) + deriv = x * ($f)(v, x - 1) + return DiffNumber(expv, deriv * partials(n)) + end + + @inline function ($f)(x::$(T), n::DiffNumber) + v = value(n) + expv = ($f)(x, v) + deriv = expv*log(x) + return DiffNumber(expv, deriv * partials(n)) + end + end + end +end + +# Unary Math Functions # +#--------------------- # + +function to_nanmath(x::Expr) + if x.head == :call + funsym = Expr(:.,:NaNMath,Base.Meta.quot(x.args[1])) + return Expr(:call,funsym,[to_nanmath(z) for z in x.args[2:end]]...) + else + return Expr(:call,[to_nanmath(z) for z in x.args]...) + end +end + +to_nanmath(x) = x + +@inline Base.conj(n::DiffNumber) = n +@inline Base.transpose(n::DiffNumber) = n +@inline Base.ctranspose(n::DiffNumber) = n +@inline Base.abs(n::DiffNumber) = signbit(value(n)) ? 
-n : n + +for fsym in AUTO_DEFINED_UNARY_FUNCS + v = :v + deriv = Calculus.differentiate(:($(fsym)($v)), v) + + @eval begin + @inline function Base.$(fsym)(n::DiffNumber) + $(v) = value(n) + return DiffNumber($(fsym)($v), $(deriv) * partials(n)) + end + end + + # extend corresponding NaNMath methods + if fsym in NANMATH_FUNCS + nan_deriv = to_nanmath(deriv) + @eval begin + @inline function NaNMath.$(fsym)(n::DiffNumber) + v = value(n) + return DiffNumber(NaNMath.$(fsym)($v), $(nan_deriv) * partials(n)) + end + end + end +end + +################# +# Special Cases # +################# + +# Manually Optimized Functions # +#------------------------------# + +@inline function Base.exp{N}(n::DiffNumber{N}) + expv = exp(value(n)) + return DiffNumber(expv, expv * partials(n)) +end + +@inline function Base.sqrt{N}(n::DiffNumber{N}) + sqrtv = sqrt(value(n)) + deriv = 0.5 / sqrtv + return DiffNumber(sqrtv, deriv * partials(n)) +end + +# Other Functions # +#-----------------# + +@inline function calc_atan2(y, x) + z = y/x + v= value(z) + atan2v = atan2(value(y), value(x)) + deriv = inv(one(v) + v*v) + return DiffNumber(atan2v, deriv * partials(z)) +end + +@ambiguous @inline Base.atan2{N}(y::DiffNumber{N}, x::DiffNumber{N}) = calc_atan2(y, x) +@inline Base.atan2(y::Real, x::DiffNumber) = calc_atan2(y, x) +@inline Base.atan2(y::DiffNumber, x::Real) = calc_atan2(y, x) diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl index 344028a7..0a2256ab 100644 --- a/src/ForwardDiff.jl +++ b/src/ForwardDiff.jl @@ -2,90 +2,20 @@ isdefined(Base, :__precompile__) && __precompile__() module ForwardDiff - if VERSION < v"0.4-" - warn("ForwardDiff.jl is only officially compatible with Julia v0.4-. You're currently running Julia $VERSION.") - end - - ########### - # imports # - ########### - import Calculus - import NaNMath - import Base: *, /, +, -, ^, getindex, length, - hash, ==, <, <=, isequal, copy, zero, - one, float, rand, convert, promote_rule, - read, write, isless, isreal, isnan, - isfinite, isinf, eps, conj, transpose, - ctranspose, eltype, abs, abs2, start, - next, done, atan2, floor, ceil, trunc, round - - const auto_defined_unary_funcs = map(first, Calculus.symbolic_derivatives_1arg()) - - for fsym in auto_defined_unary_funcs - @eval import Base.$(fsym); - end - - ############ - # includes # - ############ - include("Partials.jl") - include("ForwardDiffNumber.jl") - include("GradientNumber.jl") - include("HessianNumber.jl") - include("TensorNumber.jl") - include("api/api.jl") - - ####################### - # Promotion Utilities # - ####################### - @inline switch_eltype{T,S}(::Type{Vector{T}}, ::Type{S}) = Vector{S} - @inline switch_eltype{N,T,S}(::Type{NTuple{N,T}}, ::Type{S}) = NTuple{N,S} - - for F in (:TensorNumber, :GradientNumber, :HessianNumber) - @eval begin - @inline switch_eltype{N,T,S}(::Type{$F{N,T,NTuple{N,T}}}, ::Type{S}) = $F{N,S,NTuple{N,S}} - @inline switch_eltype{N,T,S}(::Type{$F{N,T,Vector{T}}}, ::Type{S}) = $F{N,S,Vector{S}} - - function promote_rule{N,T1,C1,T2,C2}(::Type{($F){N,T1,C1}}, - ::Type{($F){N,T2,C2}}) - P = promote_type(Partials{T1,C1}, Partials{T2,C2}) - T, C = eltype(P), containtype(P) - return ($F){N,T,C} - end - - function promote_rule{N,T,C,S<:Real}(::Type{($F){N,T,C}}, ::Type{S}) - R = promote_type(T, S) - return ($F){N,R,switch_eltype(C, R)} - end - end - end - - @inline function promote_eltype{F<:ForwardDiffNumber}(::Type{F}, types::DataType...) 
- return switch_eltype(F, promote_type(eltype(F), types...)) - end - - @inline promote_typeof(n1::ForwardDiffNumber, n2::ForwardDiffNumber) = promote_type(typeof(n1), typeof(n2)) - - @inline promote_eltypesof(n1::ForwardDiffNumber, n2::ForwardDiffNumber, a) = promote_eltype(promote_typeof(n1, n2), typeof(a)) - - @inline promote_eltypeof(n::ForwardDiffNumber, a) = promote_eltype(typeof(n), typeof(a)) - @inline promote_eltypeof(n::ForwardDiffNumber, a, b) = promote_eltype(typeof(n), typeof(a), typeof(b)) - - ########### - # exports # - ########### - export AllResults, - ForwardDiffCache, - value, - value!, - derivative!, - derivative, - gradient!, - jacobian!, - jacobian, - hessian!, - hessian, - tensor!, - tensor - -end # module ForwardDiff +import Base.Threads +import Calculus +import NaNMath + +const AUTO_DEFINED_UNARY_FUNCS = map(first, Calculus.symbolic_derivatives_1arg()) +const NANMATH_FUNCS = (:sin, :cos, :tan, :asin, :acos, :acosh, + :atanh, :log, :log2, :log10, :lgamma, :log1p) + +include("Partials.jl") +include("DiffNumber.jl") +include("cache.jl") +include("api.jl") +include("derivative.jl") +include("gradient.jl") +include("jacobian.jl") + +end # module diff --git a/src/ForwardDiffNumber.jl b/src/ForwardDiffNumber.jl deleted file mode 100644 index ec70cdd9..00000000 --- a/src/ForwardDiffNumber.jl +++ /dev/null @@ -1,179 +0,0 @@ -############################### -# Abstract Types/Type Aliases # -############################### - -@eval typealias ExternalReal Union{$(subtypes(Real)...)} -abstract ForwardDiffNumber{N,T<:Real,C} <: Real - -# Subtypes F<:ForwardDiffNumber should define: -# npartials(::Type{F}) --> N from ForwardDiffNumber{N,T,C} -# eltype(::Type{F}) --> T from ForwardDiffNumber{N,T,C} -# containtype(::Type{F}) --> C from ForwardDiffNumber{N,T,C} -# value(n::F) --> the value of n -# grad(n::F) --> a container corresponding to all first order partials -# hess(n::F) --> a container corresponding to the lower -# triangular half of the symmetric -# Hessian (including the diagonal) -# tens(n::F) --> a container of corresponding to lower -# tetrahedral half of the symmetric -# Tensor (including the diagonal) -# isconstant(n::F) --> returns true if all partials stored by n are zero -# -#...as well as: -# ==(a::F, b::F) -# isequal(a::F, b::F) -# zero(::Type{F}) -# one(::Type{F}) -# rand(::Type{F}) -# hash(n::F) -# read(io::IO, ::Type{F}) -# write(io::IO, n::F) -# conversion/promotion rules - -#################### -# Type Definitions # -#################### - -immutable GradientNumber{N,T,C} <: ForwardDiffNumber{N,T,C} - value::T - partials::Partials{T,C} -end - -immutable HessianNumber{N,T,C} <: ForwardDiffNumber{N,T,C} - gradnum::GradientNumber{N,T,C} - hess::Vector{T} - function HessianNumber(gradnum, hess) - @assert length(hess) == halfhesslen(N) - return new(gradnum, hess) - end -end - -immutable TensorNumber{N,T,C} <: ForwardDiffNumber{N,T,C} - hessnum::HessianNumber{N,T,C} - tens::Vector{T} - function TensorNumber(hessnum, tens) - @assert length(tens) == halftenslen(N) - return new(hessnum, tens) - end -end - - -################################## -# Ambiguous Function Definitions # -################################## -ambiguous_error{A,B}(f, a::A, b::B) = error("""Oops - $f(::$A, ::$B) should never have been called. - It was defined to resolve ambiguity, and was supposed to - fall back to a more specific method defined elsewhere. 
- Please report this bug to ForwardDiff.jl's issue tracker.""") - -ambiguous_binary_funcs = [:(==), :isequal, :isless, :<, :+, :-, :*, :/, :^, :atan2, :calc_atan2] -fdnum_ambiguous_binary_funcs = [:(==), :isequal, :isless, :<] -fdnum_types = [:GradientNumber, :HessianNumber, :TensorNumber] - -for f in ambiguous_binary_funcs - if f in fdnum_ambiguous_binary_funcs - @eval $f(a::ForwardDiffNumber, b::ForwardDiffNumber) = ambiguous_error($f, a, b) - end - for A in fdnum_types, B in fdnum_types - @eval $f(a::$A, b::$B) = ambiguous_error($f, a, b) - end -end - -############################## -# Utility/Accessor Functions # -############################## -@inline halfhesslen(n) = div(n*(n+1),2) # correct length(hess(::ForwardDiffNumber)) -@inline halftenslen(n) = div(n*(n+1)*(n+2),6) # correct length(tens(::ForwardDiffNumber)) - -@inline grad(n::ForwardDiffNumber, i) = grad(n)[i] -@inline hess(n::ForwardDiffNumber, i) = hess(n)[i] -@inline tens(n::ForwardDiffNumber, i) = tens(n)[i] - -@inline npartials{N}(::ForwardDiffNumber{N}) = N -@inline eltype{N,T}(::ForwardDiffNumber{N,T}) = T -@inline containtype{N,T,C}(::ForwardDiffNumber{N,T,C}) = C - -@inline npartials{N,T,C}(::Type{ForwardDiffNumber{N,T,C}}) = N -@inline eltype{N,T,C}(::Type{ForwardDiffNumber{N,T,C}}) = T -@inline containtype{N,T,C}(::Type{ForwardDiffNumber{N,T,C}}) = C - -for T in fdnum_types - @eval isless(a::$T, b::$T) = value(a) < value(b) - @eval <(a::$T, b::$T) = isless(a, b) -end - -for T in (Base.Irrational, AbstractFloat, Real) - @eval begin - ==(n::ForwardDiffNumber, x::$T) = isconstant(n) && (value(n) == x) - ==(x::$T, n::ForwardDiffNumber) = ==(n, x) - - isequal(n::ForwardDiffNumber, x::$T) = isconstant(n) && isequal(value(n), x) - isequal(x::$T, n::ForwardDiffNumber) = isequal(n, x) - - isless(x::$T, n::ForwardDiffNumber) = x < value(n) - isless(n::ForwardDiffNumber, x::$T) = value(n) < x - - <(x::$T, n::ForwardDiffNumber) = isless(x, n) - <(n::ForwardDiffNumber, x::$T) = isless(n, x) - <=(x::ForwardDiffNumber, n::ForwardDiffNumber) = <=(value(x), value(n)) - <=(x::$T, n::ForwardDiffNumber) = <=(x, value(n)) - <=(n::ForwardDiffNumber, x::$T) = <=(value(n), x) - end -end - -copy(n::ForwardDiffNumber) = n # assumes all types of ForwardDiffNumbers are immutable - -eps(n::ForwardDiffNumber) = eps(value(n)) -eps{F<:ForwardDiffNumber}(::Type{F}) = eps(eltype(F)) - -isnan(n::ForwardDiffNumber) = isnan(value(n)) -isfinite(n::ForwardDiffNumber) = isfinite(value(n)) -isinf(n::ForwardDiffNumber) = isinf(value(n)) -isreal(n::ForwardDiffNumber) = isconstant(n) - -floor{T<:ExternalReal}(::Type{T}, n::ForwardDiffNumber) = floor(T, value(n)) -ceil{ T<:ExternalReal}(::Type{T}, n::ForwardDiffNumber) = ceil( T, value(n)) -trunc{T<:ExternalReal}(::Type{T}, n::ForwardDiffNumber) = trunc(T, value(n)) -round{T<:ExternalReal}(::Type{T}, n::ForwardDiffNumber) = round(T, value(n)) - -######################## -# Conversion/Promotion # -######################## -zero(n::ForwardDiffNumber) = zero(typeof(n)) -one(n::ForwardDiffNumber) = one(typeof(n)) - -function float(n::ForwardDiffNumber) - T = promote_type(eltype(n), Float16) - return convert(switch_eltype(typeof(n), T), n) -end - -convert(::Type{Integer}, n::ForwardDiffNumber) = isconstant(n) ? Integer(value(n)) : throw(InexactError()) -convert(::Type{Bool}, n::ForwardDiffNumber) = isconstant(n) ? Bool(value(n)) : throw(InexactError()) -convert{T<:ExternalReal}(::Type{T}, n::ForwardDiffNumber) = isconstant(n) ? 
T(value(n)) : throw(InexactError()) - -################## -# Math Functions # -################## -conj(n::ForwardDiffNumber) = n -transpose(n::ForwardDiffNumber) = n -ctranspose(n::ForwardDiffNumber) = n - -@inline abs(n::ForwardDiffNumber) = signbit(value(n)) ? -n : n -@inline abs2(n::ForwardDiffNumber) = n*n - -# NaNMath Helper Functions # -#--------------------------# -function to_nanmath(x::Expr) - if x.head == :call - funsym = Expr(:.,:NaNMath,Base.Meta.quot(x.args[1])) - return Expr(:call,funsym,[to_nanmath(z) for z in x.args[2:end]]...) - else - return Expr(:call,[to_nanmath(z) for z in x.args]...) - end -end - -to_nanmath(x) = x - -# Overloading of `promote_array_type` # -#-------------------------------------# -Base.promote_array_type{S<:ForwardDiff.ForwardDiffNumber, A<:AbstractFloat}(F, ::Type{S}, ::Type{A}) = S diff --git a/src/GradientNumber.jl b/src/GradientNumber.jl deleted file mode 100644 index 6838c059..00000000 --- a/src/GradientNumber.jl +++ /dev/null @@ -1,213 +0,0 @@ -################ -# Constructors # -################ -GradientNumber{N,T}(value::T, grad::NTuple{N,T}) = GradientNumber{N,T,NTuple{N,T}}(value, Partials(grad)) -GradientNumber{T}(value::T, grad::T...) = GradientNumber(value, grad) - -############################## -# Utility/Accessor Functions # -############################## -@inline partials(g::GradientNumber) = g.partials - -@inline value(g::GradientNumber) = g.value -@inline grad(g::GradientNumber) = data(partials(g)) -@inline hess(g::GradientNumber) = error("GradientNumbers do not store Hessian values") -@inline tens(g::GradientNumber) = error("GradientNumbers do not store tensor values") - -@inline eltype{N,T,C}(::Type{GradientNumber{N,T,C}}) = T -@inline npartials{N,T,C}(::Type{GradientNumber{N,T,C}}) = N -@inline containtype{N,T,C}(::Type{GradientNumber{N,T,C}}) = C - -##################### -# Generic Functions # -##################### -isconstant(g::GradientNumber) = iszero(partials(g)) - -==(a::GradientNumber, b::GradientNumber) = value(a) == value(b) && partials(a) == partials(b) - -isequal(a::GradientNumber, b::GradientNumber) = isequal(value(a), value(b)) && isequal(partials(a), partials(b)) - -hash(g::GradientNumber) = isconstant(g) ? 
hash(value(g)) : hash(value(g), hash(partials(g))) -hash(g::GradientNumber, hsh::UInt64) = hash(hash(g), hsh) - -function read{N,T,C}(io::IO, ::Type{GradientNumber{N,T,C}}) - value = read(io, T) - partials = read(io, Partials{T,C}, N) - return GradientNumber{N,T,C}(value, partials) -end - -function write(io::IO, g::GradientNumber) - write(io, value(g)) - write(io, partials(g)) -end - -############## -# Conversion # -############## -@inline zero{N,T,C}(G::Type{GradientNumber{N,T,C}}) = G(zero(T), zero_partials(C, N)) -@inline one{N,T,C}(G::Type{GradientNumber{N,T,C}}) = G(one(T), zero_partials(C, N)) -@inline rand{N,T,C}(G::Type{GradientNumber{N,T,C}}) = G(rand(T), rand_partials(C, N)) - -convert{N,T,C}(::Type{GradientNumber{N,T,C}}, g::GradientNumber) = GradientNumber{N,T,C}(value(g), partials(g)) -convert{N,T,C}(::Type{GradientNumber{N,T,C}}, g::GradientNumber{N,T,C}) = g -convert(::Type{GradientNumber}, g::GradientNumber) = g -convert{N,T,C}(::Type{GradientNumber{N,T,C}}, x::ExternalReal) = GradientNumber{N,T,C}(x, zero_partials(C, N)) -convert(::Type{GradientNumber}, x::ExternalReal) = GradientNumber(x) - -############################ -# Math with GradientNumber # -############################ -@inline function gradnum_from_deriv(g::GradientNumber, new_a, deriv) - G = promote_eltypeof(g, new_a, deriv) - return G(new_a, deriv*partials(g)) -end - -# Addition/Subtraction # -#----------------------# -@inline +{N}(g1::GradientNumber{N}, g2::GradientNumber{N}) = promote_typeof(g1, g2)(value(g1)+value(g2), partials(g1)+partials(g2)) -@inline +(g::GradientNumber, x::Real) = promote_eltypeof(g, x)(value(g)+x, partials(g)) -@inline +(x::Real, g::GradientNumber) = g+x - -@inline -(g::GradientNumber) = typeof(g)(-value(g), -partials(g)) -@inline -{N}(g1::GradientNumber{N}, g2::GradientNumber{N}) = promote_typeof(g1, g2)(value(g1)-value(g2), partials(g1)-partials(g2)) -@inline -(g::GradientNumber, x::Real) = promote_eltypeof(g, x)(value(g)-x, partials(g)) -@inline -(x::Real, g::GradientNumber) = promote_eltypeof(g, x)(x-value(g), -partials(g)) - -# Multiplication # -#----------------# -@inline *(g::GradientNumber, x::Bool) = x ? g : (signbit(value(g))==0 ? 
zero(g) : -zero(g)) -@inline *(x::Bool, g::GradientNumber) = g*x - -@inline function *{N}(g1::GradientNumber{N}, g2::GradientNumber{N}) - a1, a2 = value(g1), value(g2) - return promote_typeof(g1, g2)(a1*a2, _mul_partials(partials(g1), partials(g2), a2, a1)) -end - -@inline *(g::GradientNumber, x::Real) = promote_eltypeof(g, x)(value(g)*x, partials(g)*x) -@inline *(x::Real, g::GradientNumber) = g*x - -# Division # -#----------# -@inline function /{N}(g1::GradientNumber{N}, g2::GradientNumber{N}) - a1, a2 = value(g1), value(g2) - div_a = a1/a2 - return promote_eltypesof(g1, g2, div_a)(div_a, _div_partials(partials(g1), partials(g2), a1, a2)) -end - -@inline function /(x::Real, g::GradientNumber) - a = value(g) - div_a = x/a - deriv = -(div_a/a) - return gradnum_from_deriv(g, div_a, deriv) -end - -@inline function /(g::GradientNumber, x::Real) - div_a = value(g)/x - return promote_eltypeof(g, div_a)(div_a, partials(g)/x) -end - -# Exponentiation # -#----------------# -for f in (:^, :(NaNMath.pow)) - @eval begin - @inline function ($f){N}(g1::GradientNumber{N}, g2::GradientNumber{N}) - a1, a2 = value(g1), value(g2) - exp_a = ($f)(a1, a2) - powval = a2 * ($f)(a1, a2 - 1) - logval = exp_a * log(a1) - new_bs = _mul_partials(partials(g1), partials(g2), powval, logval) - return promote_eltypesof(g1, g2, exp_a)(exp_a, new_bs) - end - - @inline ($f)(::Base.Irrational{:e}, g::GradientNumber) = exp(g) - end - - # generate redundant definitions to resolve ambiguity warnings - for R in (:Integer, :Rational, :Real) - @eval begin - @inline function ($f)(g::GradientNumber, x::$R) - a = value(g) - exp_a = ($f)(a, x) - deriv = x*($f)(a, x - 1) - return gradnum_from_deriv(g, exp_a, deriv) - end - - @inline function ($f)(x::$R, g::GradientNumber) - a = value(g) - exp_a = ($f)(x, a) - deriv = exp_a*log(x) - return gradnum_from_deriv(g, exp_a, deriv) - end - end - end -end - -# Unary functions on GradientNumbers # -#------------------------------------# -for fsym in auto_defined_unary_funcs - a = :a - new_a = :($(fsym)($a)) - deriv = Calculus.differentiate(new_a, a) - - @eval begin - @inline function $(fsym)(g::GradientNumber) - a = value(g) - return gradnum_from_deriv(g, $new_a, $deriv) - end - end - - # extend corresponding NaNMath methods - if fsym in (:sin, :cos, :tan, - :asin, :acos, :acosh, - :atanh, :log, :log2, - :log10, :lgamma, :log1p) - - nan_fsym = Expr(:.,:NaNMath,Base.Meta.quot(fsym)) - nan_new_a = :($(nan_fsym)($a)) - nan_deriv = to_nanmath(deriv) - - @eval begin - @inline function $(nan_fsym)(g::GradientNumber) - a = value(g) - return gradnum_from_deriv(g, $nan_new_a, $nan_deriv) - end - end - end -end - -################# -# Special Cases # -################# - -# Manually Optimized Functions # -#------------------------------# -@inline function exp(g::GradientNumber) - exp_a = exp(value(g)) - return gradnum_from_deriv(g, exp_a, exp_a) -end - -@inline function sqrt(g::GradientNumber) - sqrt_a = sqrt(value(g)) - deriv = 0.5 / sqrt_a - return gradnum_from_deriv(g, sqrt_a, deriv) -end - -# Other Functions # -#-----------------# -@inline calc_atan2(y::GradientNumber, x::GradientNumber) = atan2(value(y), value(x)) -@inline calc_atan2(y::Real, x::GradientNumber) = atan2(y, value(x)) -@inline calc_atan2(y::GradientNumber, x::Real) = atan2(value(y), x) - -for Y in (:Real, :GradientNumber), X in (:Real, :GradientNumber) - if !(Y == :Real && X == :Real) - @eval begin - @inline function atan2(y::$Y, x::$X) - z = y/x - a = value(z) - atan2_a = calc_atan2(y, x) - deriv = inv(one(a) + a*a) - return 
gradnum_from_deriv(z, atan2_a, deriv) - end - end - end -end diff --git a/src/HessianNumber.jl b/src/HessianNumber.jl deleted file mode 100644 index 38b3d225..00000000 --- a/src/HessianNumber.jl +++ /dev/null @@ -1,277 +0,0 @@ -################ -# Constructors # -################ -function HessianNumber{N,T,C}(gradnum::GradientNumber{N,T,C}, - hess::Vector=zeros(T, halfhesslen(N))) - return HessianNumber{N,T,C}(gradnum, hess) -end - -HessianNumber(value::Real) = HessianNumber(GradientNumber(value)) - -############################## -# Utility/Accessor Functions # -############################## -zero{N,T,C}(::Type{HessianNumber{N,T,C}}) = HessianNumber(zero(GradientNumber{N,T,C})) -one{N,T,C}(::Type{HessianNumber{N,T,C}}) = HessianNumber(one(GradientNumber{N,T,C})) -rand{N,T,C}(::Type{HessianNumber{N,T,C}}) = HessianNumber(rand(GradientNumber{N,T,C}), rand(T, halfhesslen(N))) - -@inline gradnum(h::HessianNumber) = h.gradnum - -@inline value(h::HessianNumber) = value(gradnum(h)) -@inline grad(h::HessianNumber) = grad(gradnum(h)) -@inline hess(h::HessianNumber) = h.hess -@inline tens(h::HessianNumber) = error("HessianNumbers do not store tensor values") - -@inline npartials{N,T,C}(::Type{HessianNumber{N,T,C}}) = N -@inline eltype{N,T,C}(::Type{HessianNumber{N,T,C}}) = T -@inline containtype{N,T,C}(::Type{HessianNumber{N,T,C}}) = C - -##################### -# Generic Functions # -##################### -function isconstant(h::HessianNumber) - zeroT = zero(eltype(h)) - return isconstant(gradnum(h)) && all(x -> x == zeroT, hess(h)) -end - -=={N}(a::HessianNumber{N}, b::HessianNumber{N}) = (gradnum(a) == gradnum(b)) && (hess(a) == hess(b)) - -isequal{N}(a::HessianNumber{N}, b::HessianNumber{N}) = isequal(gradnum(a), gradnum(b)) && isequal(hess(a),hess(b)) - -hash(h::HessianNumber) = isconstant(h) ? 
hash(value(h)) : hash(gradnum(h), hash(hess(h))) -hash(h::HessianNumber, hsh::UInt64) = hash(hash(h), hsh) - -function read{N,T,C}(io::IO, ::Type{HessianNumber{N,T,C}}) - gradnum = read(io, GradientNumber{N,T,C}) - hess = [read(io, T) for i in 1:halfhesslen(N)] - return HessianNumber{N,T,C}(gradnum, hess) -end - -function write(io::IO, h::HessianNumber) - write(io, gradnum(h)) - for du in hess(h) - write(io, du) - end -end - -############## -# Conversion # -############## -convert{N,T,C}(::Type{HessianNumber{N,T,C}}, x::ExternalReal) = HessianNumber(GradientNumber{N,T,C}(x)) -convert{N,T,C}(::Type{HessianNumber{N,T,C}}, h::HessianNumber{N}) = HessianNumber(GradientNumber{N,T,C}(gradnum(h)), hess(h)) -convert{N,T,C}(::Type{HessianNumber{N,T,C}}, h::HessianNumber{N,T,C}) = h -convert(::Type{HessianNumber}, h::HessianNumber) = h - -#################################### -# Math Functions on HessianNumbers # -#################################### -function loadhess_deriv!{N}(h::HessianNumber{N}, deriv1, deriv2, output) - q = 1 - for i in 1:N - for j in 1:i - output[q] = deriv1*hess(h, q) + deriv2*grad(h, i)*grad(h, j) - q += 1 - end - end - return output -end - -@inline function hessnum_from_deriv{N}(h::HessianNumber{N}, new_a, deriv1, deriv2) - new_g = gradnum_from_deriv(gradnum(h), new_a, deriv1) - new_hessvec = Array(eltype(new_g), halfhesslen(N)) - loadhess_deriv!(h, deriv1, deriv2, new_hessvec) - return HessianNumber(new_g, new_hessvec) -end - -# Addition/Subtraction # -#----------------------# -+{N}(h1::HessianNumber{N}, h2::HessianNumber{N}) = HessianNumber(gradnum(h1) + gradnum(h2), hess(h1) + hess(h2)) --{N}(h1::HessianNumber{N}, h2::HessianNumber{N}) = HessianNumber(gradnum(h1) - gradnum(h2), hess(h1) - hess(h2)) - --(h::HessianNumber) = HessianNumber(-gradnum(h), -hess(h)) - -# Multiplication # -#----------------# -for T in (:Bool, :Real) - @eval begin - *(h::HessianNumber, x::$(T)) = HessianNumber(gradnum(h) * x, hess(h) * x) - *(x::$(T), h::HessianNumber) = HessianNumber(x * gradnum(h), x * hess(h)) - end -end - -function *{N}(h1::HessianNumber{N}, h2::HessianNumber{N}) - mul_g = gradnum(h1)*gradnum(h2) - hessvec = Array(eltype(mul_g), halfhesslen(N)) - - a1, a2 = value(h1), value(h2) - q = 1 - for i in 1:N - for j in 1:i - hessvec[q] = (hess(h1,q)*a2 - + grad(h1,i)*grad(h2,j) - + grad(h1,j)*grad(h2,i) - + a1*hess(h2,q)) - q += 1 - end - end - - return HessianNumber(mul_g, hessvec) -end - -# Division # -#----------# -/(h::HessianNumber, x::Real) = HessianNumber(gradnum(h) / x, hess(h) / x) - -function /(x::Real, h::HessianNumber) - a = value(h) - - div_a = x / a - div_a_sq = div_a / a - div_a_cb = div_a_sq / a - - deriv1 = -div_a_sq - deriv2 = div_a_cb + div_a_cb - - return hessnum_from_deriv(h, div_a, deriv1, deriv2) -end - -function /{N}(h1::HessianNumber{N}, h2::HessianNumber{N}) - div_g = gradnum(h1)/gradnum(h2) - hessvec = Array(eltype(div_g), halfhesslen(N)) - - a1, a2 = value(h1), value(h2) - two_a1 = a1 + a1 - a2_sq = a2 * a2 - inv_a2_cb = inv(a2_sq * a2) - q = 1 - for i in 1:N - for j in 1:i - h2_bi, h2_bj = grad(h2,i), grad(h2,j) - term1 = two_a1*h2_bj*h2_bi + a2_sq*hess(h1,q) - term2 = grad(h1,i)*h2_bj + grad(h1,j)*h2_bi + a1*hess(h2,q) - hessvec[q] = (term1 - a2*term2) * inv_a2_cb - q += 1 - end - end - - return HessianNumber(div_g, hessvec) -end - -# Exponentiation # -#----------------# -^(::Base.Irrational{:e}, h::HessianNumber) = exp(h) - -for T in (:Rational, :Integer, :Real) - @eval begin - function ^(h::HessianNumber, x::$(T)) - a = value(h) - x_min_one = x - 1 - 
exp_a = a^x - deriv1 = x * a^x_min_one - deriv2 = x * x_min_one * a^(x - 2) - return hessnum_from_deriv(h, exp_a, deriv1, deriv2) - end - - function ^(x::$(T), h::HessianNumber) - log_x = log(x) - exp_x = x^value(h) - deriv1 = exp_x * log_x - deriv2 = deriv1 * log_x - return hessnum_from_deriv(h, exp_x, deriv1, deriv2) - end - end -end - -function ^{N}(h1::HessianNumber{N}, h2::HessianNumber{N}) - exp_g = gradnum(h1)^gradnum(h2) - hessvec = Array(eltype(exp_g), halfhesslen(N)) - - a1, a2 = value(h1), value(h2) - a1_exp_a2 = a1^(a2 - 2) - a2_sq = a2 * a2 - log_a1, log_a2 = log(a1), log(a2) - a1_x_loga1 = a1 * log_a1 - a1_x_loga1_x_loga1 = a1_x_loga1 * log_a1 - q = 1 - for i in 1:N - for j in 1:i - h1_bi, h1_bj = grad(h1, i), grad(h1, j) - h2_bi, h2_bj = grad(h2, i), grad(h2, j) - h1_q, h2_q = hess(h1, q), hess(h2, q) - term1 = (h1_bj*(a1_x_loga1*h2_bi - h1_bi) - + a1*(log_a1*h1_bi*h2_bj + h1_q)) - term2 = (h1_bj*h2_bi - + a1_x_loga1*h2_q - + h2_bj*(h1_bi + a1_x_loga1_x_loga1*h2_bi)) - term3 = a2_sq*h1_bi*h1_bj + a2*term1 + a1*term2 - hessvec[q] = a1_exp_a2*term3 - q += 1 - end - end - - return HessianNumber(exp_g, hessvec) -end - -# Unary functions on HessianNumbers # -#-----------------------------------# -# the second derivatives of functions in -# unsupported_unary_hess_funcs involve -# differentiating elementary functions -# that are unsupported by Calculus.jl -const unsupported_unary_hess_funcs = [:asec, :acsc, :asecd, :acscd, :acsch, :trigamma] -const auto_defined_unary_hess_funcs = filter!(sym -> !in(sym, unsupported_unary_hess_funcs), auto_defined_unary_funcs) - -for fsym in auto_defined_unary_hess_funcs - a = :a - new_a = :($(fsym)($a)) - deriv1 = Calculus.differentiate(new_a, a) - deriv2 = Calculus.differentiate(deriv1, a) - - @eval function $(fsym){N}(h::HessianNumber{N}) - a = value(h) - new_a = $new_a - deriv1 = $deriv1 - deriv2 = $deriv2 - return hessnum_from_deriv(h, new_a, deriv1, deriv2) - end -end - -################# -# Special Cases # -################# - -# Manually Optimized Functions # -#------------------------------# -@inline function exp{N}(h::HessianNumber{N}) - exp_a = exp(value(h)) - return hessnum_from_deriv(h, exp_a, exp_a, exp_a) -end - -@inline function sqrt{N}(h::HessianNumber{N}) - a = value(h) - sqrt_a = sqrt(a) - deriv1 = 0.5 / sqrt_a - deriv2 = -0.25 / (a * sqrt_a) - return hessnum_from_deriv(h, sqrt_a, deriv1, deriv2) -end - -# Other Functions # -#-----------------# -@inline calc_atan2(y::HessianNumber, x::HessianNumber) = calc_atan2(gradnum(y), gradnum(x)) -@inline calc_atan2(y::Real, x::HessianNumber) = calc_atan2(y, gradnum(x)) -@inline calc_atan2(y::HessianNumber, x::Real) = calc_atan2(gradnum(y), x) - -for Y in (:Real, :HessianNumber), X in (:Real, :HessianNumber) - if !(Y == :Real && X == :Real) - @eval begin - function atan2(y::$Y, x::$X) - z = y/x - a = value(z) - atan2_a = calc_atan2(y, x) - deriv1 = inv(one(a) + a*a) - deriv2 = (a + a) * -abs2(deriv1) - return hessnum_from_deriv(z, atan2_a, deriv1, deriv2) - end - end - end -end diff --git a/src/Partials.jl b/src/Partials.jl index 4798bdbc..e6efed04 100644 --- a/src/Partials.jl +++ b/src/Partials.jl @@ -1,152 +1,81 @@ -immutable Partials{T,C} - data::C - Partials{N}(data::NTuple{N,T}) = new(data) - Partials(data::Vector{T}) = new(data) +immutable Partials{N,T} + values::NTuple{N,T} end -typealias PartialsTup{N,T} Partials{T,NTuple{N,T}} -typealias PartialsVec{T} Partials{T,Vector{T}} - -Partials(data) = Partials{eltype(data),typeof(data)}(data) - ############################## # 
Utility/Accessor Functions # ############################## -@inline data(partials::Partials) = partials.data - -@inline eltype{T,C}(::Type{Partials{T,C}}) = T -@inline eltype{T}(::Partials{T}) = T -@inline containtype{T,C}(::Type{Partials{T,C}}) = C -@inline containtype{T,C}(::Partials{T,C}) = C +@inline numtype(x) = eltype(x) +@inline numtype{N,T}(::Partials{N,T}) = T +@inline numtype{N,T}(::Type{Partials{N,T}}) = T -@inline getindex(partials::Partials, i) = data(partials)[i] +@inline npartials{N}(::Partials{N}) = N +@inline npartials{N,T}(::Type{Partials{N,T}}) = N -@inline length(partials::Partials) = length(data(partials)) +@inline Base.length{N}(::Partials{N}) = N -start(partials) = start(data(partials)) -next(partials, i) = next(data(partials), i) -done(partials, i) = done(data(partials), i) +@inline Base.getindex(partials::Partials, i) = partials.values[i] +setindex{N,T}(partials::Partials{N,T}, v, i) = Partials{N,T}((partials[1:i-1]..., v, partials[i+1:N]...)) -################ -# Constructors # -################ -@inline zero_partials{C<:Tuple}(::Type{C}, n::Int) = Partials(zero_tuple(C)) -zero_partials{T}(::Type{Vector{T}}, n) = Partials(zeros(T, n)) - -@inline rand_partials{C<:Tuple}(::Type{C}, n::Int) = Partials(rand_tuple(C)) -rand_partials{T}(::Type{Vector{T}}, n::Int) = Partials(rand(T, n)) +Base.start(partials::Partials) = start(partials.values) +Base.next(partials::Partials, i) = next(partials.values, i) +Base.done(partials::Partials, i) = done(partials.values, i) ##################### # Generic Functions # ##################### -function iszero{T}(partials::Partials{T}) - p = data(partials) - return isempty(p) || (z = zero(T); all(x -> x == z, p)) -end -==(a::Partials, b::Partials) = data(a) == data(b) -isequal(a::Partials, b::Partials) = isequal(data(a), data(b)) +@inline iszero(partials::Partials) = iszero_tuple(partials.values) -hash(partials::Partials) = hash(data(partials)) -hash(partials::Partials, hsh::UInt64) = hash(hash(partials), hsh) +@inline Base.zero(partials::Partials) = zero(typeof(partials)) +@inline Base.zero{N,T}(::Type{Partials{N,T}}) = Partials(zero_tuple(NTuple{N,T})) -@inline copy(partials::Partials) = partials +@inline Base.rand(partials::Partials) = rand(typeof(partials)) +@inline Base.rand{N,T}(::Type{Partials{N,T}}) = Partials(rand_tuple(NTuple{N,T})) +@inline Base.rand(rng::AbstractRNG, partials::Partials) = rand(rng, typeof(partials)) +@inline Base.rand{N,T}(rng::AbstractRNG, ::Type{Partials{N,T}}) = Partials(rand_tuple(rng, NTuple{N,T})) -function read{N,T}(io::IO, ::Type{PartialsTup{N,T}}, n::Int) - return Partials(ntuple(i->read(io, T), Val{N})) -end +Base.isequal{N}(a::Partials{N}, b::Partials{N}) = isequal(a.values, b.values) +Base.(:(==)){N}(a::Partials{N}, b::Partials{N}) = a.values == b.values -function read{T}(io::IO, ::Type{PartialsVec{T}}, n::Int) - return Partials([read(io, T) for i in 1:n]) -end +const PARTIALS_HASH = hash(Partials) + +Base.hash(partials::Partials) = hash(partials.values, PARTIALS_HASH) +Base.hash(partials::Partials, hsh::UInt64) = hash(hash(partials), hsh) + +@inline Base.copy(partials::Partials) = partials -function write(io::IO, partials::Partials) - for partial in data(partials) - write(io, partial) +Base.read{N,T}(io::IO, ::Type{Partials{N,T}}) = Partials(ntuple(i->read(io, T), Val{N})) + +function Base.write(io::IO, partials::Partials) + for p in partials + write(io, p) end end ######################## # Conversion/Promotion # ######################## -convert{N,A,B}(::Type{PartialsTup{N,A}}, 
data::NTuple{N,B}) = PartialsTup{N,A}(NTuple{N,A}(data)) -convert{N,A,B}(::Type{PartialsTup{N,A}}, data::Vector{B}) = PartialsTup{N,A}(NTuple{N,A}(data...)) -convert{N,A,B}(::Type{PartialsVec{A}}, data::NTuple{N,B}) = PartialsVec{A}(Vector{A}(collect(data))) -convert{A,B}(::Type{PartialsVec{A}}, data::Vector{B}) = PartialsVec{A}(Vector{A}(data)) -convert{T}(::Type{PartialsVec{T}}, data::Vector{T}) = PartialsVec{T}(data) -convert{N,T}(::Type{PartialsTup{N,T}}, data::NTuple{N,T}) = PartialsTup{N,T}(data) - -convert{T,C}(::Type{Partials{T,C}}, partials::Partials) = Partials{T,C}(data(partials)) -convert{T,C}(::Type{Partials{T,C}}, partials::Partials{T,C}) = partials -convert(::Type{Partials}, partials::Partials) = partials - -promote_rule{A,B}(::Type{PartialsVec{A}}, ::Type{PartialsVec{B}}) = PartialsVec{promote_type(A, B)} -promote_rule{N,A,B}(::Type{PartialsTup{N,A}}, ::Type{PartialsVec{B}}) = PartialsVec{promote_type(A, B)} -promote_rule{N,A,B}(::Type{PartialsVec{A}}, ::Type{PartialsTup{N,B}}) = PartialsVec{promote_type(A, B)} -promote_rule{N,A,B}(::Type{PartialsTup{N,A}}, ::Type{PartialsTup{N,B}}) = PartialsTup{N,promote_type(A, B)} - -################## -# Math Functions # -################## - -# Addition/Subtraction # -#----------------------# -@inline function +{N,A,B}(a::PartialsTup{N,A}, b::PartialsTup{N,B}) - return Partials(add_tuples(data(a), data(b))) -end - -function +{A,B}(a::PartialsVec{A}, b::PartialsVec{B}) - return Partials(data(a) + data(b)) -end - -@inline function -{N,A,B}(a::PartialsTup{N,A}, b::PartialsTup{N,B}) - return Partials(subtract_tuples(data(a), data(b))) -end - -function -{A,B}(a::PartialsVec{A}, b::PartialsVec{B}) - return Partials(data(a) - data(b)) -end - -@inline -{N,T}(partials::PartialsTup{N,T}) = Partials(minus_tuple(data(partials))) --{T}(partials::PartialsVec{T}) = Partials(-data(partials)) -# Multiplication # -#----------------# -@inline function *{N,T}(partials::PartialsTup{N,T}, x::Number) - return Partials(scale_tuple(data(partials), x)) -end - -function *{T}(partials::PartialsVec{T}, x::Number) - return Partials(data(partials)*x) -end - -@inline *(x::Number, partials::Partials) = partials*x - -function _load_mul_partials!(result::Vector, a, b, afactor, bfactor) - @simd for i in eachindex(result) - @inbounds result[i] = (afactor * a[i]) + (bfactor * b[i]) - end - return result -end +Base.promote_rule{N,A,B}(::Type{Partials{N,A}}, ::Type{Partials{N,B}}) = Partials{N,promote_type(A, B)} -function _mul_partials{A,B,C,D}(a::PartialsVec{A}, b::PartialsVec{B}, afactor::C, bfactor::D) - T = promote_type(A, B, C, D) - return Partials(_load_mul_partials!(Vector{T}(length(a)), a, b, afactor, bfactor)) -end +Base.convert{N,T}(::Type{Partials{N,T}}, partials::Partials) = Partials{N,T}(partials.values) +Base.convert{N,T}(::Type{Partials{N,T}}, partials::Partials{N,T}) = partials -@inline function _mul_partials{N,A,B}(a::PartialsTup{N,A}, b::PartialsTup{N,B}, afactor, bfactor) - return Partials(mul_tuples(data(a), data(b), afactor, bfactor)) -end +######################## +# Arithmetic Functions # +######################## -# Division # -#----------# -@inline function /{N,T}(partials::PartialsTup{N,T}, x::Number) - return Partials(div_tuple_by_scalar(data(partials), x)) -end +@inline Base.(:+){N}(a::Partials{N}, b::Partials{N}) = Partials(add_tuples(a.values, b.values)) +@inline Base.(:-){N}(a::Partials{N}, b::Partials{N}) = Partials(sub_tuples(a.values, b.values)) +@inline Base.(:-)(partials::Partials) = Partials(minus_tuple(partials.values)) +@inline 
Base.(:*)(partials::Partials, x::Real) = Partials(scale_tuple(partials.values, x)) +@inline Base.(:*)(x::Real, partials::Partials) = partials*x +@inline Base.(:/)(partials::Partials, x::Real) = Partials(div_tuple_by_scalar(partials.values, x)) -function /{T}(partials::PartialsVec{T}, x::Number) - return Partials(data(partials) / x) +@inline function _mul_partials{N}(a::Partials{N}, b::Partials{N}, afactor, bfactor) + return Partials(mul_tuples(a.values, b.values, afactor, bfactor)) end @inline function _div_partials(a::Partials, b::Partials, aval, bval) @@ -164,25 +93,39 @@ end # faster since they generate inline code # that doesn't rely on closures. -function tupexpr(f,N) +function tupexpr(f, N) ex = Expr(:tuple, [f(i) for i=1:N]...) return quote - @inbounds return $ex + @inbounds return $(ex) end end -@inline zero_tuple(::Type{Tuple{}}) = tuple() +@inline iszero_tuple(::Tuple{}) = true -@generated function zero_tuple{N,T}(::Type{NTuple{N,T}}) - result = tupexpr(i -> :z, N) +@generated function iszero_tuple{N,T}(tup::NTuple{N,T}) + ex = Expr(:&&, [:(z == tup[$i]) for i=1:N]...) return quote z = zero($T) - return $result + @inbounds return $(ex) end end +@inline zero_tuple(::Type{Tuple{}}) = tuple() + +@generated function zero_tuple{N,T}(::Type{NTuple{N,T}}) + z = zero(T) + result = ntuple(n -> z, Val{N}) + return :($result) +end + +@inline rand_tuple(::AbstractRNG, ::Type{Tuple{}}) = tuple() + @inline rand_tuple(::Type{Tuple{}}) = tuple() +@generated function rand_tuple{N,T}(rng::AbstractRNG, ::Type{NTuple{N,T}}) + return tupexpr(i -> :(rand(rng, $T)), N) +end + @generated function rand_tuple{N,T}(::Type{NTuple{N,T}}) return tupexpr(i -> :(rand($T)), N) end @@ -195,16 +138,16 @@ end return tupexpr(i -> :(tup[$i]/x), N) end -@generated function minus_tuple{N}(tup::NTuple{N}) - return tupexpr(i -> :(-tup[$i]), N) +@generated function add_tuples{N}(a::NTuple{N}, b::NTuple{N}) + return tupexpr(i -> :(a[$i]+b[$i]), N) end -@generated function subtract_tuples{N}(a::NTuple{N}, b::NTuple{N}) +@generated function sub_tuples{N}(a::NTuple{N}, b::NTuple{N}) return tupexpr(i -> :(a[$i]-b[$i]), N) end -@generated function add_tuples{N}(a::NTuple{N}, b::NTuple{N}) - return tupexpr(i -> :(a[$i]+b[$i]), N) +@generated function minus_tuple{N}(tup::NTuple{N}) + return tupexpr(i -> :(-tup[$i]), N) end @generated function mul_tuples{N}(a::NTuple{N}, b::NTuple{N}, afactor, bfactor) diff --git a/src/TensorNumber.jl b/src/TensorNumber.jl deleted file mode 100644 index a1c12fc3..00000000 --- a/src/TensorNumber.jl +++ /dev/null @@ -1,343 +0,0 @@ -################ -# Constructors # -################ -function TensorNumber{N,T,C}(hessnum::HessianNumber{N,T,C}, - tens::Vector=zeros(T, halftenslen(N))) - return TensorNumber{N,T,C}(hessnum, tens) -end - -TensorNumber(value::Real) = TensorNumber(HessianNumber(value)) - -############################## -# Utility/Accessor Functions # -############################## -zero{N,T,C}(::Type{TensorNumber{N,T,C}}) = TensorNumber(zero(HessianNumber{N,T,C})) -one{N,T,C}(::Type{TensorNumber{N,T,C}}) = TensorNumber(one(HessianNumber{N,T,C})) -rand{N,T,C}(::Type{TensorNumber{N,T,C}}) = TensorNumber(rand(HessianNumber{N,T,C}), rand(T, halftenslen(N))) - -@inline hessnum(t::TensorNumber) = t.hessnum -@inline gradnum(t::TensorNumber) = gradnum(hessnum(t)) - -@inline value(t::TensorNumber) = value(hessnum(t)) -@inline grad(t::TensorNumber) = grad(hessnum(t)) -@inline hess(t::TensorNumber) = hess(hessnum(t)) -@inline tens(t::TensorNumber) = t.tens - -@inline 
npartials{N,T,C}(::Type{TensorNumber{N,T,C}}) = N -@inline eltype{N,T,C}(::Type{TensorNumber{N,T,C}}) = T -@inline containtype{N,T,C}(::Type{TensorNumber{N,T,C}}) = C - -##################### -# Generic Functions # -##################### -function isconstant(t::TensorNumber) - zeroT = zero(eltype(t)) - return isconstant(hessnum(t)) && all(x -> x == zeroT, tens(t)) -end - -=={N}(a::TensorNumber{N}, b::TensorNumber{N}) = (hessnum(a) == hessnum(b)) && (tens(a) == tens(b)) - -isequal{N}(a::TensorNumber{N}, b::TensorNumber{N}) = isequal(hessnum(a), hessnum(b)) && isequal(tens(a), tens(b)) - -hash(t::TensorNumber) = isconstant(t) ? hash(value(t)) : hash(hessnum(t), hash(tens(t))) -hash(t::TensorNumber, hsh::UInt64) = hash(hash(t), hsh) - -function read{N,T,C}(io::IO, ::Type{TensorNumber{N,T,C}}) - hessnum = read(io, HessianNumber{N,T,C}) - tens = [read(io, T) for i in 1:halftenslen(N)] - return TensorNumber{N,T,C}(hessnum, tens) -end - -function write(io::IO, t::TensorNumber) - write(io, hessnum(t)) - for du in tens(t) - write(io, du) - end -end - -######################## -# Conversion/Promotion # -######################## -convert{N,T,C}(::Type{TensorNumber{N,T,C}}, x::ExternalReal) = TensorNumber(HessianNumber{N,T,C}(x)) -convert{N,T,C}(::Type{TensorNumber{N,T,C}}, t::TensorNumber{N}) = TensorNumber(HessianNumber{N,T,C}(hessnum(t)), tens(t)) -convert{N,T,C}(::Type{TensorNumber{N,T,C}}, t::TensorNumber{N,T,C}) = t -convert(::Type{TensorNumber}, t::TensorNumber) = t - -######################### -# Math on TensorNumbers # -######################### -function hess_inds(i, j) - x, y = ifelse(i < j, (j, i), (i, j)) - return div(x*(x-1), 2) + y -end - -function loadtens_deriv!{N}(t::TensorNumber{N}, deriv1, deriv2, deriv3, output) - p = 1 - for i in 1:N - for j in i:N - for k in i:j - qij, qik, qjk = hess_inds(i,j), hess_inds(i,k), hess_inds(j,k) - bi, bj, bk = grad(t,i), grad(t,j), grad(t,k) - output[p] = deriv1*tens(t,p) + deriv2*(bk*hess(t,qij) + bj*hess(t,qik) + bi*hess(t,qjk)) + deriv3*bi*bj*bk - p += 1 - end - end - end - return output -end - -@inline function tensnum_from_deriv{N}(t::TensorNumber{N}, new_a, deriv1, deriv2, deriv3) - new_h = hessnum_from_deriv(hessnum(t), new_a, deriv1, deriv2) - new_tensvec = Array(eltype(new_h), halftenslen(N)) - loadtens_deriv!(t, deriv1, deriv2, deriv3, new_tensvec) - return TensorNumber(new_h, new_tensvec) -end - -# Addition/Subtraction # -#----------------------# -+{N}(t1::TensorNumber{N}, t2::TensorNumber{N}) = TensorNumber(hessnum(t1) + hessnum(t2), tens(t1) + tens(t2)) --{N}(t1::TensorNumber{N}, t2::TensorNumber{N}) = TensorNumber(hessnum(t1) - hessnum(t2), tens(t1) - tens(t2)) --(t::TensorNumber) = TensorNumber(-hessnum(t), -tens(t)) - -# Multiplication # -#----------------# -for T in (:Bool, :Real) - @eval begin - *(t::TensorNumber, x::$(T)) = TensorNumber(hessnum(t) * x, tens(t) * x) - *(x::$(T), t::TensorNumber) = TensorNumber(x * hessnum(t), x * tens(t)) - end -end - -function *{N}(t1::TensorNumber{N}, t2::TensorNumber{N}) - mul_h = hessnum(t1)*hessnum(t2) - tensvec = Array(eltype(mul_h), halftenslen(N)) - - a1, a2 = value(t1), value(t2) - p = 1 - for i in 1:N - for j in i:N - for k in i:j - qij, qik, qjk = hess_inds(i,j), hess_inds(i,k), hess_inds(j,k) - tensvec[p] = (tens(t1,p)*a2 + - hess(t1,qjk)*grad(t2,i) + - hess(t1,qik)*grad(t2,j) + - hess(t1,qij)*grad(t2,k) + - grad(t1,k)*hess(t2,qij) + - grad(t1,j)*hess(t2,qik) + - grad(t1,i)*hess(t2,qjk) + - a1*tens(t2,p)) - p += 1 - end - end - end - - return TensorNumber(mul_h, tensvec) -end - -# 
Division # -#----------# -/(t::TensorNumber, x::Real) = TensorNumber(hessnum(t) / x, tens(t) / x) - -function /(x::Real, t::TensorNumber) - a = value(t) - div_a = x / a - div_a_sq = div_a / a - div_a_cb = div_a_sq / a - - deriv1 = -div_a_sq - deriv2 = div_a_cb + div_a_cb - deriv3 = -(deriv2 + deriv2 + deriv2)/a - - return tensnum_from_deriv(t, div_a, deriv1, deriv2, deriv3) -end - -function /{N}(t1::TensorNumber{N}, t2::TensorNumber{N}) - div_h = hessnum(t1)/hessnum(t2) - tensvec = Array(eltype(div_h), halftenslen(N)) - - a1, a2 = value(t1), value(t2) - inv_a2 = inv(a2) - abs2_inv_a2 = abs2(inv_a2) - - coeff0 = inv_a2 - coeff1 = -abs2_inv_a2 - coeff2 = 2*abs2_inv_a2*inv_a2 - coeff3 = -2*abs2_inv_a2*(2*inv_a2*inv_a2 + abs2_inv_a2) - - p = 1 - for i in 1:N - for j in i:N - for k in i:j - qij, qik, qjk = hess_inds(i,j), hess_inds(i,k), hess_inds(j,k) - t1_bi, t1_bj, t1_bk = grad(t1,i), grad(t1,j), grad(t1,k) - t2_bi, t2_bj, t2_bk = grad(t2,i), grad(t2,j), grad(t2,k) - t1_cqij, t1_cqik, t1_cqjk = hess(t1,qij), hess(t1,qik), hess(t1,qjk) - t2_cqij, t2_cqik, t2_cqjk = hess(t2,qij), hess(t2,qik), hess(t2,qjk) - loop_coeff1 = (tens(t2,p)*a1 + t2_cqjk*t1_bi + t2_cqik*t1_bj + t2_cqij*t1_bk - + t2_bk*t1_cqij + t2_bj*t1_cqik + t2_bi*t1_cqjk) - loop_coeff2 = (t2_bk*t2_cqij*a1 + t2_bj*t2_cqik*a1 + t2_bi*t2_cqjk*a1 - + t2_bj*t2_bk*t1_bi + t2_bi*t2_bk*t1_bj + t2_bi*t2_bj*t1_bk) - loop_coeff3 = (t2_bi*t2_bj*t2_bk*a1) - tensvec[p] = coeff0*tens(t1,p) + coeff1*loop_coeff1 + coeff2*loop_coeff2 + coeff3*loop_coeff3 - p += 1 - end - end - end - - return TensorNumber(div_h, tensvec) -end - -# Exponentiation # -#----------------# -^(::Base.Irrational{:e}, t::TensorNumber) = exp(t) - -for T in (:Rational, :Integer, :Real) - @eval begin - function ^(t::TensorNumber, x::$(T)) - a = value(t) - x_min_one = x - 1 - x_min_two = x - 2 - x_x_min_one = x * x_min_one - - exp_a = a^x - deriv1 = x * a^x_min_one - deriv2 = x_x_min_one * a^x_min_two - deriv3 = x_x_min_one * x_min_two * a^(x - 3) - - return tensnum_from_deriv(t, exp_a, deriv1, deriv2, deriv3) - end - - function ^(x::$(T), t::TensorNumber) - log_x = log(x) - - exp_x = x^value(t) - deriv1 = exp_x * log_x - deriv2 = deriv1 * log_x - deriv3 = deriv2 * log_x - - return tensnum_from_deriv(t, exp_x, deriv1, deriv2, deriv3) - end - end -end - -function ^{N}(t1::TensorNumber{N}, t2::TensorNumber{N}) - exp_h = hessnum(t1)^hessnum(t2) - tensvec = Array(eltype(exp_h), halftenslen(N)) - - a1, a2 = value(t1), value(t2) - inv_a1 = inv(a1) - abs2_inv_a1 = abs2(inv_a1) - - f_0 = log(a1) - f_1 = inv_a1 - f_2 = -abs2_inv_a1 - f_3 = 2*abs2_inv_a1*inv_a1 - - deriv = a1^a2 - - p = 1 - for i in 1:N - for j in i:N - for k in i:j - qij, qik, qjk = hess_inds(i,j), hess_inds(i,k), hess_inds(j,k) - - t1_bi, t1_bj, t1_bk = grad(t1,i), grad(t1,j), grad(t1,k) - t2_bi, t2_bj, t2_bk = grad(t2,i), grad(t2,j), grad(t2,k) - t1_cqij, t1_cqik, t1_cqjk = hess(t1,qij), hess(t1,qik), hess(t1,qjk) - t2_cqij, t2_cqik, t2_cqjk = hess(t2,qij), hess(t2,qik), hess(t2,qjk) - - d_1 = t1_bi*f_1 - d_2 = t1_bj*f_1 - d_3 = t1_bk*f_1 - d_4 = t1_cqij*f_1 + t1_bi*t1_bj*f_2 - d_5 = t1_cqik*f_1 + t1_bi*t1_bk*f_2 - d_6 = t1_cqjk*f_1 + t1_bj*t1_bk*f_2 - d_7 = (tens(t1,p)*f_1 + (t1_bk*t1_cqij + t1_bj*t1_cqik - + t1_bi*t1_cqjk)*f_2 + t1_bi*t1_bj*t1_bk*f_3) - - e_1 = t2_bi*f_0 + a2*d_1 - e_2 = t2_bj*f_0 + a2*d_2 - e_3 = t2_bk*f_0 + a2*d_3 - e_4 = t2_cqij*f_0 + t2_bj*d_1 + t2_bi*d_2 + a2*d_4 - e_5 = t2_cqik*f_0 + t2_bk*d_1 + t2_bi*d_3 + a2*d_5 - e_6 = t2_cqjk*f_0 + t2_bk*d_2 + t2_bj*d_3 + a2*d_6 - e_7 = (tens(t2,p)*f_0 + 
t2_cqjk*d_1 + t2_cqik*d_2 + t2_cqij*d_3 - + t2_bk*d_4 + t2_bj*d_5 + t2_bi*d_6 + a2*d_7) - - tensvec[p] = deriv*(e_7 + e_3*e_4 + e_2*e_5 + e_1*e_6 + e_1*e_2*e_3) - p += 1 - end - end - end - - return TensorNumber(exp_h, tensvec) -end - -# Unary functions on TensorNumbers # -#----------------------------------# -# the third derivatives of functions in unsupported_unary_tens_funcs -# involve differentiating elementary functions that are unsupported -# by Calculus.jl -const unsupported_unary_tens_funcs = [:digamma] -const auto_defined_unary_tens_funcs = filter!(sym -> !in(sym, unsupported_unary_tens_funcs), ForwardDiff.auto_defined_unary_hess_funcs) - -for fsym in auto_defined_unary_tens_funcs - a = :a - new_a = :($(fsym)($a)) - deriv1 = Calculus.differentiate(new_a, a) - deriv2 = Calculus.differentiate(deriv1, a) - deriv3 = Calculus.differentiate(deriv2, a) - - @eval function $(fsym){N}(t::TensorNumber{N}) - a = value(t) - new_a = $new_a - deriv1 = $deriv1 - deriv2 = $deriv2 - deriv3 = $deriv3 - return tensnum_from_deriv(t, new_a, deriv1, deriv2, deriv3) - end -end - -################# -# Special Cases # -################# - -# Manually Optimized Functions # -#------------------------------# -@inline function exp{N}(t::TensorNumber{N}) - exp_a = exp(value(t)) - return tensnum_from_deriv(t, exp_a, exp_a, exp_a, exp_a) -end - -function sqrt{N}(t::TensorNumber{N}) - a = value(t) - sqrt_a = sqrt(a) - deriv1 = 0.5 / sqrt_a - sqrt_a_cb = a * sqrt_a - deriv2 = -0.25 / sqrt_a_cb - deriv3 = 0.375 / (a * sqrt_a_cb) - return tensnum_from_deriv(t, sqrt_a, deriv1, deriv2, deriv3) -end - -# Other Functions # -#-----------------# -@inline calc_atan2(y::TensorNumber, x::TensorNumber) = calc_atan2(hessnum(y), hessnum(x)) -@inline calc_atan2(y::Real, x::TensorNumber) = calc_atan2(y, hessnum(x)) -@inline calc_atan2(y::TensorNumber, x::Real) = calc_atan2(hessnum(y), x) - -for Y in (:Real, :TensorNumber), X in (:Real, :TensorNumber) - if !(Y == :Real && X == :Real) - @eval begin - function atan2(y::$Y, x::$X) - z = y/x - a = value(z) - atan2_a = calc_atan2(y, x) - deriv1 = inv(one(a) + a*a) - abs2_deriv1 = -2 * abs2(deriv1) - deriv2 = a * abs2_deriv1 - deriv3 = abs2_deriv1 - (4 * a * deriv1 * deriv2) - return tensnum_from_deriv(z, atan2_a, deriv1, deriv2, deriv3) - end - end - end -end diff --git a/src/api.jl b/src/api.jl new file mode 100644 index 00000000..0252f0e1 --- /dev/null +++ b/src/api.jl @@ -0,0 +1,64 @@ +################# +# API Utilities # +################# + +const KWARG_DEFAULTS = (:all => false, :chunk => nothing, :multithread => false, + :input_length => nothing, :input_mutates => false, + :output_length => nothing, :output_mutates => false) + +iskwarg(ex) = isa(ex, Expr) && (ex.head == :kw || ex.head == :(=)) + +function separate_kwargs(args) + # if called as `f(args...; kwargs...)`, i.e. with a semicolon + if isa(first(args), Expr) && first(args).head == :parameters + kwargs = first(args).args + args = args[2:end] + else # if called as `f(args..., kwargs...)`, i.e. 
with a comma + i = findfirst(iskwarg, args) + if i == 0 + kwargs = tuple() + else + kwargs = args[i:end] + args = args[1:i-1] + end + end + return args, kwargs +end + +function arrange_kwargs(kwargs, defaults, order) + badargs = setdiff(map(kw -> kw.args[1], kwargs), order) + @assert isempty(badargs) "unrecognized keyword arguments: $(badargs)" + return [:(Val{$(getkw(kwargs, kwsym, defaults))}) for kwsym in order] +end + +function getkw(kwargs, kwsym, defaults) + for kwexpr in kwargs + if kwexpr.args[1] == kwsym + return kwexpr.args[2] + end + end + return default_value(defaults, kwsym) +end + +function default_value(defaults, kwsym) + for kwpair in defaults + if kwpair.first == kwsym + return kwpair.second + end + end + throw(KeyError(kwsym)) +end + +const AUTO_CHUNK_THRESHOLD = 11 + +function pick_chunk(input_length) + if input_length <= AUTO_CHUNK_THRESHOLD + return input_length + else + # Constrained to chunk <= AUTO_CHUNK_THRESHOLD, minimize (in order of priority): + # 1. the number of chunks that need to be computed + # 2. the number of "left over" perturbations in the final chunk + nchunks = round(Int, input_length / AUTO_CHUNK_THRESHOLD, RoundUp) + return round(Int, input_length / nchunks, RoundUp) + end +end diff --git a/src/api/ForwardDiffResult.jl b/src/api/ForwardDiffResult.jl deleted file mode 100644 index 147ffcea..00000000 --- a/src/api/ForwardDiffResult.jl +++ /dev/null @@ -1,172 +0,0 @@ -# This file contains methods to handle the results -# of ForwardDiff calculations (generally either -# ForwardDiffNumbers or Arrays of ForwardDiffNumbers. - -immutable ForwardDiffResult{T} - data::T - ForwardDiffResult(data::Real) = new(data) - ForwardDiffResult(data::Array) = new(data) -end - -ForwardDiffResult{T}(data::T) = ForwardDiffResult{T}(data) - -data(result::ForwardDiffResult) = result.data - -########## -# Values # -########## -value!(output, result::ForwardDiffResult) = get_value!(output, data(result)) -value(result::ForwardDiffResult) = get_value(data(result)) - -function _load_value!(output::Array, arr::Array) - @simd for i in eachindex(output) - @inbounds output[i] = get_value(arr[i]) - end - return output -end - -function get_value!(output::Array, arr::Array) - @assert length(arr) == length(output) - return _load_value!(output, arr) -end - -get_value{F}(arr::Array{F}) = _load_value!(similar(arr, eltype(F)), arr) -get_value(n::Real) = n -get_value(n::ForwardDiffNumber) = value(n) - -############### -# Derivatives # -############### -derivative!(output, result::ForwardDiffResult) = get_derivative!(output, data(result)) -derivative(result::ForwardDiffResult) = get_derivative(data(result)) - -function _load_derivative!(output::Array, arr::Array) - @simd for i in eachindex(output) - @inbounds output[i] = get_derivative(arr[i]) - end - return output -end - -function get_derivative!(output::Array, arr::Array) - @assert length(output) == length(arr) - return _load_derivative!(output, arr) -end - -get_derivative{F}(arr::Array{F}) = _load_derivative!(similar(arr, eltype(F)), arr) -get_derivative(n::Real) = zero(n) -get_derivative(n::ForwardDiffNumber{1}) = first(grad(n)) - -############# -# Gradients # -############# -gradient!(output, result::ForwardDiffResult) = get_gradient!(output, data(result)) -gradient(result::ForwardDiffResult) = get_gradient(data(result)) - -function _load_gradient!(output, n::ForwardDiffNumber) - @simd for i in eachindex(output) - @inbounds output[i] = grad(n, i) - end - return output -end - -function get_gradient!{N}(output, n::ForwardDiffNumber{N}) - 
@assert length(output) == N - return _load_gradient!(output, n) -end - -get_gradient{N,T}(n::ForwardDiffNumber{N,T,NTuple{N,T}}) = _load_gradient!(Array(T, N), n) -get_gradient{N,T}(n::ForwardDiffNumber{N,T,Vector{T}}) = grad(n) - -############# -# Jacobians # -############# -jacobian!(output, result::ForwardDiffResult) = get_jacobian!(output, data(result)) -jacobian(result::ForwardDiffResult) = get_jacobian(data(result)) - -function _load_jacobian!(output, arr::Array) - nrows, ncols = size(output) - for j in 1:ncols - @simd for i in 1:nrows - @inbounds output[i,j] = grad(arr[i], j) - end - end - return output -end - -function get_jacobian!{F}(output, arr::Array{F}) - @assert size(output, 1) == length(arr) - @assert size(output, 2) == npartials(F) - return _load_jacobian!(output, arr) -end - -function get_jacobian{F}(arr::Array{F}) - output = Array(eltype(F), length(arr), npartials(F)) - return _load_jacobian!(output, arr) -end - -############ -# Hessians # -############ -hessian!(output, result::ForwardDiffResult) = get_hessian!(output, data(result)) -hessian(result::ForwardDiffResult) = get_hessian(data(result)) - -function _load_hessian!{N}(output, n::ForwardDiffNumber{N}) - q = 1 - for i in 1:N - @simd for j in 1:i - val = hess(n, q) - @inbounds output[i, j] = val - @inbounds output[j, i] = val - q += 1 - end - end - return output -end - -function get_hessian!{N}(output, n::ForwardDiffNumber{N}) - @assert size(output) == (N, N) - return _load_hessian!(output, n) -end - -get_hessian{N,T}(n::ForwardDiffNumber{N,T}) = _load_hessian!(Array(T, N, N), n) - -############ -# Hessians # -############ -tensor!(output, result::ForwardDiffResult) = get_tensor!(output, data(result)) -tensor(result::ForwardDiffResult) = get_tensor(data(result)) - -function _load_tensor!{N}(output, n::ForwardDiffNumber{N}) - p = 1 - for i in 1:N - for j in i:N - for k in i:j - @inbounds output[j, k, i] = tens(n, p) - p += 1 - end - end - for j in 1:(i-1) - for k in 1:j - @inbounds output[j, k, i] = output[i, j, k] - end - end - for j in i:N - for k in 1:(i-1) - @inbounds output[j, k, i] = output[i, j, k] - end - end - for j in 1:N - for k in (j+1):N - @inbounds output[j, k, i] = output[k, j, i] - end - end - end - return output -end - -function get_tensor!{N}(output, n::ForwardDiffNumber{N}) - @assert size(output) == (N, N, N) - return _load_tensor!(output, n) -end - -get_tensor{N,T}(n::ForwardDiffNumber{N,T}) = _load_tensor!(Array(T, N, N, N), n) diff --git a/src/api/api.jl b/src/api/api.jl deleted file mode 100644 index 31670285..00000000 --- a/src/api/api.jl +++ /dev/null @@ -1,33 +0,0 @@ -# Note for the files in this folder: -# Following convention, methods whose names are prefixed -# with an underscore are unsafe to use outside of a strictly -# controlled context - such methods assume that all -# boundary-checking is done by the caller. - -const tuple_usage_threshold = 10 -const default_chunk_size = 0 - -abstract AllResults - -function check_chunk_size(xlen::Int, chunk_size::Int) - if chunk_size != default_chunk_size - @assert chunk_size > 0 "Invalid chunk_size: $chunk_size. chunk_size cannot be negative." 
- @assert xlen % chunk_size == 0 "Length of input vector is indivisible by chunk size (length(x) = $xlen, chunk size = $chunk_size)" - end -end - -function chunk_size_matches_vec_mode(xlen::Int, chunk_size::Int) - return (chunk_size == default_chunk_size) || (chunk_size == xlen) -end - -include("cache.jl") - -const dummy_cache = make_dummy_cache() - -include("ForwardDiffResult.jl") -include("derivative.jl") -include("gradient.jl") -include("jacobian.jl") -include("hessian.jl") -include("tensor.jl") -include("deprecated.jl") diff --git a/src/api/cache.jl b/src/api/cache.jl deleted file mode 100644 index 2b455173..00000000 --- a/src/api/cache.jl +++ /dev/null @@ -1,110 +0,0 @@ -############################################ -# Methods for building work vectors/tuples # -############################################ -typealias GradNumVec{N,T} GradientNumber{N,T,Vector{T}} -typealias GradNumTup{N,T} GradientNumber{N,T,NTuple{N,T}} - -build_workvec{F}(::Type{F}, xlen) = Vector{F}(xlen) - -@generated function workvec_eltype{F,T,xlen,chunk_size}(::Type{F}, ::Type{T}, - ::Type{Val{xlen}}, - ::Type{Val{chunk_size}}) - if chunk_size == default_chunk_size - C = xlen > tuple_usage_threshold ? Vector{T} : NTuple{xlen,T} - return :($F{$xlen,$T,$C}) - else - return :($F{$chunk_size,$T,NTuple{$chunk_size,$T}}) - end -end - -partials_type{N,T,C}(::Type{GradientNumber{N,T,C}}) = Partials{T,C} -partials_type{N,T,C}(::Type{HessianNumber{N,T,C}}) = partials_type(GradientNumber{N,T,C}) -partials_type{N,T,C}(::Type{TensorNumber{N,T,C}}) = partials_type(GradientNumber{N,T,C}) - -function build_partials{N,T}(::Type{GradNumVec{N,T}}) - chunk_arr = Array(Partials{T,Vector{T}}, N) - @simd for i in eachindex(chunk_arr) - @inbounds chunk_arr[i] = Partials(setindex!(zeros(T, N), one(T), i)) - end - return chunk_arr -end - -function build_partials{N,T}(::Type{GradNumTup{N,T}}) - z = zero(T) - o = one(T) - partials_chunk = Vector{Partials{T, NTuple{N,T}}}(N) - @simd for i in eachindex(partials_chunk) - @inbounds partials_chunk[i] = Partials(ntuple(x -> ifelse(x == i, o, z), Val{N})) - end - return partials_chunk -end - -build_partials{N,T,C}(::Type{HessianNumber{N,T,C}}) = build_partials(GradientNumber{N,T,C}) -build_partials{N,T,C}(::Type{TensorNumber{N,T,C}}) = build_partials(GradientNumber{N,T,C}) - -zeros_type{N,T,C}(::Type{GradientNumber{N,T,C}}) = Partials{T,C} -zeros_type{N,T,C}(::Type{HessianNumber{N,T,C}}) = Vector{T} -zeros_type{N,T,C}(::Type{TensorNumber{N,T,C}}) = Vector{T} - -build_zeros{N,T}(::Type{GradNumVec{N,T}}) = Partials(zeros(T, N)) -build_zeros{N,T}(::Type{GradNumTup{N,T}}) = Partials(zero_tuple(NTuple{N,T})) -build_zeros{N,T,C}(::Type{HessianNumber{N,T,C}}) = zeros(T, halfhesslen(N)) -build_zeros{N,T,C}(::Type{TensorNumber{N,T,C}}) = zeros(T, halftenslen(N)) - -####################### -# Cache Types/Methods # -####################### -immutable ForwardDiffCache - workvec_cache::Function - partials_cache::Function - zeros_cache::Function -end - -function ForwardDiffCache() - const workvec_dict = Dict() - const partials_dict = Dict() - const zeros_dict = Dict() - workvec_cache{F}(::Type{F}, xlen) = cache_retrieve!(workvec_dict, build_workvec, F, xlen) - partials_cache{F}(::Type{F}) = cache_retrieve!(partials_dict, build_partials, F) - zeros_cache{F}(::Type{F}) = cache_retrieve!(zeros_dict, build_zeros, F) - return ForwardDiffCache(workvec_cache, partials_cache, zeros_cache) -end - -function make_dummy_cache() - workvec_cache{F}(::Type{F}, xlen) = build_workvec(F, xlen) - partials_cache{F}(::Type{F}) = 
build_partials(F) - zeros_cache{F}(::Type{F}) = build_zeros(F) - return ForwardDiffCache(workvec_cache, partials_cache, zeros_cache) -end - -function cache_retrieve!(dict, build_func, args...) - if haskey(dict, args) - return dict[args] - else - item = build_func(args...) - dict[args] = item - return item - end -end - -# Retrieval methods # -#-------------------# -function get_workvec!{F1,T,xlen,chunk_size}(cache::ForwardDiffCache, - ::Type{F1}, ::Type{T}, - X::Type{Val{xlen}}, - C::Type{Val{chunk_size}}) - F2 = workvec_eltype(F1, T, X, C) - return cache.workvec_cache(F2, xlen)::Vector{F2} -end - -function get_workvec!{F}(cache::ForwardDiffCache, ::Type{F}, xlen::Int) - return cache.workvec_cache(F, xlen)::Vector{F} -end - -function get_partials!{F}(cache::ForwardDiffCache, ::Type{F}) - return cache.partials_cache(F)::Vector{partials_type(F)} -end - -function get_zeros!{F}(cache::ForwardDiffCache, ::Type{F}) - return cache.zeros_cache(F)::zeros_type(F) -end diff --git a/src/api/deprecated.jl b/src/api/deprecated.jl deleted file mode 100644 index 31a05cb7..00000000 --- a/src/api/deprecated.jl +++ /dev/null @@ -1,33 +0,0 @@ -using Base.depwarn - -Base.@deprecate forwarddiff_gradient(f::Function, T::DataType; fadtype::Symbol=:dual, args...) ForwardDiff.gradient(f, mutates=false) -Base.@deprecate forwarddiff_jacobian(f::Function, T::DataType; fadtype::Symbol=:dual, args...) ForwardDiff.jacobian(f, mutates=false) -Base.@deprecate forwarddiff_hessian(f::Function, T::DataType; fadtype::Symbol=:dual, args...) ForwardDiff.hessian(f, mutates=false) -Base.@deprecate forwarddiff_tensor(f::Function, T::DataType; fadtype::Symbol=:dual, args...) ForwardDiff.tensor(f, mutates=false) - -function depr_inplace_fad(fad_func, f) - warn("Addendum to the deprecation warning above:\n"* - "The depr_inplace_fad function is actually only meant to be used to patch over the old API for mutating functions. Instead of:\n"* - "\tdeprecated_inplace_fad($fad_func, $f)\n"* - "You should use the following:\n"* - "\t$fad_func($f, mutates=true)\n"* - "Be aware that mutating functions created with the new API take in the output array as the 1st argument rather than the 2nd.") - g! = fad_func(f, mutates=true) - fad!(x,y) = g!(y,x) # old api mutated second argument, not first - return fad! -end - -Base.@deprecate forwarddiff_gradient!(f::Function, T::DataType; fadtype::Symbol=:dual, args...) depr_inplace_fad(ForwardDiff.gradient, f) -Base.@deprecate forwarddiff_jacobian!(f::Function, T::DataType; fadtype::Symbol=:dual, args...) depr_inplace_fad(ForwardDiff.jacobian, f) -Base.@deprecate forwarddiff_hessian!(f::Function, T::DataType; fadtype::Symbol=:dual, args...) depr_inplace_fad(ForwardDiff.hessian, f) -Base.@deprecate forwarddiff_tensor!(f::Function, T::DataType; fadtype::Symbol=:dual, args...) 
depr_inplace_fad(ForwardDiff.tensor, f) - -export forwarddiff_gradient, - forwarddiff_gradient!, - forwarddiff_jacobian, - forwarddiff_jacobian!, - forwarddiff_hessian, - forwarddiff_hessian!, - forwarddiff_tensor, - forwarddiff_tensor!, - deprecated_inplace_fad \ No newline at end of file diff --git a/src/api/derivative.jl b/src/api/derivative.jl deleted file mode 100644 index 1ff285ce..00000000 --- a/src/api/derivative.jl +++ /dev/null @@ -1,46 +0,0 @@ -###################### -# Taking Derivatives # -###################### - -# Exposed API methods # -#---------------------# -@generated function derivative!{A}(output, f, x::Number, ::Type{A}=Void) - if A <: Void - return_stmt = :(derivative!(output, result)) - elseif A <: AllResults - return_stmt = :(derivative!(output, result), result) - else - error("invalid argument $A passed to FowardDiff.derivative") - end - - return quote - result = ForwardDiffResult(f(GradientNumber(x, one(x)))) - $return_stmt - end -end - -@generated function derivative{A}(f, x::Number, ::Type{A}=Void) - - if A <: Void - return_stmt = :(derivative(result)) - elseif A <: AllResults - return_stmt = :(derivative(result), result) - else - error("invalid argument $A passed to FowardDiff.derivative") - end - - return quote - result = ForwardDiffResult(f(GradientNumber(x, one(x)))) - $return_stmt - end -end - -function derivative{A}(f, ::Type{A}=Void; mutates=false) - if mutates - d!(output, x::Number) = ForwardDiff.derivative!(output, f, x, A) - return d! - else - d(x::Number) = ForwardDiff.derivative(f, x, A) - return d - end -end diff --git a/src/api/gradient.jl b/src/api/gradient.jl deleted file mode 100644 index 7034f7da..00000000 --- a/src/api/gradient.jl +++ /dev/null @@ -1,132 +0,0 @@ -#################### -# Taking Gradients # -#################### - -# Exposed API methods # -#---------------------# -@generated function gradient!{T,A}(output::Vector{T}, f, x::Vector, ::Type{A}=Void; - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=dummy_cache) - if A <: Void - return_stmt = :(gradient!(output, result)::Vector{T}) - elseif A <: AllResults - return_stmt = :(gradient!(output, result)::Vector{T}, result) - else - error("invalid argument $A passed to FowardDiff.gradient") - end - - return quote - result = _calc_gradient(f, x, T, chunk_size, cache) - return $return_stmt - end -end - -@generated function gradient{T,A}(f, x::Vector{T}, ::Type{A}=Void; - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=dummy_cache) - if A <: Void - return_stmt = :(gradient(result)::Vector{T}) - elseif A <: AllResults - return_stmt = :(gradient(result)::Vector{T}, result) - else - error("invalid argument $A passed to FowardDiff.gradient") - end - - return quote - result = _calc_gradient(f, x, T, chunk_size, cache) - return $return_stmt - end -end - -function gradient{A}(f, ::Type{A}=Void; - mutates::Bool=false, - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=ForwardDiffCache()) - if mutates - function g!(output::Vector, x::Vector) - return ForwardDiff.gradient!(output, f, x, A; - chunk_size=chunk_size, - cache=cache) - end - return g! 
- else - function g(x::Vector) - return ForwardDiff.gradient(f, x, A; - chunk_size=chunk_size, - cache=cache) - end - return g - end -end - -# Calculate gradient of a given function # -#----------------------------------------# -function _calc_gradient{S}(f, x::Vector, ::Type{S}, - chunk_size::Int, - cache::ForwardDiffCache) - X = Val{length(x)} - C = Val{chunk_size} - return _calc_gradient(f, x, S, X, C, cache) -end - -@generated function _calc_gradient{T,S,xlen,chunk_size}(f, x::Vector{T}, ::Type{S}, - X::Type{Val{xlen}}, - C::Type{Val{chunk_size}}, - cache::ForwardDiffCache) - check_chunk_size(xlen, chunk_size) - G = workvec_eltype(GradientNumber, T, Val{xlen}, Val{chunk_size}) - if chunk_size_matches_vec_mode(xlen, chunk_size) - # Vector-Mode - ResultType = switch_eltype(G, S) - body = quote - @simd for i in 1:xlen - @inbounds gradvec[i] = G(x[i], partials[i]) - end - - result::$ResultType = f(gradvec) - end - else - # Chunk-Mode - ChunkType = switch_eltype(G, S) - ResultType = GradientNumber{xlen,S,Vector{S}} - body = quote - gradzeros = get_zeros!(cache, G) - output = Vector{S}(xlen) - - @simd for i in 1:xlen - @inbounds gradvec[i] = G(x[i], gradzeros) - end - - local chunk_result::$ChunkType - - for i in 1:chunk_size:xlen - offset = i-1 - - @simd for j in 1:chunk_size - q = j+offset - @inbounds gradvec[q] = G(x[q], partials[j]) - end - - chunk_result = f(gradvec) - - @simd for j in 1:chunk_size - q = j+offset - @inbounds output[q] = grad(chunk_result, j) - @inbounds gradvec[q] = G(x[q], gradzeros) - end - end - - result::$ResultType = ($ResultType)(value(chunk_result), output) - end - end - - return quote - G = $G - gradvec = get_workvec!(cache, GradientNumber, T, X, C) - partials = get_partials!(cache, G) - - $body - - return ForwardDiffResult(result) - end -end diff --git a/src/api/hessian.jl b/src/api/hessian.jl deleted file mode 100644 index 04453501..00000000 --- a/src/api/hessian.jl +++ /dev/null @@ -1,225 +0,0 @@ -################### -# Taking Hessians # -################### - -# Exposed API methods # -#---------------------# -@generated function hessian!{T,A}(output::Matrix{T}, f, x::Vector, ::Type{A}=Void; - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=dummy_cache) - if A <: Void - return_stmt = :(hessian!(output, result)::Matrix{T}) - elseif A <: AllResults - return_stmt = :(hessian!(output, result)::Matrix{T}, result) - else - error("invalid argument $A passed to FowardDiff.hessian") - end - - return quote - result = _calc_hessian(f, x, T, chunk_size, cache) - return $return_stmt - end -end - -@generated function hessian{T,A}(f, x::Vector{T}, ::Type{A}=Void; - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=dummy_cache) - if A <: Void - return_stmt = :(hessian(result)::Matrix{T}) - elseif A <: AllResults - return_stmt = :(hessian(result)::Matrix{T}, result) - else - error("invalid argument $A passed to FowardDiff.hessian") - end - - return quote - result = _calc_hessian(f, x, T, chunk_size, cache) - return $return_stmt - end -end - -function hessian{A}(f, ::Type{A}=Void; - mutates::Bool=false, - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=ForwardDiffCache()) - if mutates - function h!(output::Matrix, x::Vector) - return ForwardDiff.hessian!(output, f, x, A; - chunk_size=chunk_size, - cache=cache) - end - return h! 
- else - function h(x::Vector) - return ForwardDiff.hessian(f, x, A; - chunk_size=chunk_size, - cache=cache) - end - return h - end -end - -# Calculate Hessian of a given function # -#---------------------------------------# -function _calc_hessian{S}(f, x::Vector, ::Type{S}, - chunk_size::Int, - cache::ForwardDiffCache) - X = Val{length(x)} - C = Val{chunk_size} - return _calc_hessian(f, x, S, X, C, cache) -end - -gradnum_type{N,T,C}(::Type{HessianNumber{N,T,C}}) = GradientNumber{N,T,C} - -@generated function _calc_hessian{T,S,xlen,chunk_size}(f, x::Vector{T}, ::Type{S}, - X::Type{Val{xlen}}, - C::Type{Val{chunk_size}}, - cache::ForwardDiffCache) - check_chunk_size(xlen, chunk_size) - - # chunk_size is incremented by one when users - # input a non-xlen chunk_size (this allows - # simplification of loop alignment for the - # chunk-based calculation code) - vec_mode_bool = chunk_size_matches_vec_mode(xlen, chunk_size) - C2 = Val{vec_mode_bool ? chunk_size : chunk_size+1} - H = workvec_eltype(HessianNumber, T, Val{xlen}, C2) - N = npartials(H) - G = gradnum_type(H) - - if vec_mode_bool - # Vector-Mode - ResultHess = switch_eltype(H, S) - body = quote - @simd for i in 1:xlen - @inbounds hessvec[i] = HessianNumber(G(x[i], partials[i]), hesszeros) - end - - result::$ResultHess = f(hessvec) - end - else - # Chunk-Mode - ChunkType = switch_eltype(H, S) - ResultGrad = GradientNumber{xlen,S,Vector{S}} - ResultHess = HessianNumber{xlen,S,Vector{S}} - body = quote - N = $N - M = N-1 - gradzeros = get_zeros!(cache, G) - output_grad = Vector{S}(xlen) - output_hess = Vector{S}(halfhesslen(xlen)) - - @simd for i in 1:xlen - @inbounds hessvec[i] = HessianNumber(G(x[i], gradzeros), hesszeros) - end - - # The below loop fills triangular blocks - # along diagonal. The size of these blocks - # is determined by the chunk size. - # - # For example, if N = 3 and xlen = 6, the - # numbers inside the slots below indicate the - # iteration of the loop (i.e. ith call of f) - # in which they are filled: - # - # Hessian matrix: - # ------------------------- - # | 1 | 1 | | | | | - # ------------------------- - # | 1 | 1 | | | | | - # ------------------------- - # | | | 2 | 2 | | | - # ------------------------- - # | | | 2 | 2 | | | - # ------------------------- - # | | | | | 3 | 3 | - # ------------------------- - # | | | | | 3 | 3 | - # ------------------------- - - local chunk_result::$ChunkType - - for i in 1:M:xlen - offset = i-1 - - @simd for j in 1:M - q = j+offset - @inbounds hessvec[q] = HessianNumber(G(x[q], partials[j]), hesszeros) - end - - chunk_result = f(hessvec) - - q = 1 - for j in i:(M+offset) - for k in i:j - @inbounds output_hess[hess_inds(j, k)] = hess(chunk_result, q) - q += 1 - end - end - - @simd for j in 1:M - q = j+offset - @inbounds output_grad[q] = grad(chunk_result, j) - @inbounds hessvec[q] = HessianNumber(G(x[q], gradzeros), hesszeros) - end - end - - # The below loop fills in the rest. Once - # again, using N = 3 and xlen = 6, with each - # iteration (i.e. 
ith call of f) filling the - # corresponding slots, and where 'x' indicates - # previously filled slots: - # - # ------------------------- - # | x | x | 1 | 1 | 2 | 2 | - # ------------------------- - # | x | x | 3 | 3 | 4 | 4 | - # ------------------------- - # | 1 | 3 | x | x | 5 | 5 | - # ------------------------- - # | 1 | 3 | x | x | 6 | 6 | - # ------------------------- - # | 2 | 4 | 5 | 6 | x | x | - # ------------------------- - # | 2 | 4 | 5 | 6 | x | x | - # ------------------------- - - for offset in M:M:(xlen-M) - col_offset = offset - M - for j in 1:M - col = col_offset + j - @inbounds hessvec[col] = HessianNumber(G(x[col], partials[1]), hesszeros) - for row_offset in offset:M:(xlen-1) - for i in 1:M - row = row_offset + i - @inbounds hessvec[row] = HessianNumber(G(x[row], partials[i+1]), hesszeros) - end - - chunk_result = f(hessvec) - - for i in 1:M - row = row_offset + i - q = halfhesslen(i) + 1 - @inbounds output_hess[hess_inds(row, col)] = hess(chunk_result, q) - @inbounds hessvec[row] = HessianNumber(G(x[row], gradzeros), hesszeros) - end - end - @inbounds hessvec[col] = HessianNumber(G(x[col], gradzeros), hesszeros) - end - end - - result::$ResultHess = ($ResultHess)(($ResultGrad)(value(chunk_result), output_grad), output_hess) - end - end - - return quote - H, G = $H, $G - hessvec = get_workvec!(cache, HessianNumber, T, X, $C2) - partials = get_partials!(cache, H) - hesszeros = get_zeros!(cache, H) - - $body - - return ForwardDiffResult(result) - end -end diff --git a/src/api/jacobian.jl b/src/api/jacobian.jl deleted file mode 100644 index 39e9e605..00000000 --- a/src/api/jacobian.jl +++ /dev/null @@ -1,173 +0,0 @@ -#################### -# Taking Jacobians # -#################### - -# Exposed API methods # -#---------------------# -@generated function jacobian!{T,A}(output::Matrix{T}, f, x::Vector, ::Type{A}=Void; - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=dummy_cache) - if A <: Void - return_stmt = :(jacobian!(output, result)::Matrix{T}) - elseif A <: AllResults - return_stmt = :(jacobian!(output, result)::Matrix{T}, result) - else - error("invalid argument $A passed to FowardDiff.jacobian") - end - - return quote - result = _calc_jacobian(f, x, T, chunk_size, cache) - return $return_stmt - end -end - -@generated function jacobian{T,A}(f, x::Vector{T}, ::Type{A}=Void; - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=dummy_cache) - if A <: Void - return_stmt = :(jacobian(result)::Matrix{T}) - elseif A <: AllResults - return_stmt = :(jacobian(result)::Matrix{T}, result) - else - error("invalid argument $A passed to FowardDiff.jacobian") - end - - return quote - result = _calc_jacobian(f, x, T, chunk_size, cache) - return $return_stmt - end -end - -function jacobian{A}(f, ::Type{A}=Void; - mutates::Bool=false, - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=ForwardDiffCache(), - output_length::Int=0) - # if output_length > 0, assume that f is of - # the form f!(output, x), and generate the - # appropriate closure - if output_length > 0 - output_cache = ForwardDiffCache() - newf = (x::Vector) -> begin - output = get_workvec!(output_cache, eltype(x), output_length) - f(output, x) - return output - end - else - newf = f - end - - if mutates - function j!(output::Matrix, x::Vector) - return ForwardDiff.jacobian!(output, newf, x, A; - chunk_size=chunk_size, - cache=cache) - end - return j! 
- else - function j(x::Vector) - return ForwardDiff.jacobian(newf, x, A; - chunk_size=chunk_size, - cache=cache) - end - return j - end -end - -# Calculate Jacobian of a given function # -#----------------------------------------# -function _calc_jacobian{S}(f, x::Vector, ::Type{S}, - chunk_size::Int, - cache::ForwardDiffCache) - X = Val{length(x)} - C = Val{chunk_size} - return _calc_jacobian(f, x, S, X, C, cache) -end - -@generated function _calc_jacobian{T,S,xlen,chunk_size}(f, x::Vector{T}, ::Type{S}, - X::Type{Val{xlen}}, - C::Type{Val{chunk_size}}, - cache::ForwardDiffCache) - check_chunk_size(xlen, chunk_size) - G = workvec_eltype(GradientNumber, T, Val{xlen}, Val{chunk_size}) - if chunk_size_matches_vec_mode(xlen, chunk_size) - # Vector-Mode - ResultType = switch_eltype(G, S) - body = quote - @simd for i in 1:xlen - @inbounds gradvec[i] = G(x[i], partials[i]) - end - - result::Vector{$ResultType} = f(gradvec) - end - else - # Chunk-Mode - ChunkType = switch_eltype(G, S) - ResultType = GradientNumber{xlen,S,Vector{S}} - body = quote - gradzeros = get_zeros!(cache, G) - - @simd for i in 1:xlen - @inbounds gradvec[i] = G(x[i], gradzeros) - end - - # Perform the first chunk "manually" to retrieve - # the info we need to build our output - - @simd for i in 1:chunk_size - @inbounds gradvec[i] = G(x[i], partials[i]) - end - - first_result::Vector{$ChunkType} = f(gradvec) - - nrows, ncols = length(first_result), xlen - output = Vector{S}[Vector{S}(ncols) for i in 1:nrows] - - for j in 1:chunk_size - @simd for i in 1:nrows - @inbounds output[i][j] = grad(first_result[i], j) - end - @inbounds gradvec[j] = G(x[j], gradzeros) - end - - # Perform the rest of the chunks, filling in the output as we go - - local chunk_result::Vector{$ChunkType} - - for i in (chunk_size+1):chunk_size:xlen - offset = i-1 - - @simd for j in 1:chunk_size - m = j+offset - @inbounds gradvec[m] = G(x[m], partials[j]) - end - - chunk_result = f(gradvec) - - for j in 1:chunk_size - m = j+offset - @simd for n in 1:nrows - @inbounds output[n][m] = grad(chunk_result[n], j) - end - @inbounds gradvec[m] = G(x[m], gradzeros) - end - end - - result = Vector{$ResultType}(nrows) - - @simd for i in eachindex(result) - @inbounds result[i] = ($ResultType)(value(chunk_result[i]), output[i]) - end - end - end - - return quote - G = $G - gradvec = get_workvec!(cache, GradientNumber, T, X, C) - partials = get_partials!(cache, G) - - $body - - return ForwardDiffResult(result) - end -end diff --git a/src/api/tensor.jl b/src/api/tensor.jl deleted file mode 100644 index 92c86351..00000000 --- a/src/api/tensor.jl +++ /dev/null @@ -1,109 +0,0 @@ -################## -# Taking Tensors # -################## - -# Exposed API methods # -#---------------------# -@generated function tensor!{T,A}(output::Array{T,3}, f, x::Vector, ::Type{A}=Void; - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=dummy_cache) - if A <: Void - return_stmt = :(tensor!(output, result)::Array{T,3}) - elseif A <: AllResults - return_stmt = :(tensor!(output, result)::Array{T,3}, result) - else - error("invalid argument $A passed to FowardDiff.tensor") - end - - return quote - result = _calc_tensor(f, x, T, chunk_size, cache) - return $return_stmt - end -end - -@generated function tensor{T,A}(f, x::Vector{T}, ::Type{A}=Void; - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=dummy_cache) - if A <: Void - return_stmt = :(tensor(result)::Array{T,3}) - elseif A <: AllResults - return_stmt = :(tensor(result)::Array{T,3}, result) - else - error("invalid 
argument $A passed to FowardDiff.tensor") - end - - return quote - result = _calc_tensor(f, x, T, chunk_size, cache) - return $return_stmt - end -end - -function tensor{A}(f, ::Type{A}=Void; - mutates::Bool=false, - chunk_size::Int=default_chunk_size, - cache::ForwardDiffCache=ForwardDiffCache()) - if mutates - function t!(output::Array, x::Vector) - return ForwardDiff.tensor!(output, f, x, A; - chunk_size=chunk_size, - cache=cache) - end - return t! - else - function t(x::Vector) - return ForwardDiff.tensor(f, x, A; - chunk_size=chunk_size, - cache=cache) - end - return t - end -end - -# Calculate third order Taylor series term of a given function # -#--------------------------------------------------------------# -function _calc_tensor{S}(f, x::Vector, ::Type{S}, - chunk_size::Int, - cache::ForwardDiffCache) - X = Val{length(x)} - C = Val{chunk_size} - return _calc_tensor(f, x, S, X, C, cache) -end - -hessnum_type{N,T,C}(::Type{TensorNumber{N,T,C}}) = HessianNumber{N,T,C} - -@generated function _calc_tensor{T,S,xlen,chunk_size}(f, x::Vector{T}, ::Type{S}, - X::Type{Val{xlen}}, - C::Type{Val{chunk_size}}, - cache::ForwardDiffCache) - check_chunk_size(xlen, chunk_size) - - F = workvec_eltype(TensorNumber, T, Val{xlen}, Val{chunk_size}) - H = hessnum_type(F) - G = gradnum_type(H) - - if chunk_size_matches_vec_mode(xlen, chunk_size) - # Vector-Mode - ResultType = switch_eltype(F, S) - body = quote - @simd for i in 1:xlen - @inbounds tensvec[i] = TensorNumber(H(G(x[i], partials[i]), hesszeros), tenszeros) - end - - result::$ResultType = f(tensvec) - end - else - error("chunk_size configuration for ForwardDiff.tensor is not yet supported") - end - - return quote - F, H, G = $F, $H, $G - tensvec = get_workvec!(cache, TensorNumber, T, X, C) - partials = get_partials!(cache, F) - tenszeros = get_zeros!(cache, F) - hesszeros = get_zeros!(cache, H) - - $body - - return ForwardDiffResult(result) - end -end diff --git a/src/cache.jl b/src/cache.jl new file mode 100644 index 00000000..303328b1 --- /dev/null +++ b/src/cache.jl @@ -0,0 +1,41 @@ +const CACHE = ntuple(n -> Dict{DataType,Any}(), Threads.nthreads()) + +function clearcache!() + for d in CACHE + empty!(d) + end +end + +@eval cachefetch!(D::DataType, L::DataType) = $(Expr(:tuple, [:(cachefetch!($i, D, L)) for i in 1:Threads.nthreads()]...)) + +function cachefetch!{N,T,L}(tid::Integer, ::Type{DiffNumber{N,T}}, ::Type{Val{L}}) + K = Tuple{DiffNumber{N,T},L} + V = Vector{DiffNumber{N,T}} + d = CACHE[tid] + if haskey(d, K) + workvec = d[K]::V + else + workvec = V(L) + d[K] = workvec + end + return workvec::V +end + +function cachefetch!{N,T,Z}(tid::Integer, ::Type{Partials{N,T}}, ::Type{Val{Z}}) + K = Tuple{Partials{N,T},Z} + V = Vector{Partials{N,T}} + d = CACHE[tid] + if haskey(d, K) + seed_partials = d[K]::V + else + seed_partials = V(Z) + x = one(T) + for i in 1:Z + seed_partials[i] = setindex(zero(Partials{N,T}), x, i) + end + d[K] = seed_partials + end + return seed_partials::V +end + +cachefetch!{N,T}(tid::Integer, ::Type{Partials{N,T}}) = cachefetch!(tid, Partials{N,T}, Val{N}) diff --git a/src/derivative.jl b/src/derivative.jl new file mode 100644 index 00000000..adcc6b14 --- /dev/null +++ b/src/derivative.jl @@ -0,0 +1,78 @@ +############################ +# @derivative!/@derivative # +############################ + +const DERIVATIVE_KWARG_ORDER = (:all,) +const DERIVATIVE_F_KWARG_ORDER = (:all, :output_mutates) + +macro derivative!(args...) 
+ args, kwargs = separate_kwargs(args) + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, DERIVATIVE_KWARG_ORDER) + return esc(:(ForwardDiff.derivative!($(args...), $(arranged_kwargs...)))) +end + +macro derivative(args...) + args, kwargs = separate_kwargs(args) + if length(args) == 1 + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, DERIVATIVE_F_KWARG_ORDER) + else + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, DERIVATIVE_KWARG_ORDER) + end + return esc(:(ForwardDiff.derivative($(args...), $(arranged_kwargs...)))) +end + +########################## +# derivative!/derivative # +########################## + +function derivative!(f, output::Array, x::Real, A::DataType) + return handle_deriv_result!(output, f(DiffNumber(x, one(x))), A) +end + +function derivative(f, x::Real, A::DataType) + return handle_deriv_result(f(DiffNumber(x, one(x))), A) +end + +@generated function derivative{output_mutates}(f, A::DataType, ::Type{Val{output_mutates}}) + if output_mutates + return quote + d!(output::Array, x::Real) = derivative!(f, output, x, A) + return d! + end + else + return quote + d(x::Real) = derivative(f, x, A) + return d + end + end +end + +############################### +# Handling Derivative Results # +############################### + +handle_deriv_result(result::DiffNumber, ::Type{Val{false}}) = partials(result, 1) + +function handle_deriv_result(result::DiffNumber, ::Type{Val{true}}) + return value(result), partials(result, 1) +end + +function handle_deriv_result{T}(result::Array{T}, A::DataType) + return handle_deriv_result!(similar(result, numtype(T)), result, A) +end + +function handle_deriv_result!(output::Array, result::Array, ::Type{Val{true}}) + valoutput = similar(output) + for i in eachindex(result) + valoutput[i] = value(result[i]) + output[i] = partials(result[i], 1) + end + return valoutput, output +end + +function handle_deriv_result!(output::Array, result::Array, ::Type{Val{false}}) + for i in eachindex(result) + output[i] = partials(result[i], 1) + end + return output +end
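For illustration, a usage sketch of the derivative API above (an editor's sketch, not part of the patch; `f` is a made-up example function, and the resulting values assume the DiffNumber arithmetic defined in src/DiffNumber.jl):

    f(x) = x^2 + 2x
    df = ForwardDiff.@derivative(f)               # returns the closure d(x::Real)
    df(3.0)                                       # 8.0
    ForwardDiff.@derivative(f, 3.0)               # 8.0 (one-shot form)
    ForwardDiff.@derivative(f, 3.0, all = true)   # (15.0, 8.0), i.e. (value, derivative)

diff --git a/src/gradient.jl new file mode 100644 index 00000000..d6f3b431 --- /dev/null +++ b/src/gradient.jl @@ -0,0 +1,197 @@ +######################## +# @gradient!/@gradient # +######################## + +const GRADIENT_KWARG_ORDER = (:all, :chunk, :input_length, :multithread) +const GRADIENT_F_KWARG_ORDER = (:all, :chunk, :input_length, :multithread, :output_mutates) + +macro gradient!(args...) + args, kwargs = separate_kwargs(args) + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, GRADIENT_KWARG_ORDER) + return esc(:(ForwardDiff.gradient!($(args...), $(arranged_kwargs...)))) +end + +macro gradient(args...) + args, kwargs = separate_kwargs(args) + if length(args) == 1 + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, GRADIENT_F_KWARG_ORDER) + else + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, GRADIENT_KWARG_ORDER) + end + return esc(:(ForwardDiff.gradient($(args...), $(arranged_kwargs...)))) +end + +###################### +# gradient!/gradient # +###################### + +@generated function gradient!{S,A,L}(f, output::Vector{S}, x::Vector, ::Type{Val{A}}, N::DataType, ::Type{Val{L}}, M::DataType) + return quote + val, grad = call_gradient!(f, output, x, N, Val{$(L == nothing ? :(length(x)) : L)}, M) + return $(A ? 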
:(val::S, output::Vector{S}) : :(output::Vector{S})) + end +end + +function gradient(f, x::Vector, A::DataType, N::DataType, L::DataType, M::DataType) + return gradient!(f, similar(x), x, A, N, L, M) +end + +@generated function gradient{A,output_mutates}(f, ::Type{Val{A}}, N::DataType, L::DataType, M::DataType, ::Type{Val{output_mutates}}) + R = A ? :(Tuple{S,Vector{S}}) : :(Vector{S}) + if output_mutates + return quote + g!{S}(output::Vector{S}, x::Vector) = gradient!(f, output, x, Val{A}, N, L, M)::$(R) + return g! + end + else + return quote + g{S}(x::Vector{S}) = gradient(f, x, Val{A}, N, L, M)::$(R) + return g + end + end +end + +################## +# calc_gradient! # +################## +# The below code is pretty ugly, so here's an overview: +# +# `call_gradient!` is the entry point that is called by the API functions. If a chunk size +# isn't given by an upstream caller, `call_gradient!` picks one based on the input length. +# +# `calc_gradient!` is the workhorse function - it generates code for calculating the +# gradient in chunk-mode or vector-mode, depending on the input length and chunk size. +# +# `multi_calc_gradient!` is just like `calc_gradient!`, but uses Julia's multithreading +# capabilities when performing calculations in chunk-mode. +# +# `VEC_MODE_EXPR` is a constant expression for vector-mode that provides the function body +# for `calc_gradient!` and `multi_calc_gradient!` when chunk size equals input length. +# +# `calc_gradient_expr` takes in a vector-mode expression body or chunk-mode expression body +# and returns a completed function body with the input body injected in the correct place. + +@generated function call_gradient!{S,N,L,M}(f, output::Vector{S}, x::Vector, ::Type{Val{N}}, ::Type{Val{L}}, ::Type{Val{M}}) + gradf! = M ? :multi_calc_gradient! : :calc_gradient! + return :($(gradf!)(f, output, x, Val{$(N == nothing ? pick_chunk(L) : N)}, Val{L})::Tuple{S, Vector{S}}) +end + +@generated function calc_gradient!{S,T,N,L}(f, output::Vector{S}, x::Vector{T}, ::Type{Val{N}}, ::Type{Val{L}}) + if N == L + body = VEC_MODE_EXPR + else + remainder = L % N == 0 ? N : L % N + fill_length = L - remainder
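+        # For example, with input length L = 10 and chunk size N = 4:
+        # remainder = 2 and fill_length = 8, so the chunk loop below seeds
+        # coordinates 1:4 and then 5:8, and the trailing "final chunk" block
+        # reseeds 9:10 with a narrower, 2-element set of seed partials.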
+ reseed_partials = remainder == N ? :() : :(seed_partials = cachefetch!(tid, Partials{N,T}, Val{$(remainder)})) + body = quote + workvec::Vector{DiffNumber{N,T}} = cachefetch!(tid, DiffNumber{N,T}, Val{L}) + pzeros = zero(Partials{N,T}) + + @simd for i in 1:L + @inbounds workvec[i] = DiffNumber{N,T}(x[i], pzeros) + end + + for c in 1:$(N):$(fill_length) + @simd for i in 1:N + j = i + c - 1 + @inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i]) + end + result::DiffNumber{N,S} = f(workvec) + @simd for i in 1:N + j = i + c - 1 + @inbounds output[j] = partials(result, i) + @inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros) + end + end + + # Performing the final chunk manually seems to trigger some additional + # optimization heuristics, which results in more efficient memory allocation + $(reseed_partials) + @simd for i in 1:$(remainder) + j = $(fill_length) + i + @inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i]) + end + result::DiffNumber{N,S} = f(workvec) + @simd for i in 1:$(remainder) + j = $(fill_length) + i + @inbounds output[j] = partials(result, i) + @inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros) + end + end + end + return calc_gradient_expr(body) +end + +@generated function multi_calc_gradient!{S,T,N,L}(f, output::Vector{S}, x::Vector{T}, ::Type{Val{N}}, ::Type{Val{L}}) + if N == L + body = VEC_MODE_EXPR + else + nthreads = Threads.nthreads() + remainder = L % N == 0 ? N : L % N + fill_length = L - remainder + reseed_partials = remainder == N ? :() : :(seed_partials = cachefetch!(tid, Partials{N,T}, Val{$(remainder)})) + body = quote + workvecs::NTuple{$(nthreads), Vector{DiffNumber{N,T}}} = cachefetch!(DiffNumber{N,T}, Val{L}) + pzeros = zero(Partials{N,T}) + + for t in 1:$(nthreads) + # must be local, see https://github.com/JuliaLang/julia/issues/14948 + local workvec = workvecs[t] + @simd for i in 1:L + @inbounds workvec[i] = DiffNumber{N,T}(x[i], pzeros) + end + end + + Threads.@threads for c in 1:$(N):$(fill_length) + local workvec = workvecs[Threads.threadid()] + @simd for i in 1:N + j = i + c - 1 + @inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i]) + end + local result::DiffNumber{N,S} = f(workvec) + @simd for i in 1:N + j = i + c - 1 + @inbounds output[j] = partials(result, i) + @inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros) + end + end + + # Performing the final chunk manually seems to trigger some additional + # optimization heuristics, which results in more efficient memory allocation + $(reseed_partials) + workvec = workvecs[tid] + @simd for i in 1:$(remainder) + j = $(fill_length) + i + @inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i]) + end + result::DiffNumber{N,S} = f(workvec) + @simd for i in 1:$(remainder) + j = $(fill_length) + i + @inbounds output[j] = partials(result, i) + @inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros) + end + end + end + return calc_gradient_expr(body) +end + +const VEC_MODE_EXPR = quote + workvec::Vector{DiffNumber{N,T}} = cachefetch!(tid, DiffNumber{N,T}, Val{L}) + @simd for i in 1:L + @inbounds workvec[i] = DiffNumber{N,T}(x[i], seed_partials[i]) + end + result::DiffNumber{N,S} = f(workvec) + @simd for i in 1:L + @inbounds output[i] = partials(result, i) + end +end + +function calc_gradient_expr(body) + return quote + @assert L == length(x) == length(output) + tid = Threads.threadid() + seed_partials::Vector{Partials{N,T}} = cachefetch!(tid, Partials{N,T}) + $(body) + return (value(result)::S, output::Vector{S}) + end +end
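For intuition, a minimal runnable sketch of the chunk-mode seeding rhythm that calc_gradient! generates, using a hypothetical two-partial dual type (ToyDual and toy_chunk_gradient! are illustration names, not part of the patch):

    import Base: +, *

    # A toy dual number carrying a value and two partial derivatives.
    immutable ToyDual
        value::Float64
        partials::NTuple{2,Float64}
    end

    # Just enough arithmetic for the example below.
    +(a::ToyDual, b::ToyDual) = ToyDual(a.value + b.value,
                                        (a.partials[1] + b.partials[1],
                                         a.partials[2] + b.partials[2]))
    *(a::ToyDual, b::ToyDual) = ToyDual(a.value * b.value,
                                        (a.partials[1]*b.value + a.value*b.partials[1],
                                         a.partials[2]*b.value + a.value*b.partials[2]))

    # Seed two coordinates per chunk, evaluate f once per chunk, read the two
    # partials out of the result, then zero the seeds before the next chunk --
    # the same pattern as the generated loop above (assuming N divides L evenly).
    function toy_chunk_gradient!(output, f, x)
        L, N = length(x), 2
        workvec = [ToyDual(x[i], (0.0, 0.0)) for i in 1:L]
        for c in 1:N:L
            workvec[c]   = ToyDual(x[c],   (1.0, 0.0))
            workvec[c+1] = ToyDual(x[c+1], (0.0, 1.0))
            result = f(workvec)
            output[c]   = result.partials[1]
            output[c+1] = result.partials[2]
            workvec[c]   = ToyDual(x[c],   (0.0, 0.0))
            workvec[c+1] = ToyDual(x[c+1], (0.0, 0.0))
        end
        return output
    end

    toy_chunk_gradient!(zeros(4), v -> v[1]*v[1] + v[2]*v[2] + v[3]*v[3] + v[4]*v[4],
                        [1.0, 2.0, 3.0, 4.0]) # == [2.0, 4.0, 6.0, 8.0]

diff --git a/src/jacobian.jl new file mode 100644 index 00000000..117bfce0 --- 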
/dev/null +++ b/src/jacobian.jl @@ -0,0 +1,44 @@ +######################## +# @jacobian!/@jacobian # +######################## + +const JACOBIAN_KWARG_ORDER = (:all, :chunk, :input_length, :output_length) +const JACOBIAN_F_KWARG_ORDER = (:all, :chunk, :input_length, :input_mutates, :output_length, :output_mutates) + +macro jacobian!(args...) + args, kwargs = separate_kwargs(args) + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, JACOBIAN_KWARG_ORDER) + return esc(:(ForwardDiff.jacobian!($(args...), $(arranged_kwargs...)))) +end + +macro jacobian(args...) + args, kwargs = separate_kwargs(args) + if length(args) == 1 + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, JACOBIAN_F_KWARG_ORDER) + else + arranged_kwargs = arrange_kwargs(kwargs, KWARG_DEFAULTS, JACOBIAN_KWARG_ORDER) + end + + return esc(:(ForwardDiff.jacobian($(args...), $(arranged_kwargs...)))) +end + +###################### +# jacobian!/jacobian # +###################### + +@generated function jacobian!(f, output::Matrix, x::Vector, A::DataType, + N::DataType, IL::DataType, OL::DataType) + # TODO +end + +@generated function jacobian(f, x::Vector, A::DataType, N::DataType) + # TODO +end + +@generated function jacobian{allresults, chunk, input_mutates, output_mutates}(f, + ::Type{Val{allresults}}, + ::Type{Val{chunk}}, + ::Type{Val{input_mutates}}, + ::Type{Val{output_mutates}}) + # TODO +end diff --git a/test/DiffNumberTest.jl b/test/DiffNumberTest.jl new file mode 100644 index 00000000..72617378 --- /dev/null +++ b/test/DiffNumberTest.jl @@ -0,0 +1,375 @@ +module DiffNumberTest + +using Base.Test +using ForwardDiff +using ForwardDiff: Partials, DiffNumber, value, partials + +import NaNMath +import Calculus + +const N = 3 +const M = 4 +const T = Float32 + +const PARTIALS = Partials(ntuple(n -> rand(T), Val{N})) +const PRIMAL = rand(T) +const FDNUM = DiffNumber(PRIMAL, PARTIALS) + +const PARTIALS2 = Partials(ntuple(n -> rand(T), Val{N})) +const PRIMAL2 = rand(T) +const FDNUM2 = DiffNumber(PRIMAL2, PARTIALS2) + +const M_PARTIALS = Partials(ntuple(m -> rand(T), Val{M})) +const NESTED_PARTIALS = convert(Partials{N,DiffNumber{M,T}}, PARTIALS) +const NESTED_FDNUM = DiffNumber(DiffNumber(PRIMAL, M_PARTIALS), NESTED_PARTIALS) + +const M_PARTIALS2 = Partials(ntuple(m -> rand(T), Val{M})) +const NESTED_PARTIALS2 = convert(Partials{N,DiffNumber{M,T}}, PARTIALS2) +const NESTED_FDNUM2 = DiffNumber(DiffNumber(PRIMAL2, M_PARTIALS2), NESTED_PARTIALS2) + +samerng() = MersenneTwister(1) + +################ +# Constructors # +################ + +@test DiffNumber(PRIMAL, PARTIALS...) 
=== FDNUM +@test typeof(DiffNumber(widen(T)(PRIMAL), PARTIALS)) === DiffNumber{N,widen(T)} +@test typeof(DiffNumber(widen(T)(PRIMAL), PARTIALS.values)) === DiffNumber{N,widen(T)} +@test typeof(DiffNumber(widen(T)(PRIMAL), PARTIALS...)) === DiffNumber{N,widen(T)} +@test typeof(NESTED_FDNUM) == DiffNumber{N,DiffNumber{M,T}} + +############# +# Accessors # +############# + +@test value(PRIMAL) == PRIMAL +@test value(FDNUM) == PRIMAL +@test value(NESTED_FDNUM) === DiffNumber(PRIMAL, M_PARTIALS) + +@test partials(PRIMAL) == Partials{0,T}(tuple()) +@test partials(FDNUM) == PARTIALS +@test partials(NESTED_FDNUM) === NESTED_PARTIALS + +for i in 1:N + @test partials(FDNUM, i) == PARTIALS[i] + for j in 1:M + @test partials(NESTED_FDNUM, i, j) == partials(NESTED_PARTIALS[i], j) + end +end + +@test ForwardDiff.npartials(FDNUM) == N +@test ForwardDiff.npartials(typeof(FDNUM)) == N +@test ForwardDiff.npartials(NESTED_FDNUM) == N +@test ForwardDiff.npartials(typeof(NESTED_FDNUM)) == N + +@test ForwardDiff.numtype(FDNUM) == T +@test ForwardDiff.numtype(typeof(FDNUM)) == T +@test ForwardDiff.numtype(NESTED_FDNUM) == DiffNumber{M,T} +@test ForwardDiff.numtype(typeof(NESTED_FDNUM)) == DiffNumber{M,T} + +##################### +# Generic Functions # +##################### + +@test FDNUM === copy(FDNUM) +@test NESTED_FDNUM === copy(NESTED_FDNUM) + +@test eps(FDNUM) === eps(PRIMAL) +@test eps(typeof(FDNUM)) === eps(T) +@test eps(NESTED_FDNUM) === eps(PRIMAL) +@test eps(typeof(NESTED_FDNUM)) === eps(T) + +@test floor(Int, FDNUM) === floor(Int, PRIMAL) +@test floor(Int, FDNUM2) === floor(Int, PRIMAL2) +@test floor(Int, NESTED_FDNUM) === floor(Int, PRIMAL) + +@test ceil(Int, FDNUM) === ceil(Int, PRIMAL) +@test ceil(Int, FDNUM2) === ceil(Int, PRIMAL2) +@test ceil(Int, NESTED_FDNUM) === ceil(Int, PRIMAL) + +@test trunc(Int, FDNUM) === trunc(Int, PRIMAL) +@test trunc(Int, FDNUM2) === trunc(Int, PRIMAL2) +@test trunc(Int, NESTED_FDNUM) === trunc(Int, PRIMAL) + +@test round(Int, FDNUM) === round(Int, PRIMAL) +@test round(Int, FDNUM2) === round(Int, PRIMAL2) +@test round(Int, NESTED_FDNUM) === round(Int, PRIMAL) + +@test hash(FDNUM) === hash(PRIMAL) +@test hash(FDNUM, hash(PRIMAL)) === hash(PRIMAL, hash(PRIMAL)) +@test hash(NESTED_FDNUM) === hash(PRIMAL) +@test hash(NESTED_FDNUM, hash(PRIMAL)) === hash(PRIMAL, hash(PRIMAL)) + +const TMPIO = IOBuffer() +write(TMPIO, FDNUM) +seekstart(TMPIO) +@test read(TMPIO, typeof(FDNUM)) === FDNUM +seekstart(TMPIO) +write(TMPIO, FDNUM2) +seekstart(TMPIO) +@test read(TMPIO, typeof(FDNUM2)) === FDNUM2 +seekstart(TMPIO) +write(TMPIO, NESTED_FDNUM) +seekstart(TMPIO) +@test read(TMPIO, typeof(NESTED_FDNUM)) === NESTED_FDNUM +close(TMPIO) + +@test zero(FDNUM) === DiffNumber(zero(PRIMAL), zero(PARTIALS)) +@test zero(typeof(FDNUM)) === DiffNumber(zero(T), zero(Partials{N,T})) +@test zero(NESTED_FDNUM) === DiffNumber(DiffNumber(zero(PRIMAL), zero(M_PARTIALS)), zero(NESTED_PARTIALS)) +@test zero(typeof(NESTED_FDNUM)) === DiffNumber(DiffNumber(zero(T), zero(Partials{M,T})), zero(Partials{N,DiffNumber{M,T}})) + +@test one(FDNUM) === DiffNumber(one(PRIMAL), zero(PARTIALS)) +@test one(typeof(FDNUM)) === DiffNumber(one(T), zero(Partials{N,T})) +@test one(NESTED_FDNUM) === DiffNumber(DiffNumber(one(PRIMAL), zero(M_PARTIALS)), zero(NESTED_PARTIALS)) +@test one(typeof(NESTED_FDNUM)) === DiffNumber(DiffNumber(one(T), zero(Partials{M,T})), zero(Partials{N,DiffNumber{M,T}})) + +@test rand(samerng(), FDNUM) === DiffNumber(rand(samerng(), T), zero(PARTIALS)) +@test rand(samerng(), typeof(FDNUM)) === 
DiffNumber(rand(samerng(), T), zero(Partials{N,T})) +@test rand(samerng(), NESTED_FDNUM) === DiffNumber(DiffNumber(rand(samerng(), T), zero(M_PARTIALS)), zero(NESTED_PARTIALS)) +@test rand(samerng(), typeof(NESTED_FDNUM)) === DiffNumber(DiffNumber(rand(samerng(), T), zero(Partials{M,T})), zero(Partials{N,DiffNumber{M,T}})) + +# Predicates # +#------------# + +@test ForwardDiff.isconstant(zero(FDNUM)) +@test ForwardDiff.isconstant(rand(FDNUM)) +@test ForwardDiff.isconstant(one(FDNUM)) +@test !(ForwardDiff.isconstant(FDNUM)) + +@test ForwardDiff.isconstant(zero(NESTED_FDNUM)) +@test ForwardDiff.isconstant(rand(NESTED_FDNUM)) +@test ForwardDiff.isconstant(one(NESTED_FDNUM)) +@test !(ForwardDiff.isconstant(NESTED_FDNUM)) + +@test isequal(FDNUM, DiffNumber(PRIMAL, PARTIALS2)) +@test !(isequal(FDNUM, FDNUM2)) + +@test isequal(NESTED_FDNUM, DiffNumber(DiffNumber(PRIMAL, M_PARTIALS2), NESTED_PARTIALS2)) +@test !(isequal(NESTED_FDNUM, NESTED_FDNUM2)) + +@test FDNUM == DiffNumber(PRIMAL, PARTIALS2) +@test FDNUM != FDNUM2 +@test NESTED_FDNUM != NESTED_FDNUM2 + +@test isless(DiffNumber(1, PARTIALS), DiffNumber(2, PARTIALS2)) +@test !(isless(DiffNumber(1, PARTIALS), DiffNumber(1, PARTIALS2))) +@test !(isless(DiffNumber(2, PARTIALS), DiffNumber(1, PARTIALS2))) + +@test isless(DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS), DiffNumber(DiffNumber(2, M_PARTIALS2), NESTED_PARTIALS2)) +@test !(isless(DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS), DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2))) +@test !(isless(DiffNumber(DiffNumber(2, M_PARTIALS), NESTED_PARTIALS), DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2))) + +@test DiffNumber(1, PARTIALS) < DiffNumber(2, PARTIALS2) +@test !(DiffNumber(1, PARTIALS) < DiffNumber(1, PARTIALS2)) +@test !(DiffNumber(2, PARTIALS) < DiffNumber(1, PARTIALS2)) + +@test DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS) < DiffNumber(DiffNumber(2, M_PARTIALS2), NESTED_PARTIALS2) +@test !(DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS) < DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2)) +@test !(DiffNumber(DiffNumber(2, M_PARTIALS), NESTED_PARTIALS) < DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2)) + +@test DiffNumber(1, PARTIALS) <= DiffNumber(2, PARTIALS2) +@test DiffNumber(1, PARTIALS) <= DiffNumber(1, PARTIALS2) +@test !(DiffNumber(2, PARTIALS) <= DiffNumber(1, PARTIALS2)) + +@test DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS) <= DiffNumber(DiffNumber(2, M_PARTIALS2), NESTED_PARTIALS2) +@test DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS) <= DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2) +@test !(DiffNumber(DiffNumber(2, M_PARTIALS), NESTED_PARTIALS) <= DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2)) + +@test DiffNumber(2, PARTIALS) > DiffNumber(1, PARTIALS2) +@test !(DiffNumber(1, PARTIALS) > DiffNumber(1, PARTIALS2)) +@test !(DiffNumber(1, PARTIALS) > DiffNumber(2, PARTIALS2)) + +@test DiffNumber(DiffNumber(2, M_PARTIALS), NESTED_PARTIALS) > DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2) +@test !(DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS) > DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2)) +@test !(DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS) > DiffNumber(DiffNumber(2, M_PARTIALS2), NESTED_PARTIALS2)) + +@test DiffNumber(2, PARTIALS) >= DiffNumber(1, PARTIALS2) +@test DiffNumber(1, PARTIALS) >= DiffNumber(1, PARTIALS2) +@test !(DiffNumber(1, PARTIALS) >= DiffNumber(2, PARTIALS2)) + +@test DiffNumber(DiffNumber(2, M_PARTIALS), 
NESTED_PARTIALS) >= DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2) +@test DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS) >= DiffNumber(DiffNumber(1, M_PARTIALS2), NESTED_PARTIALS2) +@test !(DiffNumber(DiffNumber(1, M_PARTIALS), NESTED_PARTIALS) >= DiffNumber(DiffNumber(2, M_PARTIALS2), NESTED_PARTIALS2)) + +@test isnan(DiffNumber(NaN, PARTIALS)) +@test !(isnan(FDNUM)) + +@test isnan(DiffNumber(DiffNumber(NaN, M_PARTIALS), NESTED_PARTIALS)) +@test !(isnan(NESTED_FDNUM)) + +@test isfinite(FDNUM) +@test !(isfinite(DiffNumber(Inf, PARTIALS))) + +@test isfinite(NESTED_FDNUM) +@test !(isfinite(DiffNumber(DiffNumber(NaN, M_PARTIALS), NESTED_PARTIALS))) + +@test isinf(DiffNumber(Inf, PARTIALS)) +@test !(isinf(FDNUM)) + +@test isinf(DiffNumber(DiffNumber(Inf, M_PARTIALS), NESTED_PARTIALS)) +@test !(isinf(NESTED_FDNUM)) + +@test isreal(FDNUM) +@test isreal(NESTED_FDNUM) + +@test isinteger(DiffNumber(1.0, PARTIALS)) +@test !(isinteger(FDNUM)) + +@test isinteger(DiffNumber(DiffNumber(1.0, M_PARTIALS), NESTED_PARTIALS)) +@test !(isinteger(NESTED_FDNUM)) + +@test iseven(DiffNumber(2)) +@test !(iseven(DiffNumber(1))) + +@test iseven(DiffNumber(DiffNumber(2))) +@test !(iseven(DiffNumber(DiffNumber(1)))) + +@test isodd(DiffNumber(1)) +@test !(isodd(DiffNumber(2))) + +@test isodd(DiffNumber(DiffNumber(1))) +@test !(isodd(DiffNumber(DiffNumber(2)))) + +######################## +# Promotion/Conversion # +######################## + +const WIDE_T = widen(T) + +@test promote_type(DiffNumber{N,T}, T) == DiffNumber{N,T} +@test promote_type(DiffNumber{N,T}, WIDE_T) == DiffNumber{N,WIDE_T} +@test promote_type(DiffNumber{N,WIDE_T}, T) == DiffNumber{N,WIDE_T} +@test promote_type(DiffNumber{N,T}, DiffNumber{N,T}) == DiffNumber{N,T} +@test promote_type(DiffNumber{N,T}, DiffNumber{N,WIDE_T}) == DiffNumber{N,WIDE_T} +@test promote_type(DiffNumber{N,WIDE_T}, DiffNumber{N,DiffNumber{M,T}}) == DiffNumber{N,DiffNumber{M,WIDE_T}} + +const WIDE_FDNUM = convert(DiffNumber{N,WIDE_T}, FDNUM) +const WIDE_NESTED_FDNUM = convert(DiffNumber{N,DiffNumber{M,WIDE_T}}, NESTED_FDNUM) + +@test typeof(WIDE_FDNUM) == DiffNumber{N,WIDE_T} +@test typeof(WIDE_NESTED_FDNUM) == DiffNumber{N,DiffNumber{M,WIDE_T}} + +@test value(WIDE_FDNUM) == PRIMAL +@test value(WIDE_NESTED_FDNUM) == PRIMAL + +@test convert(DiffNumber, FDNUM) === FDNUM +@test convert(DiffNumber, NESTED_FDNUM) === NESTED_FDNUM +@test convert(DiffNumber{N,T}, FDNUM) === FDNUM +@test convert(DiffNumber{N,DiffNumber{M,T}}, NESTED_FDNUM) === NESTED_FDNUM +@test convert(DiffNumber{N,WIDE_T}, PRIMAL) === DiffNumber(WIDE_T(PRIMAL), zero(Partials{N,WIDE_T})) +@test convert(DiffNumber{N,DiffNumber{M,WIDE_T}}, PRIMAL) === DiffNumber(DiffNumber(WIDE_T(PRIMAL), zero(Partials{M,WIDE_T})), zero(Partials{N,DiffNumber{M,T}})) +@test convert(DiffNumber{N,DiffNumber{M,T}}, FDNUM) === DiffNumber(DiffNumber{M,T}(PRIMAL), convert(Partials{N,DiffNumber{M,T}}, PARTIALS)) +@test convert(DiffNumber{N,DiffNumber{M,WIDE_T}}, FDNUM) === DiffNumber(DiffNumber{M,WIDE_T}(PRIMAL), convert(Partials{N,DiffNumber{M,WIDE_T}}, PARTIALS)) + +######## +# Math # +######## + +test_approx_diffnums(a::Real, b::Real) = @test_approx_eq a b + +function test_approx_diffnums{N}(a::DiffNumber{N}, b::DiffNumber{N}) + test_approx_diffnums(value(a), value(b)) + for i in 1:N + test_approx_diffnums(partials(a)[i], partials(b)[i]) + end +end + +# Arithmetic # +#------------# + +@test FDNUM + FDNUM2 === DiffNumber(value(FDNUM) + value(FDNUM2), partials(FDNUM) + partials(FDNUM2)) +@test FDNUM + PRIMAL === 
DiffNumber(value(FDNUM) + PRIMAL, partials(FDNUM)) +@test PRIMAL + FDNUM === DiffNumber(value(FDNUM) + PRIMAL, partials(FDNUM)) + +@test NESTED_FDNUM + NESTED_FDNUM2 === DiffNumber(value(NESTED_FDNUM) + value(NESTED_FDNUM2), partials(NESTED_FDNUM) + partials(NESTED_FDNUM2)) +@test NESTED_FDNUM + PRIMAL === DiffNumber(value(NESTED_FDNUM) + PRIMAL, partials(NESTED_FDNUM)) +@test PRIMAL + NESTED_FDNUM === DiffNumber(value(NESTED_FDNUM) + PRIMAL, partials(NESTED_FDNUM)) + +@test FDNUM - FDNUM2 === DiffNumber(value(FDNUM) - value(FDNUM2), partials(FDNUM) - partials(FDNUM2)) +@test FDNUM - PRIMAL === DiffNumber(value(FDNUM) - PRIMAL, partials(FDNUM)) +@test PRIMAL - FDNUM === DiffNumber(PRIMAL - value(FDNUM), -(partials(FDNUM))) +@test -(FDNUM) === DiffNumber(-(value(FDNUM)), -(partials(FDNUM))) + +@test NESTED_FDNUM - NESTED_FDNUM2 === DiffNumber(value(NESTED_FDNUM) - value(NESTED_FDNUM2), partials(NESTED_FDNUM) - partials(NESTED_FDNUM2)) +@test NESTED_FDNUM - PRIMAL === DiffNumber(value(NESTED_FDNUM) - PRIMAL, partials(NESTED_FDNUM)) +@test PRIMAL - NESTED_FDNUM === DiffNumber(PRIMAL - value(NESTED_FDNUM), -(partials(NESTED_FDNUM))) +@test -(NESTED_FDNUM) === DiffNumber(-(value(NESTED_FDNUM)), -(partials(NESTED_FDNUM))) + +@test FDNUM * FDNUM2 === DiffNumber(value(FDNUM) * value(FDNUM2), ForwardDiff._mul_partials(partials(FDNUM), partials(FDNUM2), value(FDNUM2), value(FDNUM))) +@test FDNUM * PRIMAL === DiffNumber(value(FDNUM) * PRIMAL, partials(FDNUM) * PRIMAL) +@test PRIMAL * FDNUM === DiffNumber(value(FDNUM) * PRIMAL, partials(FDNUM) * PRIMAL) + +@test NESTED_FDNUM * NESTED_FDNUM2 === DiffNumber(value(NESTED_FDNUM) * value(NESTED_FDNUM2), ForwardDiff._mul_partials(partials(NESTED_FDNUM), partials(NESTED_FDNUM2), value(NESTED_FDNUM2), value(NESTED_FDNUM))) +@test NESTED_FDNUM * PRIMAL === DiffNumber(value(NESTED_FDNUM) * PRIMAL, partials(NESTED_FDNUM) * PRIMAL) +@test PRIMAL * NESTED_FDNUM === DiffNumber(value(NESTED_FDNUM) * PRIMAL, partials(NESTED_FDNUM) * PRIMAL) + +test_approx_diffnums(FDNUM / FDNUM2, DiffNumber(value(FDNUM) / value(FDNUM2), ForwardDiff._div_partials(partials(FDNUM), partials(FDNUM2), value(FDNUM), value(FDNUM2)))) +test_approx_diffnums(FDNUM / PRIMAL, DiffNumber(value(FDNUM) / PRIMAL, partials(FDNUM) / PRIMAL)) +test_approx_diffnums(PRIMAL / FDNUM, DiffNumber(PRIMAL / value(FDNUM), (-(PRIMAL) / value(FDNUM)^2) * partials(FDNUM))) + +test_approx_diffnums(NESTED_FDNUM / NESTED_FDNUM2, DiffNumber(value(NESTED_FDNUM) / value(NESTED_FDNUM2), ForwardDiff._div_partials(partials(NESTED_FDNUM), partials(NESTED_FDNUM2), value(NESTED_FDNUM), value(NESTED_FDNUM2)))) +test_approx_diffnums(NESTED_FDNUM / PRIMAL, DiffNumber(value(NESTED_FDNUM) / PRIMAL, partials(NESTED_FDNUM) / PRIMAL)) +test_approx_diffnums(PRIMAL / NESTED_FDNUM, DiffNumber(PRIMAL / value(NESTED_FDNUM), (-(PRIMAL) / value(NESTED_FDNUM)^2) * partials(NESTED_FDNUM))) + +test_approx_diffnums(FDNUM^FDNUM2, exp(FDNUM2 * log(FDNUM))) +test_approx_diffnums(FDNUM^PRIMAL, exp(PRIMAL * log(FDNUM))) +test_approx_diffnums(PRIMAL^FDNUM, exp(FDNUM * log(PRIMAL))) + +test_approx_diffnums(NESTED_FDNUM^NESTED_FDNUM2, exp(NESTED_FDNUM2 * log(NESTED_FDNUM))) +test_approx_diffnums(NESTED_FDNUM^PRIMAL, exp(PRIMAL * log(NESTED_FDNUM))) +test_approx_diffnums(PRIMAL^NESTED_FDNUM, exp(NESTED_FDNUM * log(PRIMAL))) + +@test partials(NaNMath.pow(DiffNumber(-2.0, 1.0), DiffNumber(2.0, 0.0)), 1) == -4.0 + +# Unary Functions # +#-----------------# + +@test conj(FDNUM) === FDNUM +@test conj(NESTED_FDNUM) === NESTED_FDNUM +@test transpose(FDNUM) === 
FDNUM +@test transpose(NESTED_FDNUM) === NESTED_FDNUM +@test ctranspose(FDNUM) === FDNUM +@test ctranspose(NESTED_FDNUM) === NESTED_FDNUM + +@test abs(-FDNUM) === FDNUM +@test abs(FDNUM) === FDNUM +@test abs(-NESTED_FDNUM) === NESTED_FDNUM +@test abs(NESTED_FDNUM) === NESTED_FDNUM + +const UNSUPPORTED_NESTED_FUNCS = (:trigamma, :airyprime, :besselj1, :bessely1) +const DOMAIN_ERR_FUNCS = (:asec, :acsc, :asecd, :acscd, :acoth, :acosh) + +for fsym in ForwardDiff.AUTO_DEFINED_UNARY_FUNCS + try + v = :v + deriv = Calculus.differentiate(:($(fsym)($v)), v) + @eval begin + is_domain_err_func = $(fsym in DOMAIN_ERR_FUNCS) + is_nanmath_func = $(fsym in ForwardDiff.NANMATH_FUNCS) + is_unsupported_nested_func = $(fsym in UNSUPPORTED_NESTED_FUNCS) + + fdnum = is_domain_err_func ? FDNUM + 1 : FDNUM + $(v) = value(fdnum) + test_approx_diffnums($(fsym)(fdnum), DiffNumber($(fsym)($v), $(deriv) * partials(fdnum))) + if is_nanmath_func + test_approx_diffnums(NaNMath.$(fsym)(fdnum), DiffNumber(NaNMath.$(fsym)($v), $(deriv) * partials(fdnum))) + end + + if !(is_unsupported_nested_func) + nested_fdnum = is_domain_err_func ? NESTED_FDNUM + 1 : NESTED_FDNUM + $(v) = value(nested_fdnum) + test_approx_diffnums($(fsym)(nested_fdnum), DiffNumber($(fsym)($v), $(deriv) * partials(nested_fdnum))) + if is_nanmath_func + test_approx_diffnums(NaNMath.$(fsym)(nested_fdnum), DiffNumber(NaNMath.$(fsym)($v), $(deriv) * partials(nested_fdnum))) + end + end + end + catch err + warn("Encountered error when testing $(fsym)(::DiffNumber):") + throw(err) + end +end + +end # module diff --git a/test/GradientTest.jl b/test/GradientTest.jl new file mode 100644 index 00000000..24133756 --- /dev/null +++ b/test/GradientTest.jl @@ -0,0 +1,29 @@ +module GradientTest + +using Base.Test +using ForwardDiff +using ForwardDiff: default_value, KWARG_DEFAULTS + +######################## +# @gradient/@gradient! 
# +######################## + +const ALL_DEFAULT = :(Val{$(default_value(KWARG_DEFAULTS, :all))}) +const CHUNK_DEFAULT = :(Val{$(default_value(KWARG_DEFAULTS, :chunk))}) +const INPUT_LENGTH_DEFAULT = :(Val{$(default_value(KWARG_DEFAULTS, :input_length))}) +const MULTITHREAD_DEFAULT = :(Val{$(default_value(KWARG_DEFAULTS, :multithread))}) +const OUTPUT_MUTATES_DEFAULT = :(Val{$(default_value(KWARG_DEFAULTS, :output_mutates))}) + +@test macroexpand(:(ForwardDiff.@gradient(sin))) == :(ForwardDiff.gradient(sin, $ALL_DEFAULT, $CHUNK_DEFAULT, $INPUT_LENGTH_DEFAULT, $MULTITHREAD_DEFAULT, $OUTPUT_MUTATES_DEFAULT)) +@test macroexpand(:(ForwardDiff.@gradient(sin; output_mutates=1, all=2, multithread=3, chunk=4, input_length=5))) == :(ForwardDiff.gradient(sin, Val{2}, Val{4}, Val{5}, Val{3}, Val{1})) +@test macroexpand(:(ForwardDiff.@gradient(sin, chunk=1, output_mutates=2))) == :(ForwardDiff.gradient(sin, $ALL_DEFAULT, Val{1}, $INPUT_LENGTH_DEFAULT, $MULTITHREAD_DEFAULT, Val{2})) + +@test macroexpand(:(ForwardDiff.@gradient(sin, x))) == :(ForwardDiff.gradient(sin, x, $ALL_DEFAULT, $CHUNK_DEFAULT, $INPUT_LENGTH_DEFAULT, $MULTITHREAD_DEFAULT)) +@test macroexpand(:(ForwardDiff.@gradient(sin, x, input_length=1, all=2, multithread=3, chunk=4))) == :(ForwardDiff.gradient(sin, x, Val{2}, Val{4}, Val{1}, Val{3})) +@test macroexpand(:(ForwardDiff.@gradient(sin, x; chunk=1, multithread=2))) == :(ForwardDiff.gradient(sin, x, $ALL_DEFAULT, Val{1}, $INPUT_LENGTH_DEFAULT, Val{2})) + +@test macroexpand(:(ForwardDiff.@gradient!(sin, output, x))) == :(ForwardDiff.gradient!(sin, output, x, $ALL_DEFAULT, $CHUNK_DEFAULT, $INPUT_LENGTH_DEFAULT, $MULTITHREAD_DEFAULT)) +@test macroexpand(:(ForwardDiff.@gradient!(sin, output, x, input_length=1, all=2, multithread=3, chunk=4))) == :(ForwardDiff.gradient!(sin, output, x, Val{2}, Val{4}, Val{1}, Val{3})) +@test macroexpand(:(ForwardDiff.@gradient!(sin, output, x; chunk=1, multithread=2))) == :(ForwardDiff.gradient!(sin, output, x, $ALL_DEFAULT, Val{1}, $INPUT_LENGTH_DEFAULT, Val{2})) + +end # module diff --git a/test/PartialsTest.jl b/test/PartialsTest.jl new file mode 100644 index 00000000..3b402a09 --- /dev/null +++ b/test/PartialsTest.jl @@ -0,0 +1,112 @@ +module PartialsTest + +using Base.Test +using ForwardDiff +using ForwardDiff: Partials + +const N = 3 +const T = Float32 + +const VALUES = ntuple(n -> rand(T), Val{N}) +const PARTIALS = Partials(VALUES) + +const VALUES2 = ntuple(n -> rand(T), Val{N}) +const PARTIALS2 = Partials(VALUES2) + +samerng() = MersenneTwister(1) + +############################## +# Utility/Accessor Functions # +############################## + +@test PARTIALS.values == VALUES + +@test ForwardDiff.numtype(PARTIALS) == T +@test ForwardDiff.numtype(typeof(PARTIALS)) == T +@test ForwardDiff.numtype(typeof(VALUES)) == T +@test ForwardDiff.numtype(VALUES) == T + +@test ForwardDiff.npartials(PARTIALS) == N +@test ForwardDiff.npartials(typeof(PARTIALS)) == N + +@test length(PARTIALS) == N +@test length(VALUES) == N + +for i in 1:N + @test PARTIALS[i] == VALUES[i] +end + +@test start(PARTIALS) == start(VALUES) +@test next(PARTIALS, start(PARTIALS)) == next(VALUES, start(VALUES)) +@test done(PARTIALS, start(PARTIALS)) == done(VALUES, start(VALUES)) + +i = 1 +for p in PARTIALS + @test p == VALUES[i] + i += 1 +end + +##################### +# Generic Functions # +##################### + +@test zero(PARTIALS) == zero(typeof(PARTIALS)) +@test zero(PARTIALS).values == map(zero, VALUES) + +@test rand(samerng(), PARTIALS) == rand(samerng(), typeof(PARTIALS)) + 
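+# A minimal usage sketch of the Partials operations exercised in this file
+# (construction from a Tuple, indexing, addition, and scaling); the literal
+# values below are illustrative only:
+let p = Partials((1.0f0, 2.0f0, 3.0f0))
+    @test p[2] == 2.0f0
+    @test (p + p).values == (2.0f0, 4.0f0, 6.0f0)
+    @test 2.0f0 * p == p * 2.0f0
+end
+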
+@test !(ForwardDiff.iszero(PARTIALS)) +@test ForwardDiff.iszero(zero(PARTIALS)) + +@test PARTIALS == copy(PARTIALS) +@test PARTIALS != PARTIALS2 +@test isequal(PARTIALS, copy(PARTIALS)) +@test !(isequal(PARTIALS, PARTIALS2)) + +@test hash(PARTIALS) == hash(VALUES, ForwardDiff.PARTIALS_HASH) +@test hash(PARTIALS) == hash(copy(PARTIALS)) +@test hash(PARTIALS, hash(1)) == hash(copy(PARTIALS), hash(1)) + +const TMPIO = IOBuffer() +write(TMPIO, PARTIALS) +seekstart(TMPIO) +@test read(TMPIO, typeof(PARTIALS)) == PARTIALS +seekstart(TMPIO) +write(TMPIO, PARTIALS2) +seekstart(TMPIO) +@test read(TMPIO, typeof(PARTIALS2)) == PARTIALS2 +close(TMPIO) + +######################## +# Conversion/Promotion # +######################## + +const WIDE_T = widen(T) +const WIDE_PARTIALS = convert(Partials{N,WIDE_T}, PARTIALS) + +@test typeof(WIDE_PARTIALS) == Partials{N,WIDE_T} +@test WIDE_PARTIALS == PARTIALS +@test convert(Partials{N,T}, PARTIALS) === PARTIALS +@test promote_type(Partials{N,T}, Partials{N,T}) == Partials{N,T} +@test promote_type(Partials{N,T}, Partials{N,WIDE_T}) == Partials{N,WIDE_T} + +######################## +# Arithmetic Functions # +######################## + +@test (PARTIALS + PARTIALS).values == map(v -> v + v, VALUES) +@test (PARTIALS - PARTIALS).values == map(v -> v - v, VALUES) +@test getfield(-(PARTIALS), :values) == map(-, VALUES) + +const X = rand() +const Y = rand() + +@test X * PARTIALS == PARTIALS * X +@test (X * PARTIALS).values == map(v -> X * v, VALUES) +@test (PARTIALS / X).values == map(v -> v / X, VALUES) + +@test ForwardDiff._mul_partials(PARTIALS, PARTIALS2, X, Y).values == map((a, b) -> (X * a) + (Y * b), VALUES, VALUES2) +@test ForwardDiff._div_partials(PARTIALS, PARTIALS2, X, Y) == ForwardDiff._mul_partials(PARTIALS, PARTIALS2, inv(Y), -X/(Y^2)) + +end # module diff --git a/test/runtests.jl b/test/runtests.jl index 931d3deb..b42a1945 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,36 +1,16 @@ using ForwardDiff -print("Testing GradientNumbers and ForwardDiff.gradient...") +print("Testing Partials...") tic() -include("test_gradients.jl") -println("Done (took $(toq()) seconds).") +include("PartialsTest.jl") +println("done (took $(toq()) seconds).") -print("Testing HessianNumbers and ForwardDiff.hessian...") +print("Testing DiffNumber...") tic() -include("test_hessians.jl") -println("Done (took $(toq()) seconds).") +include("DiffNumberTest.jl") +println("done (took $(toq()) seconds).") -print("Testing TensorNumbers and ForwardDiff.tensor...") +print("Testing Gradient-related functionality...") tic() -include("test_tensors.jl") -println("Done (took $(toq()) seconds).") - -print("Testing ForwardDiff.derivative...") -tic() -include("test_derivatives.jl") -println("Done (took $(toq()) seconds).") - -print("Testing ForwardDiff.jacobian...") -tic() -include("test_jacobians.jl") -println("Done (took $(toq()) seconds).") - -println("Testing deprecation wrapper (deprecation warnings are expected)...") -tic() -include("test_deprecated.jl") -println("Done (took $(toq()) seconds).") - -println("Testing behavioral examples...") -tic() -include("test_behaviors.jl") -println("Done (took $(toq()) seconds).") +include("GradientTest.jl") +println("done (took $(toq()) seconds).") diff --git a/test/test_behaviors.jl b/test/test_behaviors.jl deleted file mode 100644 index 53ef2bc5..00000000 --- a/test/test_behaviors.jl +++ /dev/null @@ -1,109 +0,0 @@ -using Base.Test -using ForwardDiff - -##########################
-# Nested Differentiation # -########################## - -# README example # -#----------------# -x = rand(5) - -f = x -> sum(sin, x) + prod(tan, x) * sum(sqrt, x) -g = ForwardDiff.gradient(f) -j = ForwardDiff.jacobian(g) - -@test_approx_eq ForwardDiff.hessian(f, x) j(x) - -# Issue #59 example # -#-------------------# -x = rand(2) - -f = x -> sin(x)/3 * cos(x)/2 -df = ForwardDiff.derivative(f) -testdf = x -> (((cos(x)^2)/3) - (sin(x)^2)/3) / 2 -f2 = x -> df(x[1]) * f(x[2]) -testf2 = x -> testdf(x[1]) * f(x[2]) - -@test_approx_eq ForwardDiff.gradient(f2, x) ForwardDiff.gradient(testf2, x) - -# Mixing chunk mode and vector mode # -#-----------------------------------# -x = rand(2*ForwardDiff.tuple_usage_threshold) # big enough to trigger vector mode - -f = x -> sum(sin, x) + prod(tan, x) * sum(sqrt, x) -g = ForwardDiff.gradient(f) # gradient in vector mode -j = x -> ForwardDiff.jacobian(g, x, chunk_size=2)/2 # jacobian in chunk_mode - -@test_approx_eq ForwardDiff.hessian(f, x) 2*j(x) - -##################### -# Conversion Issues # -##################### - -# Target function returns a literal (Issue #71) # -#-----------------------------------------------# - -@test ForwardDiff.derivative(x->zero(x), rand()) == ForwardDiff.derivative(x->1.0, rand()) -@test ForwardDiff.gradient(x->zero(x[1]), [rand()]) == ForwardDiff.gradient(x->1.0, [rand()]) -@test ForwardDiff.hessian(x->zero(x[1]), [rand()]) == ForwardDiff.hessian(x->1.0, [rand()]) -@test ForwardDiff.jacobian(x->[zero(x[1])], [rand()]) == ForwardDiff.jacobian(x->[1.0], [rand()]) - -####################### -# Promote type Issues # -####################### - -# Test overloading of `promote_array_type` # -#------------------------------------------# - -promtyp = Base.promote_array_type(Base.DotAddFun(), - ForwardDiff.ForwardDiffNumber{2, Float64, - Tuple{Float64, Float64}}, Float64) -fdiffnum = ForwardDiff.ForwardDiffNumber{2,Float64,Tuple{Float64,Float64}} -@test promtyp <: fdiffnum - - -promtyp = Base.promote_array_type(Base.DotAddFun(), - ForwardDiff.GradientNumber{2, Float64, - Tuple{Float64, Float64}}, Float64) -gradnum = ForwardDiff.GradientNumber{2,Float64,Tuple{Float64,Float64}} -@test promtyp <: gradnum - -promtyp = Base.promote_array_type(Base.DotAddFun(), - ForwardDiff.HessianNumber{2, Float64, - Tuple{Float64, Float64}}, Float64) -hessnum = ForwardDiff.HessianNumber{2,Float64,Tuple{Float64,Float64}} -@test promtyp <: hessnum - -promtyp = Base.promote_array_type(Base.DotAddFun(), - ForwardDiff.TensorNumber{2, Float64, - Tuple{Float64, Float64}}, Float64) -tensnum = ForwardDiff.TensorNumber{2,Float64,Tuple{Float64,Float64}} -@test promtyp <: tensnum - - -# Arithmetic element-wise functions # -#-----------------------------------# - -N = 4 -a = ones(N) -jac0 = reshape(vcat([[zeros(N*(i-1)); a; zeros(N^2-N*i)] for i = 1:N]...), N^2, N) - -for op in (-, +, .-, .+, ./, .*) - - f = x -> [op(x[1], a); op(x[2], a); op(x[3], a); op(x[4], a)] - - # jacobian - jac = ForwardDiff.jacobian(f, a) - @test reduce(&, -jac + jac0 .== 0) - - f = x -> sum([op(x[1], a); op(x[2], a); op(x[3], a); op(x[4], a)]) - - # hessian - hess = ForwardDiff.hessian(f, a) - @test reduce(&, -hess + zeros(N, N) .== 0) - - # tensor - tens = ForwardDiff.tensor(f, a) - @test reduce(&, -tens + zeros(N, N, N) .== 0) -end diff --git a/test/test_deprecated.jl b/test/test_deprecated.jl deleted file mode 100644 index e42b2ecd..00000000 --- a/test/test_deprecated.jl +++ /dev/null @@ -1,80 +0,0 @@ -T = Float64 -dummy_fsym = :sin -testexpr = :(sin(a) + exp(b) - tan(c) * cos(d)) - 
-testf = @eval (x::Vector) -> begin - a,b,c,d = x - return $testexpr -end - -############# -# Gradients # -############# -N = 4 -testx = grad_test_x(dummy_fsym, N) -testout = Array(T, N) -testresult = grad_test_result(testexpr, testx) - -gradf! = forwarddiff_gradient!(testf, T) -gradf!(testx, testout) -@test_approx_eq testout testresult - -gradf = forwarddiff_gradient(testf, T) -@test_approx_eq gradf(testx) testresult - -############# -# Jacobians # -############# -N,M = 4,5 -testx = jacob_test_x(dummy_fsym, N) -testout = Array(T, M, N) -testexpr_jac = [:(sin(a) + cos(b)), :(-tan(c)), :(4 * exp(d)), :(cos(b)^5), :(sin(a))] -testresult = jacob_test_result(testexpr_jac, testx) - -jactestf = @eval (x::Vector) -> begin - a,b,c,d = x - return [$(testexpr_jac...)] -end - -jacf! = forwarddiff_jacobian!(jactestf, T) -jacf!(testx, testout) -@test_approx_eq testout testresult - -jacf = forwarddiff_jacobian(jactestf, T) -@test_approx_eq jacf(testx) testresult - -############ -# Hessians # -############ -N = 6 -testx = hess_test_x(dummy_fsym, N) -testout = Array(T, N, N) -testexpr_hess = :(sin(a) + exp(b) - tan(c) * cos(l) + sin(m) * exp(r)) -testresult = hess_test_result(testexpr_hess, testx) - -hess_testf = @eval (x::Vector) -> begin - a,b,c,l,m,r = x - return $testexpr_hess -end - -hessf! = forwarddiff_hessian!(hess_testf, T) -hessf!(testx, testout) -@test_approx_eq testout testresult - -hessf = forwarddiff_hessian(hess_testf, T) -@test_approx_eq hessf(testx) testresult - -########### -# Tensors # -########### -N = 4 -testx = tens_test_x(dummy_fsym, N) -testout = Array(T, N, N, N) -testresult = tens_test_result(testexpr, testx) - -tensf! = forwarddiff_tensor!(testf, T) -tensf!(testx, testout) -@test_approx_eq testout testresult - -tensf = forwarddiff_tensor(testf, T) -@test_approx_eq tensf(testx) testresult diff --git a/test/test_derivatives.jl b/test/test_derivatives.jl deleted file mode 100644 index 8cdf0fc2..00000000 --- a/test/test_derivatives.jl +++ /dev/null @@ -1,93 +0,0 @@ -using Base.Test -using ForwardDiff -using Calculus - -########################### -# Test taking derivatives # -########################### -N = 4 -M = 2N -L = 3N - -testout = Array(Float64, N, M, L) - -function deriv_test_x(fsym) - randrange = 0.01:.01:.99 - needs_rand_mod = tuple(acosh, acoth, asec, acsc, asecd, acscd) - - if fsym in needs_rand_mod - randrange += 1 - end - - return rand(randrange) -end - -function test_approx_deriv(a::Array, b::Array) - @assert length(a) == length(b) - for i in eachindex(a) - @test_approx_eq a[i] b[i] - end -end - -for fsym in ForwardDiff.auto_defined_unary_funcs - func_expr = :($(fsym)(x) + 4^$(fsym)(x) - x * $(fsym)(x)) - deriv = Calculus.differentiate(func_expr) - try - @eval begin - x = deriv_test_x($fsym) - testdf = x -> $func_expr - val_result = testdf(x) - deriv_result = $deriv - - test_all_results = (testout, results) -> begin - @test_approx_eq ForwardDiff.value(results) val_result - @test_approx_eq ForwardDiff.derivative(results) deriv_result - @test_approx_eq testout deriv_result - end - - @test_approx_eq deriv_result ForwardDiff.derivative(testdf, x) - testout, result = ForwardDiff.derivative(testdf, x, AllResults) - test_all_results(testout, result) - - @test_approx_eq deriv_result ForwardDiff.derivative(testdf)(x) - testout2, result2 = ForwardDiff.derivative(testdf, AllResults)(x) - test_all_results(testout2, result2) - - testdf_arr = t -> [testdf(t) for i in 1:N, j in 1:M, k in 1:L] - val_result_arr = [val_result for i in 1:N, j in 1:M, k in 1:L] - deriv_result_arr = 
[deriv_result for i in 1:N, j in 1:M, k in 1:L] - - test_all_results = (testout, results) -> begin - test_approx_deriv(ForwardDiff.value(results), val_result_arr) - test_approx_deriv(ForwardDiff.derivative(results), deriv_result_arr) - test_approx_deriv(testout, deriv_result_arr) - end - - test_approx_deriv(deriv_result_arr, ForwardDiff.derivative(testdf_arr, x)) - testout, result = ForwardDiff.derivative(testdf_arr, x, AllResults) - test_all_results(testout, result) - - test_approx_deriv(deriv_result_arr, ForwardDiff.derivative(testdf_arr)(x)) - testout2, result2 = ForwardDiff.derivative(testdf_arr, AllResults)(x) - test_all_results(testout2, result2) - - testout = similar(testout) - ForwardDiff.derivative!(testout, testdf_arr, x) - test_approx_deriv(deriv_result_arr, testout) - - testout = similar(testout) - results = ForwardDiff.derivative!(testout, testdf_arr, x, AllResults) - test_all_results(testout, results[2]) - - testout = similar(testout) - ForwardDiff.derivative(testdf_arr; mutates=true)(testout, x) - test_approx_deriv(deriv_result_arr, testout) - - testout = similar(testout) - results2 = ForwardDiff.derivative(testdf_arr, AllResults; mutates=true)(testout, x) - test_all_results(testout, results[2]) - end - catch err - error("Failure when testing derivative of $fsym: $err") - end -end diff --git a/test/test_gradients.jl b/test/test_gradients.jl deleted file mode 100644 index 5f238f03..00000000 --- a/test/test_gradients.jl +++ /dev/null @@ -1,338 +0,0 @@ -using Base.Test -using Calculus -using ForwardDiff -using ForwardDiff: - GradientNumber, - value, - grad, - npartials, - isconstant - -floatrange = 0.0:.01:.99 -intrange = 0:10 -N = 3 -T = Float64 - -test_val = rand(floatrange) -test_partialstup = tuple(rand(floatrange, N)...) -test_partialsvec = collect(test_partialstup) - -for (test_partials, Grad) in ((test_partialstup, ForwardDiff.GradNumTup), (test_partialsvec, ForwardDiff.GradNumVec)) - test_grad = Grad{N,T}(test_val, test_partials) - - ###################### - # Accessor Functions # - ###################### - @test value(test_grad) == test_val - @test grad(test_grad) == test_partials - - for i in 1:N - @test grad(test_grad, i) == test_partials[i] - end - - @test npartials(test_grad) == npartials(typeof(test_grad)) == N - - ################################## - # Value Representation Functions # - ################################## - @test eps(test_grad) == eps(test_val) - @test eps(typeof(test_grad)) == eps(T) - - grad_zero = Grad{N,T}(zero(test_val), map(zero, test_partials)) - grad_one = Grad{N,T}(one(test_val), map(zero, test_partials)) - - @test zero(test_grad) == grad_zero - @test zero(typeof(test_grad)) == grad_zero - - @test one(test_grad) == grad_one - @test one(typeof(test_grad)) == grad_one - - ######################################### - # Conversion/Promotion/Hashing/Equality # - ######################################### - int_val = round(Int, test_val) - int_partials = map(x -> round(Int, x), test_partials) - float_val = float(int_val) - float_partials = map(float, int_partials) - - int_grad = Grad{N,T}(int_val, int_partials) - float_grad = Grad{N,T}(float_val, float_partials) - const_grad = GradientNumber(float_val) - - @test convert(typeof(test_grad), test_grad) == test_grad - @test convert(GradientNumber, test_grad) == test_grad - @test convert(Grad{N,T}, int_grad) == float_grad - @test convert(Grad{3,T}, 1) == Grad{3,T}(1.0) - @test convert(T, Grad{2,T}(1)) == 1.0 - - @test float(int_grad) == float_grad - - @test promote_type(Grad{N,Int}, Grad{N,Int}) == 
Grad{N,Int} - @test promote_type(Grad{N,Float64}, Grad{N,Int}) == Grad{N,Float64} - @test promote_type(Grad{N,Int}, Float64) == Grad{N,Float64} - @test promote_type(Grad{N,Float64}, Int) == Grad{N,Float64} - - @test hash(int_grad) == hash(float_grad) - @test hash(const_grad) == hash(float_val) - - @test int_grad == float_grad - @test float_val == const_grad - @test const_grad == float_val - - @test isequal(int_grad, float_grad) - @test isequal(float_val, const_grad) - @test isequal(const_grad, float_val) - - @test copy(test_grad) == test_grad - - #################### - # is____ Functions # - #################### - @test isnan(test_grad) == isnan(test_val) - @test isnan(Grad{3,T}(NaN)) - - not_const_grad = Grad{N,T}(one(T), map(one, test_partials)) - @test !(isconstant(not_const_grad)) - @test !(isreal(not_const_grad)) - @test isconstant(const_grad) && isreal(const_grad) - @test isconstant(zero(not_const_grad)) && isreal(zero(not_const_grad)) - - inf_grad = Grad{N,T}(Inf) - @test isfinite(test_grad) && isfinite(test_val) - @test !(isfinite(inf_grad)) - - @test isinf(inf_grad) - @test !(isinf(test_grad)) - - @test isless(test_grad-1, test_grad) - @test test_grad-1 < test_grad - @test !(test_grad < test_val) - @test test_grad-1 <= test_grad - @test test_grad <= test_val - @test test_grad > test_grad-1 - @test test_grad >= test_grad-1 - - @test isless(test_val-1, test_grad) - @test test_val-1 < test_grad - @test test_grad > test_val-1 - - @test isless(test_grad, test_val+1) - @test test_grad < test_val+1 - @test test_val+1 > test_grad - - @test floor(Int, test_grad) == floor(Int, test_val) - @test ceil(Int, test_grad) == ceil(Int, test_val) - @test trunc(Int, test_grad) == trunc(Int, test_val) - @test round(Int, test_grad) == round(Int, test_val) - - ####### - # I/O # - ####### - io = IOBuffer() - write(io, test_grad) - seekstart(io) - - @test read(io, typeof(test_grad)) == test_grad - - close(io) - - ##################################### - # Arithmetic/Mathematical Functions # - ##################################### - rand_val = rand(T) - rand_partials = map(x -> rand(T), test_partials) - rand_grad = Grad{N,T}(rand_val, rand_partials) - - # Addition/Subtraction # - #----------------------# - @test rand_grad + test_grad == Grad{N,T}(rand_val+test_val, map(+, rand_partials, test_partials)) - @test rand_grad + test_grad == test_grad + rand_grad - @test rand_grad - test_grad == Grad{N,T}(rand_val-test_val, map(-, rand_partials, test_partials)) - - @test rand_val + test_grad == Grad{N,T}(rand_val+test_val, test_partials) - @test rand_val + test_grad == test_grad + rand_val - @test rand_val - test_grad == Grad{N,T}(rand_val-test_val, map(-, test_partials)) - @test test_grad - rand_val == Grad{N,T}(test_val-rand_val, test_partials) - - @test -test_grad == Grad{N,T}(-test_val, map(-, test_partials)) - - # Multiplication # - #----------------# - rand_x_test = rand_grad * test_grad - - @test value(rand_x_test) == rand_val * test_val - - for i in 1:N - @test grad(rand_x_test, i) == (rand_partials[i] * test_val) + (rand_val * test_partials[i]) - end - - @test rand_val * test_grad == Grad{N,T}(rand_val*test_val, map(x -> rand_val*x, test_partials)) - @test test_grad * rand_val == rand_val * test_grad - - @test test_grad * false == zero(test_grad) - @test test_grad * true == test_grad - @test true * test_grad == test_grad * true - @test false * test_grad == test_grad * false - - # Division # - #----------# - function grad_approx_eq(a::GradientNumber, b::GradientNumber) - @test_approx_eq value(a) value(b) - 
@test_approx_eq collect(grad(a)) collect(grad(b)) - end - - grad_approx_eq(rand_grad / test_grad, rand_grad * inv(test_grad)) - grad_approx_eq(rand_val / test_grad, rand_val * inv(test_grad)) - - @test test_grad / rand_val == Grad{N,T}(test_val/rand_val, map(x -> x/rand_val, test_partials)) - - # Exponentiation # - #----------------# - grad_approx_eq(test_grad^rand_grad, exp(rand_grad * log(test_grad))) - grad_approx_eq(test_grad^rand_val, exp(rand_val * log(test_grad))) - grad_approx_eq(rand_val^test_grad, exp(test_grad * log(rand_val))) - - # Unary functions # - #-----------------# - for (fsym, expr) in Calculus.symbolic_derivatives_1arg() - @eval begin - func = $fsym - - local orig_grad, f_grad - - try - orig_grad = $test_grad - f_grad = func(orig_grad) - catch DomainError - # some of the provided functions - # have a domain x > 1, so we simply - # add 1 to our test GradientNumber if - # a DomainError is thrown - orig_grad = $test_grad + 1 - f_grad = func(orig_grad) - end - - x = value(orig_grad) - df = $expr - - @test_approx_eq value(f_grad) func(x) - - for i in 1:N - try - @test_approx_eq grad(f_grad, i) df*grad(orig_grad, i) - catch exception - info("The exception was thrown while testing function $func at value $orig_grad") - throw(exception) - end - end - end - end - - # Special Cases # - #---------------# - @test abs(test_grad) == test_grad - @test abs(-test_grad) == test_grad - @test abs2(test_grad) == test_grad*test_grad - - @test conj(test_grad) == test_grad - @test transpose(test_grad) == test_grad - @test ctranspose(test_grad) == test_grad - - atan2_grad = atan2(test_grad, rand_grad) - - @test value(atan2_grad) == atan2(test_val, rand_val) - @test grad(atan2_grad) == grad(atan(test_grad/rand_grad)) -end - -##################### -# API Usage Testing # -##################### -N = 4 -testout = Array(Float64, N) - -function grad_deriv_i(f_expr, x::Vector, i) - var_syms = [:a, :b, :c, :d] - diff_expr = differentiate(f_expr, var_syms[i]) - @eval begin - a,b,c,d = $x - return $diff_expr - end -end - -function grad_test_result(f_expr, x::Vector) - return [grad_deriv_i(f_expr, x, i) for i in 1:N] -end - -function grad_test_x(fsym, N) - randrange = 0.01:.01:.99 - - needs_modification = (:asec, :acsc, :asecd, :acscd, :acosh, :acoth) - if fsym in needs_modification - randrange += 1 - end - - return rand(randrange, N) -end - -chunk_sizes = (ForwardDiff.default_chunk_size, 1, Int(N/2), N) - -for fsym in map(first, Calculus.symbolic_derivatives_1arg()) - testexpr = :($(fsym)(a) + $(fsym)(b) - $(fsym)(c) * $(fsym)(d)) - - testf = @eval (x::Vector) -> begin - a,b,c,d = x - return $testexpr - end - - for chunk in chunk_sizes - try - testx = grad_test_x(fsym, N) - val_result = testf(testx) - grad_result = grad_test_result(testexpr, testx) - - # Non-AllResults - test_grad = (testout) -> @test_approx_eq testout grad_result - - ForwardDiff.gradient!(testout, testf, testx; chunk_size=chunk) - test_grad(testout) - - test_grad(ForwardDiff.gradient(testf, testx; chunk_size=chunk)) - - gradf! 
= ForwardDiff.gradient(testf; mutates=true, chunk_size=chunk) - testout = similar(testout) - gradf!(testout, testx) - test_grad(testout) - - gradf = ForwardDiff.gradient(testf; mutates=false, chunk_size=chunk) - test_grad(gradf(testx)) - - # AllResults - test_all_results = (testout, results) -> begin - @test_approx_eq ForwardDiff.value(results) val_result - test_grad(ForwardDiff.gradient(results)) - test_grad(testout) - end - - testout = similar(testout) - results = ForwardDiff.gradient!(testout, testf, testx, AllResults; chunk_size=chunk) - test_all_results(testout, results[2]) - - testout = similar(testout) - testout, results2 = ForwardDiff.gradient(testf, testx, AllResults; chunk_size=chunk) - test_all_results(testout, results2) - - gradf! = ForwardDiff.gradient(testf, AllResults; mutates=true, chunk_size=chunk) - testout = similar(testout) - results3 = gradf!(testout, testx) - test_all_results(testout, results3[2]) - - gradf = ForwardDiff.gradient(testf, AllResults; mutates=false, chunk_size=chunk) - testout = similar(testout) - testout, results4 = gradf(testx) - test_all_results(testout, results4) - catch err - warn("Failure when testing gradients involving $fsym with chunk_size=$chunk:") - throw(err) - end - end -end diff --git a/test/test_hessians.jl b/test/test_hessians.jl deleted file mode 100644 index 6ede9266..00000000 --- a/test/test_hessians.jl +++ /dev/null @@ -1,329 +0,0 @@ -using Base.Test -using Calculus -using ForwardDiff -using ForwardDiff: - GradientNumber, - HessianNumber, - value, - grad, - hess, - npartials, - isconstant, - gradnum - -floatrange = .01:.01:.99 -intrange = 1:10 -N = 4 -T = Float64 -C = NTuple{N,T} -hessveclen = ForwardDiff.halfhesslen(N) - -test_val = rand(floatrange) -test_partials = tuple(rand(floatrange, N)...) 
-test_hessvec = rand(floatrange, hessveclen) -test_grad = GradientNumber(test_val, test_partials) -test_hess = HessianNumber(test_grad, test_hessvec) - -###################### -# Accessor Functions # -###################### -@test value(test_hess) == test_val -@test grad(test_hess) == test_partials -@test hess(test_hess) == test_hessvec - -for i in 1:N - @test grad(test_hess, i) == test_partials[i] -end - -for i in 1:hessveclen - @test hess(test_hess, i) == test_hessvec[i] -end - -@test npartials(test_hess) == npartials(typeof(test_hess)) == N - -################################## -# Value Representation Functions # -################################## -@test eps(test_hess) == eps(test_val) -@test eps(typeof(test_hess)) == eps(T) - -hess_zero = HessianNumber(zero(test_grad), map(zero, test_hessvec)) -hess_one = HessianNumber(one(test_grad), map(zero, test_hessvec)) - -@test zero(test_hess) == hess_zero -@test zero(typeof(test_hess)) == hess_zero - -@test one(test_hess) == hess_one -@test one(typeof(test_hess)) == hess_one - -######################################### -# Conversion/Promotion/Hashing/Equality # -######################################### -int_val = round(Int, test_val) -int_partials = map(x -> round(Int, x), test_partials) -int_hessvec = map(x -> round(Int, x), test_hessvec) - -float_val = float(int_val) -float_partials = map(float, int_partials) -float_hessvec= map(float, int_hessvec) - -int_hess = HessianNumber(GradientNumber(int_val, int_partials), int_hessvec) -float_hess = HessianNumber(GradientNumber(float_val, float_partials), float_hessvec) -const_hess = HessianNumber{N,T,C}(float_val) - -@test convert(typeof(test_hess), test_hess) == test_hess -@test convert(HessianNumber, test_hess) == test_hess -@test convert(HessianNumber{N,T,C}, int_hess) == float_hess -@test convert(HessianNumber{3,T,NTuple{3,T}}, 1) == HessianNumber{3,T,NTuple{3,T}}(1.0) -@test convert(T, HessianNumber(GradientNumber(1, tuple(0, 0)))) == 1.0 - -@test float(int_hess) == float_hess - -IntHess = HessianNumber{N,Int,NTuple{N,Int}} -FloatHess = HessianNumber{N,Float64,NTuple{N,Float64}} - -@test promote_type(IntHess, IntHess) == IntHess -@test promote_type(FloatHess, IntHess) == FloatHess -@test promote_type(IntHess, Float64) == FloatHess -@test promote_type(FloatHess, Int) == FloatHess - -@test hash(int_hess) == hash(float_hess) -@test hash(const_hess) == hash(float_val) - -@test int_hess == float_hess -@test float_val == const_hess -@test const_hess == float_val - -@test isequal(int_hess, float_hess) -@test isequal(float_val, const_hess) -@test isequal(const_hess, float_val) - -@test copy(test_hess) == test_hess - -#################### -# is____ Functions # -#################### -@test isnan(test_hess) == isnan(test_val) -@test isnan(HessianNumber{N,T,C}(NaN)) - -not_const_hess = HessianNumber(GradientNumber(one(T), map(one, test_partials))) -@test !(isconstant(not_const_hess)) -@test !(isreal(not_const_hess)) -@test isconstant(const_hess) && isreal(const_hess) -@test isconstant(zero(not_const_hess)) && isreal(zero(not_const_hess)) - -inf_hess = HessianNumber{N,T,C}(Inf) -@test isfinite(test_hess) == isfinite(test_val) -@test !isfinite(inf_hess) - -@test isinf(inf_hess) -@test !(isinf(test_hess)) - -@test isless(test_hess-1, test_hess) -@test test_hess-1 < test_hess -@test test_hess > test_hess-1 - -@test isless(test_val-1, test_hess) -@test test_val-1 < test_hess -@test test_hess > test_val-1 - -@test test_hess-1 <= test_hess -@test test_hess <= test_val - -@test isless(test_hess, test_val+1) 
-@test test_hess < test_val+1 -@test test_val+1 > test_hess - -@test test_hess+1 >= test_hess -@test test_hess+1 >= test_val - -@test floor(Int, test_hess) == floor(Int, test_val) -@test ceil(Int, test_hess) == ceil(Int, test_val) -@test trunc(Int, test_hess) == trunc(Int, test_val) -@test round(Int, test_hess) == round(Int, test_val) - -####### -# I/O # -####### -io = IOBuffer() -write(io, test_hess) -seekstart(io) - -@test read(io, typeof(test_hess)) == test_hess - -close(io) - -############## -# Math tests # -############## -rand_val = rand(floatrange) -rand_partials = map(x -> rand(floatrange), test_partials) -rand_hessvec = map(x -> rand(floatrange), test_hessvec) -rand_grad = GradientNumber(rand_val, rand_partials) -rand_hess = HessianNumber(rand_grad, rand_hessvec) - -# Addition/Subtraction # -#----------------------# -@test rand_hess + test_hess == HessianNumber(rand_grad + test_grad, rand_hessvec + test_hessvec) -@test rand_hess + test_hess == test_hess + rand_hess -@test rand_hess - test_hess == HessianNumber(rand_grad - test_grad, rand_hessvec - test_hessvec) - -@test rand_val + test_hess == HessianNumber(rand_val + test_grad, test_hessvec) -@test rand_val + test_hess == test_hess + rand_val -@test rand_val - test_hess == HessianNumber(rand_val - test_grad, -test_hessvec) -@test test_hess - rand_val == HessianNumber(test_grad - rand_val, test_hessvec) - -@test -test_hess == HessianNumber(-test_grad, -test_hessvec) - -# Multiplication/Division # -#-------------------------# -function hess_approx_eq(a::HessianNumber, b::HessianNumber) - eps = 1e-9 - try - @test_approx_eq_eps value(a) value(b) eps - @test_approx_eq_eps collect(grad(a)) collect(grad(b)) eps - @test_approx_eq_eps hess(a) hess(b) eps - catch err - error("Failure: HessianNumber a and HessianNumber b should be equal.\n Error: $err\n rand_hess = $rand_hess\n test_hess = $test_hess") - end -end - -@test gradnum(rand_hess * test_hess) == rand_grad * test_grad - -@test rand_val * test_hess == HessianNumber(rand_val * test_grad, rand_val * test_hessvec) -@test test_hess * rand_val == rand_val * test_hess - -@test test_hess * true == test_hess -@test true * test_hess == test_hess * true -@test test_hess * false == zero(test_hess) -@test false * test_hess == test_hess * false - -hess_approx_eq(rand_hess, rand_hess * (test_hess/test_hess)) -hess_approx_eq(rand_hess, test_hess * (rand_hess/test_hess)) -hess_approx_eq(rand_hess, (rand_hess * test_hess)/test_hess) -hess_approx_eq(test_hess, (rand_hess * test_hess)/rand_hess) - -hess_approx_eq(2 * test_hess, test_hess + test_hess) -hess_approx_eq(test_hess * inv(test_hess), one(test_hess)) - -hess_approx_eq(rand_val / test_hess, rand_val * inv(test_hess)) -hess_approx_eq(rand_val / test_hess, rand_val * 1/test_hess) -hess_approx_eq(rand_hess / test_hess, rand_hess * inv(test_hess)) -hess_approx_eq(rand_hess / test_hess, rand_hess * 1/test_hess) -hess_approx_eq(test_hess / test_hess, one(test_hess)) - -@test test_hess / rand_val == HessianNumber(test_grad / rand_val, test_hessvec / rand_val) - -# Exponentiation # -#----------------# -hess_approx_eq(test_hess^rand_hess, exp(rand_hess * log(test_hess))) -hess_approx_eq(test_hess^rand_val, exp(rand_val * log(test_hess))) -hess_approx_eq(rand_val^test_hess, exp(test_hess * log(rand_val))) - -# Special Cases # -#---------------# -@test abs(test_hess) == test_hess -@test abs(-test_hess) == test_hess -hess_approx_eq(abs2(test_hess), test_hess*test_hess) - -atan2_hess = atan2(test_hess, rand_hess) -atanyx_hess = atan(test_hess/rand_hess) - 
-@test value(atan2_hess) == atan2(test_val, rand_val) -@test_approx_eq collect(grad(atan2_hess)) collect(grad(atanyx_hess)) -@test_approx_eq hess(atan2_hess) hess(atanyx_hess) - -# Unary functions/API usage testing # -#-----------------------------------# -N = 6 -testout = Array(Float64, N, N) - -function hess_deriv_ij(f_expr, x::Vector, i, j) - var_syms = [:a, :b, :c, :l, :m, :r] - diff_expr = differentiate(f_expr, var_syms[j]) - diff_expr = differentiate(diff_expr, var_syms[i]) - @eval begin - a,b,c,l,m,r = $x - return $diff_expr - end -end - -function hess_test_result(f_expr, x::Vector) - return [hess_deriv_ij(f_expr, x, i, j) for i in 1:N, j in 1:N] -end - -function hess_test_x(fsym, N) - randrange = .01:.01:.99 - - needs_modification = (:acosh, :acoth) - if fsym in needs_modification - randrange += 1 - end - - return rand(randrange, N) -end - -chunk_sizes = (ForwardDiff.default_chunk_size, 2, Int(N/2), N) - -for fsym in ForwardDiff.auto_defined_unary_hess_funcs - testexpr = :($(fsym)(a) + $(fsym)(b) - $(fsym)(c) * $(fsym)(l) - $(fsym)(m) + $(fsym)(r)) - - testf = @eval (x::Vector) -> begin - a,b,c,l,m,r = x - return $testexpr - end - - for chunk in chunk_sizes - try - testx = hess_test_x(fsym, N) - val_result = testf(testx) - grad_result = ForwardDiff.gradient(testf, testx) - hess_result = hess_test_result(testexpr, testx) - - # Non-AllResults - test_hess = (testout) -> @test_approx_eq testout hess_result - - ForwardDiff.hessian!(testout, testf, testx; chunk_size=chunk) - test_hess(testout) - - test_hess(ForwardDiff.hessian(testf, testx; chunk_size=chunk)) - - hessf! = ForwardDiff.hessian(testf; mutates=true, chunk_size=chunk) - testout = similar(testout) - hessf!(testout, testx) - test_hess(testout) - - hessf = ForwardDiff.hessian(testf; mutates=false, chunk_size=chunk) - test_hess(hessf(testx)) - - # AllResults - test_all_results = (testout, results) -> begin - @test_approx_eq ForwardDiff.value(results) val_result - @test_approx_eq ForwardDiff.gradient(results) grad_result - test_hess(ForwardDiff.hessian(results)) - test_hess(testout) - end - - testout = similar(testout) - results = ForwardDiff.hessian!(testout, testf, testx, AllResults; chunk_size=chunk) - test_all_results(testout, results[2]) - - testout = similar(testout) - testout, results2 = ForwardDiff.hessian(testf, testx, AllResults; chunk_size=chunk) - test_all_results(testout, results2) - - hessf! 
= ForwardDiff.hessian(testf, AllResults; mutates=true, chunk_size=chunk) - testout = similar(testout) - results3 = hessf!(testout, testx) - test_all_results(testout, results3[2]) - - hessf = ForwardDiff.hessian(testf, AllResults; mutates=false, chunk_size=chunk) - testout = similar(testout) - testout, results4 = hessf(testx) - test_all_results(testout, results4) - catch err - warn("Failure when testing Hessians involving $fsym with chunk_size=$chunk:") - throw(err) - end - end -end diff --git a/test/test_jacobians.jl b/test/test_jacobians.jl deleted file mode 100644 index f9d1834d..00000000 --- a/test/test_jacobians.jl +++ /dev/null @@ -1,126 +0,0 @@ -using Base.Test -using ForwardDiff -using Calculus - -N = 4 -M = 5 - -testout = Array(Float64, M, N) - -function jacob_deriv_ij(fs::Vector{Expr}, x::Vector, i, j) - var_syms = [:a, :b, :c, :d] - diff_expr = differentiate(fs[i], var_syms[j]) - @eval begin - a,b,c,d = $x - return $diff_expr - end -end - -function jacob_test_result(fs::Vector{Expr}, x::Vector) - return [jacob_deriv_ij(fs, x, i, j) for i in 1:M, j in 1:N] -end - -function jacob_test_x(fsym, N) - randrange = 0.01:.01:.99 - needs_rand_mod = tuple(:acosh, :acoth, :asec, :acsc, :asecd, :acscd) - - if fsym in needs_rand_mod - randrange += 1 - end - - return rand(randrange, N) -end - -chunk_sizes = (ForwardDiff.default_chunk_size, 1, Int(N/2), N) - -for fsym in ForwardDiff.auto_defined_unary_funcs - testexprs = [:($(fsym)(a) + $(fsym)(b)), - :(- $(fsym)(c)), - :(4 * $(fsym)(d)), - :($(fsym)(b)^5), - :($(fsym)(a))] - - testf = @eval (x::Vector) -> begin - a,b,c,d = x - return [$(testexprs...)] - end - - testf! = @eval (output::Vector, x::Vector) -> begin - a,b,c,d = x - output[1] = $(testexprs[1]) - output[2] = $(testexprs[2]) - output[3] = $(testexprs[3]) - output[4] = $(testexprs[4]) - output[5] = $(testexprs[5]) - end - - for chunk in chunk_sizes - try - testx = jacob_test_x(fsym, N) - val_result = testf(testx) - jacob_result = jacob_test_result(testexprs, testx) - - # Non-AllResults - test_jacob = (testout) -> @test_approx_eq testout jacob_result - - ForwardDiff.jacobian!(testout, testf, testx; chunk_size=chunk) - test_jacob(testout) - - test_jacob(ForwardDiff.jacobian(testf, testx; chunk_size=chunk)) - - jacf! = ForwardDiff.jacobian(testf; mutates=true, chunk_size=chunk) - testout = similar(testout) - jacf!(testout, testx) - test_jacob(testout) - - jacf = ForwardDiff.jacobian(testf; mutates=false, chunk_size=chunk) - test_jacob(jacf(testx)) - - jacf! = ForwardDiff.jacobian(testf!; mutates=true, chunk_size=chunk, output_length=M) - testout = similar(testout) - jacf!(testout, testx) - test_jacob(testout) - - jacf = ForwardDiff.jacobian(testf!; mutates=false, chunk_size=chunk, output_length=M) - test_jacob(jacf(testx)) - - # AllResults - test_all_results = (testout, results) -> begin - @test_approx_eq ForwardDiff.value(results) val_result - test_jacob(ForwardDiff.jacobian(results)) - test_jacob(testout) - end - - testout = similar(testout) - results = ForwardDiff.jacobian!(testout, testf, testx, AllResults; chunk_size=chunk) - test_all_results(testout, results[2]) - - testout = similar(testout) - testout, results2 = ForwardDiff.jacobian(testf, testx, AllResults; chunk_size=chunk) - test_all_results(testout, results2) - - jacf! 
= ForwardDiff.jacobian(testf, AllResults; mutates=true, chunk_size=chunk) - testout = similar(testout) - results3 = jacf!(testout, testx) - test_all_results(testout, results3[2]) - - jacf = ForwardDiff.jacobian(testf, AllResults; mutates=false, chunk_size=chunk) - testout = similar(testout) - testout, results4 = jacf(testx) - test_all_results(testout, results4) - - jacf! = ForwardDiff.jacobian(testf!, AllResults; mutates=true, chunk_size=chunk, output_length=M) - testout = similar(testout) - results5 = jacf!(testout, testx) - test_all_results(testout, results5[2]) - - jacf = ForwardDiff.jacobian(testf!, AllResults; mutates=false, chunk_size=chunk, output_length=M) - testout = similar(testout) - testout, results6 = jacf(testx) - test_all_results(testout, results6) - catch err - warn("Failure when testing Jacobians involving $fsym with chunk_size=$chunk:") - throw(err) - end - end -end diff --git a/test/test_tensors.jl b/test/test_tensors.jl deleted file mode 100644 index 0d6f608d..00000000 --- a/test/test_tensors.jl +++ /dev/null @@ -1,346 +0,0 @@ -using Base.Test -using Calculus -using ForwardDiff -using ForwardDiff: - GradientNumber, - HessianNumber, - TensorNumber, - value, - grad, - hess, - tens, - npartials, - isconstant, - hessnum, - hess_inds - -floatrange = .01:.01:.99 -intrange = 1:10 -N = 4 -T = Float64 -C = NTuple{N,T} -hessveclen = ForwardDiff.halfhesslen(N) -tensveclen = ForwardDiff.halftenslen(N) - -test_val = rand(floatrange) -test_partials = tuple(rand(floatrange, N)...) -test_hessvec = rand(floatrange, hessveclen) -test_tensvec = rand(floatrange, tensveclen) - -test_grad = GradientNumber(test_val, test_partials) -test_hess = HessianNumber(test_grad, test_hessvec) -test_tens = TensorNumber(test_hess, test_tensvec) - -###################### -# Accessor Functions # -###################### -@test value(test_tens) == test_val -@test grad(test_tens) == test_partials -@test hess(test_tens) == test_hessvec -@test tens(test_tens) == test_tensvec - -for i in 1:N - @test grad(test_tens, i) == test_partials[i] -end - -for i in 1:hessveclen - @test hess(test_tens, i) == test_hessvec[i] -end - -for i in 1:tensveclen - @test tens(test_tens, i) == test_tensvec[i] -end - -@test npartials(test_tens) == npartials(typeof(test_tens)) == N - -################################## -# Value Representation Functions # -################################## -@test eps(test_tens) == eps(test_val) -@test eps(typeof(test_tens)) == eps(T) - -tens_zero = TensorNumber(zero(test_hess), map(zero, test_tensvec)) -tens_one = TensorNumber(one(test_hess), map(zero, test_tensvec)) - -@test zero(test_tens) == tens_zero -@test zero(typeof(test_tens)) == tens_zero - -@test one(test_tens) == tens_one -@test one(typeof(test_tens)) == tens_one - -######################################### -# Conversion/Promotion/Hashing/Equality # -######################################### -int_val = round(Int, test_val) -int_partials = map(x -> round(Int, x), test_partials) -int_hessvec = map(x -> round(Int, x), test_hessvec) -int_tensvec = map(x -> round(Int, x), test_tensvec) - -float_val = float(int_val) -float_partials = map(float, int_partials) -float_hessvec= map(float, int_hessvec) -float_tensvec= map(float, int_tensvec) - -int_tens = TensorNumber(HessianNumber(GradientNumber(int_val, int_partials), int_hessvec), int_tensvec) -float_tens = TensorNumber(HessianNumber(GradientNumber(float_val, float_partials), float_hessvec), float_tensvec) -const_tens = TensorNumber{N,T,C}(float_val) - -@test convert(typeof(test_tens), test_tens) == 
test_tens -@test convert(TensorNumber, test_tens) == test_tens -@test convert(TensorNumber{N,T,C}, int_tens) == float_tens -@test convert(TensorNumber{3,T,NTuple{3,T}}, 1) == TensorNumber{3,T,NTuple{3,T}}(1.0) -@test convert(T, TensorNumber(HessianNumber(GradientNumber(1, tuple(0, 0))))) == 1.0 - -@test float(int_tens) == float_tens - -IntTens = TensorNumber{N,Int,NTuple{N,Int}} -FloatTens = TensorNumber{N,Float64,NTuple{N,Float64}} - -@test promote_type(IntTens, IntTens) == IntTens -@test promote_type(FloatTens, IntTens) == FloatTens -@test promote_type(IntTens, Float64) == FloatTens -@test promote_type(FloatTens, Int) == FloatTens - -@test hash(int_tens) == hash(float_tens) -@test hash(const_tens) == hash(float_val) - -@test int_tens == float_tens -@test float_val == const_tens -@test const_tens == float_val - -@test isequal(int_tens, float_tens) -@test isequal(float_val, const_tens) -@test isequal(const_tens, float_val) - -@test copy(test_tens) == test_tens - -#################### -# is____ Functions # -#################### -@test isnan(test_tens) == isnan(test_val) -@test isnan(TensorNumber{N,T,C}(NaN)) - -not_const_tens = TensorNumber(HessianNumber(GradientNumber(one(T), map(one, test_partials)))) -@test !(isconstant(not_const_tens)) -@test !(isreal(not_const_tens)) -@test isconstant(const_tens) && isreal(const_tens) -@test isconstant(zero(not_const_tens)) && isreal(zero(not_const_tens)) - -inf_tens = TensorNumber{N,T,C}(Inf) -@test isfinite(test_tens) == isfinite(test_val) -@test !isfinite(inf_tens) - -@test isinf(inf_tens) -@test !(isinf(test_tens)) - -@test isless(test_tens-1, test_tens) -@test test_tens-1 < test_tens -@test test_tens > test_tens-1 - -@test isless(test_val-1, test_tens) -@test test_val-1 < test_tens -@test test_tens > test_val-1 - -@test test_tens-1 <= test_tens -@test test_tens <= test_val - -@test isless(test_tens, test_val+1) -@test test_tens < test_val+1 -@test test_val+1 > test_tens - -@test test_tens+1 >= test_tens -@test test_tens+1 >= test_val - -@test floor(Int, test_tens) == floor(Int, test_val) -@test ceil(Int, test_tens) == ceil(Int, test_val) -@test trunc(Int, test_tens) == trunc(Int, test_val) -@test round(Int, test_tens) == round(Int, test_val) - -####### -# I/O # -####### -io = IOBuffer() -write(io, test_tens) -seekstart(io) - -@test read(io, typeof(test_tens)) == test_tens - -close(io) - -############## -# Math tests # -############## -rand_val = rand(floatrange) -rand_partials = map(x -> rand(floatrange), test_partials) -rand_hessvec = map(x -> rand(floatrange), test_hessvec) -rand_tensvec = map(x -> rand(floatrange), test_tensvec) - -rand_grad = GradientNumber(rand_val, rand_partials) -rand_hess = HessianNumber(rand_grad, rand_hessvec) -rand_tens = TensorNumber(rand_hess, rand_tensvec) - -# Addition/Subtraction # -#----------------------# -@test rand_tens + test_tens == TensorNumber(rand_hess + test_hess, rand_tensvec + test_tensvec) -@test rand_tens + test_tens == test_tens + rand_tens -@test rand_tens - test_tens == TensorNumber(rand_hess - test_hess, rand_tensvec - test_tensvec) - -@test rand_val + test_tens == TensorNumber(rand_val + test_hess, test_tensvec) -@test rand_val + test_tens == test_tens + rand_val -@test rand_val - test_tens == TensorNumber(rand_val - test_hess, -test_tensvec) -@test test_tens - rand_val == TensorNumber(test_hess - rand_val, test_tensvec) - -@test -test_tens == TensorNumber(-test_hess, -test_tensvec) - -# Multiplication/Division # -#-------------------------# -function tens_approx_eq(a::TensorNumber, 
b::TensorNumber) - eps = 1e-8 - try - @test_approx_eq_eps value(a) value(b) eps - @test_approx_eq_eps collect(grad(a)) collect(grad(b)) eps - @test_approx_eq_eps hess(a) hess(b) eps - @test_approx_eq_eps tens(a) tens(b) eps - catch err - error("Failure: TensorNumber a and TensorNumber b should be equal.\n Error: $err\n rand_tens = $rand_tens\n test_tens = $test_tens") - end -end - -@test hessnum(rand_tens * test_tens) == rand_hess * test_hess - -@test rand_val * test_tens == TensorNumber(rand_val * test_hess, rand_val * test_tensvec) -@test test_tens * rand_val == rand_val * test_tens - -@test test_tens * true == test_tens -@test true * test_tens == test_tens * true -@test test_tens * false == zero(test_tens) -@test false * test_tens == test_tens * false - -tens_approx_eq(rand_tens, rand_tens * (test_tens/test_tens)) -tens_approx_eq(rand_tens, test_tens * (rand_tens/test_tens)) -tens_approx_eq(rand_tens, (rand_tens * test_tens)/test_tens) -tens_approx_eq(test_tens, (rand_tens * test_tens)/rand_tens) - -tens_approx_eq(2 * test_tens, test_tens + test_tens) -tens_approx_eq(test_tens * inv(test_tens), one(test_tens)) - -tens_approx_eq(rand_val / test_tens, rand_val * inv(test_tens)) -tens_approx_eq(rand_val / test_tens, rand_val * 1/test_tens) -tens_approx_eq(rand_tens / test_tens, rand_tens * inv(test_tens)) -tens_approx_eq(rand_tens / test_tens, rand_tens * 1/test_tens) -tens_approx_eq(test_tens / test_tens, one(test_tens)) - -@test test_tens / rand_val == TensorNumber(test_hess / rand_val, test_tensvec / rand_val) - -# Exponentiation # -#----------------# -tens_approx_eq(test_tens^rand_tens, exp(rand_tens * log(test_tens))) -tens_approx_eq(test_tens^rand_val, exp(rand_val * log(test_tens))) -tens_approx_eq(rand_val^test_tens, exp(test_tens * log(rand_val))) - -# Special Cases # -#---------------# -@test abs(test_tens) == test_tens -@test abs(-test_tens) == test_tens -tens_approx_eq(abs2(test_tens), test_tens*test_tens) - -atan2_tens = atan2(test_tens, rand_tens) -atanyx_tens = atan(test_tens/rand_tens) - -@test value(atan2_tens) == atan2(test_val, rand_val) -@test_approx_eq collect(grad(atan2_tens)) collect(grad(atanyx_tens)) -@test_approx_eq hess(atan2_tens) hess(atanyx_tens) -@test_approx_eq tens(atan2_tens) tens(atanyx_tens) - -# Unary functions/API usage testing # -#-----------------------------------# -testout = Array(Float64, N, N, N) - -function tens_deriv_ijk(f_expr, x::Vector, i, j, k) - var_syms = [:a, :b, :c, :d] - diff_expr = differentiate(f_expr, var_syms[k]) - diff_expr = differentiate(diff_expr, var_syms[j]) - diff_expr = differentiate(diff_expr, var_syms[i]) - @eval begin - a,b,c,d = $x - return $diff_expr - end -end - -function tens_test_result(f_expr, x::Vector) - return [tens_deriv_ijk(f_expr, x, i, j, k) for i in 1:N, j in 1:N, k in 1:N] -end - -function tens_test_x(fsym, N) - randrange = .01:.01:.99 - - needs_modification = tuple(:acosh, :acoth) - if fsym in needs_modification - randrange += 1 - end - - return rand(randrange, N) -end - -for fsym in ForwardDiff.auto_defined_unary_tens_funcs - testexpr = :($(fsym)(a) + $(fsym)(b) - $(fsym)(c) * $(fsym)(d)) - - testf = @eval (x::Vector) -> begin - a,b,c,d = x - return $testexpr - end - - try - testx = tens_test_x(fsym, N) - val_result = testf(testx) - grad_result = ForwardDiff.gradient(testf, testx) - hess_result = ForwardDiff.hessian(testf, testx) - tens_result = tens_test_result(testexpr, testx) - - # Non-AllResults - test_tens = (testout) -> @test_approx_eq testout tens_result - - ForwardDiff.tensor!(testout, testf, 
testx) - test_tens(testout) - - test_tens(ForwardDiff.tensor(testf, testx)) - - tensf! = ForwardDiff.tensor(testf; mutates=true) - testout = similar(testout) - tensf!(testout, testx) - test_tens(testout) - - tensf = ForwardDiff.tensor(testf; mutates=false) - test_tens(tensf(testx)) - - # AllResults - test_all_results = (testout, results) -> begin - @test_approx_eq ForwardDiff.value(results) val_result - @test_approx_eq ForwardDiff.gradient(results) grad_result - @test_approx_eq ForwardDiff.hessian(results) hess_result - test_tens(ForwardDiff.tensor(results)) - test_tens(testout) - end - - testout = similar(testout) - results = ForwardDiff.tensor!(testout, testf, testx, AllResults) - test_all_results(testout, results[2]) - - testout = similar(testout) - testout, results2 = ForwardDiff.tensor(testf, testx, AllResults) - test_all_results(testout, results2) - - tensf! = ForwardDiff.tensor(testf, AllResults; mutates=true) - testout = similar(testout) - results3 = tensf!(testout, testx) - test_all_results(testout, results3[2]) - - tensf = ForwardDiff.tensor(testf, AllResults; mutates=false) - testout = similar(testout) - testout, results4 = tensf(testx) - test_all_results(testout, results4) - catch err - warn("Failure when testing Tensors involving $fsym:") - throw(err) - end -end
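
A brief usage sketch of the macro-based API whose expansions GradientTest.jl above checks (`f` here is just an example function; kwarg names match the tested expansions):

    using ForwardDiff
    f(x) = sum(sin, x) + prod(tan, x) * sum(sqrt, x)
    g = ForwardDiff.@gradient(f)                # returns a gradient function
    g(rand(5))
    ForwardDiff.@gradient(f, rand(5))           # evaluate directly at a point
    ForwardDiff.@gradient(f, rand(5), chunk = 2)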