From 6f311f01e9671509656e5f74562e663b5d1fe854 Mon Sep 17 00:00:00 2001
From: Jarrett Revels
Date: Mon, 8 Feb 2016 18:30:46 -0500
Subject: [PATCH] Wrap multithreading code in VERSION conditionals in order to
 support v0.4

---
 .travis.yml        |  2 +-
 src/ForwardDiff.jl |  3 +-
 src/cache.jl       |  4 +--
 src/gradient.jl    | 82 ++++++++++++++++++++++++----------------------
 4 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index c0904ee6..47ffb0d8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,4 +9,4 @@ script:
   - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
   - julia -e 'Pkg.clone(pwd()); Pkg.build("ForwardDiff"); Pkg.test("ForwardDiff"; coverage=true)';
 after_success:
-  - julia -e 'cd(Pkg.dir("ForwardDiff")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
\ No newline at end of file
+  - julia -e 'cd(Pkg.dir("ForwardDiff")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl
index 0a2256ab..e4975efb 100644
--- a/src/ForwardDiff.jl
+++ b/src/ForwardDiff.jl
@@ -2,10 +2,11 @@ isdefined(Base, :__precompile__) && __precompile__()
 
 module ForwardDiff
 
-import Base.Threads
 import Calculus
 import NaNMath
 
+const THREAD_VERSION = v"0.5.0-dev+923"
+const NTHREADS = VERSION >= THREAD_VERSION ? Base.Threads.nthreads() : 1
 const AUTO_DEFINED_UNARY_FUNCS = map(first, Calculus.symbolic_derivatives_1arg())
 const NANMATH_FUNCS = (:sin, :cos, :tan, :asin, :acos, :acosh,
                        :atanh, :log, :log2, :log10, :lgamma, :log1p)
diff --git a/src/cache.jl b/src/cache.jl
index 303328b1..c989118d 100644
--- a/src/cache.jl
+++ b/src/cache.jl
@@ -1,4 +1,4 @@
-const CACHE = ntuple(n -> Dict{DataType,Any}(), Threads.nthreads())
+const CACHE = ntuple(n -> Dict{DataType,Any}(), NTHREADS)
 
 function clearcache!()
     for d in CACHE
@@ -6,7 +6,7 @@ function clearcache!()
     end
 end
 
-@eval cachefetch!(D::DataType, L::DataType) = $(Expr(:tuple, [:(cachefetch!($i, D, L)) for i in 1:Threads.nthreads()]...))
+@eval cachefetch!(D::DataType, L::DataType) = $(Expr(:tuple, [:(cachefetch!($i, D, L)) for i in 1:NTHREADS]...))
 
 function cachefetch!{N,T,L}(tid::Integer, ::Type{DiffNumber{N,T}}, ::Type{Val{L}})
     K = Tuple{DiffNumber{N,T},L}
diff --git a/src/gradient.jl b/src/gradient.jl
index 43a6fb82..14d8d236 100644
--- a/src/gradient.jl
+++ b/src/gradient.jl
@@ -122,57 +122,59 @@ end
     return calc_gradient_expr(body)
 end
 
-@generated function multi_calc_gradient!{S,T,N,L}(f, output::Vector{S}, x::Vector{T}, ::Type{Val{N}}, ::Type{Val{L}})
-    if N == L
-        body = VEC_MODE_EXPR
-    else
-        nthreads = Threads.nthreads()
-        remainder = L % N == 0 ? N : L % N
-        fill_length = L - remainder
-        reseed_partials = remainder == N ? :() : :(seed_partials = cachefetch!(tid, Partials{N,T}, Val{$(remainder)}))
-        body = quote
-            workvecs::NTuple{$(nthreads), Vector{DiffNumber{N,T}}} = cachefetch!(DiffNumber{N,T}, Val{L})
-            pzeros = zero(Partials{N,T})
+if VERSION >= THREAD_VERSION
+    @generated function multi_calc_gradient!{S,T,N,L}(f, output::Vector{S}, x::Vector{T}, ::Type{Val{N}}, ::Type{Val{L}})
+        if N == L
+            body = VEC_MODE_EXPR
+        else
+            nthreads = Threads.nthreads()
+            remainder = L % N == 0 ? N : L % N
+            fill_length = L - remainder
+            reseed_partials = remainder == N ? :() : :(seed_partials = cachefetch!(tid, Partials{N,T}, Val{$(remainder)}))
+            body = quote
+                workvecs::NTuple{$(nthreads), Vector{DiffNumber{N,T}}} = cachefetch!(DiffNumber{N,T}, Val{L})
+                pzeros = zero(Partials{N,T})
+
+                Threads.@threads for t in 1:$(nthreads)
+                    # must be local, see https://github.com/JuliaLang/julia/issues/14948
+                    local workvec = workvecs[t]
+                    @simd for i in 1:L
+                        @inbounds workvec[i] = DiffNumber{N,T}(x[i], pzeros)
+                    end
+                end
 
-            Threads.@threads for t in 1:$(nthreads)
-                # must be local, see https://github.com/JuliaLang/julia/issues/14948
-                local workvec = workvecs[t]
-                @simd for i in 1:L
-                    @inbounds workvec[i] = DiffNumber{N,T}(x[i], pzeros)
+                Threads.@threads for c in 1:$(N):$(fill_length)
+                    local workvec = workvecs[Threads.threadid()]
+                    @simd for i in 1:N
+                        j = i + c - 1
+                        @inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i])
+                    end
+                    local result::DiffNumber{N,S} = f(workvec)
+                    @simd for i in 1:N
+                        j = i + c - 1
+                        @inbounds output[j] = partials(result, i)
+                        @inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros)
+                    end
+                end
-                end
-            Threads.@threads for c in 1:$(N):$(fill_length)
-                local workvec = workvecs[Threads.threadid()]
-                @simd for i in 1:N
-                    j = i + c - 1
+                # Performing the final chunk manually seems to triggers some additional
+                # optimization heuristics, which results in more efficient memory allocation
+                $(reseed_partials)
+                workvec = workvecs[tid]
+                @simd for i in 1:$(remainder)
+                    j = $(fill_length) + i
                     @inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i])
                 end
-                local result::DiffNumber{N,S} = f(workvec)
-                @simd for i in 1:N
-                    j = i + c - 1
+                result::DiffNumber{N,S} = f(workvec)
+                @simd for i in 1:$(remainder)
+                    j = $(fill_length) + i
                     @inbounds output[j] = partials(result, i)
                     @inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros)
                 end
             end
-
-            # Performing the final chunk manually seems to triggers some additional
-            # optimization heuristics, which results in more efficient memory allocation
-            $(reseed_partials)
-            workvec = workvecs[tid]
-            @simd for i in 1:$(remainder)
-                j = $(fill_length) + i
-                @inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i])
-            end
-            result::DiffNumber{N,S} = f(workvec)
-            @simd for i in 1:$(remainder)
-                j = $(fill_length) + i
-                @inbounds output[j] = partials(result, i)
-                @inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros)
-            end
         end
+        return calc_gradient_expr(body)
     end
-    return calc_gradient_expr(body)
 end
 
 const VEC_MODE_EXPR = quote