Skip to content

Commit

Permalink
Wrap multithreading code in VERSION conditionals in order to support …
Browse files Browse the repository at this point in the history
…v0.4
  • Loading branch information
jrevels committed Feb 9, 2016
1 parent d37f375 commit 6f311f0
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ script:
- if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
- julia -e 'Pkg.clone(pwd()); Pkg.build("ForwardDiff"); Pkg.test("ForwardDiff"; coverage=true)';
after_success:
- julia -e 'cd(Pkg.dir("ForwardDiff")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
- julia -e 'cd(Pkg.dir("ForwardDiff")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
3 changes: 2 additions & 1 deletion src/ForwardDiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ isdefined(Base, :__precompile__) && __precompile__()

module ForwardDiff

import Base.Threads
import Calculus
import NaNMath

const THREAD_VERSION = v"0.5.0-dev+923"
const NTHREADS = VERSION >= THREAD_VERSION ? Base.Threads.nthreads() : 1
const AUTO_DEFINED_UNARY_FUNCS = map(first, Calculus.symbolic_derivatives_1arg())
const NANMATH_FUNCS = (:sin, :cos, :tan, :asin, :acos, :acosh,
:atanh, :log, :log2, :log10, :lgamma, :log1p)
Expand Down
4 changes: 2 additions & 2 deletions src/cache.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
const CACHE = ntuple(n -> Dict{DataType,Any}(), Threads.nthreads())
const CACHE = ntuple(n -> Dict{DataType,Any}(), NTHREADS)

# Empty every per-thread cache dictionary in `CACHE`, releasing all
# memoized work vectors / seed partials. Returns `nothing`.
function clearcache!()
    foreach(empty!, CACHE)
    return nothing
end

@eval cachefetch!(D::DataType, L::DataType) = $(Expr(:tuple, [:(cachefetch!($i, D, L)) for i in 1:Threads.nthreads()]...))
@eval cachefetch!(D::DataType, L::DataType) = $(Expr(:tuple, [:(cachefetch!($i, D, L)) for i in 1:NTHREADS]...))

function cachefetch!{N,T,L}(tid::Integer, ::Type{DiffNumber{N,T}}, ::Type{Val{L}})
K = Tuple{DiffNumber{N,T},L}
Expand Down
82 changes: 42 additions & 40 deletions src/gradient.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,57 +122,59 @@ end
return calc_gradient_expr(body)
end

@generated function multi_calc_gradient!{S,T,N,L}(f, output::Vector{S}, x::Vector{T}, ::Type{Val{N}}, ::Type{Val{L}})
if N == L
body = VEC_MODE_EXPR
else
nthreads = Threads.nthreads()
remainder = L % N == 0 ? N : L % N
fill_length = L - remainder
reseed_partials = remainder == N ? :() : :(seed_partials = cachefetch!(tid, Partials{N,T}, Val{$(remainder)}))
body = quote
workvecs::NTuple{$(nthreads), Vector{DiffNumber{N,T}}} = cachefetch!(DiffNumber{N,T}, Val{L})
pzeros = zero(Partials{N,T})
if VERSION >= THREAD_VERSION
@generated function multi_calc_gradient!{S,T,N,L}(f, output::Vector{S}, x::Vector{T}, ::Type{Val{N}}, ::Type{Val{L}})
if N == L
body = VEC_MODE_EXPR
else
nthreads = Threads.nthreads()
remainder = L % N == 0 ? N : L % N
fill_length = L - remainder
reseed_partials = remainder == N ? :() : :(seed_partials = cachefetch!(tid, Partials{N,T}, Val{$(remainder)}))
body = quote
workvecs::NTuple{$(nthreads), Vector{DiffNumber{N,T}}} = cachefetch!(DiffNumber{N,T}, Val{L})
pzeros = zero(Partials{N,T})

Threads.@threads for t in 1:$(nthreads)
# must be local, see https://github.com/JuliaLang/julia/issues/14948
local workvec = workvecs[t]
@simd for i in 1:L
@inbounds workvec[i] = DiffNumber{N,T}(x[i], pzeros)
end
end

Threads.@threads for t in 1:$(nthreads)
# must be local, see https://github.com/JuliaLang/julia/issues/14948
local workvec = workvecs[t]
@simd for i in 1:L
@inbounds workvec[i] = DiffNumber{N,T}(x[i], pzeros)
Threads.@threads for c in 1:$(N):$(fill_length)
local workvec = workvecs[Threads.threadid()]
@simd for i in 1:N
j = i + c - 1
@inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i])
end
local result::DiffNumber{N,S} = f(workvec)
@simd for i in 1:N
j = i + c - 1
@inbounds output[j] = partials(result, i)
@inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros)
end
end
end

Threads.@threads for c in 1:$(N):$(fill_length)
local workvec = workvecs[Threads.threadid()]
@simd for i in 1:N
j = i + c - 1
                              # Performing the final chunk manually seems to trigger some additional
                              # optimization heuristics, which results in more efficient memory allocation
$(reseed_partials)
workvec = workvecs[tid]
@simd for i in 1:$(remainder)
j = $(fill_length) + i
@inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i])
end
local result::DiffNumber{N,S} = f(workvec)
@simd for i in 1:N
j = i + c - 1
result::DiffNumber{N,S} = f(workvec)
@simd for i in 1:$(remainder)
j = $(fill_length) + i
@inbounds output[j] = partials(result, i)
@inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros)
end
end

            # Performing the final chunk manually seems to trigger some additional
            # optimization heuristics, which results in more efficient memory allocation
$(reseed_partials)
workvec = workvecs[tid]
@simd for i in 1:$(remainder)
j = $(fill_length) + i
@inbounds workvec[j] = DiffNumber{N,T}(x[j], seed_partials[i])
end
result::DiffNumber{N,S} = f(workvec)
@simd for i in 1:$(remainder)
j = $(fill_length) + i
@inbounds output[j] = partials(result, i)
@inbounds workvec[j] = DiffNumber{N,T}(x[j], pzeros)
end
end
return calc_gradient_expr(body)
end
return calc_gradient_expr(body)
end

const VEC_MODE_EXPR = quote
Expand Down

0 comments on commit 6f311f0

Please sign in to comment.