Skip to content

Commit

Permalink
Simplify the compile cache.
Browse files Browse the repository at this point in the history
It's now the user's responsibility to pass a cache.
  • Loading branch information
maleadt committed Sep 29, 2020
1 parent 272201e commit a90f7bb
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 40 deletions.
2 changes: 1 addition & 1 deletion src/GPUCompiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ function __init__()
InitializeAllAsmParsers()
InitializeAllTargetMCs()

frozen[] = parse(Bool, get(ENV, "JULIA_GPUCOMPILER_FROZEN", "false"))
freeze_kernels[] = parse(Bool, get(ENV, "JULIA_GPUCOMPILER_FROZEN", "false"))

return
end
Expand Down
85 changes: 49 additions & 36 deletions src/cache.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ using Base: _methods_by_ftype

using Serialization, Scratch

const compilecache = Dict{UInt, Any}()
const compilelock = ReentrantLock()

@inline function check_cache(compiler, linker, spec, id; kwargs...)
@inline function get_interactive(cache, compiler, linker, spec, id; kwargs...)
# generate a key for indexing the compilation cache
key = hash(kwargs, id)
key = hash(spec.name, key) # fields f and tt are already covered by the id
Expand All @@ -21,45 +20,58 @@ const compilelock = ReentrantLock()
# NOTE: no use of lock(::Function)/@lock/get! to keep stack traces clean
lock(compilelock)
try
obj = get(compilecache, key, nothing)
if obj === nothing
obj = get(cache, key, nothing)
if obj === nothing || compile_hook[] != nothing
asm = compiler(spec; kwargs...)
obj = linker(spec, asm)
compilecache[key] = obj
cache[key] = obj
end
obj
finally
unlock(compilelock)
end
end

# generated function that crafts a custom code info to call the actual cufunction impl.
# this gives us the flexibility to insert manual back edges for automatic recompilation.
#
# we also increment a global specialization counter and pass it along to index the cache.

specialization_counter = 0
@inline function get_frozen(cache, compiler, linker, spec; kwargs...)
# generate a key for indexing the compilation cache
key = hash(kwargs, env)
key = hash(spec, key)

const frozen = Ref(false)

@generated function cached_compilation(compiler::Core.Function, linker::Core.Function,
spec::FunctionSpec{f,tt}, env::UInt=zero(UInt);
kwargs...) where {f,tt}
frozen[] && return quote
key = hash(spec)
asm = get(compilecache, key, nothing)
if asm === nothing
path = joinpath(@get_scratch!("kernels"), "$(hash(spec)).jls")
asm = if isfile(path)
# NOTE: no use of lock(::Function)/@lock/get! to keep stack traces clean
lock(compilelock)
try
obj = get(cache, key, nothing)
if obj === nothing
path = joinpath(@get_scratch!("kernels"), "$key.jls")
if isfile(path)
@debug "Loading compiled kernel for $spec from $path"
deserialize(path)
asm = deserialize(path)
else
asm = compiler(spec; kwargs...)
serialize(path, asm)
asm
end
obj = linker(spec, asm)
cache[key] = obj
end
obj = linker(spec, asm)
obj
finally
unlock(compilelock)
end
end

# generated function that crafts a custom code info to call the actual cufunction impl.
# this gives us the flexibility to insert manual back edges for automatic recompilation.
#
# we also increment a global specialization counter and pass it along to index the cache.

specialization_counter = 0

const freeze_kernels = Ref(false)

@generated function cached_compilation(cache::Dict, compiler::Function, linker::Function,
spec::FunctionSpec{f,tt}; kwargs...) where {f,tt}
freeze_kernels[] && return quote
get_frozen(cache, compiler, linker, spec; kwargs...)
end

# get a hold of the method and code info of the kernel function
Expand Down Expand Up @@ -92,22 +104,23 @@ const frozen = Ref(false)
# underlying C methods -- which GPUCompiler does, so everything Just Works.

# prepare the slots
new_ci.slotnames = Symbol[:kwfunc, :kwargs, Symbol("#self#"), :compiler, :linker, :spec, :id]
new_ci.slotnames = Symbol[:kwfunc, :kwargs, Symbol("#self#"),
:cache, :compiler, :linker, :spec]
new_ci.slotflags = UInt8[0x00 for i = 1:7]
kwargs = SlotNumber(2)
compiler = SlotNumber(4)
linker = SlotNumber(5)
spec = SlotNumber(6)
env = SlotNumber(7)
cache = SlotNumber(4)
compiler = SlotNumber(5)
linker = SlotNumber(6)
spec = SlotNumber(7)

# call the compiler
append!(new_ci.code, [Expr(:call, Core.kwfunc, check_cache),
append!(new_ci.code, [Expr(:call, Core.kwfunc, get_interactive),
Expr(:call, merge, NamedTuple(), kwargs),
Expr(:call, hash, env, id),
Expr(:call, SSAValue(1), SSAValue(2), check_cache, compiler, linker, spec, SSAValue(3)),
Expr(:return, SSAValue(4))])
append!(new_ci.codelocs, [1, 1, 1, 1, 1]) # see note below
new_ci.ssavaluetypes += 5
Expr(:call, SSAValue(1), SSAValue(2), get_interactive,
cache, compiler, linker, spec, id),
Expr(:return, SSAValue(3))])
append!(new_ci.codelocs, [1, 1, 1, 1]) # see note below
new_ci.ssavaluetypes += 4

# NOTE: we keep the first entry of the original linetable, and use it for location info
# on the call to get_interactive. we can't not have a codeloc (using 0 causes
Expand Down
3 changes: 0 additions & 3 deletions src/reflection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,6 @@ function emit_hooked_compilation(inner_hook, ex...)
user_code = ex[end]
user_kwargs = ex[1:end-1]
quote
# wipe the compile cache to force recompilation
empty!(GPUCompiler.compilecache)

local kernels = 0
function outer_hook(job)
kernels += 1
Expand Down

0 comments on commit a90f7bb

Please sign in to comment.