diff --git a/Manifest.toml b/Manifest.toml index dea5461c15..785913693e 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -68,8 +68,10 @@ uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" version = "6.0.1" [[GPUCompiler]] -deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "1b19d415fc3581ff0ed2f57875fca16b5190060a" +deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "3de6681f84fe77dfe9ce52a20610af8afcb0cd9e" +repo-rev = "097278f" +repo-url = "https://github.com/JuliaGPU/GPUCompiler.jl.git" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" version = "0.7.3" @@ -152,6 +154,12 @@ version = "1.0.3" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +[[Scratch]] +deps = ["Dates"] +git-tree-sha1 = "92245127815ac0bafbdca65a6e12d7a8c32149ed" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.0.2" + [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" diff --git a/src/compiler/execution.jl b/src/compiler/execution.jl index ea466dcc08..57e47d8f78 100644 --- a/src/compiler/execution.jl +++ b/src/compiler/execution.jl @@ -291,24 +291,28 @@ when function changes, or when different types or keyword arguments are provided """ function cufunction(f::F, tt::TT=Tuple{}; name=nothing, kwargs...) where {F<:Core.Function, TT<:Type} - ctx = context() - env = hash(pointer_from_objref(ctx)) # contexts are unique, but handles might alias - # TODO: implement this as a hash function in for CuContext - + dev = device() + cache = cufunction_cache[dev] source = FunctionSpec(f, tt, true, name) - GPUCompiler.cached_compilation(_cufunction, source, env; kwargs...)::HostKernel{f,tt} + return GPUCompiler.cached_compilation(cache, cufunction_compile, cufunction_link, + source; kwargs...)::HostKernel{f,tt} end -# actual compilation -function _cufunction(source::FunctionSpec; kwargs...) - # compile to PTX - ctx = context() +const cufunction_cache = PerDevice{Dict{UInt, Any}}((dev)->Dict{UInt, Any}()) + +# compile to PTX +function cufunction_compile(@nospecialize(source::FunctionSpec); kwargs...) dev = device() cap = supported_capability(dev) target = PTXCompilerTarget(; cap=supported_capability(dev), kwargs...) params = CUDACompilerParams() job = CompilerJob(target, source, params) - asm, kernel_fn, undefined_fns = GPUCompiler.compile(:asm, job) + return GPUCompiler.compile(:asm, job) +end + +# link to device code +function cufunction_link(@nospecialize(source::FunctionSpec), (asm, kernel_fn, undefined_fns)) + ctx = context() # settings to JIT based on Julia's debug setting jit_options = Dict{CUjit_option,Any}() diff --git a/src/initialization.jl b/src/initialization.jl index d245c3e8bc..bd2b5b9ad8 100644 --- a/src/initialization.jl +++ b/src/initialization.jl @@ -79,6 +79,8 @@ function __init__() initializer(prepare_cuda_call) + initialize!(cufunction_cache, ndevices()) + @require ForwardDiff="f6369f11-7733-5829-9624-2563aa707210" include("forwarddiff.jl") end