diff --git a/Manifest.toml b/Manifest.toml index 7db629f14f..4da1b90d60 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -68,8 +68,10 @@ uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" version = "6.0.1" [[GPUCompiler]] -deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "1b19d415fc3581ff0ed2f57875fca16b5190060a" +deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "d36b2fd1cfe01afed8c0126aced9f1646a483dbb" +repo-rev = "4bb076f" +repo-url = "https://github.com/JuliaGPU/GPUCompiler.jl.git" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" version = "0.7.3" @@ -152,6 +154,12 @@ version = "1.1.0" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +[[Scratch]] +deps = ["Dates"] +git-tree-sha1 = "92245127815ac0bafbdca65a6e12d7a8c32149ed" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.0.2" + [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" diff --git a/src/compiler/execution.jl b/src/compiler/execution.jl index ea466dcc08..fc72f9fffb 100644 --- a/src/compiler/execution.jl +++ b/src/compiler/execution.jl @@ -291,24 +291,29 @@ when function changes, or when different types or keyword arguments are provided """ function cufunction(f::F, tt::TT=Tuple{}; name=nothing, kwargs...) where {F<:Core.Function, TT<:Type} - ctx = context() - env = hash(pointer_from_objref(ctx)) # contexts are unique, but handles might alias - # TODO: implement this as a hash function in for CuContext - + dev = device() + cache = cufunction_cache[dev] source = FunctionSpec(f, tt, true, name) - GPUCompiler.cached_compilation(_cufunction, source, env; kwargs...)::HostKernel{f,tt} + return GPUCompiler.cached_compilation(cache, cufunction_compile, cufunction_link, + source; kwargs...)::HostKernel{f,tt} end -# actual compilation -function _cufunction(source::FunctionSpec; kwargs...) - # compile to PTX - ctx = context() +const cufunction_cache = PerDevice{Dict{UInt, Any}}((dev)->Dict{UInt, Any}()) + +# compile to PTX +function cufunction_compile(@nospecialize(source::FunctionSpec); kwargs...) dev = device() cap = supported_capability(dev) target = PTXCompilerTarget(; cap=supported_capability(dev), kwargs...) params = CUDACompilerParams() job = CompilerJob(target, source, params) - asm, kernel_fn, undefined_fns = GPUCompiler.compile(:asm, job) + return GPUCompiler.compile(:asm, job) +end + +# link to device code +function cufunction_link(@nospecialize(source::FunctionSpec), + (asm, kernel_fn, undefined_fns); kwargs...) + ctx = context() # settings to JIT based on Julia's debug setting jit_options = Dict{CUjit_option,Any}() diff --git a/src/initialization.jl b/src/initialization.jl index c8991b0b8b..a839b1b8d2 100644 --- a/src/initialization.jl +++ b/src/initialization.jl @@ -134,6 +134,7 @@ function __runtime_init__() end end + initialize!(cufunction_cache, ndevices()) resize!(__device_contexts, ndevices()) fill!(__device_contexts, nothing)