The following basic code using `UnitRange` works and shows that ranges are supported inside CUDA kernels:
```julia
using CUDA
A = CUDA.ones(2,2)
function f(X, range::UnitRange)
    ix = (blockIdx().x-1) * blockDim().x + threadIdx().x
    iy = (blockIdx().y-1) * blockDim().y + threadIdx().y
    for i in range
        X[ix,iy] = 2*X[ix,iy]
    end
    return
end
@cuda threads=(2,2) f(A, 1:3)
```
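For reference, each of the 2×2 threads doubles its own element once per iteration over `1:3`, so every element of `A` should end up as `1.0 * 2^3 = 8.0f0`. This can be verified on the host after the launch:

```julia
# host-side check: every element was doubled three times by the kernel above
@assert all(Array(A) .== 8.0f0)
```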
However, `size(range,1)`, `length(range)` and `range[end]` fail inside CUDA kernels; the following two snippets, for example, fail:
```julia
using CUDA
A = CUDA.ones(2,2)
function f(X, range::UnitRange)
    ix = (blockIdx().x-1) * blockDim().x + threadIdx().x
    iy = (blockIdx().y-1) * blockDim().y + threadIdx().y
    if (size(range,1) > 10) return; end
    for i in range
        X[ix,iy] = 2*X[ix,iy]
    end
    return
end
@cuda threads=(2,2) f(A, 1:3)
```
```julia
using CUDA
A = CUDA.ones(2,2)
function f(X, range::UnitRange)
    ix = (blockIdx().x-1) * blockDim().x + threadIdx().x
    iy = (blockIdx().y-1) * blockDim().y + threadIdx().y
    if (range[end] > 10) return; end
    for i in range
        X[ix,iy] = 2*X[ix,iy]
    end
    return
end
@cuda threads=(2,2) f(A, 1:3)
```
The error for `size(range,1)` is the following:
```
julia> @cuda threads=(2,2) f(A, 1:3)
ERROR: InvalidIRError: compiling kernel f(CuDeviceArray{Float32,2,CUDA.AS.Global}, UnitRange{Int64}) resulted in invalid LLVM IR
Reason: unsupported call to the Julia runtime (call to jl_f__apply_latest)
Stacktrace:
 [1] #invokelatest#1 at essentials.jl:712
 [2] invokelatest at essentials.jl:711
 [3] throw_overflowerr_binaryop at checked.jl:154
 [4] multiple call sites at unknown:0
Stacktrace:
 [1] check_ir(::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget,CUDA.CUDACompilerParams}, ::LLVM.Module) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/GPUCompiler/pCBTA/src/validation.jl:123
 [2] macro expansion at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/GPUCompiler/pCBTA/src/driver.jl:241 [inlined]
 [3] macro expansion at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/TimerOutputs/dVnaw/src/TimerOutput.jl:206 [inlined]
 [4] codegen(::Symbol, ::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget,CUDA.CUDACompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, strip::Bool, validate::Bool, only_entry::Bool) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/GPUCompiler/pCBTA/src/driver.jl:239
 [5] compile(::Symbol, ::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget,CUDA.CUDACompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, strip::Bool, validate::Bool, only_entry::Bool) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/GPUCompiler/pCBTA/src/driver.jl:39
 [6] compile at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/GPUCompiler/pCBTA/src/driver.jl:35 [inlined]
 [7] _cufunction(::GPUCompiler.FunctionSpec{typeof(f),Tuple{CuDeviceArray{Float32,2,CUDA.AS.Global},UnitRange{Int64}}}; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/CUDA/sjcZt/src/compiler/execution.jl:308
 [8] _cufunction at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/CUDA/sjcZt/src/compiler/execution.jl:302 [inlined]
 [9] check_cache(::typeof(CUDA._cufunction), ::GPUCompiler.FunctionSpec{typeof(f),Tuple{CuDeviceArray{Float32,2,CUDA.AS.Global},UnitRange{Int64}}}, ::UInt64; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/GPUCompiler/pCBTA/src/cache.jl:24
 [10] f at ./none:2 [inlined]
 [11] cached_compilation(::typeof(CUDA._cufunction), ::GPUCompiler.FunctionSpec{typeof(f),Tuple{CuDeviceArray{Float32,2,CUDA.AS.Global},UnitRange{Int64}}}, ::UInt64; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/GPUCompiler/pCBTA/src/cache.jl:0
 [12] cached_compilation(::Function, ::GPUCompiler.FunctionSpec{typeof(f),Tuple{CuDeviceArray{Float32,2,CUDA.AS.Global},UnitRange{Int64}}}, ::UInt64) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/GPUCompiler/pCBTA/src/cache.jl:44
 [13] cufunction(::Function, ::Type{T} where T; name::Nothing, kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/CUDA/sjcZt/src/compiler/execution.jl:296
 [14] cufunction(::Function, ::Type{T} where T) at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/CUDA/sjcZt/src/compiler/execution.jl:291
 [15] top-level scope at /home/omlins/.juliapro/JuliaPro_v1.4.1-1/packages/CUDA/sjcZt/src/compiler/execution.jl:108
```
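Both failures appear to share the same root cause: the trace lands in `throw_overflowerr_binaryop` from `checked.jl`, i.e. in Base's checked integer arithmetic, whose error path calls `invokelatest`, a runtime call (`jl_f__apply_latest`) that the GPU compiler rejects. Presumably `length(::UnitRange{Int64})`, which both `size(range,1)` and `range[end]` rely on, is implemented with checked arithmetic. Assuming that diagnosis, a minimal device-side workaround is to avoid calling `length`/`size`/`getindex` on the range and to derive what is needed from `first` and `last`, which are plain field accesses:

```julia
using CUDA
A = CUDA.ones(2,2)
function f(X, range::UnitRange)
    ix = (blockIdx().x-1) * blockDim().x + threadIdx().x
    iy = (blockIdx().y-1) * blockDim().y + threadIdx().y
    # first/last only read the range's fields; the arithmetic below is
    # plain (unchecked) Int arithmetic, so no runtime calls are generated
    len = last(range) - first(range) + 1
    if (len > 10) return; end
    for i in range
        X[ix,iy] = 2*X[ix,iy]
    end
    return
end
@cuda threads=(2,2) f(A, 1:3)
```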
The same error is produced for `range[end]`:

```
julia> @cuda threads=(2,2) f(A, 1:3)
ERROR: InvalidIRError: compiling kernel f(CuDeviceArray{Float32,2,CUDA.AS.Global}, UnitRange{Int64}) resulted in invalid LLVM IR
Reason: unsupported call to the Julia runtime (call to jl_f__apply_latest)
Stacktrace:
 [1] #invokelatest#1 at essentials.jl:712
 [2] invokelatest at essentials.jl:711
 [3] throw_overflowerr_binaryop at checked.jl:154
 [4] multiple call sites at unknown:0
```

with the remainder of the stack trace identical to the one above.
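Alternatively, the needed quantity can be computed on the host and passed as an extra kernel argument, so that the kernel never queries the range at all (a sketch assuming the range is known at launch time):

```julia
using CUDA
A = CUDA.ones(2,2)
function f(X, range::UnitRange, n::Int)
    ix = (blockIdx().x-1) * blockDim().x + threadIdx().x
    iy = (blockIdx().y-1) * blockDim().y + threadIdx().y
    if (n > 10) return; end  # n plays the role of size(range,1)
    for i in range
        X[ix,iy] = 2*X[ix,iy]
    end
    return
end
r = 1:3
@cuda threads=(2,2) f(A, r, length(r))
```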
The CUDA.jl version used is v1.1.0.