diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index dd9d1f26..efec0c4d 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -53,28 +53,44 @@ synchronize(backend) ``` """ macro kernel(expr) - __kernel(expr, #=generate_cpu=#true) + __kernel(expr, #=generate_cpu=#true, #=force_inbounds=#false) end """ - @kernel cpu=false function f(args) end + @kernel config function f(args) end -Disable code-generation of the CPU function. This relaxes semantics such that -KernelAbstractions primitives can be used in non-kernel functions. +This allows for two different configurations, which can be combined: + +1. `cpu={true, false}`: Setting `cpu=false` disables code-generation of the CPU function. This relaxes semantics such that KernelAbstractions primitives can be used in non-kernel functions. +2. `inbounds={false, true}`: Setting `inbounds=true` wraps the kernel body in a forced `@inbounds`, for the case where the user would otherwise need many `@inbounds` annotations in their kernel. Note that this can lead to incorrect results, crashes, etc., and is fundamentally unsafe. Be careful! - [`@context`](@ref) !!! warn This is an experimental feature. """ -macro kernel(config, expr) - if config isa Expr && config.head == :(=) && - config.args[1] == :cpu && config.args[2] isa Bool - generate_cpu = config.args[2] +macro kernel(ex...) 
+ if length(ex) == 1 + __kernel(ex[1], true, false) else - error("Configuration should be of form `cpu=false` got $config") + generate_cpu = true + force_inbounds = false + for i = 1:length(ex)-1 + if ex[i] isa Expr && ex[i].head == :(=) && + ex[i].args[1] == :cpu && ex[i].args[2] isa Bool + generate_cpu = ex[i].args[2] + elseif ex[i] isa Expr && ex[i].head == :(=) && + ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool + force_inbounds = ex[i].args[2] + else + error("Configuration should be of form:\n"* + "* `cpu=true`\n"* + "* `inbounds=false`\n"* + "got `", ex[i], "`") + end + end + __kernel(ex[end], generate_cpu, force_inbounds) end - __kernel(expr, generate_cpu) end """ diff --git a/src/macros.jl b/src/macros.jl index e93bc386..0514eef8 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -10,10 +10,16 @@ function find_return(stmt) end # XXX: Proper errors -function __kernel(expr, generate_cpu=true) +function __kernel(expr, generate_cpu=true, force_inbounds=false) def = splitdef(expr) name = def[:name] args = def[:args] + if force_inbounds + body_qt = quote + @inbounds $(def[:body]) + end + def[:body] = body_qt + end find_return(expr) && error("Return statement not permitted in a kernel function $name") diff --git a/test/runtests.jl b/test/runtests.jl index 2572f2aa..8b87224e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,6 +21,22 @@ kern_static(CPU(static=true), (1,))(A, ndrange=length(A)) end @test_throws ErrorException("This kernel is unavailable for backend CPU") my_no_cpu_kernel(CPU()) +# testing multiple configurations at the same time +@kernel cpu=false inbounds=false function my_no_cpu_kernel2(a) +end +@test_throws ErrorException("This kernel is unavailable for backend CPU") my_no_cpu_kernel2(CPU()) + +if Base.JLOptions().check_bounds == 0 || Base.JLOptions().check_bounds == 1 + # testing bounds errors + @kernel inbounds=false my_bounded_kernel(a) = a[1] + @test_throws BoundsError(Int64[],(1,)) my_bounded_kernel(CPU())(Int[], ndrange=1) 
+end + +if Base.JLOptions().check_bounds == 0 || Base.JLOptions().check_bounds == 2 + @kernel inbounds=true my_inbounds_kernel(a) = a[1] + @test nothing == my_inbounds_kernel(CPU())(Int[], ndrange=1) +end + struct NewBackend <: KernelAbstractions.GPU end @testset "Default host implementation" begin backend = NewBackend()