From c8f2180bdac1bbbaec5ff5b4308ad79ab7f01eaf Mon Sep 17 00:00:00 2001 From: samaid Date: Fri, 9 Dec 2022 10:08:54 -0600 Subject: [PATCH] Removed CLK_ prefix from mem fences --- docs/user_guides/kernel_programming_guide/synchronization.rst | 4 ++-- numba_dpex/device_init.py | 4 ++-- numba_dpex/examples/kernel_private_memory.py | 2 +- numba_dpex/examples/sum_reduction_ocl.py | 2 +- numba_dpex/examples/sum_reduction_recursive_ocl.py | 2 +- numba_dpex/ocl/oclimpl.py | 4 ++-- numba_dpex/ocl/stubs.py | 4 ++-- numba_dpex/tests/kernel_tests/test_atomic_op.py | 4 ++-- numba_dpex/tests/kernel_tests/test_barrier.py | 4 ++-- numba_dpex/tests/kernel_tests/test_private_memory.py | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/user_guides/kernel_programming_guide/synchronization.rst b/docs/user_guides/kernel_programming_guide/synchronization.rst index e10ba9496b..8a2b8df08a 100644 --- a/docs/user_guides/kernel_programming_guide/synchronization.rst +++ b/docs/user_guides/kernel_programming_guide/synchronization.rst @@ -10,7 +10,7 @@ barrier, at which point it returns control to all its callers. ``numba_dpex.barrier()`` supports two memory fence options: -- ``numba_dpex.CLK_GLOBAL_MEM_FENCE``: The barrier function will queue a memory +- ``numba_dpex.GLOBAL_MEM_FENCE``: The barrier function will queue a memory fence to ensure correct ordering of memory operations to global memory. Using the option can be useful when work-items, for example, write to buffer or image objects and then want to read the updated data. Passing no arguments to @@ -20,7 +20,7 @@ barrier, at which point it returns control to all its callers. .. literalinclude:: ../../../numba_dpex/examples/barrier.py :pyobject: no_arg_barrier_support -- ``numba_dpex.CLK_LOCAL_MEM_FENCE``: The barrier function will either flush +- ``numba_dpex.LOCAL_MEM_FENCE``: The barrier function will either flush any variables stored in local memory or queue a memory fence to ensure correct ordering of memory operations to local memory. For example, diff --git a/numba_dpex/device_init.py b/numba_dpex/device_init.py index fb8e8d3cba..b329083bfa 100644 --- a/numba_dpex/device_init.py +++ b/numba_dpex/device_init.py @@ -4,8 +4,8 @@ # Re export from .ocl.stubs import ( - CLK_GLOBAL_MEM_FENCE, - CLK_LOCAL_MEM_FENCE, + GLOBAL_MEM_FENCE, + LOCAL_MEM_FENCE, atomic, barrier, get_global_id, diff --git a/numba_dpex/examples/kernel_private_memory.py b/numba_dpex/examples/kernel_private_memory.py index 85634708bb..537b02c9b2 100644 --- a/numba_dpex/examples/kernel_private_memory.py +++ b/numba_dpex/examples/kernel_private_memory.py @@ -23,7 +23,7 @@ def private_memory_kernel(A): # preload memory[0] = i - numba_dpex.barrier(numba_dpex.CLK_LOCAL_MEM_FENCE) # local mem fence + numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE) # local mem fence # memory will not hold correct deterministic result if it is not # private to each thread. diff --git a/numba_dpex/examples/sum_reduction_ocl.py b/numba_dpex/examples/sum_reduction_ocl.py index 8e6aa9aac3..949890d708 100644 --- a/numba_dpex/examples/sum_reduction_ocl.py +++ b/numba_dpex/examples/sum_reduction_ocl.py @@ -29,7 +29,7 @@ def sum_reduction_kernel(A, partial_sums): stride = group_size // 2 while stride > 0: # Waiting for each 2x2 addition into given workgroup - dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE) + dpex.barrier(dpex.LOCAL_MEM_FENCE) # Add elements 2 by 2 between local_id and local_id + stride if local_id < stride: diff --git a/numba_dpex/examples/sum_reduction_recursive_ocl.py b/numba_dpex/examples/sum_reduction_recursive_ocl.py index 4524318b79..bdaebc55df 100644 --- a/numba_dpex/examples/sum_reduction_recursive_ocl.py +++ b/numba_dpex/examples/sum_reduction_recursive_ocl.py @@ -34,7 +34,7 @@ def sum_reduction_kernel(A, input_size, partial_sums): stride = group_size // 2 while stride > 0: # Waiting for each 2x2 addition into given workgroup - dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE) + dpex.barrier(dpex.LOCAL_MEM_FENCE) # Add elements 2 by 2 between local_id and local_id + stride if local_id < stride: diff --git a/numba_dpex/ocl/oclimpl.py b/numba_dpex/ocl/oclimpl.py index e756a32378..c0aafff9c3 100644 --- a/numba_dpex/ocl/oclimpl.py +++ b/numba_dpex/ocl/oclimpl.py @@ -153,7 +153,7 @@ def barrier_no_arg_impl(context, builder, sig, args): barrier = _declare_function( context, builder, "barrier", sig, ["unsigned int"] ) - flags = context.get_constant(types.uint32, stubs.CLK_GLOBAL_MEM_FENCE) + flags = context.get_constant(types.uint32, stubs.GLOBAL_MEM_FENCE) builder.call(barrier, [flags]) return _void_value @@ -175,7 +175,7 @@ def sub_group_barrier_impl(context, builder, sig, args): barrier = _declare_function( context, builder, "barrier", sig, ["unsigned int"] ) - flags = context.get_constant(types.uint32, stubs.CLK_LOCAL_MEM_FENCE) + flags = context.get_constant(types.uint32, stubs.LOCAL_MEM_FENCE) builder.call(barrier, [flags]) return _void_value diff --git a/numba_dpex/ocl/stubs.py b/numba_dpex/ocl/stubs.py index 8c6bf4169e..f025dca136 100644 --- a/numba_dpex/ocl/stubs.py +++ b/numba_dpex/ocl/stubs.py @@ -5,8 +5,8 @@ _stub_error = NotImplementedError("This is a stub.") # mem fence -CLK_LOCAL_MEM_FENCE = 0x1 -CLK_GLOBAL_MEM_FENCE = 0x2 +LOCAL_MEM_FENCE = 0x1 +GLOBAL_MEM_FENCE = 0x2 def get_global_id(*args, **kargs): diff --git a/numba_dpex/tests/kernel_tests/test_atomic_op.py b/numba_dpex/tests/kernel_tests/test_atomic_op.py index bc95b32f89..1d568027d5 100644 --- a/numba_dpex/tests/kernel_tests/test_atomic_op.py +++ b/numba_dpex/tests/kernel_tests/test_atomic_op.py @@ -104,9 +104,9 @@ def get_func_local(op_type, dtype): def f(a): lm = dpex.local.array(1, dtype) lm[0] = a[0] - dpex.barrier(dpex.CLK_GLOBAL_MEM_FENCE) + dpex.barrier(dpex.GLOBAL_MEM_FENCE) op(lm, 0, 1) - dpex.barrier(dpex.CLK_GLOBAL_MEM_FENCE) + dpex.barrier(dpex.GLOBAL_MEM_FENCE) a[0] = lm[0] return f diff --git a/numba_dpex/tests/kernel_tests/test_barrier.py b/numba_dpex/tests/kernel_tests/test_barrier.py index 3af8433f3d..8bc8ef5299 100644 --- a/numba_dpex/tests/kernel_tests/test_barrier.py +++ b/numba_dpex/tests/kernel_tests/test_barrier.py @@ -19,7 +19,7 @@ def test_proper_lowering(filter_str): def twice(A): i = dpex.get_global_id(0) d = A[i] - dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE) # local mem fence + dpex.barrier(dpex.LOCAL_MEM_FENCE) # local mem fence A[i] = d * 2 N = 256 @@ -66,7 +66,7 @@ def reverse_array(A): # preload lm[i] = A[i] # barrier local or global will both work as we only have one work group - dpex.barrier(dpex.CLK_LOCAL_MEM_FENCE) # local mem fence + dpex.barrier(dpex.LOCAL_MEM_FENCE) # local mem fence # write A[i] += lm[blocksize - 1 - i] diff --git a/numba_dpex/tests/kernel_tests/test_private_memory.py b/numba_dpex/tests/kernel_tests/test_private_memory.py index 571628fec6..70fa985a8d 100644 --- a/numba_dpex/tests/kernel_tests/test_private_memory.py +++ b/numba_dpex/tests/kernel_tests/test_private_memory.py @@ -19,7 +19,7 @@ def private_memory_kernel(A): i = numba_dpex.get_global_id(0) prvt_mem = numba_dpex.private.array(shape=1, dtype=np.float32) prvt_mem[0] = i - numba_dpex.barrier(numba_dpex.CLK_LOCAL_MEM_FENCE) # local mem fence + numba_dpex.barrier(numba_dpex.LOCAL_MEM_FENCE) # local mem fence A[i] = prvt_mem[0] * 2 N = 64