From f4d67dc4dca55e451766f8f31cd23c3040dd762f Mon Sep 17 00:00:00 2001 From: "akmkhale@ansatnuc04" Date: Thu, 2 Feb 2023 16:49:38 -0600 Subject: [PATCH 1/2] Caching kernel_bundle after create_program_from_spirv() Store exec_queue along with kernel_bundle Properly initialize self._kernel_bundle_cache with NullCache() Save kernel_bundle with exec_queue as a key --- numba_dpex/core/caching.py | 5 ++- .../core/kernel_interface/dispatcher.py | 31 ++++++++++++++++--- numba_dpex/tests/test_device_array_args.py | 4 +-- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/numba_dpex/core/caching.py b/numba_dpex/core/caching.py index 807703188e..015e668118 100644 --- a/numba_dpex/core/caching.py +++ b/numba_dpex/core/caching.py @@ -13,7 +13,9 @@ from numba_dpex.core.types import USMNdArray -def build_key(argtypes, pyfunc, codegen, backend=None, device_type=None): +def build_key( + argtypes, pyfunc, codegen, backend=None, device_type=None, exec_queue=None +): """Constructs a key from python function, context, backend and the device type. @@ -64,6 +66,7 @@ def build_key(argtypes, pyfunc, codegen, backend=None, device_type=None): codegen.magic_tuple(), backend, device_type, + exec_queue, ( hashlib.sha256(codebytes).hexdigest(), hashlib.sha256(cvarbytes).hexdigest(), diff --git a/numba_dpex/core/kernel_interface/dispatcher.py b/numba_dpex/core/kernel_interface/dispatcher.py index 42571e527e..cc6b61bcaa 100644 --- a/numba_dpex/core/kernel_interface/dispatcher.py +++ b/numba_dpex/core/kernel_interface/dispatcher.py @@ -88,14 +88,21 @@ def __init__( # caching related attributes if not config.ENABLE_CACHE: self._cache = NullCache() + self._kernel_bundle_cache = NullCache() elif enable_cache: self._cache = LRUCache( name="SPIRVKernelCache", capacity=config.CACHE_SIZE, pyfunc=self.pyfunc, ) + self._kernel_bundle_cache = LRUCache( + name="KernelBundleCache", + capacity=config.CACHE_SIZE, + pyfunc=self.pyfunc, + ) else: self._cache = NullCache() + self._kernel_bundle_cache = NullCache() self._cache_hits = 0 if array_access_specifiers: @@ -627,12 +634,26 @@ def __call__(self, *args): cache=self._cache, ) - # create a sycl::KernelBundle - kernel_bundle = dpctl_prog.create_program_from_spirv( - exec_queue, - device_driver_ir_module, - " ".join(self._create_sycl_kernel_bundle_flags), + kernel_bundle_key = build_key( + tuple(argtypes), + self.pyfunc, + dpex_kernel_target.target_context.codegen(), + exec_queue=exec_queue, ) + + artifact = self._kernel_bundle_cache.get(kernel_bundle_key) + + if artifact is None: + # create a sycl::KernelBundle + kernel_bundle = dpctl_prog.create_program_from_spirv( + exec_queue, + device_driver_ir_module, + " ".join(self._create_sycl_kernel_bundle_flags), + ) + self._kernel_bundle_cache.put(kernel_bundle_key, kernel_bundle) + else: + kernel_bundle = artifact + # get the sycl::kernel sycl_kernel = kernel_bundle.get_sycl_kernel(kernel_module_name) diff --git a/numba_dpex/tests/test_device_array_args.py b/numba_dpex/tests/test_device_array_args.py index cc50c48854..80f4c19fee 100644 --- a/numba_dpex/tests/test_device_array_args.py +++ b/numba_dpex/tests/test_device_array_args.py @@ -26,7 +26,7 @@ def data_parallel_sum(a, b, c): @skip_no_opencl_cpu -class TestArrayArgsGPU: +class TestArrayArgsCPU: def test_device_array_args_cpu(self): c = np.ones_like(a) @@ -37,7 +37,7 @@ def test_device_array_args_cpu(self): @skip_no_opencl_gpu -class TestArrayArgsCPU: +class TestArrayArgsGPU: def test_device_array_args_gpu(self): c = np.ones_like(a) From 3958d1304fce32f376a6ecfc00ca0fad9218045a Mon Sep 17 00:00:00 2001 From: "akmkhale@ansatnuc04" Date: Wed, 8 Feb 2023 19:29:51 -0600 Subject: [PATCH 2/2] Test with CFD and usm_ndarray --- numba_dpex/core/caching.py | 5 +- .../tests/kernel_tests/test_arg_types.py | 76 ++++++++++++++----- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/numba_dpex/core/caching.py b/numba_dpex/core/caching.py index 015e668118..ddd7972b17 100644 --- a/numba_dpex/core/caching.py +++ b/numba_dpex/core/caching.py @@ -16,8 +16,8 @@ def build_key( argtypes, pyfunc, codegen, backend=None, device_type=None, exec_queue=None ): - """Constructs a key from python function, context, backend and the device - type. + """Constructs a key from python function, context, backend, the device + type and execution queue. Compute index key for the given argument types and codegen. It includes a description of the OS, target architecture and hashes of the bytecode for @@ -34,6 +34,7 @@ def build_key( Defaults to None. device_type (enum, optional): A 'device_type' enum. Defaults to None. + exec_queue (dpctl._sycl_queue.SyclQueue', optional): A SYCL queue object. Returns: tuple: A tuple of return type, argtpes, magic_tuple of codegen diff --git a/numba_dpex/tests/kernel_tests/test_arg_types.py b/numba_dpex/tests/kernel_tests/test_arg_types.py index 86fda110db..7e19f124a1 100644 --- a/numba_dpex/tests/kernel_tests/test_arg_types.py +++ b/numba_dpex/tests/kernel_tests/test_arg_types.py @@ -2,12 +2,14 @@ # # SPDX-License-Identifier: Apache-2.0 +import sys + import dpctl +import dpctl.tensor as dpt import numpy as np import pytest import numba_dpex as dpex -from numba_dpex.tests._helper import filter_strings global_size = 1054 local_size = 1 @@ -35,15 +37,39 @@ def input_arrays(request): return a, b, c[0] -@pytest.mark.parametrize("filter_str", filter_strings) -def test_kernel_arg_types(filter_str, input_arrays): - kernel = dpex.kernel(mul_kernel) - a, actual, c = input_arrays +def test_kernel_arg_types(input_arrays): + usm_type = "device" + + a, b, c = input_arrays expected = a * c - device = dpctl.SyclDevice(filter_str) - with dpctl.device_context(device): - kernel[global_size, local_size](a, actual, c) - np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0) + + queue = dpctl.SyclQueue(dpctl.select_default_device()) + + da = dpt.usm_ndarray( + a.shape, + dtype=a.dtype, + buffer=usm_type, + buffer_ctor_kwargs={"queue": queue}, + ) + da.usm_data.copy_from_host(a.reshape((-1)).view("|u1")) + + db = dpt.usm_ndarray( + b.shape, + dtype=b.dtype, + buffer=usm_type, + buffer_ctor_kwargs={"queue": queue}, + ) + db.usm_data.copy_from_host(b.reshape((-1)).view("|u1")) + + kernel = dpex.kernel(mul_kernel) + kernel[dpex.NdRange(dpex.Range(global_size), dpex.Range(local_size))]( + da, db, c + ) + + result = np.zeros_like(b) + db.usm_data.copy_to_host(result.reshape((-1)).view("|u1")) + + np.testing.assert_allclose(result, expected, rtol=1e-5, atol=0) def check_bool_kernel(A, test): @@ -53,14 +79,28 @@ def check_bool_kernel(A, test): A[0] = 222 -@pytest.mark.parametrize("filter_str", filter_strings) -def test_bool_type(filter_str): - kernel = dpex.kernel(check_bool_kernel) +def test_bool_type(): + usm_type = "device" a = np.array([2], np.int64) - device = dpctl.SyclDevice(filter_str) - with dpctl.device_context(device): - kernel[a.size, dpex.DEFAULT_LOCAL_SIZE](a, True) - assert a[0] == 111 - kernel[a.size, dpex.DEFAULT_LOCAL_SIZE](a, False) - assert a[0] == 222 + queue = dpctl.SyclQueue(dpctl.select_default_device()) + + da = dpt.usm_ndarray( + a.shape, + dtype=a.dtype, + buffer=usm_type, + buffer_ctor_kwargs={"queue": queue}, + ) + da.usm_data.copy_from_host(a.reshape((-1)).view("|u1")) + + kernel = dpex.kernel(check_bool_kernel) + + kernel[dpex.Range(a.size)](da, True) + result = np.zeros_like(a) + da.usm_data.copy_to_host(result.reshape((-1)).view("|u1")) + assert result[0] == 111 + + kernel[dpex.Range(a.size)](da, False) + result = np.zeros_like(a) + da.usm_data.copy_to_host(result.reshape((-1)).view("|u1")) + assert result[0] == 222