diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index ee88b6fff4..4556abcd78 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -224,7 +224,21 @@ jobs: env: NUMBA_DPEX_USE_MLIR: ${{ matrix.use_mlir && '1' || '0' }} run: | - pytest -q -ra --disable-warnings --pyargs ${{ env.MODULE_NAME }} -vv + pytest -q -ra --disable-warnings --pyargs ${{ env.MODULE_NAME }} -vv -k "not test_1d_strided_dpnp_array_in_kernel[2]" + + - name: Run backendless optimization tests + # Running tests that have been found to fail on AMD CPUs with + # -cl-opt-disable. The test failures do not happen on other platforms + # and are possibly due to some driver/opencl compiler bug. + if: ${{ matrix.scope == 'tests' }} + env: + NUMBA_DPEX_USE_MLIR: ${{ matrix.use_mlir && '1' || '0' }} + # Disabling device driver optimization to prevent catching bugs + # from driver compiler. + ONEAPI_DEVICE_SELECTOR: "opencl:cpu" + NUMBA_DPEX_BUILD_KERNEL_OPTIONS: "-cl-opt-disable" + run: | + pytest -q -ra --disable-warnings --pyargs ${{ env.MODULE_NAME }} -vv -k "test_1d_strided_dpnp_array_in_kernel[2]" - name: Run examples if: ${{ matrix.scope == 'examples' }} diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 50a706b58c..a4038d97bb 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -55,9 +55,11 @@ jobs: conda env export > /tmp/env-cov.yml cat /tmp/env-cov.yml + # Ignoring test due to opencl driver optimization bug - name: Run tests with coverage run: | - pytest -q --cov --cov-report term-missing --pyargs numba_dpex + pytest -q --cov --cov-report term-missing --pyargs numba_dpex \ + -k 'not test_1d_strided_dpnp_array_in_kernel[2]' - name: Install coveralls shell: bash -l {0} diff --git a/numba_dpex/core/config.py b/numba_dpex/core/config.py index 500951f068..ecdf240a88 100644 --- a/numba_dpex/core/config.py +++ b/numba_dpex/core/config.py @@ -73,6 +73,11 @@ def __getattr__(name): # a kernel decorated function DEBUG_KERNEL_LAUNCHER = _readenv("NUMBA_DPEX_DEBUG_KERNEL_LAUNCHER", int, 0) +# Sets build kernel options for the kernel compilation on the device side. +# For available OpenCL options refer +# https://intel.github.io/llvm-docs/clang/ClangCommandLineReference.html#opencl-options +BUILD_KERNEL_OPTIONS = _readenv("NUMBA_DPEX_BUILD_KERNEL_OPTIONS", str, "") + # Flag to enable caching, set NUMBA_DPEX_ENABLE_CACHE=0 to turn it off. ENABLE_CACHE = _readenv("NUMBA_DPEX_ENABLE_CACHE", int, 1) # To specify the default cache size, 20 by default. @@ -96,6 +101,6 @@ def __getattr__(name): DPEX_OPT = _readenv("NUMBA_DPEX_OPT", int, 2) -INLINE_THRESHOLD = _readenv("NUMBA_DPEX_INLINE_THRESHOLD", int, None) +INLINE_THRESHOLD = _readenv("NUMBA_DPEX_INLINE_THRESHOLD", int, 2) USE_MLIR = _readenv("NUMBA_DPEX_USE_MLIR", int, 0) diff --git a/numba_dpex/core/utils/kernel_launcher.py b/numba_dpex/core/utils/kernel_launcher.py index 8bb961e902..d0f9426d88 100644 --- a/numba_dpex/core/utils/kernel_launcher.py +++ b/numba_dpex/core/utils/kernel_launcher.py @@ -304,6 +304,15 @@ def set_kernel_from_spirv(self, kernel_module: SPIRVKernelModule): context_ref = sycl.dpctl_queue_get_context(self.builder, queue_ref) device_ref = sycl.dpctl_queue_get_device(self.builder, queue_ref) + if config.BUILD_KERNEL_OPTIONS != "": + spv_compiler_options = self.context.insert_const_string( + self.builder.module, config.BUILD_KERNEL_OPTIONS + ) + else: + spv_compiler_options = self.builder.load( + create_null_ptr(self.builder, self.context) + ) + # build_or_get_kernel steals reference to context and device cause it # needs to keep them alive for keys. kernel_ref = self.dpexrt.build_or_get_kernel( @@ -318,7 +327,7 @@ def set_kernel_from_spirv(self, kernel_module: SPIRVKernelModule): llvmir.Constant( llvmir.IntType(64), len(kernel_module.kernel_bitcode) ), - self.builder.load(create_null_ptr(self.builder, self.context)), + spv_compiler_options, kernel_name, ], ) diff --git a/numba_dpex/kernel_api_impl/spirv/codegen.py b/numba_dpex/kernel_api_impl/spirv/codegen.py index a0fc7739d0..4f236c9671 100644 --- a/numba_dpex/kernel_api_impl/spirv/codegen.py +++ b/numba_dpex/kernel_api_impl/spirv/codegen.py @@ -55,12 +55,6 @@ def inline_threshold(self, value: int): ) self._inline_threshold = 0 else: - if value == 3: - warnings.warn( - "Due to an existing compiler bug, setting INLINE_THRESHOLD " - f"to {value} can lead to incorrect code generation on " - "certain devices." - ) self._inline_threshold = value def _optimize_final_module(self): @@ -69,12 +63,6 @@ def _optimize_final_module(self): # Make optimization level depending on config.DPEX_OPT variable pmb.opt_level = config.DPEX_OPT - if config.DPEX_OPT > 2: - warnings.warn( - "Setting NUMBA_DPEX_OPT greater than 2 known to cause issues " - + "related to very aggressive optimizations that leads to " - + "broken code." - ) pmb.disable_unit_at_a_time = False diff --git a/numba_dpex/tests/experimental/codegen/test_inline_threshold_codegen.py b/numba_dpex/tests/experimental/codegen/test_inline_threshold_codegen.py index 39a2442698..e3953adfb3 100644 --- a/numba_dpex/tests/experimental/codegen/test_inline_threshold_codegen.py +++ b/numba_dpex/tests/experimental/codegen/test_inline_threshold_codegen.py @@ -28,7 +28,7 @@ def test_codegen_with_max_inline_threshold(): and pipeline to compile both host callable "kernels" and device-only "device_func" functions. - Unless the inline_threshold is set to 3, the `spir_func` function is not + Unless the inline_threshold is set to >0, the `spir_func` function is not inlined into the wrapper function. The test checks if the `spir_func` function is fully inlined into the wrapper. The test is rather rudimentary and only checks the count of function in the generated module. @@ -39,7 +39,7 @@ def test_codegen_with_max_inline_threshold(): i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) kernel_sig = types.void(ItemType(1), i64arr_ty, i64arr_ty, i64arr_ty) - disp = dpex_exp.kernel(inline_threshold=3)(kernel_func) + disp = dpex_exp.kernel(inline_threshold=1)(kernel_func) disp.compile(kernel_sig) kcres = disp.overloads[kernel_sig.args] llvm_ir_mod = kcres.library._final_module @@ -60,7 +60,7 @@ def test_codegen_without_max_inline_threshold(): i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) kernel_sig = types.void(ItemType(1), i64arr_ty, i64arr_ty, i64arr_ty) - disp = dpex_exp.kernel(kernel_func) + disp = dpex_exp.kernel(inline_threshold=0)(kernel_func) disp.compile(kernel_sig) kcres = disp.overloads[kernel_sig.args] llvm_ir_mod = kcres.library._final_module diff --git a/numba_dpex/tests/experimental/codegen/test_intenum_literal_codegen.py b/numba_dpex/tests/experimental/codegen/test_intenum_literal_codegen.py index 8512bd8016..33ca4063e4 100644 --- a/numba_dpex/tests/experimental/codegen/test_intenum_literal_codegen.py +++ b/numba_dpex/tests/experimental/codegen/test_intenum_literal_codegen.py @@ -40,7 +40,7 @@ def pass_flags_to_func(a): i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) kernel_sig = types.void(i64arr_ty) - disp = exp_dpex.kernel(pass_flags_to_func) + disp = exp_dpex.kernel(inline_threshold=0)(pass_flags_to_func) disp.compile(kernel_sig) kcres = disp.overloads[kernel_sig.args] llvm_ir_mod = kcres.library._final_module.__str__() diff --git a/numba_dpex/tests/experimental/test_compiler_warnings.py b/numba_dpex/tests/experimental/test_compiler_warnings.py index e7ae1ad550..67f26dd8d2 100644 --- a/numba_dpex/tests/experimental/test_compiler_warnings.py +++ b/numba_dpex/tests/experimental/test_compiler_warnings.py @@ -2,14 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -import dpctl import pytest -from numba.core import types -from numba_dpex import DpctlSyclQueue, DpnpNdArray from numba_dpex import experimental as dpex_exp -from numba_dpex import int64 -from numba_dpex.core.types.kernel_api.index_space_ids import ItemType from numba_dpex.kernel_api import Item @@ -21,15 +16,3 @@ def _kernel(item: Item, a, b, c): def test_compilation_mode_option_user_definition(): with pytest.warns(UserWarning): dpex_exp.kernel(_compilation_mode="kernel")(_kernel) - - -def test_inline_threshold_level_warning(): - """ - Test compiler warning generation with an inline_threshold value of 3. - """ - - with pytest.warns(UserWarning): - queue_ty = DpctlSyclQueue(dpctl.SyclQueue()) - i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) - kernel_sig = types.void(ItemType(1), i64arr_ty, i64arr_ty, i64arr_ty) - dpex_exp.kernel(inline_threshold=3)(_kernel).compile(kernel_sig) diff --git a/numba_dpex/tests/misc/test_warnings.py b/numba_dpex/tests/misc/test_warnings.py index 2a36695784..247964eaca 100644 --- a/numba_dpex/tests/misc/test_warnings.py +++ b/numba_dpex/tests/misc/test_warnings.py @@ -16,26 +16,6 @@ def foo(a): a[dpex.get_global_id(0)] = 0 -def test_opt_warning(): - bkp = config.DPEX_OPT - config.DPEX_OPT = 3 - - with pytest.warns(UserWarning): - dpex.call_kernel(foo, dpex.Range(10), dpnp.arange(10)) - - config.DPEX_OPT = bkp - - -def test_inline_threshold_eq_3_warning(): - bkp = config.INLINE_THRESHOLD - config.INLINE_THRESHOLD = 3 - - with pytest.warns(UserWarning): - dpex.call_kernel(foo, dpex.Range(10), dpnp.arange(10)) - - config.INLINE_THRESHOLD = bkp - - def test_inline_threshold_negative_val_warning_(): bkp = config.INLINE_THRESHOLD config.INLINE_THRESHOLD = -1