Skip to content

Commit

Permalink
Revert "Remove now unneeded ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTIO…
Browse files Browse the repository at this point in the history
…N macro"

This reverts commit a4c7835.
  • Loading branch information
mmichel11 committed Nov 5, 2024
1 parent 58b0581 commit 2595281
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 6 deletions.
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,16 @@ if (ONEDPL_BACKEND MATCHES "^(tbb|dpcpp|dpcpp_only)$")
endif()
endif()

if (DEFINED ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION)
if(ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION)
message(STATUS "Adding -DONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION=1 option")
target_compile_options(oneDPL INTERFACE "-DONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION=1")
else()
message(STATUS "Adding -DONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION=0 option")
target_compile_options(oneDPL INTERFACE "-DONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION=0")
endif()
endif()

# DPC++ specific macro
target_compile_definitions(oneDPL INTERFACE
$<$<OR:$<BOOL:${ONEDPL_USE_DEVICE_FPGA_HW}>,$<BOOL:${ONEDPL_USE_DEVICE_FPGA_EMU}>>:ONEDPL_FPGA_DEVICE>
Expand Down
1 change: 1 addition & 0 deletions cmake/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The following variables are provided for oneDPL configuration:
| ONEDPL_AOT_ARCH | STRING | Architecture options for ahead-of-time compilation, supported values can be found [here](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html) | "*" for GPU device and "avx" for CPU device |
| ONEDPL_TEST_EXPLICIT_KERNEL_NAMES | STRING | Control kernel naming. Affects only oneDPL test targets. Supported values: AUTO, ALWAYS. AUTO: rely on the compiler if "Unnamed SYCL lambda kernels" feature is on, otherwise provide kernel names explicitly; ALWAYS: provide kernel names explicitly | AUTO |
| ONEDPL_TEST_WIN_ICX_FIXES | BOOL | Affects only oneDPL test targets. Enable icx, icx-cl workarounds to fix issues in CMake for Windows. | ON |
| ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION | BOOL | Use as a workaround for incorrect results, which may be produced by reduction algorithms with 64-bit data types compiled by the Intel&reg; oneAPI DPC++/C++ Compiler and executed on GPU devices. | |

Some useful CMake variables ([here](https://cmake.org/cmake/help/latest/manual/cmake-variables.7.html) you can find a full list of CMake variables for the latest version):

Expand Down
17 changes: 14 additions & 3 deletions include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,21 @@ namespace unseq_backend
//This optimization depends on Intel(R) oneAPI DPC++ Compiler implementation such as support of binary operators from std namespace.
//We need to use defined(SYCL_IMPLEMENTATION_INTEL) macro as a guard.

template <typename _Tp>
inline constexpr bool __can_use_known_identity =
# if ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION
// When ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION is defined as non-zero, we avoid using known identity for 64-bit arithmetic data types
!(::std::is_arithmetic_v<_Tp> && sizeof(_Tp) == sizeof(::std::uint64_t));
# else
true;
# endif // ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION

//TODO: To change __has_known_identity implementation as soon as the Intel(R) oneAPI DPC++ Compiler implementation issues related to
//std::multiplies, std::bit_or, std::bit_and and std::bit_xor operations will be fixed.
//std::logical_and and std::logical_or are not supported in Intel(R) oneAPI DPC++ Compiler to be used in sycl::inclusive_scan_over_group and sycl::reduce_over_group
template <typename _BinaryOp, typename _Tp>
using __has_known_identity =
using __has_known_identity = ::std::conditional_t<
__can_use_known_identity<_Tp>,
# if _ONEDPL_LIBSYCL_VERSION >= 50200
typename ::std::disjunction<
__dpl_sycl::__has_known_identity<_BinaryOp, _Tp>,
Expand All @@ -50,15 +60,16 @@ using __has_known_identity =
::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__minimum<_Tp>>,
::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__minimum<void>>,
::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum<_Tp>>,
::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum<void>>>>>;
::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum<void>>>>>,
# else //_ONEDPL_LIBSYCL_VERSION >= 50200
typename ::std::conjunction<
::std::is_arithmetic<_Tp>,
::std::disjunction<::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus<_Tp>>,
::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus<void>>,
::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus<_Tp>>,
::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus<void>>>>;
::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus<void>>>>,
# endif //_ONEDPL_LIBSYCL_VERSION >= 50200
::std::false_type>; // This is for the case of __can_use_known_identity<_Tp>==false

#else //_USE_GROUP_ALGOS && defined(SYCL_IMPLEMENTATION_INTEL)

Expand Down
5 changes: 5 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ macro(onedpl_add_test test_source_file switch_off_checked_iterators)
string(REPLACE "\.cpp" "" _test_name ${_test_name})

set(coal_tests "reduce.pass" "transform_reduce.pass" "count.pass" "sycl_iterator_reduce.pass" "minmax_element.pass")
set(workaround_for_igpu_64bit_reduction_tests "reduce_by_segment.pass")
# mark those tests with pstloffload_smoke_tests label
set (pstloffload_smoke_tests "adjacent_find.pass" "copy_move.pass" "merge.pass" "partial_sort.pass" "remove_copy.pass"
"transform_reduce.pass" "transform_reduce.pass.coal" "transform_scan.pass" "algorithm.pass"
Expand All @@ -208,6 +209,10 @@ macro(onedpl_add_test test_source_file switch_off_checked_iterators)
if (_test_name IN_LIST coal_tests)
onedpl_construct_exec(${test_source_file} ${_test_name} ${switch_off_checked_iterators} "-D_ONEDPL_DETECT_SPIRV_COMPILATION=1" "${extra_test_label}")
onedpl_construct_exec(${test_source_file} ${_test_name}.coal ${switch_off_checked_iterators} "-D_ONEDPL_DETECT_SPIRV_COMPILATION=0" "${extra_test_label}")
elseif (_test_name IN_LIST workaround_for_igpu_64bit_reduction_tests)
onedpl_construct_exec(${test_source_file} ${_test_name} ${switch_off_checked_iterators} "" "${extra_test_label}")
string(REPLACE "\.pass" "_workaround_64bit_reduction\.pass" _test_name ${_test_name})
onedpl_construct_exec(${test_source_file} ${_test_name} ${switch_off_checked_iterators} "-D_ONEDPL_TEST_FORCE_WORKAROUND_FOR_IGPU_64BIT_REDUCTION=1" "${extra_test_label}")
elseif(_test_name STREQUAL "free_after_unload.pass")
onedpl_construct_exec(${test_source_file} ${_test_name} ${switch_off_checked_iterators} "" "${extra_test_label}")
onedpl_construct_exec(${test_source_file} ${_test_name}.after_pstl_offload ${switch_off_checked_iterators} "" "${extra_test_label}")
Expand Down
20 changes: 17 additions & 3 deletions test/parallel_api/numeric/numeric.ops/reduce_by_segment.pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@
//
//===----------------------------------------------------------------------===//

#if defined(ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION)
#undef ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION
#endif

#if defined(_ONEDPL_TEST_FORCE_WORKAROUND_FOR_IGPU_64BIT_REDUCTION)
# define ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION _ONEDPL_TEST_FORCE_WORKAROUND_FOR_IGPU_64BIT_REDUCTION
#endif

#include "support/test_config.h"

#include "oneapi/dpl/execution"
Expand Down Expand Up @@ -298,10 +306,16 @@ void
run_test_on_device()
{
#if TEST_DPCPP_BACKEND_PRESENT
if (TestUtils::has_type_support<ValueType>(TestUtils::get_test_queue().get_device()))
// Skip 64-byte types testing when the algorithm is broken and there is no the workaround
#if _PSTL_ICPX_TEST_RED_BY_SEG_BROKEN_64BIT_TYPES && !ONEDPL_WORKAROUND_FOR_IGPU_64BIT_REDUCTION
if constexpr (sizeof(ValueType) != 8)
#endif
{
constexpr sycl::usm::alloc allocation_type = use_device_alloc ? sycl::usm::alloc::device : sycl::usm::alloc::shared;
test4buffers<sycl::usm::alloc::device, test_reduce_by_segment<ValueType, BinaryPredicate, BinaryOperation>>();
if (TestUtils::has_type_support<ValueType>(TestUtils::get_test_queue().get_device()))
{
constexpr sycl::usm::alloc allocation_type = use_device_alloc ? sycl::usm::alloc::device : sycl::usm::alloc::shared;
test4buffers<sycl::usm::alloc::device, test_reduce_by_segment<ValueType, BinaryPredicate, BinaryOperation>>();
}
}
#endif // TEST_DPCPP_BACKEND_PRESENT
}
Expand Down

0 comments on commit 2595281

Please sign in to comment.