Skip to content

Commit

Permalink
Add condition to ensure value type is trivially copyable to call redu…
Browse files Browse the repository at this point in the history
…ce-then-scan

Signed-off-by: Matthew Michel <matthew.michel@intel.com>
  • Loading branch information
mmichel11 committed Oct 22, 2024
1 parent 49ff65f commit 66ead80
Showing 1 changed file with 15 additions and 13 deletions.
28 changes: 15 additions & 13 deletions include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -944,26 +944,28 @@ __pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&&
return 1;
}

using __diff_type = oneapi::dpl::__internal::__difference_t<_Range1>;
using __key_type = oneapi::dpl::__internal::__value_t<_Range1>;
using __val_type = oneapi::dpl::__internal::__value_t<_Range2>;
#if _ONEDPL_BACKEND_SYCL
// We would normally dispatch to the parallel implementation which would make the decision to invoke
// reduce-then-scan. However, since the fallback is implemented at the ranges level we must choose
// whether or not to use reduce-then-scan here.
if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec))
if constexpr (std::is_trivially_copyable_v<__val_type>)
{
auto __res = oneapi::dpl::__par_backend_hetero::__parallel_reduce_by_segment_reduce_then_scan(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__keys),
std::forward<_Range2>(__values), std::forward<_Range3>(__out_keys), std::forward<_Range4>(__out_values),
__binary_pred, __binary_op);
__res.wait();
// Because our init type ends up being tuple<std::size_t, ValType>, return the first component which is the write index. Add 1 to return the
// past-the-end iterator pair of segmented reduction.
return std::get<0>(__res.get()) + 1;
if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec))
{
auto __res = oneapi::dpl::__par_backend_hetero::__parallel_reduce_by_segment_reduce_then_scan(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__keys),
std::forward<_Range2>(__values), std::forward<_Range3>(__out_keys), std::forward<_Range4>(__out_values),
__binary_pred, __binary_op);
__res.wait();
// Because our init type ends up being tuple<std::size_t, ValType>, return the first component which is the write index. Add 1 to return the
// past-the-end iterator pair of segmented reduction.
return std::get<0>(__res.get()) + 1;
}
}
#endif
using __diff_type = oneapi::dpl::__internal::__difference_t<_Range1>;
using __key_type = oneapi::dpl::__internal::__value_t<_Range1>;
using __val_type = oneapi::dpl::__internal::__value_t<_Range2>;

// Round 1: reduce with extra indices added to avoid long segments
// TODO: At threshold points check if the key is equal to the key at the previous threshold point, indicating a long sequence.
// Skip a round of copy_if and reduces if there are none.
Expand Down

0 comments on commit 66ead80

Please sign in to comment.