From af899e356bbe672b7fe5f2599e5d1827f308e305 Mon Sep 17 00:00:00 2001 From: Georgy Evtushenko Date: Fri, 25 Nov 2022 09:00:25 +0400 Subject: [PATCH] Defer reduce offset selection to cub --- testing/cuda/reduce.cu | 20 ++++++++++++++++++++ thrust/system/cuda/detail/reduce.h | 10 ++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/testing/cuda/reduce.cu b/testing/cuda/reduce.cu index 58d71eaeb..f020761c8 100644 --- a/testing/cuda/reduce.cu +++ b/testing/cuda/reduce.cu @@ -1,6 +1,7 @@ #include #include #include +#include template @@ -99,3 +100,22 @@ void TestReduceCudaStreamsNoSync() } DECLARE_UNITTEST(TestReduceCudaStreamsNoSync); +#if defined(THRUST_RDC_ENABLED) +void TestReduceLargeInput() +{ + using T = unsigned long long; + using OffsetT = std::size_t; + const OffsetT num_items = 1ull << 32; + + thrust::constant_iterator d_data(T{1}); + thrust::device_vector d_result(1); + + reduce_kernel<<<1,1>>>(thrust::device, d_data, d_data + num_items, T{}, d_result.begin()); + cudaError_t const err = cudaDeviceSynchronize(); + ASSERT_EQUAL(cudaSuccess, err); + + ASSERT_EQUAL(num_items, d_result[0]); +} +DECLARE_UNITTEST(TestReduceLargeInput); +#endif + diff --git a/thrust/system/cuda/detail/reduce.h b/thrust/system/cuda/detail/reduce.h index 95cda75cc..41d9075da 100644 --- a/thrust/system/cuda/detail/reduce.h +++ b/thrust/system/cuda/detail/reduce.h @@ -943,11 +943,8 @@ T reduce_n_impl(execution_policy& policy, size_t tmp_size = 0; - THRUST_INDEX_TYPE_DISPATCH2(status, + THRUST_INDEX_TYPE_DISPATCH(status, cub::DeviceReduce::Reduce, - (cub::DispatchReduce< - InputIt, T*, Size, BinaryOp, T - >::Dispatch), num_items, (NULL, tmp_size, first, reinterpret_cast(NULL), num_items_fixed, binary_op, init, stream)); @@ -970,11 +967,8 @@ T reduce_n_impl(execution_policy& policy, // make this guarantee. T* ret_ptr = thrust::detail::aligned_reinterpret_cast(tmp.data().get()); void* tmp_ptr = static_cast((tmp.data() + sizeof(T)).get()); - THRUST_INDEX_TYPE_DISPATCH2(status, + THRUST_INDEX_TYPE_DISPATCH(status, cub::DeviceReduce::Reduce, - (cub::DispatchReduce< - InputIt, T*, Size, BinaryOp, T - >::Dispatch), num_items, (tmp_ptr, tmp_size, first, ret_ptr, num_items_fixed, binary_op, init, stream));