Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Use CUB's new CDP macros.
Browse files Browse the repository at this point in the history
  • Loading branch information
alliepiper committed May 10, 2022
1 parent 50316c7 commit c4eadef
Show file tree
Hide file tree
Showing 24 changed files with 942 additions and 2,048 deletions.
2 changes: 1 addition & 1 deletion dependencies/cub
13 changes: 0 additions & 13 deletions thrust/system/cuda/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,6 @@
// older releases. This header will always pull in version info:
#include <cub/util_namespace.cuh>

#if defined(__CUDACC__) || defined(_NVHPC_CUDA)
# if !defined(__CUDA_ARCH__) || defined(__CUDACC_RDC__)
# define __THRUST_HAS_CUDART__ 1
# define THRUST_RUNTIME_FUNCTION __host__ __device__ __forceinline__
# else
# define __THRUST_HAS_CUDART__ 0
# define THRUST_RUNTIME_FUNCTION __host__ __forceinline__
# endif
#else
# define __THRUST_HAS_CUDART__ 0
# define THRUST_RUNTIME_FUNCTION __host__ __forceinline__
#endif

#ifdef THRUST_AGENT_ENTRY_NOINLINE
#define THRUST_AGENT_ENTRY_INLINE_ATTR __noinline__
#else
Expand Down
43 changes: 18 additions & 25 deletions thrust/system/cuda/detail/adjacent_difference.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <thrust/detail/config.h>

#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC

#include <thrust/detail/cstdint.h>
#include <thrust/detail/minmax.h>
#include <thrust/detail/temporary_array.h>
Expand All @@ -41,6 +42,7 @@
#include <thrust/type_traits/is_contiguous_iterator.h>
#include <thrust/type_traits/remove_cvref.h>

#include <cub/detail/cdp_dispatch.cuh>
#include <cub/device/device_adjacent_difference.cuh>
#include <cub/device/device_select.cuh>
#include <cub/util_math.cuh>
Expand All @@ -64,7 +66,7 @@ namespace __adjacent_difference {
class InputIt,
class OutputIt,
class BinaryOp>
cudaError_t THRUST_RUNTIME_FUNCTION
cudaError_t CUB_RUNTIME_FUNCTION
doit_step(void *d_temp_storage,
size_t &temp_storage_bytes,
InputIt first,
Expand Down Expand Up @@ -114,7 +116,7 @@ namespace __adjacent_difference {
template <class InputIt,
class OutputIt,
class BinaryOp>
cudaError_t THRUST_RUNTIME_FUNCTION
cudaError_t CUB_RUNTIME_FUNCTION
doit_step(void *d_temp_storage,
size_t &temp_storage_bytes,
InputIt first,
Expand All @@ -139,7 +141,7 @@ namespace __adjacent_difference {
template <class InputIt,
class OutputIt,
class BinaryOp>
cudaError_t THRUST_RUNTIME_FUNCTION
cudaError_t CUB_RUNTIME_FUNCTION
doit_step(void *d_temp_storage,
size_t &temp_storage_bytes,
InputIt first,
Expand Down Expand Up @@ -181,7 +183,7 @@ namespace __adjacent_difference {
typename InputIt,
typename OutputIt,
typename BinaryOp>
OutputIt THRUST_RUNTIME_FUNCTION
OutputIt CUB_RUNTIME_FUNCTION
adjacent_difference(execution_policy<Derived>& policy,
InputIt first,
InputIt last,
Expand Down Expand Up @@ -260,27 +262,18 @@ adjacent_difference(execution_policy<Derived> &policy,
OutputIt result,
BinaryOp binary_op)
{
OutputIt ret = result;
if (__THRUST_HAS_CUDART__)
{
ret = __adjacent_difference::adjacent_difference(policy,
first,
last,
result,
binary_op);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::adjacent_difference(cvt_to_seq(derived_cast(policy)),
first,
last,
result,
binary_op);
#endif
}

return ret;
CUB_CDP_DISPATCH(
(result = __adjacent_difference::adjacent_difference(policy,
first,
last,
result,
binary_op);),
(result = thrust::adjacent_difference(cvt_to_seq(derived_cast(policy)),
first,
last,
result,
binary_op);));
return result;
}

template <class Derived,
Expand Down
46 changes: 17 additions & 29 deletions thrust/system/cuda/detail/copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@

#include <thrust/detail/config.h>

#include <thrust/advance.h>

#include <thrust/system/cuda/config.h>
#include <thrust/system/cuda/detail/execution_policy.h>
#include <thrust/system/cuda/detail/cross_system.h>

#include <cub/detail/cdp_dispatch.cuh>

THRUST_NAMESPACE_BEGIN

template <typename DerivedPolicy, typename InputIt, typename OutputIt>
Expand Down Expand Up @@ -117,22 +121,11 @@ copy(execution_policy<System> &system,
InputIterator last,
OutputIterator result)
{
OutputIterator ret = result;
if (__THRUST_HAS_CUDART__)
{
ret = __copy::device_to_device(system, first, last, result);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::copy(cvt_to_seq(derived_cast(system)),
first,
last,
result);
#endif
}

return ret;
CUB_CDP_DISPATCH(
(result = __copy::device_to_device(system, first, last, result);),
(result =
thrust::copy(cvt_to_seq(derived_cast(system)), first, last, result);));
return result;
} // end copy()

__thrust_exec_check_disable__
Expand All @@ -146,19 +139,14 @@ copy_n(execution_policy<System> &system,
Size n,
OutputIterator result)
{
OutputIterator ret = result;
if (__THRUST_HAS_CUDART__)
{
ret = __copy::device_to_device(system, first, first + n, result);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result);
#endif
}

return ret;
CUB_CDP_DISPATCH(
(result = __copy::device_to_device(system,
first,
thrust::next(first, n),
result);),
(result =
thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result);));
return result;
} // end copy_n()
#endif

Expand Down
103 changes: 40 additions & 63 deletions thrust/system/cuda/detail/copy_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,20 @@
#include <thrust/detail/config.h>

#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
#include <thrust/system/cuda/config.h>

#include <thrust/detail/alignment.h>
#include <thrust/detail/cstdint.h>
#include <thrust/detail/function.h>
#include <thrust/detail/temporary_array.h>
#include <thrust/system/cuda/detail/util.h>
#include <cub/device/device_select.cuh>
#include <thrust/distance.h>
#include <thrust/system/cuda/config.h>
#include <thrust/system/cuda/detail/core/agent_launcher.h>
#include <thrust/system/cuda/detail/core/util.h>
#include <thrust/system/cuda/detail/par_to_seq.h>
#include <thrust/detail/function.h>
#include <thrust/distance.h>
#include <thrust/detail/alignment.h>
#include <thrust/system/cuda/detail/util.h>

#include <cub/detail/cdp_dispatch.cuh>
#include <cub/device/device_select.cuh>
#include <cub/util_math.cuh>

THRUST_NAMESPACE_BEGIN
Expand Down Expand Up @@ -598,17 +599,17 @@ namespace __copy_if {
class Predicate,
class Size,
class NumSelectedOutIt>
static cudaError_t THRUST_RUNTIME_FUNCTION
doit_step(void * d_temp_storage,
size_t & temp_storage_bytes,
ItemsIt items,
StencilIt stencil,
OutputIt output_it,
Predicate predicate,
NumSelectedOutIt num_selected_out,
Size num_items,
cudaStream_t stream,
bool debug_sync)
CUB_RUNTIME_FUNCTION
static cudaError_t doit_step(void * d_temp_storage,
size_t & temp_storage_bytes,
ItemsIt items,
StencilIt stencil,
OutputIt output_it,
Predicate predicate,
NumSelectedOutIt num_selected_out,
Size num_items,
cudaStream_t stream,
bool debug_sync)
{
if (num_items == 0)
return cudaSuccess;
Expand Down Expand Up @@ -695,7 +696,7 @@ namespace __copy_if {
typename StencilIt,
typename OutputIt,
typename Predicate>
THRUST_RUNTIME_FUNCTION
CUB_RUNTIME_FUNCTION
OutputIt copy_if(execution_policy<Derived>& policy,
InputIt first,
InputIt last,
Expand Down Expand Up @@ -789,28 +790,18 @@ copy_if(execution_policy<Derived> &policy,
OutputIterator result,
Predicate pred)
{
OutputIterator ret = result;

if (__THRUST_HAS_CUDART__)
{
ret = __copy_if::copy_if(policy,
first,
last,
__copy_if::no_stencil_tag(),
result,
pred);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
first,
last,
result,
pred);
#endif
}
return ret;
CUB_CDP_DISPATCH((result = __copy_if::copy_if(policy,
first,
last,
__copy_if::no_stencil_tag(),
result,
pred);),
(result = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
first,
last,
result,
pred);));
return result;
} // func copy_if

__thrust_exec_check_disable__
Expand All @@ -827,29 +818,15 @@ copy_if(execution_policy<Derived> &policy,
OutputIterator result,
Predicate pred)
{
OutputIterator ret = result;

if (__THRUST_HAS_CUDART__)
{
ret = __copy_if::copy_if(policy,
first,
last,
stencil,
result,
pred);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
first,
last,
stencil,
result,
pred);
#endif
}
return ret;
CUB_CDP_DISPATCH(
(result = __copy_if::copy_if(policy, first, last, stencil, result, pred);),
(result = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
first,
last,
stencil,
result,
pred);));
return result;
} // func copy_if

} // namespace cuda_cub
Expand Down
Loading

0 comments on commit c4eadef

Please sign in to comment.