Skip to content

Commit

Permalink
Merge pull request #412 from leofang/remove_redundant_include
Browse files Browse the repository at this point in the history
Remove `util_device.cuh` from iterator headers to enable online compilation
  • Loading branch information
gevtushenko authored Sep 11, 2023
2 parents d0af9f7 + 91c4556 commit 137f5c7
Show file tree
Hide file tree
Showing 22 changed files with 123 additions and 58 deletions.
2 changes: 1 addition & 1 deletion cub/cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
#include <cub/detail/uninitialized_copy.cuh>
#include <cub/thread/thread_load.cuh>
#include <cub/thread/thread_store.cuh>
#include <cub/util_device.cuh>
#include <cub/warp/warp_reduce.cuh>
#include <cub/util_temporary_storage.cuh>

#include <nv/target>

Expand Down
1 change: 1 addition & 0 deletions cub/cub/cub.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,5 @@
#include "util_device.cuh"
#include "util_macro.cuh"
#include "util_ptx.cuh"
#include "util_temporary_storage.cuh"
#include "util_type.cuh"
2 changes: 1 addition & 1 deletion cub/cub/detail/temporary_storage.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#pragma once

#include <cub/util_namespace.cuh>
#include <cub/util_device.cuh>
#include <cub/util_temporary_storage.cuh>


CUB_NAMESPACE_BEGIN
Expand Down
2 changes: 2 additions & 0 deletions cub/cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
#include <cub/agent/agent_histogram.cuh>
#include <cub/config.cuh>
#include <cub/detail/cpp_compatibility.cuh>
#include <cub/util_device.cuh>
#include <cub/util_temporary_storage.cuh>
#include <cub/device/dispatch/tuning/tuning_histogram.cuh>
#include <cub/grid/grid_queue.cuh>
#include <cub/thread/thread_search.cuh>
Expand Down
1 change: 1 addition & 0 deletions cub/cub/device/dispatch/dispatch_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include <cub/util_debug.cuh>
#include <cub/util_deprecated.cuh>
#include <cub/util_device.cuh>
#include <cub/util_temporary_storage.cuh>

#include <thrust/system/cuda/detail/core/triple_chevron_launch.h>

Expand Down
1 change: 1 addition & 0 deletions cub/cub/device/dispatch/tuning/tuning_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <cub/agent/agent_histogram.cuh>
#include <cub/block/block_load.cuh>
#include <cub/config.cuh>
#include <cub/util_device.cuh>
#include <cub/util_type.cuh>

CUB_NAMESPACE_BEGIN
Expand Down
1 change: 1 addition & 0 deletions cub/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <cub/block/block_scan.cuh>
#include <cub/block/block_store.cuh>
#include <cub/config.cuh>
#include <cub/util_device.cuh>
#include <cub/util_type.cuh>

CUB_NAMESPACE_BEGIN
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <cub/block/block_scan.cuh>
#include <cub/block/block_store.cuh>
#include <cub/config.cuh>
#include <cub/util_device.cuh>
#include <cub/util_type.cuh>

CUB_NAMESPACE_BEGIN
Expand Down
1 change: 1 addition & 0 deletions cub/cub/device/dispatch/tuning/tuning_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <cub/block/block_scan.cuh>
#include <cub/block/block_store.cuh>
#include <cub/config.cuh>
#include <cub/util_device.cuh>
#include <cub/util_type.cuh>

CUB_NAMESPACE_BEGIN
Expand Down
1 change: 1 addition & 0 deletions cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <cub/block/block_store.cuh>
#include <cub/config.cuh>
#include <cub/thread/thread_operators.cuh>
#include <cub/util_device.cuh>
#include <cub/util_math.cuh>
#include <cub/util_type.cuh>

Expand Down
1 change: 1 addition & 0 deletions cub/cub/device/dispatch/tuning/tuning_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <cub/block/block_load.cuh>
#include <cub/block/block_scan.cuh>
#include <cub/config.cuh>
#include <cub/util_device.cuh>
#include <cub/util_type.cuh>

CUB_NAMESPACE_BEGIN
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <cub/block/block_load.cuh>
#include <cub/block/block_scan.cuh>
#include <cub/config.cuh>
#include <cub/util_device.cuh>
#include <cub/util_math.cuh>
#include <cub/util_type.cuh>

Expand Down
1 change: 1 addition & 0 deletions cub/cub/device/dispatch/tuning/tuning_unique_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <cub/block/block_load.cuh>
#include <cub/block/block_scan.cuh>
#include <cub/config.cuh>
#include <cub/util_device.cuh>
#include <cub/util_math.cuh>
#include <cub/util_type.cuh>

Expand Down
1 change: 0 additions & 1 deletion cub/cub/iterator/arg_index_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include "../config.cuh"
#include "../thread/thread_load.cuh"
#include "../thread/thread_store.cuh"
#include "../util_device.cuh"

#include <thrust/version.h>

Expand Down
1 change: 0 additions & 1 deletion cub/cub/iterator/cache_modified_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include "../config.cuh"
#include "../thread/thread_load.cuh"
#include "../thread/thread_store.cuh"
#include "../util_device.cuh"

#if (THRUST_VERSION >= 100700)
// This iterator is compatible with Thrust API 1.7 and newer
Expand Down
1 change: 0 additions & 1 deletion cub/cub/iterator/cache_modified_output_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include "../thread/thread_load.cuh"
#include "../thread/thread_store.cuh"
#include "../config.cuh"
#include "../util_device.cuh"

#if (THRUST_VERSION >= 100700)
// This iterator is compatible with Thrust API 1.7 and newer
Expand Down
1 change: 0 additions & 1 deletion cub/cub/iterator/counting_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include "../thread/thread_load.cuh"
#include "../thread/thread_store.cuh"
#include "../config.cuh"
#include "../util_device.cuh"

#if (THRUST_VERSION >= 100700)
// This iterator is compatible with Thrust API 1.7 and newer
Expand Down
1 change: 0 additions & 1 deletion cub/cub/iterator/tex_obj_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
#include <cub/thread/thread_load.cuh>
#include <cub/thread/thread_store.cuh>
#include <cub/util_debug.cuh>
#include <cub/util_device.cuh>

#include <nv/target>

Expand Down
1 change: 0 additions & 1 deletion cub/cub/iterator/transform_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include "../thread/thread_load.cuh"
#include "../thread/thread_store.cuh"
#include "../config.cuh"
#include "../util_device.cuh"

#if (THRUST_VERSION >= 100700)
// This iterator is compatible with Thrust API 1.7 and newer
Expand Down
57 changes: 7 additions & 50 deletions cub/cub/util_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@

/**
* \file
* Properties of a given CUDA device and the corresponding PTX bundle
* Properties of a given CUDA device and the corresponding PTX bundle.
*
* \note
* This file contains __host__ only functions and utilities, and should not be
* included in code paths that could be online-compiled (ex: using NVRTC).
*/

#pragma once
Expand All @@ -42,6 +46,8 @@
#include <cub/util_macro.cuh>
#include <cub/util_namespace.cuh>
#include <cub/util_type.cuh>
// for backward compatibility
#include <cub/util_temporary_storage.cuh>

#include <nv/target>

Expand All @@ -60,55 +66,6 @@ CUB_NAMESPACE_BEGIN
#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document


/**
* \brief Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed).
*/
template <int ALLOCATIONS>
__host__ __device__ __forceinline__
cudaError_t AliasTemporaries(
void *d_temp_storage, ///< [in] Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done.
size_t& temp_storage_bytes, ///< [in,out] Size in bytes of \t d_temp_storage allocation
void* (&allocations)[ALLOCATIONS], ///< [in,out] Pointers to device allocations needed
size_t (&allocation_sizes)[ALLOCATIONS]) ///< [in] Sizes in bytes of device allocations needed
{
const int ALIGN_BYTES = 256;
const int ALIGN_MASK = ~(ALIGN_BYTES - 1);

// Compute exclusive prefix sum over allocation requests
size_t allocation_offsets[ALLOCATIONS];
size_t bytes_needed = 0;
for (int i = 0; i < ALLOCATIONS; ++i)
{
size_t allocation_bytes = (allocation_sizes[i] + ALIGN_BYTES - 1) & ALIGN_MASK;
allocation_offsets[i] = bytes_needed;
bytes_needed += allocation_bytes;
}
bytes_needed += ALIGN_BYTES - 1;

// Check if the caller is simply requesting the size of the storage allocation
if (!d_temp_storage)
{
temp_storage_bytes = bytes_needed;
return cudaSuccess;
}

// Check if enough storage provided
if (temp_storage_bytes < bytes_needed)
{
return CubDebug(cudaErrorInvalidValue);
}

// Alias
d_temp_storage = (void *) ((size_t(d_temp_storage) + ALIGN_BYTES - 1) & ALIGN_MASK);
for (int i = 0; i < ALLOCATIONS; ++i)
{
allocations[i] = static_cast<char*>(d_temp_storage) + allocation_offsets[i];
}

return cudaSuccess;
}


/**
* \brief Empty kernel for querying PTX manifest metadata (e.g., version) for the current device
*/
Expand Down
101 changes: 101 additions & 0 deletions cub/cub/util_temporary_storage.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

/**
* \file
* Utilities for device-accessible temporary storages.
*/

#pragma once

#include <cub/util_debug.cuh>
#include <cub/util_namespace.cuh>


CUB_NAMESPACE_BEGIN

/**
* \addtogroup UtilMgmt
* @{
*/

#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document

/**
* \brief Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed).
*/
template <int ALLOCATIONS>
__host__ __device__ __forceinline__
cudaError_t AliasTemporaries(
void *d_temp_storage, ///< [in] Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done.
size_t& temp_storage_bytes, ///< [in,out] Size in bytes of \t d_temp_storage allocation
void* (&allocations)[ALLOCATIONS], ///< [in,out] Pointers to device allocations needed
size_t (&allocation_sizes)[ALLOCATIONS]) ///< [in] Sizes in bytes of device allocations needed
{
const int ALIGN_BYTES = 256;
const int ALIGN_MASK = ~(ALIGN_BYTES - 1);

// Compute exclusive prefix sum over allocation requests
size_t allocation_offsets[ALLOCATIONS];
size_t bytes_needed = 0;
for (int i = 0; i < ALLOCATIONS; ++i)
{
size_t allocation_bytes = (allocation_sizes[i] + ALIGN_BYTES - 1) & ALIGN_MASK;
allocation_offsets[i] = bytes_needed;
bytes_needed += allocation_bytes;
}
bytes_needed += ALIGN_BYTES - 1;

// Check if the caller is simply requesting the size of the storage allocation
if (!d_temp_storage)
{
temp_storage_bytes = bytes_needed;
return cudaSuccess;
}

// Check if enough storage provided
if (temp_storage_bytes < bytes_needed)
{
return CubDebug(cudaErrorInvalidValue);
}

// Alias
d_temp_storage = (void *) ((size_t(d_temp_storage) + ALIGN_BYTES - 1) & ALIGN_MASK);
for (int i = 0; i < ALLOCATIONS; ++i)
{
allocations[i] = static_cast<char*>(d_temp_storage) + allocation_offsets[i];
}

return cudaSuccess;
}

#endif // DOXYGEN_SHOULD_SKIP_THIS

/** @} */ // end group UtilMgmt

CUB_NAMESPACE_END
1 change: 1 addition & 0 deletions thrust/thrust/system/cuda/detail/core/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <cub/block/block_load.cuh>
#include <cub/block/block_scan.cuh>
#include <cub/block/block_store.cuh>
#include <cub/util_temporary_storage.cuh>

#include <nv/target>

Expand Down

0 comments on commit 137f5c7

Please sign in to comment.