From f06a8fbb9f659991203cccdde9e52036f441e21e Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 17:07:04 -0800 Subject: [PATCH 01/14] Improving launch_box; removed non-static to static conversion, added a launch method. --- include/gunrock/cuda/detail/launch_box.hxx | 6 +- include/gunrock/cuda/launch_box.hxx | 73 ++++++++++++++----- .../gunrock/framework/frontier/frontier.hxx | 3 + .../operators/advance/block_mapped.hxx | 44 ++++++----- .../operators/advance/thread_mapped.hxx | 20 +++-- unittests/launch_box/test_launch_box.cu | 5 +- 6 files changed, 98 insertions(+), 53 deletions(-) diff --git a/include/gunrock/cuda/detail/launch_box.hxx b/include/gunrock/cuda/detail/launch_box.hxx index a364a493..1c672ce5 100644 --- a/include/gunrock/cuda/detail/launch_box.hxx +++ b/include/gunrock/cuda/detail/launch_box.hxx @@ -24,11 +24,11 @@ namespace detail { * @tparam sm_flags_ Bitwise flags indicating SM versions (`sm_flag_t` enum). */ template -struct launch_params_abc_t { +struct launch_params_base_t { enum : unsigned { sm_flags = sm_flags_ }; protected: - launch_params_abc_t() {} + launch_params_base_t() {} }; /** @@ -60,7 +60,7 @@ struct raise_not_found_error_t { /** * @brief Subsets a pack of launch parameters (children of - * `launch_params_abc_t`), selecting the ones that match the architecture being + * `launch_params_base_t`), selecting the ones that match the architecture being * compiled for, stored in a tuple type. * * @par Overview diff --git a/include/gunrock/cuda/launch_box.hxx b/include/gunrock/cuda/launch_box.hxx index 513708f9..4c5f0411 100644 --- a/include/gunrock/cuda/launch_box.hxx +++ b/include/gunrock/cuda/launch_box.hxx @@ -25,9 +25,22 @@ namespace gunrock { namespace cuda { - namespace launch_box { +struct dimensions_t { + unsigned int x, y, z; + + __host__ __device__ constexpr dimensions_t(const unsigned int _x = 1, + const unsigned int _y = 1, + const unsigned int _z = 1) + : x(_x), y(_y), z(_z) {} + + __host__ __device__ constexpr unsigned int size() const { return x * y * z; } + __host__ __device__ constexpr operator dim3(void) const { + return uint3{x, y, z}; + } +}; + /** * @brief CUDA dim3 template representation, since dim3 cannot be used as a * template argument. @@ -36,10 +49,14 @@ namespace launch_box { * @tparam y_ (default = `1`) Dimension in the Y direction. * @tparam z_ (default = `1`) Dimension in the Z direction. */ -template +template struct dim3_t { - enum : unsigned int { x = x_, y = y_, z = z_, size = x_ * y_ * z_ }; - static constexpr dim3 get_dim3() { return dim3(x, y, z); } + enum : unsigned int { x = x_, y = y_, z = z_ }; + static constexpr unsigned int size() { return x * y * z; } + static constexpr dimensions_t dimensions() { return {x, y, z}; } + + // Convertors must be non-static members. + constexpr operator dimensions_t(void) { return {x, y, z}; } }; /** @@ -97,13 +114,15 @@ template -struct launch_params_t : detail::launch_params_abc_t { +struct launch_params_t : detail::launch_params_base_t { typedef block_dimensions_ block_dimensions_t; typedef grid_dimensions_ grid_dimensions_t; enum : size_t { shared_memory_bytes = shared_memory_bytes_ }; - static constexpr dim3 block_dimensions = block_dimensions_t::get_dim3(); - static constexpr dim3 grid_dimensions = grid_dimensions_t::get_dim3(); + static constexpr dimensions_t block_dimensions = + block_dimensions_t::dimensions(); + static constexpr dimensions_t grid_dimensions = + grid_dimensions_t::dimensions(); standard_context_t& context; launch_params_t(standard_context_t& context_) : context(context_) {} @@ -121,15 +140,17 @@ struct launch_params_t : detail::launch_params_abc_t { template -struct launch_params_dynamic_block_t : detail::launch_params_abc_t { +struct launch_params_dynamic_block_t : detail::launch_params_base_t { typedef grid_dimensions_ grid_dimensions_t; enum : size_t { shared_memory_bytes = shared_memory_bytes_ }; - dim3 block_dimensions; - static constexpr dim3 grid_dimensions = grid_dimensions_t::get_dim3(); + dimensions_t block_dimensions; + static constexpr dimensions_t grid_dimensions = + grid_dimensions_t::dimensions(); + standard_context_t& context; - launch_params_dynamic_block_t(dim3 block_dimensions_, + launch_params_dynamic_block_t(dimensions_t block_dimensions_, standard_context_t& context_) : block_dimensions(block_dimensions_), context(context_) {} }; @@ -146,17 +167,33 @@ struct launch_params_dynamic_block_t : detail::launch_params_abc_t { template -struct launch_params_dynamic_grid_t : detail::launch_params_abc_t { +struct launch_params_dynamic_grid_t : detail::launch_params_base_t { typedef block_dimensions_ block_dimensions_t; enum : size_t { shared_memory_bytes = shared_memory_bytes_ }; - static constexpr dim3 block_dimensions = block_dimensions_t::get_dim3(); - dim3 grid_dimensions; + static constexpr dimensions_t block_dimensions = + block_dimensions_t::dimensions(); + + dimensions_t grid_dimensions; standard_context_t& context; - launch_params_dynamic_grid_t(dim3 grid_dimensions_, - standard_context_t& context_) - : grid_dimensions(grid_dimensions_), context(context_) {} + launch_params_dynamic_grid_t(standard_context_t& context_) + : context(context_) {} + + void calculate_grid_dimensions(std::size_t num_elements) { + grid_dimensions = dimensions_t( + (num_elements + block_dimensions.x - 1) / block_dimensions.x, 1, 1); + } + + /** + * @brief Launch a kernel within the given launch box. + */ + template + void launch(func_t __, args_t&&... args) { + __<<< // kernel function. + grid_dimensions, block_dimensions, shared_memory_bytes, + context.stream()>>>(std::forward(args)...); + } }; /** @@ -169,7 +206,7 @@ struct launch_params_dynamic_grid_t : detail::launch_params_abc_t { template inline float occupancy(func_t kernel) { int max_active_blocks; - int block_size = launch_box_t::block_dimensions_t::size; + int block_size = launch_box_t::block_dimensions_t::size(); int device; cudaDeviceProp props; diff --git a/include/gunrock/framework/frontier/frontier.hxx b/include/gunrock/framework/frontier/frontier.hxx index 2b8b2702..3e2083c4 100644 --- a/include/gunrock/framework/frontier/frontier.hxx +++ b/include/gunrock/framework/frontier/frontier.hxx @@ -38,6 +38,9 @@ class frontier_t : public frontier::vector_frontier_t { public: using vertex_type = vertex_t; using edge_type = edge_t; + using type_t = std::conditional_t<_kind == frontier_kind_t::vertex_frontier, + vertex_t, + edge_t>; using frontier_type = frontier_t; /// TODO: This is a more permenant solution. diff --git a/include/gunrock/framework/operators/advance/block_mapped.hxx b/include/gunrock/framework/operators/advance/block_mapped.hxx index e5c847d5..98872bfb 100644 --- a/include/gunrock/framework/operators/advance/block_mapped.hxx +++ b/include/gunrock/framework/operators/advance/block_mapped.hxx @@ -27,8 +27,8 @@ namespace operators { namespace advance { namespace block_mapped { -template set_number_of_elements(size_of_output); } - std::size_t work_size = (input_type == advance_io_type_t::graph) - ? G.get_number_of_vertices() - : input->get_number_of_elements(); - - using namespace gunrock::cuda::launch_box; - using launch_t = launch_box_t>>; - - launch_t launch_box(dim3((work_size + launch_t::block_dimensions_t::x - 1) / launch_t::block_dimensions_t::x), context); - - // Launch blocked-mapped advance kernel. - block_mapped_kernel - <<>>(G, op, input->data(), output->data(), work_size, - segments.data().get()); + std::size_t num_elements = (input_type == advance_io_type_t::graph) + ? G.get_number_of_vertices() + : input->get_number_of_elements(); + + // Set-up and launch block-mapped advance. + using namespace cuda::launch_box; + using launch_t = + launch_box_t>>; + + launch_t launch_box(context); + + launch_box.calculate_grid_dimensions(num_elements); + auto __bm = block_mapped_kernel< // kernel + launch_box.block_dimensions.x, // threas per block + 1, // items per thread + input_type, output_type, // i/o parameters + graph_t, // graph type + typename frontier_t::type_t, // frontier value type + typename work_tiles_t::value_type, // segments value type + operator_t // lambda type + >; + launch_box.launch(__bm, G, op, input->data(), output->data(), num_elements, + segments.data().get()); context.synchronize(); } diff --git a/include/gunrock/framework/operators/advance/thread_mapped.hxx b/include/gunrock/framework/operators/advance/thread_mapped.hxx index 888fcf5d..d5caefe7 100644 --- a/include/gunrock/framework/operators/advance/thread_mapped.hxx +++ b/include/gunrock/framework/operators/advance/thread_mapped.hxx @@ -97,17 +97,15 @@ void execute(graph_t& G, ? G.get_number_of_vertices() : input.get_number_of_elements(); - using namespace gunrock::cuda::launch_box; - using launch_t = launch_box_t>>; - - launch_t launch_box(dim3((num_elements + launch_t::block_dimensions_t::x - 1) / launch_t::block_dimensions_t::x, 1, 1), context); - - // Launch blocked-mapped advance kernel. - thread_mapped_kernel<<>>(neighbors_expand, num_elements); + // Set-up and launch thread-mapped advance. + using namespace cuda::launch_box; + using launch_t = + launch_box_t>>; + + launch_t launch_box(context); + launch_box.calculate_grid_dimensions(num_elements); + auto __tm = thread_mapped_kernel; + launch_box.launch(__tm, neighbors_expand, num_elements); context.synchronize(); } } // namespace thread_mapped diff --git a/unittests/launch_box/test_launch_box.cu b/unittests/launch_box/test_launch_box.cu index 0945a9a5..0f611b6e 100644 --- a/unittests/launch_box/test_launch_box.cu +++ b/unittests/launch_box/test_launch_box.cu @@ -50,8 +50,9 @@ void test_fallback() { } void test_define() { - dim3 block_dimensions = launch_t::block_dimensions_t::get_dim3(); - dim3 grid_dimensions = launch_t::grid_dimensions_t::get_dim3(); + dimensions_t block_dimensions = launch_t::block_dimensions_t::dimensions(); + dimensions_t grid_dimensions = launch_t::grid_dimensions_t::dimensions(); + dim3 conversion_test = block_dimensions; size_t smem = launch_t::shared_memory_bytes; std::cout << "block_dimensions: " << block_dimensions.x << ", " From 92cc778596ecaa9fb4cfbddfb9689e74e27efc72 Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 17:34:42 -0800 Subject: [PATCH 02/14] MSVC is being stupid. --- include/gunrock/cuda/launch_box.hxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/gunrock/cuda/launch_box.hxx b/include/gunrock/cuda/launch_box.hxx index 4c5f0411..802fe696 100644 --- a/include/gunrock/cuda/launch_box.hxx +++ b/include/gunrock/cuda/launch_box.hxx @@ -36,9 +36,14 @@ struct dimensions_t { : x(_x), y(_y), z(_z) {} __host__ __device__ constexpr unsigned int size() const { return x * y * z; } + +#ifdef _MSC_VER + __host__ __device__ operator dim3(void) const { return uint3{x, y, z}; } +#else __host__ __device__ constexpr operator dim3(void) const { return uint3{x, y, z}; } +#endif }; /** From d4b73a4704707a9b9a55bb14e0bec6d59d668bbc Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 18:04:01 -0800 Subject: [PATCH 03/14] changing unnamed to named class. --- include/gunrock/cuda/device_properties.hxx | 114 ++++++++++----------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/include/gunrock/cuda/device_properties.hxx b/include/gunrock/cuda/device_properties.hxx index c192dc43..fd62eb9d 100644 --- a/include/gunrock/cuda/device_properties.hxx +++ b/include/gunrock/cuda/device_properties.hxx @@ -18,7 +18,7 @@ namespace cuda { typedef cudaDeviceProp device_properties_t; -typedef struct { +struct compute_capability_t { unsigned major; unsigned minor; constexpr unsigned as_combined_number() const { return major * 10 + minor; } @@ -28,7 +28,7 @@ typedef struct { constexpr bool operator<(int i) { return (int)as_combined_number() < i; } constexpr bool operator>=(int i) { return (int)as_combined_number() >= i; } constexpr bool operator<=(int i) { return (int)as_combined_number() <= i; } -} compute_capability_t; +}; /** * @brief Make compute capability from major and minor versions. @@ -76,18 +76,13 @@ enum : size_t { KiB = 1024, K = 1024 }; * \return const char* architecture name or nullptr if capability is invalid */ inline constexpr const char* arch_name(compute_capability_t capability) { - return (capability.major == 8) - ? "Ampere" - : (capability.major == 7 && capability.minor == 5) - ? "Turing" - : (capability.major == 7) - ? "Volta" - : (capability.major == 6) - ? "Pascal" - : (capability.major == 5) - ? "Maxwell" - : (capability.major == 3) ? "Kepler" - : nullptr; + return (capability.major == 8) ? "Ampere" + : (capability.major == 7 && capability.minor == 5) ? "Turing" + : (capability.major == 7) ? "Volta" + : (capability.major == 6) ? "Pascal" + : (capability.major == 5) ? "Maxwell" + : (capability.major == 3) ? "Kepler" + : nullptr; } // Device properties retrieved from: @@ -115,11 +110,14 @@ inline constexpr unsigned warp_max_threads() { * \return unsigned */ inline constexpr unsigned sm_max_ctas(compute_capability_t capability) { - return (capability >= 86) ? 16 : // SM86+ - (capability >= 80) ? 32 : // SM80 - (capability >= 75) ? 16 : // SM75 - (capability >= 50) ? 32 : // SM50-SM72 - 16; // SM30-SM37 + return (capability >= 86) ? 16 : // SM86+ + (capability >= 80) ? 32 + : // SM80 + (capability >= 75) ? 16 + : // SM75 + (capability >= 50) ? 32 + : // SM50-SM72 + 16; // SM30-SM37 } /** @@ -128,10 +126,12 @@ inline constexpr unsigned sm_max_ctas(compute_capability_t capability) { * \return unsigned */ inline constexpr unsigned sm_max_threads(compute_capability_t capability) { - return (capability >= 86) ? 1536 : // SM86+ - (capability >= 80) ? 2048 : // SM80 - (capability >= 75) ? 1024 : // SM75 - 2048; // SM30-SM72 + return (capability >= 86) ? 1536 : // SM86+ + (capability >= 80) ? 2048 + : // SM80 + (capability >= 75) ? 1024 + : // SM75 + 2048; // SM30-SM72 } /** @@ -140,9 +140,10 @@ inline constexpr unsigned sm_max_threads(compute_capability_t capability) { * \return unsigned */ inline constexpr unsigned sm_registers(compute_capability_t capability) { - return (capability >= 50) ? 64 * K : // SM50+ - (capability >= 37) ? 128 * K : // SM37 - 64 * K; // SM30-SM35 + return (capability >= 50) ? 64 * K : // SM50+ + (capability >= 37) ? 128 * K + : // SM37 + 64 * K; // SM30-SM35 } /** @@ -158,30 +159,32 @@ template inline constexpr unsigned sm_max_shared_memory_bytes( compute_capability_t capability) { unsigned sm3XConfiguredSmem = - (sm3XCacheConfig == cudaFuncCachePreferNone) - ? 48 * KiB - : (sm3XCacheConfig == cudaFuncCachePreferShared) - ? 48 * KiB - : (sm3XCacheConfig == cudaFuncCachePreferL1) - ? 16 * KiB - : (sm3XCacheConfig == cudaFuncCachePreferEqual) - ? 32 * KiB - : 48 * KiB; - - return (capability >= 86) ? 100 * KiB : // SM86+ - (capability >= 80) ? 164 * KiB : // SM80 - (capability >= 75) ? 64 * KiB : // SM75 - (capability >= 70) ? 96 * KiB : // SM70-SM72 - (capability >= 62) ? 64 * KiB : // SM62 - (capability >= 61) ? 96 * KiB : // SM61 - (capability >= 53) ? 64 * KiB : // SM53 - (capability >= 52) ? 96 * KiB : // SM52 - (capability >= 50) ? 64 * KiB - : // SM50 - (capability >= 37) - ? 64 * KiB + sm3XConfiguredSmem - : // SM37 - sm3XConfiguredSmem; // SM30-SM35 + (sm3XCacheConfig == cudaFuncCachePreferNone) ? 48 * KiB + : (sm3XCacheConfig == cudaFuncCachePreferShared) ? 48 * KiB + : (sm3XCacheConfig == cudaFuncCachePreferL1) ? 16 * KiB + : (sm3XCacheConfig == cudaFuncCachePreferEqual) ? 32 * KiB + : 48 * KiB; + + return (capability >= 86) ? 100 * KiB : // SM86+ + (capability >= 80) ? 164 * KiB + : // SM80 + (capability >= 75) ? 64 * KiB + : // SM75 + (capability >= 70) ? 96 * KiB + : // SM70-SM72 + (capability >= 62) ? 64 * KiB + : // SM62 + (capability >= 61) ? 96 * KiB + : // SM61 + (capability >= 53) ? 64 * KiB + : // SM53 + (capability >= 52) ? 96 * KiB + : // SM52 + (capability >= 50) ? 64 * KiB + : // SM50 + (capability >= 37) ? 64 * KiB + sm3XConfiguredSmem + : // SM37 + sm3XConfiguredSmem; // SM30-SM35 } /** @@ -204,13 +207,10 @@ template < inline constexpr unsigned shared_memory_bank_stride() { // The default config on 3.x is the same constant value for later archs // Only let 3.x be configurable if stride later becomes dependent on arch - return (sm3XSmemConfig == cudaSharedMemBankSizeDefault) - ? 1 << 2 - : (sm3XSmemConfig == cudaSharedMemBankSizeFourByte) - ? 1 << 2 - : (sm3XSmemConfig == cudaSharedMemBankSizeEightByte) - ? 1 << 3 - : 1 << 2; + return (sm3XSmemConfig == cudaSharedMemBankSizeDefault) ? 1 << 2 + : (sm3XSmemConfig == cudaSharedMemBankSizeFourByte) ? 1 << 2 + : (sm3XSmemConfig == cudaSharedMemBankSizeEightByte) ? 1 << 3 + : 1 << 2; } void print(device_properties_t& prop) { From b381f2573fe773ff893054a43e4476ea723a669c Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 18:57:21 -0800 Subject: [PATCH 04/14] Better type limit test? --- include/gunrock/util/type_limits.hxx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/include/gunrock/util/type_limits.hxx b/include/gunrock/util/type_limits.hxx index aaad813e..f46d7d87 100644 --- a/include/gunrock/util/type_limits.hxx +++ b/include/gunrock/util/type_limits.hxx @@ -57,16 +57,14 @@ namespace util { namespace limits { template -__host__ __device__ __forceinline__ bool is_valid(type_t value) { +constexpr __host__ __device__ __forceinline__ bool is_valid(type_t value) { static_assert((std::is_integral::value || std::is_floating_point::value), "type_t must be an arithmetic type."); - if constexpr (std::is_integral::value) + if (std::is_integral::value) return (value != gunrock::numeric_limits::invalid()); - - // else if constexpr (std::is_floating_point::value) - // just putting else doesn't work (gives a warning), even though it should... - return isnan(value) ? false : true; // XXX: test this on device + else + return isnan(value) ? false : true; } } // namespace limits From f92b0cbf48f3f778dc317ee436b7abced4c49eeb Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 18:57:36 -0800 Subject: [PATCH 05/14] CMAKE status message ++. --- unittests/CMakeLists.txt | 1 + unittests/copy_ctor/CMakeLists.txt | 2 +- unittests/device_properties/CMakeLists.txt | 2 +- unittests/for/CMakeLists.txt | 2 +- unittests/launch_box/CMakeLists.txt | 2 +- unittests/type_limits/CMakeLists.txt | 21 +++++++++++++++++++++ unittests/type_limits/test_type_limits.cu | 2 +- 7 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 unittests/type_limits/CMakeLists.txt diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 60fe72b3..4a0b3fe1 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -9,4 +9,5 @@ add_subdirectory(copy_ctor) add_subdirectory(launch_box) add_subdirectory(device_properties) add_subdirectory(for) +add_subdirectory(type_limits) # end /* Add unit tests' subdirectories */ diff --git a/unittests/copy_ctor/CMakeLists.txt b/unittests/copy_ctor/CMakeLists.txt index 63080bbc..b7520fce 100644 --- a/unittests/copy_ctor/CMakeLists.txt +++ b/unittests/copy_ctor/CMakeLists.txt @@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME} CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} ) # XXX: Find a better way to inherit essentials properties. -message("-- Example Added: ${APPLICATION_NAME}") +message(STATUS "Example Added: ${APPLICATION_NAME}") # end /* Add CUDA executables */ \ No newline at end of file diff --git a/unittests/device_properties/CMakeLists.txt b/unittests/device_properties/CMakeLists.txt index 8ee08474..9b85db0e 100644 --- a/unittests/device_properties/CMakeLists.txt +++ b/unittests/device_properties/CMakeLists.txt @@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME} CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} ) # XXX: Find a better way to inherit essentials properties. -message("-- Example Added: ${APPLICATION_NAME}") +message(STATUS "Example Added: ${APPLICATION_NAME}") # end /* Add CUDA executables */ diff --git a/unittests/for/CMakeLists.txt b/unittests/for/CMakeLists.txt index 49092b06..7f3edc86 100644 --- a/unittests/for/CMakeLists.txt +++ b/unittests/for/CMakeLists.txt @@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME} CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} ) # XXX: Find a better way to inherit essentials properties. -message("-- Example Added: ${APPLICATION_NAME}") +message(STATUS "Example Added: ${APPLICATION_NAME}") # end /* Add CUDA executables */ \ No newline at end of file diff --git a/unittests/launch_box/CMakeLists.txt b/unittests/launch_box/CMakeLists.txt index c0dbb267..02efe74b 100644 --- a/unittests/launch_box/CMakeLists.txt +++ b/unittests/launch_box/CMakeLists.txt @@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME} CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} ) # XXX: Find a better way to inherit essentials properties. -message("-- Example Added: ${APPLICATION_NAME}") +message(STATUS "Example Added: ${APPLICATION_NAME}") # end /* Add CUDA executables */ \ No newline at end of file diff --git a/unittests/type_limits/CMakeLists.txt b/unittests/type_limits/CMakeLists.txt new file mode 100644 index 00000000..4fdbd7df --- /dev/null +++ b/unittests/type_limits/CMakeLists.txt @@ -0,0 +1,21 @@ +# begin /* Set the application name. */ +set(APPLICATION_NAME type_limits) +# end /* Set the application name. */ + +# begin /* Add CUDA executables */ +add_executable(${APPLICATION_NAME}) + +set(SOURCE_LIST + test_${APPLICATION_NAME}.cu +) + +target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST}) +target_link_libraries(${APPLICATION_NAME} PRIVATE essentials) +get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES) +set_target_properties(${APPLICATION_NAME} + PROPERTIES + CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} +) # XXX: Find a better way to inherit essentials properties. + +message(STATUS "Example Added: ${APPLICATION_NAME}") +# end /* Add CUDA executables */ \ No newline at end of file diff --git a/unittests/type_limits/test_type_limits.cu b/unittests/type_limits/test_type_limits.cu index ab268e4c..91d29491 100644 --- a/unittests/type_limits/test_type_limits.cu +++ b/unittests/type_limits/test_type_limits.cu @@ -4,7 +4,7 @@ #include void test_type_limits() { - using type_t = short unsigned int; + using type_t = unsigned int; type_t i = gunrock::numeric_limits::invalid(); std::cout << "i = " << i << " (is valid? " << std::boolalpha << gunrock::util::limits::is_valid(i) << ")" << std::endl; From 17e84753cf894dcf8525775ddcd76cef5c529269 Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 19:22:26 -0800 Subject: [PATCH 06/14] Trying a fix for isnan(int). --- include/gunrock/util/type_limits.hxx | 2 +- unittests/type_limits/test_type_limits.cu | 27 +++++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/include/gunrock/util/type_limits.hxx b/include/gunrock/util/type_limits.hxx index f46d7d87..6d7becef 100644 --- a/include/gunrock/util/type_limits.hxx +++ b/include/gunrock/util/type_limits.hxx @@ -64,7 +64,7 @@ constexpr __host__ __device__ __forceinline__ bool is_valid(type_t value) { if (std::is_integral::value) return (value != gunrock::numeric_limits::invalid()); else - return isnan(value) ? false : true; + return isnan(static_cast(value)) ? false : true; } } // namespace limits diff --git a/unittests/type_limits/test_type_limits.cu b/unittests/type_limits/test_type_limits.cu index 91d29491..8e228ee9 100644 --- a/unittests/type_limits/test_type_limits.cu +++ b/unittests/type_limits/test_type_limits.cu @@ -4,10 +4,29 @@ #include void test_type_limits() { - using type_t = unsigned int; - type_t i = gunrock::numeric_limits::invalid(); - std::cout << "i = " << i << " (is valid? " << std::boolalpha - << gunrock::util::limits::is_valid(i) << ")" << std::endl; + std::cout << "invalid = " << gunrock::numeric_limits::invalid() + << " (is valid? " << std::boolalpha + << gunrock::util::limits::is_valid( + gunrock::numeric_limits::invalid()) + << ")" << std::endl; + + std::cout << "invalid = " << gunrock::numeric_limits::invalid() + << " (is valid? " << std::boolalpha + << gunrock::util::limits::is_valid( + gunrock::numeric_limits::invalid()) + << ")" << std::endl; + + std::cout << "invalid = " << gunrock::numeric_limits::invalid() + << " (is valid? " << std::boolalpha + << gunrock::util::limits::is_valid( + gunrock::numeric_limits::invalid()) + << ")" << std::endl; + + std::cout << "invalid = " << gunrock::numeric_limits::invalid() + << " (is valid? " << std::boolalpha + << gunrock::util::limits::is_valid( + gunrock::numeric_limits::invalid()) + << ")" << std::endl; } int main(int argc, char** argv) { From 4eb6fff7afa362afd6549aa323f64a5fc829934c Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 19:23:27 -0800 Subject: [PATCH 07/14] Trying a fix in new branch --- .github/workflows/windows.yml | 2 +- include/gunrock/util/type_limits.hxx | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 9d32c2a2..84408592 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -5,7 +5,7 @@ name: Windows on: # Triggers the workflow on push or pull request events but only for the master and dev branch push: - branches: [ master, dev ] + branches: [ master, dev, windows ] pull_request: branches: [ master, dev ] diff --git a/include/gunrock/util/type_limits.hxx b/include/gunrock/util/type_limits.hxx index 6d7becef..351c8ead 100644 --- a/include/gunrock/util/type_limits.hxx +++ b/include/gunrock/util/type_limits.hxx @@ -61,6 +61,9 @@ constexpr __host__ __device__ __forceinline__ bool is_valid(type_t value) { static_assert((std::is_integral::value || std::is_floating_point::value), "type_t must be an arithmetic type."); + + // Trying: + // https://stackoverflow.com/questions/61646166/how-to-resolve-fpclassify-ambiguous-call-to-overloaded-function if (std::is_integral::value) return (value != gunrock::numeric_limits::invalid()); else From 8f1e38d55a5446927613703f7c8ba941d1154585 Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 19:59:50 -0800 Subject: [PATCH 08/14] Need verbosity to debug geo.cu, bump CUDA version as well. --- .github/workflows/windows.yml | 2 +- CMakeLists.txt | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 84408592..e53c7bc1 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -26,7 +26,7 @@ jobs: uses: Jimver/cuda-toolkit@v0.2.5 id: cuda-toolkit with: - cuda: '11.4.0' # Build fails @ 11.5.1, 11.3.1 + cuda: '11.5.1' # Build fails @ 11.5.1, 11.3.1 (11.4.0 passes except geo.cu) linux-local-args: '["--toolkit"]' # Runs a single command using the runners shell diff --git a/CMakeLists.txt b/CMakeLists.txt index 634c9776..aac44448 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,9 @@ project(essentials LANGUAGES CXX CUDA ) +# Increase verbosity. +set(CMAKE_VERBOSE_MAKEFILE ON) + # begin /* Dependencies directory */ set(PROJECT_DEPS_DIR externals) # end /* Dependencies directory */ @@ -114,16 +117,16 @@ target_sources(essentials #################################################### set(CXX_FLAGS $<$: - -D_SCL_SECURE_NO_WARNINGS + /W4 > $<$: -Wall + -Wextra -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-aliasing -Wno-unused-function -Wno-format-security - # -Wextra # -Werror # -vvv > From d9efb32bcda35263a5c1813e3b81d2a160a75dfd Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 20:29:39 -0800 Subject: [PATCH 09/14] For MSC host platform/dialect, an extended lambda cannot be defined inside the \'if\' or \'else\' block of a constexpr if statement. --- .github/workflows/windows.yml | 2 +- CMakeLists.txt | 4 +-- .../gunrock/framework/operators/for/for.hxx | 36 +++++++++++-------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index e53c7bc1..772ff4b6 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -48,7 +48,7 @@ jobs: - name: Build all applications # Build your program with the given configuration - run: cmake --build ${{github.workspace}}/build # --target sssp + run: cmake --build ${{github.workspace}}/build --verbose # --target sssp # TODO: Use the following once we have BUILD_TYPE and envrionment variables set-up: # run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} diff --git a/CMakeLists.txt b/CMakeLists.txt index aac44448..f27d6725 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ project(essentials ) # Increase verbosity. -set(CMAKE_VERBOSE_MAKEFILE ON) +# set(CMAKE_VERBOSE_MAKEFILE ON) # begin /* Dependencies directory */ set(PROJECT_DEPS_DIR externals) @@ -121,7 +121,7 @@ set(CXX_FLAGS > $<$: -Wall - -Wextra + # -Wextra -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-aliasing diff --git a/include/gunrock/framework/operators/for/for.hxx b/include/gunrock/framework/operators/for/for.hxx index 106e5618..ec1ba21a 100644 --- a/include/gunrock/framework/operators/for/for.hxx +++ b/include/gunrock/framework/operators/for/for.hxx @@ -11,6 +11,19 @@ namespace gunrock { namespace operators { namespace parallel_for { +namespace detail { +template +void for_each(operator_t apply, index_t size, cuda::multi_context_t& context) { + auto single_context = context.get_context(0); + thrust::for_each( + thrust::cuda::par.on(single_context->stream()), + thrust::make_counting_iterator(0), // Begin: 0 + thrust::make_counting_iterator(size), // End: # of V/E + apply // Unary Operator + ); +} +} // namespace detail + template void execute(graph_t& G, operator_t op, cuda::multi_context_t& context) { using index_t = std::conditional_tstream()), - thrust::make_counting_iterator(0), // Begin: 0 - thrust::make_counting_iterator(size), // End: # of V/E - apply // Unary Operator - ); + /// Note: For certain host platform/dialect, an extended lambda cannot be + /// defined inside the 'if' or 'else' block of a constexpr if statement. + if /* constexpr */ (type == parallel_for_each_t::weight) { + detail::for_each( + [=] __device__(index_t const& x) { op(G.get_edge_weight(x)); }, size, + context); } else { - auto apply = [=] __device__(index_t const& x) { op(x); }; - thrust::for_each( - thrust::cuda::par.on(single_context->stream()), - thrust::make_counting_iterator(0), // Begin: 0 - thrust::make_counting_iterator(size), // End: # of V/E - apply // Unary Operator - ); + detail::for_each([=] __device__(index_t const& x) { op(x); }, size, + context); } } From 79f0b703cfe9c96f6f34517724cd50324ffdb6ed Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 21:40:28 -0800 Subject: [PATCH 10/14] Let's debug geo. --- examples/geo/geo.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/geo/geo.cu b/examples/geo/geo.cu index 38416b72..b2606a46 100644 --- a/examples/geo/geo.cu +++ b/examples/geo/geo.cu @@ -164,8 +164,8 @@ void test_geo(int num_arguments, char** argument_array) { // -- // GPU Run - float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(), - total_iterations, spatial_iterations); + // float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(), + // total_iterations, spatial_iterations); // -- // Log + Validate From fe702e1c7b0c2cd3d9e0575152791dea5448bbac Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 21:41:12 -0800 Subject: [PATCH 11/14] Let's debug geo. --- examples/geo/geo.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/geo/geo.cu b/examples/geo/geo.cu index b2606a46..f7ca1f2d 100644 --- a/examples/geo/geo.cu +++ b/examples/geo/geo.cu @@ -163,8 +163,9 @@ void test_geo(int num_arguments, char** argument_array) { // -- // GPU Run + float gpu_elapsed = 0; - // float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(), + // gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(), // total_iterations, spatial_iterations); // -- From ad563d0cd6e3754a399b7984199beaa24fd9c82c Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 21:57:21 -0800 Subject: [PATCH 12/14] Testing if the issue is type limits on device --- unittests/type_limits/test_type_limits.cu | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/unittests/type_limits/test_type_limits.cu b/unittests/type_limits/test_type_limits.cu index 8e228ee9..afa56a05 100644 --- a/unittests/type_limits/test_type_limits.cu +++ b/unittests/type_limits/test_type_limits.cu @@ -3,6 +3,10 @@ #include +#include +#include +#include + void test_type_limits() { std::cout << "invalid = " << gunrock::numeric_limits::invalid() << " (is valid? " << std::boolalpha @@ -27,6 +31,20 @@ void test_type_limits() { << gunrock::util::limits::is_valid( gunrock::numeric_limits::invalid()) << ")" << std::endl; + + auto apply = [=] __device__(int const& x) { + auto y = gunrock::numeric_limits::invalid(); + bool v = gunrock::util::limits::is_valid(x); + bool inv = gunrock::util::limits::is_valid(y); + + printf("%f\n", y); + }; + + thrust::for_each(thrust::device, + thrust::make_counting_iterator(0), // Begin: 0 + thrust::make_counting_iterator(1), // End: 1 + apply // Unary Operator + ); } int main(int argc, char** argv) { From 16322f7a345d6c94b33cde9fa5ed055f3e14580a Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 22:08:21 -0800 Subject: [PATCH 13/14] More fixes for geo, GCC conditionals, and a weird ref. --- examples/geo/geo.cu | 6 +++--- include/gunrock/algorithms/geo.hxx | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/examples/geo/geo.cu b/examples/geo/geo.cu index f7ca1f2d..2480bc2b 100644 --- a/examples/geo/geo.cu +++ b/examples/geo/geo.cu @@ -163,10 +163,10 @@ void test_geo(int num_arguments, char** argument_array) { // -- // GPU Run - float gpu_elapsed = 0; + // float gpu_elapsed = 0; - // gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(), - // total_iterations, spatial_iterations); + float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(), + total_iterations, spatial_iterations); // -- // Log + Validate diff --git a/include/gunrock/algorithms/geo.hxx b/include/gunrock/algorithms/geo.hxx index 7fdc0657..4e6bda9f 100644 --- a/include/gunrock/algorithms/geo.hxx +++ b/include/gunrock/algorithms/geo.hxx @@ -210,7 +210,11 @@ __device__ __host__ void spatial_median(graph_t& G, R.latitude = (T.latitude - y.latitude) * Dinvs; R.longitude = (T.longitude - y.longitude) * Dinvs; r = sqrt(R.latitude * R.latitude + R.longitude * R.longitude); - rinv = r == 0 ?: num_zeros / r; + + // Was rinv = (r == 0) ?: (num_zeros / r); + // https://gcc.gnu.org/onlinedocs/gcc/Conditionals.html + // ... I hate myself too. + rinv = (r == 0) ? 0 : (num_zeros / r); y1.latitude = max(0.0f, 1 - rinv) * T.latitude + min(1.0f, rinv) * y.latitude; // latitude @@ -394,9 +398,9 @@ struct enactor_t : gunrock::enactor_t { template float run(graph_t& G, - coordinates_t* coordinates, // Input/Output - unsigned int& total_iterations, // Parameter - unsigned int& spatial_iterations = 1000 // Parameter + coordinates_t* coordinates, // Input/Output + const unsigned int total_iterations, // Parameter + const unsigned int spatial_iterations = 1000 // Parameter ) { // using param_type = param_t; From 8c6bf81e77f078106fd4f25f7461b91dd4d549ab Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sun, 19 Dec 2021 22:45:54 -0800 Subject: [PATCH 14/14] final cleanup --- .github/workflows/windows.yml | 6 +++--- examples/geo/geo.cu | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 772ff4b6..d0010770 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -5,7 +5,7 @@ name: Windows on: # Triggers the workflow on push or pull request events but only for the master and dev branch push: - branches: [ master, dev, windows ] + branches: [ master, dev ] pull_request: branches: [ master, dev ] @@ -26,7 +26,7 @@ jobs: uses: Jimver/cuda-toolkit@v0.2.5 id: cuda-toolkit with: - cuda: '11.5.1' # Build fails @ 11.5.1, 11.3.1 (11.4.0 passes except geo.cu) + cuda: '11.5.1' linux-local-args: '["--toolkit"]' # Runs a single command using the runners shell @@ -48,7 +48,7 @@ jobs: - name: Build all applications # Build your program with the given configuration - run: cmake --build ${{github.workspace}}/build --verbose # --target sssp + run: cmake --build ${{github.workspace}}/build # --verbose # --target sssp # TODO: Use the following once we have BUILD_TYPE and envrionment variables set-up: # run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} diff --git a/examples/geo/geo.cu b/examples/geo/geo.cu index 2480bc2b..38416b72 100644 --- a/examples/geo/geo.cu +++ b/examples/geo/geo.cu @@ -163,7 +163,6 @@ void test_geo(int num_arguments, char** argument_array) { // -- // GPU Run - // float gpu_elapsed = 0; float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(), total_iterations, spatial_iterations);