diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 9d32c2a2..d0010770 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -26,7 +26,7 @@ jobs: uses: Jimver/cuda-toolkit@v0.2.5 id: cuda-toolkit with: - cuda: '11.4.0' # Build fails @ 11.5.1, 11.3.1 + cuda: '11.5.1' linux-local-args: '["--toolkit"]' # Runs a single command using the runners shell @@ -48,7 +48,7 @@ jobs: - name: Build all applications # Build your program with the given configuration - run: cmake --build ${{github.workspace}}/build # --target sssp + run: cmake --build ${{github.workspace}}/build # --verbose # --target sssp # TODO: Use the following once we have BUILD_TYPE and envrionment variables set-up: # run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} diff --git a/CMakeLists.txt b/CMakeLists.txt index 634c9776..f27d6725 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,9 @@ project(essentials LANGUAGES CXX CUDA ) +# Increase verbosity. +# set(CMAKE_VERBOSE_MAKEFILE ON) + # begin /* Dependencies directory */ set(PROJECT_DEPS_DIR externals) # end /* Dependencies directory */ @@ -114,16 +117,16 @@ target_sources(essentials #################################################### set(CXX_FLAGS $<$: - -D_SCL_SECURE_NO_WARNINGS + /W4 > $<$: -Wall + # -Wextra -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-aliasing -Wno-unused-function -Wno-format-security - # -Wextra # -Werror # -vvv > diff --git a/include/gunrock/algorithms/geo.hxx b/include/gunrock/algorithms/geo.hxx index 7fdc0657..4e6bda9f 100644 --- a/include/gunrock/algorithms/geo.hxx +++ b/include/gunrock/algorithms/geo.hxx @@ -210,7 +210,11 @@ __device__ __host__ void spatial_median(graph_t& G, R.latitude = (T.latitude - y.latitude) * Dinvs; R.longitude = (T.longitude - y.longitude) * Dinvs; r = sqrt(R.latitude * R.latitude + R.longitude * R.longitude); - rinv = r == 0 ?: num_zeros / r; + + // Was rinv = (r == 0) ?: (num_zeros / r); + // https://gcc.gnu.org/onlinedocs/gcc/Conditionals.html + // ... I hate myself too. + rinv = (r == 0) ? 0 : (num_zeros / r); y1.latitude = max(0.0f, 1 - rinv) * T.latitude + min(1.0f, rinv) * y.latitude; // latitude @@ -394,9 +398,9 @@ struct enactor_t : gunrock::enactor_t { template float run(graph_t& G, - coordinates_t* coordinates, // Input/Output - unsigned int& total_iterations, // Parameter - unsigned int& spatial_iterations = 1000 // Parameter + coordinates_t* coordinates, // Input/Output + const unsigned int total_iterations, // Parameter + const unsigned int spatial_iterations = 1000 // Parameter ) { // using param_type = param_t; diff --git a/include/gunrock/cuda/detail/launch_box.hxx b/include/gunrock/cuda/detail/launch_box.hxx index a364a493..1c672ce5 100644 --- a/include/gunrock/cuda/detail/launch_box.hxx +++ b/include/gunrock/cuda/detail/launch_box.hxx @@ -24,11 +24,11 @@ namespace detail { * @tparam sm_flags_ Bitwise flags indicating SM versions (`sm_flag_t` enum). */ template -struct launch_params_abc_t { +struct launch_params_base_t { enum : unsigned { sm_flags = sm_flags_ }; protected: - launch_params_abc_t() {} + launch_params_base_t() {} }; /** @@ -60,7 +60,7 @@ struct raise_not_found_error_t { /** * @brief Subsets a pack of launch parameters (children of - * `launch_params_abc_t`), selecting the ones that match the architecture being + * `launch_params_base_t`), selecting the ones that match the architecture being * compiled for, stored in a tuple type. * * @par Overview diff --git a/include/gunrock/cuda/device_properties.hxx b/include/gunrock/cuda/device_properties.hxx index c192dc43..fd62eb9d 100644 --- a/include/gunrock/cuda/device_properties.hxx +++ b/include/gunrock/cuda/device_properties.hxx @@ -18,7 +18,7 @@ namespace cuda { typedef cudaDeviceProp device_properties_t; -typedef struct { +struct compute_capability_t { unsigned major; unsigned minor; constexpr unsigned as_combined_number() const { return major * 10 + minor; } @@ -28,7 +28,7 @@ typedef struct { constexpr bool operator<(int i) { return (int)as_combined_number() < i; } constexpr bool operator>=(int i) { return (int)as_combined_number() >= i; } constexpr bool operator<=(int i) { return (int)as_combined_number() <= i; } -} compute_capability_t; +}; /** * @brief Make compute capability from major and minor versions. @@ -76,18 +76,13 @@ enum : size_t { KiB = 1024, K = 1024 }; * \return const char* architecture name or nullptr if capability is invalid */ inline constexpr const char* arch_name(compute_capability_t capability) { - return (capability.major == 8) - ? "Ampere" - : (capability.major == 7 && capability.minor == 5) - ? "Turing" - : (capability.major == 7) - ? "Volta" - : (capability.major == 6) - ? "Pascal" - : (capability.major == 5) - ? "Maxwell" - : (capability.major == 3) ? "Kepler" - : nullptr; + return (capability.major == 8) ? "Ampere" + : (capability.major == 7 && capability.minor == 5) ? "Turing" + : (capability.major == 7) ? "Volta" + : (capability.major == 6) ? "Pascal" + : (capability.major == 5) ? "Maxwell" + : (capability.major == 3) ? "Kepler" + : nullptr; } // Device properties retrieved from: @@ -115,11 +110,14 @@ inline constexpr unsigned warp_max_threads() { * \return unsigned */ inline constexpr unsigned sm_max_ctas(compute_capability_t capability) { - return (capability >= 86) ? 16 : // SM86+ - (capability >= 80) ? 32 : // SM80 - (capability >= 75) ? 16 : // SM75 - (capability >= 50) ? 32 : // SM50-SM72 - 16; // SM30-SM37 + return (capability >= 86) ? 16 : // SM86+ + (capability >= 80) ? 32 + : // SM80 + (capability >= 75) ? 16 + : // SM75 + (capability >= 50) ? 32 + : // SM50-SM72 + 16; // SM30-SM37 } /** @@ -128,10 +126,12 @@ inline constexpr unsigned sm_max_ctas(compute_capability_t capability) { * \return unsigned */ inline constexpr unsigned sm_max_threads(compute_capability_t capability) { - return (capability >= 86) ? 1536 : // SM86+ - (capability >= 80) ? 2048 : // SM80 - (capability >= 75) ? 1024 : // SM75 - 2048; // SM30-SM72 + return (capability >= 86) ? 1536 : // SM86+ + (capability >= 80) ? 2048 + : // SM80 + (capability >= 75) ? 1024 + : // SM75 + 2048; // SM30-SM72 } /** @@ -140,9 +140,10 @@ inline constexpr unsigned sm_max_threads(compute_capability_t capability) { * \return unsigned */ inline constexpr unsigned sm_registers(compute_capability_t capability) { - return (capability >= 50) ? 64 * K : // SM50+ - (capability >= 37) ? 128 * K : // SM37 - 64 * K; // SM30-SM35 + return (capability >= 50) ? 64 * K : // SM50+ + (capability >= 37) ? 128 * K + : // SM37 + 64 * K; // SM30-SM35 } /** @@ -158,30 +159,32 @@ template inline constexpr unsigned sm_max_shared_memory_bytes( compute_capability_t capability) { unsigned sm3XConfiguredSmem = - (sm3XCacheConfig == cudaFuncCachePreferNone) - ? 48 * KiB - : (sm3XCacheConfig == cudaFuncCachePreferShared) - ? 48 * KiB - : (sm3XCacheConfig == cudaFuncCachePreferL1) - ? 16 * KiB - : (sm3XCacheConfig == cudaFuncCachePreferEqual) - ? 32 * KiB - : 48 * KiB; - - return (capability >= 86) ? 100 * KiB : // SM86+ - (capability >= 80) ? 164 * KiB : // SM80 - (capability >= 75) ? 64 * KiB : // SM75 - (capability >= 70) ? 96 * KiB : // SM70-SM72 - (capability >= 62) ? 64 * KiB : // SM62 - (capability >= 61) ? 96 * KiB : // SM61 - (capability >= 53) ? 64 * KiB : // SM53 - (capability >= 52) ? 96 * KiB : // SM52 - (capability >= 50) ? 64 * KiB - : // SM50 - (capability >= 37) - ? 64 * KiB + sm3XConfiguredSmem - : // SM37 - sm3XConfiguredSmem; // SM30-SM35 + (sm3XCacheConfig == cudaFuncCachePreferNone) ? 48 * KiB + : (sm3XCacheConfig == cudaFuncCachePreferShared) ? 48 * KiB + : (sm3XCacheConfig == cudaFuncCachePreferL1) ? 16 * KiB + : (sm3XCacheConfig == cudaFuncCachePreferEqual) ? 32 * KiB + : 48 * KiB; + + return (capability >= 86) ? 100 * KiB : // SM86+ + (capability >= 80) ? 164 * KiB + : // SM80 + (capability >= 75) ? 64 * KiB + : // SM75 + (capability >= 70) ? 96 * KiB + : // SM70-SM72 + (capability >= 62) ? 64 * KiB + : // SM62 + (capability >= 61) ? 96 * KiB + : // SM61 + (capability >= 53) ? 64 * KiB + : // SM53 + (capability >= 52) ? 96 * KiB + : // SM52 + (capability >= 50) ? 64 * KiB + : // SM50 + (capability >= 37) ? 64 * KiB + sm3XConfiguredSmem + : // SM37 + sm3XConfiguredSmem; // SM30-SM35 } /** @@ -204,13 +207,10 @@ template < inline constexpr unsigned shared_memory_bank_stride() { // The default config on 3.x is the same constant value for later archs // Only let 3.x be configurable if stride later becomes dependent on arch - return (sm3XSmemConfig == cudaSharedMemBankSizeDefault) - ? 1 << 2 - : (sm3XSmemConfig == cudaSharedMemBankSizeFourByte) - ? 1 << 2 - : (sm3XSmemConfig == cudaSharedMemBankSizeEightByte) - ? 1 << 3 - : 1 << 2; + return (sm3XSmemConfig == cudaSharedMemBankSizeDefault) ? 1 << 2 + : (sm3XSmemConfig == cudaSharedMemBankSizeFourByte) ? 1 << 2 + : (sm3XSmemConfig == cudaSharedMemBankSizeEightByte) ? 1 << 3 + : 1 << 2; } void print(device_properties_t& prop) { diff --git a/include/gunrock/cuda/launch_box.hxx b/include/gunrock/cuda/launch_box.hxx index 513708f9..802fe696 100644 --- a/include/gunrock/cuda/launch_box.hxx +++ b/include/gunrock/cuda/launch_box.hxx @@ -25,9 +25,27 @@ namespace gunrock { namespace cuda { - namespace launch_box { +struct dimensions_t { + unsigned int x, y, z; + + __host__ __device__ constexpr dimensions_t(const unsigned int _x = 1, + const unsigned int _y = 1, + const unsigned int _z = 1) + : x(_x), y(_y), z(_z) {} + + __host__ __device__ constexpr unsigned int size() const { return x * y * z; } + +#ifdef _MSC_VER + __host__ __device__ operator dim3(void) const { return uint3{x, y, z}; } +#else + __host__ __device__ constexpr operator dim3(void) const { + return uint3{x, y, z}; + } +#endif +}; + /** * @brief CUDA dim3 template representation, since dim3 cannot be used as a * template argument. @@ -36,10 +54,14 @@ namespace launch_box { * @tparam y_ (default = `1`) Dimension in the Y direction. * @tparam z_ (default = `1`) Dimension in the Z direction. */ -template +template struct dim3_t { - enum : unsigned int { x = x_, y = y_, z = z_, size = x_ * y_ * z_ }; - static constexpr dim3 get_dim3() { return dim3(x, y, z); } + enum : unsigned int { x = x_, y = y_, z = z_ }; + static constexpr unsigned int size() { return x * y * z; } + static constexpr dimensions_t dimensions() { return {x, y, z}; } + + // Convertors must be non-static members. + constexpr operator dimensions_t(void) { return {x, y, z}; } }; /** @@ -97,13 +119,15 @@ template -struct launch_params_t : detail::launch_params_abc_t { +struct launch_params_t : detail::launch_params_base_t { typedef block_dimensions_ block_dimensions_t; typedef grid_dimensions_ grid_dimensions_t; enum : size_t { shared_memory_bytes = shared_memory_bytes_ }; - static constexpr dim3 block_dimensions = block_dimensions_t::get_dim3(); - static constexpr dim3 grid_dimensions = grid_dimensions_t::get_dim3(); + static constexpr dimensions_t block_dimensions = + block_dimensions_t::dimensions(); + static constexpr dimensions_t grid_dimensions = + grid_dimensions_t::dimensions(); standard_context_t& context; launch_params_t(standard_context_t& context_) : context(context_) {} @@ -121,15 +145,17 @@ struct launch_params_t : detail::launch_params_abc_t { template -struct launch_params_dynamic_block_t : detail::launch_params_abc_t { +struct launch_params_dynamic_block_t : detail::launch_params_base_t { typedef grid_dimensions_ grid_dimensions_t; enum : size_t { shared_memory_bytes = shared_memory_bytes_ }; - dim3 block_dimensions; - static constexpr dim3 grid_dimensions = grid_dimensions_t::get_dim3(); + dimensions_t block_dimensions; + static constexpr dimensions_t grid_dimensions = + grid_dimensions_t::dimensions(); + standard_context_t& context; - launch_params_dynamic_block_t(dim3 block_dimensions_, + launch_params_dynamic_block_t(dimensions_t block_dimensions_, standard_context_t& context_) : block_dimensions(block_dimensions_), context(context_) {} }; @@ -146,17 +172,33 @@ struct launch_params_dynamic_block_t : detail::launch_params_abc_t { template -struct launch_params_dynamic_grid_t : detail::launch_params_abc_t { +struct launch_params_dynamic_grid_t : detail::launch_params_base_t { typedef block_dimensions_ block_dimensions_t; enum : size_t { shared_memory_bytes = shared_memory_bytes_ }; - static constexpr dim3 block_dimensions = block_dimensions_t::get_dim3(); - dim3 grid_dimensions; + static constexpr dimensions_t block_dimensions = + block_dimensions_t::dimensions(); + + dimensions_t grid_dimensions; standard_context_t& context; - launch_params_dynamic_grid_t(dim3 grid_dimensions_, - standard_context_t& context_) - : grid_dimensions(grid_dimensions_), context(context_) {} + launch_params_dynamic_grid_t(standard_context_t& context_) + : context(context_) {} + + void calculate_grid_dimensions(std::size_t num_elements) { + grid_dimensions = dimensions_t( + (num_elements + block_dimensions.x - 1) / block_dimensions.x, 1, 1); + } + + /** + * @brief Launch a kernel within the given launch box. + */ + template + void launch(func_t __, args_t&&... args) { + __<<< // kernel function. + grid_dimensions, block_dimensions, shared_memory_bytes, + context.stream()>>>(std::forward(args)...); + } }; /** @@ -169,7 +211,7 @@ struct launch_params_dynamic_grid_t : detail::launch_params_abc_t { template inline float occupancy(func_t kernel) { int max_active_blocks; - int block_size = launch_box_t::block_dimensions_t::size; + int block_size = launch_box_t::block_dimensions_t::size(); int device; cudaDeviceProp props; diff --git a/include/gunrock/framework/frontier/frontier.hxx b/include/gunrock/framework/frontier/frontier.hxx index 2b8b2702..3e2083c4 100644 --- a/include/gunrock/framework/frontier/frontier.hxx +++ b/include/gunrock/framework/frontier/frontier.hxx @@ -38,6 +38,9 @@ class frontier_t : public frontier::vector_frontier_t { public: using vertex_type = vertex_t; using edge_type = edge_t; + using type_t = std::conditional_t<_kind == frontier_kind_t::vertex_frontier, + vertex_t, + edge_t>; using frontier_type = frontier_t; /// TODO: This is a more permenant solution. diff --git a/include/gunrock/framework/operators/advance/block_mapped.hxx b/include/gunrock/framework/operators/advance/block_mapped.hxx index e5c847d5..98872bfb 100644 --- a/include/gunrock/framework/operators/advance/block_mapped.hxx +++ b/include/gunrock/framework/operators/advance/block_mapped.hxx @@ -27,8 +27,8 @@ namespace operators { namespace advance { namespace block_mapped { -template set_number_of_elements(size_of_output); } - std::size_t work_size = (input_type == advance_io_type_t::graph) - ? G.get_number_of_vertices() - : input->get_number_of_elements(); - - using namespace gunrock::cuda::launch_box; - using launch_t = launch_box_t>>; - - launch_t launch_box(dim3((work_size + launch_t::block_dimensions_t::x - 1) / launch_t::block_dimensions_t::x), context); - - // Launch blocked-mapped advance kernel. - block_mapped_kernel - <<>>(G, op, input->data(), output->data(), work_size, - segments.data().get()); + std::size_t num_elements = (input_type == advance_io_type_t::graph) + ? G.get_number_of_vertices() + : input->get_number_of_elements(); + + // Set-up and launch block-mapped advance. + using namespace cuda::launch_box; + using launch_t = + launch_box_t>>; + + launch_t launch_box(context); + + launch_box.calculate_grid_dimensions(num_elements); + auto __bm = block_mapped_kernel< // kernel + launch_box.block_dimensions.x, // threas per block + 1, // items per thread + input_type, output_type, // i/o parameters + graph_t, // graph type + typename frontier_t::type_t, // frontier value type + typename work_tiles_t::value_type, // segments value type + operator_t // lambda type + >; + launch_box.launch(__bm, G, op, input->data(), output->data(), num_elements, + segments.data().get()); context.synchronize(); } diff --git a/include/gunrock/framework/operators/advance/thread_mapped.hxx b/include/gunrock/framework/operators/advance/thread_mapped.hxx index 888fcf5d..d5caefe7 100644 --- a/include/gunrock/framework/operators/advance/thread_mapped.hxx +++ b/include/gunrock/framework/operators/advance/thread_mapped.hxx @@ -97,17 +97,15 @@ void execute(graph_t& G, ? G.get_number_of_vertices() : input.get_number_of_elements(); - using namespace gunrock::cuda::launch_box; - using launch_t = launch_box_t>>; - - launch_t launch_box(dim3((num_elements + launch_t::block_dimensions_t::x - 1) / launch_t::block_dimensions_t::x, 1, 1), context); - - // Launch blocked-mapped advance kernel. - thread_mapped_kernel<<>>(neighbors_expand, num_elements); + // Set-up and launch thread-mapped advance. + using namespace cuda::launch_box; + using launch_t = + launch_box_t>>; + + launch_t launch_box(context); + launch_box.calculate_grid_dimensions(num_elements); + auto __tm = thread_mapped_kernel; + launch_box.launch(__tm, neighbors_expand, num_elements); context.synchronize(); } } // namespace thread_mapped diff --git a/include/gunrock/framework/operators/for/for.hxx b/include/gunrock/framework/operators/for/for.hxx index 106e5618..ec1ba21a 100644 --- a/include/gunrock/framework/operators/for/for.hxx +++ b/include/gunrock/framework/operators/for/for.hxx @@ -11,6 +11,19 @@ namespace gunrock { namespace operators { namespace parallel_for { +namespace detail { +template +void for_each(operator_t apply, index_t size, cuda::multi_context_t& context) { + auto single_context = context.get_context(0); + thrust::for_each( + thrust::cuda::par.on(single_context->stream()), + thrust::make_counting_iterator(0), // Begin: 0 + thrust::make_counting_iterator(size), // End: # of V/E + apply // Unary Operator + ); +} +} // namespace detail + template void execute(graph_t& G, operator_t op, cuda::multi_context_t& context) { using index_t = std::conditional_tstream()), - thrust::make_counting_iterator(0), // Begin: 0 - thrust::make_counting_iterator(size), // End: # of V/E - apply // Unary Operator - ); + /// Note: For certain host platform/dialect, an extended lambda cannot be + /// defined inside the 'if' or 'else' block of a constexpr if statement. + if /* constexpr */ (type == parallel_for_each_t::weight) { + detail::for_each( + [=] __device__(index_t const& x) { op(G.get_edge_weight(x)); }, size, + context); } else { - auto apply = [=] __device__(index_t const& x) { op(x); }; - thrust::for_each( - thrust::cuda::par.on(single_context->stream()), - thrust::make_counting_iterator(0), // Begin: 0 - thrust::make_counting_iterator(size), // End: # of V/E - apply // Unary Operator - ); + detail::for_each([=] __device__(index_t const& x) { op(x); }, size, + context); } } diff --git a/include/gunrock/util/type_limits.hxx b/include/gunrock/util/type_limits.hxx index aaad813e..351c8ead 100644 --- a/include/gunrock/util/type_limits.hxx +++ b/include/gunrock/util/type_limits.hxx @@ -57,16 +57,17 @@ namespace util { namespace limits { template -__host__ __device__ __forceinline__ bool is_valid(type_t value) { +constexpr __host__ __device__ __forceinline__ bool is_valid(type_t value) { static_assert((std::is_integral::value || std::is_floating_point::value), "type_t must be an arithmetic type."); - if constexpr (std::is_integral::value) - return (value != gunrock::numeric_limits::invalid()); - // else if constexpr (std::is_floating_point::value) - // just putting else doesn't work (gives a warning), even though it should... - return isnan(value) ? false : true; // XXX: test this on device + // Trying: + // https://stackoverflow.com/questions/61646166/how-to-resolve-fpclassify-ambiguous-call-to-overloaded-function + if (std::is_integral::value) + return (value != gunrock::numeric_limits::invalid()); + else + return isnan(static_cast(value)) ? false : true; } } // namespace limits diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 60fe72b3..4a0b3fe1 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -9,4 +9,5 @@ add_subdirectory(copy_ctor) add_subdirectory(launch_box) add_subdirectory(device_properties) add_subdirectory(for) +add_subdirectory(type_limits) # end /* Add unit tests' subdirectories */ diff --git a/unittests/copy_ctor/CMakeLists.txt b/unittests/copy_ctor/CMakeLists.txt index 63080bbc..b7520fce 100644 --- a/unittests/copy_ctor/CMakeLists.txt +++ b/unittests/copy_ctor/CMakeLists.txt @@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME} CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} ) # XXX: Find a better way to inherit essentials properties. -message("-- Example Added: ${APPLICATION_NAME}") +message(STATUS "Example Added: ${APPLICATION_NAME}") # end /* Add CUDA executables */ \ No newline at end of file diff --git a/unittests/device_properties/CMakeLists.txt b/unittests/device_properties/CMakeLists.txt index 8ee08474..9b85db0e 100644 --- a/unittests/device_properties/CMakeLists.txt +++ b/unittests/device_properties/CMakeLists.txt @@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME} CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} ) # XXX: Find a better way to inherit essentials properties. -message("-- Example Added: ${APPLICATION_NAME}") +message(STATUS "Example Added: ${APPLICATION_NAME}") # end /* Add CUDA executables */ diff --git a/unittests/for/CMakeLists.txt b/unittests/for/CMakeLists.txt index 49092b06..7f3edc86 100644 --- a/unittests/for/CMakeLists.txt +++ b/unittests/for/CMakeLists.txt @@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME} CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} ) # XXX: Find a better way to inherit essentials properties. -message("-- Example Added: ${APPLICATION_NAME}") +message(STATUS "Example Added: ${APPLICATION_NAME}") # end /* Add CUDA executables */ \ No newline at end of file diff --git a/unittests/launch_box/CMakeLists.txt b/unittests/launch_box/CMakeLists.txt index c0dbb267..02efe74b 100644 --- a/unittests/launch_box/CMakeLists.txt +++ b/unittests/launch_box/CMakeLists.txt @@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME} CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} ) # XXX: Find a better way to inherit essentials properties. -message("-- Example Added: ${APPLICATION_NAME}") +message(STATUS "Example Added: ${APPLICATION_NAME}") # end /* Add CUDA executables */ \ No newline at end of file diff --git a/unittests/launch_box/test_launch_box.cu b/unittests/launch_box/test_launch_box.cu index 0945a9a5..0f611b6e 100644 --- a/unittests/launch_box/test_launch_box.cu +++ b/unittests/launch_box/test_launch_box.cu @@ -50,8 +50,9 @@ void test_fallback() { } void test_define() { - dim3 block_dimensions = launch_t::block_dimensions_t::get_dim3(); - dim3 grid_dimensions = launch_t::grid_dimensions_t::get_dim3(); + dimensions_t block_dimensions = launch_t::block_dimensions_t::dimensions(); + dimensions_t grid_dimensions = launch_t::grid_dimensions_t::dimensions(); + dim3 conversion_test = block_dimensions; size_t smem = launch_t::shared_memory_bytes; std::cout << "block_dimensions: " << block_dimensions.x << ", " diff --git a/unittests/type_limits/CMakeLists.txt b/unittests/type_limits/CMakeLists.txt new file mode 100644 index 00000000..4fdbd7df --- /dev/null +++ b/unittests/type_limits/CMakeLists.txt @@ -0,0 +1,21 @@ +# begin /* Set the application name. */ +set(APPLICATION_NAME type_limits) +# end /* Set the application name. */ + +# begin /* Add CUDA executables */ +add_executable(${APPLICATION_NAME}) + +set(SOURCE_LIST + test_${APPLICATION_NAME}.cu +) + +target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST}) +target_link_libraries(${APPLICATION_NAME} PRIVATE essentials) +get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES) +set_target_properties(${APPLICATION_NAME} + PROPERTIES + CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} +) # XXX: Find a better way to inherit essentials properties. + +message(STATUS "Example Added: ${APPLICATION_NAME}") +# end /* Add CUDA executables */ \ No newline at end of file diff --git a/unittests/type_limits/test_type_limits.cu b/unittests/type_limits/test_type_limits.cu index ab268e4c..afa56a05 100644 --- a/unittests/type_limits/test_type_limits.cu +++ b/unittests/type_limits/test_type_limits.cu @@ -3,11 +3,48 @@ #include +#include +#include +#include + void test_type_limits() { - using type_t = short unsigned int; - type_t i = gunrock::numeric_limits::invalid(); - std::cout << "i = " << i << " (is valid? " << std::boolalpha - << gunrock::util::limits::is_valid(i) << ")" << std::endl; + std::cout << "invalid = " << gunrock::numeric_limits::invalid() + << " (is valid? " << std::boolalpha + << gunrock::util::limits::is_valid( + gunrock::numeric_limits::invalid()) + << ")" << std::endl; + + std::cout << "invalid = " << gunrock::numeric_limits::invalid() + << " (is valid? " << std::boolalpha + << gunrock::util::limits::is_valid( + gunrock::numeric_limits::invalid()) + << ")" << std::endl; + + std::cout << "invalid = " << gunrock::numeric_limits::invalid() + << " (is valid? " << std::boolalpha + << gunrock::util::limits::is_valid( + gunrock::numeric_limits::invalid()) + << ")" << std::endl; + + std::cout << "invalid = " << gunrock::numeric_limits::invalid() + << " (is valid? " << std::boolalpha + << gunrock::util::limits::is_valid( + gunrock::numeric_limits::invalid()) + << ")" << std::endl; + + auto apply = [=] __device__(int const& x) { + auto y = gunrock::numeric_limits::invalid(); + bool v = gunrock::util::limits::is_valid(x); + bool inv = gunrock::util::limits::is_valid(y); + + printf("%f\n", y); + }; + + thrust::for_each(thrust::device, + thrust::make_counting_iterator(0), // Begin: 0 + thrust::make_counting_iterator(1), // End: 1 + apply // Unary Operator + ); } int main(int argc, char** argv) {