From b028d2ea5770c812d9679596756e86ec719807e2 Mon Sep 17 00:00:00 2001 From: Nara Prasetya Date: Fri, 8 Sep 2023 20:42:45 +0000 Subject: [PATCH] Block Runlength Decode: Fix incorrect offsets and improve test --- .../rocprim/block/block_run_length_decode.hpp | 2 +- test/rocprim/test_block_run_length_decode.cpp | 87 +++++++++++++++---- 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/rocprim/include/rocprim/block/block_run_length_decode.hpp b/rocprim/include/rocprim/block/block_run_length_decode.hpp index a4b6ce93d..c1dc8ae0d 100644 --- a/rocprim/include/rocprim/block/block_run_length_decode.hpp +++ b/rocprim/include/rocprim/block/block_run_length_decode.hpp @@ -331,7 +331,7 @@ class block_run_length_decode val = temp_storage.runs.run_values[current_run]; // The run bounds - current_run_begin = thread_decoded_offset; + current_run_begin = temp_storage.runs.run_offsets[current_run]; current_run_end = temp_storage.runs.run_offsets[++current_run]; } diff --git a/test/rocprim/test_block_run_length_decode.cpp b/test/rocprim/test_block_run_length_decode.cpp index 6fecc5501..c16853af7 100644 --- a/test/rocprim/test_block_run_length_decode.cpp +++ b/test/rocprim/test_block_run_length_decode.cpp @@ -24,9 +24,9 @@ // required rocprim headers #include -#include #include #include +#include // required test headers #include "rocprim/block/block_load_func.hpp" @@ -55,7 +55,6 @@ class HipcubBlockRunLengthDecodeTest : public ::testing::Test using params = Params; }; -//using HipcubBlockRunLengthDecodeTestParams = ::testing::Types>; using HipcubBlockRunLengthDecodeTestParams = ::testing::Types, Params, @@ -92,8 +91,11 @@ template -__global__ __launch_bounds__(BlockSize) void block_run_length_decode_kernel( - const ItemT* d_run_items, const LengthT* d_run_lengths, ItemT* d_decoded_items) +__global__ + __launch_bounds__(BlockSize) void block_run_length_decode_kernel(const ItemT* d_run_items, + const LengthT* d_run_lengths, + ItemT* d_decoded_items, + LengthT* d_decoded_offsets) { using BlockRunLengthDecodeT = rocprim::block_run_length_decode; @@ -118,9 +120,13 @@ __global__ __launch_bounds__(BlockSize) void block_run_length_decode_kernel( unsigned decoded_window_offset = 0; while(decoded_window_offset < total_decoded_size) { - ItemT decoded_items[DecodedItemsPerThread]; + ItemT decoded_items[DecodedItemsPerThread]; + LengthT decoded_offsets[DecodedItemsPerThread]; + + block_run_length_decode.run_length_decode(decoded_items, + decoded_offsets, + decoded_window_offset); - block_run_length_decode.run_length_decode(decoded_items, decoded_window_offset); rocprim::block_store_direct_blocked( global_thread_idx, d_decoded_items + decoded_window_offset, @@ -128,13 +134,20 @@ __global__ __launch_bounds__(BlockSize) void block_run_length_decode_kernel( rocprim::minimum{}(total_decoded_size - decoded_window_offset, decoded_items_per_block)); + rocprim::block_store_direct_blocked( + global_thread_idx, + d_decoded_offsets + decoded_window_offset, + decoded_offsets, + rocprim::minimum{}(total_decoded_size - decoded_window_offset, + decoded_items_per_block)); + decoded_window_offset += decoded_items_per_block; } } TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode) { - int device_id = test_common_utils::obtain_device_from_ctest(); + const int device_id = test_common_utils::obtain_device_from_ctest(); SCOPED_TRACE(testing::Message() << "with device_id= " << device_id); HIP_CHECK(hipSetDevice(device_id)); @@ -146,19 +159,34 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode) for(size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - const unsigned int seed_value + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - const LengthT max_run_length = static_cast( + size_t num_runs = runs_per_thread * block_size; + constexpr LengthT max_run_length = static_cast( std::min(1000ll, static_cast(std::numeric_limits::max()))); - size_t num_runs = runs_per_thread * block_size; - auto run_items = test_utils::get_random_data(num_runs, - std::numeric_limits::min(), - std::numeric_limits::max(), - seed_value); - auto run_lengths = test_utils::get_random_data(num_runs, + auto run_items = std::vector(num_runs); + run_items[0] = test_utils::get_random_value(test_utils::numeric_limits::min(), + test_utils::numeric_limits::max(), + ++seed_value); + + size_t run_item_index = 1; + while(run_item_index < num_runs) + { + run_items[run_item_index] + = test_utils::get_random_value(test_utils::numeric_limits::min(), + test_utils::numeric_limits::max(), + ++seed_value); + if(test_utils::convert_to_native(run_items[run_item_index]) + != test_utils::convert_to_native(run_items[run_item_index - 1])) + { + ++run_item_index; + } + } + + auto run_lengths = test_utils::get_random_data(num_runs, static_cast(1), max_run_length, seed_value); @@ -205,12 +233,18 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode) HIP_CHECK( test_common_utils::hipMallocHelper(&d_decoded_runs, expected.size() * sizeof(ItemT))); + LengthT* d_decoded_offsets{}; + HIP_CHECK(test_common_utils::hipMallocHelper(&d_decoded_offsets, + expected.size() * sizeof(LengthT))); block_run_length_decode_kernel - <<>>(d_run_items, d_run_lengths, d_decoded_runs); + <<>>(d_run_items, + d_run_lengths, + d_decoded_runs, + d_decoded_offsets); HIP_CHECK(hipPeekAtLastError()); HIP_CHECK(hipDeviceSynchronize()); @@ -222,14 +256,35 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode) hipMemcpyDeviceToHost)); HIP_CHECK(hipGetLastError()) + std::vector offsets(expected.size()); + HIP_CHECK(hipMemcpy(offsets.data(), + d_decoded_offsets, + offsets.size() * sizeof(LengthT), + hipMemcpyDeviceToHost)); + HIP_CHECK(hipFree(d_run_items)); HIP_CHECK(hipFree(d_run_lengths)); HIP_CHECK(hipFree(d_decoded_runs)); + HIP_CHECK(hipFree(d_decoded_offsets)); + unsigned int expected_offset = -1; + ItemT previous_value = ItemT{}; for(size_t i = 0; i < output.size(); ++i) { ASSERT_EQ(test_utils::convert_to_native(output[i]), test_utils::convert_to_native(expected[i])); + if(test_utils::convert_to_native(output[i]) + != test_utils::convert_to_native(previous_value)) + { + previous_value = output[i]; + expected_offset = 0; + } + else + { + expected_offset = ++expected_offset; + } + + ASSERT_EQ(offsets[i], expected_offset); } } }