From 55aacac3238c187e266a3a01055c97c2a1857363 Mon Sep 17 00:00:00 2001 From: gaoziyuan Date: Sun, 27 Oct 2024 19:21:14 +0800 Subject: [PATCH] fix review --- csrc/gpu/get_padding_offset_v2.cu | 11 +++++++---- csrc/gpu/set_preids_token_penalty_multi_scores.cu | 12 ++++++------ .../test_get_padding_offset_v2.py | 0 .../test_set_preids_token_penalty_multi_scores.py | 0 .../test_top_p_sampling_reject.py | 0 .../python => unittest}/test_update_inputs_v2.py | 0 6 files changed, 13 insertions(+), 10 deletions(-) rename csrc/gpu/{test/python => unittest}/test_get_padding_offset_v2.py (100%) rename csrc/gpu/{test/python => unittest}/test_set_preids_token_penalty_multi_scores.py (100%) rename csrc/gpu/{test/python => unittest}/test_top_p_sampling_reject.py (100%) rename csrc/gpu/{test/python => unittest}/test_update_inputs_v2.py (100%) diff --git a/csrc/gpu/get_padding_offset_v2.cu b/csrc/gpu/get_padding_offset_v2.cu index ac247d09c2c8..ede308a14d7e 100644 --- a/csrc/gpu/get_padding_offset_v2.cu +++ b/csrc/gpu/get_padding_offset_v2.cu @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/extension.h" +#include "helper.h" __global__ void GetPaddingOffsetV2Kernel(int *padding_offset, int *cum_offsets_out, @@ -54,10 +55,12 @@ std::vector GetPaddingOffsetV2(const paddle::Tensor& input_ids, auto cpu_token_num = token_num.copy_to(paddle::CPUPlace(), false); const int token_num_data = cpu_token_num.data()[0]; - auto x_remove_padding = paddle::full({token_num_data}, 0, paddle::DataType::INT64, input_ids.place()); - auto padding_offset = paddle::full({token_num_data}, 0, paddle::DataType::INT32, input_ids.place()); - auto cu_seqlens_q = paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place()); - auto cu_seqlens_k = paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place()); + + auto x_remove_padding = GetEmptyTensor({token_num_data}, paddle::DataType::INT64, input_ids.place()); + auto padding_offset = GetEmptyTensor({token_num_data}, paddle::DataType::INT32, input_ids.place()); + auto cu_seqlens_q = GetEmptyTensor({bsz + 1}, paddle::DataType::INT32, input_ids.place()); + auto cu_seqlens_k = GetEmptyTensor({bsz + 1}, paddle::DataType::INT32, input_ids.place()); + GetPaddingOffsetV2Kernel<<>>( padding_offset.data(), cum_offsets_out.data(), diff --git a/csrc/gpu/set_preids_token_penalty_multi_scores.cu b/csrc/gpu/set_preids_token_penalty_multi_scores.cu index dc26e707bf08..7dd04e2f4728 100644 --- a/csrc/gpu/set_preids_token_penalty_multi_scores.cu +++ b/csrc/gpu/set_preids_token_penalty_multi_scores.cu @@ -15,7 +15,7 @@ #include "helper.h" template -__global__ void update_value_all(const bool *stop_flags, +__global__ void set_preids_token_penalty_multi_scores_kernel(const bool *stop_flags, int64_t *pre_ids, const int64_t *input_ids, const int *seq_lens_encoder, @@ -98,7 +98,7 @@ __global__ void update_value_all(const bool *stop_flags, } template -void set_preids_token_penalty_multi_scores_kernel(const paddle::Tensor& pre_ids, +void set_preids_token_penalty_multi_scores(const paddle::Tensor& pre_ids, const paddle::Tensor& input_ids, const paddle::Tensor& seq_lens_encoder, const paddle::Tensor& seq_lens_decoder, @@ -128,7 +128,7 @@ void set_preids_token_penalty_multi_scores_kernel(const paddle::Tensor& pre_ids, int64_t end_length = eos_token_id.shape()[0]; - update_value_all<<>>( + set_preids_token_penalty_multi_scores_kernel<<>>( stop_flags.data(), const_cast(pre_ids.data()), input_ids.data(), @@ -172,7 +172,7 @@ void SetPreidsTokenPenaltyMultiScores(const paddle::Tensor& pre_ids, switch (logits.type()) { case paddle::DataType::BFLOAT16: { - return set_preids_token_penalty_multi_scores_kernel( + return set_preids_token_penalty_multi_scores( pre_ids, input_ids, seq_lens_encoder, @@ -191,7 +191,7 @@ void SetPreidsTokenPenaltyMultiScores(const paddle::Tensor& pre_ids, ); } case paddle::DataType::FLOAT16: { - return set_preids_token_penalty_multi_scores_kernel( + return set_preids_token_penalty_multi_scores( pre_ids, input_ids, seq_lens_encoder, @@ -210,7 +210,7 @@ void SetPreidsTokenPenaltyMultiScores(const paddle::Tensor& pre_ids, ); } case paddle::DataType::FLOAT32: { - return set_preids_token_penalty_multi_scores_kernel( + return set_preids_token_penalty_multi_scores( pre_ids, input_ids, seq_lens_encoder, diff --git a/csrc/gpu/test/python/test_get_padding_offset_v2.py b/csrc/gpu/unittest/test_get_padding_offset_v2.py similarity index 100% rename from csrc/gpu/test/python/test_get_padding_offset_v2.py rename to csrc/gpu/unittest/test_get_padding_offset_v2.py diff --git a/csrc/gpu/test/python/test_set_preids_token_penalty_multi_scores.py b/csrc/gpu/unittest/test_set_preids_token_penalty_multi_scores.py similarity index 100% rename from csrc/gpu/test/python/test_set_preids_token_penalty_multi_scores.py rename to csrc/gpu/unittest/test_set_preids_token_penalty_multi_scores.py diff --git a/csrc/gpu/test/python/test_top_p_sampling_reject.py b/csrc/gpu/unittest/test_top_p_sampling_reject.py similarity index 100% rename from csrc/gpu/test/python/test_top_p_sampling_reject.py rename to csrc/gpu/unittest/test_top_p_sampling_reject.py diff --git a/csrc/gpu/test/python/test_update_inputs_v2.py b/csrc/gpu/unittest/test_update_inputs_v2.py similarity index 100% rename from csrc/gpu/test/python/test_update_inputs_v2.py rename to csrc/gpu/unittest/test_update_inputs_v2.py