diff --git a/lite/backends/arm/math/CMakeLists.txt b/lite/backends/arm/math/CMakeLists.txt index 538bc65f4a6..603a5537c9d 100644 --- a/lite/backends/arm/math/CMakeLists.txt +++ b/lite/backends/arm/math/CMakeLists.txt @@ -127,7 +127,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR) norm.cc pad2d.cc negative.cc - beam_search.cc reduce_max.cc reduce_min.cc reduce_max_min.cc diff --git a/lite/backends/arm/math/funcs.h b/lite/backends/arm/math/funcs.h index c94987ab04e..c637544e3cd 100644 --- a/lite/backends/arm/math/funcs.h +++ b/lite/backends/arm/math/funcs.h @@ -24,7 +24,6 @@ #include "lite/backends/arm/math/anchor_generator.h" #include "lite/backends/arm/math/argmax.h" #include "lite/backends/arm/math/axpy.h" -#include "lite/backends/arm/math/beam_search.h" #include "lite/backends/arm/math/box_coder.h" #include "lite/backends/arm/math/clip.h" #include "lite/backends/arm/math/col_im_transform.h" diff --git a/lite/backends/host/math/CMakeLists.txt b/lite/backends/host/math/CMakeLists.txt index 5b57fddc22f..eb706ac7327 100644 --- a/lite/backends/host/math/CMakeLists.txt +++ b/lite/backends/host/math/CMakeLists.txt @@ -1,4 +1,5 @@ lite_cc_library(math_host SRCS + beam_search.cc sequence_padding.cc slice.cc pad3d.cc diff --git a/lite/backends/arm/math/beam_search.cc b/lite/backends/host/math/beam_search.cc similarity index 96% rename from lite/backends/arm/math/beam_search.cc rename to lite/backends/host/math/beam_search.cc index 74dfa143bda..1eb29c48218 100644 --- a/lite/backends/arm/math/beam_search.cc +++ b/lite/backends/host/math/beam_search.cc @@ -12,16 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "lite/backends/arm/math/beam_search.h" -#include +#include "lite/backends/host/math/beam_search.h" #include #include #include -#include "lite/utils/cp_logging.h" namespace paddle { namespace lite { -namespace arm { +namespace host { namespace math { /* * The basic items help to sort. @@ -207,9 +205,7 @@ void beam_search(const Tensor *pre_ids, int level, int beam_size, int end_id, - bool is_accumulated, - Context *ctx) { - // auto abs_lod = framework::ToAbsOffset(scores->lod()); + bool is_accumulated) { auto abs_lod = scores->lod(); auto &high_level = abs_lod[level]; auto items = SelectTopBeamSizeItems(pre_ids, @@ -266,6 +262,6 @@ void beam_search(const Tensor *pre_ids, } } // namespace math -} // namespace arm +} // namespace host } // namespace lite } // namespace paddle diff --git a/lite/backends/arm/math/beam_search.h b/lite/backends/host/math/beam_search.h similarity index 89% rename from lite/backends/arm/math/beam_search.h rename to lite/backends/host/math/beam_search.h index 2f07175e35e..961e1b3184e 100644 --- a/lite/backends/arm/math/beam_search.h +++ b/lite/backends/host/math/beam_search.h @@ -13,13 +13,11 @@ // limitations under the License. #pragma once - -#include #include "lite/core/context.h" namespace paddle { namespace lite { -namespace arm { +namespace host { namespace math { void beam_search(const Tensor* pre_ids, @@ -32,10 +30,9 @@ void beam_search(const Tensor* pre_ids, int level, int beam_size, int end_id, - bool is_accumulated, - Context* ctx); + bool is_accumulated); } // namespace math -} // namespace arm +} // namespace host } // namespace lite } // namespace paddle diff --git a/lite/backends/x86/math/CMakeLists.txt b/lite/backends/x86/math/CMakeLists.txt index 93c246bbe75..114024dc201 100644 --- a/lite/backends/x86/math/CMakeLists.txt +++ b/lite/backends/x86/math/CMakeLists.txt @@ -70,7 +70,6 @@ math_library(sequence2batch) math_library(sequence_pooling DEPS math_function jit_kernel_helper) math_library(sequence_scale) math_library(softmax DEPS math_function jit_kernel_helper) -math_library(beam_search DEPS math_function) # ## math_library(matrix_bit_code) # @@ -90,7 +89,6 @@ endif() # cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) # cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) # cc_test(sequence_pooling_test SRCS sequence_pooling_test.cc DEPS sequence_pooling) -# cc_test(beam_search_test SRCS beam_search_test.cc DEPS beam_search) # cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split) # cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info) math_library(box_coder DEPS math_function) diff --git a/lite/backends/x86/math/beam_search.cc b/lite/backends/x86/math/beam_search.cc deleted file mode 100644 index 274e8836dd6..00000000000 --- a/lite/backends/x86/math/beam_search.cc +++ /dev/null @@ -1,326 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "lite/backends/x86/math/beam_search.h" -#include -#include -#include -#include "lite/fluid/lod.h" - -namespace paddle { -namespace lite { -namespace x86 { -namespace math { - -template -class BeamSearchFunctor { - public: - void operator()(const lite::X86Context &context, - const lite::Tensor *pre_ids, - const lite::Tensor *pre_scores, - const lite::Tensor *ids, - const lite::Tensor *scores, - lite::Tensor *selected_ids, - lite::Tensor *selected_scores, - lite::Tensor *parent_idx, - size_t level, - size_t beam_size, - int end_id, - bool is_accumulated) { - auto abs_lod = lite::fluid::ToAbsOffset(scores->lod()); - auto &high_level = abs_lod[level]; - - auto items = SelectTopBeamSizeItems(pre_ids, - pre_scores, - ids, - scores, - level, - beam_size, - end_id, - is_accumulated); - auto selected_items = ToMap(items, high_level.back()); - /* - if (FLAGS_v == 3) { - VLOG(3) << "selected_items:"; - for (size_t i = 0; i < selected_items.size(); ++i) { - VLOG(3) << "offset: " << i; - for (auto &item : selected_items[i]) { - VLOG(3) << item.ToString(); - } - } - } - */ - - PruneEndBeams(pre_ids, abs_lod, &selected_items, level, end_id); - // calculate the output tensor's height - size_t num_instances = std::accumulate( - std::begin(selected_items), - std::end(selected_items), - 0, - [](size_t a, std::vector &b) { return a + b.size(); }); - // the output tensor shape should be [num_instances, 1] - // auto dims = framework::make_ddim( - // std::vector({static_cast(num_instances), 1})); - lite::DDim dims( - std::vector({static_cast(num_instances), 1L})); - - selected_ids->Resize(dims); - auto *selected_ids_data = selected_ids->mutable_data(TARGET(kX86)); - - selected_scores->Resize(dims); - auto *selected_scores_data = - selected_scores->mutable_data(TARGET(kX86)); - - // auto *selected_ids_data = - // selected_ids->mutable_data(dims, platform::CPUPlace()); - // auto *selected_scores_data = - // selected_scores->mutable_data(dims, platform::CPUPlace()); - parent_idx->Resize({static_cast(num_instances)}); - auto *parent_idx_data = - parent_idx ? parent_idx->mutable_data(TARGET(kX86)) : nullptr; - // auto *parent_idx_data = - // parent_idx - // ? parent_idx->mutable_data( - // {static_cast(num_instances)}, platform::CPUPlace()) - // : nullptr; - - // fill in data - std::vector low_level; - uint64_t low_offset = 0; - for (auto &items : selected_items) { - low_level.push_back(low_offset); - for (auto &item : items) { - if (parent_idx) { - parent_idx_data[low_offset] = static_cast(low_level.size() - 1); - } - selected_ids_data[low_offset] = item.id; - selected_scores_data[low_offset] = item.score; - low_offset++; - } - } - low_level.push_back(low_offset); - - // fill lod - lite::LoD lod(2); - lod[0].assign(high_level.begin(), high_level.end()); - lod[1].assign(low_level.begin(), low_level.end()); - // if (!lite::fluid::CheckLoD(lod)) { - // //LOG(FATAL)<<"lod %s is not right", framework::LoDToString(lod)); - //} - selected_ids->set_lod(lod); - selected_scores->set_lod(lod); - } - - /* - * The basic items help to sort. - */ - struct Item { - Item() {} - Item(size_t offset, size_t id, float score) - : offset(offset), id(id), score(score) {} - // offset in the higher lod level. - size_t offset; - // prefix id in the lower lod level. - // size_t prefix; - // the candidate id - size_t id; - // the corresponding score - float score; - - inline bool operator<(const Item &in) const { - return (score < in.score) || - ((score == in.score) && (offset < in.offset)); - } - - inline void operator=(const Item &in) { - offset = in.offset; - id = in.id; - score = in.score; - } - - std::string ToString() { - std::ostringstream os; - os << "{"; - os << "offset: " << offset << ", "; - os << "id: " << id << ", "; - os << "score: " << score << ""; - os << "}"; - return os.str(); - } - }; - - protected: - /* - * Prune the source sentences all branchs finished, and it is optional. - * Pruning must one step later than finishing (thus pre_ids is needed here), - * since the end tokens must be writed out. - */ - void PruneEndBeams(const lite::Tensor *pre_ids, - const lite::LoD &abs_lod, - std::vector> *items, - size_t lod_level, - int end_id) { - auto *pre_ids_data = pre_ids->data(); - auto &high_level = abs_lod[lod_level]; - for (size_t src_idx = 0; src_idx < high_level.size() - 1; ++src_idx) { - size_t src_prefix_start = high_level[src_idx]; - size_t src_prefix_end = high_level[src_idx + 1]; - bool finish_flag = true; - for (size_t offset = src_prefix_start; offset < src_prefix_end; - offset++) { - for (auto &item : items->at(offset)) { - if (item.id != static_cast(end_id) || - pre_ids_data[offset] != end_id) { - finish_flag = false; - break; - } - } - if (!finish_flag) break; - } - if (finish_flag) { // all branchs of the beam (source sentence) end and - // prune this beam - for (size_t offset = src_prefix_start; offset < src_prefix_end; - offset++) - items->at(offset).clear(); - } - } - } - - /* - * Transform the items into a map whose key is offset, value is the items. - * NOTE low performance. - */ - std::vector> ToMap( - const std::vector> &items, size_t element_num) { - std::vector> result; - result.resize(element_num); - for (auto &entries : items) { - for (const auto &item : entries) { - result[item.offset].push_back(item); - } - } - return result; - } - - void Insert(std::vector *top_beam_ptr, - const Item &item, - size_t beam_size) { - std::vector &top_beam = *top_beam_ptr; - - size_t num_beams = top_beam.size(); - if (num_beams < beam_size) { - top_beam.resize(num_beams + 1); - num_beams++; - } else { - if (item < top_beam[beam_size - 1]) { - return; - } - } - - for (int k = static_cast(num_beams) - 2; k >= 0; --k) { - if (top_beam[k] < item) { - top_beam[k + 1] = top_beam[k]; - } else { - top_beam[k + 1] = item; - return; - } - } - top_beam[0] = item; - } - - /* - * For each source, select top beam_size records. - */ - std::vector> SelectTopBeamSizeItems( - const lite::Tensor *pre_ids, - const lite::Tensor *pre_scores, - const lite::Tensor *ids, - const lite::Tensor *scores, - size_t lod_level, - size_t beam_size, - int end_id, - bool is_accumulated) { - std::vector> result; - - // find the current candidates - auto abs_lod = lite::fluid::ToAbsOffset(scores->lod()); - - auto *pre_ids_data = pre_ids->data(); - auto *pre_scores_data = pre_scores->data(); - - auto *ids_data = ids ? ids->data() : nullptr; - auto *scores_data = scores->data(); - - // size_t num_seqs = scores->NumElements(lod_level); - size_t num_seqs = scores->lod()[lod_level].size() - 1; - size_t seq_width = 1; - for (size_t i = 1; i < scores->dims().size(); i++) { - seq_width *= scores->dims()[i]; - } - - for (size_t seq_id = 0; seq_id < num_seqs; ++seq_id) { - size_t seq_offset_start = abs_lod[lod_level][seq_id]; - size_t seq_offset_end = abs_lod[lod_level][seq_id + 1]; - - std::vector top_beam; - top_beam.reserve(beam_size); - - for (size_t offset = seq_offset_start; offset < seq_offset_end; - ++offset) { - auto pre_id = pre_ids_data[offset]; - auto pre_score = pre_scores_data[offset]; - if (pre_id == end_id) { - // Allocate all probability mass to end_id for finished branchs and - // the other candidate ids can be ignored. - Item item(offset, end_id, pre_score); - Insert(&top_beam, item, beam_size); - } else { - size_t index = offset * seq_width; - for (size_t d = 0; d < seq_width; d++, index++) { - int64_t id = ids_data ? ids_data[index] : static_cast(d); - float score = is_accumulated - ? scores_data[index] - : pre_score + std::log(scores_data[index]); - Item item(offset, id, score); - Insert(&top_beam, item, beam_size); - } - } - } - - result.emplace_back(top_beam); - } - /* - if (FLAGS_v == 3) { - VLOG(3) << "SelectTopBeamSizeItems result size " << result.size(); - for (auto &items : result) { - VLOG(3) << "item set:"; - for (auto &item : items) { - VLOG(3) << item.ToString(); - } - } - } - */ - return result; - } -}; - -template class BeamSearchFunctor; -template class BeamSearchFunctor; -template class BeamSearchFunctor; -template class BeamSearchFunctor; - -} // namespace math -} // namespace x86 -} // namespace lite -} // namespace paddle diff --git a/lite/backends/x86/math/beam_search.h b/lite/backends/x86/math/beam_search.h deleted file mode 100644 index 40998c89f9e..00000000000 --- a/lite/backends/x86/math/beam_search.h +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "lite/core/context.h" -#include "lite/core/tensor.h" - -namespace paddle { -namespace lite { -namespace x86 { -namespace math { - -/* - * This is an implementation of beam search. - * - * To explain the details, lets take machine translation task for example, in - * this task, one source sentence is translated to multiple target sentences, - * during this period, one sentence will be translated to multiple translation - * prefixes(target sentence that have not ended), in each time step a prefix - * will have some candidates, input the candidate ids and their corresponding - * scores (probabilities), it will sort and select the top beam_size candidates - * for each source sentence, and store the selected candidates's score and their - * corresponding ids to LoDTensors. - * - * A detailed example: - * - * Input - * - * ids: - * - LoD (should have 2 levels) - * - first level: [0, 1, 4] - * - second level: [0, 1, 2, 3, 4] - * - tensor's data: - * [[4, 2, 5] - * [2, 1, 3] - * [3, 5, 2] - * [8, 2, 1]] - * - * scores: - * - LoD same as `ids` - * - tensor's data - * [[0.5, 0.3, 0.2] - * [0.6, 0.3, 0.1] - * [0.9, 0.5, 0.1] - * [0.7, 0.5, 0.1]] - * - * The inputs means that there are 2 source sentences to translate, and the - * first source has 1 prefix, the second source has 2 prefix. - * - * Lets assume beam size is 2, and the beam search's output should be - * - LoD - * - first level: [0, 1, 2] - * - second level: [0, 2, 4] - * - id tensor's data - * [[4, - * 1, - * 3, - * 8]] - * - score tensor's data - * [[0.5, - * 0.3, - * 0.9, - * 0.7]] - * - * TODO all the prune operations should be in the beam search, so it is better - * to split the beam search algorithm into a sequence of smaller operators, and - * the prune operators can be inserted in this sequence. - */ -template -class BeamSearchFunctor { - public: - /* - * The main function of beam search. - * - * @selected_ids: a [None, 1]-shaped tensor with LoD. - * In a machine translation model, it might be the candidate term id sets, - * each set stored as a varience-length sequence. - * The format might be described with a two-level LoD - * - [[0 1], - * [0 1 2]] - * - [[] - * [0 1]] - * the first level of LoD tells that there are two source sentences. The - * second level describes the details of the candidate id set's offsets in - * the source sentences. - * - * @selected_scores: a LoD tensor with the same shape and LoD with - * selected_ids. - * It stores the corresponding scores of candidate ids in selected_ids. - * - * Return false if all the input tensor is empty, in machine translation task - * that means no candidates is provided, and the task will stop running. - */ - void operator()(const lite::Context& context, - const lite::Tensor* pre_ids, - const lite::Tensor* pre_scores, - const lite::Tensor* ids, - const lite::Tensor* scores, - lite::Tensor* selected_ids, - lite::Tensor* selected_scores, - lite::Tensor* parent_idx, - size_t level, - size_t beam_size, - int end_id, - bool is_accumulated); -}; - -} // namespace math -} // namespace x86 -} // namespace lite -} // namespace paddle diff --git a/lite/backends/x86/math/beam_search_test.cc b/lite/backends/x86/math/beam_search_test.cc deleted file mode 100644 index 233fa03fbaa..00000000000 --- a/lite/backends/x86/math/beam_search_test.cc +++ /dev/null @@ -1,152 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/math/beam_search.h" -#include -#include - -void PrepareCPUTensors(paddle::framework::LoDTensor* ids, - paddle::framework::LoDTensor* scores, - paddle::framework::LoDTensor* pre_ids, - paddle::framework::LoDTensor* pre_scores) { - // lod - paddle::framework::LoD lod; - std::vector level0({0, 2, 4}); - std::vector level1({0, 1, 2, 3, 4}); - lod.push_back(level0); - lod.push_back(level1); - ids->set_lod(lod); - scores->set_lod(lod); - - auto dims = paddle::framework::make_ddim({4, 3}); - ids->Resize(dims); - scores->Resize(dims); - - paddle::platform::CPUPlace place; - auto* ids_data = ids->mutable_data(place); - auto* scores_data = scores->mutable_data(place); - std::vector ids_vec_data({4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1}); - std::vector scores_vec_data( - {0.6f, 0.3f, 0.5f, 0.2f, 0.3f, 0.1f, 0.9f, 0.5f, 0.1f, 0.7f, 0.5f, 0.1f}); - - CHECK_EQ(static_cast(ids->numel()), ids_vec_data.size()); - CHECK_EQ(static_cast(ids->numel()), scores_vec_data.size()); - - for (int i = 0; i < ids->numel(); i++) { - ids_data[i] = ids_vec_data[i]; - scores_data[i] = scores_vec_data[i]; - } - - // pre_ids - pre_ids->Resize(paddle::framework::make_ddim({4, 1})); - for (int i = 0; i < 4; i++) { - pre_ids->mutable_data(place)[i] = i + 1; - } - - // pre_scores - pre_scores->Resize(paddle::framework::make_ddim({4, 1})); - for (int i = 0; i < 4; i++) { - pre_scores->mutable_data(place)[i] = 0.1 * (i + 1); - } -} - -template -void TestBeamSearch() { - paddle::framework::LoDTensor ids; - paddle::framework::LoDTensor scores; - paddle::framework::LoDTensor pre_ids; - paddle::framework::LoDTensor pre_scores; - - auto* place = new Place(); - DeviceContext* context = new DeviceContext(*place); - if (paddle::platform::is_cpu_place(*place)) { - PrepareCPUTensors(&ids, &scores, &pre_ids, &pre_scores); - } else { - paddle::framework::LoDTensor cpu_ids; - paddle::framework::LoDTensor cpu_scores; - paddle::framework::LoDTensor cpu_pre_ids; - paddle::framework::LoDTensor cpu_pre_scores; - - PrepareCPUTensors(&cpu_ids, &cpu_scores, &cpu_pre_ids, &cpu_pre_scores); - - TensorCopySync(cpu_ids, *place, &ids); - TensorCopySync(cpu_scores, *place, &scores); - TensorCopySync(cpu_pre_ids, *place, &pre_ids); - TensorCopySync(cpu_pre_scores, *place, &pre_scores); - - ids.set_lod(cpu_ids.lod()); - scores.set_lod(cpu_scores.lod()); - pre_ids.set_lod(cpu_pre_ids.lod()); - pre_scores.set_lod(cpu_pre_scores.lod()); - } - - paddle::framework::LoDTensor selected_ids; - paddle::framework::LoDTensor selected_scores; - paddle::framework::LoDTensor parent_idx; - - size_t level = 0; - size_t beam_size = 2; - int end_id = 0; - paddle::operators::math::BeamSearchFunctor beamsearch; - beamsearch(*context, - &pre_ids, - &pre_scores, - &ids, - &scores, - &selected_ids, - &selected_scores, - &parent_idx, - level, - beam_size, - end_id, - true); - - ASSERT_EQ(selected_ids.lod(), selected_scores.lod()); - - paddle::framework::LoDTensor cpu_selected_ids; - paddle::framework::LoDTensor cpu_selected_scores; - if (paddle::platform::is_cpu_place(*place)) { - cpu_selected_ids = selected_ids; - cpu_selected_scores = selected_scores; - } else { - TensorCopySync( - selected_ids, paddle::platform::CPUPlace(), &cpu_selected_ids); - TensorCopySync( - selected_scores, paddle::platform::CPUPlace(), &cpu_selected_scores); - cpu_selected_ids.set_lod(selected_ids.lod()); - cpu_selected_scores.set_lod(selected_scores.lod()); - } - - std::vector expected_ids({4, 5, 3, 8}); - std::vector expected_scores({0.6f, 0.5f, 0.9f, 0.7f}); - for (int i = 0; i < 4; i++) { - ASSERT_EQ(expected_ids[i], cpu_selected_ids.data()[i]); - ASSERT_EQ(expected_scores[i], cpu_selected_scores.data()[i]); - } - - delete place; - delete context; -} - -TEST(BeamSearch, CPU) { - TestBeamSearch(); -} - -#ifdef PADDLE_WITH_CUDA -TEST(BeamSearch, GPU) { - TestBeamSearch(); -} -#endif diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt index 55270af93a5..28980a94f7d 100644 --- a/lite/kernels/arm/CMakeLists.txt +++ b/lite/kernels/arm/CMakeLists.txt @@ -98,7 +98,7 @@ add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_ add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} sequence_softmax_compute_host) add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} topk_compute_host) add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} increment_compute_host) -add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} beam_search_compute_host) add_kernel(lod_reset_compute_arm ARM extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} lod_reset_compute_host) add_kernel(lstm_arm ARM extra SRCS lstm_compute.cc DEPS ${lite_kernel_deps} math_arm) diff --git a/lite/kernels/arm/beam_search_compute.cc b/lite/kernels/arm/beam_search_compute.cc index 437ba070b7e..9032734d70c 100644 --- a/lite/kernels/arm/beam_search_compute.cc +++ b/lite/kernels/arm/beam_search_compute.cc @@ -12,41 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "lite/kernels/arm/beam_search_compute.h" -#include "lite/backends/arm/math/funcs.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace arm { - -void BeamSearchCompute::Run() { - auto& ctx = this->ctx_->template As(); - auto& param = this->Param(); - lite::arm::math::beam_search(param.pre_ids, - param.pre_scores, - param.ids, - param.scores, - param.selected_ids, - param.selected_scores, - param.parent_idx, - param.level, - param.beam_size, - param.end_id, - param.is_accumulated, - &ctx); -} - -} // namespace arm -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "lite/kernels/host/beam_search_compute.h" REGISTER_LITE_KERNEL(beam_search, kARM, kFloat, kNCHW, - paddle::lite::kernels::arm::BeamSearchCompute, + paddle::lite::kernels::host::BeamSearchCompute, def) .BindInput("pre_ids", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))}) diff --git a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt index 528074f217d..6e0fa3888b2 100644 --- a/lite/kernels/host/CMakeLists.txt +++ b/lite/kernels/host/CMakeLists.txt @@ -73,6 +73,7 @@ add_kernel(topk_compute_host Host extra SRCS topk_compute.cc DEPS ${lite_kernel_ add_kernel(topk_v2_compute_host Host extra SRCS topk_v2_compute.cc DEPS ${lite_kernel_deps}) add_kernel(meshgrid_compute_host Host extra SRCS meshgrid_compute.cc DEPS ${lite_kernel_deps}) add_kernel(linspace_compute_host Host extra SRCS linspace_compute.cc DEPS ${lite_kernel_deps}) +add_kernel(beam_search_compute_host Host extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_host) add_kernel(beam_search_decode_compute_host Host extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps}) add_kernel(roi_perspective_transform_compute_host Host extra SRCS roi_perspective_transform_compute.cc DEPS ${lite_kernel_deps}) add_kernel(lod_reset_compute_host Host extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps}) diff --git a/lite/kernels/host/beam_search_compute.cc b/lite/kernels/host/beam_search_compute.cc new file mode 100644 index 00000000000..3a48f09a7e4 --- /dev/null +++ b/lite/kernels/host/beam_search_compute.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/host/beam_search_compute.h" +#include "lite/backends/host/math/beam_search.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace host { + +void BeamSearchCompute::Run() { + auto& param = this->Param(); + lite::host::math::beam_search(param.pre_ids, + param.pre_scores, + param.ids, + param.scores, + param.selected_ids, + param.selected_scores, + param.parent_idx, + param.level, + param.beam_size, + param.end_id, + param.is_accumulated); +} + +} // namespace host +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(beam_search, + kHost, + kFloat, + kNCHW, + paddle::lite::kernels::host::BeamSearchCompute, + def) + .BindInput("pre_ids", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))}) + .BindInput("pre_scores", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))}) + .BindInput("ids", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))}) + .BindInput("scores", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))}) + .BindOutput("selected_ids", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))}) + .BindOutput("selected_scores", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))}) + .BindOutput("parent_idx", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))}) + .Finalize(); diff --git a/lite/kernels/arm/beam_search_compute.h b/lite/kernels/host/beam_search_compute.h similarity index 80% rename from lite/kernels/arm/beam_search_compute.h rename to lite/kernels/host/beam_search_compute.h index 854696e5b9f..c6ca518e4d4 100644 --- a/lite/kernels/arm/beam_search_compute.h +++ b/lite/kernels/host/beam_search_compute.h @@ -13,26 +13,24 @@ // limitations under the License. #pragma once -#include -#include "lite/backends/arm/math/type_trans.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" namespace paddle { namespace lite { namespace kernels { -namespace arm { +namespace host { -class BeamSearchCompute : public KernelLite { +class BeamSearchCompute : public KernelLite { public: void Run() override; - ~BeamSearchCompute() {} + virtual ~BeamSearchCompute() = default; private: }; -} // namespace arm +} // namespace host } // namespace kernels } // namespace lite } // namespace paddle