diff --git a/lite/backends/arm/math/CMakeLists.txt b/lite/backends/arm/math/CMakeLists.txt
index 538bc65f4a6..603a5537c9d 100644
--- a/lite/backends/arm/math/CMakeLists.txt
+++ b/lite/backends/arm/math/CMakeLists.txt
@@ -127,7 +127,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
       norm.cc
       pad2d.cc
       negative.cc
-      beam_search.cc
       reduce_max.cc
       reduce_min.cc
       reduce_max_min.cc
diff --git a/lite/backends/arm/math/funcs.h b/lite/backends/arm/math/funcs.h
index c94987ab04e..c637544e3cd 100644
--- a/lite/backends/arm/math/funcs.h
+++ b/lite/backends/arm/math/funcs.h
@@ -24,7 +24,6 @@
 #include "lite/backends/arm/math/anchor_generator.h"
 #include "lite/backends/arm/math/argmax.h"
 #include "lite/backends/arm/math/axpy.h"
-#include "lite/backends/arm/math/beam_search.h"
 #include "lite/backends/arm/math/box_coder.h"
 #include "lite/backends/arm/math/clip.h"
 #include "lite/backends/arm/math/col_im_transform.h"
diff --git a/lite/backends/host/math/CMakeLists.txt b/lite/backends/host/math/CMakeLists.txt
index 5b57fddc22f..eb706ac7327 100644
--- a/lite/backends/host/math/CMakeLists.txt
+++ b/lite/backends/host/math/CMakeLists.txt
@@ -1,4 +1,5 @@
 lite_cc_library(math_host SRCS
+    beam_search.cc
     sequence_padding.cc
     slice.cc
     pad3d.cc
diff --git a/lite/backends/arm/math/beam_search.cc b/lite/backends/host/math/beam_search.cc
similarity index 96%
rename from lite/backends/arm/math/beam_search.cc
rename to lite/backends/host/math/beam_search.cc
index 74dfa143bda..1eb29c48218 100644
--- a/lite/backends/arm/math/beam_search.cc
+++ b/lite/backends/host/math/beam_search.cc
@@ -12,16 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "lite/backends/arm/math/beam_search.h"
-#include <arm_neon.h>
+#include "lite/backends/host/math/beam_search.h"
 #include <cmath>
 #include <string>
 #include <vector>
-#include "lite/utils/cp_logging.h"
 
 namespace paddle {
 namespace lite {
-namespace arm {
+namespace host {
 namespace math {
 /*
 * The basic items help to sort.
@@ -207,9 +205,7 @@ void beam_search(const Tensor *pre_ids,
                  int level,
                  int beam_size,
                  int end_id,
-                 bool is_accumulated,
-                 Context<TARGET(kARM)> *ctx) {
-  // auto abs_lod = framework::ToAbsOffset(scores->lod());
+                 bool is_accumulated) {
   auto abs_lod = scores->lod();
   auto &high_level = abs_lod[level];
   auto items = SelectTopBeamSizeItems(pre_ids,
@@ -266,6 +262,6 @@ void beam_search(const Tensor *pre_ids,
 }
 
 }  // namespace math
-}  // namespace arm
+}  // namespace host
 }  // namespace lite
 }  // namespace paddle
diff --git a/lite/backends/arm/math/beam_search.h b/lite/backends/host/math/beam_search.h
similarity index 89%
rename from lite/backends/arm/math/beam_search.h
rename to lite/backends/host/math/beam_search.h
index 2f07175e35e..961e1b3184e 100644
--- a/lite/backends/arm/math/beam_search.h
+++ b/lite/backends/host/math/beam_search.h
@@ -13,13 +13,11 @@
 // limitations under the License.
 
 #pragma once
-
-#include <cmath>
 #include "lite/core/context.h"
 
 namespace paddle {
 namespace lite {
-namespace arm {
+namespace host {
 namespace math {
 
 void beam_search(const Tensor* pre_ids,
@@ -32,10 +30,9 @@ void beam_search(const Tensor* pre_ids,
                  int level,
                  int beam_size,
                  int end_id,
-                 bool is_accumulated,
-                 Context<TARGET(kARM)>* ctx);
+                 bool is_accumulated);
 
 }  // namespace math
-}  // namespace arm
+}  // namespace host
 }  // namespace lite
 }  // namespace paddle
diff --git a/lite/backends/x86/math/CMakeLists.txt b/lite/backends/x86/math/CMakeLists.txt
index 93c246bbe75..114024dc201 100644
--- a/lite/backends/x86/math/CMakeLists.txt
+++ b/lite/backends/x86/math/CMakeLists.txt
@@ -70,7 +70,6 @@ math_library(sequence2batch)
 math_library(sequence_pooling DEPS math_function jit_kernel_helper)
 math_library(sequence_scale)
 math_library(softmax DEPS math_function jit_kernel_helper)
-math_library(beam_search DEPS math_function)
 #
 ## math_library(matrix_bit_code)
 #
@@ -90,7 +89,6 @@ endif()
 # cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col)
 # cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding)
 # cc_test(sequence_pooling_test SRCS sequence_pooling_test.cc DEPS sequence_pooling)
-# cc_test(beam_search_test SRCS beam_search_test.cc DEPS beam_search)
 # cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split)
 # cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
 math_library(box_coder DEPS math_function)
diff --git a/lite/backends/x86/math/beam_search.cc b/lite/backends/x86/math/beam_search.cc
deleted file mode 100644
index 274e8836dd6..00000000000
--- a/lite/backends/x86/math/beam_search.cc
+++ /dev/null
@@ -1,326 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "lite/backends/x86/math/beam_search.h"
-#include <algorithm>
-#include <cmath>
-#include <map>
-#include "lite/fluid/lod.h"
-
-namespace paddle {
-namespace lite {
-namespace x86 {
-namespace math {
-
-template <typename T>
-class BeamSearchFunctor<TARGET(kX86), T> {
- public:
-  void operator()(const lite::X86Context &context,
-                  const lite::Tensor *pre_ids,
-                  const lite::Tensor *pre_scores,
-                  const lite::Tensor *ids,
-                  const lite::Tensor *scores,
-                  lite::Tensor *selected_ids,
-                  lite::Tensor *selected_scores,
-                  lite::Tensor *parent_idx,
-                  size_t level,
-                  size_t beam_size,
-                  int end_id,
-                  bool is_accumulated) {
-    auto abs_lod = lite::fluid::ToAbsOffset(scores->lod());
-    auto &high_level = abs_lod[level];
-
-    auto items = SelectTopBeamSizeItems(pre_ids,
-                                        pre_scores,
-                                        ids,
-                                        scores,
-                                        level,
-                                        beam_size,
-                                        end_id,
-                                        is_accumulated);
-    auto selected_items = ToMap(items, high_level.back());
-    /*
-    if (FLAGS_v == 3) {
-      VLOG(3) << "selected_items:";
-      for (size_t i = 0; i < selected_items.size(); ++i) {
-        VLOG(3) << "offset: " << i;
-        for (auto &item : selected_items[i]) {
-          VLOG(3) << item.ToString();
-        }
-      }
-    }
-    */
-
-    PruneEndBeams(pre_ids, abs_lod, &selected_items, level, end_id);
-    // calculate the output tensor's height
-    size_t num_instances = std::accumulate(
-        std::begin(selected_items),
-        std::end(selected_items),
-        0,
-        [](size_t a, std::vector<Item> &b) { return a + b.size(); });
-    // the output tensor shape should be [num_instances, 1]
-    // auto dims = framework::make_ddim(
-    //     std::vector<int64_t>({static_cast<int>(num_instances), 1}));
-    lite::DDim dims(
-        std::vector<int64_t>({static_cast<int>(num_instances), 1L}));
-
-    selected_ids->Resize(dims);
-    auto *selected_ids_data = selected_ids->mutable_data<int64_t>(TARGET(kX86));
-
-    selected_scores->Resize(dims);
-    auto *selected_scores_data =
-        selected_scores->mutable_data<int64_t>(TARGET(kX86));
-
-    // auto *selected_ids_data =
-    //    selected_ids->mutable_data<int64_t>(dims, platform::CPUPlace());
-    // auto *selected_scores_data =
-    //    selected_scores->mutable_data<float>(dims, platform::CPUPlace());
-    parent_idx->Resize({static_cast<int64_t>(num_instances)});
-    auto *parent_idx_data =
-        parent_idx ? parent_idx->mutable_data<int>(TARGET(kX86)) : nullptr;
-    // auto *parent_idx_data =
-    //    parent_idx
-    //        ? parent_idx->mutable_data<int>(
-    //              {static_cast<int64_t>(num_instances)}, platform::CPUPlace())
-    //        : nullptr;
-
-    // fill in data
-    std::vector<uint64_t> low_level;
-    uint64_t low_offset = 0;
-    for (auto &items : selected_items) {
-      low_level.push_back(low_offset);
-      for (auto &item : items) {
-        if (parent_idx) {
-          parent_idx_data[low_offset] = static_cast<int>(low_level.size() - 1);
-        }
-        selected_ids_data[low_offset] = item.id;
-        selected_scores_data[low_offset] = item.score;
-        low_offset++;
-      }
-    }
-    low_level.push_back(low_offset);
-
-    // fill lod
-    lite::LoD lod(2);
-    lod[0].assign(high_level.begin(), high_level.end());
-    lod[1].assign(low_level.begin(), low_level.end());
-    // if (!lite::fluid::CheckLoD(lod)) {
-    //  //LOG(FATAL)<<"lod %s is not right", framework::LoDToString(lod));
-    //}
-    selected_ids->set_lod(lod);
-    selected_scores->set_lod(lod);
-  }
-
-  /*
-   * The basic items help to sort.
-   */
-  struct Item {
-    Item() {}
-    Item(size_t offset, size_t id, float score)
-        : offset(offset), id(id), score(score) {}
-    // offset in the higher lod level.
-    size_t offset;
-    // prefix id in the lower lod level.
-    // size_t prefix;
-    // the candidate id
-    size_t id;
-    // the corresponding score
-    float score;
-
-    inline bool operator<(const Item &in) const {
-      return (score < in.score) ||
-             ((score == in.score) && (offset < in.offset));
-    }
-
-    inline void operator=(const Item &in) {
-      offset = in.offset;
-      id = in.id;
-      score = in.score;
-    }
-
-    std::string ToString() {
-      std::ostringstream os;
-      os << "{";
-      os << "offset: " << offset << ", ";
-      os << "id: " << id << ", ";
-      os << "score: " << score << "";
-      os << "}";
-      return os.str();
-    }
-  };
-
- protected:
-  /*
-   * Prune the source sentences all branchs finished, and it is optional.
-   * Pruning must one step later than finishing (thus pre_ids is needed here),
-   * since the end tokens must be writed out.
-   */
-  void PruneEndBeams(const lite::Tensor *pre_ids,
-                     const lite::LoD &abs_lod,
-                     std::vector<std::vector<Item>> *items,
-                     size_t lod_level,
-                     int end_id) {
-    auto *pre_ids_data = pre_ids->data<int64_t>();
-    auto &high_level = abs_lod[lod_level];
-    for (size_t src_idx = 0; src_idx < high_level.size() - 1; ++src_idx) {
-      size_t src_prefix_start = high_level[src_idx];
-      size_t src_prefix_end = high_level[src_idx + 1];
-      bool finish_flag = true;
-      for (size_t offset = src_prefix_start; offset < src_prefix_end;
-           offset++) {
-        for (auto &item : items->at(offset)) {
-          if (item.id != static_cast<size_t>(end_id) ||
-              pre_ids_data[offset] != end_id) {
-            finish_flag = false;
-            break;
-          }
-        }
-        if (!finish_flag) break;
-      }
-      if (finish_flag) {  // all branchs of the beam (source sentence) end and
-                          // prune this beam
-        for (size_t offset = src_prefix_start; offset < src_prefix_end;
-             offset++)
-          items->at(offset).clear();
-      }
-    }
-  }
-
-  /*
-   * Transform the items into a map whose key is offset, value is the items.
-   * NOTE low performance.
-   */
-  std::vector<std::vector<Item>> ToMap(
-      const std::vector<std::vector<Item>> &items, size_t element_num) {
-    std::vector<std::vector<Item>> result;
-    result.resize(element_num);
-    for (auto &entries : items) {
-      for (const auto &item : entries) {
-        result[item.offset].push_back(item);
-      }
-    }
-    return result;
-  }
-
-  void Insert(std::vector<Item> *top_beam_ptr,
-              const Item &item,
-              size_t beam_size) {
-    std::vector<Item> &top_beam = *top_beam_ptr;
-
-    size_t num_beams = top_beam.size();
-    if (num_beams < beam_size) {
-      top_beam.resize(num_beams + 1);
-      num_beams++;
-    } else {
-      if (item < top_beam[beam_size - 1]) {
-        return;
-      }
-    }
-
-    for (int k = static_cast<int>(num_beams) - 2; k >= 0; --k) {
-      if (top_beam[k] < item) {
-        top_beam[k + 1] = top_beam[k];
-      } else {
-        top_beam[k + 1] = item;
-        return;
-      }
-    }
-    top_beam[0] = item;
-  }
-
-  /*
-   * For each source, select top beam_size records.
-   */
-  std::vector<std::vector<Item>> SelectTopBeamSizeItems(
-      const lite::Tensor *pre_ids,
-      const lite::Tensor *pre_scores,
-      const lite::Tensor *ids,
-      const lite::Tensor *scores,
-      size_t lod_level,
-      size_t beam_size,
-      int end_id,
-      bool is_accumulated) {
-    std::vector<std::vector<Item>> result;
-
-    // find the current candidates
-    auto abs_lod = lite::fluid::ToAbsOffset(scores->lod());
-
-    auto *pre_ids_data = pre_ids->data<int64_t>();
-    auto *pre_scores_data = pre_scores->data<float>();
-
-    auto *ids_data = ids ? ids->data<int64_t>() : nullptr;
-    auto *scores_data = scores->data<float>();
-
-    // size_t num_seqs = scores->NumElements(lod_level);
-    size_t num_seqs = scores->lod()[lod_level].size() - 1;
-    size_t seq_width = 1;
-    for (size_t i = 1; i < scores->dims().size(); i++) {
-      seq_width *= scores->dims()[i];
-    }
-
-    for (size_t seq_id = 0; seq_id < num_seqs; ++seq_id) {
-      size_t seq_offset_start = abs_lod[lod_level][seq_id];
-      size_t seq_offset_end = abs_lod[lod_level][seq_id + 1];
-
-      std::vector<Item> top_beam;
-      top_beam.reserve(beam_size);
-
-      for (size_t offset = seq_offset_start; offset < seq_offset_end;
-           ++offset) {
-        auto pre_id = pre_ids_data[offset];
-        auto pre_score = pre_scores_data[offset];
-        if (pre_id == end_id) {
-          // Allocate all probability mass to end_id for finished branchs and
-          // the other candidate ids can be ignored.
-          Item item(offset, end_id, pre_score);
-          Insert(&top_beam, item, beam_size);
-        } else {
-          size_t index = offset * seq_width;
-          for (size_t d = 0; d < seq_width; d++, index++) {
-            int64_t id = ids_data ? ids_data[index] : static_cast<int64_t>(d);
-            float score = is_accumulated
-                              ? scores_data[index]
-                              : pre_score + std::log(scores_data[index]);
-            Item item(offset, id, score);
-            Insert(&top_beam, item, beam_size);
-          }
-        }
-      }
-
-      result.emplace_back(top_beam);
-    }
-    /*
-    if (FLAGS_v == 3) {
-      VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
-      for (auto &items : result) {
-        VLOG(3) << "item set:";
-        for (auto &item : items) {
-          VLOG(3) << item.ToString();
-        }
-      }
-    }
-    */
-    return result;
-  }
-};
-
-template class BeamSearchFunctor<TARGET(kX86), int>;
-template class BeamSearchFunctor<TARGET(kX86), int64_t>;
-template class BeamSearchFunctor<TARGET(kX86), float>;
-template class BeamSearchFunctor<TARGET(kX86), double>;
-
-}  // namespace math
-}  // namespace x86
-}  // namespace lite
-}  // namespace paddle
diff --git a/lite/backends/x86/math/beam_search.h b/lite/backends/x86/math/beam_search.h
deleted file mode 100644
index 40998c89f9e..00000000000
--- a/lite/backends/x86/math/beam_search.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <string>
-#include <vector>
-#include "lite/core/context.h"
-#include "lite/core/tensor.h"
-
-namespace paddle {
-namespace lite {
-namespace x86 {
-namespace math {
-
-/*
- * This is an implementation of beam search.
- *
- * To explain the details, lets take machine translation task for example, in
- * this task, one source sentence is translated to multiple target sentences,
- * during this period, one sentence will be translated to multiple translation
- * prefixes(target sentence that have not ended), in each time step a prefix
- * will have some candidates, input the candidate ids and their corresponding
- * scores (probabilities), it will sort and select the top beam_size candidates
- * for each source sentence, and store the selected candidates's score and their
- * corresponding ids to LoDTensors.
- *
- * A detailed example:
- *
- *  Input
- *
- *    ids:
- *      - LoD (should have 2 levels)
- *        - first level: [0, 1, 4]
- *        - second level: [0, 1, 2, 3, 4]
- *      - tensor's data:
- *          [[4, 2, 5]
- *           [2, 1, 3]
- *           [3, 5, 2]
- *           [8, 2, 1]]
- *
- *    scores:
- *      - LoD same as `ids`
- *      - tensor's data
- *          [[0.5, 0.3, 0.2]
- *           [0.6, 0.3, 0.1]
- *           [0.9, 0.5, 0.1]
- *           [0.7, 0.5, 0.1]]
- *
- * The inputs means that there are 2 source sentences to translate, and the
- * first source has 1 prefix, the second source has 2 prefix.
- *
- * Lets assume beam size is 2, and the beam search's output should be
- *      - LoD
- *        - first level: [0, 1, 2]
- *        - second level: [0, 2, 4]
- *      - id tensor's data
- *          [[4,
- *            1,
- *            3,
- *            8]]
- *      - score tensor's data
- *          [[0.5,
- *            0.3,
- *            0.9,
- *            0.7]]
- *
- * TODO all the prune operations should be in the beam search, so it is better
- * to split the beam search algorithm into a sequence of smaller operators, and
- * the prune operators can be inserted in this sequence.
- */
-template <lite::TargetType Target, typename T>
-class BeamSearchFunctor {
- public:
-  /*
-   * The main function of beam search.
-   *
-   * @selected_ids: a [None, 1]-shaped tensor with LoD.
-   *   In a machine translation model, it might be the candidate term id sets,
-   *   each set stored as a varience-length sequence.
-   *   The format might be described with a two-level LoD
-   *   - [[0 1],
-   *      [0 1 2]]
-   *   - [[]
-   *      [0 1]]
-   *   the first level of LoD tells that there are two source sentences. The
-   *   second level describes the details of the candidate id set's offsets in
-   * the source sentences.
-   *
-   *  @selected_scores: a LoD tensor with the same shape and LoD with
-   * selected_ids.
-   *   It stores the corresponding scores of candidate ids in selected_ids.
-   *
-   * Return false if all the input tensor is empty, in machine translation task
-   * that means no candidates is provided, and the task will stop running.
-   */
-  void operator()(const lite::Context<Target>& context,
-                  const lite::Tensor* pre_ids,
-                  const lite::Tensor* pre_scores,
-                  const lite::Tensor* ids,
-                  const lite::Tensor* scores,
-                  lite::Tensor* selected_ids,
-                  lite::Tensor* selected_scores,
-                  lite::Tensor* parent_idx,
-                  size_t level,
-                  size_t beam_size,
-                  int end_id,
-                  bool is_accumulated);
-};
-
-}  // namespace math
-}  // namespace x86
-}  // namespace lite
-}  // namespace paddle
diff --git a/lite/backends/x86/math/beam_search_test.cc b/lite/backends/x86/math/beam_search_test.cc
deleted file mode 100644
index 233fa03fbaa..00000000000
--- a/lite/backends/x86/math/beam_search_test.cc
+++ /dev/null
@@ -1,152 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/math/beam_search.h"
-#include <gtest/gtest.h>
-#include <vector>
-
-void PrepareCPUTensors(paddle::framework::LoDTensor* ids,
-                       paddle::framework::LoDTensor* scores,
-                       paddle::framework::LoDTensor* pre_ids,
-                       paddle::framework::LoDTensor* pre_scores) {
-  // lod
-  paddle::framework::LoD lod;
-  std::vector<uint64_t> level0({0, 2, 4});
-  std::vector<uint64_t> level1({0, 1, 2, 3, 4});
-  lod.push_back(level0);
-  lod.push_back(level1);
-  ids->set_lod(lod);
-  scores->set_lod(lod);
-
-  auto dims = paddle::framework::make_ddim({4, 3});
-  ids->Resize(dims);
-  scores->Resize(dims);
-
-  paddle::platform::CPUPlace place;
-  auto* ids_data = ids->mutable_data<int64_t>(place);
-  auto* scores_data = scores->mutable_data<float>(place);
-  std::vector<int64_t> ids_vec_data({4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1});
-  std::vector<float> scores_vec_data(
-      {0.6f, 0.3f, 0.5f, 0.2f, 0.3f, 0.1f, 0.9f, 0.5f, 0.1f, 0.7f, 0.5f, 0.1f});
-
-  CHECK_EQ(static_cast<size_t>(ids->numel()), ids_vec_data.size());
-  CHECK_EQ(static_cast<size_t>(ids->numel()), scores_vec_data.size());
-
-  for (int i = 0; i < ids->numel(); i++) {
-    ids_data[i] = ids_vec_data[i];
-    scores_data[i] = scores_vec_data[i];
-  }
-
-  // pre_ids
-  pre_ids->Resize(paddle::framework::make_ddim({4, 1}));
-  for (int i = 0; i < 4; i++) {
-    pre_ids->mutable_data<int64_t>(place)[i] = i + 1;
-  }
-
-  // pre_scores
-  pre_scores->Resize(paddle::framework::make_ddim({4, 1}));
-  for (int i = 0; i < 4; i++) {
-    pre_scores->mutable_data<float>(place)[i] = 0.1 * (i + 1);
-  }
-}
-
-template <typename DeviceContext, typename Place>
-void TestBeamSearch() {
-  paddle::framework::LoDTensor ids;
-  paddle::framework::LoDTensor scores;
-  paddle::framework::LoDTensor pre_ids;
-  paddle::framework::LoDTensor pre_scores;
-
-  auto* place = new Place();
-  DeviceContext* context = new DeviceContext(*place);
-  if (paddle::platform::is_cpu_place(*place)) {
-    PrepareCPUTensors(&ids, &scores, &pre_ids, &pre_scores);
-  } else {
-    paddle::framework::LoDTensor cpu_ids;
-    paddle::framework::LoDTensor cpu_scores;
-    paddle::framework::LoDTensor cpu_pre_ids;
-    paddle::framework::LoDTensor cpu_pre_scores;
-
-    PrepareCPUTensors(&cpu_ids, &cpu_scores, &cpu_pre_ids, &cpu_pre_scores);
-
-    TensorCopySync(cpu_ids, *place, &ids);
-    TensorCopySync(cpu_scores, *place, &scores);
-    TensorCopySync(cpu_pre_ids, *place, &pre_ids);
-    TensorCopySync(cpu_pre_scores, *place, &pre_scores);
-
-    ids.set_lod(cpu_ids.lod());
-    scores.set_lod(cpu_scores.lod());
-    pre_ids.set_lod(cpu_pre_ids.lod());
-    pre_scores.set_lod(cpu_pre_scores.lod());
-  }
-
-  paddle::framework::LoDTensor selected_ids;
-  paddle::framework::LoDTensor selected_scores;
-  paddle::framework::LoDTensor parent_idx;
-
-  size_t level = 0;
-  size_t beam_size = 2;
-  int end_id = 0;
-  paddle::operators::math::BeamSearchFunctor<DeviceContext, float> beamsearch;
-  beamsearch(*context,
-             &pre_ids,
-             &pre_scores,
-             &ids,
-             &scores,
-             &selected_ids,
-             &selected_scores,
-             &parent_idx,
-             level,
-             beam_size,
-             end_id,
-             true);
-
-  ASSERT_EQ(selected_ids.lod(), selected_scores.lod());
-
-  paddle::framework::LoDTensor cpu_selected_ids;
-  paddle::framework::LoDTensor cpu_selected_scores;
-  if (paddle::platform::is_cpu_place(*place)) {
-    cpu_selected_ids = selected_ids;
-    cpu_selected_scores = selected_scores;
-  } else {
-    TensorCopySync(
-        selected_ids, paddle::platform::CPUPlace(), &cpu_selected_ids);
-    TensorCopySync(
-        selected_scores, paddle::platform::CPUPlace(), &cpu_selected_scores);
-    cpu_selected_ids.set_lod(selected_ids.lod());
-    cpu_selected_scores.set_lod(selected_scores.lod());
-  }
-
-  std::vector<int64_t> expected_ids({4, 5, 3, 8});
-  std::vector<float> expected_scores({0.6f, 0.5f, 0.9f, 0.7f});
-  for (int i = 0; i < 4; i++) {
-    ASSERT_EQ(expected_ids[i], cpu_selected_ids.data<int64_t>()[i]);
-    ASSERT_EQ(expected_scores[i], cpu_selected_scores.data<float>()[i]);
-  }
-
-  delete place;
-  delete context;
-}
-
-TEST(BeamSearch, CPU) {
-  TestBeamSearch<paddle::platform::CPUDeviceContext,
-                 paddle::platform::CPUPlace>();
-}
-
-#ifdef PADDLE_WITH_CUDA
-TEST(BeamSearch, GPU) {
-  TestBeamSearch<paddle::platform::CUDADeviceContext,
-                 paddle::platform::CUDAPlace>();
-}
-#endif
diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt
index 55270af93a5..28980a94f7d 100644
--- a/lite/kernels/arm/CMakeLists.txt
+++ b/lite/kernels/arm/CMakeLists.txt
@@ -98,7 +98,7 @@ add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_
 add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} sequence_softmax_compute_host)
 add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} topk_compute_host)
 add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} increment_compute_host)
-add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm)
+add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} beam_search_compute_host)
 add_kernel(lod_reset_compute_arm ARM extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} lod_reset_compute_host)
 add_kernel(lstm_arm ARM extra SRCS lstm_compute.cc DEPS ${lite_kernel_deps} math_arm)
 
diff --git a/lite/kernels/arm/beam_search_compute.cc b/lite/kernels/arm/beam_search_compute.cc
index 437ba070b7e..9032734d70c 100644
--- a/lite/kernels/arm/beam_search_compute.cc
+++ b/lite/kernels/arm/beam_search_compute.cc
@@ -12,41 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "lite/kernels/arm/beam_search_compute.h"
-#include "lite/backends/arm/math/funcs.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace arm {
-
-void BeamSearchCompute::Run() {
-  auto& ctx = this->ctx_->template As<ARMContext>();
-  auto& param = this->Param<operators::BeamSearchParam>();
-  lite::arm::math::beam_search(param.pre_ids,
-                               param.pre_scores,
-                               param.ids,
-                               param.scores,
-                               param.selected_ids,
-                               param.selected_scores,
-                               param.parent_idx,
-                               param.level,
-                               param.beam_size,
-                               param.end_id,
-                               param.is_accumulated,
-                               &ctx);
-}
-
-}  // namespace arm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
+#include "lite/kernels/host/beam_search_compute.h"
 
 REGISTER_LITE_KERNEL(beam_search,
                      kARM,
                      kFloat,
                      kNCHW,
-                     paddle::lite::kernels::arm::BeamSearchCompute,
+                     paddle::lite::kernels::host::BeamSearchCompute,
                      def)
     .BindInput("pre_ids",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
diff --git a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt
index 528074f217d..6e0fa3888b2 100644
--- a/lite/kernels/host/CMakeLists.txt
+++ b/lite/kernels/host/CMakeLists.txt
@@ -73,6 +73,7 @@ add_kernel(topk_compute_host Host extra SRCS topk_compute.cc DEPS ${lite_kernel_
 add_kernel(topk_v2_compute_host Host extra SRCS topk_v2_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(meshgrid_compute_host Host extra SRCS meshgrid_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(linspace_compute_host Host extra SRCS linspace_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(beam_search_compute_host Host extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_host)
 add_kernel(beam_search_decode_compute_host Host extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(roi_perspective_transform_compute_host Host extra SRCS roi_perspective_transform_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(lod_reset_compute_host Host extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps})
diff --git a/lite/kernels/host/beam_search_compute.cc b/lite/kernels/host/beam_search_compute.cc
new file mode 100644
index 00000000000..3a48f09a7e4
--- /dev/null
+++ b/lite/kernels/host/beam_search_compute.cc
@@ -0,0 +1,62 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/host/beam_search_compute.h"
+#include "lite/backends/host/math/beam_search.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace host {
+
+void BeamSearchCompute::Run() {  // Pass every op param to the host math impl.
+  auto& param = this->Param<operators::BeamSearchParam>();
+  lite::host::math::beam_search(param.pre_ids,
+                                param.pre_scores,
+                                param.ids,
+                                param.scores,
+                                param.selected_ids,
+                                param.selected_scores,
+                                param.parent_idx,
+                                param.level,
+                                param.beam_size,
+                                param.end_id,
+                                param.is_accumulated);
+}
+
+}  // namespace host
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_LITE_KERNEL(beam_search,  // every tensor bound below is TARGET(kHost)
+                     kHost,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::host::BeamSearchCompute,
+                     def)
+    .BindInput("pre_ids",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
+    .BindInput("pre_scores",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))})
+    .BindInput("ids", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
+    .BindInput("scores",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))})
+    .BindOutput("selected_ids",
+                {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
+    .BindOutput("selected_scores",
+                {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))})
+    .BindOutput("parent_idx",
+                {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
+    .Finalize();
diff --git a/lite/kernels/arm/beam_search_compute.h b/lite/kernels/host/beam_search_compute.h
similarity index 80%
rename from lite/kernels/arm/beam_search_compute.h
rename to lite/kernels/host/beam_search_compute.h
index 854696e5b9f..c6ca518e4d4 100644
--- a/lite/kernels/arm/beam_search_compute.h
+++ b/lite/kernels/host/beam_search_compute.h
@@ -13,26 +13,24 @@
 // limitations under the License.
 
 #pragma once
-#include <stdint.h>
-#include "lite/backends/arm/math/type_trans.h"
 #include "lite/core/kernel.h"
 #include "lite/core/op_registry.h"
 
 namespace paddle {
 namespace lite {
 namespace kernels {
-namespace arm {
+namespace host {
 
-class BeamSearchCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+class BeamSearchCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
  public:
   void Run() override;
 
-  ~BeamSearchCompute() {}
+  virtual ~BeamSearchCompute() = default;  // class declares no data members
 
  private:
 };
 
-}  // namespace arm
+}  // namespace host
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle