From f95d41bd257a09ee6778a3f42a59fa65b04e3c42 Mon Sep 17 00:00:00 2001
From: wbn520 <2565033196@qq.com>
Date: Wed, 22 Jun 2022 19:10:17 +0800
Subject: [PATCH] fixed the bug of tile op in large input and add xpu
 implementation.

---
 lite/kernels/host/tile_compute.cc       |  7 ++-
 lite/kernels/xpu/CMakeLists.txt         |  1 +
 lite/kernels/xpu/tile_compute.cc        | 78 +++++++++++++++++++++++++
 lite/kernels/xpu/tile_compute.h         | 36 ++++++++++++
 lite/operators/tile_op.cc               |  1 +
 lite/tests/kernels/tile_compute_test.cc |  3 +
 6 files changed, 123 insertions(+), 3 deletions(-)
 create mode 100644 lite/kernels/xpu/tile_compute.cc
 create mode 100644 lite/kernels/xpu/tile_compute.h

diff --git a/lite/kernels/host/tile_compute.cc b/lite/kernels/host/tile_compute.cc
index b1a61aebc41..11d4d013cc6 100644
--- a/lite/kernels/host/tile_compute.cc
+++ b/lite/kernels/host/tile_compute.cc
@@ -85,9 +85,10 @@ void TileCompute<T, PType>::Run() {
     int dst_stride = in_stride[i + 1] * right;
     for (int m = 0; m < num; m++) {
       for (int j = 0; j < bcast_dims[i]; j++) {
-        std::memcpy(tmp_dst + j * dst_stride / bcast_dims[i] + m * dst_stride,
-                    tmp_src + m * dst_stride / bcast_dims[i],
-                    dst_stride / bcast_dims[i] * sizeof(T));
+        std::memcpy(
+            tmp_dst + j * (dst_stride / bcast_dims[i]) + m * dst_stride,
+            tmp_src + m * (dst_stride / bcast_dims[i]),
+            dst_stride / bcast_dims[i] * sizeof(T));
       }
     }
     tmp_src_tensor.CopyDataFrom(tmp_dst_tensor);
diff --git a/lite/kernels/xpu/CMakeLists.txt b/lite/kernels/xpu/CMakeLists.txt
index 29266862dea..1efed16ac05 100644
--- a/lite/kernels/xpu/CMakeLists.txt
+++ b/lite/kernels/xpu/CMakeLists.txt
@@ -30,6 +30,7 @@ add_kernel(gru_compute_xpu XPU basic SRCS gru_compute.cc)
 add_kernel(gru_unit_compute_xpu XPU basic SRCS gru_unit_compute.cc)
 add_kernel(stack_compute_xpu XPU basic SRCS stack_compute.cc)
 add_kernel(slice_compute_xpu XPU basic SRCS slice_compute.cc)
+add_kernel(tile_compute_xpu XPU basic SRCS tile_compute.cc)
 add_kernel(cast_compute_xpu XPU basic SRCS cast_compute.cc)
 add_kernel(sequence_topk_avg_pooling_compute_xpu XPU basic SRCS sequence_topk_avg_pooling_compute.cc)
 add_kernel(concat_compute_xpu XPU basic SRCS concat_compute.cc)
diff --git a/lite/kernels/xpu/tile_compute.cc b/lite/kernels/xpu/tile_compute.cc
new file mode 100644
index 00000000000..79007b85dcb
--- /dev/null
+++ b/lite/kernels/xpu/tile_compute.cc
@@ -0,0 +1,78 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/xpu/tile_compute.h"
+#include <vector>
+#include "lite/backends/xpu/xpu_header_sitter.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+
+template <typename T, PrecisionType PType>
+void TileCompute<T, PType>::Run() {
+  auto& param = this->template Param<param_t>();
+  auto& ctx = this->ctx_->template As<XPUContext>();
+  auto repeat_times = param.repeat_times;
+  if (param.RepeatTimes) {
+    auto repeat_times_size = param.RepeatTimes->data_size();
+    for (int64_t i = 0; i < repeat_times_size; i++) {
+      repeat_times.push_back(param.RepeatTimes->template data<int>()[i]);
+    }
+  } else if (param.repeat_times_tensor.size() != 0) {
+    for (int i = 0; i < param.repeat_times_tensor.size(); i++) {
+      auto temp = param.repeat_times_tensor[i];
+      repeat_times.push_back(*(temp->template data<int>()));
+    }
+  }
+  auto in_dims = param.X->dims();
+  auto vec_in_dims = in_dims.Vectorize();
+  // broadcast for vec_in_dims.size() equal to repeat_times.size()
+  if (repeat_times.size() < vec_in_dims.size()) {
+    int diff = vec_in_dims.size() - repeat_times.size();
+    repeat_times.insert(repeat_times.begin(), diff, 1);
+  } else {
+    int diff = repeat_times.size() - vec_in_dims.size();
+    vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
+  }
+
+  std::vector<int> new_in_dims(vec_in_dims.begin(), vec_in_dims.end());
+  std::vector<int> out_dims(param.Out->dims().data().begin(),
+                            param.Out->dims().data().end());
+  int r = xdnn::broadcast<T>(ctx.GetRawContext(),
+                             param.X->template data<T>(),
+                             param.Out->template mutable_data<T>(TARGET(kXPU)),
+                             new_in_dims,
+                             out_dims);
+
+  CHECK_EQ(r, 0);
+}
+
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+using tile_float =
+    paddle::lite::kernels::xpu::TileCompute<float, PRECISION(kFloat)>;
+REGISTER_LITE_KERNEL(tile, kXPU, kFloat, kNCHW, tile_float, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
+    .BindInput("RepeatTimes",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
+    .BindInput("repeat_times_tensor",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
+    .Finalize();
diff --git a/lite/kernels/xpu/tile_compute.h b/lite/kernels/xpu/tile_compute.h
new file mode 100644
index 00000000000..9b6329fa17c
--- /dev/null
+++ b/lite/kernels/xpu/tile_compute.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "lite/core/kernel.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+
+template <typename T, PrecisionType PType>
+class TileCompute : public KernelLite<TARGET(kXPU), PType> {
+ public:
+  using param_t = operators::TileParam;
+
+  virtual void Run();
+
+  virtual ~TileCompute() = default;
+};
+
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/operators/tile_op.cc b/lite/operators/tile_op.cc
index 042afa692df..45d3c74e5fe 100644
--- a/lite/operators/tile_op.cc
+++ b/lite/operators/tile_op.cc
@@ -118,6 +118,7 @@ bool TileOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
   } else if (opdesc.HasInput("repeat_times_tensor") &&
              (opdesc.Input("repeat_times_tensor").size() != 0)) {
     auto temp = opdesc.Input("repeat_times_tensor");
+    param_.repeat_times_tensor.clear();
     for (auto var : temp) {
       param_.repeat_times_tensor.push_back(
           scope->FindVar(var)->GetMutable<lite::Tensor>());
diff --git a/lite/tests/kernels/tile_compute_test.cc b/lite/tests/kernels/tile_compute_test.cc
index 5bf48aa880c..07e11039a12 100644
--- a/lite/tests/kernels/tile_compute_test.cc
+++ b/lite/tests/kernels/tile_compute_test.cc
@@ -199,6 +199,9 @@ TEST(tile, precision) {
 #else
   return;
 #endif
+#elif defined(LITE_WITH_XPU)
+  place = TARGET(kXPU);
+  alias = "def";
 #elif defined(LITE_WITH_ARM) || defined(LITE_WITH_X86)
   place = TARGET(kHost);
 #else