support search gan model. 1. add pixel_unshuffle support & 2. enable fill_constant calc offline on arm and opencl & 3. enable reshape_calc_offline_pass on arm and opencl (#10537)

* support search gan model. 1. add pixel_unshuffle support 2. enable fill_constant calc offline on arm and opencl 3. enable reshape_calc_offline_pass on arm and opencl 4. use chinese comments 5. add test for new kernel. test=develop
* support search gan model. 1. add pixel_unshuffle support 2. enable fill_constant calc offline on arm and opencl 3. enable reshape_calc_offline_pass on arm and opencl 4. use chinese comments 5. add test for new kernel. test=develop
* support search gan model. 1. add pixel_unshuffle support 2. enable fill_constant calc offline on arm and opencl 3. enable reshape_calc_offline_pass on arm and opencl 4. use chinese comments 5. add test for new kernel. 6. fix metal pre-commit test=develop
PaddlePaddle · Aug 9, 2024 · 1d0284e · 1d0284e
1 parent a903554
commit 1d0284e
Show file tree

Hide file tree

Showing 12 changed files with 391 additions and 5 deletions.
diff --git a/lite/backends/metal/metal_kernel/texture/ConcatKernel.metal b/lite/backends/metal/metal_kernel/texture/ConcatKernel.metal
@@ -84,9 +84,12 @@ kernel void concat_normal(texture2d_array<ftype, access::read> inx[[texture(0)]]
     out.write(r, gid.xy, gid.z);
 }
 
-// Metal shader compiler automatically enables compilation optimization to enhance the shader’s computing performance.
-// However, the latest version of the Metal shader compiler may cause out-of-bounds reads of `vdim` or `inTexture` in the function below.
-// Therefore, compilation optimization is disabled for this function (marked with `__attribute__((optnone))`).
+// Metal shader compiler automatically enables compilation optimization to enhance the shader’s
+// computing performance.
+// However, the latest version of the Metal shader compiler may cause out-of-bounds reads of `vdim`
+// or `inTexture` in the function below.
+// Therefore, compilation optimization is disabled for this function (marked with
+// `__attribute__((optnone))`).
 __attribute__((optnone)) kernel void concat(texture2d_array<ftype, access::write> out[[texture(0)]],
     texture2d_array<ftype, access::read> in0[[texture(1)]],
     texture2d_array<ftype, access::read> in1[[texture(2)]],

diff --git a/lite/core/optimizer/mir/elimination/fill_constant_calc_offline_pass.cc b/lite/core/optimizer/mir/elimination/fill_constant_calc_offline_pass.cc
@@ -160,4 +160,4 @@ void FillConstantCalcOfflinePass::RemoveFillConstantPattern(
 
 REGISTER_MIR_PASS(fill_constant_calc_offline_pass,
                   paddle::lite::mir::FillConstantCalcOfflinePass)
-    .BindTargets({TARGET(kNNAdapter)});
+    .BindTargets({TARGET(kNNAdapter), TARGET(kARM), TARGET(kOpenCL)});
diff --git a/lite/core/optimizer/mir/elimination/reshape_calc_offline_pass.cc b/lite/core/optimizer/mir/elimination/reshape_calc_offline_pass.cc
@@ -158,4 +158,4 @@ void ReshapeCalcOfflinePass::RemoveReshapePattern(
 
 REGISTER_MIR_PASS(reshape_calc_offline_pass,
                   paddle::lite::mir::ReshapeCalcOfflinePass)
-    .BindTargets({TARGET(kNNAdapter)});
+    .BindTargets({TARGET(kNNAdapter), TARGET(kARM), TARGET(kOpenCL)});
diff --git a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt
@@ -125,6 +125,7 @@ add_kernel(temporal_shift_compute_host Host extra SRCS temporal_shift_compute.cc
 add_kernel(pad_compute_host Host extra SRCS pad_compute.cc)
 add_kernel(bitwise_compute_host Host extra SRCS bitwise_compute.cc)
 add_kernel(empty_compute_host Host extra SRCS empty_compute.cc)
+# Host (not ARM) kernel: the target name carries the `_host` suffix like the
+# other entries in this file.
+add_kernel(pixel_unshuffle_compute_host Host extra SRCS pixel_unshuffle_compute.cc)
 
 
 if(LITE_BUILD_EXTRA AND LITE_WITH_x86)

diff --git a/lite/kernels/host/pixel_unshuffle_compute.cc b/lite/kernels/host/pixel_unshuffle_compute.cc
@@ -0,0 +1,70 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/host/pixel_unshuffle_compute.h"
+#include <cstdint>
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace host {
+
+// Copies every element of the float input X into the float output Out,
+// moving each spatial `downscale_factor` x `downscale_factor` tile of X into
+// the channel dimension of Out.
+//
+// With r = downscale_factor, the element of X at (b, c, y, x) is written to
+// Out at (b, c * r * r + (y % r) * r + (x % r), y / r, x / r). Both tensors
+// are addressed as dense 4-D arrays using the dims they report; Out's dims
+// are read from the tensor as-is and are not computed here.
+//
+// r must be non-zero (the op's CheckShape requires it to be positive). All
+// index arithmetic is done in 64 bits so the flat offsets cannot overflow a
+// 32-bit int on large tensors.
+void PixelUnShuffleCompute::Run() {
+  auto& param = Param<operators::PixelUnShuffleParam>();
+
+  const float* x_data = param.x->data<float>();
+  float* output_data = param.output->mutable_data<float>();
+  const int64_t factor = param.downscale_factor;
+
+  const auto& in_dims = param.x->dims();
+  const auto& out_dims = param.output->dims();
+  const int64_t batch_size = in_dims[0];
+  const int64_t in_channels = in_dims[1];
+  const int64_t height = in_dims[2];
+  const int64_t width = in_dims[3];
+  const int64_t out_channels = out_dims[1];
+  const int64_t out_height = out_dims[2];
+  const int64_t out_width = out_dims[3];
+
+  for (int64_t b = 0; b < batch_size; ++b) {
+    for (int64_t c = 0; c < in_channels; ++c) {
+      for (int64_t y = 0; y < height; ++y) {
+        // Everything that depends only on (b, c, y) is computed once per
+        // input row instead of once per element.
+        const float* in_row =
+            x_data + ((b * in_channels + c) * height + y) * width;
+        // Equals c * r * r + (y % r) * r.
+        const int64_t out_c_base = (c * factor + (y % factor)) * factor;
+        const int64_t out_y = y / factor;
+
+        for (int64_t x = 0; x < width; ++x) {
+          const int64_t out_c = out_c_base + (x % factor);
+          const int64_t out_x = x / factor;
+          const int64_t out_index =
+              ((b * out_channels + out_c) * out_height + out_y) * out_width +
+              out_x;
+          output_data[out_index] = in_row[x];
+        }
+      }
+    }
+  }
+}
+
+}  // namespace host
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers PixelUnShuffleCompute under the op name `pixel_unshuffle` with
+// the kHost / kFloat / kNCHW tags and the alias `def`. Input "X" and output
+// "Out" are both bound to the kHost tensor type.
+REGISTER_LITE_KERNEL(pixel_unshuffle,
+                     kHost,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::host::PixelUnShuffleCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
+    .Finalize();
diff --git a/lite/kernels/host/pixel_unshuffle_compute.h b/lite/kernels/host/pixel_unshuffle_compute.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "lite/core/kernel.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace host {
+
+// Host float kernel for the pixel_unshuffle operator. Run() is defined in
+// the accompanying .cc file.
+class PixelUnShuffleCompute
+    : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
+ public:
+  // Parameter struct this kernel operates on.
+  using param_t = operators::PixelUnShuffleParam;
+
+  // Executes the kernel on the currently attached parameters.
+  void Run() override;
+
+  virtual ~PixelUnShuffleCompute() = default;
+};
+
+}  // namespace host
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt
@@ -140,6 +140,7 @@ add_operator(sampling_id_op_lite extra SRCS sampling_id_op.cc)
 add_operator(polygon_box_transform_op_lite extra SRCS polygon_box_transform_op.cc)
 add_operator(max_pool_with_index_op extra SRCS max_pool_with_index_op.cc)
 add_operator(pixel_shuffle_op extra SRCS pixel_shuffle_op.cc)
+add_operator(pixel_unshuffle_op extra SRCS pixel_unshuffle_op.cc)
 add_operator(clip_op extra SRCS clip_op.cc)
 add_operator(print_op extra SRCS print_op.cc)
 add_operator(scatter extra SRCS scatter_op.cc)

diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h
@@ -2291,6 +2291,12 @@ struct PixelShuffleParam : ParamBase {
   int upscale_factor{1};
 };
 
+// Parameters of the pixel_unshuffle operator.
+struct PixelUnShuffleParam : ParamBase {
+  // Input tensor; null until the operator attaches it.
+  lite::Tensor* x{nullptr};
+  // Output tensor; null until the operator attaches it.
+  lite::Tensor* output{nullptr};
+  // Spatial downscale factor; defaults to 1.
+  int downscale_factor{1};
+};
+
 struct RetinanetDetectionOutputParam : ParamBase {
   std::vector<Tensor*> bboxes{};
   std::vector<Tensor*> scores{};

diff --git a/lite/operators/pixel_unshuffle_op.cc b/lite/operators/pixel_unshuffle_op.cc
@@ -0,0 +1,85 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/pixel_unshuffle_op.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+// Validates the attached parameters. Returns true only when every check
+// below holds: both tensors are attached, downscale_factor is positive, X is
+// 4-D, and dims 2 and 3 of X are multiples of downscale_factor.
+// NOTE(review): the CHECK_*_OR_FALSE macros are defined elsewhere; they
+// presumably return false from this function on failure -- confirm.
+bool PixelUnShuffleOpLite::CheckShape() const {
+  CHECK_OR_FALSE(param_.x);
+  CHECK_OR_FALSE(param_.output);
+  // Must be positive: it is used as a divisor below.
+  CHECK_OR_FALSE(param_.downscale_factor > 0);
+
+  const auto x_dims = param_.x->dims();
+  const auto downscale_factor = param_.downscale_factor;
+
+  // check input tensor dims size
+  CHECK_EQ_OR_FALSE(x_dims.size(), 4);
+
+  // check if the height and width can be divided by downscale_factor
+  CHECK_EQ_OR_FALSE(x_dims[2] % downscale_factor, 0);
+  CHECK_EQ_OR_FALSE(x_dims[3] % downscale_factor, 0);
+
+  return true;
+}
+
+// Derives the shape of Out from the shape of X and resizes Out to it.
+//
+// For X of shape [N, C, H, W] and r = downscale_factor, Out is resized to
+// [N, C * r * r, H / r, W / r].
+//
+// Returns false and leaves Out untouched when r is not positive, X is not
+// 4-D, or H or W is not a multiple of r. Returns true after resizing Out.
+bool PixelUnShuffleOpLite::InferShapeImpl() const {
+  const auto& x_dims = param_.x->dims();
+  const auto downscale_factor = param_.downscale_factor;
+
+  // Validate before any division or indexing: a non-positive factor would
+  // divide by zero and a non-4-D input would be indexed out of range.
+  if (downscale_factor <= 0 || x_dims.size() != 4) {
+    return false;
+  }
+
+  // input tensor dims (kept in the dims' own integer type, no narrowing)
+  const auto N = x_dims[0];
+  const auto C = x_dims[1];
+  const auto H = x_dims[2];
+  const auto W = x_dims[3];
+
+  // make sure the input height and width can be divided by downscale_factor
+  if (H % downscale_factor != 0 || W % downscale_factor != 0) {
+    return false;
+  }
+
+  // output tensor dims
+  const auto out_C = C * downscale_factor * downscale_factor;
+  const auto out_H = H / downscale_factor;
+  const auto out_W = W / downscale_factor;
+
+  DDim output_dims({N, out_C, out_H, out_W});
+  param_.output->Resize(output_dims);
+  return true;
+}
+
+// Binds the operator's parameters from its description.
+//
+// Looks up the first argument of input "X" and of output "Out" in `scope`
+// and stores their tensors in param_.x / param_.output. If the description
+// has a "downscale_factor" attribute it is copied into
+// param_.downscale_factor; otherwise the parameter keeps its current value.
+//
+// Returns false, instead of dereferencing an empty list or a null variable,
+// when "X" or "Out" has no arguments or when either variable is missing
+// from `scope`. Returns true otherwise.
+bool PixelUnShuffleOpLite::AttachImpl(const cpp::OpDesc& opdesc,
+                                      lite::Scope* scope) {
+  const auto x_names = opdesc.Input("X");
+  const auto out_names = opdesc.Output("Out");
+  if (x_names.empty() || out_names.empty()) {
+    return false;
+  }
+
+  auto* x_var = scope->FindVar(x_names.front());
+  auto* out_var = scope->FindVar(out_names.front());
+  if (x_var == nullptr || out_var == nullptr) {
+    return false;
+  }
+
+  param_.x = x_var->GetMutable<lite::Tensor>();
+  param_.output = out_var->GetMutable<lite::Tensor>();
+
+  if (opdesc.HasAttr("downscale_factor")) {
+    param_.downscale_factor = opdesc.GetAttr<int>("downscale_factor");
+  }
+
+  return true;
+}
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+
+// Registers PixelUnShuffleOpLite under the op type name `pixel_unshuffle`.
+REGISTER_LITE_OP(pixel_unshuffle,
+                 paddle::lite::operators::PixelUnShuffleOpLite);
diff --git a/lite/operators/pixel_unshuffle_op.h b/lite/operators/pixel_unshuffle_op.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "lite/core/op_lite.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+// Operator definition for pixel_unshuffle: validates and infers shapes,
+// attaches tensors/attributes from an op description, and hands its
+// parameters to a kernel.
+class PixelUnShuffleOpLite : public OpLite {
+ public:
+  PixelUnShuffleOpLite() {}
+  explicit PixelUnShuffleOpLite(const std::string &op_type) : OpLite(op_type) {}
+
+  // Validates the attached tensors and the downscale factor.
+  bool CheckShape() const override;
+
+  // Resizes the output tensor according to the input dims and the factor.
+  bool InferShapeImpl() const override;
+
+  // Fills param_ from the op description and the variables in `scope`.
+  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
+
+  // Passes this op's parameters to `kernel`.
+  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
+  std::string DebugString() const override { return "pixel_unshuffle"; }
+
+#ifdef LITE_WITH_PROFILE
+  // Fills the profiler record with the input/output shapes and a remark.
+  // The remark is the literal "downscale_factor" immediately followed by the
+  // factor value, with no separator.
+  void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
+    auto input_dims = param_.x->dims();
+    auto output_dims = param_.output->dims();
+    ch->input_shape = ch->DimToStr(input_dims);
+    ch->output_shape = ch->DimToStr(output_dims);
+    ch->remark = "downscale_factor" + std::to_string(param_.downscale_factor);
+
+    // NOTE(review): fixed value, presumably a placeholder since the op only
+    // moves data -- confirm.
+    ch->macs = 1;
+  }
+#endif
+
+ private:
+  // mutable so the const shape-inference method can resize the output.
+  mutable PixelUnShuffleParam param_;
+};
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt
@@ -132,6 +132,7 @@ if(LITE_BUILD_EXTRA)
     lite_cc_test(test_kernel_where_compute SRCS where_compute_test.cc)
     lite_cc_test(test_kernel_log_softmax_compute SRCS log_softmax_compute_test.cc)
     lite_cc_test(test_kernel_roll_compute SRCS roll_compute_test.cc)
+    lite_cc_test(test_kernel_pixelunshuffle_compute SRCS pixel_unshuffle_compute_test.cc)
     # lite_cc_test(test_kernel_tensor_array_to_tensor_compute SRCS tensor_array_to_tensor_compute_test.cc)
     # TODO: fix tensor_array_to_tensor unittest