Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[XPU] add conv3d, fix instance_norm, fix conv2d_transpose, test=develop, test=xpu #7642

Merged
merged 10 commits into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lite/kernels/x86/instance_norm_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ void InstanceNormCompute::Run() {
int c = param.x->dims()[1];
int height = param.x->dims()[2];
int width = param.x->dims()[3];
if (param.x->dims().size() == 5) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这部分@chenjiao review下吧 (Translation: @chenjiao, please review this part.)

width = param.x->dims()[3] * param.x->dims()[4];
}

lite::x86::math::instance_norm(in,
out,
Expand Down
1 change: 1 addition & 0 deletions lite/kernels/xpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ else()
# basic
add_kernel(rnn_compute_xpu XPU basic SRCS rnn_compute.cc)
add_kernel(conv_compute_xpu XPU basic SRCS conv_compute.cc)
add_kernel(conv3d_compute_xpu XPU basic SRCS conv3d_compute.cc)
add_kernel(conv2d_transpose_compute_xpu XPU basic SRCS conv2d_transpose_compute.cc)
add_kernel(calib_compute_xpu XPU basic SRCS calib_compute.cc)
add_kernel(io_copy_compute_xpu XPU basic SRCS io_copy_compute.cc)
Expand Down
117 changes: 96 additions & 21 deletions lite/kernels/xpu/conv2d_transpose_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,102 @@ void Conv2dTransposeCompute<PRECISION(kFloat)>::Run() {
auto paddings = *param.paddings;
auto dilations = *param.dilations;

int ret = xdnn::conv2d_transpose<float, float, float, int16_t>(
ctx.GetRawContext(),
param.x->data<float>(),
param.filter->data<float>(),
param.output->mutable_data<float>(TARGET(kXPU)),
in_dims[0],
in_dims[1],
in_dims[2],
in_dims[3],
out_dims[1],
std::vector<int>{static_cast<int>(w_dims[2]),
static_cast<int>(w_dims[3])},
strides,
paddings,
dilations,
groups,
nullptr,
nullptr,
nullptr,
true);
CHECK_EQ(ret, 0);
if (param.output_padding.empty()) {
int ret = xdnn::conv2d_transpose<float, float, float, int16_t>(
ctx.GetRawContext(),
param.x->data<float>(),
param.filter->data<float>(),
param.output->mutable_data<float>(TARGET(kXPU)),
in_dims[0],
in_dims[1],
in_dims[2],
in_dims[3],
out_dims[1],
std::vector<int>{static_cast<int>(w_dims[2]),
static_cast<int>(w_dims[3])},
strides,
paddings,
dilations,
groups,
nullptr,
nullptr,
nullptr,
true);
CHECK_EQ(ret, 0);
} else {
int n = in_dims[0];
int yc = in_dims[1];
int yh = in_dims[2];
int yw = in_dims[3];
int xc = out_dims[1];
int xh = out_dims[2];
int xw = out_dims[3];
int kh = w_dims[2];
int kw = w_dims[3];
float* x_trans = nullptr;
XPU_CALL(xpu_malloc(reinterpret_cast<void**>(&x_trans),
(param.x->numel()) * sizeof(float)));
float* x_col_before_concat = nullptr;
XPU_CALL(xpu_malloc(reinterpret_cast<void**>(&x_col_before_concat),
(n * yh * yw * kh * kw * xc) * sizeof(float)));
float* x_col = nullptr;
XPU_CALL(xpu_malloc(reinterpret_cast<void**>(&x_col),
(n * yh * yw * kh * kw * xc) * sizeof(float)));
const float* weight = param.filter->data<float>();
int ret = xdnn::transpose<float>(ctx.GetRawContext(),
param.x->data<float>(),
x_trans,
{n, groups, yc / groups, yh, yw},
{1, 0, 3, 4, 2});
CHECK_EQ(ret, 0);
for (int g = 0; g < groups; g++) {
const float* curr_y = x_trans + g * n * yh * yw * (yc / groups);
const float* curr_w =
weight + g * (yc / groups) * (xc / groups) * kh * kw;
float* curr_x =
x_col_before_concat + g * n * yh * yw * (xc / groups) * kh * kw;
int mac_m = n * yh * yw;
int mac_k = yc / groups;
int mac_n = xc / groups * kh * kw;
ret = xdnn::fc<float, float, float, int16_t>(ctx.GetRawContext(),
curr_y,
curr_w,
curr_x,
mac_m,
mac_n,
mac_k,
false,
false,
nullptr,
nullptr,
nullptr);
CHECK_EQ(ret, 0);
}
ret = xdnn::transpose<float>(ctx.GetRawContext(),
x_col_before_concat,
x_col,
{groups, n * yh * yw, (xc / groups) * kh * kw},
{1, 0, 2});
CHECK_EQ(ret, 0);

ret = xdnn::col2im<float>(ctx.GetRawContext(),
x_col,
param.output->mutable_data<float>(TARGET(kXPU)),
n,
xc,
xh,
xw,
std::vector<int>{static_cast<int>(w_dims[2]),
static_cast<int>(w_dims[3])},
strides,
paddings,
dilations,
true);
CHECK_EQ(ret, 0);
xpu_free(x_trans);
xpu_free(x_col_before_concat);
xpu_free(x_col);
}
}

} // namespace xpu
Expand Down
76 changes: 76 additions & 0 deletions lite/kernels/xpu/conv3d_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/xpu/conv3d_compute.h"
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

// FP32 specialization: runs a 3-D convolution on the XPU device through
// the XDNN library call xdnn::conv3d.
template <>
void Conv3DCompute<PRECISION(kFloat)>::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const auto& input_dims = param.x->dims();
  const auto& filter_dims = param.filter->dims();
  const int group_count = param.groups;
  const auto& stride_vec = param.strides;
  auto padding_vec = *param.paddings;
  auto dilation_vec = *param.dilations;

  // Spatial kernel extents (D, H, W) taken from the filter shape.
  const std::vector<int> kernel_size{static_cast<int>(filter_dims[2]),
                                     static_cast<int>(filter_dims[3]),
                                     static_cast<int>(filter_dims[4])};

  // Input layout is NCDHW (is_ncdhw = true). The three trailing nullptrs
  // are presumably the optional max-value buffers of the XDNN API — TODO
  // confirm against the xdnn::conv3d signature.
  const int status = xdnn::conv3d<float, float, float, int16_t>(
      ctx.GetRawContext(),
      param.x->data<float>(),
      param.filter->data<float>(),
      param.output->mutable_data<float>(TARGET(kXPU)),
      input_dims[0],   // input_n
      input_dims[1],   // input_c
      input_dims[2],   // input_d
      input_dims[3],   // input_h
      input_dims[4],   // input_w
      filter_dims[0],  // num_filter
      kernel_size,
      stride_vec,
      padding_vec,
      dilation_vec,
      group_count,
      nullptr,
      nullptr,
      nullptr,
      true /* is_ncdhw */);
  CHECK_EQ(status, 0);
}

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle

// Kernel registration: exposes the FP32 / NCHW conv3d kernel on the XPU
// target under the variant name "def".
namespace xpu = paddle::lite::kernels::xpu;
using Conv3dFp32 = xpu::Conv3DCompute<PRECISION(kFloat)>;

// All bound tensors live in XPU device memory. NOTE(review): "Bias" is
// declared as an input here but the visible Run() never reads param.bias —
// presumably bias is fused elsewhere or unsupported; confirm against the
// conv3d op definition.
REGISTER_LITE_KERNEL(conv3d, kXPU, kFloat, kNCHW, Conv3dFp32, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindPaddleOpVersion("conv3d", 1)
    .Finalize();
37 changes: 37 additions & 0 deletions lite/kernels/xpu/conv3d_compute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "lite/core/kernel.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

// XPU kernel class for the conv3d operator.
//
// FilterPtype selects the precision the kernel is registered for
// (e.g. PRECISION(kFloat)); Run() is specialized per precision in the
// accompanying .cc file.
template <PrecisionType FilterPtype>
class Conv3DCompute : public KernelLite<TARGET(kXPU), FilterPtype> {
 public:
  using param_t = operators::Conv3DParam;

  // Executes the 3-D convolution; body provided by per-precision
  // specializations. `override` replaces bare `virtual` so the compiler
  // verifies this actually overrides the base-class entry point.
  void Run() override;

  ~Conv3DCompute() override = default;
};

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
31 changes: 27 additions & 4 deletions lite/kernels/xpu/instance_norm_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,32 @@ void InstanceNormCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();
auto x_dims = param.x->dims();
CHECK_EQ(x_dims.size(), 4);
bool x_dims_support = (x_dims.size() == 4 || x_dims.size() == 5);
CHECK_EQ(x_dims_support, true);
wangleilei001 marked this conversation as resolved.
Show resolved Hide resolved

int n = x_dims[0];
int c = x_dims[1];
int h = x_dims[2];
int w = x_dims[3];
if (x_dims.size() == 5) {
h = x_dims[2] * x_dims[3];
w = x_dims[4];
}

float* xpu_scale = nullptr;
zhupengyang marked this conversation as resolved.
Show resolved Hide resolved
if (param.scale == nullptr) {
XPU_CALL(
xpu_malloc(reinterpret_cast<void**>(&xpu_scale), c * sizeof(float)));
int ret = xdnn::constant<float>(ctx.GetRawContext(), xpu_scale, c, 1.0f);
CHECK_EQ(ret, 0);
}
float* xpu_bias = nullptr;
if (param.bias == nullptr) {
XPU_CALL(
xpu_malloc(reinterpret_cast<void**>(&xpu_bias), c * sizeof(float)));
int ret = xdnn::constant<float>(ctx.GetRawContext(), xpu_bias, c, 0.0f);
CHECK_EQ(ret, 0);
}
int ret = xdnn::instance_norm<float>(
ctx.GetRawContext(),
param.x->data<float>(),
Expand All @@ -40,12 +60,15 @@ void InstanceNormCompute::Run() {
h,
w,
param.epsilon,
param.scale->data<float>(),
param.bias->data<float>(),
(param.scale == nullptr)
? xpu_scale
: param.scale->data<float>(), // param.scale->data<float>(),
wangleilei001 marked this conversation as resolved.
Show resolved Hide resolved
(param.bias == nullptr)
? xpu_bias
: param.bias->data<float>(), // param.bias->data<float>(),
param.saved_mean->mutable_data<float>(TARGET(kXPU)),
param.saved_variance->mutable_data<float>(TARGET(kXPU)),
true);

CHECK_EQ(ret, 0);
}

Expand Down
1 change: 1 addition & 0 deletions lite/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ set(op_DEPS core op_params)

# 1.basic ops used in basic models
add_operator(conv_op basic SRCS conv_op.cc)
add_operator(conv3d_op basic SRCS conv3d_op.cc)
add_operator(pool_op basic SRCS pool_op.cc)
add_operator(fc_op basic SRCS fc_op.cc)
add_operator(mul_op basic SRCS mul_op.cc)
Expand Down
Loading