Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[XPU] add conv3d, fix instance_norm, fix conv2d_transpose, test=develop, test=xpu #7642

Merged
merged 10 commits into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lite/kernels/x86/instance_norm_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ void InstanceNormCompute::Run() {
int c = param.x->dims()[1];
int height = param.x->dims()[2];
int width = param.x->dims()[3];
if (param.x->dims().size() == 5) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这部分@chenjiao review下吧 (Translation: @chenjiao, please review this part.)

width = param.x->dims()[3] * param.x->dims()[4];
}

lite::x86::math::instance_norm(in,
out,
Expand Down
1 change: 1 addition & 0 deletions lite/kernels/xpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ else()
# basic
add_kernel(rnn_compute_xpu XPU basic SRCS rnn_compute.cc)
add_kernel(conv_compute_xpu XPU basic SRCS conv_compute.cc)
add_kernel(conv3d_compute_xpu XPU basic SRCS conv3d_compute.cc)
add_kernel(conv2d_transpose_compute_xpu XPU basic SRCS conv2d_transpose_compute.cc)
add_kernel(calib_compute_xpu XPU basic SRCS calib_compute.cc)
add_kernel(io_copy_compute_xpu XPU basic SRCS io_copy_compute.cc)
Expand Down
117 changes: 96 additions & 21 deletions lite/kernels/xpu/conv2d_transpose_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,102 @@ void Conv2dTransposeCompute<PRECISION(kFloat)>::Run() {
auto paddings = *param.paddings;
auto dilations = *param.dilations;

int ret = xdnn::conv2d_transpose<float, float, float, int16_t>(
ctx.GetRawContext(),
param.x->data<float>(),
param.filter->data<float>(),
param.output->mutable_data<float>(TARGET(kXPU)),
in_dims[0],
in_dims[1],
in_dims[2],
in_dims[3],
out_dims[1],
std::vector<int>{static_cast<int>(w_dims[2]),
static_cast<int>(w_dims[3])},
strides,
paddings,
dilations,
groups,
nullptr,
nullptr,
nullptr,
true);
CHECK_EQ(ret, 0);
if (param.output_padding.empty()) {
int ret = xdnn::conv2d_transpose<float, float, float, int16_t>(
ctx.GetRawContext(),
param.x->data<float>(),
param.filter->data<float>(),
param.output->mutable_data<float>(TARGET(kXPU)),
in_dims[0],
in_dims[1],
in_dims[2],
in_dims[3],
out_dims[1],
std::vector<int>{static_cast<int>(w_dims[2]),
static_cast<int>(w_dims[3])},
strides,
paddings,
dilations,
groups,
nullptr,
nullptr,
nullptr,
true);
CHECK_EQ(ret, 0);
} else {
int n = in_dims[0];
int yc = in_dims[1];
int yh = in_dims[2];
int yw = in_dims[3];
int xc = out_dims[1];
int xh = out_dims[2];
int xw = out_dims[3];
int kh = w_dims[2];
int kw = w_dims[3];
float* x_trans = nullptr;
XPU_CALL(xpu_malloc(reinterpret_cast<void**>(&x_trans),
(param.x->numel()) * sizeof(float)));
float* x_col_before_concat = nullptr;
XPU_CALL(xpu_malloc(reinterpret_cast<void**>(&x_col_before_concat),
(n * yh * yw * kh * kw * xc) * sizeof(float)));
float* x_col = nullptr;
XPU_CALL(xpu_malloc(reinterpret_cast<void**>(&x_col),
(n * yh * yw * kh * kw * xc) * sizeof(float)));
const float* weight = param.filter->data<float>();
int ret = xdnn::transpose<float>(ctx.GetRawContext(),
param.x->data<float>(),
x_trans,
{n, groups, yc / groups, yh, yw},
{1, 0, 3, 4, 2});
CHECK_EQ(ret, 0);
for (int g = 0; g < groups; g++) {
const float* curr_y = x_trans + g * n * yh * yw * (yc / groups);
const float* curr_w =
weight + g * (yc / groups) * (xc / groups) * kh * kw;
float* curr_x =
x_col_before_concat + g * n * yh * yw * (xc / groups) * kh * kw;
int mac_m = n * yh * yw;
int mac_k = yc / groups;
int mac_n = xc / groups * kh * kw;
ret = xdnn::fc<float, float, float, int16_t>(ctx.GetRawContext(),
curr_y,
curr_w,
curr_x,
mac_m,
mac_n,
mac_k,
false,
false,
nullptr,
nullptr,
nullptr);
CHECK_EQ(ret, 0);
}
ret = xdnn::transpose<float>(ctx.GetRawContext(),
x_col_before_concat,
x_col,
{groups, n * yh * yw, (xc / groups) * kh * kw},
{1, 0, 2});
CHECK_EQ(ret, 0);

ret = xdnn::col2im<float>(ctx.GetRawContext(),
x_col,
param.output->mutable_data<float>(TARGET(kXPU)),
n,
xc,
xh,
xw,
std::vector<int>{static_cast<int>(w_dims[2]),
static_cast<int>(w_dims[3])},
strides,
paddings,
dilations,
true);
CHECK_EQ(ret, 0);
xpu_free(x_trans);
xpu_free(x_col_before_concat);
xpu_free(x_col);
}
}

} // namespace xpu
Expand Down
76 changes: 76 additions & 0 deletions lite/kernels/xpu/conv3d_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/xpu/conv3d_compute.h"
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

// FP32 specialization: runs a 3-D convolution on the XPU device through
// the XDNN library call xdnn::conv3d.
template <>
void Conv3DCompute<PRECISION(kFloat)>::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const auto& input_dims = param.x->dims();
  const auto& filter_dims = param.filter->dims();
  const int group_count = param.groups;
  const auto& stride_vec = param.strides;
  auto padding_vec = *param.paddings;
  auto dilation_vec = *param.dilations;

  // Spatial kernel extents (D, H, W) taken from the filter shape.
  const std::vector<int> kernel_size{static_cast<int>(filter_dims[2]),
                                     static_cast<int>(filter_dims[3]),
                                     static_cast<int>(filter_dims[4])};

  // Input layout is NCDHW (is_ncdhw = true). The three trailing nullptrs
  // are presumably the optional max-value buffers of the XDNN API — TODO
  // confirm against the xdnn::conv3d signature.
  const int status = xdnn::conv3d<float, float, float, int16_t>(
      ctx.GetRawContext(),
      param.x->data<float>(),
      param.filter->data<float>(),
      param.output->mutable_data<float>(TARGET(kXPU)),
      input_dims[0],   // input_n
      input_dims[1],   // input_c
      input_dims[2],   // input_d
      input_dims[3],   // input_h
      input_dims[4],   // input_w
      filter_dims[0],  // num_filter
      kernel_size,
      stride_vec,
      padding_vec,
      dilation_vec,
      group_count,
      nullptr,
      nullptr,
      nullptr,
      true /* is_ncdhw */);
  CHECK_EQ(status, 0);
}

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle

// Kernel registration: exposes the FP32 / NCHW conv3d kernel on the XPU
// target under the variant name "def".
namespace xpu = paddle::lite::kernels::xpu;
using Conv3dFp32 = xpu::Conv3DCompute<PRECISION(kFloat)>;

// All bound tensors live in XPU device memory. NOTE(review): "Bias" is
// declared as an input here but the visible Run() never reads param.bias —
// presumably bias is fused elsewhere or unsupported; confirm against the
// conv3d op definition.
REGISTER_LITE_KERNEL(conv3d, kXPU, kFloat, kNCHW, Conv3dFp32, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindPaddleOpVersion("conv3d", 1)
    .Finalize();
37 changes: 37 additions & 0 deletions lite/kernels/xpu/conv3d_compute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "lite/core/kernel.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

// XPU kernel class for the conv3d operator.
//
// FilterPtype selects the precision the kernel is registered for
// (e.g. PRECISION(kFloat)); Run() is specialized per precision in the
// accompanying .cc file.
template <PrecisionType FilterPtype>
class Conv3DCompute : public KernelLite<TARGET(kXPU), FilterPtype> {
 public:
  using param_t = operators::Conv3DParam;

  // Executes the 3-D convolution; body provided by per-precision
  // specializations. `override` replaces bare `virtual` so the compiler
  // verifies this actually overrides the base-class entry point.
  void Run() override;

  ~Conv3DCompute() override = default;
};

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
31 changes: 27 additions & 4 deletions lite/kernels/xpu/instance_norm_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,32 @@ void InstanceNormCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();
auto x_dims = param.x->dims();
CHECK_EQ(x_dims.size(), 4);
bool x_dims_support = (x_dims.size() == 4 || x_dims.size() == 5);
CHECK_EQ(x_dims_support, true);
wangleilei001 marked this conversation as resolved.
Show resolved Hide resolved

int n = x_dims[0];
int c = x_dims[1];
int h = x_dims[2];
int w = x_dims[3];
if (x_dims.size() == 5) {
h = x_dims[2] * x_dims[3];
w = x_dims[4];
}

float* xpu_scale = nullptr;
zhupengyang marked this conversation as resolved.
Show resolved Hide resolved
if (param.scale == nullptr) {
XPU_CALL(
xpu_malloc(reinterpret_cast<void**>(&xpu_scale), c * sizeof(float)));
int ret = xdnn::constant<float>(ctx.GetRawContext(), xpu_scale, c, 1.0f);
CHECK_EQ(ret, 0);
}
float* xpu_bias = nullptr;
if (param.bias == nullptr) {
XPU_CALL(
xpu_malloc(reinterpret_cast<void**>(&xpu_bias), c * sizeof(float)));
int ret = xdnn::constant<float>(ctx.GetRawContext(), xpu_bias, c, 0.0f);
CHECK_EQ(ret, 0);
}
int ret = xdnn::instance_norm<float>(
ctx.GetRawContext(),
param.x->data<float>(),
Expand All @@ -40,12 +60,15 @@ void InstanceNormCompute::Run() {
h,
w,
param.epsilon,
param.scale->data<float>(),
param.bias->data<float>(),
(param.scale == nullptr)
? xpu_scale
: param.scale->data<float>(), // param.scale->data<float>(),
wangleilei001 marked this conversation as resolved.
Show resolved Hide resolved
(param.bias == nullptr)
? xpu_bias
: param.bias->data<float>(), // param.bias->data<float>(),
param.saved_mean->mutable_data<float>(TARGET(kXPU)),
param.saved_variance->mutable_data<float>(TARGET(kXPU)),
true);

CHECK_EQ(ret, 0);
}

Expand Down
1 change: 1 addition & 0 deletions lite/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ set(op_DEPS core op_params)

# 1.basic ops used in basic models
add_operator(conv_op basic SRCS conv_op.cc)
add_operator(conv3d_op basic SRCS conv3d_op.cc)
add_operator(pool_op basic SRCS pool_op.cc)
add_operator(fc_op basic SRCS fc_op.cc)
add_operator(mul_op basic SRCS mul_op.cc)
Expand Down
Loading