Skip to content

Commit

Permalink
support search gan model. 1.add pixel_unshuffle support &2.enable fil…
Browse files Browse the repository at this point in the history
…l_constant calc offline on arm and opencl & 3.enable reshape_calc_offline_pass on arm and opencl (#10537)

* support search gan model.
1. add pixel_unshuffle support
2. enable fill_constant calc offline on arm and opencl
3. enable reshape_calc_offline_pass on arm and opencl
4. use chinese comments
5. add test for new kernel.
test=develop

* support search gan model.
1. add pixel_unshuffle support
2. enable fill_constant calc offline on arm and opencl
3. enable reshape_calc_offline_pass on arm and opencl
4. use chinese comments
5. add test for new kernel.
test=develop

* support search gan model.
1. add pixel_unshuffle support
2. enable fill_constant calc offline on arm and opencl
3. enable reshape_calc_offline_pass on arm and opencl
4. use chinese comments
5. add test for new kernel.
6. fix metal pre-commit
test=develop
  • Loading branch information
xiebaiyuan authored Aug 9, 2024
1 parent a903554 commit 1d0284e
Show file tree
Hide file tree
Showing 12 changed files with 391 additions and 5 deletions.
9 changes: 6 additions & 3 deletions lite/backends/metal/metal_kernel/texture/ConcatKernel.metal
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,12 @@ kernel void concat_normal(texture2d_array<ftype, access::read> inx[[texture(0)]]
out.write(r, gid.xy, gid.z);
}

// Metal shader compiler automatically enables compilation optimization to enhance the shader’s computing performance.
// However, the latest version of the Metal shader compiler may cause out-of-bounds reads of `vdim` or `inTexture` in the function below.
// Therefore, compilation optimization is disabled for this function (marked with `__attribute__((optnone))`).
// Metal shader compiler automatically enables compilation optimization to enhance the shader’s
// computing performance.
// However, the latest version of the Metal shader compiler may cause out-of-bounds reads of `vdim`
// or `inTexture` in the function below.
// Therefore, compilation optimization is disabled for this function (marked with
// `__attribute__((optnone))`).
__attribute__((optnone)) kernel void concat(texture2d_array<ftype, access::write> out[[texture(0)]],
texture2d_array<ftype, access::read> in0[[texture(1)]],
texture2d_array<ftype, access::read> in1[[texture(2)]],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,4 @@ void FillConstantCalcOfflinePass::RemoveFillConstantPattern(

REGISTER_MIR_PASS(fill_constant_calc_offline_pass,
paddle::lite::mir::FillConstantCalcOfflinePass)
.BindTargets({TARGET(kNNAdapter)});
.BindTargets({TARGET(kNNAdapter), TARGET(kARM), TARGET(kOpenCL)});
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,4 @@ void ReshapeCalcOfflinePass::RemoveReshapePattern(

REGISTER_MIR_PASS(reshape_calc_offline_pass,
paddle::lite::mir::ReshapeCalcOfflinePass)
.BindTargets({TARGET(kNNAdapter)});
.BindTargets({TARGET(kNNAdapter), TARGET(kARM), TARGET(kOpenCL)});
1 change: 1 addition & 0 deletions lite/kernels/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ add_kernel(temporal_shift_compute_host Host extra SRCS temporal_shift_compute.cc
add_kernel(pad_compute_host Host extra SRCS pad_compute.cc)
add_kernel(bitwise_compute_host Host extra SRCS bitwise_compute.cc)
add_kernel(empty_compute_host Host extra SRCS empty_compute.cc)
# Host-side kernel: the target carries the `_host` suffix like its siblings.
add_kernel(pixel_unshuffle_compute_host Host extra SRCS pixel_unshuffle_compute.cc)


if(LITE_BUILD_EXTRA AND LITE_WITH_x86)
Expand Down
70 changes: 70 additions & 0 deletions lite/kernels/host/pixel_unshuffle_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/host/pixel_unshuffle_compute.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

// Pixel-unshuffle for a 4-D float tensor laid out as [N, C, H, W].
//
// With r = downscale_factor, the input element (b, c, y, x) is copied to the
// output element (b, c * r * r + (y % r) * r + (x % r), y / r, x / r).
// The output extents are read from param.output->dims(), so the output tensor
// must already have been resized before this runs.
//
// All extents and flat indices are kept in int64_t: the tensor dimensions are
// 64-bit, and 32-bit index arithmetic would overflow for tensors holding more
// than 2^31 elements.
void PixelUnShuffleCompute::Run() {
  auto& param = Param<operators::PixelUnShuffleParam>();

  const float* x_data = param.x->data<float>();
  float* output_data = param.output->mutable_data<float>();
  const int64_t factor = param.downscale_factor;

  const int64_t batch_size = param.x->dims()[0];
  const int64_t in_channels = param.x->dims()[1];
  const int64_t height = param.x->dims()[2];
  const int64_t width = param.x->dims()[3];
  const int64_t out_channels = param.output->dims()[1];
  const int64_t out_height = param.output->dims()[2];
  const int64_t out_width = param.output->dims()[3];

  for (int64_t b = 0; b < batch_size; ++b) {
    for (int64_t c = 0; c < in_channels; ++c) {
      // First output channel fed by input channel c.
      const int64_t out_c_base = c * factor * factor;
      for (int64_t y = 0; y < height; ++y) {
        // Row-dependent terms are invariant across the innermost loop.
        const int64_t out_y = y / factor;
        const int64_t out_c_row = out_c_base + (y % factor) * factor;
        const int64_t in_row = ((b * in_channels + c) * height + y) * width;
        for (int64_t x = 0; x < width; ++x) {
          const int64_t out_c = out_c_row + (x % factor);
          const int64_t out_x = x / factor;
          const int64_t out_index =
              ((b * out_channels + out_c) * out_height + out_y) * out_width +
              out_x;
          output_data[out_index] = x_data[in_row + x];
        }
      }
    }
  }
}

} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle

// Registers PixelUnShuffleCompute as the "def" kernel of the pixel_unshuffle
// op for target kHost, precision kFloat and layout kNCHW. The "X" input and
// the "Out" output are both bound to host tensors.
REGISTER_LITE_KERNEL(pixel_unshuffle,
kHost,
kFloat,
kNCHW,
paddle::lite::kernels::host::PixelUnShuffleCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
38 changes: 38 additions & 0 deletions lite/kernels/host/pixel_unshuffle_compute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

// Host float kernel for the pixel_unshuffle operator.
// Run() is declared here and defined in the matching .cc file.
class PixelUnShuffleCompute
: public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
public:
// Parameter struct this kernel reads its tensors and attributes from.
using param_t = operators::PixelUnShuffleParam;

// Executes the kernel on the currently attached parameters.
void Run() override;

virtual ~PixelUnShuffleCompute() = default;
};

} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
1 change: 1 addition & 0 deletions lite/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ add_operator(sampling_id_op_lite extra SRCS sampling_id_op.cc)
add_operator(polygon_box_transform_op_lite extra SRCS polygon_box_transform_op.cc)
add_operator(max_pool_with_index_op extra SRCS max_pool_with_index_op.cc)
add_operator(pixel_shuffle_op extra SRCS pixel_shuffle_op.cc)
add_operator(pixel_unshuffle_op extra SRCS pixel_unshuffle_op.cc)
add_operator(clip_op extra SRCS clip_op.cc)
add_operator(print_op extra SRCS print_op.cc)
add_operator(scatter extra SRCS scatter_op.cc)
Expand Down
6 changes: 6 additions & 0 deletions lite/operators/op_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -2291,6 +2291,12 @@ struct PixelShuffleParam : ParamBase {
int upscale_factor{1};
};

// Parameters of the pixel_unshuffle operator.
struct PixelUnShuffleParam : ParamBase {
// Input tensor; null until the operator is attached.
lite::Tensor* x{nullptr};
// Output tensor; null until the operator is attached.
lite::Tensor* output{nullptr};
// Spatial downscale factor; defaults to 1.
int downscale_factor{1};
};

struct RetinanetDetectionOutputParam : ParamBase {
std::vector<Tensor*> bboxes{};
std::vector<Tensor*> scores{};
Expand Down
85 changes: 85 additions & 0 deletions lite/operators/pixel_unshuffle_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/operators/pixel_unshuffle_op.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace operators {

// Validates the attached parameters: both tensors must be set,
// downscale_factor must be positive, the input must be 4-D, and its
// dimensions 2 and 3 must be multiples of downscale_factor.
// Returns true when every check passes.
// NOTE(review): the CHECK_*_OR_FALSE macros are defined outside this file;
// presumably each makes the function return false on failure - confirm.
bool PixelUnShuffleOpLite::CheckShape() const {
CHECK_OR_FALSE(param_.x);
CHECK_OR_FALSE(param_.output);
// Must be positive: it is used as a divisor below.
CHECK_OR_FALSE(param_.downscale_factor > 0);

const auto x_dims = param_.x->dims();
const auto downscale_factor = param_.downscale_factor;

// The input tensor must have exactly four dimensions.
CHECK_EQ_OR_FALSE(x_dims.size(), 4);

// The input height (dim 2) and width (dim 3) must be divisible by
// downscale_factor.
CHECK_EQ_OR_FALSE(x_dims[2] % downscale_factor, 0);
CHECK_EQ_OR_FALSE(x_dims[3] % downscale_factor, 0);

return true;
}

// Derives the output shape from the 4-D input shape [N, C, H, W] and resizes
// the output tensor to [N, C * r * r, H / r, W / r], where r is
// downscale_factor.
//
// Returns false and leaves the output tensor untouched when the input is not
// 4-D, when r is not positive, or when H or W is not a multiple of r.
// Returns true after the output has been resized.
bool PixelUnShuffleOpLite::InferShapeImpl() const {
  const auto x_dims = param_.x->dims();
  const int64_t factor = param_.downscale_factor;

  // Validate before indexing or dividing: a short shape would be read out of
  // range and a non-positive factor would divide by zero below.
  if (x_dims.size() != 4 || factor <= 0) {
    return false;
  }

  // Input tensor dims, kept in 64 bits to match the tensor's dimension type.
  const int64_t N = x_dims[0];
  const int64_t C = x_dims[1];
  const int64_t H = x_dims[2];
  const int64_t W = x_dims[3];

  // The input height and width must be divisible by downscale_factor.
  if (H % factor != 0 || W % factor != 0) {
    return false;
  }

  // Output tensor dims.
  const int64_t out_C = C * factor * factor;
  const int64_t out_H = H / factor;
  const int64_t out_W = W / factor;

  DDim output_dims({N, out_C, out_H, out_W});
  param_.output->Resize(output_dims);
  return true;
}

// Wires the operator to its variables: looks up the first "X" input and the
// first "Out" output in `scope`, stores their tensors in param_, and reads
// the optional "downscale_factor" attribute (the default set in param_ is
// kept when the attribute is absent). Always returns true.
bool PixelUnShuffleOpLite::AttachImpl(const cpp::OpDesc& opdesc,
                                      lite::Scope* scope) {
  const std::string x_name = opdesc.Input("X").front();
  const std::string out_name = opdesc.Output("Out").front();

  auto* x_var = scope->FindVar(x_name);
  param_.x = x_var->GetMutable<lite::Tensor>();

  auto* out_var = scope->FindVar(out_name);
  param_.output = out_var->GetMutable<lite::Tensor>();

  const bool has_factor = opdesc.HasAttr("downscale_factor");
  if (has_factor) {
    param_.downscale_factor = opdesc.GetAttr<int>("downscale_factor");
  }

  return true;
}

} // namespace operators
} // namespace lite
} // namespace paddle

// Registers PixelUnShuffleOpLite under the op type name "pixel_unshuffle".
REGISTER_LITE_OP(pixel_unshuffle,
paddle::lite::operators::PixelUnShuffleOpLite);
57 changes: 57 additions & 0 deletions lite/operators/pixel_unshuffle_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>
#include "lite/core/op_lite.h"

namespace paddle {
namespace lite {
namespace operators {

// Operator definition for pixel_unshuffle: validates shapes, infers the
// output shape, and attaches tensors and attributes to its parameter struct.
class PixelUnShuffleOpLite : public OpLite {
public:
PixelUnShuffleOpLite() {}
explicit PixelUnShuffleOpLite(const std::string &op_type) : OpLite(op_type) {}

// Validates the attached parameters; defined in the .cc file.
bool CheckShape() const override;

// Computes and applies the output tensor shape; defined in the .cc file.
bool InferShapeImpl() const override;

// Binds input/output tensors and attributes from the op description.
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;

// Hands this operator's parameters to the kernel that will run it.
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "pixel_unshuffle"; }

#ifdef LITE_WITH_PROFILE
// Fills the profiler record with the input/output shapes and a remark that
// carries the downscale factor.
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.x->dims();
auto output_dims = param_.output->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "downscale_factor" + std::to_string(param_.downscale_factor);

// Fixed value of 1 is reported as the MAC count.
ch->macs = 1;
}
#endif

private:
// mutable so that the const methods above can update the parameters.
mutable PixelUnShuffleParam param_;
};

} // namespace operators
} // namespace lite
} // namespace paddle
1 change: 1 addition & 0 deletions lite/tests/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ if(LITE_BUILD_EXTRA)
lite_cc_test(test_kernel_where_compute SRCS where_compute_test.cc)
lite_cc_test(test_kernel_log_softmax_compute SRCS log_softmax_compute_test.cc)
lite_cc_test(test_kernel_roll_compute SRCS roll_compute_test.cc)
lite_cc_test(test_kernel_pixelunshuffle_compute SRCS pixel_unshuffle_compute_test.cc)
# lite_cc_test(test_kernel_tensor_array_to_tensor_compute SRCS tensor_array_to_tensor_compute_test.cc)
# TODO: fix tensor_array_to_tensor unittest

Expand Down
Loading

0 comments on commit 1d0284e

Please sign in to comment.