Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OpenCL] Fix OpenCL init bugs and optimize: do not create the OpenCL environment when the user does not use OpenCL; auto-enable OpenCL when an OpenCL model is loaded or when OpenCL is checked or configured. #10557

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 16 additions & 22 deletions lite/api/paddle_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#endif

#ifdef LITE_WITH_OPENCL
#include "lite/backends/opencl/cl_global.h"
#include "lite/backends/opencl/cl_runtime.h"
#endif

Expand All @@ -39,39 +40,32 @@
namespace paddle {
namespace lite_api {

bool IsOpenCLBackendValid(bool check_fp16_valid) {
#ifdef LITE_WITH_LOG
LOG(INFO) << "need to check fp16 valid:" << check_fp16_valid;
#endif
bool opencl_valid = false;

bool SetOpenCLEnable(bool enable) {
LOG(INFO) << "External SetOpenCLEnable : " << enable;
xiebaiyuan marked this conversation as resolved.
Show resolved Hide resolved
#ifdef LITE_WITH_OPENCL
bool opencl_lib_found = paddle::lite::CLWrapper::Global()->OpenclLibFound();
#ifdef LITE_WITH_LOG
LOG(INFO) << "Found opencl library:" << opencl_lib_found;
paddle::lite::ClGlobalDelegate::Global().SetUseOpenCL(enable);
return enable;
#endif
if (opencl_lib_found == false) return false;
return enable;
}

bool dlsym_success = paddle::lite::CLWrapper::Global()->DlsymSuccess();
bool IsOpenCLBackendValid(bool check_fp16_valid) {
#ifdef LITE_WITH_LOG
LOG(INFO) << "dlsym_success:" << dlsym_success;
LOG(INFO) << "need to check fp16 valid:" << check_fp16_valid;
#endif
if (dlsym_success == false) return false;
opencl_valid = paddle::lite::CLRuntime::Global()->OpenCLAvaliableForDevice(
#ifdef LITE_WITH_OPENCL
return paddle::lite::ClGlobalDelegate::Global().IsOpenCLBackendValid(
check_fp16_valid);

#ifdef LITE_WITH_LOG
LOG(INFO) << "opencl_valid:" << opencl_valid;
#endif
#endif
return opencl_valid;
return false;
}

int GetOpenCLDeviceType() {
#ifdef LITE_WITH_LOG
LOG(INFO) << "GetOpenCLDeviceType";
xiebaiyuan marked this conversation as resolved.
Show resolved Hide resolved
#endif
#ifdef LITE_WITH_OPENCL
if (IsOpenCLBackendValid()) {
return paddle::lite::CLRuntime::Global()->GetGpuType();
}
return paddle::lite::ClGlobalDelegate::Global().GetOpenCLDeviceType();
#endif
return -1;
}
Expand Down
3 changes: 3 additions & 0 deletions lite/api/paddle_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ enum class L3CacheSetMethod {
// kAutoGrow = 3, // Not supported yet, least memory consumption.
};

// Enable or disable use of the OpenCL backend; returns the `enable` value passed in.
LITE_API bool SetOpenCLEnable(bool enable);
xiebaiyuan marked this conversation as resolved.
Show resolved Hide resolved

// return true if current device supports OpenCL model
LITE_API bool IsOpenCLBackendValid(bool check_fp16_valid = false);

Expand Down
3 changes: 3 additions & 0 deletions lite/backends/opencl/cl_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ namespace lite {
class CLContext {
public:
~CLContext() {
#ifdef LITE_WITH_LOG
xiebaiyuan marked this conversation as resolved.
Show resolved Hide resolved
VLOG(4) << "CLContext destructor";
xiebaiyuan marked this conversation as resolved.
Show resolved Hide resolved
#endif
GetCommandQueue().finish();
for (size_t kidx = 0; kidx < kernels_.size(); ++kidx) {
// Note(ysh329): Don't need `clReleaseKernel`
Expand Down
103 changes: 103 additions & 0 deletions lite/backends/opencl/cl_global.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

namespace paddle {
namespace lite {
/**
* When LITE_WITH_OPENCL is enabled, Paddle-Lite will interact with the
* OpenCL-related environment. Currently, when Paddle-Lite interacts with the
* OpenCL runtime environment, it directly interacts through CLRuntime and
* CLWrapper. CLContext actually serves as a part of the Kernel, carrying the
* clKernel built for each runtime Kernel. In the process of using paddle_api or
* program, the OpenCL environment has to be initialized. However, in practice,
* sometimes the model is not an OpenCL model. Initializing the OpenCL
* environment in such cases is a waste of memory, especially in environments
* where there is a clear intention to avoid initializing the OpenCL
* environment. Therefore, a method to isolate the OpenCL environment is
* provided. When interacting with the framework, this proxy is uniformly
* adopted.
*/
class ClGlobalDelegate {
 public:
  /**
   * @brief Access the single shared delegate instance.
   * @return reference to the function-local static instance.
   */
  static ClGlobalDelegate& Global() {
    static ClGlobalDelegate x;
    return x;
  }

  /**
   * @brief Record whether OpenCL should be used and log the new state.
   * @param use_opencl true to mark OpenCL as enabled, false as disabled.
   */
  void SetUseOpenCL(bool use_opencl) {
    use_opencl_ = use_opencl;
    LOG(INFO) << "set opencl softly : opencl "
              << (use_opencl_ ? "enable" : "disable");
  }

  /**
   * @brief Read the flag last stored by SetUseOpenCL().
   * @return the current value of the use-OpenCL flag (true by default).
   */
  bool UseOpenCL() const { return use_opencl_; }

  /**
   * @brief Check whether the OpenCL backend can be used on this device.
   *
   * Calling this is treated as an attempt to use OpenCL, so the use-OpenCL
   * flag is switched on before any probing happens. When built with
   * LITE_WITH_OPENCL the check proceeds in order: the OpenCL library was
   * found, its symbols were resolved, and the runtime reports the device as
   * available. Without LITE_WITH_OPENCL the result is always false.
   *
   * @param check_fp16_valid forwarded to
   *        CLRuntime::OpenCLAvaliableForDevice().
   * @return true only if every step above succeeds.
   */
  bool IsOpenCLBackendValid(bool check_fp16_valid) {
    LOG(INFO) << "delegete opencl valid check";
    // use attempt to use opencl , enable it.
    SetUseOpenCL(true);
    bool opencl_valid = false;

#ifdef LITE_WITH_OPENCL
    const bool opencl_lib_found =
        paddle::lite::CLWrapper::Global()->OpenclLibFound();
#ifdef LITE_WITH_LOG
    LOG(INFO) << "Found opencl library:" << opencl_lib_found;
#endif
    if (!opencl_lib_found) return false;

    const bool dlsym_success =
        paddle::lite::CLWrapper::Global()->DlsymSuccess();
#ifdef LITE_WITH_LOG
    LOG(INFO) << "dlsym_success:" << dlsym_success;
#endif
    if (!dlsym_success) return false;

    opencl_valid = paddle::lite::CLRuntime::Global()->OpenCLAvaliableForDevice(
        check_fp16_valid);
#ifdef LITE_WITH_LOG
    LOG(INFO) << "opencl_valid:" << opencl_valid;
#endif
#endif
    return opencl_valid;
  }

  /**
   * @brief Query the GPU type reported by the OpenCL runtime.
   *
   * Runs IsOpenCLBackendValid(false) first, which also switches the
   * use-OpenCL flag on.
   *
   * @return CLRuntime::GetGpuType() when the backend is valid, otherwise -1.
   */
  int GetOpenCLDeviceType() {
    if (!this->IsOpenCLBackendValid(false)) {
      return -1;
    }
#ifdef LITE_WITH_OPENCL
    return paddle::lite::CLRuntime::Global()->GetGpuType();
#else
    // Unreachable in practice: without LITE_WITH_OPENCL the validity check
    // above always fails. Kept so the class compiles without the OpenCL
    // runtime symbols, matching the guard used in IsOpenCLBackendValid().
    return -1;
#endif
  }

 private:
  ClGlobalDelegate() = default;
  // Defaults to true: if the user never calls SetUseOpenCL(), OpenCL is
  // treated as enabled, as it was before this flag existed.
  bool use_opencl_{true};
};
} // namespace lite
} // namespace paddle
66 changes: 33 additions & 33 deletions lite/backends/opencl/cl_image_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,13 @@ void CLImageConverterDefault::NCHWToImage(float *nchw,
if (c < C) {
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
fp16_support_ ? image_fp16[i2] = Float2Half(*p) : image_fp32[i2] =
*p;
fp16_support() ? image_fp16[i2] = Float2Half(*p) : image_fp32[i2] =
*p;
i2 += 4;
p++;
} else {
fp16_support_ ? image_fp16[i2] = Float2Half(0.f) : image_fp32[i2] =
0.f;
fp16_support() ? image_fp16[i2] = Float2Half(0.f) : image_fp32[i2] =
0.f;
i2 += 4;
}
}
Expand Down Expand Up @@ -115,7 +115,7 @@ void CLImageConverterDefault::ImageToNCHW(void *image,
for (size_t h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (size_t w = 0; w < W; w++) {
*p = fp16_support_ ? Half2Float(image_fp16[i2]) : image_fp32[i2];
*p = fp16_support() ? Half2Float(image_fp16[i2]) : image_fp32[i2];
i2 += 4;
p++;
}
Expand Down Expand Up @@ -196,15 +196,15 @@ void CLImageConverterFolder::NCHWToImage(float *tensor,
for (size_t h = 0; h < tdim[0]; h++) {
for (size_t w = 0; w < width * 4; w++) {
if (w < tdim[1]) {
if (fp16_support_) {
if (fp16_support()) {
image_fp16[(h * width + w / 4) * 4 + (w % 4)] =
Float2Half(tensor[h * tdim[1] + w]);
} else {
image_fp32[(h * width + w / 4) * 4 + (w % 4)] =
tensor[h * tdim[1] + w];
}
} else {
if (fp16_support_) {
if (fp16_support()) {
image_fp16[(h * width + w / 4) * 4 + (w % 4)] = Float2Half(0.f);
} else {
image_fp32[(h * width + w / 4) * 4 + (w % 4)] = 0.f;
Expand Down Expand Up @@ -241,7 +241,7 @@ void CLImageConverterFolder::ImageToNCHW(void *image,
for (size_t h = 0; h < H; h++) {
for (size_t w = 0; w < W; w++) {
p[h * W + w] =
fp16_support_
fp16_support()
? Half2Float(image_fp16[(h * width + w / 4) * 4 + (w % 4)])
: image_fp32[(h * width + w / 4) * 4 + (w % 4)];
}
Expand Down Expand Up @@ -286,14 +286,14 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor,
size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
w * 4 + n % 4;
if (n < N) {
if (fp16_support_) {
if (fp16_support()) {
image_fp16[index] = Float2Half(*p);
} else {
image_fp32[index] = *p;
}
p++;
} else {
if (fp16_support_) {
if (fp16_support()) {
image_fp16[index] = Float2Half(0.f);
} else {
image_fp32[index] = 0.f;
Expand Down Expand Up @@ -330,8 +330,8 @@ void CLImageConverterNWBlock::ImageToNCHW(void *image,
for (size_t w = 0; w < W; ++w) {
size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
w * 4 + n % 4;
*p =
fp16_support_ ? Half2Float(image_fp16[index]) : image_fp32[index];
*p = fp16_support() ? Half2Float(image_fp16[index])
: image_fp32[index];
p++;
if (index >= (width * height * 4)) {
LOG(INFO) << " index out of range ";
Expand Down Expand Up @@ -393,15 +393,15 @@ void CLImageConverterDWBlock::NCHWToImage(float *tensor,
if (c < C) {
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
if (fp16_support_) {
if (fp16_support()) {
image_fp16[i2] = Float2Half(*p);
} else {
image_fp32[i2] = *p;
}
i2 += 4;
p++;
} else {
if (fp16_support_) {
if (fp16_support()) {
image_fp16[i2] = Float2Half(0.f);
} else {
image_fp32[i2] = 0.f;
Expand Down Expand Up @@ -437,7 +437,7 @@ void CLImageConverterDWBlock::ImageToNCHW(void *image,
for (size_t h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (size_t w = 0; w < W; w++) {
*p = fp16_support_ ? Half2Float(image_fp16[i2]) : image_fp32[i2];
*p = fp16_support() ? Half2Float(image_fp16[i2]) : image_fp32[i2];
i2 += 4;
p++;
}
Expand Down Expand Up @@ -540,7 +540,7 @@ void CLImageConverterWinoTransWeight::NCHWToImage(float *tensor,
float *image_fp32 = static_cast<float *>(image);
half_t *image_fp16 = static_cast<half_t *>(image);
// auto weight_dest_data = static_cast<half_t *>(image);
if (fp16_support_) {
if (fp16_support()) {
memset(image_fp16, 0, num_count * sizeof(half_t));
} else {
memset(image_fp32, 0, num_count * sizeof(float));
Expand Down Expand Up @@ -573,7 +573,7 @@ void CLImageConverterWinoTransWeight::NCHWToImage(float *tensor,
auto dstSz_fp16 = dstOz_fp16 + szC4 * 16 + unitCo * my;
auto dstSz_fp32 = dstOz_fp32 + szC4 * 16 + unitCo * my;
for (int i = 0; i < 16; ++i) {
if (fp16_support_) {
if (fp16_support()) {
*(dstSz_fp16 + i * ((co + 3) / 4) * ((ci + 3) / 4) * 4 * 4) =
Float2Half(K_Transform.data()[i]);
} else {
Expand Down Expand Up @@ -648,12 +648,12 @@ void CLImageConverterNBlock::NCHWToImage(float *nchw,
size_t img_idx =
(((n / 4) * W * H + h * W + w) * c_block4 + c) * 4 + n % 4;
if (n < N && c < C) {
fp16_support_ ? image_fp16[img_idx] = Float2Half(*p)
: image_fp32[img_idx] = *p;
fp16_support() ? image_fp16[img_idx] = Float2Half(*p)
: image_fp32[img_idx] = *p;
p++;
} else {
fp16_support_ ? image_fp16[img_idx] = Float2Half(0.f)
: image_fp32[img_idx] = 0.f;
fp16_support() ? image_fp16[img_idx] = Float2Half(0.f)
: image_fp32[img_idx] = 0.f;
}
}
}
Expand Down Expand Up @@ -697,12 +697,12 @@ void CLImageConverterNBlockGroup::NCHWToImage(float *nchw,
(((n / 4) * W * H + h * W + w) * c_block4 + c) * 4 + n % 4;
size_t remain = n % ((N / groups + 3) / 4 * 4);
if (remain < (N / groups) && c < C) {
fp16_support_ ? image_fp16[img_idx] = Float2Half(*p)
: image_fp32[img_idx] = *p;
fp16_support() ? image_fp16[img_idx] = Float2Half(*p)
: image_fp32[img_idx] = *p;
p++;
} else {
fp16_support_ ? image_fp16[img_idx] = Float2Half(0.f)
: image_fp32[img_idx] = 0.f;
fp16_support() ? image_fp16[img_idx] = Float2Half(0.f)
: image_fp32[img_idx] = 0.f;
}
}
}
Expand Down Expand Up @@ -760,12 +760,12 @@ void CLImageConverterN2Block::NCHWToImage(float *nchw,
(c / 4) * 32 + ((n % 8) / 4) * 16 + (c % 4) * 4 +
(n % 8) % 4;
if (n < N && c < C) {
fp16_support_ ? image_fp16[img_idx] = Float2Half(*p)
: image_fp32[img_idx] = *p;
fp16_support() ? image_fp16[img_idx] = Float2Half(*p)
: image_fp32[img_idx] = *p;
p++;
} else {
fp16_support_ ? image_fp16[img_idx] = Float2Half(0.f)
: image_fp32[img_idx] = 0.f;
fp16_support() ? image_fp16[img_idx] = Float2Half(0.f)
: image_fp32[img_idx] = 0.f;
}
}
}
Expand Down Expand Up @@ -819,12 +819,12 @@ void CLImageConverterDWFilter::NCHWToImage(float *nchw,
for (size_t w = 0; w < W; w++) {
size_t img_idx = (((n / 4) * W * H + h * W + w) * C + c) * 4 + n % 4;
if (n < N) {
fp16_support_ ? image_fp16[img_idx] = Float2Half(*p)
: image_fp32[img_idx] = *p;
fp16_support() ? image_fp16[img_idx] = Float2Half(*p)
: image_fp32[img_idx] = *p;
p++;
} else {
fp16_support_ ? image_fp16[img_idx] = Float2Half(0.f)
: image_fp32[img_idx] = 0.f;
fp16_support() ? image_fp16[img_idx] = Float2Half(0.f)
: image_fp32[img_idx] = 0.f;
}
}
}
Expand Down
6 changes: 4 additions & 2 deletions lite/backends/opencl/cl_image_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ class CLImageConverterBase {
const DDim &tensor_dim) = 0;
virtual DDim InitImageDimInfoWith(const DDim &tensor_dim) = 0;

bool fp16_support_{paddle::lite::CLRuntime::Global()->get_precision() ==
lite_api::CL_PRECISION_FP16};
static bool fp16_support() {
return paddle::lite::CLRuntime::Global()->get_precision() ==
lite_api::CL_PRECISION_FP16;
}
};

class CLImageConverterDefault : public CLImageConverterBase {
Expand Down
Loading