PaddlePaddle · hong19860320 · Aug 23, 2024 · Aug 13, 2024 · Aug 15, 2024 · Aug 15, 2024
@@ -29,6 +29,7 @@
 #endif
 
 #ifdef LITE_WITH_OPENCL
+#include "lite/backends/opencl/cl_global.h"
 #include "lite/backends/opencl/cl_runtime.h"
 #endif
 
@@ -39,39 +40,32 @@
 namespace paddle {
 namespace lite_api {
 
-bool IsOpenCLBackendValid(bool check_fp16_valid) {
-#ifdef LITE_WITH_LOG
-  LOG(INFO) << "need to check fp16 valid:" << check_fp16_valid;
-#endif
-  bool opencl_valid = false;
-
+// Records the OpenCL on/off request (OpenCL builds only); returns `enable`.
+bool SetOpenCLEnable(bool enable) {
+  LOG(INFO) << "External SetOpenCLEnable : " << enable;
 #ifdef LITE_WITH_OPENCL
-  bool opencl_lib_found = paddle::lite::CLWrapper::Global()->OpenclLibFound();
-#ifdef LITE_WITH_LOG
-  LOG(INFO) << "Found opencl library:" << opencl_lib_found;
+  paddle::lite::ClGlobalDelegate::Global().SetUseOpenCL(enable);
 #endif
-  if (opencl_lib_found == false) return false;
+  return enable;
+}
 
-  bool dlsym_success = paddle::lite::CLWrapper::Global()->DlsymSuccess();
+// Forwards the OpenCL availability check to the global ClGlobalDelegate.
+bool IsOpenCLBackendValid(bool check_fp16_valid) {
 #ifdef LITE_WITH_LOG
-  LOG(INFO) << "dlsym_success:" << dlsym_success;
+  LOG(INFO) << "need to check fp16 valid:" << check_fp16_valid;
 #endif
-  if (dlsym_success == false) return false;
-  opencl_valid = paddle::lite::CLRuntime::Global()->OpenCLAvaliableForDevice(
+#ifdef LITE_WITH_OPENCL
+  return paddle::lite::ClGlobalDelegate::Global().IsOpenCLBackendValid(
       check_fp16_valid);
-
-#ifdef LITE_WITH_LOG
-  LOG(INFO) << "opencl_valid:" << opencl_valid;
-#endif
 #endif
-  return opencl_valid;
+  return false;  // Reached only when built without LITE_WITH_OPENCL.
 }
 
+// Returns the delegate's OpenCL device type; -1 when built without OpenCL.
 int GetOpenCLDeviceType() {
+#ifdef LITE_WITH_LOG
+  LOG(INFO) << "GetOpenCLDeviceType";
+#endif
 #ifdef LITE_WITH_OPENCL
-  if (IsOpenCLBackendValid()) {
-    return paddle::lite::CLRuntime::Global()->GetGpuType();
-  }
+  return paddle::lite::ClGlobalDelegate::Global().GetOpenCLDeviceType();
 #endif
   return -1;
 }

@@ -43,6 +43,9 @@ enum class L3CacheSetMethod {
   // kAutoGrow = 3,   // Not supported yet, least memory consumption.
 };
 
+// request that the OpenCL backend be enabled or disabled; returns `enable`
+LITE_API bool SetOpenCLEnable(bool enable);
+
 // return true if current device supports OpenCL model
 LITE_API bool IsOpenCLBackendValid(bool check_fp16_valid = false);
 

@@ -28,6 +28,9 @@ namespace lite {
 class CLContext {
  public:
   ~CLContext() {
+#ifdef LITE_WITH_LOG
+    VLOG(4) << "CLContext destructor";
+#endif
     GetCommandQueue().finish();
     for (size_t kidx = 0; kidx < kernels_.size(); ++kidx) {
       // Note(ysh329): Don't need `clReleaseKernel`

@@ -0,0 +1,103 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+namespace paddle {
+namespace lite {
+/**
+ * When LITE_WITH_OPENCL is enabled, Paddle-Lite will interact with the
+ * OpenCL-related environment. Currently, when Paddle-Lite interacts with the
+ * OpenCL runtime environment, it directly interacts through CLRuntime and
+ * CLWrapper. CLContext actually serves as a part of the Kernel, carrying the
+ * clKernel built for each runtime Kernel. In the process of using paddle_api or
+ * program, the OpenCL environment has to be initialized. However, in practice,
+ * sometimes the model is not an OpenCL model. Initializing the OpenCL
+ * environment in such cases is a waste of memory, especially in environments
+ * where there is a clear intention to avoid initializing the OpenCL
+ * environment. Therefore, a method to isolate the OpenCL environment is
+ * provided. When interacting with the framework, this proxy is uniformly
+ * adopted.
+ */
+class ClGlobalDelegate {
+ public:
+  static ClGlobalDelegate& Global() {
+    static ClGlobalDelegate x;
+    return x;
+  }
+  /**
+   * @brief set use opencl (stores the flag and logs the new state)
+   * @param use_opencl true to enable OpenCL, false to disable it
+   */
+  void SetUseOpenCL(bool use_opencl) {
+    use_opencl_ = use_opencl;
+    LOG(INFO) << "set opencl softly : opencl "
+              << (use_opencl_ ? "enable" : "disable");
+  }
+  /**
+   * @brief get use opencl
+   * @return the current value of the use-OpenCL flag (true by default)
+   */
+  bool UseOpenCL() const { return use_opencl_; }
+
+  /**
+   * @brief check opencl backend valid; as a side effect turns the flag on
+   * @param check_fp16_valid forwarded to CLRuntime::OpenCLAvaliableForDevice
+   * @return false if the library or its symbols are missing, else that result
+   */
+  bool IsOpenCLBackendValid(bool check_fp16_valid) {
+    LOG(INFO) << "delegete opencl valid check";
+    // The user is attempting to use OpenCL, so enable it.
+    SetUseOpenCL(true);
+    bool opencl_valid = false;
+#ifdef LITE_WITH_OPENCL
+    bool opencl_lib_found = paddle::lite::CLWrapper::Global()->OpenclLibFound();
+#ifdef LITE_WITH_LOG
+    LOG(INFO) << "Found opencl library:" << opencl_lib_found;
+#endif
+    if (!opencl_lib_found) return false;
+    bool dlsym_success = paddle::lite::CLWrapper::Global()->DlsymSuccess();
+#ifdef LITE_WITH_LOG
+    LOG(INFO) << "dlsym_success:" << dlsym_success;
+#endif
+    if (!dlsym_success) return false;
+    opencl_valid = paddle::lite::CLRuntime::Global()->OpenCLAvaliableForDevice(
+        check_fp16_valid);
+#ifdef LITE_WITH_LOG
+    LOG(INFO) << "opencl_valid:" << opencl_valid;
+#endif
+#endif
+    return opencl_valid;
+  }
+
+  /**
+   * @brief get opencl device type (always -1 without LITE_WITH_OPENCL)
+   * @return CLRuntime::GetGpuType() if the backend is valid, otherwise -1
+   */
+  int GetOpenCLDeviceType() {
+#ifdef LITE_WITH_OPENCL
+    if (this->IsOpenCLBackendValid(false)) {
+      return paddle::lite::CLRuntime::Global()->GetGpuType();
+    }
+#endif
+    return -1;
+  }
+
+ private:
+  ClGlobalDelegate() = default;
+  // Defaults to true, preserving the old behavior if the user never sets it.
+  bool use_opencl_{true};
+};
+}  // namespace lite
+}  // namespace paddle
@@ -71,13 +71,13 @@ void CLImageConverterDefault::NCHWToImage(float *nchw,
           if (c < C) {
             // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
             // (c % 4);
-            fp16_support_ ? image_fp16[i2] = Float2Half(*p) : image_fp32[i2] =
-                                                                  *p;
+            fp16_support() ? image_fp16[i2] = Float2Half(*p) : image_fp32[i2] =
+                                                                   *p;
             i2 += 4;
             p++;
           } else {
-            fp16_support_ ? image_fp16[i2] = Float2Half(0.f) : image_fp32[i2] =
-                                                                   0.f;
+            fp16_support() ? image_fp16[i2] = Float2Half(0.f) : image_fp32[i2] =
+                                                                    0.f;
             i2 += 4;
           }
         }
@@ -115,7 +115,7 @@ void CLImageConverterDefault::ImageToNCHW(void *image,
       for (size_t h = 0; h < H; h++) {
         size_t i2 = (i1 << 2) + c % 4;
         for (size_t w = 0; w < W; w++) {
-          *p = fp16_support_ ? Half2Float(image_fp16[i2]) : image_fp32[i2];
+          *p = fp16_support() ? Half2Float(image_fp16[i2]) : image_fp32[i2];
           i2 += 4;
           p++;
         }
@@ -196,15 +196,15 @@ void CLImageConverterFolder::NCHWToImage(float *tensor,
     for (size_t h = 0; h < tdim[0]; h++) {
       for (size_t w = 0; w < width * 4; w++) {
         if (w < tdim[1]) {
-          if (fp16_support_) {
+          if (fp16_support()) {
             image_fp16[(h * width + w / 4) * 4 + (w % 4)] =
                 Float2Half(tensor[h * tdim[1] + w]);
           } else {
             image_fp32[(h * width + w / 4) * 4 + (w % 4)] =
                 tensor[h * tdim[1] + w];
           }
         } else {
-          if (fp16_support_) {
+          if (fp16_support()) {
             image_fp16[(h * width + w / 4) * 4 + (w % 4)] = Float2Half(0.f);
           } else {
             image_fp32[(h * width + w / 4) * 4 + (w % 4)] = 0.f;
@@ -241,7 +241,7 @@ void CLImageConverterFolder::ImageToNCHW(void *image,
     for (size_t h = 0; h < H; h++) {
       for (size_t w = 0; w < W; w++) {
         p[h * W + w] =
-            fp16_support_
+            fp16_support()
                 ? Half2Float(image_fp16[(h * width + w / 4) * 4 + (w % 4)])
                 : image_fp32[(h * width + w / 4) * 4 + (w % 4)];
       }
@@ -286,14 +286,14 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor,
           size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
                          w * 4 + n % 4;
           if (n < N) {
-            if (fp16_support_) {
+            if (fp16_support()) {
               image_fp16[index] = Float2Half(*p);
             } else {
               image_fp32[index] = *p;
             }
             p++;
           } else {
-            if (fp16_support_) {
+            if (fp16_support()) {
               image_fp16[index] = Float2Half(0.f);
             } else {
               image_fp32[index] = 0.f;
@@ -330,8 +330,8 @@ void CLImageConverterNWBlock::ImageToNCHW(void *image,
         for (size_t w = 0; w < W; ++w) {
           size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
                          w * 4 + n % 4;
-          *p =
-              fp16_support_ ? Half2Float(image_fp16[index]) : image_fp32[index];
+          *p = fp16_support() ? Half2Float(image_fp16[index])
+                              : image_fp32[index];
           p++;
           if (index >= (width * height * 4)) {
             LOG(INFO) << " index out of range ";
@@ -393,15 +393,15 @@ void CLImageConverterDWBlock::NCHWToImage(float *tensor,
           if (c < C) {
             // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
             // (c % 4);
-            if (fp16_support_) {
+            if (fp16_support()) {
               image_fp16[i2] = Float2Half(*p);
             } else {
               image_fp32[i2] = *p;
             }
             i2 += 4;
             p++;
           } else {
-            if (fp16_support_) {
+            if (fp16_support()) {
               image_fp16[i2] = Float2Half(0.f);
             } else {
               image_fp32[i2] = 0.f;
@@ -437,7 +437,7 @@ void CLImageConverterDWBlock::ImageToNCHW(void *image,
       for (size_t h = 0; h < H; h++) {
         size_t i2 = (i1 << 2) + c % 4;
         for (size_t w = 0; w < W; w++) {
-          *p = fp16_support_ ? Half2Float(image_fp16[i2]) : image_fp32[i2];
+          *p = fp16_support() ? Half2Float(image_fp16[i2]) : image_fp32[i2];
           i2 += 4;
           p++;
         }
@@ -540,7 +540,7 @@ void CLImageConverterWinoTransWeight::NCHWToImage(float *tensor,
   float *image_fp32 = static_cast<float *>(image);
   half_t *image_fp16 = static_cast<half_t *>(image);
   // auto weight_dest_data = static_cast<half_t *>(image);
-  if (fp16_support_) {
+  if (fp16_support()) {
     memset(image_fp16, 0, num_count * sizeof(half_t));
   } else {
     memset(image_fp32, 0, num_count * sizeof(float));
@@ -573,7 +573,7 @@ void CLImageConverterWinoTransWeight::NCHWToImage(float *tensor,
       auto dstSz_fp16 = dstOz_fp16 + szC4 * 16 + unitCo * my;
       auto dstSz_fp32 = dstOz_fp32 + szC4 * 16 + unitCo * my;
       for (int i = 0; i < 16; ++i) {
-        if (fp16_support_) {
+        if (fp16_support()) {
           *(dstSz_fp16 + i * ((co + 3) / 4) * ((ci + 3) / 4) * 4 * 4) =
               Float2Half(K_Transform.data()[i]);
         } else {
@@ -648,12 +648,12 @@ void CLImageConverterNBlock::NCHWToImage(float *nchw,
           size_t img_idx =
               (((n / 4) * W * H + h * W + w) * c_block4 + c) * 4 + n % 4;
           if (n < N && c < C) {
-            fp16_support_ ? image_fp16[img_idx] = Float2Half(*p)
-                          : image_fp32[img_idx] = *p;
+            fp16_support() ? image_fp16[img_idx] = Float2Half(*p)
+                           : image_fp32[img_idx] = *p;
             p++;
           } else {
-            fp16_support_ ? image_fp16[img_idx] = Float2Half(0.f)
-                          : image_fp32[img_idx] = 0.f;
+            fp16_support() ? image_fp16[img_idx] = Float2Half(0.f)
+                           : image_fp32[img_idx] = 0.f;
           }
         }
       }
@@ -697,12 +697,12 @@ void CLImageConverterNBlockGroup::NCHWToImage(float *nchw,
               (((n / 4) * W * H + h * W + w) * c_block4 + c) * 4 + n % 4;
           size_t remain = n % ((N / groups + 3) / 4 * 4);
           if (remain < (N / groups) && c < C) {
-            fp16_support_ ? image_fp16[img_idx] = Float2Half(*p)
-                          : image_fp32[img_idx] = *p;
+            fp16_support() ? image_fp16[img_idx] = Float2Half(*p)
+                           : image_fp32[img_idx] = *p;
             p++;
           } else {
-            fp16_support_ ? image_fp16[img_idx] = Float2Half(0.f)
-                          : image_fp32[img_idx] = 0.f;
+            fp16_support() ? image_fp16[img_idx] = Float2Half(0.f)
+                           : image_fp32[img_idx] = 0.f;
           }
         }
       }
@@ -760,12 +760,12 @@ void CLImageConverterN2Block::NCHWToImage(float *nchw,
                            (c / 4) * 32 + ((n % 8) / 4) * 16 + (c % 4) * 4 +
                            (n % 8) % 4;
           if (n < N && c < C) {
-            fp16_support_ ? image_fp16[img_idx] = Float2Half(*p)
-                          : image_fp32[img_idx] = *p;
+            fp16_support() ? image_fp16[img_idx] = Float2Half(*p)
+                           : image_fp32[img_idx] = *p;
             p++;
           } else {
-            fp16_support_ ? image_fp16[img_idx] = Float2Half(0.f)
-                          : image_fp32[img_idx] = 0.f;
+            fp16_support() ? image_fp16[img_idx] = Float2Half(0.f)
+                           : image_fp32[img_idx] = 0.f;
           }
         }
       }
@@ -819,12 +819,12 @@ void CLImageConverterDWFilter::NCHWToImage(float *nchw,
         for (size_t w = 0; w < W; w++) {
           size_t img_idx = (((n / 4) * W * H + h * W + w) * C + c) * 4 + n % 4;
           if (n < N) {
-            fp16_support_ ? image_fp16[img_idx] = Float2Half(*p)
-                          : image_fp32[img_idx] = *p;
+            fp16_support() ? image_fp16[img_idx] = Float2Half(*p)
+                           : image_fp32[img_idx] = *p;
             p++;
           } else {
-            fp16_support_ ? image_fp16[img_idx] = Float2Half(0.f)
-                          : image_fp32[img_idx] = 0.f;
+            fp16_support() ? image_fp16[img_idx] = Float2Half(0.f)
+                           : image_fp32[img_idx] = 0.f;
           }
         }
       }

@@ -36,8 +36,10 @@ class CLImageConverterBase {
                            const DDim &tensor_dim) = 0;
   virtual DDim InitImageDimInfoWith(const DDim &tensor_dim) = 0;
 
-  bool fp16_support_{paddle::lite::CLRuntime::Global()->get_precision() ==
-                     lite_api::CL_PRECISION_FP16};
+  static bool fp16_support() {  // queried on every call, not cached
+    return paddle::lite::CLRuntime::Global()->get_precision() ==
+           lite_api::CL_PRECISION_FP16;
+  }
 };
 
 class CLImageConverterDefault : public CLImageConverterBase {