Skip to content

Commit

Permalink
OpenCL:Bugfix: Fix bug for memory mode not valid when create backend
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaying committed Dec 23, 2024
1 parent bbf1a93 commit 62c2ec4
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 22 deletions.
16 changes: 11 additions & 5 deletions source/backend/opencl/core/OpenCLBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,13 @@ std::pair<const void*, size_t> CLRuntime::onGetCache() {
}

// Creates a new heap-allocated OpenCLBackend bound to this runtime.
// The precision and memory modes start from the runtime's own mPrecision / mMemory and are
// replaced by config->precision / config->memory when a non-null config is supplied.
// The `origin` parameter is not referenced anywhere in this body.
// NOTE(review): this span comes from a rendered diff whose +/- markers were lost. The FIXME
// comment and the three-argument `return` directly below appear to be the removed pre-change
// lines, and the statements after them their replacement — confirm against the repository
// before treating both as live code.
Backend* CLRuntime::onCreate(const BackendConfig* config, Backend* origin) const {
// FIXME: Use config info
return new OpenCLBackend(mImagePool, mBufferPool, this);
// Default to the modes stored on the runtime.
auto precision = mPrecision;
auto memory = mMemory;
// A caller-supplied config takes priority over the runtime defaults.
if (nullptr != config) {
precision = config->precision;
memory = config->memory;
}
// Hand the resolved modes to the backend together with the shared image/buffer pools.
return new OpenCLBackend(precision, memory, mImagePool, mBufferPool, this);
}

void CLRuntime::onGabageCollect(int level) {
Expand All @@ -217,13 +222,14 @@ std::map<std::pair<OpType, GpuMemObject>, OpenCLBackend::Creator*>* gCreator() {
return creators;
};

OpenCLBackend::OpenCLBackend(std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime)
OpenCLBackend::OpenCLBackend(BackendConfig::PrecisionMode precision, BackendConfig::MemoryMode memory, std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime)
: Backend(MNN_FORWARD_OPENCL) {

mCLRuntime = runtime;
mOpenCLRuntime = mCLRuntime->mOpenCLRuntime;
mPrecision = mCLRuntime->mPrecision;
mMemory = mCLRuntime->mMemory;
mPrecision = precision;
mMemory = memory;
mOpenCLRuntime->setPrecision(precision);
mStaticImagePool = imgPool;
mStaticBufferPool = bufPool;
if(mOpenCLRuntime.get()){
Expand Down
2 changes: 1 addition & 1 deletion source/backend/opencl/core/OpenCLBackend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class CLRuntime : public Runtime {

class OpenCLBackend : public Backend {
public:
OpenCLBackend(std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime);
OpenCLBackend(BackendConfig::PrecisionMode precision, BackendConfig::MemoryMode memory, std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime);
~OpenCLBackend();

OpenCLRuntime *getOpenCLRuntime();
Expand Down
36 changes: 20 additions & 16 deletions source/backend/opencl/core/runtime/OpenCLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,6 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const
mFirstGPUDevicePtr->getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &mMaxMemAllocSize);
mFirstGPUDevicePtr->getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &mMaxLocalMemSize);
mMaxWorkGroupSize = mFirstGPUDevicePtr->getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
cl_device_fp_config fpConfig;
auto success = mFirstGPUDevicePtr->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fpConfig);
mIsDeviceSupportedFP16 = CL_SUCCESS == success && fpConfig > 0;
bool checkFp16Exetension = getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_khr_fp16");
mIsDeviceSupportedFP16 = (mIsDeviceSupportedFP16 && checkFp16Exetension);

//set gpu mode, tuning level and memory object
setGpuMode(cl_mode);
Expand All @@ -253,18 +248,8 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const
mMemType = IMAGE;
}
}
mPrecisionLevel = 1;
if (mIsDeviceSupportedFP16) {
if (precision == BackendConfig::Precision_Low) {
mPrecisionLevel = 2;
} else if (precision == BackendConfig::Precision_Normal && mMemType == BUFFER) {
mPrecisionLevel = 0;
}
}
setPrecision(precision);

// Is supported fp16 IO storage
mIsSupportedFP16 = (mPrecisionLevel == 2 || mPrecisionLevel == 0);

if(getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_arm_integer_dot_product_int8")){
mSupportDotInt8 = true;
}
Expand Down Expand Up @@ -515,6 +500,25 @@ uint64_t OpenCLRuntime::maxAllocSize() const {
return mMaxMemAllocSize;
}

// Recomputes the FP16-related state of this runtime for the requested precision mode.
// Writes mIsDeviceSupportedFP16, mPrecisionLevel and mIsSupportedFP16.
//   precision: the BackendConfig precision mode to apply.
void OpenCLRuntime::setPrecision(const BackendConfig::PrecisionMode precision){
    // The device counts as FP16-capable only when the half-float config query succeeds with a
    // non-zero value AND the "cl_khr_fp16" extension is reported. Both queries are always issued.
    cl_device_fp_config halfConfig;
    const auto queryStatus = mFirstGPUDevicePtr->getInfo(CL_DEVICE_HALF_FP_CONFIG, &halfConfig);
    const bool halfConfigOk = (CL_SUCCESS == queryStatus) && (halfConfig > 0);
    const bool hasFp16Extension = getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_khr_fp16");
    mIsDeviceSupportedFP16 = halfConfigOk && hasFp16Extension;

    // Precision level selection:
    //   2 -> FP16-capable device and Precision_Low requested
    //   0 -> FP16-capable device, Precision_Normal requested, and memory type is BUFFER
    //   1 -> every other case (including devices without FP16 support)
    if (!mIsDeviceSupportedFP16) {
        mPrecisionLevel = 1;
    } else if (BackendConfig::Precision_Low == precision) {
        mPrecisionLevel = 2;
    } else if (BackendConfig::Precision_Normal == precision && BUFFER == mMemType) {
        mPrecisionLevel = 0;
    } else {
        mPrecisionLevel = 1;
    }

    // Is supported fp16 IO storage: true exactly for levels 0 and 2.
    mIsSupportedFP16 = (0 == mPrecisionLevel) || (2 == mPrecisionLevel);
}

bool OpenCLRuntime::loadProgram(const std::string &programName, cl::Program *program) {
std::lock_guard<std::mutex> lck(gCLMutex);
auto it_source = OpenCLProgramMap.find(programName);
Expand Down
1 change: 1 addition & 0 deletions source/backend/opencl/core/runtime/OpenCLRuntime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class OpenCLRuntime {
uint64_t GetKernelWaveSize(std::shared_ptr<KernelWrap> kernel);
std::vector<uint32_t> getMaxWorkItemSizes();
uint64_t getMaxLocalMem() const;
void setPrecision(const BackendConfig::PrecisionMode precision);
// Returns the current value of the mUseRecordableQueueSize member.
uint32_t getUseRecordableQueueSize(){
return mUseRecordableQueueSize;
}
Expand Down

0 comments on commit 62c2ec4

Please sign in to comment.