From 0b41cc338a734467edf69d2f8ba9545d9975667a Mon Sep 17 00:00:00 2001 From: "Spruit, Neil R" Date: Tue, 28 Nov 2023 15:37:23 -0800 Subject: [PATCH] [L0] Add support for multi-device kernel compilation Signed-off-by: Spruit, Neil R --- source/adapters/level_zero/kernel.cpp | 93 ++++++--- source/adapters/level_zero/kernel.hpp | 15 +- source/adapters/level_zero/program.cpp | 190 ++++++++++-------- source/adapters/level_zero/program.hpp | 9 + .../kernel/kernel_adapter_level_zero.match | 1 + 5 files changed, 195 insertions(+), 113 deletions(-) diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index dfa8915197..cea537b3fe 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -41,6 +41,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { + auto ZeDevice = Queue->Device->ZeDevice; + + ze_kernel_handle_t ZeKernel{}; + if (Kernel->ZeKernelMap.empty()) { + ZeKernel = Kernel->ZeKernel; + } else { + auto It = Kernel->ZeKernelMap.find(ZeDevice); + ZeKernel = It->second; + } // Lock automatically releases when this goes out of scope. std::scoped_lock Lock( Queue->Mutex, Kernel->Mutex, Kernel->Program->Mutex); @@ -51,7 +60,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( } ZE2UR_CALL(zeKernelSetGlobalOffsetExp, - (Kernel->ZeKernel, GlobalWorkOffset[0], GlobalWorkOffset[1], + (ZeKernel, GlobalWorkOffset[0], GlobalWorkOffset[1], GlobalWorkOffset[2])); } @@ -65,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( Queue->Device)); } ZE2UR_CALL(zeKernelSetArgumentValue, - (Kernel->ZeKernel, Arg.Index, Arg.Size, ZeHandlePtr)); + (ZeKernel, Arg.Index, Arg.Size, ZeHandlePtr)); } Kernel->PendingArguments.clear(); @@ -99,7 +108,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( } if (SuggestGroupSize) { ZE2UR_CALL(zeKernelSuggestGroupSize, - (Kernel->ZeKernel, GlobalWorkSize[0], GlobalWorkSize[1], + (ZeKernel, GlobalWorkSize[0], GlobalWorkSize[1], GlobalWorkSize[2], &WG[0], &WG[1], &WG[2])); } else { for (int I : {0, 1, 2}) { @@ -175,7 +184,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; } - ZE2UR_CALL(zeKernelSetGroupSize, (Kernel->ZeKernel, WG[0], WG[1], WG[2])); + ZE2UR_CALL(zeKernelSetGroupSize, (ZeKernel, WG[0], WG[1], WG[2])); bool UseCopyEngine = false; _ur_ze_event_list_t TmpWaitList; @@ -227,18 +236,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( Queue->CaptureIndirectAccesses(); // Add the command to the command list, which implies submission. ZE2UR_CALL(zeCommandListAppendLaunchKernel, - (CommandList->first, Kernel->ZeKernel, &ZeThreadGroupDimensions, - ZeEvent, (*Event)->WaitList.Length, - (*Event)->WaitList.ZeEventList)); + (CommandList->first, ZeKernel, &ZeThreadGroupDimensions, ZeEvent, + (*Event)->WaitList.Length, (*Event)->WaitList.ZeEventList)); } else { // Add the command to the command list for later submission. // No lock is needed here, unlike the immediate commandlist case above, // because the kernels are not actually submitted yet. Kernels will be // submitted only when the comamndlist is closed. Then, a lock is held. ZE2UR_CALL(zeCommandListAppendLaunchKernel, - (CommandList->first, Kernel->ZeKernel, &ZeThreadGroupDimensions, - ZeEvent, (*Event)->WaitList.Length, - (*Event)->WaitList.ZeEventList)); + (CommandList->first, ZeKernel, &ZeThreadGroupDimensions, ZeEvent, + (*Event)->WaitList.Length, (*Event)->WaitList.ZeEventList)); } urPrint("calling zeCommandListAppendLaunchKernel() with" @@ -363,16 +370,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; } - ZeStruct ZeKernelDesc; - ZeKernelDesc.flags = 0; - ZeKernelDesc.pKernelName = KernelName; - - ze_kernel_handle_t ZeKernel; - ZE2UR_CALL(zeKernelCreate, (Program->ZeModule, &ZeKernelDesc, &ZeKernel)); - try { - ur_kernel_handle_t_ *UrKernel = - new ur_kernel_handle_t_(ZeKernel, true, Program); + ur_kernel_handle_t_ *UrKernel = new ur_kernel_handle_t_(true, Program); *RetKernel = reinterpret_cast(UrKernel); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -380,6 +379,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( return UR_RESULT_ERROR_UNKNOWN; } + for (auto It : Program->ZeModuleMap) { + auto ZeModule = It.second; + ZeStruct ZeKernelDesc; + ZeKernelDesc.flags = 0; + ZeKernelDesc.pKernelName = KernelName; + + ze_kernel_handle_t ZeKernel; + ZE2UR_CALL(zeKernelCreate, (ZeModule, &ZeKernelDesc, &ZeKernel)); + + auto ZeDevice = It.first; + + // Store the kernel in the ZeKernelMap so the correct + // kernel can be retrieved later for a specific device + // where a queue is being submitted. + (*RetKernel)->ZeKernelMap[ZeDevice] = ZeKernel; + (*RetKernel)->ZeKernels.push_back(ZeKernel); + + // If the device used to create the module's kernel is a root-device + // then store the kernel also using the sub-devices, since application + // could submit the root-device's kernel to a sub-device's queue. + uint32_t SubDevicesCount = 0; + zeDeviceGetSubDevices(ZeDevice, &SubDevicesCount, nullptr); + std::vector ZeSubDevices(SubDevicesCount); + zeDeviceGetSubDevices(ZeDevice, &SubDevicesCount, ZeSubDevices.data()); + for (auto ZeSubDevice : ZeSubDevices) { + (*RetKernel)->ZeKernelMap[ZeSubDevice] = ZeKernel; + } + } + + (*RetKernel)->ZeKernel = (*RetKernel)->ZeKernelMap.begin()->second; + UR_CALL((*RetKernel)->initialize()); return UR_RESULT_SUCCESS; @@ -396,6 +426,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( ) { std::ignore = Properties; + UR_ASSERT(Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + // OpenCL: "the arg_value pointer can be NULL or point to a NULL value // in which case a NULL value will be used as the value for the argument // declared as a pointer to global or constant memory in the kernel" @@ -409,8 +441,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( } std::scoped_lock Guard(Kernel->Mutex); - ZE2UR_CALL(zeKernelSetArgumentValue, - (Kernel->ZeKernel, ArgIndex, ArgSize, PArgValue)); + for (auto It : Kernel->ZeKernelMap) { + auto ZeKernel = It.second; + ZE2UR_CALL(zeKernelSetArgumentValue, + (ZeKernel, ArgIndex, ArgSize, PArgValue)); + } return UR_RESULT_SUCCESS; } @@ -596,11 +631,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( auto KernelProgram = Kernel->Program; if (Kernel->OwnNativeHandle) { - auto ZeResult = ZE_CALL_NOCHECK(zeKernelDestroy, (Kernel->ZeKernel)); - // Gracefully handle the case that L0 was already unloaded. - if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED) - return ze2urResult(ZeResult); + for (auto &ZeKernel : Kernel->ZeKernels) { + auto ZeResult = ZE_CALL_NOCHECK(zeKernelDestroy, (ZeKernel)); + // Gracefully handle the case that L0 was already unloaded. + if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED) + return ze2urResult(ZeResult); + } } + Kernel->ZeKernelMap.clear(); if (IndirectAccessTrackingEnabled) { UR_CALL(urContextRelease(KernelProgram->Context)); } @@ -639,6 +677,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( std::ignore = PropSize; std::ignore = Properties; + auto ZeKernel = Kernel->ZeKernel; std::scoped_lock Guard(Kernel->Mutex); if (PropName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS && *(static_cast(PropValue)) == true) { @@ -649,7 +688,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED; - ZE2UR_CALL(zeKernelSetIndirectAccess, (Kernel->ZeKernel, IndirectFlags)); + ZE2UR_CALL(zeKernelSetIndirectAccess, (ZeKernel, IndirectFlags)); } else if (PropName == UR_KERNEL_EXEC_INFO_CACHE_CONFIG) { ze_cache_config_flag_t ZeCacheConfig{}; auto CacheConfig = @@ -663,7 +702,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( else // Unexpected cache configuration value. return UR_RESULT_ERROR_INVALID_VALUE; - ZE2UR_CALL(zeKernelSetCacheConfig, (Kernel->ZeKernel, ZeCacheConfig);); + ZE2UR_CALL(zeKernelSetCacheConfig, (ZeKernel, ZeCacheConfig);); } else { urPrint("urKernelSetExecInfo: unsupported ParamName\n"); return UR_RESULT_ERROR_INVALID_VALUE; diff --git a/source/adapters/level_zero/kernel.hpp b/source/adapters/level_zero/kernel.hpp index 4ef21ce18b..a6d85d2baa 100644 --- a/source/adapters/level_zero/kernel.hpp +++ b/source/adapters/level_zero/kernel.hpp @@ -14,10 +14,8 @@ #include struct ur_kernel_handle_t_ : _ur_object { - ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, - ur_program_handle_t Program) - : Context{nullptr}, Program{Program}, ZeKernel{Kernel}, - SubmissionsCount{0}, MemAllocs{} { + ur_kernel_handle_t_(bool OwnZeHandle, ur_program_handle_t Program) + : Program{Program}, SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } @@ -37,6 +35,15 @@ struct ur_kernel_handle_t_ : _ur_object { // Level Zero function handle. ze_kernel_handle_t ZeKernel; + // Map of L0 kernels created for all the devices for which a UR Program + // has been built. It may contain duplicated kernel entries for a root + // device and its sub-devices. + std::unordered_map ZeKernelMap; + + // Vector of L0 kernels. Each entry is unique, so this is used for + // destroying the kernels instead of ZeKernelMap + std::vector ZeKernels; + // Counter to track the number of submissions of the kernel. // When this value is zero, it means that kernel is not submitted for an // execution - at this time we can release memory allocations referenced by diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index f118a5b9dd..0f385c6b37 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -167,48 +167,55 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); ZeModuleDesc.pConstants = Shim.ze(); + ur_result_t Result = UR_RESULT_SUCCESS; - ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; - ze_context_handle_t ZeContext = hProgram->Context->ZeContext; - std::ignore = numDevices; - ze_module_handle_t ZeModule = nullptr; + for (uint32_t i = 0; i < numDevices; i++) { + ze_device_handle_t ZeDevice = phDevices[i]->ZeDevice; + ze_context_handle_t ZeContext = hProgram->Context->ZeContext; + ze_module_handle_t ZeModuleHandle = nullptr; + ze_module_build_log_handle_t ZeBuildLog{}; - ur_result_t Result = UR_RESULT_SUCCESS; - hProgram->State = ur_program_handle_t_::Exe; - ze_result_t ZeResult = - ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, - &ZeModule, &hProgram->ZeBuildLog)); - if (ZeResult != ZE_RESULT_SUCCESS) { - // We adjust ur_program below to avoid attempting to release zeModule when - // RT calls urProgramRelease(). - hProgram->State = ur_program_handle_t_::Invalid; - Result = ze2urResult(ZeResult); - if (ZeModule) { - ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModule)); - ZeModule = nullptr; - } - } else { - // The call to zeModuleCreate does not report an error if there are - // unresolved symbols because it thinks these could be resolved later via a - // call to zeModuleDynamicLink. However, modules created with - // urProgramBuild are supposed to be fully linked and ready to use. - // Therefore, do an extra check now for unresolved symbols. - ZeResult = checkUnresolvedSymbols(ZeModule, &hProgram->ZeBuildLog); + hProgram->State = ur_program_handle_t_::Exe; + ze_result_t ZeResult = + ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, + &ZeModuleHandle, &ZeBuildLog)); if (ZeResult != ZE_RESULT_SUCCESS) { + // We adjust ur_program below to avoid attempting to release zeModule when + // RT calls urProgramRelease(). hProgram->State = ur_program_handle_t_::Invalid; - Result = (ZeResult == ZE_RESULT_ERROR_MODULE_LINK_FAILURE) - ? UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE - : ze2urResult(ZeResult); - if (ZeModule) { - ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModule)); - ZeModule = nullptr; + Result = ze2urResult(ZeResult); + if (ZeModuleHandle) { + ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModuleHandle)); + ZeModuleHandle = nullptr; + } + } else { + // The call to zeModuleCreate does not report an error if there are + // unresolved symbols because it thinks these could be resolved later via + // a call to zeModuleDynamicLink. However, modules created with + // urProgramBuild are supposed to be fully linked and ready to use. + // Therefore, do an extra check now for unresolved symbols. + ZeResult = checkUnresolvedSymbols(ZeModuleHandle, &ZeBuildLog); + if (ZeResult != ZE_RESULT_SUCCESS) { + hProgram->State = ur_program_handle_t_::Invalid; + Result = (ZeResult == ZE_RESULT_ERROR_MODULE_LINK_FAILURE) + ? UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE + : ze2urResult(ZeResult); + if (ZeModuleHandle) { + ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModuleHandle)); + ZeModuleHandle = nullptr; + } } + hProgram->ZeModuleMap.insert(std::make_pair(ZeDevice, ZeModuleHandle)); + hProgram->ZeBuildLogMap.insert(std::make_pair(ZeDevice, ZeBuildLog)); } } // We no longer need the IL / native code. hProgram->Code.reset(); - hProgram->ZeModule = ZeModule; + if (!hProgram->ZeModuleMap.empty()) + hProgram->ZeModule = hProgram->ZeModuleMap.begin()->second; + if (!hProgram->ZeBuildLogMap.empty()) + hProgram->ZeBuildLog = hProgram->ZeBuildLogMap.begin()->second; return Result; } @@ -292,9 +299,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( ur_program_handle_t *phProgram ///< [out] pointer to handle of program object created. ) { - std::ignore = numDevices; - UR_ASSERT(hContext->isValidDevice(phDevices[0]), - UR_RESULT_ERROR_INVALID_DEVICE); + for (uint32_t i = 0; i < numDevices; i++) { + UR_ASSERT(hContext->isValidDevice(phDevices[i]), + UR_RESULT_ERROR_INVALID_DEVICE); + } // We do not support any link flags at this time because the Level Zero API // does not have any way to pass flags that are specific to linking. @@ -402,49 +410,60 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( return UR_RESULT_ERROR_INVALID_VALUE; } } - - // Call the Level Zero API to compile, link, and create the module. - ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; - ze_context_handle_t ZeContext = hContext->ZeContext; - ze_module_handle_t ZeModule = nullptr; - ze_module_build_log_handle_t ZeBuildLog = nullptr; - ze_result_t ZeResult = - ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, - &ZeModule, &ZeBuildLog)); - - // We still create a ur_program_handle_t_ object even if there is a - // BUILD_FAILURE because we need the object to hold the ZeBuildLog. There - // is no build log created for other errors, so we don't create an object. - UrResult = ze2urResult(ZeResult); - if (ZeResult != ZE_RESULT_SUCCESS && - ZeResult != ZE_RESULT_ERROR_MODULE_BUILD_FAILURE) { - return ze2urResult(ZeResult); - } - - // The call to zeModuleCreate does not report an error if there are - // unresolved symbols because it thinks these could be resolved later via a - // call to zeModuleDynamicLink. However, modules created with piProgramLink - // are supposed to be fully linked and ready to use. Therefore, do an extra - // check now for unresolved symbols. Note that we still create a - // ur_program_handle_t_ if there are unresolved symbols because the - // ZeBuildLog tells which symbols are unresolved. - if (ZeResult == ZE_RESULT_SUCCESS) { - ZeResult = checkUnresolvedSymbols(ZeModule, &ZeBuildLog); - if (ZeResult == ZE_RESULT_ERROR_MODULE_LINK_FAILURE) { - UrResult = - UR_RESULT_ERROR_UNKNOWN; // TODO: - // UR_RESULT_ERROR_PROGRAM_LINK_FAILURE; - } else if (ZeResult != ZE_RESULT_SUCCESS) { + std::unordered_map ZeModuleMap; + std::unordered_map + ZeBuildLogMap; + + for (uint32_t i = 0; i < numDevices; i++) { + + // Call the Level Zero API to compile, link, and create the module. + ze_device_handle_t ZeDevice = phDevices[i]->ZeDevice; + ze_context_handle_t ZeContext = hContext->ZeContext; + ze_module_handle_t ZeModule = nullptr; + ze_module_build_log_handle_t ZeBuildLog = nullptr; + ze_result_t ZeResult = + ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, + &ZeModule, &ZeBuildLog)); + + // We still create a ur_program_handle_t_ object even if there is a + // BUILD_FAILURE because we need the object to hold the ZeBuildLog. There + // is no build log created for other errors, so we don't create an object. + UrResult = ze2urResult(ZeResult); + if (ZeResult != ZE_RESULT_SUCCESS && + ZeResult != ZE_RESULT_ERROR_MODULE_BUILD_FAILURE) { return ze2urResult(ZeResult); } + + // The call to zeModuleCreate does not report an error if there are + // unresolved symbols because it thinks these could be resolved later via + // a call to zeModuleDynamicLink. However, modules created with + // piProgramLink are supposed to be fully linked and ready to use. + // Therefore, do an extra check now for unresolved symbols. Note that we + // still create a ur_program_handle_t_ if there are unresolved symbols + // because the ZeBuildLog tells which symbols are unresolved. + if (ZeResult == ZE_RESULT_SUCCESS) { + ZeResult = checkUnresolvedSymbols(ZeModule, &ZeBuildLog); + if (ZeResult == ZE_RESULT_ERROR_MODULE_LINK_FAILURE) { + UrResult = + UR_RESULT_ERROR_UNKNOWN; // TODO: + // UR_RESULT_ERROR_PROGRAM_LINK_FAILURE; + } else if (ZeResult != ZE_RESULT_SUCCESS) { + return ze2urResult(ZeResult); + } + } + ZeModuleMap.insert(std::make_pair(ZeDevice, ZeModule)); + ZeBuildLogMap.insert(std::make_pair(ZeDevice, ZeBuildLog)); } ur_program_handle_t_::state State = (UrResult == UR_RESULT_SUCCESS) ? ur_program_handle_t_::Exe : ur_program_handle_t_::Invalid; ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(State, hContext, ZeModule, ZeBuildLog); + new ur_program_handle_t_(State, hContext, ZeModuleMap.begin()->second, + ZeBuildLogMap.begin()->second); *phProgram = reinterpret_cast(UrProgram); + (*phProgram)->ZeModuleMap = ZeModuleMap; + (*phProgram)->ZeBuildLogMap = ZeBuildLogMap; } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -715,23 +734,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( } // Next check if there is a Level Zero build log. - if (Program->ZeBuildLog) { + if (Program->ZeBuildLogMap.find(Device->ZeDevice) != + Program->ZeBuildLogMap.end()) { + ze_module_build_log_handle_t ZeBuildLog = + Program->ZeBuildLogMap.begin()->second; size_t LogSize = PropSize; ZE2UR_CALL(zeModuleBuildLogGetString, - (Program->ZeBuildLog, &LogSize, ur_cast(PropValue))); + (ZeBuildLog, &LogSize, ur_cast(PropValue))); if (PropSizeRet) { *PropSizeRet = LogSize; } if (PropValue) { - // When the program build fails in urProgramBuild(), we delayed cleaning - // up the build log because RT later calls this routine to get the - // failed build log. - // To avoid memory leaks, we should clean up the failed build log here - // because RT does not create sycl::program when urProgramBuild() fails, - // thus it won't call urProgramRelease() to clean up the build log. + // When the program build fails in urProgramBuild(), we delayed + // cleaning up the build log because RT later calls this routine to + // get the failed build log. To avoid memory leaks, we should clean up + // the failed build log here because RT does not create sycl::program + // when urProgramBuild() fails, thus it won't call urProgramRelease() + // to clean up the build log. if (Program->State == ur_program_handle_t_::Invalid) { - ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (Program->ZeBuildLog)); - Program->ZeBuildLog = nullptr; + ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (ZeBuildLog)); + Program->ZeBuildLogMap.erase(Device->ZeDevice); + ZeBuildLog = nullptr; } } return UR_RESULT_SUCCESS; @@ -817,12 +840,15 @@ ur_program_handle_t_::~ur_program_handle_t_() { // According to Level Zero Specification, all kernels and build logs // must be destroyed before the Module can be destroyed. So, be sure // to destroy build log before destroying the module. - if (ZeBuildLog) { - ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (ZeBuildLog)); + for (auto &ZeBuildLogPair : this->ZeBuildLogMap) { + ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (ZeBuildLogPair.second)); } if (ZeModule && OwnZeModule) { - ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModule)); + for (auto &ZeModulePair : this->ZeModuleMap) { + ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModulePair.second)); + } + this->ZeModuleMap.clear(); } } diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 5aa6ff89a3..1cb233dd45 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -131,6 +131,15 @@ struct ur_program_handle_t_ : _ur_object { // The Level Zero module handle. Used primarily in Exe state. ze_module_handle_t ZeModule{}; + // Map of L0 Modules created for all the devices for which a UR Program + // has been built. + std::unordered_map ZeModuleMap; + // The Level Zero build log from the last call to zeModuleCreate(). ze_module_build_log_handle_t ZeBuildLog{}; + + // Map of L0 Module Build logs created for all the devices for which a UR + // Program has been built. + std::unordered_map + ZeBuildLogMap; }; diff --git a/test/conformance/kernel/kernel_adapter_level_zero.match b/test/conformance/kernel/kernel_adapter_level_zero.match index 2668b6821a..8194c7ddad 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero.match +++ b/test/conformance/kernel/kernel_adapter_level_zero.match @@ -11,6 +11,7 @@ urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runt urKernelSetArgPointerTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgPointerTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgPointerTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgPointerNegativeTest.InvalidNullHandleKernel/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgValueTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_