From 77b705dccd4dfe59885b6226456b2d3f803c57ec Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 18 Oct 2023 14:58:42 +0100 Subject: [PATCH 01/20] [OpenCL] Implement urEventSetCallback and urContextSetExtendedDeleter. --- source/adapters/opencl/context.cpp | 57 ++++++++++++++++++++++-- source/adapters/opencl/enqueue.cpp | 8 ++-- source/adapters/opencl/event.cpp | 69 +++++++++++++++++++++++++++--- 3 files changed, 121 insertions(+), 13 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 16c5999160..6bc05c2003 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -10,6 +10,10 @@ #include "context.hpp" +#include +#include +#include + ur_result_t cl_adapter::getDevicesFromContext( ur_context_handle_t hContext, std::unique_ptr> &DevicesInCtx) { @@ -130,8 +134,53 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_context_extended_deleter_t pfnDeleter, - [[maybe_unused]] void *pUserData) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter, + void *pUserData) { + static std::unordered_map> + ContextCallbackMap; + static std::mutex ContextCallbackMutex; + + { + std::lock_guard Lock(ContextCallbackMutex); + // Callbacks can only be registered once and we need to avoid double + // allocating. + if (ContextCallbackMap.count(hContext) && + ContextCallbackMap[hContext].count(pfnDeleter)) { + return UR_RESULT_SUCCESS; + } + + ContextCallbackMap[hContext].insert(pfnDeleter); + } + + struct ContextCallback { + void execute() { + pfnDeleter(pUserData); + { + std::lock_guard Lock(*CallbackMutex); + (*CallbackMap)[hContext].erase(pfnDeleter); + if ((*CallbackMap)[hContext].empty()) { + CallbackMap->erase(hContext); + } + } + delete this; + } + ur_context_handle_t hContext; + ur_context_extended_deleter_t pfnDeleter; + void *pUserData; + std::unordered_map> *CallbackMap; + std::mutex *CallbackMutex; + }; + auto Callback = + new ContextCallback({hContext, pfnDeleter, pUserData, &ContextCallbackMap, + &ContextCallbackMutex}); + auto ClCallback = [](cl_context, void *pUserData) { + auto *C = static_cast(pUserData); + C->execute(); + }; + CL_RETURN_ON_FAILURE(clSetContextDestructorCallback( + cl_adapter::cast(hContext), ClCallback, Callback)); + + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 29c5ad672e..5f41878182 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -350,9 +350,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, cl_ext::EnqueueReadHostPipeName, &FuncPtr); @@ -382,9 +382,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, cl_ext::EnqueueWriteHostPipeName, &FuncPtr); diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 78303a0829..1d75fa7f28 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -10,6 +10,10 @@ #include "common.hpp" +#include +#include +#include + cl_event_info convertUREventInfoToCL(const ur_event_info_t PropName) { switch (PropName) { case UR_EVENT_INFO_COMMAND_QUEUE: @@ -128,9 +132,64 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, ur_event_callback_t pfnNotify, void *pUserData) { - std::ignore = hEvent; - std::ignore = execStatus; - std::ignore = pfnNotify; - std::ignore = pUserData; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + static std::unordered_map> + EventCallbackMap; + static std::mutex EventCallbackMutex; + + { + std::lock_guard Lock(EventCallbackMutex); + // Callbacks can only be registered once and we need to avoid double + // allocating. + if (EventCallbackMap.count(hEvent) && + EventCallbackMap[hEvent].count(pfnNotify)) { + return UR_RESULT_SUCCESS; + } + + EventCallbackMap[hEvent].insert(pfnNotify); + } + + cl_int CallbackType = 0; + switch (execStatus) { + case UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: + CallbackType = CL_SUBMITTED; + break; + case UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: + CallbackType = CL_RUNNING; + break; + case UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: + CallbackType = CL_COMPLETE; + break; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + struct EventCallback { + void execute() { + pfnNotify(hEvent, execStatus, pUserData); + { + std::lock_guard Lock(*CallbackMutex); + (*CallbackMap)[hEvent].erase(pfnNotify); + if ((*CallbackMap)[hEvent].empty()) { + CallbackMap->erase(hEvent); + } + } + delete this; + } + ur_event_handle_t hEvent; + ur_execution_info_t execStatus; + ur_event_callback_t pfnNotify; + void *pUserData; + std::unordered_map> + *CallbackMap; + std::mutex *CallbackMutex; + }; + auto Callback = new EventCallback({hEvent, execStatus, pfnNotify, pUserData, + &EventCallbackMap, &EventCallbackMutex}); + auto ClCallback = [](cl_event, cl_int, void *pUserData) { + auto *C = static_cast(pUserData); + C->execute(); + }; + CL_RETURN_ON_FAILURE(clSetEventCallback(cl_adapter::cast(hEvent), + CallbackType, ClCallback, Callback)); + return UR_RESULT_SUCCESS; } From 2792092121d2f911d9c90517ee89d8ab29552e9d Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 20 Oct 2023 09:57:12 +0100 Subject: [PATCH 02/20] [OpenCL] Add more mappings from CL error codes to UR error codes. Also merge urQueueCreate InvalidValueProperties test into InvalidQueueProperties test. --- source/adapters/opencl/common.cpp | 17 +++++++++++++++++ test/conformance/queue/urQueueCreate.cpp | 19 ++++++++----------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 2b0e7b6a27..77a51694dd 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -60,6 +60,23 @@ ur_result_t mapCLErrorToUR(cl_int Result) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; case CL_INVALID_MEM_OBJECT: return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + case CL_INVALID_QUEUE_PROPERTIES: + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; + case CL_INVALID_BUFFER_SIZE: + return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + case CL_INVALID_IMAGE_SIZE: + return UR_RESULT_ERROR_INVALID_IMAGE_SIZE; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + case CL_INVALID_IMAGE_DESCRIPTOR: + return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + return UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + case CL_LINK_PROGRAM_FAILURE: + return UR_RESULT_ERROR_PROGRAM_LINK_FAILURE; + case CL_INVALID_ARG_INDEX: + return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX; default: return UR_RESULT_ERROR_UNKNOWN; } diff --git a/test/conformance/queue/urQueueCreate.cpp b/test/conformance/queue/urQueueCreate.cpp index 0f99009abd..90813b20a5 100644 --- a/test/conformance/queue/urQueueCreate.cpp +++ b/test/conformance/queue/urQueueCreate.cpp @@ -65,26 +65,23 @@ TEST_P(urQueueCreateTest, InvalidNullPointerQueue) { urQueueCreate(context, device, 0, nullptr)); } -TEST_P(urQueueCreateTest, InvalidValueProperties) { - ur_queue_handle_t queue = nullptr; +TEST_P(urQueueCreateTest, InvalidQueueProperties) { ur_queue_properties_t props = { /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, /*.pNext =*/nullptr, /*.flags =*/UR_QUEUE_FLAG_FORCE_UINT32, }; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_VALUE, - urQueueCreate(context, device, &props, &queue)); -} -TEST_P(urQueueCreateTest, InvalidQueueProperties) { - ur_queue_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, - /*.pNext =*/nullptr, - /*.flags =*/UR_QUEUE_FLAG_PRIORITY_HIGH | UR_QUEUE_FLAG_PRIORITY_LOW, - }; + // Initial value is just not a valid enum + { + ur_queue_handle_t queue = nullptr; + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES, + urQueueCreate(context, device, &props, &queue)); + } // It should be an error to specify both low/high priorities { ur_queue_handle_t queue = nullptr; + props.flags = UR_QUEUE_FLAG_PRIORITY_HIGH | UR_QUEUE_FLAG_PRIORITY_LOW; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES, urQueueCreate(context, device, &props, &queue)); } From 16e28e289d1ff28fee3036ef5f31fc78a246ada0 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 20 Oct 2023 10:31:36 +0100 Subject: [PATCH 03/20] Update match files to reflect removed test case --- test/conformance/queue/queue_adapter_cuda.match | 1 - test/conformance/queue/queue_adapter_hip.match | 1 - test/conformance/queue/queue_adapter_level_zero.match | 1 - 3 files changed, 3 deletions(-) diff --git a/test/conformance/queue/queue_adapter_cuda.match b/test/conformance/queue/queue_adapter_cuda.match index 3b2f27c1d6..f7967fb388 100644 --- a/test/conformance/queue/queue_adapter_cuda.match +++ b/test/conformance/queue/queue_adapter_cuda.match @@ -1,4 +1,3 @@ -urQueueCreateTest.InvalidValueProperties/NVIDIA_CUDA_BACKEND___{{.*}}_ urQueueCreateTest.InvalidQueueProperties/NVIDIA_CUDA_BACKEND___{{.*}}_ urQueueCreateWithNativeHandleTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ urQueueGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_QUEUE_INFO_DEVICE_DEFAULT diff --git a/test/conformance/queue/queue_adapter_hip.match b/test/conformance/queue/queue_adapter_hip.match index 6cce588dc4..16166a827c 100644 --- a/test/conformance/queue/queue_adapter_hip.match +++ b/test/conformance/queue/queue_adapter_hip.match @@ -1,4 +1,3 @@ -urQueueCreateTest.InvalidValueProperties/AMD_HIP_BACKEND___{{.*}}_ urQueueCreateTest.InvalidQueueProperties/AMD_HIP_BACKEND___{{.*}}_ urQueueCreateWithParamTest.SuccessWithProperties/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE urQueueCreateWithParamTest.SuccessWithProperties/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_FLAG_PROFILING_ENABLE diff --git a/test/conformance/queue/queue_adapter_level_zero.match b/test/conformance/queue/queue_adapter_level_zero.match index 0013d5b397..9ceebd4233 100644 --- a/test/conformance/queue/queue_adapter_level_zero.match +++ b/test/conformance/queue/queue_adapter_level_zero.match @@ -1,3 +1,2 @@ -urQueueCreateTest.InvalidValueProperties/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urQueueCreateTest.InvalidQueueProperties/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{Segmentation fault|Aborted}} From 5c8a86bc8659f646135b7dfc548a29c2290ae8eb Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 20 Oct 2023 11:57:36 +0100 Subject: [PATCH 04/20] [OpenCL] Fix some unchecked dereferencing of optional params. --- source/adapters/opencl/kernel.cpp | 3 ++- source/adapters/opencl/memory.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 80b1502854..289ddd81fd 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -199,7 +199,8 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } } - *(static_cast(pPropValue)) = static_cast(RetVal); + if (pPropValue) + *(static_cast(pPropValue)) = static_cast(RetVal); if (pPropSizeRet) *pPropSizeRet = sizeof(uint32_t); diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 279faad376..ee3c502006 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -268,9 +268,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( } } + void *HostPtr = pProperties ? pProperties->pHost : nullptr; *phBuffer = reinterpret_cast(clCreateBuffer( cl_adapter::cast(hContext), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&RetErr))); + size, HostPtr, cl_adapter::cast(&RetErr))); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; From f2be82325d4c1ccde957899f3d3635bca274396b Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Thu, 26 Oct 2023 12:27:39 -0700 Subject: [PATCH 05/20] [UR][L0] Propagate errors from `USMAllocationMakeResident` This change ensures that USM allocation APIs don't return `UR_RESULT_SUCCESS` when an error occurs within `USMAllocationMakeResident`. Signed-off-by: Michael Aziz --- source/adapters/level_zero/usm.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index d06a0353e4..d75f3872b1 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -192,9 +192,8 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); - USMAllocationMakeResident(USMDeviceAllocationForceResidency, Context, Device, - *ResultPtr, Size); - return UR_RESULT_SUCCESS; + return USMAllocationMakeResident(USMDeviceAllocationForceResidency, Context, + Device, *ResultPtr, Size); } static ur_result_t USMSharedAllocImpl(void **ResultPtr, @@ -225,11 +224,9 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); - USMAllocationMakeResident(USMSharedAllocationForceResidency, Context, Device, - *ResultPtr, Size); - // TODO: Handle PI_MEM_ALLOC_DEVICE_READ_ONLY. - return UR_RESULT_SUCCESS; + return USMAllocationMakeResident(USMSharedAllocationForceResidency, Context, + Device, *ResultPtr, Size); } static ur_result_t USMHostAllocImpl(void **ResultPtr, @@ -247,9 +244,8 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); - USMAllocationMakeResident(USMHostAllocationForceResidency, Context, nullptr, - *ResultPtr, Size); - return UR_RESULT_SUCCESS; + return USMAllocationMakeResident(USMHostAllocationForceResidency, Context, + nullptr, *ResultPtr, Size); } UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( From f056f97fde9cbe501ea703b1112110c8fa4ed768 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 30 Oct 2023 14:29:20 -0700 Subject: [PATCH 06/20] Fix error propagation Signed-off-by: Michael Aziz --- source/adapters/level_zero/usm.cpp | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index d75f3872b1..51e86bb65f 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -192,8 +192,13 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); - return USMAllocationMakeResident(USMDeviceAllocationForceResidency, Context, - Device, *ResultPtr, Size); + auto Result = USMAllocationMakeResident(USMDeviceAllocationForceResidency, + Context, Device, *ResultPtr, Size); + if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || + Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) { + return Result; + } + return UR_RESULT_SUCCESS; } static ur_result_t USMSharedAllocImpl(void **ResultPtr, @@ -224,9 +229,15 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); + auto Result = USMAllocationMakeResident(USMSharedAllocationForceResidency, + Context, Device, *ResultPtr, Size); + if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || + Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) { + return Result; + } + // TODO: Handle PI_MEM_ALLOC_DEVICE_READ_ONLY. - return USMAllocationMakeResident(USMSharedAllocationForceResidency, Context, - Device, *ResultPtr, Size); + return UR_RESULT_SUCCESS; } static ur_result_t USMHostAllocImpl(void **ResultPtr, @@ -244,8 +255,13 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); - return USMAllocationMakeResident(USMHostAllocationForceResidency, Context, - nullptr, *ResultPtr, Size); + auto Result = USMAllocationMakeResident(USMHostAllocationForceResidency, + Context, nullptr, *ResultPtr, Size); + if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || + Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) { + return Result; + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( From bfb3daccc9d88b9484d0544ce8a8d35fd6385234 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 18 Oct 2023 17:20:36 +0100 Subject: [PATCH 07/20] [OpenCL] Implement urEnqueueUSMMemcpy2D and allow large fill patterns. Normally OpenCL limits fill type operations to a max pattern size of 128, this patch includes a workaround to extend that. --- source/adapters/opencl/enqueue.cpp | 49 ++++++++-- source/adapters/opencl/usm.cpp | 144 ++++++++++++++++++++++++----- 2 files changed, 165 insertions(+), 28 deletions(-) diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 29c5ad672e..ab5126c53f 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -178,12 +178,47 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( size_t patternSize, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + // CL FillBuffer only allows pattern sizes up to the largest CL type: + // long16/double16 + if (patternSize <= 128) { + CL_RETURN_ON_FAILURE( + clEnqueueFillBuffer(cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), pPattern, + patternSize, offset, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + return UR_RESULT_SUCCESS; + } + + auto NumValues = size / sizeof(uint64_t); + auto HostBuffer = new uint64_t[NumValues]; + auto NumChunks = patternSize / sizeof(uint64_t); + for (size_t i = 0; i < NumValues; i++) { + HostBuffer[i] = static_cast(pPattern)[i % NumChunks]; + } - CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( + cl_event WriteEvent = nullptr; + auto ClErr = clEnqueueWriteBuffer( cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + cl_adapter::cast(hBuffer), false, offset, size, HostBuffer, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + &WriteEvent); + if (ClErr != CL_SUCCESS) { + delete[] HostBuffer; + CL_RETURN_ON_FAILURE(ClErr); + } + + auto DeleteCallback = [](cl_event, cl_int, void *pUserData) { + delete[] static_cast(pUserData); + }; + CL_RETURN_ON_FAILURE( + clSetEventCallback(WriteEvent, CL_COMPLETE, DeleteCallback, HostBuffer)); + + if (phEvent) { + *phEvent = cl_adapter::cast(WriteEvent); + } else { + CL_RETURN_ON_FAILURE(clReleaseEvent(WriteEvent)); + } return UR_RESULT_SUCCESS; } @@ -350,9 +385,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, cl_ext::EnqueueReadHostPipeName, &FuncPtr); @@ -382,9 +417,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, cl_ext::EnqueueWriteHostPipeName, &FuncPtr); diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index afa22ffbb9..d6008d51e7 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -197,7 +197,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, void *ptr, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Have to look up the context from the kernel cl_context CLContext; cl_int CLErr = clGetCommandQueueInfo( @@ -207,20 +206,82 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( return mapCLErrorToUR(CLErr); } - clEnqueueMemFillINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, - cl_ext::EnqueueMemFillName, &FuncPtr); + if (patternSize <= 128) { + clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, + cl_ext::EnqueueMemFillName, &EnqueueMemFill)); + + CL_RETURN_ON_FAILURE( + EnqueueMemFill(cl_adapter::cast(hQueue), ptr, + pPattern, patternSize, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + return UR_RESULT_SUCCESS; + } - if (FuncPtr) { - RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), ptr, pPattern, - patternSize, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + // OpenCL only supports pattern sizes as large as the largest CL type + // (double16/long16 - 128 bytes), anything larger we need to do on the host + // side and copy it into the target allocation. + clHostMemAllocINTEL_fn HostMemAlloc = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::HostMemAllocName, &HostMemAlloc)); + + clEnqueueMemcpyINTEL_fn USMMemcpy = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + cl_ext::EnqueueMemcpyName, &USMMemcpy)); + + clMemBlockingFreeINTEL_fn USMFree = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, + cl_ext::MemBlockingFreeName, &USMFree)); + + cl_int ClErr = CL_SUCCESS; + auto HostBuffer = static_cast( + HostMemAlloc(CLContext, nullptr, size, 0, &ClErr)); + CL_RETURN_ON_FAILURE(ClErr); + + auto NumValues = size / sizeof(uint64_t); + auto NumChunks = patternSize / sizeof(uint64_t); + for (size_t i = 0; i < NumValues; i++) { + HostBuffer[i] = static_cast(pPattern)[i % NumChunks]; } - return RetVal; + cl_event CopyEvent = nullptr; + CL_RETURN_ON_FAILURE(USMMemcpy( + cl_adapter::cast(hQueue), false, ptr, HostBuffer, size, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + &CopyEvent)); + + struct DeleteCallbackInfo { + clMemBlockingFreeINTEL_fn USMFree; + cl_context CLContext; + void *HostBuffer; + void execute() { + USMFree(CLContext, HostBuffer); + delete this; + } + }; + + auto Info = new DeleteCallbackInfo{USMFree, CLContext, HostBuffer}; + + auto DeleteCallback = [](cl_event, cl_int, void *pUserData) { + static_cast(pUserData)->execute(); + }; + + CL_RETURN_ON_FAILURE( + clSetEventCallback(CopyEvent, CL_COMPLETE, DeleteCallback, Info)); + + if (phEvent) { + *phEvent = cl_adapter::cast(CopyEvent); + } else { + CL_RETURN_ON_FAILURE(clReleaseEvent(CopyEvent)); + } + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( @@ -343,18 +404,59 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( [[maybe_unused]] uint32_t numEventsInWaitList, [[maybe_unused]] const ur_event_handle_t *phEventWaitList, [[maybe_unused]] ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_INVALID_OPERATION; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] bool blocking, - [[maybe_unused]] void *pDst, [[maybe_unused]] size_t dstPitch, - [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t srcPitch, - [[maybe_unused]] size_t width, [[maybe_unused]] size_t height, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, + const void *pSrc, size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr)); + + clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + cl_ext::EnqueueMemcpyName, &FuncPtr); + + if (!FuncPtr) { + return RetVal; + } + + std::vector Events; + for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { + cl_event Event = nullptr; + auto ClResult = + FuncPtr(cl_adapter::cast(hQueue), false, + static_cast(pDst) + dstPitch * HeightIndex, + static_cast(pSrc) + srcPitch * HeightIndex, + width, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), &Event); + Events.push_back(Event); + if (ClResult != CL_SUCCESS) { + for (const auto &E : Events) { + clReleaseEvent(E); + } + CL_RETURN_ON_FAILURE(ClResult); + } + } + cl_int ClResult = CL_SUCCESS; + if (blocking) { + ClResult = clWaitForEvents(Events.size(), Events.data()); + } + if (phEvent && ClResult == CL_SUCCESS) { + ClResult = clEnqueueBarrierWithWaitList( + cl_adapter::cast(hQueue), Events.size(), + Events.data(), cl_adapter::cast(phEvent)); + } + for (const auto &E : Events) { + clReleaseEvent(E); + } + CL_RETURN_ON_FAILURE(ClResult) + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL From 603dcfbb524c7d3b72641c543b88aef551c2c784 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Tue, 31 Oct 2023 15:14:22 +0000 Subject: [PATCH 08/20] Address feedback --- source/adapters/opencl/enqueue.cpp | 12 +++++++-- source/adapters/opencl/usm.cpp | 39 +++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index ab5126c53f..5dff7066ae 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -211,8 +211,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( auto DeleteCallback = [](cl_event, cl_int, void *pUserData) { delete[] static_cast(pUserData); }; - CL_RETURN_ON_FAILURE( - clSetEventCallback(WriteEvent, CL_COMPLETE, DeleteCallback, HostBuffer)); + ClErr = + clSetEventCallback(WriteEvent, CL_COMPLETE, DeleteCallback, HostBuffer); + if (ClErr != CL_SUCCESS) { + // We can attempt to recover gracefully by attempting to wait for the write + // to finish and deleting the host buffer. + clWaitForEvents(1, &WriteEvent); + delete[] HostBuffer; + clReleaseEvent(WriteEvent); + CL_RETURN_ON_FAILURE(ClErr); + } if (phEvent) { *phEvent = cl_adapter::cast(WriteEvent); diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index d6008d51e7..b411fd1bcd 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -257,24 +257,39 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( &CopyEvent)); struct DeleteCallbackInfo { + DeleteCallbackInfo(clMemBlockingFreeINTEL_fn USMFree, cl_context CLContext, + void *HostBuffer) + : USMFree(USMFree), CLContext(CLContext), HostBuffer(HostBuffer) { + clRetainContext(CLContext); + } + ~DeleteCallbackInfo() { + USMFree(CLContext, HostBuffer); + clReleaseContext(CLContext); + } + DeleteCallbackInfo(const DeleteCallbackInfo &) = delete; + DeleteCallbackInfo &operator=(const DeleteCallbackInfo &) = delete; + clMemBlockingFreeINTEL_fn USMFree; cl_context CLContext; void *HostBuffer; - void execute() { - USMFree(CLContext, HostBuffer); - delete this; - } }; - auto Info = new DeleteCallbackInfo{USMFree, CLContext, HostBuffer}; + auto Info = new DeleteCallbackInfo(USMFree, CLContext, HostBuffer); auto DeleteCallback = [](cl_event, cl_int, void *pUserData) { - static_cast(pUserData)->execute(); + auto Info = static_cast(pUserData); + delete Info; }; - CL_RETURN_ON_FAILURE( - clSetEventCallback(CopyEvent, CL_COMPLETE, DeleteCallback, Info)); - + ClErr = clSetEventCallback(CopyEvent, CL_COMPLETE, DeleteCallback, Info); + if (ClErr != CL_SUCCESS) { + // We can attempt to recover gracefully by attempting to wait for the copy + // to finish and deleting the info struct here. + clWaitForEvents(1, &CopyEvent); + delete Info; + clReleaseEvent(CopyEvent); + CL_RETURN_ON_FAILURE(ClErr); + } if (phEvent) { *phEvent = cl_adapter::cast(CopyEvent); } else { @@ -426,7 +441,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return RetVal; } - std::vector Events; + std::vector Events(height); for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { cl_event Event = nullptr; auto ClResult = @@ -435,7 +450,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( static_cast(pSrc) + srcPitch * HeightIndex, width, numEventsInWaitList, cl_adapter::cast(phEventWaitList), &Event); - Events.push_back(Event); + Events[HeightIndex] = Event; if (ClResult != CL_SUCCESS) { for (const auto &E : Events) { clReleaseEvent(E); @@ -453,7 +468,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( Events.data(), cl_adapter::cast(phEvent)); } for (const auto &E : Events) { - clReleaseEvent(E); + CL_RETURN_ON_FAILURE(clReleaseEvent(E)); } CL_RETURN_ON_FAILURE(ClResult) return UR_RESULT_SUCCESS; From fc34c26a8fb3ca24795663a4312e2b93aa9635d3 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 1 Nov 2023 11:12:57 +0000 Subject: [PATCH 09/20] [OpenCL] Make USM functions return UR_RESULT_ERROR_INVALID_USM_SIZE Also ignore flags in no-op urEnqueueUSMPrefetch hint. --- source/adapters/opencl/usm.cpp | 93 ++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index afa22ffbb9..d8cd067f4b 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -15,7 +15,6 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { void *Ptr = nullptr; - ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; cl_mem_alloc_flags_intel Flags = 0; @@ -40,23 +39,28 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, // First we need to look up the function pointer clHostMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::HostMemAllocName, &FuncPtr); + if (auto UrResult = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::HostMemAllocName, &FuncPtr)) { + return UrResult; + } if (FuncPtr) { - Ptr = FuncPtr(CLContext, Properties, size, Alignment, - cl_adapter::cast(&RetVal)); + cl_int ClResult = CL_SUCCESS; + Ptr = FuncPtr(CLContext, Properties, size, Alignment, &ClResult); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); } *ppMem = Ptr; - // ensure we aligned the allocation correctly - if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) - assert(reinterpret_cast(*ppMem) % Alignment == 0 && - "allocation not aligned correctly"); + assert((Alignment == 0 || + reinterpret_cast(*ppMem) % Alignment == 0) && + "Allocation not aligned correctly!"); - return RetVal; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL @@ -65,7 +69,6 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, void **ppMem) { void *Ptr = nullptr; - ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; cl_mem_alloc_flags_intel Flags = 0; @@ -92,24 +95,30 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clDeviceMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, - cl_ext::DeviceMemAllocName, &FuncPtr); + if (auto UrResult = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + cl_ext::DeviceMemAllocName, &FuncPtr)) { + return UrResult; + } if (FuncPtr) { + cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), cl_adapter::cast(Properties), size, - Alignment, cl_adapter::cast(&RetVal)); + Alignment, &ClResult); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); } *ppMem = Ptr; - // ensure we aligned the allocation correctly - if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) - assert(reinterpret_cast(*ppMem) % Alignment == 0 && - "allocation not aligned correctly"); + assert((Alignment == 0 || + reinterpret_cast(*ppMem) % Alignment == 0) && + "Allocation not aligned correctly!"); - return RetVal; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL @@ -118,7 +127,6 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, void **ppMem) { void *Ptr = nullptr; - ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; cl_mem_alloc_flags_intel Flags = 0; @@ -155,22 +163,29 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clSharedMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, - cl_ext::SharedMemAllocName, &FuncPtr); + if (auto UrResult = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + cl_ext::SharedMemAllocName, &FuncPtr)) { + return UrResult; + } if (FuncPtr) { + cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), cl_adapter::cast(Properties), size, - Alignment, cl_adapter::cast(&RetVal)); + Alignment, cl_adapter::cast(&ClResult)); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); } *ppMem = Ptr; - assert(Alignment == 0 || - (RetVal == UR_RESULT_SUCCESS && - reinterpret_cast(*ppMem) % Alignment == 0)); - return RetVal; + assert((Alignment == 0 || + reinterpret_cast(*ppMem) % Alignment == 0) && + "Allocation not aligned correctly!"); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, @@ -255,14 +270,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, - [[maybe_unused]] size_t size, ur_usm_migration_flags_t flags, + [[maybe_unused]] size_t size, + [[maybe_unused]] ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // flags is currently unused so fail if set - if (flags != 0) - return UR_RESULT_ERROR_INVALID_VALUE; - return mapCLErrorToUR(clEnqueueMarkerWithWaitList( cl_adapter::cast(hQueue), numEventsInWaitList, cl_adapter::cast(phEventWaitList), @@ -387,9 +399,14 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, } if (FuncPtr) { - RetVal = - mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), pMem, - PropNameCL, propSize, pPropValue, pPropSizeRet)); + size_t CheckPropSize = 0; + size_t *CheckPropSizeRet = pPropSizeRet ? pPropSizeRet : &CheckPropSize; + RetVal = mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), + pMem, PropNameCL, propSize, pPropValue, + CheckPropSizeRet)); + if (pPropValue && *CheckPropSizeRet != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } if (RetVal == UR_RESULT_SUCCESS && pPropValue && propName == UR_USM_ALLOC_INFO_TYPE) { auto *AllocTypeCL = From 371e1b85ac5cee8d52447c0cd8c4eb196ba5878d Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 1 Nov 2023 11:46:11 +0000 Subject: [PATCH 10/20] [OpenCL] Retain native handle objects when properties dictate. --- source/adapters/opencl/event.cpp | 13 ++++++++----- source/adapters/opencl/kernel.cpp | 7 +++++-- source/adapters/opencl/memory.cpp | 14 ++++++++------ source/adapters/opencl/program.cpp | 7 +++++-- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 78303a0829..64cf410460 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -50,12 +50,15 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { } } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( - ur_native_handle_t hNativeEvent, - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { +UR_APIEXPORT ur_result_t UR_APICALL +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + [[maybe_unused]] ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { *phEvent = reinterpret_cast(hNativeEvent); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urEventRetain(*phEvent); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 80b1502854..6c688021d7 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -335,9 +335,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, - const ur_kernel_native_properties_t *, ur_kernel_handle_t *phKernel) { - + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel) { *phKernel = reinterpret_cast(hNativeKernel); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urKernelRetain(*phKernel); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 279faad376..8912d333e3 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -331,10 +331,11 @@ urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ur_native_handle_t hNativeMem, [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] const ur_mem_native_properties_t *pProperties, - ur_mem_handle_t *phMem) { - + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { *phMem = reinterpret_cast(hNativeMem); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urMemRetain(*phMem); + } return UR_RESULT_SUCCESS; } @@ -343,10 +344,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( [[maybe_unused]] ur_context_handle_t hContext, [[maybe_unused]] const ur_image_format_t *pImageFormat, [[maybe_unused]] const ur_image_desc_t *pImageDesc, - [[maybe_unused]] const ur_mem_native_properties_t *pProperties, - ur_mem_handle_t *phMem) { - + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { *phMem = reinterpret_cast(hNativeMem); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urMemRetain(*phMem); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 0beca23dab..954c2dc48f 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -299,9 +299,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( ur_native_handle_t hNativeProgram, ur_context_handle_t, - const ur_program_native_properties_t *, ur_program_handle_t *phProgram) { - + const ur_program_native_properties_t *pProperties, + ur_program_handle_t *phProgram) { *phProgram = reinterpret_cast(hNativeProgram); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urProgramRetain(*phProgram); + } return UR_RESULT_SUCCESS; } From bc7c0f4926393c464ab4893b9b2ea8ba9c09bcdd Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Wed, 1 Nov 2023 08:02:04 -0700 Subject: [PATCH 11/20] Fix result checks Signed-off-by: Michael Aziz --- source/adapters/level_zero/usm.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 51e86bb65f..5c4e44930b 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -195,7 +195,7 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, auto Result = USMAllocationMakeResident(USMDeviceAllocationForceResidency, Context, Device, *ResultPtr, Size); if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || - Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) { + Result == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) { return Result; } return UR_RESULT_SUCCESS; @@ -232,7 +232,7 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, auto Result = USMAllocationMakeResident(USMSharedAllocationForceResidency, Context, Device, *ResultPtr, Size); if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || - Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) { + Result == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) { return Result; } @@ -258,7 +258,7 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, auto Result = USMAllocationMakeResident(USMHostAllocationForceResidency, Context, nullptr, *ResultPtr, Size); if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || - Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) { + Result == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) { return Result; } return UR_RESULT_SUCCESS; From 3b26f7b0456ece4f2729df2eb57d2b9a688c4b73 Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Wed, 1 Nov 2023 15:26:13 -0700 Subject: [PATCH 12/20] [UR][L0] Add support for zeCommandListHostSynchronize Instead of creating an event, appending, and synchronizing it on it to wait for all commands in an immediate command list, emit a zeCommandListHostSynchronize instead. Signed-off-by: Jaime Arteaga --- source/adapters/level_zero/queue.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 2dcdcfd51e..0388b8fdbb 100755 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1406,18 +1406,8 @@ ur_result_t ur_queue_handle_t_::synchronize() { if (ImmCmdList == Queue->CommandListMap.end()) return UR_RESULT_SUCCESS; - ur_event_handle_t Event{}; - ur_result_t Res = createEventAndAssociateQueue( - reinterpret_cast(Queue), &Event, - UR_EXT_COMMAND_TYPE_USER, ImmCmdList, /* IsInternal */ false); - if (Res != UR_RESULT_SUCCESS) - return Res; - auto zeEvent = Event->ZeEvent; - ZE2UR_CALL(zeCommandListAppendBarrier, - (ImmCmdList->first, zeEvent, 0, nullptr)); - ZE2UR_CALL(zeHostSynchronize, (zeEvent)); - Event->Completed = true; - UR_CALL(urEventRelease(Event)); + // wait for all commands previously submitted to this immediate command list + ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX)); // Cleanup all events from the synced command list. CleanupEventListFromResetCmdList(ImmCmdList->second.EventList, true); From fe469d7fb77d2ebdd70b0713483ed61846a43881 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Thu, 2 Nov 2023 08:43:33 -0700 Subject: [PATCH 13/20] Add TODO for handling other error results Signed-off-by: Michael Aziz --- source/adapters/level_zero/usm.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 5c4e44930b..daec0408fb 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -192,6 +192,8 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); + // TODO: Return any non-success result from USMAllocationMakeResident once + // oneapi-src/level-zero-spec#240 is resolved. auto Result = USMAllocationMakeResident(USMDeviceAllocationForceResidency, Context, Device, *ResultPtr, Size); if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || @@ -229,6 +231,8 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); + // TODO: Return any non-success result from USMAllocationMakeResident once + // oneapi-src/level-zero-spec#240 is resolved. auto Result = USMAllocationMakeResident(USMSharedAllocationForceResidency, Context, Device, *ResultPtr, Size); if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || @@ -255,6 +259,8 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); + // TODO: Return any non-success result from USMAllocationMakeResident once + // oneapi-src/level-zero-spec#240 is resolved. auto Result = USMAllocationMakeResident(USMHostAllocationForceResidency, Context, nullptr, *ResultPtr, Size); if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY || From 143a2e4e54c5c5378ae5e84026c2a6391bb6f17b Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Thu, 2 Nov 2023 09:58:03 -0700 Subject: [PATCH 14/20] [UR][L0] Add support for urAdapterGetLastError in L0 Signed-off-by: Jaime Arteaga --- source/adapters/level_zero/adapter.cpp | 18 ++++++++++-------- source/adapters/level_zero/common.cpp | 13 ++++++++----- source/adapters/level_zero/common.hpp | 6 ++++-- source/adapters/level_zero/device.cpp | 3 ++- source/adapters/level_zero/kernel.cpp | 10 ++++++---- .../runtime/runtime_adapter_level_zero.match | 1 - 6 files changed, 30 insertions(+), 21 deletions(-) diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index ddb6047e35..0a4b71a773 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -175,17 +175,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { } UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( - ur_adapter_handle_t Adapter, ///< [in] handle of the platform instance + [[maybe_unused]] ur_adapter_handle_t + AdapterHandle, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. - int32_t *Error ///< [out] pointer to an integer where the adapter specific - ///< error code will be stored. + [[maybe_unused]] int32_t + *Error ///< [out] pointer to an integer where the adapter specific + ///< error code will be stored. ) { - std::ignore = Adapter; - std::ignore = Message; - std::ignore = Error; - urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + AdapterHandle = &Adapter; + *Message = ErrorMessage; + Error = &ErrorAdapterNativeCode; + + return ErrorMessageCode; } UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index 3d83b91139..eb0f34307c 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -280,13 +280,16 @@ template <> zes_structure_type_t getZesStructureType() { // Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; thread_local char ErrorMessage[MaxMessageSize]; +thread_local int32_t ErrorAdapterNativeCode; // Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *message, - ur_result_t error_code) { - assert(strlen(message) <= MaxMessageSize); - strcpy(ErrorMessage, message); - ErrorMessageCode = error_code; +[[maybe_unused]] void setErrorMessage(const char *pMessage, + ur_result_t ErrorCode, + int32_t AdapterErrorCode) { + assert(strlen(pMessage) <= MaxMessageSize); + strcpy(ErrorMessage, pMessage); + ErrorMessageCode = ErrorCode; + ErrorAdapterNativeCode = AdapterErrorCode; } ur_result_t zerPluginGetLastError(char **message) { diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index e3e89152b9..7c2ac7f8be 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -467,7 +467,9 @@ constexpr char ZE_SUPPORTED_EXTENSIONS[] = constexpr size_t MaxMessageSize = 256; extern thread_local ur_result_t ErrorMessageCode; extern thread_local char ErrorMessage[MaxMessageSize]; +extern thread_local int32_t ErrorAdapterNativeCode; // Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *message, - ur_result_t error_code); +[[maybe_unused]] void setErrorMessage(const char *pMessage, + ur_result_t ErrorCode, + int32_t AdapterErrorCode); diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index dbc18ead4a..e5157fc134 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -631,7 +631,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( case UR_DEVICE_INFO_GLOBAL_MEM_FREE: { if (getenv("ZES_ENABLE_SYSMAN") == nullptr) { setErrorMessage("Set ZES_ENABLE_SYSMAN=1 to obtain free memory", - UR_RESULT_SUCCESS); + UR_RESULT_ERROR_UNINITIALIZED, + static_cast(ZE_RESULT_ERROR_UNINITIALIZED)); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } // Only report device memory which zeMemAllocDevice can allocate from. diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index c7d77a1fb2..266efd9291 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -284,8 +284,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { setErrorMessage("Write device global variable is out of range.", - UR_RESULT_ERROR_INVALID_VALUE); - return UR_RESULT_ERROR_UNKNOWN; + UR_RESULT_ERROR_INVALID_VALUE, + static_cast(ZE_RESULT_ERROR_INVALID_ARGUMENT)); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } // Copy engine is preferred only for host to device transfer. @@ -333,8 +334,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { setErrorMessage("Read from device global variable is out of range.", - UR_RESULT_ERROR_INVALID_VALUE); - return UR_RESULT_ERROR_UNKNOWN; + UR_RESULT_ERROR_INVALID_VALUE, + static_cast(ZE_RESULT_ERROR_INVALID_ARGUMENT)); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } // Copy engine is preferred only for host to device transfer. diff --git a/test/conformance/runtime/runtime_adapter_level_zero.match b/test/conformance/runtime/runtime_adapter_level_zero.match index 10e5183dbe..e69de29bb2 100644 --- a/test/conformance/runtime/runtime_adapter_level_zero.match +++ b/test/conformance/runtime/runtime_adapter_level_zero.match @@ -1 +0,0 @@ -urAdapterGetLastErrorTest.Success From c5fbda04f0050ae81b34e22ee3801bdd9c4a2041 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 6 Nov 2023 12:06:16 +0000 Subject: [PATCH 15/20] [OpenCL] Fix enum passed for urKernelSetExecInfo's USM_PTRS property Also return RESULT_SUCCESS for no-op UR_KERNEL_EXEC_INFO_CACHE_CONFIG hint. --- source/adapters/opencl/kernel.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 80b1502854..ee5559310e 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -284,12 +284,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_CACHE_CONFIG: { - /* Setting the cache config is unsupported in OpenCL */ - return UR_RESULT_ERROR_INVALID_ENUMERATION; + // Setting the cache config is unsupported in OpenCL, but this is just a + // hint. + return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_USM_PTRS: { CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - cl_adapter::cast(hKernel), propName, propSize, pPropValue)); + cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, propSize, pPropValue)); return UR_RESULT_SUCCESS; } default: { From 6a3c63da12a4777cfe43d44314d812396c8d8811 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 3 Nov 2023 13:56:43 +0000 Subject: [PATCH 16/20] [OpenCL] Return INVALID_SIZE from GetInfo entry points. Also includes a few other GetInfo related fixes: * Add missing device info queries * Add mapping of CL command type to UR command type * Correct mapping of UR_QUEUE_INFO_FLAGS * Add mapping of cl_command_queue_properties to ur_queue_flags_t * Add mapping of cl_unified_shared_memory_type_intel to ur_usm_type_t * Add UNSUPPORTED_ENUMERATION path to KernelGeGroupInfo tests. And a fix related to one of the fixed queries: * Populate pfnReadHostPipe and pfnWriteHostPipe ddi table entries. --- source/adapters/opencl/context.cpp | 13 ++- source/adapters/opencl/device.cpp | 36 ++++++- source/adapters/opencl/event.cpp | 96 ++++++++++++++++--- source/adapters/opencl/kernel.cpp | 47 ++++++++- source/adapters/opencl/memory.cpp | 27 ++++-- source/adapters/opencl/program.cpp | 54 ++++++----- source/adapters/opencl/queue.cpp | 52 ++++++++-- source/adapters/opencl/sampler.cpp | 16 +++- .../adapters/opencl/ur_interface_loader.cpp | 2 + source/adapters/opencl/usm.cpp | 70 +++++++------- .../kernel/urKernelGetGroupInfo.cpp | 16 ++-- 11 files changed, 318 insertions(+), 111 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 6bc05c2003..3ada4a3d37 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -93,10 +93,17 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_NUM_DEVICES: case UR_CONTEXT_INFO_DEVICES: case UR_CONTEXT_INFO_REFERENCE_COUNT: { - - CL_RETURN_ON_FAILURE( + size_t CheckPropSize = 0; + auto ClResult = clGetContextInfo(cl_adapter::cast(hContext), CLPropName, - propSize, pPropValue, pPropSizeRet)); + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } return UR_RESULT_SUCCESS; } default: diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 3fc6f5d491..710ebcfb88 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -345,6 +345,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(URDeviceType); } + case UR_DEVICE_INFO_DEVICE_ID: { + bool Supported = false; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), {"cl_khr_pci_bus_info"}, + Supported)); + + if (!Supported) { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + + cl_device_pci_bus_info_khr PciInfo = {}; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), CL_DEVICE_PCI_BUS_INFO_KHR, + sizeof(PciInfo), &PciInfo, nullptr)); + return ReturnValue(PciInfo.pci_device); + } + case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { oclv::OpenCLVersion Version; CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( @@ -760,6 +777,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(Supported); } + case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: { + return ReturnValue(false); + } + case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { + bool Supported = false; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_program_scope_host_pipe"}, Supported)); + return ReturnValue(Supported); + } case UR_DEVICE_INFO_QUEUE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: @@ -775,7 +802,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_bitfield / enum * UR type: ur_flags_t (uint32_t) */ - cl_bitfield CLValue; + cl_bitfield CLValue = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); @@ -898,13 +925,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * sycl/doc/extensions/supported/sycl_ext_intel_device_info.md */ case UR_DEVICE_INFO_UUID: /* This enums have no equivalent in OpenCL */ - case UR_DEVICE_INFO_DEVICE_ID: + case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: case UR_DEVICE_INFO_GLOBAL_MEM_FREE: case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: - case UR_DEVICE_INFO_ASYNC_BARRIER: - case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { - return UR_RESULT_ERROR_INVALID_ENUMERATION; + case UR_DEVICE_INFO_ASYNC_BARRIER: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 850df82648..44862f319f 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -54,6 +54,62 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { } } +const ur_command_t +convertCLCommandTypeToUR(const cl_command_type &CommandType) { + /* Note: the following enums don't have a CL equivalent: + UR_COMMAND_USM_FILL_2D + UR_COMMAND_USM_MEMCPY_2D + UR_COMMAND_DEVICE_GLOBAL_VARIABLE_WRITE + UR_COMMAND_DEVICE_GLOBAL_VARIABLE_READ + UR_COMMAND_READ_HOST_PIPE + UR_COMMAND_WRITE_HOST_PIPE + UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP + UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP + UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP */ + switch (CommandType) { + case CL_COMMAND_NDRANGE_KERNEL: + return UR_COMMAND_KERNEL_LAUNCH; + case CL_COMMAND_MARKER: + // CL can't distinguish between UR_COMMAND_EVENTS_WAIT_WITH_BARRIER and + // UR_COMMAND_EVENTS_WAIT. + return UR_COMMAND_EVENTS_WAIT; + case CL_COMMAND_READ_BUFFER: + return UR_COMMAND_MEM_BUFFER_READ; + case CL_COMMAND_WRITE_BUFFER: + return UR_COMMAND_MEM_BUFFER_WRITE; + case CL_COMMAND_READ_BUFFER_RECT: + return UR_COMMAND_MEM_BUFFER_READ_RECT; + case CL_COMMAND_WRITE_BUFFER_RECT: + return UR_COMMAND_MEM_BUFFER_WRITE_RECT; + case CL_COMMAND_COPY_BUFFER: + return UR_COMMAND_MEM_BUFFER_COPY; + case CL_COMMAND_COPY_BUFFER_RECT: + return UR_COMMAND_MEM_BUFFER_COPY_RECT; + case CL_COMMAND_FILL_BUFFER: + return UR_COMMAND_MEM_BUFFER_FILL; + case CL_COMMAND_READ_IMAGE: + return UR_COMMAND_MEM_IMAGE_READ; + case CL_COMMAND_WRITE_IMAGE: + return UR_COMMAND_MEM_IMAGE_WRITE; + case CL_COMMAND_COPY_IMAGE: + return UR_COMMAND_MEM_IMAGE_COPY; + case CL_COMMAND_MAP_BUFFER: + return UR_COMMAND_MEM_BUFFER_MAP; + case CL_COMMAND_UNMAP_MEM_OBJECT: + return UR_COMMAND_MEM_UNMAP; + case CL_COMMAND_MEMFILL_INTEL: + return UR_COMMAND_USM_FILL; + case CL_COMMAND_MEMCPY_INTEL: + return UR_COMMAND_USM_MEMCPY; + case CL_COMMAND_MIGRATEMEM_INTEL: + return UR_COMMAND_USM_PREFETCH; + case CL_COMMAND_MEMADVISE_INTEL: + return UR_COMMAND_USM_ADVISE; + default: + return UR_COMMAND_FORCE_UINT32; + } +} + UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, [[maybe_unused]] ur_context_handle_t hContext, @@ -97,24 +153,36 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, void *pPropValue, size_t *pPropSizeRet) { cl_event_info CLEventInfo = convertUREventInfoToCL(propName); + + size_t CheckPropSize = 0; cl_int RetErr = clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, - pPropValue, pPropSizeRet); + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } CL_RETURN_ON_FAILURE(RetErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } - if (RetErr == CL_SUCCESS && - propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, change - * it to CL_SUBMITTED. sycl::info::event::event_command_status has no - * equivalent to CL_QUEUED. - * - * FIXME UR Port: This should not be part of the UR adapter. Since PI_QUEUED - * exists, SYCL RT should be changed to handle this situation. In addition, - * SYCL RT is relying on PI_QUEUED status to make sure that the queues are - * flushed. */ - const auto param_value_int = static_cast(pPropValue); - if (*param_value_int == UR_EVENT_STATUS_QUEUED) { - *param_value_int = UR_EVENT_STATUS_SUBMITTED; + if (pPropValue) { + if (propName == UR_EVENT_INFO_COMMAND_TYPE) { + *reinterpret_cast(pPropValue) = convertCLCommandTypeToUR( + *reinterpret_cast(pPropValue)); + } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, + * change it to CL_SUBMITTED. sycl::info::event::event_command_status has + * no equivalent to CL_QUEUED. + * + * FIXME UR Port: This should not be part of the UR adapter. Since + * PI_QUEUED exists, SYCL RT should be changed to handle this situation. + * In addition, SYCL RT is relying on PI_QUEUED status to make sure that + * the queues are flushed. */ + const auto param_value_int = static_cast(pPropValue); + if (*param_value_int == UR_EVENT_STATUS_QUEUED) { + *param_value_int = UR_EVENT_STATUS_SUBMITTED; + } } } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 69fcec7b21..e7c8444a17 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -69,10 +69,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - mapURKernelInfoToCL(propName), propSize, - pPropValue, pPropSizeRet)); + // We need this little bit of ugliness because the UR NUM_ARGS property is + // size_t whereas the CL one is cl_uint. We should consider changing that see + // #1038 + if (propName == UR_KERNEL_INFO_NUM_ARGS) { + if (pPropSizeRet) + *pPropSizeRet = sizeof(size_t); + cl_uint NumArgs = 0; + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + mapURKernelInfoToCL(propName), + sizeof(NumArgs), &NumArgs, nullptr)); + if (pPropValue) { + if (propSize != sizeof(size_t)) + return UR_RESULT_ERROR_INVALID_SIZE; + *static_cast(pPropValue) = static_cast(NumArgs); + } + } else { + size_t CheckPropSize = 0; + cl_int ClResult = clGetKernelInfo(cl_adapter::cast(hKernel), + mapURKernelInfoToCL(propName), propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + } return UR_RESULT_SUCCESS; } @@ -101,7 +125,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, ur_kernel_group_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - + // From the CL spec for GROUP_INFO_GLOBAL: "If device is not a custom device + // and kernel is not a built-in kernel, clGetKernelWorkGroupInfo returns the + // error CL_INVALID_VALUE.". Unfortunately there doesn't seem to be a nice + // way to query whether a kernel is a builtin kernel but this should suffice + // to deter naive use of the query. + if (propName == UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) { + cl_device_type ClDeviceType; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(ClDeviceType), &ClDeviceType, nullptr)); + if (ClDeviceType != CL_DEVICE_TYPE_CUSTOM) { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( cl_adapter::cast(hKernel), cl_adapter::cast(hDevice), diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 87024f2f9a..be9b266f3d 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -362,9 +362,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemInfoToCL(propName); - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hMemory), - CLPropName, propSize, pPropValue, - pPropSizeRet)); + size_t CheckPropSize = 0; + auto ClResult = + clGetMemObjectInfo(cl_adapter::cast(hMemory), CLPropName, + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } return UR_RESULT_SUCCESS; } @@ -377,9 +385,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemImageInfoToCL(propName); - CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(hMemory), - CLPropName, propSize, pPropValue, - pPropSizeRet)); + size_t CheckPropSize = 0; + auto ClResult = clGetImageInfo(cl_adapter::cast(hMemory), CLPropName, + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 733f2509cb..fad0dd69f7 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -176,11 +176,17 @@ static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - mapURProgramInfoToCL(propName), - propSize, pPropValue, pPropSizeRet)); - + size_t CheckPropSize = 0; + auto ClResult = clGetProgramInfo(cl_adapter::cast(hProgram), + mapURProgramInfoToCL(propName), propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } return UR_RESULT_SUCCESS; } @@ -249,30 +255,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, ur_program_build_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: - cl_program_binary_type cl_value; + if (propName == UR_PROGRAM_BUILD_INFO_BINARY_TYPE) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( cl_adapter::cast(hProgram), cl_adapter::cast(hDevice), mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), - &cl_value, nullptr)); - return ReturnValue(mapCLBinaryTypeToUR(cl_value)); - case UR_PROGRAM_BUILD_INFO_LOG: - case UR_PROGRAM_BUILD_INFO_OPTIONS: - case UR_PROGRAM_BUILD_INFO_STATUS: - CL_RETURN_ON_FAILURE( - clGetProgramBuildInfo(cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), - mapURProgramBuildInfoToCL(propName), propSize, - pPropValue, pPropSizeRet)); - return UR_RESULT_SUCCESS; - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; + &BinaryType, nullptr)); + return ReturnValue(mapCLBinaryTypeToUR(BinaryType)); } + size_t CheckPropSize = 0; + cl_int ClErr = clGetProgramBuildInfo(cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 8b5496e619..163d283651 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -19,7 +19,7 @@ cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { case UR_QUEUE_INFO_DEVICE_DEFAULT: return CL_QUEUE_DEVICE_DEFAULT; case UR_QUEUE_INFO_FLAGS: - return CL_QUEUE_PROPERTIES_ARRAY; + return CL_QUEUE_PROPERTIES; case UR_QUEUE_INFO_REFERENCE_COUNT: return CL_QUEUE_REFERENCE_COUNT; case UR_QUEUE_INFO_SIZE: @@ -49,6 +49,24 @@ convertURQueuePropertiesToCL(const ur_queue_properties_t *URQueueProperties) { return CLCommandQueueProperties; } +const ur_queue_flags_t +mapCLQueuePropsToUR(const cl_command_queue_properties &Properties) { + ur_queue_flags_t Flags = 0; + if (Properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + Flags |= UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; + } + if (Properties & CL_QUEUE_PROFILING_ENABLE) { + Flags |= UR_QUEUE_FLAG_PROFILING_ENABLE; + } + if (Properties & CL_QUEUE_ON_DEVICE) { + Flags |= UR_QUEUE_FLAG_ON_DEVICE; + } + if (Properties & CL_QUEUE_ON_DEVICE_DEFAULT) { + Flags |= UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; + } + return Flags; +} + UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { @@ -102,15 +120,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, size_t *pPropSizeRet) { if (propName == UR_QUEUE_INFO_EMPTY) { // OpenCL doesn't provide API to check the status of the queue. - return UR_RESULT_ERROR_INVALID_VALUE; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); - cl_int RetErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CLCommandQueueInfo, propSize, - pPropValue, pPropSizeRet); - CL_RETURN_ON_FAILURE(RetErr); + // Unfortunately the size of cl_bitfield (unsigned long) doesn't line up with + // our enums (forced to be sizeof(uint32_t)) so this needs special handling. + if (propName == UR_QUEUE_INFO_FLAGS) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + cl_command_queue_properties QueueProperties = 0; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CLCommandQueueInfo, + sizeof(QueueProperties), &QueueProperties, nullptr)); + + return ReturnValue(mapCLQueuePropsToUR(QueueProperties)); + } else { + size_t CheckPropSize = 0; + cl_int RetErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CLCommandQueueInfo, + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(RetErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 0cd4cbed2b..5f58216446 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -154,16 +154,22 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - cl_sampler_info SamplerInfo = ur2CLSamplerInfo(propName); static_assert(sizeof(cl_addressing_mode) == sizeof(ur_sampler_addressing_mode_t)); - if (ur_result_t Err = mapCLErrorToUR( - clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, - propSize, pPropValue, pPropSizeRet))) { - return Err; + size_t CheckPropSize = 0; + ur_result_t Err = mapCLErrorToUR( + clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, + propSize, pPropValue, &CheckPropSize)); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(Err); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; } + // Convert OpenCL returns to UR cl2URSamplerInfoValue(SamplerInfo, pPropValue); diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 32d26cf58c..7333385182 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -190,6 +190,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; + pDdiTable->pfnReadHostPipe = urEnqueueReadHostPipe; + pDdiTable->pfnWriteHostPipe = urEnqueueWriteHostPipe; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 3b77472062..a910a39da5 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -486,16 +486,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return UR_RESULT_SUCCESS; } +const ur_usm_type_t +mapCLUSMTypeToUR(const cl_unified_shared_memory_type_intel &Type) { + switch (Type) { + case CL_MEM_TYPE_HOST_INTEL: + return UR_USM_TYPE_HOST; + case CL_MEM_TYPE_DEVICE_INTEL: + return UR_USM_TYPE_DEVICE; + case CL_MEM_TYPE_SHARED_INTEL: + return UR_USM_TYPE_SHARED; + case CL_MEM_TYPE_UNKNOWN_INTEL: + default: + return UR_USM_TYPE_UNKNOWN; + } +} + UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, ur_usm_alloc_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - clGetMemAllocInfoINTEL_fn FuncPtr = nullptr; + clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; cl_context CLContext = cl_adapter::cast(hContext); - ur_result_t RetVal = cl_ext::getExtFuncFromContext( + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, - cl_ext::GetMemAllocInfoName, &FuncPtr); + cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); cl_mem_info_intel PropNameCL; switch (propName) { @@ -515,41 +530,24 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, return UR_RESULT_ERROR_INVALID_VALUE; } - if (FuncPtr) { - size_t CheckPropSize = 0; - size_t *CheckPropSizeRet = pPropSizeRet ? pPropSizeRet : &CheckPropSize; - RetVal = mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), - pMem, PropNameCL, propSize, pPropValue, - CheckPropSizeRet)); - if (pPropValue && *CheckPropSizeRet != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - if (RetVal == UR_RESULT_SUCCESS && pPropValue && - propName == UR_USM_ALLOC_INFO_TYPE) { - auto *AllocTypeCL = - static_cast(pPropValue); - ur_usm_type_t AllocTypeUR; - switch (*AllocTypeCL) { - case CL_MEM_TYPE_HOST_INTEL: - AllocTypeUR = UR_USM_TYPE_HOST; - break; - case CL_MEM_TYPE_DEVICE_INTEL: - AllocTypeUR = UR_USM_TYPE_DEVICE; - break; - case CL_MEM_TYPE_SHARED_INTEL: - AllocTypeUR = UR_USM_TYPE_SHARED; - break; - case CL_MEM_TYPE_UNKNOWN_INTEL: - default: - AllocTypeUR = UR_USM_TYPE_UNKNOWN; - break; - } - auto *AllocTypeOut = static_cast(pPropValue); - *AllocTypeOut = AllocTypeUR; - } + size_t CheckPropSize = 0; + cl_int ClErr = + GetMemAllocInfo(cl_adapter::cast(hContext), pMem, PropNameCL, + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; } - return RetVal; + if (pPropValue && propName == UR_USM_ALLOC_INFO_TYPE) { + *static_cast(pPropValue) = mapCLUSMTypeToUR( + *static_cast(pPropValue)); + } + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL diff --git a/test/conformance/kernel/urKernelGetGroupInfo.cpp b/test/conformance/kernel/urKernelGetGroupInfo.cpp index 7a6066b0b0..5ad6225676 100644 --- a/test/conformance/kernel/urKernelGetGroupInfo.cpp +++ b/test/conformance/kernel/urKernelGetGroupInfo.cpp @@ -22,12 +22,16 @@ TEST_P(urKernelGetGroupInfoTest, Success) { auto property_name = getParam(); size_t property_size = 0; std::vector property_value; - ASSERT_SUCCESS(urKernelGetGroupInfo(kernel, device, property_name, 0, - nullptr, &property_size)); - property_value.resize(property_size); - ASSERT_SUCCESS(urKernelGetGroupInfo(kernel, device, property_name, - property_size, property_value.data(), - nullptr)); + auto result = urKernelGetGroupInfo(kernel, device, property_name, 0, + nullptr, &property_size); + if (result == UR_RESULT_SUCCESS) { + property_value.resize(property_size); + ASSERT_SUCCESS(urKernelGetGroupInfo(kernel, device, property_name, + property_size, + property_value.data(), nullptr)); + } else { + ASSERT_EQ_RESULT(result, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); + } } TEST_P(urKernelGetGroupInfoTest, InvalidNullHandleKernel) { From 39eec0c34c561afc68bb1843bed649ae37b974ea Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 6 Nov 2023 17:25:29 +0000 Subject: [PATCH 17/20] Remove useless const qualifiers from helper function return types. --- source/adapters/opencl/event.cpp | 3 +-- source/adapters/opencl/queue.cpp | 2 +- source/adapters/opencl/usm.cpp | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 44862f319f..87f1f58f1a 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -54,8 +54,7 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { } } -const ur_command_t -convertCLCommandTypeToUR(const cl_command_type &CommandType) { +ur_command_t convertCLCommandTypeToUR(const cl_command_type &CommandType) { /* Note: the following enums don't have a CL equivalent: UR_COMMAND_USM_FILL_2D UR_COMMAND_USM_MEMCPY_2D diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 163d283651..4a39a91ef5 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -49,7 +49,7 @@ convertURQueuePropertiesToCL(const ur_queue_properties_t *URQueueProperties) { return CLCommandQueueProperties; } -const ur_queue_flags_t +ur_queue_flags_t mapCLQueuePropsToUR(const cl_command_queue_properties &Properties) { ur_queue_flags_t Flags = 0; if (Properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index a910a39da5..5d46aec2ef 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -486,7 +486,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return UR_RESULT_SUCCESS; } -const ur_usm_type_t +ur_usm_type_t mapCLUSMTypeToUR(const cl_unified_shared_memory_type_intel &Type) { switch (Type) { case CL_MEM_TYPE_HOST_INTEL: From b01beb71f15983d275fc0347afde9d8f78432138 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 1 Nov 2023 15:05:05 +0000 Subject: [PATCH 18/20] [CTS] Add UR_SYCL_LIBRARY_DIR CMake variable The CTS is already dependant on the DPC++ compiler to generate programs inputs for the program, kernel, and enqueue test suites specified via the `UR_DPCXX` CMake variable. If the DPC++ compiler is not installed on the system the executables it outputs will likely fail to find the SYCL runtime library when executed, breaking the generation of CTS program inputs. The patch introduces the `UR_SYCL_LIBRARY_DIR` CMake variable enabling the user to specify the path to the SYCL runtime library for use when generating CTS program inputs. --- .github/workflows/cmake.yml | 4 ++-- CMakeLists.txt | 3 +++ README.md | 2 ++ test/CMakeLists.txt | 2 +- test/conformance/CMakeLists.txt | 10 ++++++++-- test/conformance/device_code/CMakeLists.txt | 13 +++++++++++-- 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 0a2684462d..315a4ea81b 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -196,12 +196,12 @@ jobs: -DUR_BUILD_TESTS=ON -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ + -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib -DUR_CONFORMANCE_TARGET_TRIPLES=${{matrix.adapter.triplet}} - name: Build # This is so that device binaries can find the sycl runtime library - run: LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib - cmake --build ${{github.workspace}}/build -j $(nproc) + run: cmake --build ${{github.workspace}}/build -j $(nproc) - name: Test adapter specific working-directory: ${{github.workspace}}/build diff --git a/CMakeLists.txt b/CMakeLists.txt index 1210375dd8..0cdd736733 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,9 @@ option(UR_BUILD_ADAPTER_CUDA "build cuda adapter from SYCL" OFF) option(UR_BUILD_ADAPTER_HIP "build hip adapter from SYCL" OFF) option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF) option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF) +set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable") +set(UR_SYCL_LIBRARY_DIR "" CACHE PATH + "Path of the SYCL runtime library directory") set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) diff --git a/README.md b/README.md index 4917add660..c156d4f079 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,8 @@ List of options provided by CMake: | UR_BUILD_ADAPTER_HIP | Fetch and use hip adapter from SYCL | ON/OFF | OFF | | UR_HIP_PLATFORM | Build hip adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD | | UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD | +| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` | +| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` | ### Additional make targets diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 49e13eb869..a9fdf2ba37 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -25,6 +25,6 @@ add_subdirectory(unit) if(UR_BUILD_TOOLS) add_subdirectory(tools) endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND DEFINED UR_DPCXX) +if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND UR_DPCXX) add_subdirectory(fuzz) endif() diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index 9a273470eb..f5f6c2a591 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -89,7 +89,7 @@ add_subdirectory(queue) add_subdirectory(sampler) add_subdirectory(virtual_memory) -if(DEFINED UR_DPCXX) +if(UR_DPCXX) add_custom_target(generate_device_binaries) set(UR_CONFORMANCE_DEVICE_BINARIES_DIR @@ -99,7 +99,9 @@ if(DEFINED UR_DPCXX) if(NOT "${UR_CONFORMANCE_TARGET_TRIPLES}" STREQUAL "") string(REPLACE "," ";" TARGET_TRIPLES ${UR_CONFORMANCE_TARGET_TRIPLES}) else() - message(WARNING "UR_CONFORMANCE_TARGET_TRIPLES wasn't set, defaulting to only generate spir64 device binaries") + message(WARNING + "UR_CONFORMANCE_TARGET_TRIPLES wasn't set, defaulting to only \ + generate spir64 device binaries") list(APPEND TARGET_TRIPLES "spir64") endif() @@ -107,4 +109,8 @@ if(DEFINED UR_DPCXX) add_subdirectory(kernel) add_subdirectory(program) add_subdirectory(enqueue) +else() + message(WARNING + "UR_DPCXX is not defined, the following conformance test executables \ + are disabled: test-program, test-kernel, test-enqueue") endif() diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 1d3f28df7f..10925b964f 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -7,13 +7,22 @@ macro(add_device_binary SOURCE_FILE) get_filename_component(KERNEL_NAME ${SOURCE_FILE} NAME_WE) set(DEVICE_BINARY_DIR "${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/${KERNEL_NAME}") file(MAKE_DIRECTORY ${DEVICE_BINARY_DIR}) + if(UR_SYCL_LIBRARY_DIR) + if(CMAKE_SYSTEM_NAME STREQUAL Linux) + set(EXTRA_ENV LD_LIBRARY_PATH=${UR_SYCL_LIBRARY_DIR}) + elseif(CMAKE_SYSTEM_NAME STREQUAL Windows) + set(EXTRA_ENV PATH=${UR_SYCL_LIBRARY_DIR};$ENV{PATH}) + else() + set(EXTRA_ENV DYLD_FALLBACK_LIBRARY_PATH=${UR_SYCL_LIBRARY_DIR}) + endif() + endif() foreach(TRIPLE ${TARGET_TRIPLES}) set(EXE_PATH "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}") add_custom_command(OUTPUT ${EXE_PATH} COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off ${SOURCE_FILE} -o ${EXE_PATH} - COMMAND ${CMAKE_COMMAND} -E env SYCL_DUMP_IMAGES=true - ${EXE_PATH} || (exit 0) + COMMAND ${CMAKE_COMMAND} -E env ${EXTRA_ENV} SYCL_DUMP_IMAGES=true + ${EXE_PATH} || exit 0 WORKING_DIRECTORY "${DEVICE_BINARY_DIR}" DEPENDS ${SOURCE_FILE} ) From f65473d9315c1319538f481d7a8c82dd2710c933 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 6 Nov 2023 16:35:33 +0000 Subject: [PATCH 19/20] [OpenCL] Add bounds checking to the Enqueue memory operations. This allows us to return UR_ERROR_INVALID_SIZE when we should. Extra checks are only performed on a non-success error code. Also adds a missing bounds check to urMemBufferPartition --- source/adapters/opencl/enqueue.cpp | 180 +++++++++++++++++++++++------ source/adapters/opencl/memory.cpp | 11 +- 2 files changed, 152 insertions(+), 39 deletions(-) diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 5dff7066ae..ad6eaec88f 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -25,6 +25,77 @@ cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { return CLFlags; } +ur_result_t ValidateBufferSize(ur_mem_handle_t Buffer, size_t Size, + size_t Origin) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(Buffer), + CL_MEM_SIZE, sizeof(BufferSize), + &BufferSize, nullptr)); + if (Size + Origin > BufferSize) + return UR_RESULT_ERROR_INVALID_SIZE; + return UR_RESULT_SUCCESS; +} + +ur_result_t ValidateBufferRectSize(ur_mem_handle_t Buffer, + ur_rect_region_t Region, + ur_rect_offset_t Offset) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(Buffer), + CL_MEM_SIZE, sizeof(BufferSize), + &BufferSize, nullptr)); + if (Offset.x >= BufferSize || Offset.y >= BufferSize || + Offset.z >= BufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if ((Region.width + Offset.x) * (Region.height + Offset.y) * + (Region.depth + Offset.z) > + BufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t ValidateImageSize(ur_mem_handle_t Image, ur_rect_region_t Region, + ur_rect_offset_t Origin) { + size_t Width = 0; + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), + CL_IMAGE_WIDTH, sizeof(Width), &Width, + nullptr)); + if (Region.width + Origin.x > Width) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + size_t Height = 0; + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), + CL_IMAGE_HEIGHT, sizeof(Height), &Height, + nullptr)); + + // CL returns a height and depth of 0 for images that don't have those + // dimensions, but regions for enqueue operations must set these to 1, so we + // need to make this adjustment to validate. + if (Height == 0) + Height = 1; + + if (Region.height + Origin.y > Height) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + size_t Depth = 0; + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), + CL_IMAGE_DEPTH, sizeof(Depth), &Depth, + nullptr)); + if (Depth == 0) + Depth = 1; + + if (Region.depth + Origin.z > Depth) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, @@ -70,13 +141,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( + auto ClErr = clEnqueueReadBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( @@ -84,13 +158,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( + auto ClErr = clEnqueueWriteBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( @@ -101,7 +178,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( + auto ClErr = clEnqueueReadBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, cl_adapter::cast(&bufferOrigin), @@ -109,9 +186,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( cl_adapter::cast(®ion), bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBuffer, region, bufferOrigin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( @@ -122,7 +202,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( + auto ClErr = clEnqueueWriteBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, cl_adapter::cast(&bufferOrigin), @@ -130,9 +210,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( cl_adapter::cast(®ion), bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBuffer, region, bufferOrigin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( @@ -141,14 +224,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( + auto ClErr = clEnqueueCopyBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBufferSrc, size, srcOffset)); + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBufferDst, size, dstOffset)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( @@ -159,7 +246,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( + auto ClErr = clEnqueueCopyBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), cl_adapter::cast(hBufferDst), @@ -168,9 +255,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( cl_adapter::cast(®ion), srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBufferSrc, region, srcOrigin)); + UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBufferDst, region, dstOrigin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( @@ -181,13 +272,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( // CL FillBuffer only allows pattern sizes up to the largest CL type: // long16/double16 if (patternSize <= 128) { - CL_RETURN_ON_FAILURE( - clEnqueueFillBuffer(cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, - patternSize, offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - return UR_RESULT_SUCCESS; + auto ClErr = (clEnqueueFillBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + if (ClErr != CL_SUCCESS) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); + } + return mapCLErrorToUR(ClErr); } auto NumValues = size / sizeof(uint64_t); @@ -205,6 +299,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, offset, size)); CL_RETURN_ON_FAILURE(ClErr); } @@ -237,15 +332,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueReadImage( + auto ClErr = clEnqueueReadImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingRead, cl_adapter::cast(&origin), cl_adapter::cast(®ion), rowPitch, slicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateImageSize(hImage, region, origin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( @@ -254,15 +352,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueWriteImage( + auto ClErr = clEnqueueWriteImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingWrite, cl_adapter::cast(&origin), cl_adapter::cast(®ion), rowPitch, slicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateImageSize(hImage, region, origin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( @@ -272,16 +373,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueCopyImage( + auto ClErr = clEnqueueCopyImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), cl_adapter::cast(&srcOrigin), cl_adapter::cast(&dstOrigin), cl_adapter::cast(®ion), numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateImageSize(hImageSrc, region, srcOrigin)); + UR_RETURN_ON_FAILURE(ValidateImageSize(hImageDst, region, dstOrigin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( @@ -298,9 +403,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent), &Err); - CL_RETURN_ON_FAILURE(Err); - - return UR_RESULT_SUCCESS; + if (Err == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); + } + return mapCLErrorToUR(Err); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index be9b266f3d..1a77754c57 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -319,9 +319,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( *phMem = reinterpret_cast(clCreateSubBuffer( cl_adapter::cast(hBuffer), static_cast(flags), BufferCreateType, &BufferRegion, cl_adapter::cast(&RetErr))); - CL_RETURN_ON_FAILURE(RetErr); - return UR_RESULT_SUCCESS; + if (RetErr == CL_INVALID_VALUE) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hBuffer), + CL_MEM_SIZE, sizeof(BufferSize), + &BufferSize, nullptr)); + if (BufferRegion.size + BufferRegion.origin > BufferSize) + return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + } + return mapCLErrorToUR(RetErr); } UR_APIEXPORT ur_result_t UR_APICALL From 877fc5bb65234587c3477c21111873be52b0fcc7 Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Mon, 2 Oct 2023 11:56:38 +0200 Subject: [PATCH 20/20] [SYCL][UR][L0] Distinguish min/max calls from macros --- source/adapters/level_zero/command_buffer.cpp | 2 +- source/adapters/level_zero/device.cpp | 4 ++-- source/adapters/level_zero/kernel.cpp | 8 ++++---- source/adapters/level_zero/platform.cpp | 2 +- source/adapters/level_zero/queue.cpp | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index bea22aa417..7ba3cfae4d 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -177,7 +177,7 @@ ur_result_t calculateKernelWorkDimensions( Device->ZeDeviceComputeProperties->maxGroupSizeX, Device->ZeDeviceComputeProperties->maxGroupSizeY, Device->ZeDeviceComputeProperties->maxGroupSizeZ}; - GroupSize[I] = std::min(size_t(GroupSize[I]), GlobalWorkSize[I]); + GroupSize[I] = (std::min)(size_t(GroupSize[I]), GlobalWorkSize[I]); while (GlobalWorkSize[I] % GroupSize[I]) { --GroupSize[I]; } diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index e5157fc134..35e48931b2 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -74,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( uint32_t ZeDeviceCount = MatchedDevices.size(); - auto N = std::min(ZeDeviceCount, NumEntries); + auto N = (std::min)(ZeDeviceCount, NumEntries); if (Devices) std::copy_n(MatchedDevices.begin(), N, Devices); @@ -1240,7 +1240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( uint32_t *SelectedBinaryInd = SelectedBinary; // Find the appropriate device image, fallback to spirv if not found - constexpr uint32_t InvalidInd = std::numeric_limits::max(); + constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); uint32_t Spirv = InvalidInd; for (uint32_t i = 0; i < NumBinaries; ++i) { diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 266efd9291..dfa8915197 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -79,11 +79,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( UR_RESULT_ERROR_INVALID_VALUE); if (LocalWorkSize) { // L0 - UR_ASSERT(LocalWorkSize[0] < std::numeric_limits::max(), + UR_ASSERT(LocalWorkSize[0] < (std::numeric_limits::max)(), UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT(LocalWorkSize[1] < std::numeric_limits::max(), + UR_ASSERT(LocalWorkSize[1] < (std::numeric_limits::max)(), UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT(LocalWorkSize[2] < std::numeric_limits::max(), + UR_ASSERT(LocalWorkSize[2] < (std::numeric_limits::max)(), UR_RESULT_ERROR_INVALID_VALUE); WG[0] = static_cast(LocalWorkSize[0]); WG[1] = static_cast(LocalWorkSize[1]); @@ -110,7 +110,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( Queue->Device->ZeDeviceComputeProperties->maxGroupSizeX, Queue->Device->ZeDeviceComputeProperties->maxGroupSizeY, Queue->Device->ZeDeviceComputeProperties->maxGroupSizeZ}; - GroupSize[I] = std::min(size_t(GroupSize[I]), GlobalWorkSize[I]); + GroupSize[I] = (std::min)(size_t(GroupSize[I]), GlobalWorkSize[I]); while (GlobalWorkSize[I] % GroupSize[I]) { --GroupSize[I]; } diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index e7944aa3aa..308b6909eb 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -121,7 +121,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( if (*NumPlatforms == 0) *NumPlatforms = URPlatformsCache->size(); else - *NumPlatforms = std::min(URPlatformsCache->size(), (size_t)NumEntries); + *NumPlatforms = (std::min)(URPlatformsCache->size(), (size_t)NumEntries); } return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 0388b8fdbb..994f595a5d 100755 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -930,8 +930,8 @@ ur_queue_handle_t_::ur_queue_handle_t_( // Set-up to round-robin across allowed range of engines. uint32_t FilterLowerIndex = getRangeOfAllowedComputeEngines().first; uint32_t FilterUpperIndex = getRangeOfAllowedComputeEngines().second; - FilterUpperIndex = std::min((size_t)FilterUpperIndex, - FilterLowerIndex + ComputeQueues.size() - 1); + FilterUpperIndex = (std::min)((size_t)FilterUpperIndex, + FilterLowerIndex + ComputeQueues.size() - 1); if (FilterLowerIndex <= FilterUpperIndex) { ComputeQueueGroup.LowerIndex = FilterLowerIndex; ComputeQueueGroup.UpperIndex = FilterUpperIndex; @@ -959,8 +959,8 @@ ur_queue_handle_t_::ur_queue_handle_t_( } else { uint32_t FilterLowerIndex = Range.first; uint32_t FilterUpperIndex = Range.second; - FilterUpperIndex = std::min((size_t)FilterUpperIndex, - FilterLowerIndex + CopyQueues.size() - 1); + FilterUpperIndex = (std::min)((size_t)FilterUpperIndex, + FilterLowerIndex + CopyQueues.size() - 1); if (FilterLowerIndex <= FilterUpperIndex) { CopyQueueGroup.ZeQueues = CopyQueues; CopyQueueGroup.LowerIndex = FilterLowerIndex;