From f478846548afc5ccaaf2642955b837acca756a3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 9 Sep 2024 16:58:00 +0100 Subject: [PATCH] Make newWorkDim non-optional and remove newLocalWorkgroup nullptr errors --- include/ur_api.h | 18 +- scripts/core/exp-command-buffer.yml | 14 +- source/adapters/cuda/command_buffer.cpp | 30 +-- source/adapters/cuda/device.cpp | 1 - source/adapters/hip/command_buffer.cpp | 50 ++--- source/adapters/hip/device.cpp | 24 +-- source/adapters/level_zero/command_buffer.cpp | 32 +-- source/adapters/level_zero/device.cpp | 34 +-- source/adapters/mock/ur_mockddi.cpp | 4 +- source/adapters/opencl/command_buffer.cpp | 31 +-- source/adapters/opencl/command_buffer.hpp | 11 +- source/adapters/opencl/device.cpp | 13 -- source/loader/layers/tracing/ur_trcddi.cpp | 4 +- source/loader/layers/validation/ur_valddi.cpp | 14 +- source/loader/ur_ldrddi.cpp | 4 +- source/loader/ur_libapi.cpp | 12 +- source/ur_api.cpp | 12 +- .../exp_command_buffer_adapter_hip.match | 4 - ...xp_command_buffer_adapter_native_cpu.match | 5 +- .../conformance/exp_command_buffer/fixtures.h | 3 + .../update/buffer_fill_kernel_update.cpp | 58 +++--- .../update/buffer_saxpy_kernel_update.cpp | 4 +- .../update/invalid_update.cpp | 78 +------ .../update/ndrange_update.cpp | 194 +++++++++++++----- .../update/usm_fill_kernel_update.cpp | 28 +-- .../update/usm_saxpy_kernel_update.cpp | 6 +- 26 files changed, 314 insertions(+), 374 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index c162434fdc..4ff1f0b49d 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -8286,10 +8286,8 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { ///< values that describe the number of global work-items. size_t *pNewLocalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned ///< values that describe the number of work-items that make up a - ///< work-group. 
If newWorkDim is non-zero and pNewLocalWorkSize is - ///< nullptr, then runtime implementation will choose the work-group size. - ///< If newWorkDim is zero and pNewLocalWorkSize is nullptr, then the local - ///< work size is unchanged. + ///< work-group. If pNewLocalWorkSize is nullptr, then the local work size + ///< is unchanged. } ur_exp_command_buffer_update_kernel_launch_desc_t; @@ -8427,7 +8425,9 @@ urCommandBufferAppendKernelLaunchExp( uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. - const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels @@ -8950,17 +8950,15 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. -/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. 
-/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. +/// + `pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL` +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// + `pUpdateKernelLaunch->newWorkDim < 1 || pUpdateKernelLaunch->newWorkDim > 3` /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. 
diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 6cfd193479..5fefd3ce09 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -226,7 +226,7 @@ members: desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of global work-items." - type: "size_t*" name: pNewLocalWorkSize - desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If newWorkDim is non-zero and pNewLocalWorkSize is nullptr, then runtime implementation will choose the work-group size. If newWorkDim is zero and pNewLocalWorkSize is nullptr, then the local work size is unchanged." + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If pNewLocalWorkSize is nullptr, then the local work size is unchanged." --- #-------------------------------------------------------------------------- type: typedef desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer." @@ -333,7 +333,7 @@ params: desc: "[in] Global work size to use when executing kernel." - type: "const size_t*" name: pLocalWorkSize - desc: "[in][optional] Local work size to use when executing kernel." + desc: "[in][optional] Local work size to use when executing kernel. If this parameter is nullptr, then a local work size will be generated by the implementation." - type: uint32_t name: "numKernelAlternatives" desc: "[in] The number of kernel alternatives provided in phKernelAlternatives." @@ -954,17 +954,15 @@ returns: - $X_RESULT_ERROR_INVALID_OPERATION: - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." 
- - "If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero." - - "If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`." - - "If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL." - - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size." - - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size." + - "`pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL`" + - "If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`." 
- $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE - $X_RESULT_ERROR_INVALID_ENUMERATION - - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION: + - "`pUpdateKernelLaunch->newWorkDim < 1 || pUpdateKernelLaunch->newWorkDim > 3`" - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE - $X_RESULT_ERROR_INVALID_VALUE: - "If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of $xCommandBufferAppendKernelLaunchExp when this command was created." diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 1305bae515..e5f2a7fcce 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -887,37 +887,11 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, return UR_RESULT_ERROR_INVALID_OPERATION; } - const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; - if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { + if (UpdateCommandDesc->newWorkDim != Command->WorkDim && + Command->Kernel == UpdateCommandDesc->hNewKernel) { return UR_RESULT_ERROR_INVALID_OPERATION; } - if (NewWorkDim) { - UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - - if (NewWorkDim != Command->WorkDim && - Command->Kernel == UpdateCommandDesc->hNewKernel) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error If Local size and not global size - if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && - (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error if local size non-nullptr and created with null - // or if local size nullptr and created with non-null - const bool IsNewLocalSizeNull = - 
UpdateCommandDesc->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); - - if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - } - if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 7daf8bdbc8..bcdb1cdc85 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1093,7 +1093,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - /*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:*/ return ReturnValue(true); case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index bc533c564d..e7b84c32a7 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -48,9 +48,9 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) - : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), - HIPGraph{nullptr}, HIPGraphExec{nullptr}, RefCountInternal{1}, - RefCountExternal{1}, NextSyncPoint{0} { + : Context(hContext), Device(hDevice), + IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } @@ -330,9 +330,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( UR_RESULT_ERROR_INVALID_KERNEL); UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); 
UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(!(pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + for (uint32_t i = 0; i < numKernelAlternatives; ++i) { + UR_ASSERT(phKernelAlternatives[i] != hKernel, + UR_RESULT_ERROR_INVALID_VALUE); + } + hipGraphNode_t GraphNode; std::vector DepsList; @@ -866,37 +872,11 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, return UR_RESULT_ERROR_INVALID_OPERATION; } - const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; - if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { + if (UpdateCommandDesc->newWorkDim != Command->WorkDim && + Command->Kernel == UpdateCommandDesc->hNewKernel) { return UR_RESULT_ERROR_INVALID_OPERATION; } - if (NewWorkDim) { - UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - - if (NewWorkDim != Command->WorkDim && - Command->Kernel == UpdateCommandDesc->hNewKernel) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error If Local size and not global size - if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && - (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error if local size non-nullptr and created with null - // or if local size nullptr and created with non-null - const bool IsNewLocalSizeNull = - UpdateCommandDesc->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); - - if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - } - if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } @@ -907,8 +887,8 @@ validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, /** * Updates the arguments of CommandDesc->hNewKernel * @param[in] Device The device associated with the 
kernel being updated. - * @param[in] UpdateCommandDesc The update command description that contains the - * new kernel and its arguments. + * @param[in] UpdateCommandDesc The update command description that contains + * the new kernel and its arguments. * @return UR_RESULT_SUCCESS or an error code on failure */ ur_result_t @@ -1020,8 +1000,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch)); UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch)); - // If no worksize is provided make sure we pass nullptr to setKernelParams so - // it can guess the local work size. + // If no worksize is provided make sure we pass nullptr to setKernelParams + // so it can guess the local work size. const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index b3b211af5a..ab5819eced 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -903,20 +903,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_ASYNC_BARRIER: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + int DriverVersion = 0; + UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); - case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - /*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: */ { - int DriverVersion = 0; - UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); - - // Return supported for the UR command-buffer experimental feature on - // ROCM 5.5.1 and later. This is to workaround HIP driver bug - // https://github.com/ROCm/HIP/issues/2450 in older versions. - // - // The version is returned as (10000000 major + 1000000 minor + patch). 
- const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 - return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); - } + // Return supported for the UR command-buffer experimental feature on + // ROCM 5.5.1 and later. This is to workaround HIP driver bug + // https://github.com/ROCm/HIP/issues/2450 in older versions. + // + // The version is returned as (10000000 major + 1000000 minor + patch). + const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 + return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); + } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { int DriverVersion = 0; UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 16876976ca..403195d511 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -1320,27 +1320,15 @@ ur_result_t validateCommandDesc( ->mutableCommandFlags; logger::debug("Mutable features supported by device {}", SupportedFeatures); - uint32_t Dim = CommandDesc->newWorkDim; - if (Dim != 0) { - // Error if work dim changes - if (Dim != Command->WorkDim) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error If Local size and not global size - if ((CommandDesc->pNewLocalWorkSize != nullptr) && - (CommandDesc->pNewGlobalWorkSize == nullptr)) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error if local size non-nullptr and created with null - // or if local size nullptr and created with non-null - const bool IsNewLocalSizeNull = CommandDesc->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = !Command->UserDefinedLocalSize; + // Kernel handle updates are not yet supported. 
+ if (CommandDesc->hNewKernel != Command->Kernel) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } - if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } + // Error if work dim changes + if (CommandDesc->hNewKernel == Command->Kernel && + CommandDesc->newWorkDim != Command->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; } // Check if new global offset is provided. @@ -1348,7 +1336,7 @@ ur_result_t validateCommandDesc( UR_ASSERT(!NewGlobalWorkOffset || (SupportedFeatures & ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET), UR_RESULT_ERROR_UNSUPPORTED_FEATURE); - if (NewGlobalWorkOffset && Dim > 0) { + if (NewGlobalWorkOffset) { if (!CommandBuffer->Context->getPlatform() ->ZeDriverGlobalOffsetExtensionFound) { logger::error("No global offset extension found on this driver"); @@ -1618,8 +1606,6 @@ ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t Command, const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) { UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(CommandDesc->newWorkDim <= 3, - UR_RESULT_ERROR_INVALID_WORK_DIMENSION); // Lock command, kernel and command buffer for update. std::scoped_lock Guard( diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 507695ec91..c9f9bb9b67 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -994,40 +994,28 @@ ur_result_t urDeviceGetInfo( } case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: return ReturnValue(true); - // case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - // // Update support requires being able to update kernel arguments and - // all - // // aspects of the kernel NDRange. 
- // const ze_mutable_command_exp_flags_t UpdateMask = - // ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS | - // ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | - // ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | - // ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; - // - // const bool KernelArgUpdateSupport = - // (Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags & - // UpdateMask) == UpdateMask; - // return ReturnValue(KernelArgUpdateSupport && - // Device->Platform->ZeMutableCmdListExt.Supported); - // } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { - const bool ZeMutableCommandFlags = + const auto ZeMutableCommandFlags = Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags; + auto supportsFlags = [&](ze_mutable_command_exp_flags_t RequiredFlags) { + if ((ZeMutableCommandFlags & RequiredFlags) == RequiredFlags) { + return true; + } + return false; + }; ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0; - if (ZeMutableCommandFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS) { + if (supportsFlags(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS)) { UpdateCapabilities |= UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; } - ze_mutable_command_exp_flags_t ReqUpdateWG = - ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | - ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE; - if ((ZeMutableCommandFlags & ReqUpdateWG) == ReqUpdateWG) { + if (supportsFlags(ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | + ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE)) { UpdateCapabilities |= UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; } - if (ZeMutableCommandFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET) { + if (supportsFlags(ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET)) { UpdateCapabilities |= UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; } diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 
714bf7817c..f298aae04c 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -8349,7 +8349,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 571d14f5d8..76d68b6e37 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -178,7 +178,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( try { auto URCommandHandle = std::make_unique( - hCommandBuffer, CommandHandle, workDim, pLocalWorkSize != nullptr); + hCommandBuffer, CommandHandle, hKernel, workDim, + pLocalWorkSize != nullptr); *phCommandHandle = URCommandHandle.release(); hCommandBuffer->CommandHandles.push_back(*phCommandHandle); } catch (...) { @@ -488,6 +489,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { + // Kernel handle updates are not yet supported. 
+ if (pUpdateKernelLaunch->hNewKernel != hCommand->Kernel) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); @@ -500,27 +506,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( if (!hCommandBuffer->IsFinalized || !hCommandBuffer->IsUpdatable) return UR_RESULT_ERROR_INVALID_OPERATION; - if (cl_uint NewWorkDim = pUpdateKernelLaunch->newWorkDim) { - // Error if work dim changes - if (NewWorkDim != hCommand->WorkDim) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error If Local size and not global size - if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && - (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - // Error if local size non-nullptr and created with null - // or if local size nullptr and created with non-null - const bool IsNewLocalSizeNull = - pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = !hCommand->UserDefinedLocalSize; - - if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } + if (pUpdateKernelLaunch->newWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; } // Find the CL USM pointer arguments to the kernel to update diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index 4c39b1ad74..d8e975a3df 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -17,6 +17,8 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_handle_t hCommandBuffer; /// OpenCL command-handle. cl_mutable_command_khr CLMutableCommand; + /// Kernel associated with this command handle + ur_kernel_handle_t Kernel; /// Work-dimension the command was originally created with. 
cl_uint WorkDim; /// Set to true if the user set the local work size on command creation. @@ -31,11 +33,12 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t hCommandBuffer, - cl_mutable_command_khr CLMutableCommand, cl_uint WorkDim, - bool UserDefinedLocalSize) + cl_mutable_command_khr CLMutableCommand, ur_kernel_handle_t Kernel, + cl_uint WorkDim, bool UserDefinedLocalSize) : hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand), - WorkDim(WorkDim), UserDefinedLocalSize(UserDefinedLocalSize), - RefCountInternal(0), RefCountExternal(0) {} + Kernel(Kernel), WorkDim(WorkDim), + UserDefinedLocalSize(UserDefinedLocalSize), RefCountInternal(0), + RefCountExternal(0) {} uint32_t incrementInternalReferenceCount() noexcept { return ++RefCountInternal; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index d8e0fc4e1f..7d7fd7c4ed 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1065,19 +1065,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(ExtStr.find("cl_khr_command_buffer") != std::string::npos); } - // case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - // cl_device_id Dev = cl_adapter::cast(hDevice); - // ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; - // CL_RETURN_ON_FAILURE( - // deviceSupportsURCommandBufferKernelUpdate(Dev, - // UpdateCapabilities)); - // ur_device_command_buffer_update_capability_flags_t - // RequiredCapabilities = - // UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | - // UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_WORKGROUP; - // return ReturnValue((UpdateCapabilities & RequiredCapabilities) == - // RequiredCapabilities); - // } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { cl_device_id Dev = cl_adapter::cast(hDevice); 
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index a3f48fd533..8c8d9dcca4 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6493,7 +6493,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 542dfc3be5..446e3fc86b 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8054,7 +8054,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. 
@@ -8955,6 +8957,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( if (NULL == pUpdateKernelLaunch) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (pUpdateKernelLaunch->pNewLocalWorkSize != NULL && + pUpdateKernelLaunch->pNewGlobalWorkSize == NULL) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (pUpdateKernelLaunch->newWorkDim < 1 || + pUpdateKernelLaunch->newWorkDim > 3) { + return UR_RESULT_ERROR_INVALID_WORK_DIMENSION; + } } ur_result_t result = diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index ddcb63cda1..51e75111dd 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7105,7 +7105,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 81937709ca..5638a0b46b 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7544,7 +7544,9 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. 
uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. @@ -8314,17 +8316,15 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. -/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. +/// + `pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL` +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`. 
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// + `pUpdateKernelLaunch->newWorkDim < 0 || pUpdateKernelLaunch->newWorkDim > 3` /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. diff --git a/source/ur_api.cpp b/source/ur_api.cpp index f5f02bbee4..dae7e2950d 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6400,7 +6400,9 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t * - pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. If this + ///< parameter is nullptr, then a local work size will be generated by the + ///< implementation. uint32_t numKernelAlternatives, ///< [in] The number of kernel alternatives provided in ///< phKernelAlternatives. @@ -7027,17 +7029,15 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. -/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. 
-/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. -/// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. +/// + `pUpdateKernelLaunch->pNewLocalWorkSize != NULL && pUpdateKernelLaunch->pNewGlobalWorkSize == NULL` +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is different from the work-dim currently associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// + `pUpdateKernelLaunch->newWorkDim < 0 || pUpdateKernelLaunch->newWorkDim > 3` /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + If `pUpdateKernelLaunch->hNewKernel` was not passed to the `hKernel` or `phKernelAlternatives` parameters of ::urCommandBufferAppendKernelLaunchExp when this command was created. 
diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match index a39a452d04..e69de29bb2 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -1,4 +0,0 @@ -urCommandBufferKernelHandleUpdateTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.UpdateAgain/AMD_HIP_BACKEND___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/AMD_HIP_BACKEND___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index a4b2789372..765a5d44c9 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -13,8 +13,6 @@ {{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.GlobalLocalSizeMistach/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__X_ {{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} @@ -23,7 +21,8 @@ {{OPT}}NDRangeUpdateTest.Update3D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
{{OPT}}NDRangeUpdateTest.Update2D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Update1D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.Invalid/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}NDRangeUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}NDRangeUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 2cc91c4d3f..6852ce0ff7 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -148,6 +148,9 @@ struct urUpdatableCommandBufferExpExecutionTest : uur::urKernelExecutionTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); auto requiredCapabilities = UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | diff --git a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp index e694465fd2..08be337466 100644 --- a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp @@ -73,7 +73,7 @@ struct BufferFillCommandTest static constexpr size_t local_size = 4; static constexpr size_t global_size = 32; static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + static constexpr uint32_t 
n_dimensions = 1; static constexpr size_t buffer_size = sizeof(val) * global_size; ur_mem_handle_t buffer = nullptr; ur_mem_handle_t new_buffer = nullptr; @@ -128,7 +128,7 @@ TEST_P(BufferFillCommandTest, UpdateParameters) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -181,7 +181,7 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 1, // newWorkDim + n_dimensions, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -232,7 +232,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -261,7 +261,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -276,17 +276,17 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - static_cast(n_dimensions), // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - &new_local_size, // pNewLocalWorkSize + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // 
numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + &new_local_size, // pNewLocalWorkSize }; ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( @@ -325,7 +325,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &first_input_desc, // pNewValueArgList @@ -353,7 +353,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &second_input_desc, // pNewValueArgList @@ -406,17 +406,17 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 2, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - input_descs, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 2, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + input_descs, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize }; ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, diff --git a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp 
b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp index 19da365084..69ba67eb0f 100644 --- a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp @@ -130,7 +130,7 @@ struct BufferSaxpyKernelTest static constexpr size_t local_size = 4; static constexpr size_t global_size = 32; static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + static constexpr uint32_t n_dimensions = 1; static constexpr uint32_t A = 42; std::array buffers = {nullptr, nullptr, nullptr, nullptr}; @@ -188,7 +188,7 @@ TEST_P(BufferSaxpyKernelTest, UpdateParameters) { 2, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim new_input_descs, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_A_desc, // pNewValueArgList diff --git a/test/conformance/exp_command_buffer/update/invalid_update.cpp b/test/conformance/exp_command_buffer/update/invalid_update.cpp index c5947e039f..6c9c2b3dce 100644 --- a/test/conformance/exp_command_buffer/update/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -65,7 +65,7 @@ struct InvalidUpdateTest static constexpr size_t local_size = 4; static constexpr size_t global_size = 32; static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + static constexpr uint32_t n_dimensions = 1; static constexpr size_t allocation_size = sizeof(val) * global_size; void *shared_ptr = nullptr; ur_exp_command_buffer_command_handle_t command_handle = nullptr; @@ -94,7 +94,7 @@ TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -145,7 +145,7 @@ TEST_P(InvalidUpdateTest, 
NotUpdatableCommandBuffer) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList @@ -197,77 +197,6 @@ TEST_P(InvalidUpdateTest, GlobalLocalSizeMistach) { ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); } -// Test setting `pNewLocalWorkSize` to a non-NULL value when the command was -// created with a NULL local work size gives the correct error. -TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { - // Append kernel command to command-buffer using NULL local work size - ur_exp_command_buffer_command_handle_t second_command_handle = nullptr; - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, nullptr, 0, nullptr, 0, nullptr, nullptr, - &second_command_handle)); - ASSERT_NE(second_command_handle, nullptr); - - EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - finalized = true; - - size_t new_global_size = 64; - size_t new_local_size = 16; - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - n_dimensions, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - &new_local_size, // pNewLocalWorkSize - }; - - // Update command local size to non-NULL when created with NULL value - ur_result_t result = urCommandBufferUpdateKernelLaunchExp( - second_command_handle, &update_desc); - EXPECT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); - - if (second_command_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(second_command_handle)); - } -} - -// Test 
setting `pNewLocalWorkSize` to a NULL value when the command was -// created with a non-NULL local work size gives the correct error. -TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - finalized = true; - - size_t new_global_size = 64; - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - n_dimensions, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update command local size to NULL when created with non-NULL value - ur_result_t result = - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); - ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); -} - // If the kernel handle is not being updated, then it's invalid to change // the number of dimensions. 
TEST_P(InvalidUpdateTest, InvalidDimensions) { @@ -291,7 +220,6 @@ TEST_P(InvalidUpdateTest, InvalidDimensions) { nullptr, // pNewLocalWorkSize }; - // Update command local size to NULL when created with non-NULL value ur_result_t result = urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); diff --git a/test/conformance/exp_command_buffer/update/ndrange_update.cpp b/test/conformance/exp_command_buffer/update/ndrange_update.cpp index dd3f17a90a..5d0a81d567 100644 --- a/test/conformance/exp_command_buffer/update/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/update/ndrange_update.cpp @@ -29,30 +29,22 @@ struct NDRangeUpdateTest std::memset(shared_ptr, 0, allocation_size); ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, shared_ptr)); - - // Add a 3 dimension kernel command to command-buffer and close - // command-buffer - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, - global_offset.data(), global_size.data(), local_size.data(), 0, - nullptr, 0, nullptr, nullptr, &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); } // For each work-item the kernel prints the global id and local id in each // of the 3 dimensions to an offset in the output based on global linear // id. void Validate(std::array global_size, - std::array local_size, + std::optional> local_size, std::array global_offset) { + // DPC++ swaps the X & Z dimension for 3 Dimensional kernels // between those set by user and SPIR-V builtins. 
// See `ReverseRangeDimensionsForKernel()` in commands.cpp - std::swap(global_size[0], global_size[2]); - std::swap(local_size[0], local_size[2]); + if (local_size.has_value()) { + std::swap(local_size.value()[0], local_size.value()[2]); + } std::swap(global_offset[0], global_offset[2]); // Verify global ID and local ID of each work item @@ -73,13 +65,15 @@ struct NDRangeUpdateTest EXPECT_EQ(global_id_y, y + global_offset[1]); EXPECT_EQ(global_id_z, z + global_offset[2]); - const int local_id_x = wi_ptr[3]; - const int local_id_y = wi_ptr[4]; - const int local_id_z = wi_ptr[5]; + if (local_size.has_value()) { + const int local_id_x = wi_ptr[3]; + const int local_id_y = wi_ptr[4]; + const int local_id_z = wi_ptr[5]; - EXPECT_EQ(local_id_x, x % local_size[0]); - EXPECT_EQ(local_id_y, y % local_size[1]); - EXPECT_EQ(local_id_z, z % local_size[2]); + EXPECT_EQ(local_id_x, x % local_size.value()[0]); + EXPECT_EQ(local_id_y, y % local_size.value()[1]); + EXPECT_EQ(local_id_z, z % local_size.value()[2]); + } } } } @@ -99,7 +93,7 @@ struct NDRangeUpdateTest } static constexpr size_t elements_per_id = 6; - static constexpr size_t n_dimensions = 3; + static constexpr uint32_t n_dimensions = 3; static constexpr std::array global_size = {8, 8, 8}; static constexpr std::array local_size = {1, 2, 2}; static constexpr std::array global_offset = {0, 4, 4}; @@ -112,10 +106,17 @@ struct NDRangeUpdateTest UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(NDRangeUpdateTest); -// Keep the kernel work dimensions as 3, and update local size and global -// offset. 
+// Add a 3 dimension kernel command to the command-buffer and update the +// local size and global offset TEST_P(NDRangeUpdateTest, Update3D) { - // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), local_size.data(), 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + // Run command-buffer prior to update and verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); ASSERT_SUCCESS(urQueueFinish(queue)); @@ -132,7 +133,7 @@ TEST_P(NDRangeUpdateTest, Update3D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 3, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -152,9 +153,17 @@ TEST_P(NDRangeUpdateTest, Update3D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to use 1 in the Z dimension, -// and update global size, local size, and global offset to new values. +// Add a 3 dimension kernel command to the command-buffer. Update the kernel +// work dimensions to be 1 in the Z dimension, and update global size, local +// size, and global offset to new values. 
TEST_P(NDRangeUpdateTest, Update2D) { + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), local_size.data(), 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); @@ -177,7 +186,7 @@ TEST_P(NDRangeUpdateTest, Update2D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 3, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -201,10 +210,18 @@ TEST_P(NDRangeUpdateTest, Update2D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to be 1 in Y & Z dimensions, and check -// that the previously set global size, local size, and global offset update +// Add a 3 dimension kernel command to the command-buffer. Update the kernel +// work dimensions to be 1 in the Y & Z dimensions, and check that the +// previously set global size, local size, and global offset update // accordingly. 
TEST_P(NDRangeUpdateTest, Update1D) { + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), local_size.data(), 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); @@ -222,7 +239,7 @@ TEST_P(NDRangeUpdateTest, Update1D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 3, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -246,27 +263,108 @@ TEST_P(NDRangeUpdateTest, Update1D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Test error code is returned if work dimension parameter changes -TEST_P(NDRangeUpdateTest, Invalid) { - const size_t new_work_dim = n_dimensions - 1; +// Test that setting `pNewLocalWorkSize` to a non-NULL value when the command +// was created with a NULL local work size works. +TEST_P(NDRangeUpdateTest, ImplToUserDefinedLocalSize) { + + // Append a kernel node without setting the local work-size. + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), nullptr, 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Can't validate the local size because it is generated by the + // implementation.
+ Validate(global_size, std::nullopt, global_offset); + + // Set local size and global offset to update to + std::array new_local_size = {4, 2, 2}; + std::array new_global_offset = {3, 2, 1}; + std::array new_global_size = global_size; + + // Set a user-defined local work-size in the update desc. ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - new_work_dim, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + new_global_offset.data(), // pNewGlobalWorkOffset + new_global_size.data(), // pNewGlobalWorkSize + new_local_size.data(), // pNewLocalWorkSize }; - // Update command to command-buffer to use different work dim - ur_result_t result = - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); - ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that the user defined local work-size was set correctly. + Validate(new_global_size, new_local_size, new_global_offset); +} + +// Test that setting `pNewLocalWorkSize` to a NULL value when the command was +// created with a non-NULL local work size works. 
+TEST_P(NDRangeUpdateTest, UserToImplDefinedLocalSize) { + + // Append a kernel node and set a user defined local work-size. + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), + global_size.data(), local_size.data(), 0, nullptr, 0, nullptr, nullptr, + &command_handle)); + ASSERT_NE(command_handle, nullptr); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(global_size, local_size, global_offset); + + // Set the global offset to update to; the global size is kept as is + std::array new_global_offset = {3, 2, 1}; + std::array new_global_size = global_size; + + // Do not set a local work size in the update desc: a NULL pNewLocalWorkSize + // leaves the command's existing local work size unchanged. + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + new_global_offset.data(), // pNewGlobalWorkOffset + new_global_size.data(), // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that the kernel ran successfully and that the global size and the + // local size are unchanged + Validate(new_global_size, local_size, new_global_offset); } diff --git
a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp index b437971e9a..ad631ff6f2 100644 --- a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -71,7 +71,7 @@ struct USMFillCommandTest static constexpr size_t local_size = 4; static constexpr size_t global_size = 32; static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; + static constexpr uint32_t n_dimensions = 1; static constexpr size_t allocation_size = sizeof(val) * global_size; void *shared_ptr = nullptr; void *new_shared_ptr = nullptr; @@ -120,17 +120,17 @@ TEST_P(USMFillCommandTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, // hNewKernel - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - static_cast(n_dimensions), // newWorkDim - nullptr, // pNewMemObjArgList - &new_output_desc, // pNewPointerArgList - &new_input_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - &new_local_size, // pNewLocalWorkSize + kernel, // hNewKernel + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 1, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + &new_output_desc, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + &new_local_size, // pNewLocalWorkSize }; // Update kernel and enqueue command-buffer again @@ -178,7 +178,7 @@ TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList &new_output_desc, // pNewPointerArgList 
&new_input_desc, // pNewValueArgList @@ -330,7 +330,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList &new_output_desc, // pNewPointerArgList &new_input_desc, // pNewValueArgList diff --git a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp index 21f21afa11..1735efdd74 100644 --- a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp @@ -152,7 +152,7 @@ TEST_P(USMSaxpyKernelTest, UpdateParameters) { 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList new_input_descs, // pNewPointerArgList &new_A_desc, // pNewValueArgList @@ -258,7 +258,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList new_input_descs, // pNewPointerArgList &new_A_desc, // pNewValueArgList @@ -324,7 +324,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs - 0, // newWorkDim + n_dimensions, // newWorkDim nullptr, // pNewMemObjArgList new_input_descs, // pNewPointerArgList &new_A_desc, // pNewValueArgList