Skip to content

Commit

Permalink
Merge pull request #1924 from Bensuo/fabio/cmd_buffer_kernel_update
Browse files Browse the repository at this point in the history
Add support for command-buffer kernel updates
  • Loading branch information
aarongreig authored Sep 30, 2024
2 parents 2296205 + d944ff3 commit 532a4ec
Show file tree
Hide file tree
Showing 50 changed files with 1,831 additions and 651 deletions.
83 changes: 66 additions & 17 deletions include/ur_api.h

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1932,6 +1932,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)(
const size_t *,
const size_t *,
uint32_t,
ur_kernel_handle_t *,
uint32_t,
const ur_exp_command_buffer_sync_point_t *,
ur_exp_command_buffer_sync_point_t *,
ur_exp_command_buffer_command_handle_t *);
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpExternalSemaphoreDesc(const struct
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_device_command_buffer_update_capability_flag_t enum
/// @details
/// NOTE(review): the parameter roles below are inferred from the parameter
/// names and the documented error condition only - confirm against the
/// implementation.
/// - `value`: the enum value to print
/// - `buffer`, `buff_size`: presumably the destination character buffer and its capacity
/// - `out_size`: presumably receives the size needed for the printed text
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceCommandBufferUpdateCapabilityFlags(enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_command_buffer_info_t enum
/// @returns
Expand Down
138 changes: 131 additions & 7 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
template <>
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);

template <>
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag);
template <>
inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size);

Expand Down Expand Up @@ -335,6 +337,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_mem_desc_t params);
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_semaphore_desc_t params);
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_image_copy_region_t params);
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_info_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_command_info_t value);
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params);
Expand Down Expand Up @@ -2541,8 +2544,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
break;
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP";
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP:
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP";
break;
case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP:
os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP";
Expand Down Expand Up @@ -4049,15 +4052,16 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
const ur_device_command_buffer_update_capability_flags_t *tptr = (const ur_device_command_buffer_update_capability_flags_t *)ptr;
if (sizeof(ur_device_command_buffer_update_capability_flags_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_command_buffer_update_capability_flags_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;
ur::details::printFlag<ur_device_command_buffer_update_capability_flag_t>(os,
*tptr);

os << ")";
} break;
Expand Down Expand Up @@ -9701,6 +9705,103 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_image_copy
return os;
}
///////////////////////////////////////////////////////////////////////////////
/// @brief Stream insertion for the ur_device_command_buffer_update_capability_flag_t type
/// @details Writes the name of the enumerator, or "unknown enumerator" when
///     the value matches none of the enumerators handled below.
/// @returns
///     std::ostream &
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value) {
    // Fallback text; replaced below when the value is a known enumerator.
    const char *name = "unknown enumerator";

    switch (value) {
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS:
        name = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS";
        break;
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE:
        name = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE";
        break;
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE:
        name = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE";
        break;
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET:
        name = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET";
        break;
    case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE:
        name = "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE";
        break;
    default:
        break;
    }

    os << name;
    return os;
}

namespace ur::details {
///////////////////////////////////////////////////////////////////////////////
/// @brief Print a ur_device_command_buffer_update_capability_flag_t bit-field
/// @details Every known flag whose bits are all set in `flag` is printed by
///     name, joined with " | ". Bits left over after removing the known flags
///     are printed as "unknown bit flags " followed by their 32-bit binary
///     form. When nothing at all was printed, "0" is written instead.
/// @returns
///     - ::UR_RESULT_SUCCESS
template <>
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag) {
    // Known flags, listed in the order in which they are reported.
    const ur_device_command_buffer_update_capability_flag_t knownFlags[] = {
        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS,
        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE,
        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE,
        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET,
        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE,
    };

    uint32_t remaining = flag; // bits that have not been reported yet
    bool printedAny = false;   // becomes true once something was written

    for (const auto known : knownFlags) {
        const uint32_t mask = (uint32_t)known;
        if ((remaining & mask) != mask) {
            continue;
        }

        // Remove the bits so that only unrecognised ones are left at the end.
        remaining ^= mask;

        if (printedAny) {
            os << " | ";
        }
        printedAny = true;
        os << known;
    }

    if (remaining != 0) {
        if (printedAny) {
            os << " | ";
        }
        os << "unknown bit flags " << std::bitset<32>(remaining);
    } else if (!printedAny) {
        os << "0";
    }

    return UR_RESULT_SUCCESS;
}
} // namespace ur::details
///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_exp_command_buffer_info_t type
/// @returns
/// std::ostream &
Expand Down Expand Up @@ -9953,6 +10054,12 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu
ur::details::printStruct(os,
(params.pNext));

os << ", ";
os << ".hNewKernel = ";

ur::details::printPtr(os,
(params.hNewKernel));

os << ", ";
os << ".numNewMemObjArgs = ";

Expand Down Expand Up @@ -15951,6 +16058,23 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
ur::details::printPtr(os,
*(params->ppLocalWorkSize));

os << ", ";
os << ".numKernelAlternatives = ";

os << *(params->pnumKernelAlternatives);

os << ", ";
os << ".phKernelAlternatives = {";
for (size_t i = 0; *(params->pphKernelAlternatives) != NULL && i < *params->pnumKernelAlternatives; ++i) {
if (i != 0) {
os << ", ";
}

ur::details::printPtr(os,
(*(params->pphKernelAlternatives))[i]);
}
os << "}";

os << ", ";
os << ".numSyncPointsInWaitList = ";

Expand Down
38 changes: 29 additions & 9 deletions scripts/core/EXP-COMMAND-BUFFER.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ were obtained from.
// sync-point
${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim,
pGlobalWorkOffset, pGlobalWorkSize,
pLocalWorkSize, 1, &syncPoint,
nullptr, nullptr);
pLocalWorkSize, 0, nullptr, 1,
&syncPoint, nullptr, nullptr);
Enqueueing Command-Buffers
--------------------------------------------------------------------------------
Expand All @@ -167,13 +167,21 @@ Updating Command-Buffer Commands

An adapter implementing the command-buffer experimental feature can optionally
support updating the configuration of kernel commands recorded to a
command-buffer. Support for this is reported by returning true in the
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query.
command-buffer. The attributes of kernel commands that can be updated are
device specific and can be queried using the
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP query.

Updating kernel commands is done by passing the new kernel configuration
to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of
the kernel command to update. Configurations that can be changed are the
parameters to the kernel and the execution ND-Range.
kernel handle, the parameters to the kernel and the execution ND-Range.

Kernel handles that might be used to update the kernel of a command need
to be registered when the command is created. This can be done
using the ``phKernelAlternatives`` parameter of
${x}CommandBufferAppendKernelLaunchExp. The command can then be updated
to use the new kernel handle by passing it to
${x}CommandBufferUpdateKernelLaunchExp.

.. parsed-literal::
Expand All @@ -187,12 +195,14 @@ parameters to the kernel and the execution ND-Range.
${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer);
// Append a kernel command which has two buffer parameters, an input
// and an output.
// and an output. Register hNewKernel as an alternative kernel handle
// which can later be used to change the kernel handle associated
// with this command.
${x}_exp_command_buffer_command_handle_t hCommand;
${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim,
pGlobalWorkOffset, pGlobalWorkSize,
pLocalWorkSize, 0, nullptr,
nullptr, &hCommand);
pLocalWorkSize, 1, &hNewKernel,
0, nullptr, nullptr, &hCommand);
// Close the command-buffer before updating
${x}CommandBufferFinalizeExp(hCommandBuffer);
Expand Down Expand Up @@ -220,6 +230,7 @@ parameters to the kernel and the execution ND-Range.
${x}_exp_command_buffer_update_kernel_launch_desc_t update {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
nullptr, // pNext
hNewKernel, // hNewKernel
2, // numNewMemObjArgs
0, // numNewPointerArgs
0, // numNewValueArgs
Expand Down Expand Up @@ -249,7 +260,13 @@ Enums
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* ${x}_device_info_t
* ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP
* ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP
* ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP
* ${x}_device_command_buffer_update_capability_flags_t
* KERNEL_ARGUMENTS
* LOCAL_WORK_SIZE
* GLOBAL_WORK_SIZE
* GLOBAL_WORK_OFFSET
* KERNEL_HANDLE
* ${x}_result_t
* ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP
* ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP
Expand Down Expand Up @@ -340,6 +357,8 @@ Changelog
+-----------+-------------------------------------------------------+
| 1.4 | Add function definitions for kernel command update |
+-----------+-------------------------------------------------------+
| 1.5 | Add support for updating kernel handles. |
+-----------+-------------------------------------------------------+

Contributors
--------------------------------------------------------------------------------
Expand All @@ -348,3 +367,4 @@ Contributors
* Ewan Crawford `ewan@codeplay.com <ewan@codeplay.com>`_
* Maxime France-Pillois `maxime.francepillois@codeplay.com <maxime.francepillois@codeplay.com>`_
* Aaron Greig `aaron.greig@codeplay.com <aaron.greig@codeplay.com>`_
* Fábio Mestre `fabio.mestre@codeplay.com <fabio.mestre@codeplay.com>`_
Loading

0 comments on commit 532a4ec

Please sign in to comment.