Skip to content

Commit

Permalink
Merge branch 'adapters' into revert-1015-revert-1005-merge-some-main-…
Browse files Browse the repository at this point in the history
…changes-into-adapters
  • Loading branch information
omarahmed1111 committed Nov 13, 2023
2 parents 7ba8fec + 192e940 commit fb00d47
Show file tree
Hide file tree
Showing 33 changed files with 965 additions and 298 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,12 @@ jobs:
-DUR_BUILD_TESTS=ON
-DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
-DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
-DUR_CONFORMANCE_TARGET_TRIPLES=${{matrix.adapter.triplet}}
- name: Build
# This is so that device binaries can find the sycl runtime library
run: LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib
cmake --build ${{github.workspace}}/build -j $(nproc)
run: cmake --build ${{github.workspace}}/build -j $(nproc)

- name: Test adapter specific
working-directory: ${{github.workspace}}/build
Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ option(UR_BUILD_ADAPTER_CUDA "build cuda adapter from SYCL" OFF)
option(UR_BUILD_ADAPTER_HIP "build hip adapter from SYCL" OFF)
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable")
set(UR_SYCL_LIBRARY_DIR "" CACHE PATH
"Path of the SYCL runtime library directory")

set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ List of options provided by CMake:
| UR_BUILD_ADAPTER_HIP | Fetch and use hip adapter from SYCL | ON/OFF | OFF |
| UR_HIP_PLATFORM | Build hip adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
| UR_ENABLE_COMGR | Enable comgr lib usage | ON/OFF | OFF |
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |

### Additional make targets

Expand Down
18 changes: 10 additions & 8 deletions source/adapters/level_zero/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,17 +175,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
}

/// Reports the most recent adapter-specific error recorded on the calling
/// thread.
///
/// The message, native error code and result code are read from the
/// thread-local ErrorMessage, ErrorAdapterNativeCode and ErrorMessageCode
/// variables, which setErrorMessage() populates.
///
/// @return the result code stored alongside the last error message.
UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError(
    [[maybe_unused]] ur_adapter_handle_t
        AdapterHandle,    ///< [in] handle of the adapter instance
    const char **Message, ///< [out] pointer to a C string where the adapter
                          ///< specific error message will be stored.
    int32_t *Error ///< [out] pointer to an integer where the adapter specific
                   ///< error code will be stored.
) {
  // The returned string points at thread-local storage owned by the adapter;
  // it is overwritten by the next setErrorMessage() call on this thread.
  *Message = ErrorMessage;

  // Write through the out-pointer. Assigning to `Error` itself would only
  // rebind the local parameter and the caller would never see the code.
  *Error = ErrorAdapterNativeCode;

  return ErrorMessageCode;
}

UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ ur_result_t calculateKernelWorkDimensions(
Device->ZeDeviceComputeProperties->maxGroupSizeX,
Device->ZeDeviceComputeProperties->maxGroupSizeY,
Device->ZeDeviceComputeProperties->maxGroupSizeZ};
GroupSize[I] = std::min(size_t(GroupSize[I]), GlobalWorkSize[I]);
GroupSize[I] = (std::min)(size_t(GroupSize[I]), GlobalWorkSize[I]);
while (GlobalWorkSize[I] % GroupSize[I]) {
--GroupSize[I];
}
Expand Down
13 changes: 8 additions & 5 deletions source/adapters/level_zero/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,16 @@ template <> zes_structure_type_t getZesStructureType<zes_mem_properties_t>() {
// Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR
thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS;
thread_local char ErrorMessage[MaxMessageSize];
thread_local int32_t ErrorAdapterNativeCode;

// Utility function for setting a message and warning
[[maybe_unused]] void setErrorMessage(const char *message,
ur_result_t error_code) {
assert(strlen(message) <= MaxMessageSize);
strcpy(ErrorMessage, message);
ErrorMessageCode = error_code;
[[maybe_unused]] void setErrorMessage(const char *pMessage,
ur_result_t ErrorCode,
int32_t AdapterErrorCode) {
assert(strlen(pMessage) <= MaxMessageSize);
strcpy(ErrorMessage, pMessage);
ErrorMessageCode = ErrorCode;
ErrorAdapterNativeCode = AdapterErrorCode;
}

ur_result_t zerPluginGetLastError(char **message) {
Expand Down
6 changes: 4 additions & 2 deletions source/adapters/level_zero/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,9 @@ constexpr char ZE_SUPPORTED_EXTENSIONS[] =
constexpr size_t MaxMessageSize = 256;
extern thread_local ur_result_t ErrorMessageCode;
extern thread_local char ErrorMessage[MaxMessageSize];
extern thread_local int32_t ErrorAdapterNativeCode;

// Utility function for setting a message and warning
[[maybe_unused]] void setErrorMessage(const char *message,
ur_result_t error_code);
[[maybe_unused]] void setErrorMessage(const char *pMessage,
ur_result_t ErrorCode,
int32_t AdapterErrorCode);
7 changes: 4 additions & 3 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(

uint32_t ZeDeviceCount = MatchedDevices.size();

auto N = std::min(ZeDeviceCount, NumEntries);
auto N = (std::min)(ZeDeviceCount, NumEntries);
if (Devices)
std::copy_n(MatchedDevices.begin(), N, Devices);

Expand Down Expand Up @@ -631,7 +631,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
case UR_DEVICE_INFO_GLOBAL_MEM_FREE: {
if (getenv("ZES_ENABLE_SYSMAN") == nullptr) {
setErrorMessage("Set ZES_ENABLE_SYSMAN=1 to obtain free memory",
UR_RESULT_SUCCESS);
UR_RESULT_ERROR_UNINITIALIZED,
static_cast<int32_t>(ZE_RESULT_ERROR_UNINITIALIZED));
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}
// Only report device memory which zeMemAllocDevice can allocate from.
Expand Down Expand Up @@ -1239,7 +1240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary(
uint32_t *SelectedBinaryInd = SelectedBinary;

// Find the appropriate device image, fallback to spirv if not found
constexpr uint32_t InvalidInd = std::numeric_limits<uint32_t>::max();
constexpr uint32_t InvalidInd = (std::numeric_limits<uint32_t>::max)();
uint32_t Spirv = InvalidInd;

for (uint32_t i = 0; i < NumBinaries; ++i) {
Expand Down
18 changes: 10 additions & 8 deletions source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
UR_RESULT_ERROR_INVALID_VALUE);
if (LocalWorkSize) {
// L0
UR_ASSERT(LocalWorkSize[0] < std::numeric_limits<uint32_t>::max(),
UR_ASSERT(LocalWorkSize[0] < (std::numeric_limits<uint32_t>::max)(),
UR_RESULT_ERROR_INVALID_VALUE);
UR_ASSERT(LocalWorkSize[1] < std::numeric_limits<uint32_t>::max(),
UR_ASSERT(LocalWorkSize[1] < (std::numeric_limits<uint32_t>::max)(),
UR_RESULT_ERROR_INVALID_VALUE);
UR_ASSERT(LocalWorkSize[2] < std::numeric_limits<uint32_t>::max(),
UR_ASSERT(LocalWorkSize[2] < (std::numeric_limits<uint32_t>::max)(),
UR_RESULT_ERROR_INVALID_VALUE);
WG[0] = static_cast<uint32_t>(LocalWorkSize[0]);
WG[1] = static_cast<uint32_t>(LocalWorkSize[1]);
Expand All @@ -110,7 +110,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
Queue->Device->ZeDeviceComputeProperties->maxGroupSizeX,
Queue->Device->ZeDeviceComputeProperties->maxGroupSizeY,
Queue->Device->ZeDeviceComputeProperties->maxGroupSizeZ};
GroupSize[I] = std::min(size_t(GroupSize[I]), GlobalWorkSize[I]);
GroupSize[I] = (std::min)(size_t(GroupSize[I]), GlobalWorkSize[I]);
while (GlobalWorkSize[I] % GroupSize[I]) {
--GroupSize[I];
}
Expand Down Expand Up @@ -284,8 +284,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite(
(Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr));
if (GlobalVarSize < Offset + Count) {
setErrorMessage("Write device global variable is out of range.",
UR_RESULT_ERROR_INVALID_VALUE);
return UR_RESULT_ERROR_UNKNOWN;
UR_RESULT_ERROR_INVALID_VALUE,
static_cast<int32_t>(ZE_RESULT_ERROR_INVALID_ARGUMENT));
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}

// Copy engine is preferred only for host to device transfer.
Expand Down Expand Up @@ -333,8 +334,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead(
(Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr));
if (GlobalVarSize < Offset + Count) {
setErrorMessage("Read from device global variable is out of range.",
UR_RESULT_ERROR_INVALID_VALUE);
return UR_RESULT_ERROR_UNKNOWN;
UR_RESULT_ERROR_INVALID_VALUE,
static_cast<int32_t>(ZE_RESULT_ERROR_INVALID_ARGUMENT));
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}

// Copy engine is preferred only for host to device transfer.
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/level_zero/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(
if (*NumPlatforms == 0)
*NumPlatforms = URPlatformsCache->size();
else
*NumPlatforms = std::min(URPlatformsCache->size(), (size_t)NumEntries);
*NumPlatforms = (std::min)(URPlatformsCache->size(), (size_t)NumEntries);
}

return UR_RESULT_SUCCESS;
Expand Down
22 changes: 6 additions & 16 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,8 +930,8 @@ ur_queue_handle_t_::ur_queue_handle_t_(
// Set-up to round-robin across allowed range of engines.
uint32_t FilterLowerIndex = getRangeOfAllowedComputeEngines().first;
uint32_t FilterUpperIndex = getRangeOfAllowedComputeEngines().second;
FilterUpperIndex = std::min((size_t)FilterUpperIndex,
FilterLowerIndex + ComputeQueues.size() - 1);
FilterUpperIndex = (std::min)((size_t)FilterUpperIndex,
FilterLowerIndex + ComputeQueues.size() - 1);
if (FilterLowerIndex <= FilterUpperIndex) {
ComputeQueueGroup.LowerIndex = FilterLowerIndex;
ComputeQueueGroup.UpperIndex = FilterUpperIndex;
Expand Down Expand Up @@ -959,8 +959,8 @@ ur_queue_handle_t_::ur_queue_handle_t_(
} else {
uint32_t FilterLowerIndex = Range.first;
uint32_t FilterUpperIndex = Range.second;
FilterUpperIndex = std::min((size_t)FilterUpperIndex,
FilterLowerIndex + CopyQueues.size() - 1);
FilterUpperIndex = (std::min)((size_t)FilterUpperIndex,
FilterLowerIndex + CopyQueues.size() - 1);
if (FilterLowerIndex <= FilterUpperIndex) {
CopyQueueGroup.ZeQueues = CopyQueues;
CopyQueueGroup.LowerIndex = FilterLowerIndex;
Expand Down Expand Up @@ -1406,18 +1406,8 @@ ur_result_t ur_queue_handle_t_::synchronize() {
if (ImmCmdList == Queue->CommandListMap.end())
return UR_RESULT_SUCCESS;

ur_event_handle_t Event{};
ur_result_t Res = createEventAndAssociateQueue(
reinterpret_cast<ur_queue_handle_t>(Queue), &Event,
UR_EXT_COMMAND_TYPE_USER, ImmCmdList, /* IsInternal */ false);
if (Res != UR_RESULT_SUCCESS)
return Res;
auto zeEvent = Event->ZeEvent;
ZE2UR_CALL(zeCommandListAppendBarrier,
(ImmCmdList->first, zeEvent, 0, nullptr));
ZE2UR_CALL(zeHostSynchronize, (zeEvent));
Event->Completed = true;
UR_CALL(urEventRelease(Event));
// wait for all commands previously submitted to this immediate command list
ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX));

// Cleanup all events from the synced command list.
CleanupEventListFromResetCmdList(ImmCmdList->second.EventList, true);
Expand Down
30 changes: 24 additions & 6 deletions source/adapters/level_zero/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,14 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr,
reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0,
UR_RESULT_ERROR_INVALID_VALUE);

USMAllocationMakeResident(USMDeviceAllocationForceResidency, Context, Device,
*ResultPtr, Size);
// TODO: Return any non-success result from USMAllocationMakeResident once
// oneapi-src/level-zero-spec#240 is resolved.
auto Result = USMAllocationMakeResident(USMDeviceAllocationForceResidency,
Context, Device, *ResultPtr, Size);
if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY ||
Result == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) {
return Result;
}
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -225,8 +231,14 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr,
reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0,
UR_RESULT_ERROR_INVALID_VALUE);

USMAllocationMakeResident(USMSharedAllocationForceResidency, Context, Device,
*ResultPtr, Size);
// TODO: Return any non-success result from USMAllocationMakeResident once
// oneapi-src/level-zero-spec#240 is resolved.
auto Result = USMAllocationMakeResident(USMSharedAllocationForceResidency,
Context, Device, *ResultPtr, Size);
if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY ||
Result == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) {
return Result;
}

// TODO: Handle PI_MEM_ALLOC_DEVICE_READ_ONLY.
return UR_RESULT_SUCCESS;
Expand All @@ -247,8 +259,14 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr,
reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0,
UR_RESULT_ERROR_INVALID_VALUE);

USMAllocationMakeResident(USMHostAllocationForceResidency, Context, nullptr,
*ResultPtr, Size);
// TODO: Return any non-success result from USMAllocationMakeResident once
// oneapi-src/level-zero-spec#240 is resolved.
auto Result = USMAllocationMakeResident(USMHostAllocationForceResidency,
Context, nullptr, *ResultPtr, Size);
if (Result == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY ||
Result == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) {
return Result;
}
return UR_RESULT_SUCCESS;
}

Expand Down
17 changes: 17 additions & 0 deletions source/adapters/opencl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,23 @@ ur_result_t mapCLErrorToUR(cl_int Result) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
case CL_INVALID_MEM_OBJECT:
return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
case CL_INVALID_QUEUE_PROPERTIES:
return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
case CL_INVALID_BUFFER_SIZE:
return UR_RESULT_ERROR_INVALID_BUFFER_SIZE;
case CL_INVALID_IMAGE_SIZE:
return UR_RESULT_ERROR_INVALID_IMAGE_SIZE;
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
case CL_INVALID_IMAGE_DESCRIPTOR:
return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR;
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
return UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT;
case CL_PROFILING_INFO_NOT_AVAILABLE:
return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
case CL_LINK_PROGRAM_FAILURE:
return UR_RESULT_ERROR_PROGRAM_LINK_FAILURE;
case CL_INVALID_ARG_INDEX:
return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX;
default:
return UR_RESULT_ERROR_UNKNOWN;
}
Expand Down
70 changes: 63 additions & 7 deletions source/adapters/opencl/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@

#include "context.hpp"

#include <mutex>
#include <set>
#include <unordered_map>

ur_result_t cl_adapter::getDevicesFromContext(
ur_context_handle_t hContext,
std::unique_ptr<std::vector<cl_device_id>> &DevicesInCtx) {
Expand Down Expand Up @@ -89,10 +93,17 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName,
case UR_CONTEXT_INFO_NUM_DEVICES:
case UR_CONTEXT_INFO_DEVICES:
case UR_CONTEXT_INFO_REFERENCE_COUNT: {

CL_RETURN_ON_FAILURE(
size_t CheckPropSize = 0;
auto ClResult =
clGetContextInfo(cl_adapter::cast<cl_context>(hContext), CLPropName,
propSize, pPropValue, pPropSizeRet));
propSize, pPropValue, &CheckPropSize);
if (pPropValue && CheckPropSize != propSize) {
return UR_RESULT_ERROR_INVALID_SIZE;
}
CL_RETURN_ON_FAILURE(ClResult);
if (pPropSizeRet) {
*pPropSizeRet = CheckPropSize;
}
return UR_RESULT_SUCCESS;
}
default:
Expand Down Expand Up @@ -130,8 +141,53 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle(
}

/// Registers pfnDeleter to be called with pUserData when the OpenCL context
/// behind hContext is destroyed.
///
/// Each (context, deleter) pair is registered at most once; a repeated
/// request for a pair that is already registered returns success without
/// doing anything. Registration is forwarded to
/// clSetContextDestructorCallback.
///
/// @param hContext    context whose destruction triggers the deleter.
/// @param pfnDeleter  function to invoke.
/// @param pUserData   opaque pointer handed to pfnDeleter.
/// @return UR_RESULT_SUCCESS on success or when already registered, otherwise
///         the result produced by CL_RETURN_ON_FAILURE for the OpenCL error.
UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter(
    ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter,
    void *pUserData) {
  using CallbackRegistry =
      std::unordered_map<ur_context_handle_t,
                         std::set<ur_context_extended_deleter_t>>;
  static CallbackRegistry ContextCallbackMap;
  static std::mutex ContextCallbackMutex;

  {
    std::lock_guard<std::mutex> Lock(ContextCallbackMutex);
    // Callbacks can only be registered once and we need to avoid double
    // allocating.
    if (ContextCallbackMap.count(hContext) &&
        ContextCallbackMap[hContext].count(pfnDeleter)) {
      return UR_RESULT_SUCCESS;
    }

    ContextCallbackMap[hContext].insert(pfnDeleter);
  }

  // Heap-allocated record handed to OpenCL as the callback's user data. It
  // runs the user's deleter, removes its own registry entry and then frees
  // itself.
  struct ContextCallback {
    void execute() {
      pfnDeleter(pUserData);
      {
        std::lock_guard<std::mutex> Lock(*CallbackMutex);
        (*CallbackMap)[hContext].erase(pfnDeleter);
        if ((*CallbackMap)[hContext].empty()) {
          CallbackMap->erase(hContext);
        }
      }
      delete this;
    }
    ur_context_handle_t hContext;
    ur_context_extended_deleter_t pfnDeleter;
    void *pUserData;
    CallbackRegistry *CallbackMap;
    std::mutex *CallbackMutex;
  };

  // Ownership passes to the OpenCL runtime once registration succeeds, which
  // is why this is a raw allocation rather than a scoped smart pointer.
  auto *Callback = new ContextCallback{hContext, pfnDeleter, pUserData,
                                       &ContextCallbackMap,
                                       &ContextCallbackMutex};
  auto ClCallback = [](cl_context, void *pData) {
    static_cast<ContextCallback *>(pData)->execute();
  };

  const cl_int ClResult = clSetContextDestructorCallback(
      cl_adapter::cast<cl_context>(hContext), ClCallback, Callback);
  if (ClResult != CL_SUCCESS) {
    // Registration failed, so OpenCL will never invoke the callback. Free the
    // record and undo the registry entry; otherwise the allocation leaks and
    // a later retry would be reported as "already registered".
    delete Callback;
    std::lock_guard<std::mutex> Lock(ContextCallbackMutex);
    auto Entry = ContextCallbackMap.find(hContext);
    if (Entry != ContextCallbackMap.end()) {
      Entry->second.erase(pfnDeleter);
      if (Entry->second.empty()) {
        ContextCallbackMap.erase(Entry);
      }
    }
  }
  CL_RETURN_ON_FAILURE(ClResult);

  return UR_RESULT_SUCCESS;
}
Loading

0 comments on commit fb00d47

Please sign in to comment.