Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Candidate for v0.8.3 release tag #1219

Merged
merged 9 commits into from
Jan 22, 2024
5 changes: 1 addition & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
project(unified-runtime VERSION 0.8.2)
project(unified-runtime VERSION 0.8.3)

include(GNUInstallDirs)
include(CheckCXXSourceCompiles)
Expand Down Expand Up @@ -111,9 +111,6 @@ if(UR_ENABLE_TRACING)
)
if (MSVC)
set(TARGET_XPTI $<IF:$<CONFIG:Release>,xpti,xptid>)

# disable warning C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(xptifw PRIVATE /wd4267)
else()
set(TARGET_XPTI xpti)
endif()
Expand Down
8 changes: 7 additions & 1 deletion cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,16 @@ function(add_ur_target_compile_options name)
/W3
/MD$<$<CONFIG:Debug>:d>
/GS
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX
)

if(UR_DEVELOPER_MODE)
target_compile_options(${name} PRIVATE /WX /GS)
# _CRT_SECURE_NO_WARNINGS used mainly because of getenv
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(${name} PRIVATE
/WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267
)
endif()
endif()
endfunction()
Expand Down
8 changes: 6 additions & 2 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "context.hpp"
#include "device.hpp"
#include "platform.hpp"
#include "ur_util.hpp"

int getAttribute(ur_device_handle_t device, CUdevice_attribute attribute) {
int value;
Expand All @@ -40,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
ur_device_info_t propName,
size_t propSize,
void *pPropValue,
size_t *pPropSizeRet) {
size_t *pPropSizeRet) try {
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

static constexpr uint32_t MaxWorkItemDimensions = 3u;
Expand Down Expand Up @@ -1033,6 +1034,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
break;
}
return UR_RESULT_ERROR_INVALID_ENUMERATION;
} catch (...) {
return exceptionToResult(std::current_exception());
}

/// \return UR_RESULT_SUCCESS if the function is executed successfully
Expand Down Expand Up @@ -1097,7 +1100,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,

UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle(
ur_device_handle_t hDevice, ur_native_handle_t *phNativeHandle) {
*phNativeHandle = reinterpret_cast<ur_native_handle_t>(hDevice->get());
*phNativeHandle = reinterpret_cast<ur_native_handle_t>(
static_cast<std::uintptr_t>(hDevice->get()));
return UR_RESULT_SUCCESS;
}

Expand Down
6 changes: 5 additions & 1 deletion source/adapters/cuda/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include "context.hpp"
#include "device.hpp"
#include "queue.hpp"
#include "ur_api.h"
#include "ur_util.hpp"

#include <cassert>
#include <cuda.h>
Expand Down Expand Up @@ -65,7 +67,7 @@ ur_result_t ur_event_handle_t_::start() {
return Result;
}

bool ur_event_handle_t_::isCompleted() const noexcept {
bool ur_event_handle_t_::isCompleted() const noexcept try {
if (!IsRecorded) {
return false;
}
Expand All @@ -80,6 +82,8 @@ bool ur_event_handle_t_::isCompleted() const noexcept {
}
}
return true;
} catch (...) {
return exceptionToResult(std::current_exception()) == UR_RESULT_SUCCESS;
}

uint64_t ur_event_handle_t_::getQueuedTime() const {
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type,
std::make_pair(image_channel_type, num_channels));
cuda_format = cuda_format_and_size.first;
pixel_size_bytes = cuda_format_and_size.second;
} catch (std::out_of_range &e) {
} catch (const std::out_of_range &) {
return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED;
}
}
Expand Down Expand Up @@ -276,7 +276,7 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
ImageTexDesc.mipmapFilterMode = MipFilterMode;
ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp;
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp;
ImageTexDesc.maxAnisotropy = hSampler->MaxAnisotropy;
ImageTexDesc.maxAnisotropy = static_cast<unsigned>(hSampler->MaxAnisotropy);

// The address modes can interfere with other dimensions,
// e.g. 1D texture sampling can be interfered with when setting other
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/cuda/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs);
if (Valid) {
Options.push_back(CU_JIT_MAX_REGISTERS);
OptionVals.push_back(reinterpret_cast<void *>(MaxRegs));
OptionVals.push_back(
reinterpret_cast<void *>(static_cast<std::uintptr_t>(MaxRegs)));
}
}

Expand Down
3 changes: 1 addition & 2 deletions source/adapters/cuda/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc,
new ur_sampler_handle_t_(hContext)};

if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
Sampler->Props |= pDesc->normalizedCoords;
Sampler->Props |= static_cast<uint32_t>(pDesc->normalizedCoords);
Sampler->Props |= pDesc->filterMode << 1;
Sampler->Props |= pDesc->addressingMode << 2;
} else {
Expand Down Expand Up @@ -71,7 +71,6 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName,
default:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
}
return {};
}

UR_APIEXPORT ur_result_t UR_APICALL
Expand Down
19 changes: 17 additions & 2 deletions source/adapters/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,30 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD")
)

if(UR_ENABLE_COMGR)
set(UR_COMGR_VERSION5_HEADER "${UR_HIP_INCLUDE_DIR}/amd_comgr/amd_comgr.h")
set(UR_COMGR_VERSION4_HEADER "${UR_HIP_INCLUDE_DIR}/amd_comgr.h")
# The COMGR header changed location between ROCm versions 4 and 5.
# Check for existence in the version 5 location or fall back to version 4.
if(NOT EXISTS "${UR_COMGR_VERSION5_HEADER}")
  if(NOT EXISTS "${UR_COMGR_VERSION4_HEADER}")
    # message() joins its arguments with no separator, so every fragment
    # must carry its own trailing space to keep the paths readable.
    message(FATAL_ERROR "Could not find AMD COMGR header at "
      "${UR_COMGR_VERSION5_HEADER} or "
      "${UR_COMGR_VERSION4_HEADER}, "
      "check ROCm installation")
  else()
    # Only the version 4 location exists: define the macro the sources test
    # to pick <amd_comgr.h> over <amd_comgr/amd_comgr.h>.
    target_compile_definitions(${TARGET_NAME} PRIVATE UR_COMGR_VERSION4_INCLUDE)
  endif()
endif()

add_library(amd_comgr SHARED IMPORTED GLOBAL)
set_target_properties(
amd_comgr PROPERTIES
IMPORTED_LOCATION "${UR_HIP_LIB_DIR}/libamd_comgr.so"
INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
)
target_link_libraries(pi_hip PUBLIC amd_comgr)
target_compile_definitions(pi_hip PRIVATE SYCL_ENABLE_KERNEL_FUSION)
target_link_libraries(${TARGET_NAME} PUBLIC amd_comgr)
target_compile_definitions(${TARGET_NAME} PRIVATE SYCL_ENABLE_KERNEL_FUSION)
endif(UR_ENABLE_COMGR)

target_link_libraries(${TARGET_NAME} PRIVATE
Expand Down
39 changes: 29 additions & 10 deletions source/adapters/hip/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,48 @@
#pragma once

#ifdef SYCL_ENABLE_KERNEL_FUSION
#ifdef UR_COMGR_VERSION4_INCLUDE
#include <amd_comgr.h>
#else
#include <amd_comgr/amd_comgr.h>
#endif
#endif
#include <hip/hip_runtime.h>
#include <ur/ur.hpp>

// Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be
// indexed, but on NVidia it is an opaque type and needs to go through
// cuArrayGetDescriptor so implement a utility function to get the array
// properties
inline void getArrayDesc(hipArray *Array, hipArray_Format &Format,
size_t &Channels) {
// Before ROCm 6, hipify doesn't support cuArrayGetDescriptor, on AMD the
// hipArray can just be indexed, but on NVidia it is an opaque type and needs to
// go through cuArrayGetDescriptor so implement a utility function to get the
// array properties
inline static hipError_t getArrayDesc(hipArray *Array, hipArray_Format &Format,
size_t &Channels) {
#if HIP_VERSION_MAJOR >= 6
HIP_ARRAY_DESCRIPTOR ArrayDesc;
hipError_t err = hipArrayGetDescriptor(&ArrayDesc, Array);
if (err == hipSuccess) {
Format = ArrayDesc.Format;
Channels = ArrayDesc.NumChannels;
}
return err;
#else
#if defined(__HIP_PLATFORM_AMD__)
Format = Array->Format;
Channels = Array->NumChannels;
return hipSuccess;
#elif defined(__HIP_PLATFORM_NVIDIA__)
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
cuArrayGetDescriptor(&ArrayDesc, (CUarray)Array);

Format = ArrayDesc.Format;
Channels = ArrayDesc.NumChannels;
CUresult err = cuArrayGetDescriptor(&ArrayDesc, (CUarray)Array);
if (err == CUDA_SUCCESS) {
Format = ArrayDesc.Format;
Channels = ArrayDesc.NumChannels;
return hipSuccess;
} else {
return hipErrorUnknown; // No easy way to map CUerror to hipError
}
#else
#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
#endif
#endif
}

// HIP on NVIDIA headers guard hipArray3DCreate behind __CUDACC__, this does not
Expand Down
29 changes: 6 additions & 23 deletions source/adapters/hip/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,9 @@
#include "memory.hpp"
#include "queue.hpp"

namespace {
extern size_t imageElementByteSize(hipArray_Format ArrayFormat);

static size_t imageElementByteSize(hipArray_Format ArrayFormat) {
switch (ArrayFormat) {
case HIP_AD_FORMAT_UNSIGNED_INT8:
case HIP_AD_FORMAT_SIGNED_INT8:
return 1;
case HIP_AD_FORMAT_UNSIGNED_INT16:
case HIP_AD_FORMAT_SIGNED_INT16:
case HIP_AD_FORMAT_HALF:
return 2;
case HIP_AD_FORMAT_UNSIGNED_INT32:
case HIP_AD_FORMAT_SIGNED_INT32:
case HIP_AD_FORMAT_FLOAT:
return 4;
default:
detail::ur::die("Invalid image format.");
}
return 0;
}
namespace {

ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue,
hipStream_t Stream, uint32_t NumEventsInWaitList,
Expand Down Expand Up @@ -898,7 +881,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(

hipArray_Format Format;
size_t NumChannels;
getArrayDesc(Array, Format, NumChannels);
UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels));

int ElementByteSize = imageElementByteSize(Format);

Expand Down Expand Up @@ -959,7 +942,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(

hipArray_Format Format;
size_t NumChannels;
getArrayDesc(Array, Format, NumChannels);
UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels));

int ElementByteSize = imageElementByteSize(Format);

Expand Down Expand Up @@ -1023,12 +1006,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
hipArray *SrcArray = std::get<SurfaceMem>(hImageSrc->Mem).getArray();
hipArray_Format SrcFormat;
size_t SrcNumChannels;
getArrayDesc(SrcArray, SrcFormat, SrcNumChannels);
UR_CHECK_ERROR(getArrayDesc(SrcArray, SrcFormat, SrcNumChannels));

hipArray *DstArray = std::get<SurfaceMem>(hImageDst->Mem).getArray();
hipArray_Format DstFormat;
size_t DstNumChannels;
getArrayDesc(DstArray, DstFormat, DstNumChannels);
UR_CHECK_ERROR(getArrayDesc(DstArray, DstFormat, DstNumChannels));

UR_ASSERT(SrcFormat == DstFormat,
UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/hip/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(
auto array = std::get<SurfaceMem>(hArgValue->Mem).getArray();
hipArray_Format Format;
size_t NumChannels;
getArrayDesc(array, Format, NumChannels);
UR_CHECK_ERROR(getArrayDesc(array, Format, NumChannels));
if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 &&
Format != HIP_AD_FORMAT_SIGNED_INT32 &&
Format != HIP_AD_FORMAT_HALF && Format != HIP_AD_FORMAT_FLOAT) {
Expand Down
32 changes: 5 additions & 27 deletions source/adapters/hip/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
#include <cassert>
#include <ur_util.hpp>

namespace {

size_t GetHipFormatPixelSize(hipArray_Format Format) {
switch (Format) {
size_t imageElementByteSize(hipArray_Format ArrayFormat) {
switch (ArrayFormat) {
case HIP_AD_FORMAT_UNSIGNED_INT8:
case HIP_AD_FORMAT_SIGNED_INT8:
return 1;
Expand All @@ -31,10 +29,9 @@ size_t GetHipFormatPixelSize(hipArray_Format Format) {
default:
detail::ur::die("Invalid HIP format specifier");
}
return 0;
}

} // namespace

/// Decreases the reference count of the Mem object.
/// If this is zero, calls the relevant HIP Free function
/// \return UR_RESULT_SUCCESS unless deallocation error
Expand Down Expand Up @@ -280,7 +277,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,
UR_CHECK_ERROR(
hipArray3DGetDescriptor(&ArrayDescriptor, Mem.getArray()));
const auto PixelSizeBytes =
GetHipFormatPixelSize(ArrayDescriptor.Format) *
imageElementByteSize(ArrayDescriptor.Format) *
ArrayDescriptor.NumChannels;
const auto ImageSizeBytes =
PixelSizeBytes *
Expand Down Expand Up @@ -573,25 +570,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory,
}
};

const auto hipFormatToElementSize =
[](hipArray_Format HipFormat) -> size_t {
switch (HipFormat) {
case HIP_AD_FORMAT_UNSIGNED_INT8:
case HIP_AD_FORMAT_SIGNED_INT8:
return 1;
case HIP_AD_FORMAT_UNSIGNED_INT16:
case HIP_AD_FORMAT_SIGNED_INT16:
case HIP_AD_FORMAT_HALF:
return 2;
case HIP_AD_FORMAT_UNSIGNED_INT32:
case HIP_AD_FORMAT_SIGNED_INT32:
case HIP_AD_FORMAT_FLOAT:
return 4;
default:
detail::ur::die("Invalid Hip format specified.");
}
};

switch (propName) {
case UR_IMAGE_INFO_FORMAT:
return ReturnValue(ur_image_format_t{UR_IMAGE_CHANNEL_ORDER_RGBA,
Expand All @@ -603,7 +581,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory,
case UR_IMAGE_INFO_DEPTH:
return ReturnValue(ArrayInfo.Depth);
case UR_IMAGE_INFO_ELEMENT_SIZE:
return ReturnValue(hipFormatToElementSize(ArrayInfo.Format));
return ReturnValue(imageElementByteSize(ArrayInfo.Format));
case UR_IMAGE_INFO_ROW_PITCH:
case UR_IMAGE_INFO_SLICE_PITCH:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
Expand Down
4 changes: 4 additions & 0 deletions source/adapters/hip/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
#include "program.hpp"

#ifdef SYCL_ENABLE_KERNEL_FUSION
#ifdef UR_COMGR_VERSION4_INCLUDE
#include <amd_comgr.h>
#else
#include <amd_comgr/amd_comgr.h>
#endif
namespace {
template <typename ReleaseType, ReleaseType Release, typename T>
struct COMgrObjCleanUp {
Expand Down
Loading