Skip to content

Commit

Permalink
Introduce CELERITY_SYCL_IS_* macros
Browse files Browse the repository at this point in the history
  • Loading branch information
fknorr committed Aug 10, 2024
1 parent bb9b31e commit 501f165
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ See our [platform support guide](docs/platform-support.md) for a complete list o
- Add support for SimSYCL as a SYCL implementation (#238)
- Extend compiler support to GCC (optionally with sanitizers) and C++20 code bases (#238)
- Add support for profiling with [Tracy](https://github.com/wolfpld/tracy), via `CELERITY_TRACY_SUPPORT` and environment variable `CELERITY_TRACY` (#267)
- The active SYCL implementation can now be queried via `CELERITY_SYCL_IS_*` macros (#??)

### Changed

Expand Down
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,18 @@ elseif(UNIX)
set(SOURCES ${SOURCES} src/platform_specific/named_threads.unix.cc)
endif()

# Read by configure_file()
set(CELERITY_SYCL_IS_ACPP OFF)
set(CELERITY_SYCL_IS_DPCPP OFF)
set(CELERITY_SYCL_IS_SIMSYCL OFF)
if(CELERITY_SYCL_IMPL STREQUAL "AdaptiveCpp")
set(CELERITY_SYCL_IS_ACPP ON)
elseif(CELERITY_SYCL_IMPL STREQUAL "DPC++")
set(CELERITY_SYCL_IS_DPCPP ON)
elseif(CELERITY_SYCL_IMPL STREQUAL "SimSYCL")
set(CELERITY_SYCL_IS_SIMSYCL ON)
endif()

configure_file(include/version.h.in include/version.h @ONLY)
list(APPEND INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/version.h")

Expand Down
3 changes: 0 additions & 3 deletions cmake/AddToTarget.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ if(CELERITY_SYCL_IMPL STREQUAL "DPC++")
-fsycl
-sycl-std=2020
"-fsycl-targets=${CELERITY_DPCPP_TARGETS}"
-DCELERITY_DPCPP=1
-Wno-sycl-strict # -Wsycl-strict produces false-positive warnings in DPC++'s own SYCL headers as of 2022-10-06
)
target_compile_options(${ADD_SYCL_TARGET} PUBLIC ${DPCPP_FLAGS})
Expand All @@ -30,8 +29,6 @@ elseif(CELERITY_SYCL_IMPL STREQUAL "SimSYCL")
"${multi_value_args}"
${ARGN}
)
target_compile_options(${ADD_SYCL_TARGET} PUBLIC -DCELERITY_SIMSYCL=1)
target_link_options(${ADD_SYCL_TARGET} PUBLIC -DCELERITY_SIMSYCL=1)
endfunction()
endif()

Expand Down
2 changes: 1 addition & 1 deletion examples/matmul/matmul.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include <celerity.h>

#if !defined(NDEBUG) || CELERITY_SIMSYCL
#if !defined(NDEBUG) || CELERITY_SYCL_IS_SIMSYCL
const size_t MAT_SIZE = 128;
#else
const size_t MAT_SIZE = 1024;
Expand Down
4 changes: 4 additions & 0 deletions include/version.h.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#pragma once

#cmakedefine01 CELERITY_SYCL_IS_ACPP
#cmakedefine01 CELERITY_SYCL_IS_DPCPP
#cmakedefine01 CELERITY_SYCL_IS_SIMSYCL

// CELERITY_DETAIL_ENABLE_DEBUG is specified on the command line
#cmakedefine01 CELERITY_USE_MIMALLOC
#cmakedefine01 CELERITY_DETAIL_HAS_NAMED_THREADS
Expand Down
6 changes: 3 additions & 3 deletions include/workaround.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

#include <sycl/sycl.hpp>

#if defined(CELERITY_DPCPP)
#if CELERITY_SYCL_IS_DPCPP
#define CELERITY_WORKAROUND_DPCPP 1
#else
#define CELERITY_WORKAROUND_DPCPP 0
#endif

#if defined(__HIPSYCL__)
#if CELERITY_SYCL_IS_ACPP
#define CELERITY_WORKAROUND_ACPP 1
#define CELERITY_WORKAROUND_VERSION_MAJOR HIPSYCL_VERSION_MAJOR
#define CELERITY_WORKAROUND_VERSION_MINOR HIPSYCL_VERSION_MINOR
Expand All @@ -21,7 +21,7 @@
#define CELERITY_WORKAROUND_ACPP 0
#endif

#if defined(CELERITY_SIMSYCL)
#if CELERITY_SYCL_IS_SIMSYCL
#define CELERITY_WORKAROUND_SIMSYCL 1
#else
#define CELERITY_WORKAROUND_SIMSYCL 0
Expand Down
13 changes: 6 additions & 7 deletions src/backend/sycl_cuda_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
#include "system_info.h"
#include "tracy.h"
#include "utils.h"
#include "workaround.h"

#include "version.h"

#define CELERITY_STRINGIFY2(f) #f
#define CELERITY_STRINGIFY(f) CELERITY_STRINGIFY2(f)
Expand Down Expand Up @@ -75,7 +74,7 @@ void nd_copy_device_async(cudaStream_t stream, const void* const source_base, vo
// - There are no real thread-safety guarantees. DPC++ currently does not submit kernels from background threads, but if it ever starts doing so, this will
// break more-or-less silently.
// There is an open GitHub issue on the matter: https://github.com/intel/llvm/issues/13706
#if defined(CELERITY_DPCPP)
#if CELERITY_SYCL_IS_DPCPP

struct cuda_native_event_deleter {
void operator()(const cudaEvent_t evt) const { CELERITY_CUDA_CHECK(cudaEventDestroy, evt); }
Expand Down Expand Up @@ -117,7 +116,7 @@ class cuda_event final : public async_event_impl {
unique_cuda_native_event m_after;
};

#endif // defined(CELERITY_DPCPP)
#endif // CELERITY_SYCL_IS_DPCPP

bool can_enable_peer_access(const int id_device, const int id_peer) {
// RTX 30xx and 40xx GPUs do not support peer access, but Nvidia Driver < 550 incorrectly reports that it does, causing kernel panics when enabling it
Expand Down Expand Up @@ -151,15 +150,15 @@ namespace celerity::detail::sycl_backend_detail {
async_event nd_copy_device_cuda(sycl::queue& queue, const void* const source_base, void* const dest_base, const box<3>& source_box, const box<3>& dest_box,
const region<3>& copy_region, const size_t elem_size, bool enable_profiling) //
{
#if defined(__HIPSYCL__)
#if CELERITY_SYCL_IS_ACPP
// AdaptiveCpp provides first-class custom backend op submission without a host round-trip like sycl::queue::host_task would require.
auto event = queue.AdaptiveCpp_enqueue_custom_operation([=](sycl::interop_handle handle) {
const auto stream = handle.get_native_queue<sycl::backend::cuda>();
cuda_backend_detail::nd_copy_device_async(stream, source_base, dest_base, source_box, dest_box, copy_region, elem_size);
});
sycl_backend_detail::flush(queue);
return make_async_event<sycl_event>(std::move(event), enable_profiling);
#elif defined(CELERITY_DPCPP)
#elif CELERITY_SYCL_IS_DPCPP
// With DPC++, we must submit from the executor thread - see the comment on cuda_native_event above.
const auto stream = sycl::get_native<sycl::backend::ext_oneapi_cuda>(queue);
auto before = enable_profiling ? cuda_backend_detail::record_native_event(stream, enable_profiling) : nullptr;
Expand All @@ -171,7 +170,7 @@ async_event nd_copy_device_cuda(sycl::queue& queue, const void* const source_bas
#endif
}

#if defined(CELERITY_DPCPP)
#if CELERITY_SYCL_IS_DPCPP
constexpr sycl::backend sycl_cuda_backend = sycl::backend::ext_oneapi_cuda;
#else
constexpr sycl::backend sycl_cuda_backend = sycl::backend::cuda;
Expand Down
6 changes: 3 additions & 3 deletions src/runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,11 @@ namespace detail {
}

static std::string get_sycl_version() {
#if defined(__HIPSYCL__) || defined(__HIPSYCL_TRANSFORM__)
#if CELERITY_SYCL_IS_ACPP
return fmt::format("AdaptiveCpp {}.{}.{}", HIPSYCL_VERSION_MAJOR, HIPSYCL_VERSION_MINOR, HIPSYCL_VERSION_PATCH);
#elif CELERITY_DPCPP
#elif CELERITY_SYCL_IS_DPCPP
return "DPC++ / Clang " __clang_version__;
#elif CELERITY_SIMSYCL
#elif CELERITY_SYCL_IS_SIMSYCL
return "SimSYCL " SIMSYCL_VERSION;
#else
#error "unknown SYCL implementation"
Expand Down

0 comments on commit 501f165

Please sign in to comment.