Skip to content

Commit

Permalink
[SYCL][COMPAT] Added id_query and launch headers (#11018)
Browse files Browse the repository at this point in the history
SYCLcompat primary goals and features are documented
[here](https://intel.github.io/llvm-docs/syclcompat/README.html).

The PR includes the helper code to launch device code through the
`launch<F>` mechanism, and the wrappers to the free function queries.

---------

Co-authored-by: Joe Todd <joe.todd@codeplay.com>
Co-authored-by: Pietro Ghiglio <pietro.ghiglio@codeplay.com>
  • Loading branch information
3 people authored Sep 6, 2023
1 parent bceed65 commit 47d8489
Show file tree
Hide file tree
Showing 12 changed files with 1,107 additions and 6 deletions.
69 changes: 69 additions & 0 deletions sycl/include/syclcompat/id_query.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/***************************************************************************
*
* Copyright (C) Codeplay Software Ltd.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM
* Exceptions. See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SYCL compatibility extension
*
* id_query.hpp
*
* Description:
* id_query functionality for the SYCL compatibility extension
**************************************************************************/

#pragma once

#include <sycl/nd_item.hpp>

namespace syclcompat {

using sycl::ext::oneapi::experimental::this_nd_item;

inline void wg_barrier() { this_nd_item<3>().barrier(); }

namespace local_id {
inline size_t x() { return this_nd_item<3>().get_local_id(2); }
inline size_t y() { return this_nd_item<3>().get_local_id(1); }
inline size_t z() { return this_nd_item<3>().get_local_id(0); }
} // namespace local_id

namespace local_range {
inline size_t x() { return this_nd_item<3>().get_local_range(2); }
inline size_t y() { return this_nd_item<3>().get_local_range(1); }
inline size_t z() { return this_nd_item<3>().get_local_range(0); }
} // namespace local_range

namespace work_group_id {
inline size_t x() { return this_nd_item<3>().get_group(2); }
inline size_t y() { return this_nd_item<3>().get_group(1); }
inline size_t z() { return this_nd_item<3>().get_group(0); }
} // namespace work_group_id

namespace work_group_range {
inline size_t x() { return this_nd_item<3>().get_group_range(2); }
inline size_t y() { return this_nd_item<3>().get_group_range(1); }
inline size_t z() { return this_nd_item<3>().get_group_range(0); }
} // namespace work_group_range

namespace global_range {
inline size_t x() { return this_nd_item<3>().get_global_range(2); }
inline size_t y() { return this_nd_item<3>().get_global_range(1); }
inline size_t z() { return this_nd_item<3>().get_global_range(0); }
} // namespace global_range

namespace global_id {
inline size_t x() { return this_nd_item<3>().get_global_id(2); }
inline size_t y() { return this_nd_item<3>().get_global_id(1); }
inline size_t z() { return this_nd_item<3>().get_global_id(0); }
} // namespace global_id

} // namespace syclcompat
222 changes: 222 additions & 0 deletions sycl/include/syclcompat/launch.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/***************************************************************************
*
* Copyright (C) Codeplay Software Ltd.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM
* Exceptions. See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SYCL compatibility extension
*
* launch.hpp
*
* Description:
* launch functionality for the SYCL compatibility extension
**************************************************************************/

#pragma once

#include <sycl/accessor.hpp>
#include <sycl/event.hpp>
#include <sycl/nd_range.hpp>
#include <sycl/queue.hpp>
#include <sycl/range.hpp>
#include <sycl/reduction.hpp>

#include <syclcompat/device.hpp>
#include <syclcompat/dims.hpp>

namespace syclcompat {

namespace detail {

template <typename R, typename... Types>
constexpr size_t getArgumentCount(R (*f)(Types...)) {
return sizeof...(Types);
}

template <int Dim>
sycl::nd_range<3> transform_nd_range(const sycl::nd_range<Dim> &range) {
sycl::range<Dim> global_range = range.get_global_range();
sycl::range<Dim> local_range = range.get_local_range();
if constexpr (Dim == 3) {
return range;
} else if constexpr (Dim == 2) {
return sycl::nd_range<3>{{1, global_range[0], global_range[1]},
{1, local_range[0], local_range[1]}};
}
return sycl::nd_range<3>{{1, 1, global_range[0]}, {1, 1, local_range[0]}};
}

template <auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event>
launch(const sycl::nd_range<3> &range, sycl::queue q, Args... args) {
static_assert(detail::getArgumentCount(F) == sizeof...(args),
"Wrong number of arguments to SYCL kernel");
static_assert(
std::is_same<std::invoke_result_t<decltype(F), Args...>, void>::value,
"SYCL kernels should return void");

return q.parallel_for(range, [=](sycl::nd_item<3>) { F(args...); });
}

template <auto F, typename... Args>
sycl::event launch(const sycl::nd_range<3> &range, size_t mem_size,
sycl::queue q, Args... args) {
static_assert(detail::getArgumentCount(F) == sizeof...(args) + 1,
"Wrong number of arguments to SYCL kernel");

using F_t = decltype(F);
using f_return_t = typename std::invoke_result_t<F_t, Args..., char *>;
static_assert(std::is_same<f_return_t, void>::value,
"SYCL kernels should return void");

return q.submit([&](sycl::handler &cgh) {
auto local_acc = sycl::local_accessor<char, 1>(mem_size, cgh);
cgh.parallel_for(range, [=](sycl::nd_item<3>) {
auto local_mem = local_acc.get_pointer();
F(args..., local_mem);
});
});
}

} // namespace detail

template <int Dim>
sycl::nd_range<Dim> compute_nd_range(sycl::range<Dim> global_size_in,
sycl::range<Dim> work_group_size) {

if (global_size_in.size() == 0 || work_group_size.size() == 0) {
throw std::invalid_argument("Global or local size is zero!");
}
for (size_t i = 0; i < Dim; ++i) {
if (global_size_in[i] < work_group_size[i])
throw std::invalid_argument("Work group size larger than global size");
}

auto global_size =
((global_size_in + work_group_size - 1) / work_group_size) *
work_group_size;
return {global_size, work_group_size};
}

inline sycl::nd_range<1> compute_nd_range(int global_size_in,
int work_group_size) {
return compute_nd_range<1>(global_size_in, work_group_size);
}

template <auto F, int Dim, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event>
launch(const sycl::nd_range<Dim> &range, sycl::queue q, Args... args) {
return detail::launch<F>(detail::transform_nd_range<Dim>(range), q, args...);
}

template <auto F, int Dim, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event>
launch(const sycl::nd_range<Dim> &range, Args... args) {
return launch<F>(range, get_default_queue(), args...);
}

// Alternative launch through dim3 objects
template <auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event>
launch(const dim3 &grid, const dim3 &threads, sycl::queue q, Args... args) {
return launch<F>(sycl::nd_range<3>{grid * threads, threads}, q, args...);
}

template <auto F, typename... Args>
std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event>
launch(const dim3 &grid, const dim3 &threads, Args... args) {
return launch<F>(grid, threads, get_default_queue(), args...);
}

/// Launches a kernel with the templated F param and arguments on a
/// device specified by the given nd_range and SYCL queue.
/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem,
/// Args... args).
/// @tparam Dim nd_range dimension number.
/// @tparam Args Types of the arguments to be passed to the kernel.
/// @param range Nd_range specifying the work group and global sizes for the
/// kernel.
/// @param q The SYCL queue on which to execute the kernel.
/// @param mem_size The size, in number of bytes, of the local
/// memory to be allocated for kernel.
/// @param args The arguments to be passed to the kernel.
/// @return A SYCL event object that can be used to synchronize with the
/// kernel's execution.
template <auto F, int Dim, typename... Args>
sycl::event launch(const sycl::nd_range<Dim> &range, size_t mem_size,
sycl::queue q, Args... args) {
return detail::launch<F>(detail::transform_nd_range<Dim>(range), mem_size, q,
args...);
}

/// Launches a kernel with the templated F param and arguments on a
/// device specified by the given nd_range using theSYCL default queue.
/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem,
/// Args... args).
/// @tparam Dim nd_range dimension number.
/// @tparam Args Types of the arguments to be passed to the kernel.
/// @param range Nd_range specifying the work group and global sizes for the
/// kernel.
/// @param mem_size The size, in number of bytes, of the local
/// memory to be allocated for kernel.
/// @param args The arguments to be passed to the kernel.
/// @return A SYCL event object that can be used to synchronize with the
/// kernel's execution.
template <auto F, int Dim, typename... Args>
sycl::event launch(const sycl::nd_range<Dim> &range, size_t mem_size,
Args... args) {
return launch<F>(range, mem_size, get_default_queue(), args...);
}

/// Launches a kernel with the templated F param and arguments on a
/// device with a user-specified grid and block dimensions following the
/// standard of other programming models using a user-defined SYCL queue.
/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem,
/// Args... args).
/// @tparam Dim nd_range dimension number.
/// @tparam Args Types of the arguments to be passed to the kernel.
/// @param grid Grid dimensions represented with an (x, y, z) iteration space.
/// @param threads Block dimensions represented with an (x, y, z) iteration
/// space.
/// @param mem_size The size, in number of bytes, of the local
/// memory to be allocated for kernel.
/// @param args The arguments to be passed to the kernel.
/// @return A SYCL event object that can be used to synchronize with the
/// kernel's execution.
template <auto F, typename... Args>
sycl::event launch(const dim3 &grid, const dim3 &threads, size_t mem_size,
sycl::queue q, Args... args) {
return launch<F>(sycl::nd_range<3>{grid * threads, threads}, mem_size, q,
args...);
}

/// Launches a kernel with the templated F param and arguments on a
/// device with a user-specified grid and block dimensions following the
/// standard of other programming models using the default SYCL queue.
/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem,
/// Args... args).
/// @tparam Dim nd_range dimension number.
/// @tparam Args Types of the arguments to be passed to the kernel.
/// @param grid Grid dimensions represented with an (x, y, z) iteration space.
/// @param threads Block dimensions represented with an (x, y, z) iteration
/// space.
/// @param mem_size The size, in number of bytes, of the
/// local memory to be allocated.
/// @param args The arguments to be passed to the kernel.
/// @return A SYCL event object that can be used to synchronize with the
/// kernel's execution.
template <auto F, typename... Args>
sycl::event launch(const dim3 &grid, const dim3 &threads, size_t mem_size,
Args... args) {
return launch<F>(grid, threads, mem_size, get_default_queue(), args...);
}

} // namespace syclcompat
9 changes: 9 additions & 0 deletions sycl/include/syclcompat/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <utility>

#include <sycl/builtins.hpp>
#include <sycl/ext/oneapi/group_local_memory.hpp>
#include <sycl/usm.hpp>

#include <syclcompat/device.hpp>
Expand All @@ -57,6 +58,14 @@

namespace syclcompat {

template <typename AllocT> auto *local_mem() {
sycl::multi_ptr<AllocT, sycl::access::address_space::local_space>
As_multi_ptr = sycl::ext::oneapi::group_local_memory<AllocT>(
sycl::ext::oneapi::experimental::this_nd_item<3>().get_group());
auto *As = *As_multi_ptr;
return As;
}

namespace detail {
enum memcpy_direction {
host_to_host,
Expand Down
2 changes: 2 additions & 0 deletions sycl/include/syclcompat/syclcompat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,7 @@
#include <syclcompat/defs.hpp>
#include <syclcompat/device.hpp>
#include <syclcompat/dims.hpp>
#include <syclcompat/id_query.hpp>
#include <syclcompat/kernel.hpp>
#include <syclcompat/launch.hpp>
#include <syclcompat/memory.hpp>
7 changes: 7 additions & 0 deletions sycl/test-e2e/syclcompat/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

#pragma once

#include <sycl/half_type.hpp>
#include <tuple>

// Typed call helper
// Iterates over all types and calls Functor f for each of them
template <typename tuple, typename Functor>
Expand All @@ -34,3 +37,7 @@ void instantiate_all_types(Functor &&f) {

#define INSTANTIATE_ALL_TYPES(tuple, f) \
instantiate_all_types<tuple>([]<typename T>() { f<T>(); });

using value_type_list =
std::tuple<int, unsigned int, short, unsigned short, long, unsigned long,
long long, unsigned long long, float, double, sycl::half>;
Loading

0 comments on commit 47d8489

Please sign in to comment.