From 47d84897a87b8676b0a16b6e7b38b33f47cdcf99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Cabrera=20P=C3=A9rez?= Date: Wed, 6 Sep 2023 12:25:36 +0100 Subject: [PATCH] [SYCL][COMPAT] Added id_query and launch headers (#11018) SYCLcompat primary goals and features are documented [here](https://intel.github.io/llvm-docs/syclcompat/README.html). The PR includes the helper code to launch device code through the `launch` mechanism, and the wrappers to the free function queries. --------- Co-authored-by: Joe Todd Co-authored-by: Pietro Ghiglio --- sycl/include/syclcompat/id_query.hpp | 69 ++++ sycl/include/syclcompat/launch.hpp | 222 +++++++++++ sycl/include/syclcompat/memory.hpp | 9 + sycl/include/syclcompat/syclcompat.hpp | 2 + sycl/test-e2e/syclcompat/common.hpp | 7 + .../test-e2e/syclcompat/id_query/id_query.cpp | 129 +++++++ .../syclcompat/id_query/id_query_fixt.hpp | 61 +++ sycl/test-e2e/syclcompat/launch/launch.cpp | 346 ++++++++++++++++++ .../syclcompat/launch/launch_fixt.hpp | 107 ++++++ .../syclcompat/memory/local_memory.cpp | 126 +++++++ .../syclcompat/memory/memory_common.hpp | 4 - .../syclcompat/memory/memory_fixt.hpp | 31 +- 12 files changed, 1107 insertions(+), 6 deletions(-) create mode 100644 sycl/include/syclcompat/id_query.hpp create mode 100644 sycl/include/syclcompat/launch.hpp create mode 100644 sycl/test-e2e/syclcompat/id_query/id_query.cpp create mode 100644 sycl/test-e2e/syclcompat/id_query/id_query_fixt.hpp create mode 100644 sycl/test-e2e/syclcompat/launch/launch.cpp create mode 100644 sycl/test-e2e/syclcompat/launch/launch_fixt.hpp create mode 100644 sycl/test-e2e/syclcompat/memory/local_memory.cpp diff --git a/sycl/include/syclcompat/id_query.hpp b/sycl/include/syclcompat/id_query.hpp new file mode 100644 index 000000000000..7e85e24ca4dd --- /dev/null +++ b/sycl/include/syclcompat/id_query.hpp @@ -0,0 +1,69 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCL compatibility extension + * + * id_query.hpp + * + * Description: + * id_query functionality for the SYCL compatibility extension + **************************************************************************/ + +#pragma once + +#include + +namespace syclcompat { + +using sycl::ext::oneapi::experimental::this_nd_item; + +inline void wg_barrier() { this_nd_item<3>().barrier(); } + +namespace local_id { +inline size_t x() { return this_nd_item<3>().get_local_id(2); } +inline size_t y() { return this_nd_item<3>().get_local_id(1); } +inline size_t z() { return this_nd_item<3>().get_local_id(0); } +} // namespace local_id + +namespace local_range { +inline size_t x() { return this_nd_item<3>().get_local_range(2); } +inline size_t y() { return this_nd_item<3>().get_local_range(1); } +inline size_t z() { return this_nd_item<3>().get_local_range(0); } +} // namespace local_range + +namespace work_group_id { +inline size_t x() { return this_nd_item<3>().get_group(2); } +inline size_t y() { return this_nd_item<3>().get_group(1); } +inline size_t z() { return this_nd_item<3>().get_group(0); } +} // namespace work_group_id + +namespace work_group_range { +inline size_t x() { return this_nd_item<3>().get_group_range(2); } +inline size_t y() { return this_nd_item<3>().get_group_range(1); } +inline size_t z() { return this_nd_item<3>().get_group_range(0); } +} // namespace work_group_range + +namespace global_range { +inline size_t x() { return this_nd_item<3>().get_global_range(2); } +inline size_t y() { return this_nd_item<3>().get_global_range(1); } +inline size_t z() { return this_nd_item<3>().get_global_range(0); } +} // namespace global_range + +namespace global_id { +inline size_t x() { return this_nd_item<3>().get_global_id(2); } +inline size_t y() { return this_nd_item<3>().get_global_id(1); } +inline size_t z() { return this_nd_item<3>().get_global_id(0); } +} // namespace global_id + +} // namespace syclcompat diff --git a/sycl/include/syclcompat/launch.hpp b/sycl/include/syclcompat/launch.hpp new file mode 100644 index 000000000000..c7cf91d90db1 --- /dev/null +++ b/sycl/include/syclcompat/launch.hpp @@ -0,0 +1,222 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCL compatibility extension + * + * launch.hpp + * + * Description: + * launch functionality for the SYCL compatibility extension + **************************************************************************/ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace syclcompat { + +namespace detail { + +template +constexpr size_t getArgumentCount(R (*f)(Types...)) { + return sizeof...(Types); +} + +template +sycl::nd_range<3> transform_nd_range(const sycl::nd_range &range) { + sycl::range global_range = range.get_global_range(); + sycl::range local_range = range.get_local_range(); + if constexpr (Dim == 3) { + return range; + } else if constexpr (Dim == 2) { + return sycl::nd_range<3>{{1, global_range[0], global_range[1]}, + {1, local_range[0], local_range[1]}}; + } + return sycl::nd_range<3>{{1, 1, global_range[0]}, {1, 1, local_range[0]}}; +} + +template +std::enable_if_t, sycl::event> +launch(const sycl::nd_range<3> &range, sycl::queue q, Args... args) { + static_assert(detail::getArgumentCount(F) == sizeof...(args), + "Wrong number of arguments to SYCL kernel"); + static_assert( + std::is_same, void>::value, + "SYCL kernels should return void"); + + return q.parallel_for(range, [=](sycl::nd_item<3>) { F(args...); }); +} + +template +sycl::event launch(const sycl::nd_range<3> &range, size_t mem_size, + sycl::queue q, Args... args) { + static_assert(detail::getArgumentCount(F) == sizeof...(args) + 1, + "Wrong number of arguments to SYCL kernel"); + + using F_t = decltype(F); + using f_return_t = typename std::invoke_result_t; + static_assert(std::is_same::value, + "SYCL kernels should return void"); + + return q.submit([&](sycl::handler &cgh) { + auto local_acc = sycl::local_accessor(mem_size, cgh); + cgh.parallel_for(range, [=](sycl::nd_item<3>) { + auto local_mem = local_acc.get_pointer(); + F(args..., local_mem); + }); + }); +} + +} // namespace detail + +template +sycl::nd_range compute_nd_range(sycl::range global_size_in, + sycl::range work_group_size) { + + if (global_size_in.size() == 0 || work_group_size.size() == 0) { + throw std::invalid_argument("Global or local size is zero!"); + } + for (size_t i = 0; i < Dim; ++i) { + if (global_size_in[i] < work_group_size[i]) + throw std::invalid_argument("Work group size larger than global size"); + } + + auto global_size = + ((global_size_in + work_group_size - 1) / work_group_size) * + work_group_size; + return {global_size, work_group_size}; +} + +inline sycl::nd_range<1> compute_nd_range(int global_size_in, + int work_group_size) { + return compute_nd_range<1>(global_size_in, work_group_size); +} + +template +std::enable_if_t, sycl::event> +launch(const sycl::nd_range &range, sycl::queue q, Args... args) { + return detail::launch(detail::transform_nd_range(range), q, args...); +} + +template +std::enable_if_t, sycl::event> +launch(const sycl::nd_range &range, Args... args) { + return launch(range, get_default_queue(), args...); +} + +// Alternative launch through dim3 objects +template +std::enable_if_t, sycl::event> +launch(const dim3 &grid, const dim3 &threads, sycl::queue q, Args... args) { + return launch(sycl::nd_range<3>{grid * threads, threads}, q, args...); +} + +template +std::enable_if_t, sycl::event> +launch(const dim3 &grid, const dim3 &threads, Args... args) { + return launch(grid, threads, get_default_queue(), args...); +} + +/// Launches a kernel with the templated F param and arguments on a +/// device specified by the given nd_range and SYCL queue. +/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, +/// Args... args). +/// @tparam Dim nd_range dimension number. +/// @tparam Args Types of the arguments to be passed to the kernel. +/// @param range Nd_range specifying the work group and global sizes for the +/// kernel. +/// @param q The SYCL queue on which to execute the kernel. +/// @param mem_size The size, in number of bytes, of the local +/// memory to be allocated for kernel. +/// @param args The arguments to be passed to the kernel. +/// @return A SYCL event object that can be used to synchronize with the +/// kernel's execution. +template +sycl::event launch(const sycl::nd_range &range, size_t mem_size, + sycl::queue q, Args... args) { + return detail::launch(detail::transform_nd_range(range), mem_size, q, + args...); +} + +/// Launches a kernel with the templated F param and arguments on a +/// device specified by the given nd_range using theSYCL default queue. +/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, +/// Args... args). +/// @tparam Dim nd_range dimension number. +/// @tparam Args Types of the arguments to be passed to the kernel. +/// @param range Nd_range specifying the work group and global sizes for the +/// kernel. +/// @param mem_size The size, in number of bytes, of the local +/// memory to be allocated for kernel. +/// @param args The arguments to be passed to the kernel. +/// @return A SYCL event object that can be used to synchronize with the +/// kernel's execution. +template +sycl::event launch(const sycl::nd_range &range, size_t mem_size, + Args... args) { + return launch(range, mem_size, get_default_queue(), args...); +} + +/// Launches a kernel with the templated F param and arguments on a +/// device with a user-specified grid and block dimensions following the +/// standard of other programming models using a user-defined SYCL queue. +/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, +/// Args... args). +/// @tparam Dim nd_range dimension number. +/// @tparam Args Types of the arguments to be passed to the kernel. +/// @param grid Grid dimensions represented with an (x, y, z) iteration space. +/// @param threads Block dimensions represented with an (x, y, z) iteration +/// space. +/// @param mem_size The size, in number of bytes, of the local +/// memory to be allocated for kernel. +/// @param args The arguments to be passed to the kernel. +/// @return A SYCL event object that can be used to synchronize with the +/// kernel's execution. +template +sycl::event launch(const dim3 &grid, const dim3 &threads, size_t mem_size, + sycl::queue q, Args... args) { + return launch(sycl::nd_range<3>{grid * threads, threads}, mem_size, q, + args...); +} + +/// Launches a kernel with the templated F param and arguments on a +/// device with a user-specified grid and block dimensions following the +/// standard of other programming models using the default SYCL queue. +/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, +/// Args... args). +/// @tparam Dim nd_range dimension number. +/// @tparam Args Types of the arguments to be passed to the kernel. +/// @param grid Grid dimensions represented with an (x, y, z) iteration space. +/// @param threads Block dimensions represented with an (x, y, z) iteration +/// space. +/// @param mem_size The size, in number of bytes, of the +/// local memory to be allocated. +/// @param args The arguments to be passed to the kernel. +/// @return A SYCL event object that can be used to synchronize with the +/// kernel's execution. +template +sycl::event launch(const dim3 &grid, const dim3 &threads, size_t mem_size, + Args... args) { + return launch(grid, threads, mem_size, get_default_queue(), args...); +} + +} // namespace syclcompat diff --git a/sycl/include/syclcompat/memory.hpp b/sycl/include/syclcompat/memory.hpp index 212c5cd6220c..c9cfe0235c10 100644 --- a/sycl/include/syclcompat/memory.hpp +++ b/sycl/include/syclcompat/memory.hpp @@ -42,6 +42,7 @@ #include #include +#include #include #include @@ -57,6 +58,14 @@ namespace syclcompat { +template auto *local_mem() { + sycl::multi_ptr + As_multi_ptr = sycl::ext::oneapi::group_local_memory( + sycl::ext::oneapi::experimental::this_nd_item<3>().get_group()); + auto *As = *As_multi_ptr; + return As; +} + namespace detail { enum memcpy_direction { host_to_host, diff --git a/sycl/include/syclcompat/syclcompat.hpp b/sycl/include/syclcompat/syclcompat.hpp index 08e445c329cc..dae35d73e517 100644 --- a/sycl/include/syclcompat/syclcompat.hpp +++ b/sycl/include/syclcompat/syclcompat.hpp @@ -25,5 +25,7 @@ #include #include #include +#include #include +#include #include diff --git a/sycl/test-e2e/syclcompat/common.hpp b/sycl/test-e2e/syclcompat/common.hpp index 4332d61aa38e..fc4302aad849 100644 --- a/sycl/test-e2e/syclcompat/common.hpp +++ b/sycl/test-e2e/syclcompat/common.hpp @@ -22,6 +22,9 @@ #pragma once +#include +#include + // Typed call helper // Iterates over all types and calls Functor f for each of them template @@ -34,3 +37,7 @@ void instantiate_all_types(Functor &&f) { #define INSTANTIATE_ALL_TYPES(tuple, f) \ instantiate_all_types([]() { f(); }); + +using value_type_list = + std::tuple; diff --git a/sycl/test-e2e/syclcompat/id_query/id_query.cpp b/sycl/test-e2e/syclcompat/id_query/id_query.cpp new file mode 100644 index 000000000000..62bd5b886bea --- /dev/null +++ b/sycl/test-e2e/syclcompat/id_query/id_query.cpp @@ -0,0 +1,129 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCLcompat API + * + * id_query.cpp + * + * Description: + * global_id query tests + **************************************************************************/ + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %{run} %t.out + +#include +#include + +#include +#include + +#include "id_query_fixt.hpp" + +void global_id_x_query(int *data) { + data[syclcompat::global_id::x()] = syclcompat::global_id::x(); +} +void global_id_y_query(int *data) { + data[syclcompat::global_id::y()] = syclcompat::global_id::y(); +} +void global_id_z_query(int *data) { + data[syclcompat::global_id::z()] = syclcompat::global_id::z(); +} + +void test_global_id_query() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + constexpr syclcompat::dim3 grid{4}; + constexpr syclcompat::dim3 threads{32}; + + auto checker = [](std::vector input) { + std::vector expected(input.size()); + std::iota(expected.begin(), expected.end(), 0); + assert(std::equal(expected.begin(), expected.end(), input.begin())); + }; + // Check we can query x, y, z components of global_id + QueryLauncher({4, 1, 1}, {32, 1, 1}).launch_dim3(checker); + QueryLauncher({1, 4, 1}, {1, 32, 1}).launch_dim3(checker); + QueryLauncher({1, 1, 4}, {1, 1, 32}).launch_dim3(checker); + + // Check that we can query x component, irrespective of the kernel dimension + QueryLauncher({4, 1, 1}, {32, 1, 1}) + .launch_ndrange<3>(checker); + QueryLauncher({4, 1, 1}, {32, 1, 1}) + .launch_ndrange<2>(checker); + QueryLauncher({4, 1, 1}, {32, 1, 1}) + .launch_ndrange<1>(checker); + + // Check we can query y component for 2D kernel + QueryLauncher({1, 4, 1}, {1, 32, 1}) + .launch_ndrange<2>(checker); +} + +void range_x_query(int *data) { + data[syclcompat::global_id::x()] = syclcompat::global_range::x() * + syclcompat::work_group_range::x() * + syclcompat::local_range::x(); +} + +void test_ranges_query() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + constexpr syclcompat::dim3 grid{4}; + constexpr syclcompat::dim3 threads{32}; + + // global_range::x() * work_group_range::x() * local_range::x(); + int target = grid.x * threads.x * grid.x * threads.x; + + auto checker = [&](std::vector input) { + assert(std::all_of(input.begin(), input.end(), + [=](int a) { return a == target; })); + }; + QueryLauncher(grid, threads).launch_dim3(checker); +} + +void wgroup_id_x_query(int *data) { + data[syclcompat::global_id::x()] = syclcompat::work_group_id::x(); +} +void local_id_x_query(int *data) { + data[syclcompat::global_id::x()] = syclcompat::local_id::x(); +} + +void test_ids_query() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + constexpr syclcompat::dim3 grid{4}; + constexpr syclcompat::dim3 threads{32}; + + auto wgroup_checker = [&](std::vector input) { + for (int i = 0; i < input.size(); ++i) { + assert(input[i] == i / threads.x); + } + }; + QueryLauncher(grid, threads).launch_dim3(wgroup_checker); + + auto local_checker = [&](std::vector input) { + for (int i = 0; i < input.size(); ++i) { + assert(input[i] == i % threads.x); + } + }; + QueryLauncher(grid, threads).launch_dim3(local_checker); +} + +int main() { + test_global_id_query(); + test_ranges_query(); + test_ids_query(); + + return 0; +} diff --git a/sycl/test-e2e/syclcompat/id_query/id_query_fixt.hpp b/sycl/test-e2e/syclcompat/id_query/id_query_fixt.hpp new file mode 100644 index 000000000000..9726982dafc4 --- /dev/null +++ b/sycl/test-e2e/syclcompat/id_query/id_query_fixt.hpp @@ -0,0 +1,61 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCLcompat + * + * id_query_fixt.hpp + * + * Description: + * Fixtures and helpers for to tests the id_query functionality + **************************************************************************/ + +#pragma once + +#include +#include + +// Class to launch a kernel and run a lambda on output data +template class QueryLauncher { +protected: + syclcompat::dim3 grid_; + syclcompat::dim3 threads_; + size_t size_; + int *data_; + std::vector host_data_; + using CheckLambda = std::function)>; + +public: + QueryLauncher(syclcompat::dim3 grid, syclcompat::dim3 threads) + : grid_{grid}, threads_{threads}, size_{grid_.size() * threads_.size()}, + host_data_(size_) { + data_ = (int *)syclcompat::malloc(size_ * sizeof(int)); + syclcompat::memset(data_, 0, size_ * sizeof(int)); + }; + ~QueryLauncher() { syclcompat::free(data_); } + template + void launch_dim3(CheckLambda checker, Args... args) { + syclcompat::launch(grid_, threads_, data_, args...); + syclcompat::memcpy(host_data_.data(), data_, size_ * sizeof(int)); + syclcompat::wait(); + checker(host_data_); + } + template + void launch_ndrange(CheckLambda checker, Args... args) { + sycl::nd_range range = {grid_ * threads_, grid_}; + syclcompat::launch(range, data_, args...); + syclcompat::memcpy(host_data_.data(), data_, size_ * sizeof(int)); + syclcompat::wait(); + checker(host_data_); + } +}; diff --git a/sycl/test-e2e/syclcompat/launch/launch.cpp b/sycl/test-e2e/syclcompat/launch/launch.cpp new file mode 100644 index 000000000000..a54bbb8fc4e4 --- /dev/null +++ b/sycl/test-e2e/syclcompat/launch/launch.cpp @@ -0,0 +1,346 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCLcompat API + * + * launch.cpp + * + * Description: + * launch and launch with dinamyc local memory tests + **************************************************************************/ + +// RUN: %clangxx -std=c++20 -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +#include + +#include "../common.hpp" +#include "launch_fixt.hpp" + +// Dummy kernel functions for testing +inline void empty_kernel(){}; +inline void int_kernel(int a){}; +inline void int_ptr_kernel(int *a){}; +inline void dynamic_local_mem_empty_kernel(char *a){}; + +template +inline void dynamic_local_mem_basicdt_kernel(T value, char *local_mem){}; + +template +void dynamic_local_mem_typed_kernel(T *data, char *local_mem) { + constexpr size_t memsize = LaunchTestWithArgs::LOCAL_MEM_SIZE; + constexpr size_t num_elements = memsize / sizeof(T); + T *typed_local_mem = reinterpret_cast(local_mem); + + const int id = sycl::ext::oneapi::experimental::this_item<1>(); + if (id < num_elements) { + typed_local_mem[id] = static_cast(id); + } + sycl::group_barrier(sycl::ext::oneapi::experimental::this_group<1>()); + if (id < num_elements) { + data[id] = typed_local_mem[num_elements - id - 1]; + } +}; + +template +void compute_nd_range_3d(RangeParams range_param, std::string test_name) { + std::cout << __PRETTY_FUNCTION__ << " " << test_name << std::endl; + + try { + auto g_out = syclcompat::compute_nd_range(range_param.global_range_in_, + range_param.local_range_in_); + sycl::nd_range x_out = {range_param.expect_global_range_out_, + range_param.local_range_in_}; + if (range_param.shouldPass_) { + assert(g_out == x_out); + } else { + assert(false); // Trigger failure, expected std::invalid_argument + } + } catch (std::invalid_argument const &err) { + if (range_param.shouldPass_) { + assert(false); // Trigger failure, unexpected std::invalid_argument + } + } +} + +void test_launch_compute_nd_range_3d() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + compute_nd_range_3d(RangeParams<3>{{11, 1, 1}, {2, 1, 1}, {12, 1, 1}, true}, + "Round up"); + compute_nd_range_3d( + RangeParams<3>{{320, 1, 1}, {32, 1, 1}, {320, 1, 1}, true}, "Even size"); + compute_nd_range_3d( + RangeParams<3>{{32, 193, 1}, {16, 32, 1}, {32, 224, 1}, true}, + "Round up 2"); + compute_nd_range_3d(RangeParams<3>{{10, 0, 0}, {1, 0, 0}, {10, 0, 0}, false}, + "zero size"); + compute_nd_range_3d( + RangeParams<3>{{0, 10, 10}, {0, 10, 10}, {0, 10, 10}, false}, + "zero size 2"); + compute_nd_range_3d(RangeParams<3>{{2, 1, 1}, {32, 1, 1}, {32, 1, 1}, false}, + "local > global"); + compute_nd_range_3d(RangeParams<3>{{1, 2, 1}, {1, 32, 1}, {1, 32, 1}, false}, + "local > global 2"); + compute_nd_range_3d(RangeParams<3>{{1, 1, 2}, {1, 1, 32}, {1, 1, 32}, false}, + "local > global 3"); +} + +void test_no_arg_launch() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + LaunchTest lt; + + syclcompat::launch(lt.range_1_); + syclcompat::launch(lt.range_2_); + syclcompat::launch(lt.range_3_); + syclcompat::launch(lt.grid_, lt.thread_); + + syclcompat::launch(lt.range_1_, lt.q_); + syclcompat::launch(lt.range_2_, lt.q_); + syclcompat::launch(lt.range_3_, lt.q_); + syclcompat::launch(lt.grid_, lt.thread_, lt.q_); +} + +void test_one_arg_launch() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + LaunchTest lt; + + int my_int; + + syclcompat::launch(lt.range_1_, my_int); + syclcompat::launch(lt.range_2_, my_int); + syclcompat::launch(lt.range_3_, my_int); + syclcompat::launch(lt.grid_, lt.thread_, my_int); + + syclcompat::launch(lt.range_1_, lt.q_, my_int); + syclcompat::launch(lt.range_2_, lt.q_, my_int); + syclcompat::launch(lt.range_3_, lt.q_, my_int); + syclcompat::launch(lt.grid_, lt.thread_, lt.q_, my_int); +} + +void test_ptr_arg_launch() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + LaunchTest lt; + + int *int_ptr; + + syclcompat::launch(lt.range_1_, int_ptr); + syclcompat::launch(lt.range_2_, int_ptr); + syclcompat::launch(lt.range_3_, int_ptr); + syclcompat::launch(lt.grid_, lt.thread_, int_ptr); + + syclcompat::launch(lt.range_1_, lt.q_, int_ptr); + syclcompat::launch(lt.range_2_, lt.q_, int_ptr); + syclcompat::launch(lt.range_3_, lt.q_, int_ptr); + syclcompat::launch(lt.grid_, lt.thread_, lt.q_, int_ptr); +} + +void test_dynamic_mem_no_arg_launch() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + LaunchTest lt; + + syclcompat::launch(lt.range_1_, 1); + syclcompat::launch(lt.range_2_, 1); + syclcompat::launch(lt.range_3_, 1); + syclcompat::launch(lt.grid_, lt.thread_, 1); +} + +void test_dynamic_mem_no_arg_launch_q() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + LaunchTest lt; + + syclcompat::launch(lt.range_1_, 1, lt.q_); + syclcompat::launch(lt.range_2_, 1, lt.q_); + syclcompat::launch(lt.range_3_, 1, lt.q_); + syclcompat::launch(lt.grid_, lt.thread_, 1, + lt.q_); +} + +template void test_basic_dt_launch() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + T d_a = T(1); + LaunchTestWithArgs ltt; + + if (ltt.skip_) // Unsupported aspect + return; + + syclcompat::launch>(ltt.range_1_, + ltt.memsize_, d_a); + syclcompat::launch>(ltt.range_2_, + ltt.memsize_, d_a); + syclcompat::launch>(ltt.range_3_, + ltt.memsize_, d_a); + syclcompat::launch>( + ltt.grid_, ltt.thread_, ltt.memsize_, d_a); +} + +template void test_basic_dt_launch_q() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + T d_a = T(1); + LaunchTestWithArgs ltt; + + if (ltt.skip_) // Unsupported aspect + return; + + syclcompat::launch>( + ltt.range_1_, ltt.memsize_, ltt.in_order_q_, d_a); + syclcompat::launch>( + ltt.range_2_, ltt.memsize_, ltt.in_order_q_, d_a); + syclcompat::launch>( + ltt.range_3_, ltt.memsize_, ltt.in_order_q_, d_a); + syclcompat::launch>( + ltt.grid_, ltt.thread_, ltt.memsize_, ltt.in_order_q_, d_a); +} + +template void test_arg_launch() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + LaunchTestWithArgs ltt; + if (ltt.skip_) // Unsupported aspect + return; + + T *d_a = (T *)syclcompat::malloc(ltt.memsize_); + + syclcompat::launch>(ltt.range_1_, + ltt.memsize_, d_a); + syclcompat::launch>(ltt.range_2_, + ltt.memsize_, d_a); + syclcompat::launch>(ltt.range_3_, + ltt.memsize_, d_a); + syclcompat::launch>(ltt.grid_, ltt.thread_, + ltt.memsize_, d_a); + + syclcompat::free(d_a); +} + +template void test_arg_launch_q() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + LaunchTestWithArgs ltt; + if (ltt.skip_) // Unsupported aspect + return; + + T *d_a = (T *)syclcompat::malloc(ltt.memsize_, ltt.in_order_q_); + + syclcompat::launch>( + ltt.range_1_, ltt.memsize_, ltt.in_order_q_, d_a); + syclcompat::launch>( + ltt.range_2_, ltt.memsize_, ltt.in_order_q_, d_a); + syclcompat::launch>( + ltt.range_3_, ltt.memsize_, ltt.in_order_q_, d_a); + syclcompat::launch>( + ltt.grid_, ltt.thread_, ltt.memsize_, ltt.in_order_q_, d_a); + + syclcompat::free(d_a, ltt.in_order_q_); +} + +template void test_local_mem_usage() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + LaunchTestWithArgs ltt; + if (ltt.skip_) // Unsupported aspect + return; + + size_t num_elements = ltt.memsize_ / sizeof(T); + + T *h_a = (T *)syclcompat::malloc_host(ltt.memsize_); + T *d_a = (T *)syclcompat::malloc(ltt.memsize_); + + // d_a is the kernel output, no memcpy needed + syclcompat::launch>(ltt.grid_, ltt.thread_, + ltt.memsize_, d_a); + + syclcompat::memcpy(h_a, d_a, ltt.memsize_); + syclcompat::free(d_a); + + for (int i = 0; i < num_elements; i++) { + assert(h_a[i] == static_cast(num_elements - i - 1)); + } + syclcompat::free(h_a); +} + +template void test_local_mem_usage_q() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + LaunchTestWithArgs ltt; + if (ltt.skip_) // Unsupported aspect + return; + + size_t num_elements = ltt.memsize_ / sizeof(T); + auto &q = ltt.in_order_q_; + + T *h_a = (T *)syclcompat::malloc_host(ltt.memsize_); + T *d_a = (T *)syclcompat::malloc(ltt.memsize_, q); + + // d_a is the kernel output, no memcpy needed + syclcompat::launch>(ltt.grid_, ltt.thread_, + ltt.memsize_, q, d_a); + + syclcompat::memcpy(h_a, d_a, ltt.memsize_, q); + syclcompat::free(d_a, q); + + for (size_t i = 0; i < num_elements; i++) { + assert(h_a[i] == static_cast(num_elements - i - 1)); + } + + syclcompat::free(h_a); +} + +template void test_memsize_no_arg_launch() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + LaunchTest lt; + T memsize = static_cast(8); + + syclcompat::launch(lt.grid_, lt.thread_, + memsize); +} + +template void test_memsize_no_arg_launch_q() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + LaunchTest lt; + T memsize = static_cast(8); + + syclcompat::launch(lt.grid_, lt.thread_, + memsize, lt.q_); +} + +int main() { + test_launch_compute_nd_range_3d(); + test_no_arg_launch(); + test_one_arg_launch(); + test_ptr_arg_launch(); + + test_dynamic_mem_no_arg_launch(); + test_dynamic_mem_no_arg_launch_q(); + + INSTANTIATE_ALL_TYPES(value_type_list, test_basic_dt_launch); + INSTANTIATE_ALL_TYPES(value_type_list, test_basic_dt_launch_q); + INSTANTIATE_ALL_TYPES(value_type_list, test_arg_launch); + INSTANTIATE_ALL_TYPES(value_type_list, test_arg_launch_q); + INSTANTIATE_ALL_TYPES(value_type_list, test_local_mem_usage); + INSTANTIATE_ALL_TYPES(value_type_list, test_local_mem_usage_q); + + INSTANTIATE_ALL_TYPES(memsize_type_list, test_memsize_no_arg_launch); + INSTANTIATE_ALL_TYPES(memsize_type_list, test_memsize_no_arg_launch_q); + + return 0; +} diff --git a/sycl/test-e2e/syclcompat/launch/launch_fixt.hpp b/sycl/test-e2e/syclcompat/launch/launch_fixt.hpp new file mode 100644 index 000000000000..793a4f2dfa7e --- /dev/null +++ b/sycl/test-e2e/syclcompat/launch/launch_fixt.hpp @@ -0,0 +1,107 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCLcompat + * + * launch_fixt.hpp + * + * Description: + * Fixtures and helpers for to tests the launch functionality + **************************************************************************/ + +#pragma once + +#include +#include + +// Struct containing test case data (local & global ranges) +template struct RangeParams { + RangeParams(sycl::range global_range_in, sycl::range local_range, + sycl::range expect_global_range_out, bool pass) + : global_range_in_{global_range_in}, local_range_in_{local_range}, + expect_global_range_out_{expect_global_range_out}, shouldPass_{pass} {} + + sycl::range local_range_in_; + sycl::range global_range_in_; + sycl::range expect_global_range_out_; + bool shouldPass_; + + // Pretty printing of RangeParams + friend std::ostream &operator<<(std::ostream &os, const RangeParams &range) { + auto print_range = [](std::ostream &os, const sycl::range range) { + os << " {"; + for (int i = 0; i < Dim; ++i) { + os << range[i]; + os << ((Dim - i == 1) ? "} " : ", "); + } + }; + os << "Local:"; + print_range(os, range.local_range_in_); + os << "Global (in): "; + print_range(os, range.global_range_in_); + os << "Global (out): "; + print_range(os, range.expect_global_range_out_); + os << (range.shouldPass_ ? "Should Work" : "Should Throw"); + return os; + } +}; + +// Fixture for launch tests - initializes a few different +// range-like members & a queue. +struct LaunchTest { + LaunchTest() + : q_{syclcompat::get_default_queue()}, grid_{4, 2, 2}, thread_{32, 2, 2}, + range_1_{128, 32}, range_2_{{4, 128}, {2, 32}}, + range_3_{{2, 4, 64}, {2, 2, 32}} {} + sycl::queue const q_; + syclcompat::dim3 const grid_; + syclcompat::dim3 const thread_; + sycl::nd_range<1> const range_1_; + sycl::nd_range<2> const range_2_; + sycl::nd_range<3> const range_3_; +}; + +// Typed tests +template struct LaunchTestWithArgs : public LaunchTest { + LaunchTestWithArgs() + : LaunchTest(), memsize_{LOCAL_MEM_SIZE}, + in_order_q_{{sycl::property::queue::in_order()}}, skip_{false} { + should_skip(); + } + + void should_skip() { + if (!syclcompat::get_current_device().has(sycl::aspect::fp64) && + std::is_same_v) { + std::cout << " sycl::aspect::fp64 not supported by the SYCL device." + << std::endl; + skip_ = true; + } + if (!syclcompat::get_current_device().has(sycl::aspect::fp16) && + std::is_same_v) { + + std::cout << " sycl::aspect::fp16 not supported by the SYCL device." + << std::endl; + skip_ = true; + } + } + + constexpr static size_t LOCAL_MEM_SIZE = 64; + + size_t const memsize_; + sycl::queue const in_order_q_; + bool skip_; +}; + +using memsize_type_list = + std::tuple; diff --git a/sycl/test-e2e/syclcompat/memory/local_memory.cpp b/sycl/test-e2e/syclcompat/memory/local_memory.cpp new file mode 100644 index 000000000000..831fbc0925e5 --- /dev/null +++ b/sycl/test-e2e/syclcompat/memory/local_memory.cpp @@ -0,0 +1,126 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCLcompat API + * + * local_memory.cpp + * + * Description: + * launch tests with static local memory + **************************************************************************/ + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %{run} %t.out + +#include + +#include +#include +#include +#include + +#include "memory_fixt.hpp" + +// 1D test +// Write id to linear block, then reverse order +template void local_mem_1d(int *d_A) { + int *as = syclcompat::local_mem(); + int id = syclcompat::local_id::x(); + as[id] = id; + syclcompat::wg_barrier(); + int val = as[BLOCK_SIZE - id - 1]; + d_A[syclcompat::global_id::x()] = val; +} + +void test_local_1d() { + auto checker = [](std::vector input) { + std::vector expected(input.size()); + std::iota(expected.rbegin(), expected.rend(), 0); + assert(std::equal(expected.begin(), expected.end(), input.begin())); + }; + LocalMemTest>(1, 32).launch_test(checker); +} + +// 2D test +// Write id to 2D block, then reverse order +template void local_mem_2d(int *d_A) { + auto as = syclcompat::local_mem(); + int id_x = syclcompat::local_id::x(); + int id_y = syclcompat::local_id::y(); + as[id_y][id_x] = id_x * BLOCK_SIZE + id_y; + syclcompat::wg_barrier(); + int val = as[BLOCK_SIZE - id_y - 1][BLOCK_SIZE - id_x - 1]; + d_A[syclcompat::global_id::y() * BLOCK_SIZE + syclcompat::global_id::x()] = + val; +} + +void test_local_2d() { + constexpr int TILE_SIZE = 16; + auto checker = [](std::vector input) { + for (int y = 0; y < TILE_SIZE; ++y) { + for (int x = 0; x < TILE_SIZE; ++x) { + int linear_id = y * TILE_SIZE + x; + int expected = ((TILE_SIZE - x - 1) * TILE_SIZE) + (TILE_SIZE - y - 1); + assert(input[linear_id] == expected); + } + } + }; + LocalMemTest>({1, 1}, {TILE_SIZE, TILE_SIZE}) + .launch_test(checker); +} + +// 3D test +// Write id to 3D block, then reverse order +template void local_mem_3d(int *d_A) { + auto as = syclcompat::local_mem(); + int id_x = syclcompat::local_id::x(); + int id_y = syclcompat::local_id::y(); + int id_z = syclcompat::local_id::z(); + as[id_z][id_y][id_x] = + (id_x * (BLOCK_SIZE * BLOCK_SIZE)) + (id_y * BLOCK_SIZE) + id_z; + syclcompat::wg_barrier(); + int val = + as[BLOCK_SIZE - id_z - 1][BLOCK_SIZE - id_y - 1][BLOCK_SIZE - id_x - 1]; + d_A[syclcompat::global_id::z() * BLOCK_SIZE * BLOCK_SIZE + + syclcompat::global_id::y() * BLOCK_SIZE + syclcompat::global_id::x()] = + val; +} + +void test_local_3d() { + constexpr int TILE_SIZE = 4; + auto checker = [](std::vector input) { + for (int z = 0; z < TILE_SIZE; ++z) { + for (int y = 0; y < TILE_SIZE; ++y) { + for (int x = 0; x < TILE_SIZE; ++x) { + int linear_id = z * TILE_SIZE * TILE_SIZE + y * TILE_SIZE + x; + int expected = ((TILE_SIZE - x - 1) * TILE_SIZE * TILE_SIZE) + + ((TILE_SIZE - y - 1) * TILE_SIZE) + + (TILE_SIZE - z - 1); + assert(input[linear_id] == expected); + } + } + } + }; + LocalMemTest>({1, 1, 1}, + {TILE_SIZE, TILE_SIZE, TILE_SIZE}) + .launch_test(checker); +} + +int main() { + test_local_1d(); + test_local_2d(); + test_local_3d(); + + return 0; +} diff --git a/sycl/test-e2e/syclcompat/memory/memory_common.hpp b/sycl/test-e2e/syclcompat/memory/memory_common.hpp index 8c24e2096e29..5ee5aac75a3f 100644 --- a/sycl/test-e2e/syclcompat/memory/memory_common.hpp +++ b/sycl/test-e2e/syclcompat/memory/memory_common.hpp @@ -34,7 +34,3 @@ inline void check(float *h_data, float *h_ref, size_t size) { assert(diff <= 1.e-6); } } - -using value_type_list = - std::tuple; diff --git a/sycl/test-e2e/syclcompat/memory/memory_fixt.hpp b/sycl/test-e2e/syclcompat/memory/memory_fixt.hpp index ca79ed026344..7c613e25a4a4 100644 --- a/sycl/test-e2e/syclcompat/memory/memory_fixt.hpp +++ b/sycl/test-e2e/syclcompat/memory/memory_fixt.hpp @@ -24,10 +24,11 @@ #include #include +#include #include -#define WG_SIZE 256 -#define NUM_WG 32 +constexpr size_t WG_SIZE = 256; +constexpr size_t NUM_WG = 32; // Fixture to set up & launch a kernel to depend on, or // a host_task which depends on something else @@ -160,3 +161,29 @@ template struct USMTest { size_t size_; bool skip; }; + +template class LocalMemTest { +public: + LocalMemTest(syclcompat::dim3 grid, syclcompat::dim3 threads) + : grid_{grid}, threads_{threads}, size_{grid_.size() * threads_.size()}, + host_data_(size_) { + data_ = (int *)syclcompat::malloc(size_ * sizeof(int)); + }; + ~LocalMemTest() { syclcompat::free(data_); }; + + template + void launch_test(Lambda checker, Args... args) { + syclcompat::launch(grid_, threads_, data_, args...); + syclcompat::memcpy(host_data_.data(), data_, size_ * sizeof(int)); + checker(host_data_); + } + +private: + syclcompat::dim3 grid_; + syclcompat::dim3 threads_; + size_t size_; + sycl::queue q_; + int *data_; + std::vector host_data_; + using CheckLambda = std::function)>; +};