Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Add prototype of group algorithms #1236

Merged
merged 19 commits into from
Mar 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/include/CL/__spirv/spirv_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ extern SYCL_EXTERNAL bool __spirv_GroupAny(__spv::Scope Execution,
// Declaration of the __spirv_GroupBroadcast builtin: it takes an execution
// scope, a value of type dataT and a local id, and returns a dataT.
// NOTE(review): this hunk is shown without +/- markers. The file header
// reports "1 addition & 1 deletion", so the uint32_t LocalId line is
// presumably the removed one and the size_t LocalId line its replacement --
// confirm against the real diff.
template <typename dataT>
extern SYCL_EXTERNAL dataT __spirv_GroupBroadcast(__spv::Scope Execution,
dataT Value,
uint32_t LocalId) noexcept;
size_t LocalId) noexcept;

template <typename dataT>
extern SYCL_EXTERNAL dataT
Expand Down
4 changes: 2 additions & 2 deletions sycl/include/CL/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,22 @@
#include <CL/sycl/image.hpp>
#include <CL/sycl/intel/builtins.hpp>
#include <CL/sycl/intel/function_pointer.hpp>
#include <CL/sycl/intel/group_algorithm.hpp>
#include <CL/sycl/intel/sub_group.hpp>
#include <CL/sycl/item.hpp>
#include <CL/sycl/kernel.hpp>
#include <CL/sycl/multi_ptr.hpp>
#include <CL/sycl/nd_item.hpp>
#include <CL/sycl/nd_range.hpp>
#include <CL/sycl/ordered_queue.hpp>
#include <CL/sycl/pipes.hpp>
#include <CL/sycl/platform.hpp>
#include <CL/sycl/pointers.hpp>
#include <CL/sycl/program.hpp>
#include <CL/sycl/queue.hpp>
#include <CL/sycl/ordered_queue.hpp>
#include <CL/sycl/range.hpp>
#include <CL/sycl/sampler.hpp>
#include <CL/sycl/stream.hpp>
#include <CL/sycl/types.hpp>
#include <CL/sycl/usm.hpp>
#include <CL/sycl/version.hpp>

55 changes: 55 additions & 0 deletions sycl/include/CL/sycl/detail/spirv.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//===-- spirv.hpp - Helpers to generate SPIR-V instructions ----*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once
#include <CL/__spirv/spirv_ops.hpp>
#include <CL/__spirv/spirv_types.hpp>
#include <CL/__spirv/spirv_vars.hpp>
#include <CL/sycl/detail/generic_type_traits.hpp>
#include <CL/sycl/detail/type_traits.hpp>

#ifdef __SYCL_DEVICE_ONLY__
__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {
namespace spirv {

// Broadcast with scalar local index.
// Converts the value `x` and the index `local_id` to their OpenCL types
// (ConvertToOpenCLType_t / convertDataToType) and forwards both to
// __spirv_GroupBroadcast, passing the template argument S as the scope.
// The enable_if_t return type restricts this overload to integral IdT.
template <__spv::Scope S, typename T, typename IdT>
// NOTE(review): the next two lines are GitHub review-UI text captured with
// the page, not part of the C++ source.
Pennycook marked this conversation as resolved.
Show resolved Hide resolved
detail::enable_if_t<std::is_integral<IdT>::value, T>
GroupBroadcast(T x, IdT local_id) {
// OpenCL-side types corresponding to the value type and the index type.
using OCLT = detail::ConvertToOpenCLType_t<T>;
using OCLIdT = detail::ConvertToOpenCLType_t<IdT>;
// Convert both arguments before handing them to the SPIR-V builtin.
OCLT ocl_x = detail::convertDataToType<T, OCLT>(x);
OCLIdT ocl_id = detail::convertDataToType<IdT, OCLIdT>(local_id);
return __spirv_GroupBroadcast(S, ocl_x, ocl_id);
}

// Broadcast with vector local index.
// A one-dimensional id is forwarded to the scalar-index overload. Otherwise
// the id components are copied in reverse order into a
// vec<size_t, Dimensions>; the value and that vector are then converted to
// their OpenCL types and passed to __spirv_GroupBroadcast with scope S.
template <__spv::Scope S, typename T, int Dimensions>
T GroupBroadcast(T x, id<Dimensions> local_id) {
  if (Dimensions == 1) {
    return GroupBroadcast<S>(x, local_id[0]);
  }

  using ReversedIdT = vec<size_t, Dimensions>;
  using OCLValueT = detail::ConvertToOpenCLType_t<T>;
  using OCLReversedIdT = detail::ConvertToOpenCLType_t<ReversedIdT>;

  // Walk the destination forwards while reading the id from its last
  // component down to its first.
  ReversedIdT reversed_id;
  for (int dst = 0, src = Dimensions - 1; dst < Dimensions; ++dst, --src) {
    reversed_id[dst] = local_id[src];
  }

  OCLValueT ocl_value = detail::convertDataToType<T, OCLValueT>(x);
  OCLReversedIdT ocl_reversed_id =
      detail::convertDataToType<ReversedIdT, OCLReversedIdT>(reversed_id);
  return __spirv_GroupBroadcast(S, ocl_value, ocl_reversed_id);
}

} // namespace spirv
} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
#endif // __SYCL_DEVICE_ONLY__
8 changes: 8 additions & 0 deletions sycl/include/CL/sycl/detail/type_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,14 @@ template <typename T>
struct is_arithmetic
: bool_constant<is_integral<T>::value || is_floating_point<T>::value> {};

// is_scalar_arithmetic
// value is true when is_arithmetic<T> holds and is_vec<T> does not.
template <typename T>
struct is_scalar_arithmetic
    : bool_constant<is_arithmetic<T>::value && !is_vec<T>::value> {};

// is_vector_arithmetic
// value is true when both is_arithmetic<T> and is_vec<T> hold.
template <typename T>
struct is_vector_arithmetic
    : bool_constant<is_arithmetic<T>::value && is_vec<T>::value> {};

// is_pointer
template <typename T> struct is_pointer_impl : std::false_type {};

Expand Down
139 changes: 73 additions & 66 deletions sycl/include/CL/sycl/group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,38 +81,45 @@ template <typename T, int Dimensions = 1> class private_memory {
#endif // #ifdef __SYCL_DEVICE_ONLY__
};

template <int dimensions = 1> class group {
template <int Dimensions = 1> class group {
public:
#ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
using id_type = id<Dimensions>;
using range_type = range<Dimensions>;
using linear_id_type = size_t;
static constexpr int dimensions = Dimensions;
#endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__

group() = delete;

id<dimensions> get_id() const { return index; }
id<Dimensions> get_id() const { return index; }

size_t get_id(int dimension) const { return index[dimension]; }

range<dimensions> get_global_range() const { return globalRange; }
range<Dimensions> get_global_range() const { return globalRange; }

size_t get_global_range(int dimension) const {
return globalRange[dimension];
}

range<dimensions> get_local_range() const { return localRange; }
range<Dimensions> get_local_range() const { return localRange; }

size_t get_local_range(int dimension) const { return localRange[dimension]; }

range<dimensions> get_group_range() const { return groupRange; }
range<Dimensions> get_group_range() const { return groupRange; }

size_t get_group_range(int dimension) const {
return get_group_range()[dimension];
}

size_t operator[](int dimension) const { return index[dimension]; }

template <int dims = dimensions>
template <int dims = Dimensions>
typename std::enable_if<(dims == 1), size_t>::type get_linear_id() const {
return index[0];
}

template <int dims = dimensions>
template <int dims = Dimensions>
typename std::enable_if<(dims == 2), size_t>::type get_linear_id() const {
return index[0] * groupRange[1] + index[1];
}
Expand All @@ -127,7 +134,7 @@ template <int dimensions = 1> class group {
// size_t get_linear_id()const
// Get a linearized version of the work-group id. Calculating a linear
// work-group id from a multi-dimensional index follows the equation 4.3.
template <int dims = dimensions>
template <int dims = Dimensions>
typename std::enable_if<(dims == 3), size_t>::type get_linear_id() const {
return (index[0] * groupRange[1] * groupRange[2]) +
(index[1] * groupRange[2]) + index[2];
Expand All @@ -139,41 +146,41 @@ template <int dimensions = 1> class group {
// compilers are expected to optimize when possible
detail::workGroupBarrier();
#ifdef __SYCL_DEVICE_ONLY__
range<dimensions> GlobalSize{
__spirv::initGlobalSize<dimensions, range<dimensions>>()};
range<dimensions> LocalSize{
__spirv::initWorkgroupSize<dimensions, range<dimensions>>()};
id<dimensions> GlobalId{
__spirv::initGlobalInvocationId<dimensions, id<dimensions>>()};
id<dimensions> LocalId{
__spirv::initLocalInvocationId<dimensions, id<dimensions>>()};
range<Dimensions> GlobalSize{
__spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
range<Dimensions> LocalSize{
__spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
id<Dimensions> GlobalId{
__spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
id<Dimensions> LocalId{
__spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};

// no 'iterate' in the device code variant, because
// (1) this code is already invoked by each work item as a part of the
// enclosing parallel_for_work_group kernel
// (2) the range this pfwi iterates over matches work group size exactly
item<dimensions, false> GlobalItem =
detail::Builder::createItem<dimensions, false>(GlobalSize, GlobalId);
item<dimensions, false> LocalItem =
detail::Builder::createItem<dimensions, false>(LocalSize, LocalId);
h_item<dimensions> HItem =
detail::Builder::createHItem<dimensions>(GlobalItem, LocalItem);
item<Dimensions, false> GlobalItem =
detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
item<Dimensions, false> LocalItem =
detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
h_item<Dimensions> HItem =
detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);

Func(HItem);
#else
id<dimensions> GroupStartID = index * localRange;
id<Dimensions> GroupStartID = index * localRange;

// ... host variant needs explicit 'iterate' because it is serial
detail::NDLoop<dimensions>::iterate(
localRange, [&](const id<dimensions> &LocalID) {
item<dimensions, false> GlobalItem =
detail::Builder::createItem<dimensions, false>(
detail::NDLoop<Dimensions>::iterate(
localRange, [&](const id<Dimensions> &LocalID) {
item<Dimensions, false> GlobalItem =
detail::Builder::createItem<Dimensions, false>(
globalRange, GroupStartID + LocalID);
item<dimensions, false> LocalItem =
detail::Builder::createItem<dimensions, false>(localRange,
item<Dimensions, false> LocalItem =
detail::Builder::createItem<Dimensions, false>(localRange,
LocalID);
h_item<dimensions> HItem =
detail::Builder::createHItem<dimensions>(GlobalItem, LocalItem);
h_item<Dimensions> HItem =
detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
Func(HItem);
});
#endif // __SYCL_DEVICE_ONLY__
Expand All @@ -185,52 +192,52 @@ template <int dimensions = 1> class group {
}

template <typename WorkItemFunctionT>
void parallel_for_work_item(range<dimensions> flexibleRange,
void parallel_for_work_item(range<Dimensions> flexibleRange,
WorkItemFunctionT Func) const {
detail::workGroupBarrier();
#ifdef __SYCL_DEVICE_ONLY__
range<dimensions> GlobalSize{
__spirv::initGlobalSize<dimensions, range<dimensions>>()};
range<dimensions> LocalSize{
__spirv::initWorkgroupSize<dimensions, range<dimensions>>()};
id<dimensions> GlobalId{
__spirv::initGlobalInvocationId<dimensions, id<dimensions>>()};
id<dimensions> LocalId{
__spirv::initLocalInvocationId<dimensions, id<dimensions>>()};

item<dimensions, false> GlobalItem =
detail::Builder::createItem<dimensions, false>(GlobalSize, GlobalId);
item<dimensions, false> LocalItem =
detail::Builder::createItem<dimensions, false>(LocalSize, LocalId);
h_item<dimensions> HItem = detail::Builder::createHItem<dimensions>(
range<Dimensions> GlobalSize{
__spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
range<Dimensions> LocalSize{
__spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
id<Dimensions> GlobalId{
__spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
id<Dimensions> LocalId{
__spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};

item<Dimensions, false> GlobalItem =
detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
item<Dimensions, false> LocalItem =
detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
GlobalItem, LocalItem, flexibleRange);

// iterate over flexible range with work group size stride; each item
// performs flexibleRange/LocalSize iterations (if the former is divisible
// by the latter)
detail::NDLoop<dimensions>::iterate(
detail::NDLoop<Dimensions>::iterate(
LocalId, LocalSize, flexibleRange,
[&](const id<dimensions> &LogicalLocalID) {
[&](const id<Dimensions> &LogicalLocalID) {
HItem.setLogicalLocalID(LogicalLocalID);
Func(HItem);
});
#else
id<dimensions> GroupStartID = index * localRange;
id<Dimensions> GroupStartID = index * localRange;

detail::NDLoop<dimensions>::iterate(
localRange, [&](const id<dimensions> &LocalID) {
item<dimensions, false> GlobalItem =
detail::Builder::createItem<dimensions, false>(
detail::NDLoop<Dimensions>::iterate(
localRange, [&](const id<Dimensions> &LocalID) {
item<Dimensions, false> GlobalItem =
detail::Builder::createItem<Dimensions, false>(
globalRange, GroupStartID + LocalID);
item<dimensions, false> LocalItem =
detail::Builder::createItem<dimensions, false>(localRange,
item<Dimensions, false> LocalItem =
detail::Builder::createItem<Dimensions, false>(localRange,
LocalID);
h_item<dimensions> HItem = detail::Builder::createHItem<dimensions>(
h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
GlobalItem, LocalItem, flexibleRange);

detail::NDLoop<dimensions>::iterate(
detail::NDLoop<Dimensions>::iterate(
LocalID, localRange, flexibleRange,
[&](const id<dimensions> &LogicalLocalID) {
[&](const id<Dimensions> &LogicalLocalID) {
HItem.setLogicalLocalID(LogicalLocalID);
Func(HItem);
});
Expand Down Expand Up @@ -311,23 +318,23 @@ template <int dimensions = 1> class group {
waitForHelper(Events...);
}

bool operator==(const group<dimensions> &rhs) const {
bool operator==(const group<Dimensions> &rhs) const {
bool Result = (rhs.globalRange == globalRange) &&
(rhs.localRange == localRange) && (rhs.index == index);
__SYCL_ASSERT(rhs.groupRange == groupRange &&
"inconsistent group class fields");
return Result;
}

bool operator!=(const group<dimensions> &rhs) const {
bool operator!=(const group<Dimensions> &rhs) const {
return !((*this) == rhs);
}

private:
range<dimensions> globalRange;
range<dimensions> localRange;
range<dimensions> groupRange;
id<dimensions> index;
range<Dimensions> globalRange;
range<Dimensions> localRange;
range<Dimensions> groupRange;
id<Dimensions> index;

void waitForHelper() const {}

Expand All @@ -343,8 +350,8 @@ template <int dimensions = 1> class group {

protected:
friend class detail::Builder;
group(const range<dimensions> &G, const range<dimensions> &L,
const range<dimensions> GroupRange, const id<dimensions> &I)
group(const range<Dimensions> &G, const range<Dimensions> &L,
const range<Dimensions> GroupRange, const id<Dimensions> &I)
: globalRange(G), localRange(L), groupRange(GroupRange), index(I) {
// Make sure local range divides global without remainder:
__SYCL_ASSERT(((G % L).size() == 0) &&
Expand Down
Loading